X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.web-live.git;a=blobdiff_plain;f=pctb%2Focr.h;h=792110466f4dfe05cde96b469697321ea56b1f1d;hp=26bccbc7b83b81bb693b27995063ed7cedbecc1d;hb=5d58a08423953756871493042a9cfff032b66e18;hpb=a7c52a5cdad0b1a4c4e053992858674e8d7953a7 diff --git a/pctb/ocr.h b/pctb/ocr.h index 26bccbc..7921104 100644 --- a/pctb/ocr.h +++ b/pctb/ocr.h @@ -1,14 +1,35 @@ #ifndef OCR_H #define OCR_H + +#define DEBUG_RECTANGLES +// #define DEBUG_OCR + + + +#define _GNU_SOURCE + #include #include #include #include #include #include +#include +#include +#include +#include + #include #include +#include + + +typedef struct { + int w,h; + char d[]; +} CanonImage; + typedef uint32_t Pixcol; #define PSPIXCOL(priscan) priscan##32 @@ -16,19 +37,81 @@ typedef uint32_t Pixcol; typedef struct { const char *s; /* valid until next call to ocr() */ int l,r; /* column numbers */ - int ctx; /* match context index */ + unsigned ctxmap; /* match context index */ } OcrResultGlyph; -OcrResultGlyph *ocr(int w, int h, Pixcol cols[]); +typedef const struct OcrCellTypeInfo *OcrCellType; +extern const struct OcrCellTypeInfo ocr_celltype_text; +extern const struct OcrCellTypeInfo ocr_celltype_number; + +typedef struct OcrReader OcrReader; +OcrReader *ocr_init(int h); + +OcrResultGlyph *ocr(OcrReader *rd, OcrCellType, int w, Pixcol cols[]); /* return value is array terminated by {0,-1,-1} * array is valid until next call to ocr() */ -void ocr_init(void); - void debug_flush(void); #define eassert assert #define debug stdout +const char *get_vardir(void); + +CanonImage *file_read_image(FILE *f); +int main_test(void); + +#define MAX_PAGES 100 +extern CanonImage *page_images[MAX_PAGES]; +extern int npages; + + +typedef struct { + unsigned long rgb; /* on screen */ + char c; /* canonical */ +} CanonColourInfo; + +extern const CanonColourInfo canoncolourinfos[]; + +CanonImage *alloc_canon_image(int w, int h); + +#ifdef DEBUG_RECTANGLES +# define CANIMG_DEBUG_RECTANGLE_1LINE(im,w,h) \ + fprintf(debug, "%4d ",y); \ + r= fwrite(im->d + y*w, 1,w, debug); \ + eassert(r==w); \ + fputc('\n',debug); +#else +# define CANIMG_DEBUG_RECTANGLE_1LINE(im,y,h) /* nothing */ +#endif + +#define CANONICALISE_IMAGE(im,w,h, COMPUTE_RGB) do{ \ + /* compute_rgb should be a number of statements, or \ + * a block, which assigns to \ + * unsigned long rgb; \ + * given the values of \ + * int x,y; \ + * all of which are anamorphic. Result is stored in im. \ + */ \ + (im)= alloc_canon_image((w), (h)); \ + \ + int x,y,r; \ + for (y=0; y<(h); y++) { \ + for (x=0; x<(w); x++) { \ + const CanonColourInfo *cci; \ + unsigned long rgb; \ + COMPUTE_RGB; \ + for (cci=canoncolourinfos; cci->c; cci++) \ + if (cci->rgb == rgb) { \ + (im)->d[y*(w) + x]= cci->c; \ + break; \ + } \ + } \ + CANIMG_DEBUG_RECTANGLE_1LINE((im),(w),(h)) \ + } \ + debug_flush(); \ + }while(0) + + #endif /*OCR_H*/