-#include "ocr.h"
+#include "convert.h"
-typedef struct { /* one entry of the exact-colour -> canonical-character table */
- unsigned long rgb; /* on screen; compared against (R<<16)|(G<<8)|B of each pixel read */
- char c; /* canonical; the table's terminating entry has c==0 */
-} CanonColourInfo;
-
-static int height, width;
-static char *image;
/* Flush the `debug` stream; an eassert fails if fflush() reports an
 * error or if the stream's error indicator is set afterwards. */
void debug_flush(void) {
eassert(!fflush(debug));
eassert(!ferror(debug));
}
-typedef struct { /* integer pixel position; get() reads image[y*width + x] */
- int x, y;
-} Point;
-
-typedef struct { /* both inclusive */
- Point tl; /* corner with the smaller x and y (find_structure's `whole` starts at {0,0}) */
- Point br; /* corner with the larger x and y (`whole` ends at {width-1,height-1}) */
-} Rect;
-
-static inline char get(int x, int y) { return image[y * width + x]; } /* canonical character at column x, row y; performs no bounds checking */
-static inline char get_p(Point p) { return get(p.x,p.y); } /* same lookup, taking the position as a Point */
-
-
-#define START_MAIN {200,200} /* seed Point from which find_structure walks outwards; also the initial value of both corners of mainr */
-#define MIN_COLUMNS 6 /* find_structure asserts at least this many columns; the old main OCRs this many cells per row */
-#define INTERESTING_COLUMNS 6 /* number of column right-edges recorded in colrightx[] */
-#define TEXT_COLUMNS 2 /* columns with index below this are OCR'd with ocr_celltype_text, the rest with ocr_celltype_number */
-#define MAX_COLUMNS 7 /* find_structure asserts that the column index stays below this */
-
-static Rect mainr = { START_MAIN,START_MAIN }; /* grown by find_structure until bounded by '*' pixels, then trimmed of '+' border strips */
-static int commbasey, comminty; /* y at which the row around START_MAIN begins, and the y step between successive rows */
-static int colrightx[INTERESTING_COLUMNS]; /* rightmost x of each recorded column, filled in by find_structure */
-static int text_h; /* set to comminty-1; passed to ocr_init and used as the number of rows packed per column in ocr_rectangle */
-static OcrReader *rd; /* result of ocr_init(text_h), handed to every ocr() call */
-
-static const CanonColourInfo canoncolourinfos[]= { /* exact RGB matches only; a pixel matching no entry keeps the '?' fill written by load_image_and_canonify */
- { 0x475A5E, '*' }, /* edge */
- { 0x2C5F7A, '*' }, /* edge just under box heading shadow */
- { 0x7D9094, '+' }, /* interbox */
- { 0xBDC5BF, ' ' }, /* background - pale */
- { 0xADB5AF, ' ' }, /* background - dark */
- { 0x000000, 'o' }, /* foreground */
- { 0xD4B356, ' ' }, /* background (cursor) */
- { 0xFFFFFF, 'o' }, /* foreground (cursor) */
- { 0,0 } /* terminator: the lookup loop stops at the entry whose c is 0 */
-};
-
-/* Check that every pixel in the inclusive rectangle (tlx,tly)..(brx,bry)
- * has a canonical character contained in the string `ok`; otherwise the
- * eassert fails.  Coordinates go straight to get(), which does no bounds
- * checking, so callers must keep the rectangle inside the image. */
-static void require_rectangle(int tlx, int tly, int brx, int bry,
-                              const char *ok) {
-  int x,y;
-  for (x=tlx; x<=brx; x++)
-    for (y=tly; y<=bry; y++) {
-      int c= get(x,y);
-      /* eassert, as used by every other check visible in this file, rather
-       * than assert(): assert() expands to nothing under NDEBUG, which
-       * would silently disable this structural check.
-       * NOTE(review): presumes eassert stays active regardless of NDEBUG
-       * - confirm against its definition. */
-      eassert(strchr(ok,c));
-    }
-}
-
-/* Rect-taking convenience wrapper around require_rectangle. */
-static void require_rectangle_r(Rect rr, const char *ok) {
-  require_rectangle(rr.tl.x,rr.tl.y, rr.br.x,rr.br.y, ok);
-}
-
-static void debug_rect(const char *what, int whati, Rect rr) { /* Debug dump of rectangle rr.
- * When DEBUG_RECTANGLES is defined, writes a "<what> <whati>: x,y..x,y:"
- * header and then each row of rr's canonical pixels, between '|' marks,
- * to `debug`.  Whether or not it is defined, finishes with debug_flush(). */
-#ifdef DEBUG_RECTANGLES
- int y,r,w;
- fprintf(debug, "%s %d: %d,%d..%d,%d:\n", what, whati,
- rr.tl.x,rr.tl.y, rr.br.x,rr.br.y);
- w= rr.br.x - rr.tl.x + 1; /* inclusive width of rr */
- for (y=rr.tl.y; y<=rr.br.y; y++) {
- fprintf(debug, "%4d%*s|", y, rr.tl.x,""); /* row number, then rr.tl.x characters of padding before the pixels */
- r= fwrite(image + y*width + rr.tl.x, 1, w, debug); /* the w pixels of row y starting at column rr.tl.x */
- eassert(r==w);
- fputc('|',debug);
- fputc('\n',debug);
- }
-#endif
- debug_flush();
-}
-
-#define WALK_UNTIL(point,coord,increm,last,edge) /* move point.coord in steps of increm; stop one step short of the first `edge` pixel, or at last+increm (never reading that pixel) if none is met */ \
- for (;;) { \
- if ((point).coord == (last)+(increm)) break; \
- if (get_p((point)) == (edge)) { (point).coord -= (increm); break; } \
- (point).coord += (increm); \
- }
-
-#define WALK_UNTIL_MUST(point,coord,increm,last,edge) /* as WALK_UNTIL, but eassert that the walk did not run past `last`, i.e. an `edge` pixel was found */ \
- do { \
- WALK_UNTIL(point,coord,increm,last,edge); \
- eassert((point).coord != (last)+(increm)); \
- } while(0)
-
-static void find_structure(void) { /* Works out the table geometry of the canonified image.
- * Sets mainr (the main rectangle), commbasey and comminty (row origin and
- * row pitch), colrightx[] (right edge of each recorded column) and text_h.
- * Every structural expectation is enforced with eassert/require_rectangle.
- * (The '+' lines interleaved below belong to the added main() and are not
- * part of this removed function.) */
- Rect whole = { {0,0}, {width-1,height-1} };
-
- WALK_UNTIL_MUST(mainr.tl, x,-1, whole.tl.x, '*'); /* grow mainr outwards from START_MAIN until just inside a '*' pixel on each side */
- WALK_UNTIL_MUST(mainr.tl, y,-1, whole.tl.y, '*');
- WALK_UNTIL_MUST(mainr.br, x,+1, whole.br.x, '*');
- WALK_UNTIL_MUST(mainr.br, y,+1, whole.br.y, '*');
-
- require_rectangle(mainr.tl.x-1, mainr.tl.y, mainr.tl.x-1, mainr.br.y, "*"); /* the one-pixel line just outside each side of mainr must be entirely '*' */
- require_rectangle(mainr.br.x+1, mainr.tl.y, mainr.br.x+1, mainr.br.y, "*");
- require_rectangle(mainr.tl.x, mainr.tl.y-1, mainr.br.x, mainr.tl.y-1, "*");
- require_rectangle(mainr.tl.x, mainr.br.y+1, mainr.br.x, mainr.br.y+1, "*");
-
-#define CHECK_STRIP_BORDER(tlbr,xy,increm) /* if the probed pixel on the given side of mainr is '+', require that whole edge line to be '+' and shrink mainr by one pixel on that side */ \
- do { \
- Point csb_p; \
- Rect csb_r; \
- csb_p= mainr.tl; \
- csb_p.xy= mainr.tlbr.xy; \
- if (get_p(csb_p)=='+') { \
- csb_r= mainr; \
- csb_r.tl.xy= csb_p.xy; \
- csb_r.br.xy= csb_p.xy; \
- require_rectangle_r(csb_r, "+"); \
- mainr.tlbr.xy += increm; \
- } \
- } while(0)
-
- debug_rect("mainr",0, mainr);
-
- CHECK_STRIP_BORDER(tl,x,+1);
- CHECK_STRIP_BORDER(tl,y,+1);
- CHECK_STRIP_BORDER(br,x,-1);
- CHECK_STRIP_BORDER(br,y,-1);
-
- debug_rect("mainr",1, mainr);
- Point up = START_MAIN;
- WALK_UNTIL_MUST(up, y,-1, mainr.tl.y, '+'); /* towards smaller y from START_MAIN: up.y ends one pixel short of a '+' */
-
- Point down = START_MAIN;
- down.y++;
- WALK_UNTIL_MUST(down, y,+1, mainr.br.y, '+'); /* towards larger y from one below START_MAIN: down.y ends one pixel short of a '+' */
-
-#ifdef DEBUG_RECTANGLES
- int xscaleunit, y,x; /* prints a four-line x-coordinate ruler: units, tens, hundreds and thousands digits */
- for (y=0, xscaleunit=1; y<4; y++, xscaleunit*=10) {
- fprintf(debug," ");
- for (x=0; x<=width; x++) {
- if (x % xscaleunit) fputc(' ',debug);
- else fprintf(debug,"%d",(x / xscaleunit)%10);
- }
- fputc('\n',debug);
- }
-#endif
-
- commbasey= up.y;
- comminty= down.y - up.y + 2; /* inclusive row height (down.y-up.y+1) plus one more line, giving the pitch used by find_commodity */
+const char *get_vardir(void) { return "."; } /* always returns "."; the added main() uses it as the directory of the default screenshots file */
- Point across= { mainr.tl.x, commbasey }; /* scan the reference row left to right to find the column boundaries */
- int colno=0;
- for (;;) {
- eassert(get_p(across) != '+'); /* each column must start on a non-separator pixel */
- WALK_UNTIL(across, x,+1, mainr.br.x, '+'); /* across.x ends on the column's last pixel, or at mainr.br.x+1 if no '+' was met */
- eassert(colno < MAX_COLUMNS);
- int colrx= across.x;
- if (colrx > mainr.br.x) colrx= mainr.br.x; /* clamp the ran-off-the-end case back to the right edge */
- if (colno < INTERESTING_COLUMNS)
- colrightx[colno]= colrx;
-
- colno++;
-
- if (across.x >= mainr.br.x-1)
- break;
- across.x++; /* step onto the separator column ... */
- require_rectangle(across.x,mainr.tl.y, across.x,mainr.br.y, "+"); /* ... which must be '+' over the full height of mainr */
- across.x++; /* first pixel of the next column */
+static enum { mode_all=03, mode_screenshot=01, mode_analyse=02 } /* bit flags tested with & in main(): 01 = screenshot phase, 02 = analyse phase, 03 = both */
+ o_mode= mode_all;
+static char *o_screenshots_fn; /* set by --screenshots-file; main() fills in a default when it is still NULL */
+static int o_single_page; /* set to 1 by --single-page */
+
+FILE *screenshots_file; /* opened by main() on o_screenshots_fn, "w" in screenshot mode and "r" otherwise */
+
+int main(int argc, char **argv) { /* parses the options below, then runs the phases selected by o_mode; the rest of this function appears in the later '+' lines */
+ const char *arg;
+ int r;
+
+ while ((arg=*++argv)) { /* visit argv[1] onwards until the NULL terminator */
+ if (!strcmp(arg,"--screenshots-only"))
+ o_mode= mode_screenshot;
+ else if (!strcmp(arg,"--analyse-only"))
+ o_mode= mode_analyse;
+ else if (!strcmp(arg,"--single-page"))
+ o_single_page= 1;
+ else if (!strcmp(arg,"--screenshots-file"))
+ eassert( o_screenshots_fn= *++argv ); /* consumes the following argument; the eassert fails if there is none */
+#define DF(f) /* one `else if` per entry that DEBUG_FLAG_LIST expands through DF: "-D<f>" ORs dbg_<f> into debug_flags */ \
+ else if (!strcmp(arg,"-D" #f)) \
+ debug_flags |= dbg_##f;
+ DEBUG_FLAG_LIST
+#undef DF
+ else if (!strcmp(arg,"--window-id")) {
+ char *ep;
+ eassert((arg=*++argv));
+ unsigned long windowid= strtoul(arg,&ep,0); /* base 0: accepts decimal, octal (leading 0) and hex (0x) */
+ eassert(!*ep); /* NOTE(review): rejects trailing junk only; an empty argument or an out-of-range value is not detected here - confirm that is acceptable */
+ set_yppclient_window(windowid);
+ } else
+ eassert(!"bad option"); /* the operand is always false, so any unrecognised argument fails here */
}
- eassert(colno >= MIN_COLUMNS);
-
- text_h = comminty - 1;
-}
-
-/* Compute the rectangle of the commodity row that lies `offset` row
- * pitches before the reference row (y = commbasey - offset*comminty, so
- * negative offsets give larger y) and store it in *rr.
- *
- * If that row does not lie wholly within mainr, sets rr->tl.x to -1 and
- * returns; callers test rr->tl.x < 0 for "no such row" (the other fields
- * of *rr are then not meaningful).
- *
- * Otherwise the row spans mainr's full width, and the one-pixel lines
- * directly before and after it in y, where they lie inside mainr, are
- * required to consist entirely of '+'. */
-static void find_commodity(int offset, Rect *rr) {
-  rr->tl.y= commbasey - offset*comminty;
-  rr->br.y= rr->tl.y + comminty-2;
-  if (rr->tl.y < mainr.tl.y || rr->br.y > mainr.br.y) { rr->tl.x=-1; return; }
-
-  /* Set the x extent before it is used below: *rr arrives uninitialised
-   * from the caller, or still holding -1 from an earlier out-of-range
-   * call, so reading it first passed garbage to require_rectangle. */
-  rr->tl.x= mainr.tl.x;
-  rr->br.x= mainr.br.x;
-
-  if (rr->tl.y > mainr.tl.y)
-    require_rectangle(rr->tl.x,rr->tl.y-1, rr->br.x,rr->tl.y-1, "+");
-  /* The line after the row exists only when the row is not flush with
-   * mainr's far edge, so compare against mainr.br.y; the earlier
-   * comparison with mainr.tl.y left this check effectively dead. */
-  if (rr->br.y < mainr.br.y)
-    require_rectangle(rr->tl.x,rr->br.y+1, rr->br.x,rr->br.y+1, "+");
-}
-
-static void find_table_entry(Rect commod, int colno, Rect *cellr) { /* Stores in *cellr the cell of row `commod` in column `colno`.
- * The y range is copied from commod; the x range runs from commod.tl.x
- * (column 0) or two pixels past the previous column's right edge, up to
- * colrightx[colno].  The cell is dumped via debug_rect and must contain
- * only ' ' and 'o' pixels. */
- cellr->tl.y= commod.tl.y;
- cellr->br.y= commod.br.y;
- cellr->tl.x= !colno ? commod.tl.x : colrightx[colno-1]+2; /* +2 skips the one-pixel separator column that find_structure requires to be '+' */
- cellr->br.x= colrightx[colno];
- debug_rect("cell", colno, *cellr);
- require_rectangle_r(*cellr, " o");
-}
-
-static void load_image_and_canonify(void) { /* Reads an image from stdin into `image` as canonical characters.
- * The header is read with pnm_readpaminit; maxval must be 255 and samples
- * one byte wide.  Sets `width` and `height`, allocates width*height chars
- * pre-filled with '?', then reads three bytes per pixel and stores the
- * canoncolourinfos character for each exactly-matching colour.
- * (The '+' lines interleaved below belong to the added main().) */
- struct pam inpam;
- unsigned char rgb[3];
- int x,y,r;
- const CanonColourInfo *cci;
-
- pnm_readpaminit(stdin, &inpam, sizeof(inpam));
- height= inpam.height;
- width= inpam.width;
- eassert(inpam.maxval == 255);
- eassert(inpam.bytes_per_sample == 1); /* NOTE(review): inpam.depth is not checked although exactly 3 bytes per pixel are read below - confirm inputs are always 3-plane */
-
- image= malloc(width*height); /* NOTE(review): int multiplication with no overflow or positivity check on the header values */
- eassert(image);
- memset(image,'?',width*height); /* '?' marks pixels whose colour matches no table entry */
-
- for (y=0; y<height; y++) {
- for (x=0; x<width; x++) {
- r= fread(&rgb,1,3,stdin); eassert(r==3);
- unsigned long rgb_l= /* pack the three bytes as (first<<16)|(second<<8)|third, the layout of the table's rgb constants */
- ((unsigned long)rgb[0]<<16) |
- ((unsigned long)rgb[1]<<8) |
- (rgb[2]);
- for (cci=canoncolourinfos; cci->c; cci++) /* linear search; stops at the c==0 terminator */
- if (cci->rgb == rgb_l) {
- image[y*width + x]= cci->c;
- break;
- }
- }
-#ifdef DEBUG_RECTANGLES
- fprintf(debug, "%4d ",y); /* dump each canonified row, prefixed with its row number */
- r= fwrite(image + y*width, 1,width, debug); eassert(r==width);
- fputc('\n',debug);
-#endif
+ if (!o_screenshots_fn) { /* added main(): no --screenshots-file given, so default to "<get_vardir()>/#pages#.ppm" */
+ r= asprintf(&o_screenshots_fn,"%s/#pages#.ppm",get_vardir());
+ eassert(r>=0); eassert(o_screenshots_fn);
}
- debug_flush();
-}
-static void ocr_rectangle(Rect r, const OcrCellType ct) { /* OCRs one cell and prints the result to stdout.
- * Builds one Pixcol per pixel column of r, with bit y set when the pixel
- * text_h-relative row y is 'o', appends a zero column, passes the array to
- * ocr(rd,ct,...) and prints the returned strings as: YES! "<text>".
- * Rows are taken as text_h lines starting at r.tl.y; r.br.y is not used.
- * NOTE(review): callers pass &ocr_celltype_text etc., so OcrCellType is
- * presumably a pointer type - confirm against ocr.h.
- * (The '+' lines interleaved below belong to the added main().) */
- OcrResultGlyph *results, *res;
-
- int w= r.br.x - r.tl.x + 1; /* inclusive width of the cell */
- Pixcol cols[w+1]; /* variable-length array: w pixel columns plus one terminating zero column */
- int x,y;
- for (x=0; x<w; x++) {
- Pixcol cx, rv;
- for (y=0, cx=0, rv=1; y<text_h; y++, rv<<=1) { /* rv is the bit for row y, starting at bit 0 */
- switch (get(x+r.tl.x, y+r.tl.y)) {
- case ' ': break;
- case 'o': cx |= rv; break;
- default: eassert(!"wrong pixel"); /* any character other than ' ' or 'o' fails */
- }
- }
- cols[x]= cx;
+ if (o_mode & mode_screenshot) { /* added main(): screenshot phase - open the screenshots file for writing and capture one page or all pages */
+ screenshot_startup();
+ find_yppclient_window();
+ screenshots_file= fopen(o_screenshots_fn, "w"); eassert(screenshots_file);
+ if (o_single_page)
+ take_one_screenshot();
+ else
+ take_screenshots();
+ } else { /* added main(): no screenshot phase - open the existing screenshots file for reading instead */
+ screenshots_file= fopen(o_screenshots_fn, "r"); eassert(screenshots_file);
+ read_screenshots();
}
- cols[w]= 0;
-
- results= ocr(rd,ct,w,cols);
- printf("YES! \"");
- for (res=results; res->s; res++) /* the result array ends at the entry whose s is null */
- printf("%s",res->s);
- printf("\"\n");
- eassert(!ferror(stdout));
- eassert(!fflush(stdout));
-}
-
-int main(void) { /* Removed entry point: loads and canonifies the image from stdin,
- * finds the table structure, initialises the OCR reader, then for every
- * row offset from +height down to -height that find_commodity accepts,
- * OCRs the first MIN_COLUMNS cells of that row.
- * (The '+' lines interleaved below are the tail of the added main().) */
- Rect thisr, entryr;
- int tryrect, colno;
-
- load_image_and_canonify();
- find_structure();
- rd= ocr_init(text_h);
-
- for (tryrect= +height; tryrect >= -height; tryrect--) { /* deliberately over-wide range of offsets; out-of-range ones are skipped below */
- find_commodity(tryrect, &thisr);
- if (thisr.tl.x < 0) /* find_commodity's "offset out of range" marker */
- continue;
- debug_rect("commod",tryrect, thisr);
-
- for (colno=0; colno<MIN_COLUMNS; colno++) {
- find_table_entry(thisr,colno,&entryr);
- ocr_rectangle(entryr,
- colno<TEXT_COLUMNS
- ? &ocr_celltype_text
- : &ocr_celltype_number);
- }
+ if (o_mode & mode_analyse) { /* added main(): analyse phase, run when o_mode includes mode_analyse */
+ analyse();
+ //output_tsv();
}
return 0;
}
-
-const char *get_vardir(void) { return "."; } /* always returns "." */