From: Ian Jackson Date: Sat, 6 Jun 2009 17:55:21 +0000 (+0100) Subject: WIP actual useful structure X-Git-Tag: 1.9.2~164 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-test.git;a=commitdiff_plain;h=0826bbc65e59d47c270f05af419f24fdfc3c349c WIP actual useful structure --- diff --git a/.gitignore b/.gitignore index fd7f321..367468d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,13 @@ *~ t +core + pctb/*.o pctb/t.* pctb/u.* + +pctb/#*#.* pctb/convert pctb/x-manip-window diff --git a/pctb/Makefile b/pctb/Makefile index 55f4d53..7d1d6ef 100644 --- a/pctb/Makefile +++ b/pctb/Makefile @@ -3,9 +3,9 @@ CFLAGS += -Wall -Wwrite-strings -Wpointer-arith -Wmissing-prototypes \ all: convert x-manip-window -CONVERT_OBJS= convert.o ocr.o pages.o +CONVERT_OBJS= convert.o ocr.o pages.o structure.o convert: $(CONVERT_OBJS) -lnetpbm -lXtst -lX11 -$(CONVERT_OBJS): ocr.h +$(CONVERT_OBJS): ocr.h convert.h structure.h x-manip-window: -lXtst -lX11 diff --git a/pctb/convert.c b/pctb/convert.c index 4fb18f4..f38abf5 100644 --- a/pctb/convert.c +++ b/pctb/convert.c @@ -1,308 +1,66 @@ -#include "ocr.h" +#include "convert.h" -static CanonImage *cim; void debug_flush(void) { eassert(!fflush(debug)); eassert(!ferror(debug)); } -typedef struct { - int x, y; -} Point; -typedef struct { /* both inclusive */ - Point tl; - Point br; -} Rect; - -static inline char get(int x, int y) { return cim->d[y * cim->w + x]; } -static inline char get_p(Point p) { return get(p.x,p.y); } - - -#define START_MAIN {200,200} -#define MIN_COLUMNS 6 -#define INTERESTING_COLUMNS 6 -#define TEXT_COLUMNS 2 -#define MAX_COLUMNS 7 - -static Rect mainr = { START_MAIN,START_MAIN }; -static int commbasey, comminty; -static int colrightx[INTERESTING_COLUMNS]; -static int text_h; -static OcrReader *rd; - -const CanonColourInfo canoncolourinfos[]= { - { 0x475A5E, '*' }, /* edge */ - { 0x2C5F7A, '*' }, /* edge just under box heading shadow */ - { 0x7D9094, '+' }, /* interbox */ - - { 0xBDC5BF, ' ' }, /* background - pale Sugar cane, etc. */ - { 0xADB5AF, ' ' }, /* background - dark */ - { 0xC7E1C3, ' ' }, /* background - pale Swill, etc. */ - { 0xB5CFB1, ' ' }, /* background - dark */ - { 0xD6CEB0, ' ' }, /* background - pale Madder, etc. */ - { 0xC8C0A2, ' ' }, /* background - dark */ - { 0xE0E1D3, ' ' }, /* background - pale Lorandite, etc. */ - { 0xD0D1C3, ' ' }, /* background - dark */ - { 0xE5E6C1, ' ' }, /* background - pale Cloth */ - { 0xD7D8B3, ' ' }, /* background - dark */ - { 0xEDDED9, ' ' }, /* background - pale Dye */ - { 0xDACBC6, ' ' }, /* background - dark */ - { 0xD3DEDF, ' ' }, /* background - pale Paint */ - { 0xC5D0D1, ' ' }, /* background - dark */ - { 0xDCD1CF, ' ' }, /* background - pale Enamel */ - { 0xCEC3C1, ' ' }, /* background - dark */ - { 0xF3F6F5, ' ' }, /* background - pale fruit */ - { 0xE2E7E5, ' ' }, /* background - dark */ - - { 0x000000, 'o' }, /* foreground */ - { 0xD4B356, ' ' }, /* background (cursor) */ - { 0xFFFFFF, 'o' }, /* foreground (cursor) */ - - { 0x5B93BF, '_' }, /* selector dropdown background */ - { 0xD7C94F, 'X' }, /* selector dropdown foreground */ - { 0,0 } -}; - -static void require_rectangle(int tlx, int tly, int brx, int bry, - const char *ok) { - int x,y; - for (x=tlx; x<=brx; x++) - for (y=tly; y<=bry; y++) { - int c= get(x,y); - assert(strchr(ok,c)); - } -} -static void require_rectangle_r(Rect rr, const char *ok) { - require_rectangle(rr.tl.x,rr.tl.y, rr.br.x,rr.br.y, ok); -} +const char *get_vardir(void) { return "."; } -static void debug_rect(const char *what, int whati, Rect rr) { -#ifdef DEBUG_RECTANGLES - int y,r,w; - fprintf(debug, "%s %d: %d,%d..%d,%d:\n", what, whati, - rr.tl.x,rr.tl.y, rr.br.x,rr.br.y); - w= rr.br.x - rr.tl.x + 1; - for (y=rr.tl.y; y<=rr.br.y; y++) { - fprintf(debug, "%4d%*s|", y, rr.tl.x,""); - r= fwrite(cim->d + y*cim->w + rr.tl.x, 1, w, debug); - eassert(r==w); - fputc('|',debug); - fputc('\n',debug); - } -#endif - debug_flush(); -} -#define WALK_UNTIL(point,coord,increm,last,edge) \ - for (;;) { \ - if ((point).coord == (last)+(increm)) break; \ - if (get_p((point)) == (edge)) { (point).coord -= (increm); break; } \ - (point).coord += (increm); \ +static enum { mode_all=03, mode_screenshot=01, mode_analyse=02 } + o_mode= mode_all; +static char *o_screenshots_fn; +static int o_single_page; + +FILE *screenshots_file; + +int main(int argc, char **argv) { + const char *arg; + int r; + + while ((arg=*++argv)) { + if (!strcmp(arg,"--screenshots-only")) + o_mode= mode_screenshot; + else if (!strcmp(arg,"--analyse-only")) + o_mode= mode_analyse; + else if (!strcmp(arg,"--single-page")) + o_single_page= 1; + else if (!strcmp(arg,"--screenshots-file")) + eassert( o_screenshots_fn= *++argv ); + else if (!strcmp(arg,"--window-id")) { + char *ep; + eassert((arg=*++argv)); + unsigned long windowid= strtoul(arg,&ep,0); + eassert(!*ep); + set_yppclient_window(windowid); + } else + eassert(!"bad option"); } - -#define WALK_UNTIL_MUST(point,coord,increm,last,edge) \ - do { \ - WALK_UNTIL(point,coord,increm,last,edge); \ - eassert((point).coord != (last)+(increm)); \ - } while(0) - -void find_structure(CanonImage *im) { - cim= im; - Rect whole = { {0,0}, {cim->w-1,cim->h-1} }; - - WALK_UNTIL_MUST(mainr.tl, x,-1, whole.tl.x, '*'); - WALK_UNTIL_MUST(mainr.tl, y,-1, whole.tl.y, '*'); - WALK_UNTIL_MUST(mainr.br, x,+1, whole.br.x, '*'); - WALK_UNTIL_MUST(mainr.br, y,+1, whole.br.y, '*'); - - require_rectangle(mainr.tl.x-1, mainr.tl.y, mainr.tl.x-1, mainr.br.y, "*"); - require_rectangle(mainr.br.x+1, mainr.tl.y, mainr.br.x+1, mainr.br.y, "*"); - require_rectangle(mainr.tl.x, mainr.tl.y-1, mainr.br.x, mainr.tl.y-1, "*"); - require_rectangle(mainr.tl.x, mainr.br.y+1, mainr.br.x, mainr.br.y+1, "*"); - -#define CHECK_STRIP_BORDER(tlbr,xy,increm) \ - do { \ - Point csb_p; \ - Rect csb_r; \ - csb_p= mainr.tl; \ - csb_p.xy= mainr.tlbr.xy; \ - if (get_p(csb_p)=='+') { \ - csb_r= mainr; \ - csb_r.tl.xy= csb_p.xy; \ - csb_r.br.xy= csb_p.xy; \ - require_rectangle_r(csb_r, "+"); \ - mainr.tlbr.xy += increm; \ - } \ - } while(0) - - debug_rect("mainr",0, mainr); - - CHECK_STRIP_BORDER(tl,x,+1); - CHECK_STRIP_BORDER(tl,y,+1); - CHECK_STRIP_BORDER(br,x,-1); - CHECK_STRIP_BORDER(br,y,-1); - - debug_rect("mainr",1, mainr); - - Point up = START_MAIN; - WALK_UNTIL_MUST(up, y,-1, mainr.tl.y, '+'); - - Point down = START_MAIN; - down.y++; - WALK_UNTIL_MUST(down, y,+1, mainr.br.y, '+'); - -#ifdef DEBUG_RECTANGLES - int xscaleunit, y,x; - for (y=0, xscaleunit=1; y<4; y++, xscaleunit*=10) { - fprintf(debug," "); - for (x=0; x<=cim->w; x++) { - if (x % xscaleunit) fputc(' ',debug); - else fprintf(debug,"%d",(x / xscaleunit)%10); - } - fputc('\n',debug); + if (!o_screenshots_fn) { + r= asprintf(&o_screenshots_fn,"%s/#pages#.ppm",get_vardir()); + eassert(r>=0); eassert(o_screenshots_fn); } -#endif - - commbasey= up.y; - comminty= down.y - up.y + 2; - - Point across= { mainr.tl.x, commbasey }; - int colno=0; - for (;;) { - eassert(get_p(across) != '+'); - WALK_UNTIL(across, x,+1, mainr.br.x, '+'); - eassert(colno < MAX_COLUMNS); - int colrx= across.x; - if (colrx > mainr.br.x) colrx= mainr.br.x; - if (colno < INTERESTING_COLUMNS) - colrightx[colno]= colrx; - - colno++; - - if (across.x >= mainr.br.x-1) - break; - across.x++; - require_rectangle(across.x,mainr.tl.y, across.x,mainr.br.y, "+"); - across.x++; + if (o_mode & mode_screenshot) { + screenshot_startup(); + find_yppclient_window(); + screenshots_file= fopen(o_screenshots_fn, "w"); eassert(screenshots_file); + if (o_single_page) + take_one_screenshot(); + else + take_screenshots(); + } else { + screenshots_file= fopen(o_screenshots_fn, "r"); eassert(screenshots_file); + read_screenshots(); } - eassert(colno >= MIN_COLUMNS); - - text_h = comminty - 1; -} - -static void find_commodity(int offset, Rect *rr) { - /* rr->tl.x==-1 if offset out of range */ - rr->tl.y= commbasey - offset*comminty; - rr->br.y= rr->tl.y + comminty-2; - if (rr->tl.y < mainr.tl.y || rr->br.y > mainr.br.y) { rr->tl.x=-1; return; } - if (rr->tl.y > mainr.tl.y) - require_rectangle(rr->tl.x,rr->tl.y-1, rr->br.x,rr->tl.y-1, "+"); - if (rr->br.y < mainr.tl.y) - require_rectangle(rr->tl.x,rr->br.y+1, rr->br.x,rr->br.y+1, "+"); - - rr->tl.x= mainr.tl.x; - rr->br.x= mainr.br.x; -} - -static void find_table_entry(Rect commod, int colno, Rect *cellr) { - cellr->tl.y= commod.tl.y; - cellr->br.y= commod.br.y; - cellr->tl.x= !colno ? commod.tl.x : colrightx[colno-1]+2; - cellr->br.x= colrightx[colno]; - debug_rect("cell", colno, *cellr); - require_rectangle_r(*cellr, " o"); -} - -CanonImage *alloc_canon_image(int w, int h) { - CanonImage *im= malloc(sizeof(CanonImage) + w*h); - eassert(im); - im->w= w; - im->h= h; - memset(im->d,'?',w*h); - return im; -} - -CanonImage *file_read_image(FILE *f) { - struct pam inpam; - unsigned char rgb_buf[3]; - CanonImage *im; - - pnm_readpaminit(f, &inpam, sizeof(inpam)); - eassert(inpam.maxval == 255); - eassert(inpam.bytes_per_sample == 1); - - CANONICALISE_IMAGE(im, inpam.width, inpam.height, { - r= fread(&rgb_buf,1,3,f); eassert(r==3); - - rgb= - ((unsigned long)rgb_buf[0]<<16) | - ((unsigned long)rgb_buf[1]<<8) | - (rgb_buf[2]); - }); - - return im; -} - -static void load_image_and_canonify(void) { - cim= file_read_image(stdin); -} - -static void ocr_rectangle(Rect r, const OcrCellType ct) { - OcrResultGlyph *results, *res; - - int w= r.br.x - r.tl.x + 1; - Pixcol cols[w+1]; - int x,y; - for (x=0; xs; res++) - printf("%s",res->s); - printf("\"\n"); - eassert(!ferror(stdout)); - eassert(!fflush(stdout)); -} - -int main_test(void) { - Rect thisr, entryr; - int tryrect, colno; - - load_image_and_canonify(); - find_structure(cim); - rd= ocr_init(text_h); - - for (tryrect= +cim->h; tryrect >= -cim->h; tryrect--) { - find_commodity(tryrect, &thisr); - if (thisr.tl.x < 0) - continue; - debug_rect("commod",tryrect, thisr); - - for (colno=0; colno +#include +#include + +#include + + +/*----- from structure.c -----*/ + +void find_structure(CanonImage *im); +CanonImage *file_read_image_ppm(FILE *f); +void read_screenshots(void); +void analyse(void); + +/*----- from convert.c -----*/ + +extern FILE *screenshots_file; + +/*----- from pages.c -----*/ + +void screenshot_startup(void); +void set_yppclient_window(unsigned long wul); +void find_yppclient_window(void); +void take_screenshots(void); +void take_one_screenshot(void); + +#define MAX_PAGES 100 +extern CanonImage *page_images[MAX_PAGES]; +extern int npages; + + +#endif /*CONVERT_H*/ diff --git a/pctb/ocr.h b/pctb/ocr.h index 8e1263d..60234a8 100644 --- a/pctb/ocr.h +++ b/pctb/ocr.h @@ -1,28 +1,30 @@ +/* + * ocr.c forms a mostly-self-contained bit + * so we put its declarations in this separate file + */ + #ifndef OCR_H #define OCR_H -#define DEBUG_RECTANGLES +// #define DEBUG_RECTANGLES // #define DEBUG_OCR - #define _GNU_SOURCE -#include -#include -#include -#include #include -#include #include +#include +#include +#include +#include +#include #include -#include -#include +#include #include #include -#include typedef struct { @@ -30,7 +32,6 @@ typedef struct { char d[]; } CanonImage; - typedef uint32_t Pixcol; #define PSPIXCOL(priscan) priscan##32 @@ -40,10 +41,12 @@ typedef struct { unsigned ctxmap; /* match context index */ } OcrResultGlyph; + typedef const struct OcrCellTypeInfo *OcrCellType; extern const struct OcrCellTypeInfo ocr_celltype_text; extern const struct OcrCellTypeInfo ocr_celltype_number; + typedef struct OcrReader OcrReader; OcrReader *ocr_init(int h); @@ -52,68 +55,14 @@ OcrResultGlyph *ocr(OcrReader *rd, OcrCellType, int w, Pixcol cols[]); * array is valid until next call to ocr() */ -void debug_flush(void); -void find_structure(CanonImage *im); +/*----- debugging arrangements, rather contingent -----*/ + +void debug_flush(void); #define eassert assert -#define debug stdout +#define debug stderr const char *get_vardir(void); -CanonImage *file_read_image(FILE *f); -int main_test(void); - -#define MAX_PAGES 100 -extern CanonImage *page_images[MAX_PAGES]; -extern int npages; - - -typedef struct { - unsigned long rgb; /* on screen */ - char c; /* canonical */ -} CanonColourInfo; - -extern const CanonColourInfo canoncolourinfos[]; - -CanonImage *alloc_canon_image(int w, int h); - -#ifdef DEBUG_RECTANGLES -# define CANIMG_DEBUG_RECTANGLE_1LINE(im,w,h) \ - fprintf(debug, "%4d ",y); \ - r= fwrite(im->d + y*w, 1,w, debug); \ - eassert(r==w); \ - fputc('\n',debug); -#else -# define CANIMG_DEBUG_RECTANGLE_1LINE(im,y,h) /* nothing */ -#endif - -#define CANONICALISE_IMAGE(im,w,h, COMPUTE_RGB) do{ \ - /* compute_rgb should be a number of statements, or \ - * a block, which assigns to \ - * unsigned long rgb; \ - * given the values of \ - * int x,y; \ - * all of which are anamorphic. Result is stored in im. \ - */ \ - (im)= alloc_canon_image((w), (h)); \ - \ - int x,y,r; \ - for (y=0; y<(h); y++) { \ - for (x=0; x<(w); x++) { \ - const CanonColourInfo *cci; \ - unsigned long rgb; \ - COMPUTE_RGB; \ - for (cci=canoncolourinfos; cci->c; cci++) \ - if (cci->rgb == rgb) { \ - (im)->d[y*(w) + x]= cci->c; \ - break; \ - } \ - } \ - CANIMG_DEBUG_RECTANGLE_1LINE((im),(w),(h)) \ - } \ - debug_flush(); \ - }while(0) - - #endif /*OCR_H*/ diff --git a/pctb/pages.c b/pctb/pages.c index 9b05fcd..361ea0e 100644 --- a/pctb/pages.c +++ b/pctb/pages.c @@ -1,7 +1,7 @@ /* */ -#include "ocr.h" +#include "structure.h" #include #include @@ -22,54 +22,16 @@ static KeyCode keycode(KeySym sym) { return XKeysymToKeycode(disp,sym); } -#if 0 -static void check_exitstatus(int st) { - eassert(WIFEXITED(st)); - eassert(!WEXITSTATUS(st)); -} - -static void check_pclose(FILE *f, char *cmd) { - int r; - eassert(!ferror(f)); - r= fgetc(f); eassert(r==EOF); eassert(feof(f)); - r= pclose(f); eassert(r>=0); check_exitstatus(r); - free(cmd); -} - -static CanonImage *screenshot_now(void) { - char *cmd; - CanonImage *ci; - int r; - - r= asprintf(&cmd, "xwd -silent -id 0x%lx | xwdtopnm", (unsigned long)id); - eassert(r>=0); - FILE *f= popen(cmd,"r"); eassert(f); - ci= file_read_image(f); - check_pclose(f, cmd); - return ci; -} -#endif - -static void screenshot_startup(void) { +void screenshot_startup(void) { int r; disp= XOpenDisplay(0); eassert(disp); r= gettimeofday(&tv_startup,0); eassert(!r); } -#if 0 -static CanonImage *single_page(void) { - int r; - r= XRaiseWindow(disp, id); eassert(r); - r= XSync(disp, False); eassert(r); - return screenshot_now(); -} -#endif - /*---------- pager ----------*/ typedef XImage Snapshot; -//static size_t snapshot_alloc= 1024; static double last_input; static const double min_update_allowance= 0.25; @@ -224,24 +186,20 @@ static void set_focus(void) { fprintf(stderr,"PAGING raise_and_set_focus done.\n"); } +#define SAMPLEMASK 0xfful + typedef struct { - unsigned long mask; int lshift, rshift; } ShMask; -static void compute_shift_mask(ShMask *sm, int targshift, - unsigned long ximage_mask) { - unsigned long below; - +static void compute_shift_mask(ShMask *sm, unsigned long ximage_mask) { sm->lshift= 0; sm->rshift= 0; - sm->mask= 0xfful << targshift; - below= ~0ul << targshift; for (;;) { - if (ximage_mask < sm->mask) { + if (ximage_mask <= (SAMPLEMASK>>1)) { sm->lshift++; ximage_mask <<= 1; - } else if ((ximage_mask & ~below) > sm->mask) { + } else if (ximage_mask > SAMPLEMASK) { sm->rshift++; ximage_mask >>= 1; } else { break; @@ -256,25 +214,36 @@ static CanonImage *convert_page(Snapshot *sn) { ShMask shiftmasks[3]; CanonImage *im; -#define COMPUTE_SHIFT_MASK(ix, targshift, rgb) \ - compute_shift_mask(&shiftmasks[ix], targshift, sn->rgb##_mask) - COMPUTE_SHIFT_MASK(0, 16, red); - COMPUTE_SHIFT_MASK(1, 8, green); - COMPUTE_SHIFT_MASK(2, 0, blue); + fprintf(screenshots_file, + "P6\n" + "%d %d\n" + "255\n", sn->width, sn->height); + +#define COMPUTE_SHIFT_MASK(ix, rgb) \ + compute_shift_mask(&shiftmasks[ix], sn->rgb##_mask) + COMPUTE_SHIFT_MASK(0, red); + COMPUTE_SHIFT_MASK(1, green); + COMPUTE_SHIFT_MASK(2, blue); CANONICALISE_IMAGE(im, sn->width, sn->height, { long xrgb= XGetPixel(sn, x, y); int i; rgb= 0; - for (i=0; i<3; i++) - rgb |= ((xrgb << shiftmasks[i].lshift) - >> shiftmasks[i].rshift) & shiftmasks[i].mask; + for (i=0; i<3; i++, rgb <<= 8) { + unsigned long sample= + ((xrgb << shiftmasks[i].lshift) + >> shiftmasks[i].rshift) & SAMPLEMASK; + rgb |= sample; + fputc(sample, screenshots_file); + } }); + eassert(!fflush(screenshots_file)); + return im; } -static void read_pages(void) { +void take_screenshots(void) { Snapshot *current=0, *last=0; CanonImage *test; @@ -312,12 +281,21 @@ static void read_pages(void) { fprintf(stderr,"PAGING all done.\n"); } -int main(int argc, char **argv) { - screenshot_startup(); - - id= strtoul(*++argv,0,0); +void take_one_screenshot(void) { + Snapshot *current=0; + + raise_and_get_details(); + sync_after_input(); + snapshot(¤t); + page_images[0]= convert_page(current); + npages= 1; +} - read_pages(); - return 0; +void set_yppclient_window(unsigned long wul) { + id= wul; } +void find_yppclient_window(void) { + if (id) return; + eassert(!"finding client window NYI"); +} diff --git a/pctb/structure.c b/pctb/structure.c new file mode 100644 index 0000000..fc077c1 --- /dev/null +++ b/pctb/structure.c @@ -0,0 +1,320 @@ +/* + */ + +#include "structure.h" + + +typedef struct { + int x, y; +} Point; + +typedef struct { /* both inclusive */ + Point tl; + Point br; +} Rect; + +static CanonImage *cim; + +static inline char get(int x, int y) { return cim->d[y * cim->w + x]; } +static inline char get_p(Point p) { return get(p.x,p.y); } + +#define START_MAIN {200,200} +#define MIN_COLUMNS 6 +#define INTERESTING_COLUMNS 7 +#define TEXT_COLUMNS 2 +#define MAX_COLUMNS 7 + +static Rect mainr = { START_MAIN,START_MAIN }; +static int commbasey, comminty; +static int colrightx[INTERESTING_COLUMNS]; +static int text_h=-1, columns=-1; +static OcrReader *rd; + +const CanonColourInfo canoncolourinfos[]= { + { 0x475A5E, '*' }, /* edge */ + { 0x2C5F7A, '*' }, /* edge just under box heading shadow */ + { 0x7D9094, '+' }, /* interbox */ + + { 0xBDC5BF, ' ' }, /* background - pale Sugar cane, etc. */ + { 0xADB5AF, ' ' }, /* background - dark */ + { 0xC7E1C3, ' ' }, /* background - pale Swill, etc. */ + { 0xB5CFB1, ' ' }, /* background - dark */ + { 0xD6CEB0, ' ' }, /* background - pale Madder, etc. */ + { 0xC8C0A2, ' ' }, /* background - dark */ + { 0xE0E1D3, ' ' }, /* background - pale Lorandite, etc. */ + { 0xD0D1C3, ' ' }, /* background - dark */ + { 0xE5E6C1, ' ' }, /* background - pale Cloth */ + { 0xD7D8B3, ' ' }, /* background - dark */ + { 0xEDDED9, ' ' }, /* background - pale Dye */ + { 0xDACBC6, ' ' }, /* background - dark */ + { 0xD3DEDF, ' ' }, /* background - pale Paint */ + { 0xC5D0D1, ' ' }, /* background - dark */ + { 0xDCD1CF, ' ' }, /* background - pale Enamel */ + { 0xCEC3C1, ' ' }, /* background - dark */ + { 0xF3F6F5, ' ' }, /* background - pale fruit */ + { 0xE2E7E5, ' ' }, /* background - dark */ + + { 0x000000, 'o' }, /* foreground */ + { 0xD4B356, ' ' }, /* background (cursor) */ + { 0xFFFFFF, 'o' }, /* foreground (cursor) */ + + { 0x5B93BF, '_' }, /* selector dropdown background */ + { 0xD7C94F, 'X' }, /* selector dropdown foreground */ + { 0,0 } +}; + +static void require_rectangle(int tlx, int tly, int brx, int bry, + const char *ok) { + int x,y; + for (x=tlx; x<=brx; x++) + for (y=tly; y<=bry; y++) { + int c= get(x,y); + assert(strchr(ok,c)); + } +} +static void require_rectangle_r(Rect rr, const char *ok) { + require_rectangle(rr.tl.x,rr.tl.y, rr.br.x,rr.br.y, ok); +} + +static void debug_rect(const char *what, int whati, Rect rr) { +#ifdef DEBUG_RECTANGLES + int y,r,w; + fprintf(debug, "%s %d: %d,%d..%d,%d:\n", what, whati, + rr.tl.x,rr.tl.y, rr.br.x,rr.br.y); + w= rr.br.x - rr.tl.x + 1; + for (y=rr.tl.y; y<=rr.br.y; y++) { + fprintf(debug, "%4d%*s|", y, rr.tl.x,""); + r= fwrite(cim->d + y*cim->w + rr.tl.x, 1, w, debug); + eassert(r==w); + fputc('|',debug); + fputc('\n',debug); + } +#endif + debug_flush(); +} + +#define WALK_UNTIL(point,coord,increm,last,edge) \ + for (;;) { \ + if ((point).coord == (last)+(increm)) break; \ + if (get_p((point)) == (edge)) { (point).coord -= (increm); break; } \ + (point).coord += (increm); \ + } + +#define WALK_UNTIL_MUST(point,coord,increm,last,edge) \ + do { \ + WALK_UNTIL(point,coord,increm,last,edge); \ + eassert((point).coord != (last)+(increm)); \ + } while(0) + +void find_structure(CanonImage *im) { + cim= im; + + Rect whole = { {0,0}, {cim->w-1,cim->h-1} }; + + WALK_UNTIL_MUST(mainr.tl, x,-1, whole.tl.x, '*'); + WALK_UNTIL_MUST(mainr.tl, y,-1, whole.tl.y, '*'); + WALK_UNTIL_MUST(mainr.br, x,+1, whole.br.x, '*'); + WALK_UNTIL_MUST(mainr.br, y,+1, whole.br.y, '*'); + + require_rectangle(mainr.tl.x-1, mainr.tl.y, mainr.tl.x-1, mainr.br.y, "*"); + require_rectangle(mainr.br.x+1, mainr.tl.y, mainr.br.x+1, mainr.br.y, "*"); + require_rectangle(mainr.tl.x, mainr.tl.y-1, mainr.br.x, mainr.tl.y-1, "*"); + require_rectangle(mainr.tl.x, mainr.br.y+1, mainr.br.x, mainr.br.y+1, "*"); + +#define CHECK_STRIP_BORDER(tlbr,xy,increm) \ + do { \ + Point csb_p; \ + Rect csb_r; \ + csb_p= mainr.tl; \ + csb_p.xy= mainr.tlbr.xy; \ + if (get_p(csb_p)=='+') { \ + csb_r= mainr; \ + csb_r.tl.xy= csb_p.xy; \ + csb_r.br.xy= csb_p.xy; \ + require_rectangle_r(csb_r, "+"); \ + mainr.tlbr.xy += increm; \ + } \ + } while(0) + + debug_rect("mainr",0, mainr); + + CHECK_STRIP_BORDER(tl,x,+1); + CHECK_STRIP_BORDER(tl,y,+1); + CHECK_STRIP_BORDER(br,x,-1); + CHECK_STRIP_BORDER(br,y,-1); + + debug_rect("mainr",1, mainr); + + Point up = START_MAIN; + WALK_UNTIL_MUST(up, y,-1, mainr.tl.y, '+'); + + Point down = START_MAIN; + down.y++; + WALK_UNTIL_MUST(down, y,+1, mainr.br.y, '+'); + +#ifdef DEBUG_RECTANGLES + int xscaleunit, y,x; + for (y=0, xscaleunit=1; y<4; y++, xscaleunit*=10) { + fprintf(debug," "); + for (x=0; x<=cim->w; x++) { + if (x % xscaleunit) fputc(' ',debug); + else fprintf(debug,"%d",(x / xscaleunit)%10); + } + fputc('\n',debug); + } +#endif + + commbasey= up.y; + comminty= down.y - up.y + 2; + + Point across= { mainr.tl.x, commbasey }; + int colno=0; + for (;;) { + eassert(get_p(across) != '+'); + WALK_UNTIL(across, x,+1, mainr.br.x, '+'); + eassert(colno < MAX_COLUMNS); + int colrx= across.x; + if (colrx > mainr.br.x) colrx= mainr.br.x; + if (colno < INTERESTING_COLUMNS) + colrightx[colno]= colrx; + + colno++; + + if (across.x >= mainr.br.x-1) + break; + + across.x++; + require_rectangle(across.x,mainr.tl.y, across.x,mainr.br.y, "+"); + across.x++; + } + eassert(colno >= MIN_COLUMNS); + +#define SET_ONCE(var,val) do{ \ + int v= (val); \ + if ((var)==-1) (var)= v; \ + else eassert((var) == v); \ + }while(0) + + SET_ONCE(columns, colno); + SET_ONCE(text_h, comminty - 1); +} + +CanonImage *alloc_canon_image(int w, int h) { + CanonImage *im= malloc(sizeof(CanonImage) + w*h); + eassert(im); + im->w= w; + im->h= h; + memset(im->d,'?',w*h); + return im; +} + +CanonImage *file_read_image_ppm(FILE *f) { + struct pam inpam; + unsigned char rgb_buf[3]; + CanonImage *im; + + pnm_readpaminit(f, &inpam, sizeof(inpam)); + eassert(inpam.maxval == 255); + eassert(inpam.bytes_per_sample == 1); + + CANONICALISE_IMAGE(im, inpam.width, inpam.height, { + int r= fread(&rgb_buf,1,3,f); eassert(r==3); + + rgb= + ((unsigned long)rgb_buf[0]<<16) | + ((unsigned long)rgb_buf[1]<<8) | + (rgb_buf[2]); + }); + + return im; +} + +void read_screenshots(void) { + int c; + while ((c= fgetc(screenshots_file) != EOF)) { + ungetc(c, screenshots_file); + + eassert(npages < MAX_PAGES); + page_images[npages++]= file_read_image_ppm(screenshots_file); + } +} + +static void find_commodity(int offset, Rect *rr) { + /* rr->tl.x==-1 if offset out of range */ + rr->tl.y= commbasey - offset*comminty; + rr->br.y= rr->tl.y + comminty-2; + if (rr->tl.y < mainr.tl.y || rr->br.y > mainr.br.y) { rr->tl.x=-1; return; } + if (rr->tl.y > mainr.tl.y) + require_rectangle(rr->tl.x,rr->tl.y-1, rr->br.x,rr->tl.y-1, "+"); + if (rr->br.y < mainr.tl.y) + require_rectangle(rr->tl.x,rr->br.y+1, rr->br.x,rr->br.y+1, "+"); + + rr->tl.x= mainr.tl.x; + rr->br.x= mainr.br.x; +} + +static void find_table_entry(Rect commod, int colno, Rect *cellr) { + cellr->tl.y= commod.tl.y; + cellr->br.y= commod.br.y; + cellr->tl.x= !colno ? commod.tl.x : colrightx[colno-1]+2; + cellr->br.x= colrightx[colno]; + debug_rect("cell", colno, *cellr); + require_rectangle_r(*cellr, " o"); +} + +static void ocr_rectangle(Rect r, const OcrCellType ct) { + OcrResultGlyph *results, *res; + + int w= r.br.x - r.tl.x + 1; + Pixcol cols[w+1]; + int x,y; + for (x=0; xs; res++) + printf("%s",res->s); +} + +void analyse(void) { + Rect thisr, entryr; + int page, tryrect, colno; + + for (page=0; pageh; tryrect >= -cim->h; tryrect--) { + find_commodity(tryrect, &thisr); + if (thisr.tl.x < 0) + continue; + debug_rect("commod",tryrect, thisr); + + const char *tab= ""; + for (colno=0; colnod + y*w, 1,w, debug); \ + eassert(r==w); \ + fputc('\n',debug); +#else +# define CANIMG_DEBUG_RECTANGLE_1LINE(im,y,h) /* nothing */ +#endif + +#define CANONICALISE_IMAGE(im,w,h, COMPUTE_RGB) do{ \ + /* compute_rgb should be a number of statements, or \ + * a block, which assigns to \ + * unsigned long rgb; \ + * given the values of \ + * int x,y; \ + * all of which are anamorphic. Result is stored in im. \ + * The COMPUTE_RGB is executed exactly once for \ + * each pixel in reading order. \ + */ \ + (im)= alloc_canon_image((w), (h)); \ + \ + int x,y; \ + for (y=0; y<(h); y++) { \ + for (x=0; x<(w); x++) { \ + const CanonColourInfo *cci; \ + unsigned long rgb; \ + COMPUTE_RGB; \ + for (cci=canoncolourinfos; cci->c; cci++) \ + if (cci->rgb == rgb) { \ + (im)->d[y*(w) + x]= cci->c; \ + break; \ + } \ + } \ + CANIMG_DEBUG_RECTANGLE_1LINE((im),(w),(h)) \ + } \ + debug_flush(); \ + }while(0) + + +#endif /*STRUCTURE_H*/