#include "structure.h"
-static CanonImage *cim;
-
-static inline char get(int x, int y) { return cim->d[y * cim->w + x]; }
-static inline char get_p(Point p) { return get(p.x,p.y); }
-
DEBUG_DEFINE_DEBUGF(struct)
#define START_MAIN {200,200}
#define TEXT_COLUMNS 2
#define MAX_COLUMNS 7
-static Rect mainr = { START_MAIN,START_MAIN };
-static int commbasey, comminty;
-static int colrightx[INTERESTING_COLUMNS];
+struct PageStruct { /* per-page layout, computed by find_structure() into s and saved per page in page_structs[] */
+ Rect mr; /* main table rectangle that find_structure() walks out from START_MAIN; rows outside it are rejected by find_commodity() */
+ int commbasey, comminty; /* commbasey: top y of the row at offset 0; comminty: y distance between successive rows (see find_commodity) */
+ int colrightx[INTERESTING_COLUMNS]; /* x of the right-hand edge of each recorded column's cells (see compute_table_location) */
+};
+
+const CanonImage *page_images[MAX_PAGES]; /* canonical image of each stored page, indexed by page number */
+static PageStruct page_structs[MAX_PAGES]; /* saved layout of each stored page, parallel to page_images */
+const RgbImage *page0_rgbimage; /* RGB pixels of page 0 only; store_current_page() frees the RGB data of every later page */
+int npages; /* number of pages stored so far; also the slot the next stored page goes into */
+
static int text_h=-1, columns=-1;
+
static OcrReader *rd;
+static const CanonImage *cim; /* current image: set by select_page(), find_structure() and adjust_colours() */
+static PageStruct s; /* layout of the current page: loaded by select_page(), recomputed by find_structure() */
+
char *archipelago, *island;
#define OTHERCOORD_x y
#define OTHERCOORD_y x
+
+/* Makes stored page number `page` the current page: points cim at its
+ * canonical image and copies its saved layout into s.  `page` must be a
+ * slot that has already been filled in (see store_current_page); an
+ * unfilled slot is caught by the assert on cim. */
+void select_page(int page) {
+  assert(page >= 0 && page < MAX_PAGES);  /* never index outside the page arrays */
+  cim= page_images[page];
+  s= page_structs[page];
+  assert(cim);
+}
+
+
typedef struct {
Rgb rgb; /* on screen */
char c; /* canonical */
}
}
+static inline char get(int x, int y) { return cim->d[y * cim->w + x]; } /* canonical char at (x,y) of the current image; d is indexed row by row, and no bounds check is made */
+static inline char get_p(Point p) { return get(p.x,p.y); } /* same lookup, taking the coordinates as a Point */
+
static void mustfail1(const char *file, int line, const char *what) {
fprintf(stderr,
}
#define MUST(x, ifnot) do{ \
- if (!(x)) { \
+ if (__builtin_expect(!(x), 0)) { \
mustfail1(__FILE__,__LINE__,#x); \
ifnot; \
mustfail2(); \
#define ADJUST_BOX(search,insidechrs,OP,want, lim,LIMIT_MUST, TLBR,XY,increm) \
for (;;) { \
LIMIT_MUST( (search).tl.XY != (search).br.XY && \
- (search).tl.XY != (lim), \
+ (search).TLBR.XY != (lim), \
MR((search));MSB(#TLBR);MSB(#XY) ); \
int got=0; \
Point p=(search).tl; \
(search).TLBR.XY += increm; \
}
-void find_structure(CanonImage *im, int *max_relevant_y_r) {
+void find_structure(const CanonImage *im,
+ PageStruct **pagestruct_r,
+ int *max_relevant_y_r,
+ Point *commod_focus_point_r,
+ Point *commod_page_point_r,
+ Point *commod_focuslast_point_r) {
cim= im;
-
+
+ FILLZERO(s);
Rect whole = { {0,0}, {cim->w-1,cim->h-1} };
if (DEBUGP(rect)) {
}
}
- WALK_UNTIL_MUST(mainr.tl, x,-1, whole.tl.x, '*');
- WALK_UNTIL_MUST(mainr.tl, y,-1, whole.tl.y, '*');
- WALK_UNTIL_MUST(mainr.br, x,+1, whole.br.x, '*');
- WALK_UNTIL_MUST(mainr.br, y,+1, whole.br.y, '*');
+ Point mainr_tl= START_MAIN;
+ s.mr.tl= mainr_tl;
+ WALK_UNTIL_MUST(s.mr.tl, y,-1, whole.tl.y, ' ');
+ s.mr.br= s.mr.tl;
- REQUIRE_RECTANGLE(mainr.tl.x-1, mainr.tl.y, mainr.tl.x-1, mainr.br.y, "*");
- REQUIRE_RECTANGLE(mainr.br.x+1, mainr.tl.y, mainr.br.x+1, mainr.br.y, "*");
- REQUIRE_RECTANGLE(mainr.tl.x, mainr.tl.y-1, mainr.br.x, mainr.tl.y-1, "*");
- REQUIRE_RECTANGLE(mainr.tl.x, mainr.br.y+1, mainr.br.x, mainr.br.y+1, "*");
+ WALK_UNTIL_MUST(s.mr.tl, x,-1, whole.tl.x, '*');
+ WALK_UNTIL_MUST(s.mr.tl, y,-1, whole.tl.y, '*');
+ WALK_UNTIL_MUST(s.mr.br, x,+1, whole.br.x, '*');
+ WALK_UNTIL_MUST(s.mr.br, y,+1, whole.br.y, '*');
+
+ REQUIRE_RECTANGLE(s.mr.tl.x-1, s.mr.tl.y, s.mr.tl.x-1, s.mr.br.y, "*");
+ REQUIRE_RECTANGLE(s.mr.br.x+1, s.mr.tl.y, s.mr.br.x+1, s.mr.br.y, "*");
+ REQUIRE_RECTANGLE(s.mr.tl.x, s.mr.tl.y-1, s.mr.br.x, s.mr.tl.y-1, "*");
+ REQUIRE_RECTANGLE(s.mr.tl.x, s.mr.br.y+1, s.mr.br.x, s.mr.br.y+1, "*");
#define CHECK_STRIP_BORDER(tlbr,xy,increm) \
do { \
Point csb_p; \
Rect csb_r; \
- csb_p= mainr.tl; \
- csb_p.xy= mainr.tlbr.xy; \
+ csb_p= s.mr.tl; \
+ csb_p.x++; csb_p.y++; \
+ csb_p.xy= s.mr.tlbr.xy; \
if (get_p(csb_p)=='+') { \
- csb_r= mainr; \
+ csb_r= s.mr; \
csb_r.tl.xy= csb_p.xy; \
csb_r.br.xy= csb_p.xy; \
require_rectangle_r(csb_r, "+", __LINE__); \
- mainr.tlbr.xy += increm; \
+ s.mr.tlbr.xy += increm; \
} \
} while(0)
- debug_rect("mainr",0, mainr);
+ debug_rect("s.mr",0, s.mr);
CHECK_STRIP_BORDER(tl,x,+1);
CHECK_STRIP_BORDER(tl,y,+1);
CHECK_STRIP_BORDER(br,x,-1);
CHECK_STRIP_BORDER(br,y,-1);
- debug_rect("mainr",1, mainr);
+ debug_rect("s.mr",1, s.mr);
+
+ Rect updown= {START_MAIN,START_MAIN};
+ const int chkw= 100;
+ updown.br.x += chkw-1;
+ updown.br.y++;
+ debug_rect("updown",__LINE__,updown);
- Point up = START_MAIN;
- WALK_UNTIL_MUST(up, y,-1, mainr.tl.y, '+');
+ ADJUST_BOX(updown, "+", >=,chkw, s.mr.tl.y, MUST, tl,y,-1);
+ debug_rect("updown",__LINE__,updown);
+ updown.br.y= updown.tl.y;
+ updown.tl.y= updown.tl.y-1;
- Point down = START_MAIN;
- down.y++;
- WALK_UNTIL_MUST(down, y,+1, mainr.br.y, '+');
+ ADJUST_BOX(updown, "+*",>=,chkw, s.mr.tl.y-1, MUST, tl,y,-1);
+ debug_rect("updown",__LINE__,updown);
- commbasey= up.y;
- comminty= down.y - up.y + 2;
+ s.commbasey= updown.tl.y + 1;
+ s.comminty= updown.br.y - updown.tl.y;
- Point across= { mainr.tl.x, commbasey };
+ Rect across= {{ s.mr.tl.x - 1, s.commbasey },
+ { s.mr.tl.x, s.commbasey + s.comminty-2 }};
int colno=0;
for (;;) {
- MUST( get_p(across) != '+', MI(colno);MP(across);MR(mainr);MI(commbasey) );
- WALK_UNTIL(across, x,+1, mainr.br.x, '+');
- MUST( colno < MAX_COLUMNS, MP(across);MR(mainr);MI(commbasey); );
- int colrx= across.x;
- if (colrx > mainr.br.x) colrx= mainr.br.x;
+
+#define LIMIT_QUITEQ(cond,mp) { if (!(cond)) break; }
+ debug_rect("across",colno*1000000+__LINE__, across);
+ ADJUST_BOX(across, "+",>=,s.comminty-1,s.mr.br.x,LIMIT_QUITEQ,br,x,+1);
+ debug_rect("across",colno*1000000+__LINE__, across);
+
+ MUST( colno < MAX_COLUMNS,
+ MI(colno);MR(across);MR(s.mr);MI(s.commbasey); );
+ int colrx= across.br.x-1;
+ if (colrx >= s.mr.br.x) colrx= s.mr.br.x;
if (colno < INTERESTING_COLUMNS)
- colrightx[colno]= colrx;
+ s.colrightx[colno]= colrx;
colno++;
- if (across.x >= mainr.br.x-1)
+ if (across.br.x >= s.mr.br.x)
break;
- across.x++;
- REQUIRE_RECTANGLE(across.x,mainr.tl.y, across.x,mainr.br.y, "+");
- across.x++;
+ REQUIRE_RECTANGLE(across.br.x,s.mr.tl.y, across.br.x,s.mr.br.y, "+");
+ across.br.x++;
}
- MUST( colno >= MIN_COLUMNS, MI(colno);MR(mainr);MP(across); );
+ MUST( colno >= MIN_COLUMNS, MI(colno);MR(s.mr);MR(across); );
+
+ const int pagerh= 6;
+ Rect pager= {{ s.mr.br.x, s.mr.br.y - (pagerh-1) },
+ { s.mr.br.x + 1, s.mr.br.y }};
+
+ debug_rect("pager",__LINE__,pager);
+ ADJUST_BOX(pager, "o",>=,pagerh-2, whole.br.x,MUST, br,x,+1);
+ debug_rect("pager",__LINE__,pager);
+
+ pager.tl.x= pager.br.x;
+ pager.br.x= pager.br.x + 1;
+ debug_rect("pager",__LINE__,pager);
+ ADJUST_BOX(pager, "o",>=,pagerh-2, whole.br.x,MUST, br,x,+1);
+ debug_rect("pager",__LINE__,pager);
+
+ ADJUST_BOX(pager, "o",>=,RECT_W(pager)-2, s.mr.tl.y,LIMIT_QUITEQ, tl,y,-1);
+ debug_rect("pager",__LINE__,pager);
#define SET_ONCE(var,val) do{ \
int v= (val); \
if ((var)==-1) (var)= v; \
- else MUST( (var) == v, MSB(#var);MI((var));MI(v);MR(mainr); ); \
+ else MUST( (var) == v, MSB(#var);MI((var));MI(v);MR(s.mr); ); \
}while(0)
SET_ONCE(columns, colno);
- SET_ONCE(text_h, comminty - 1);
+ SET_ONCE(text_h, s.comminty - 1);
+
+ if (pagestruct_r) {
+ *pagestruct_r= mmalloc(sizeof(s));
+ **pagestruct_r= s;
+ }
+
if (max_relevant_y_r)
- SET_ONCE(*max_relevant_y_r, mainr.br.y + 10);
+ SET_ONCE(*max_relevant_y_r, s.mr.br.y + 10);
+
+ if (commod_focus_point_r) {
+ *commod_focus_point_r= s.mr.tl;
+ commod_focus_point_r->x += 10;
+ commod_focus_point_r->y += s.comminty/3;
+ }
+ if (commod_focuslast_point_r) {
+ *commod_focuslast_point_r= s.mr.br;
+ commod_focuslast_point_r->x -= 10;
+ commod_focuslast_point_r->y -= s.comminty/3;
+ }
+ if (commod_page_point_r) {
+ commod_page_point_r->x= (pager.tl.x + pager.br.x) / 2;
+ commod_page_point_r->y= pager.tl.y - 1;
+ }
MUST( text_h <= OCR_MAX_H, MI(text_h) );
}
struct pam inpam;
unsigned char rgb_buf[3];
CanonImage *im;
+ RgbImage *rgb;
+ PageStruct *pstruct;
+
+ progress("page %d reading...",npages);
pnm_readpaminit(f, &inpam, sizeof(inpam));
if (!(inpam.maxval == 255 &&
inpam.format == RPPM_FORMAT))
fatal("PNM screenshot(s) file must be 8bpp 1 byte-per-sample RGB raw");
- CANONICALISE_IMAGE(im, inpam.width, inpam.height, {
+ CANONICALISE_IMAGE(im, inpam.width, inpam.height, rgb, {
int rr= fread(&rgb_buf,1,3,f);
sysassert(!ferror(f));
if (rr!=3) fatal("PNM screenshot(s) file ends unexpectedly");
if (!(npages < MAX_PAGES))
fatal("Too many images in screenshots file; max is %d.\n", MAX_PAGES);
- page_images[npages++]= im;
+ find_structure(im,&pstruct, 0,0,0,0);
+ store_current_page(im,pstruct,rgb);
+ npages++;
+}
+
+/* Records the page just processed by find_structure() in slot npages.
+ * ci must be the current image (cim).  Runs adjust_colours() on it, keeps
+ * rgb only for page 0 (as page0_rgbimage) and frees it for any other page,
+ * copies *pstruct into page_structs[] and then frees pstruct.  npages is
+ * not changed here; file_read_image_ppm increments it afterwards. */
+void store_current_page(CanonImage *ci, PageStruct *pstruct, RgbImage *rgb) {
+  assert(ci==cim);
+  assert(npages >= 0 && npages < MAX_PAGES);  /* the slot written below must exist */
+  progress("page %d condensing...",npages);
+  adjust_colours(ci, rgb);
+  progress("page %d storing...",npages);
+  if (!npages) page0_rgbimage= rgb;
+  else free(rgb);
+  page_images[npages]= cim;
+  page_structs[npages]= *pstruct;
+  free(pstruct);
}
void read_one_screenshot(void) {
progress("reading screenshot...");
- file_read_image_ppm(screenshot_file);
progress_log("read screenshot.");
}
if (c==EOF) break;
ungetc(c, screenshot_file);
}
- progress("reading screenshot %d...",npages);
file_read_image_ppm(screenshot_file);
}
sysassert(!ferror(screenshot_file));
progress_log("read %d screenshots.",npages);
}
-static double find_aa_density(const RgbImage *ri, Point p, long background,
- long foreground, int fg_extra) {
+static inline double find_aa_density(const RgbImage *ri,
+ Point p, long background,
+ long foreground, int fg_extra) {
Rgb here= ri_rgb(ri, p.x, p.y);
double alpha[3], alpha_mean=0;
static void find_commodity(int offset, Rect *rr) {
/* rr->tl.x==-1 if offset out of range */
- rr->tl.y= commbasey - offset*comminty;
- rr->br.y= rr->tl.y + comminty-2;
- if (rr->tl.y < mainr.tl.y || rr->br.y > mainr.br.y) { rr->tl.x=-1; return; }
+ /* Row `offset` starts offset*comminty pixels above the row at commbasey
+  * and spans comminty-1 pixel rows; a row that does not lie wholly inside
+  * the main rectangle is reported as out of range. */
+ rr->tl.y= s.commbasey - offset*s.comminty;
+ rr->br.y= rr->tl.y + s.comminty-2;
+ if (rr->tl.y < s.mr.tl.y || rr->br.y > s.mr.br.y) { rr->tl.x=-1; return; }
- rr->tl.x= mainr.tl.x;
- rr->br.x= mainr.br.x;
+ rr->tl.x= s.mr.tl.x;
+ rr->br.x= s.mr.br.x;
- if (rr->tl.y > mainr.tl.y)
+ /* Unless the row touches the top edge, a line of '+' must lie above it. */
+ if (rr->tl.y > s.mr.tl.y)
REQUIRE_RECTANGLE(rr->tl.x,rr->tl.y-1, rr->br.x,rr->tl.y-1, "+");
- if (rr->br.y < mainr.tl.y)
+ /* Likewise below, unless the row touches the bottom edge.  The comparison
+  * must be against mr.br.y: once the range check above has passed, br.y
+  * can never be above mr.tl.y, so comparing with tl.y left this check as
+  * dead code. */
+ if (rr->br.y < s.mr.br.y)
REQUIRE_RECTANGLE(rr->tl.x,rr->br.y+1, rr->br.x,rr->br.y+1, "+");
}
-static void find_table_entry(Rect commod, int colno, Rect *cell) {
+static void compute_table_location(Rect commod, int colno, Rect *cell) { /* sets *cell to the part of row rectangle commod that belongs to column colno; only computes coordinates */
cell->tl.y= commod.tl.y;
cell->br.y= commod.br.y;
- cell->tl.x= !colno ? commod.tl.x : colrightx[colno-1]+2;
- cell->br.x= colrightx[colno];
+ cell->tl.x= !colno ? commod.tl.x : s.colrightx[colno-1]+2; /* column 0 starts at the row's left edge; later columns start 2 past the previous column's right edge */
+ cell->br.x= s.colrightx[colno]; /* NOTE(review): colrightx has INTERESTING_COLUMNS entries; confirm callers never pass a larger colno */
debug_rect("cell", colno, *cell);
+}
- const RgbImage *ri= cim->rgb;
-
+/* Feeds the text_h pixel rows of the current image that start at r's
+ * top-left corner, across the full width of r, to ocr() as one Pixcol per
+ * x position (plus an all-zero terminating column), and writes each
+ * returned glyph string to tsv_output.  A ' ' pixel counts as '0'; any
+ * pixel outside '0'..'0'+AAMAXVAL fails the MUST check. */
+static void ocr_rectangle(Rect r, const OcrCellType ct, FILE *tsv_output) {
+  const int ncols= r.br.x - r.tl.x + 1;
+  Pixcol cols[ncols+1];
+  int cx, cy;
+
+  for (cx=0; cx<ncols; cx++) {
+    FILLZERO(cols[cx]);
+    for (cy=0; cy<text_h; cy++) {
+      Point here= { cx+r.tl.x, cy+r.tl.y };
+      int pixel= get_p(here);
+      if (pixel==' ') pixel= '0';
+      MUST( pixel >= '0' && pixel <= '0'+AAMAXVAL,
+            MC(pixel);MP(here);MSB(ocr_celltype_name(ct));MR(r); );
+      pixcol_p_add(&cols[cx], cy, pixel-'0');
+    }
+  }
+  FILLZERO(cols[ncols]);   /* extra zeroed column after the last real one */
+
+  OcrResultGlyph *glyph= ocr(rd,ct,ncols,cols);
+  while (glyph->s) {       /* result list ends at the entry whose s is null */
+    fputs(glyph->s,tsv_output);
+    glyph++;
+  }
+}
+
+#define FOR_COMMODITY_CELL(ROW_START, CELL, ROW_END) do{ /* for each commodity row of the current page: ROW_START, then CELL once per column, then ROW_END */ \
+ Rect rowr, cell; /* current row and current cell; both are visible to the three arguments */ \
+ int tryrect, colno; \
+ \
+ for (tryrect= +cim->h; tryrect >= -cim->h; tryrect--) { /* try every row offset from +h down to -h */ \
+ find_commodity(tryrect, &rowr); \
+ if (rowr.tl.x < 0) /* find_commodity reports an out-of-range offset as tl.x==-1 */ \
+ continue; \
+ debug_rect("commod",tryrect, rowr); \
+ \
+ ROW_START; \
+ \
+ for (colno=0; colno<columns; colno++) { \
+ compute_table_location(rowr,colno,&cell); \
+ \
+ CELL; \
+ } \
+ \
+ ROW_END; \
+ } \
+ }while(0); /* NOTE(review): this ';' is part of the expansion; analyse() invokes the macro without a trailing ';' and relies on it */
+
+static void adjust_colours_cell(CanonImage *ci, const RgbImage *ri,
+ int colno, Rect cell) {
Rgb background;
unsigned char chanbg[3];
long bg_count=0, light_count=0, dark_count=0;
Point p;
- background= ri_rgb(ri, cell->br.x, cell->br.y);
- memcpy(chanbg, RI_PIXEL(ri, cell->br.x, cell->br.y), 3);
+ background= ri_rgb(ri, cell.br.x, cell.br.y);
+ memcpy(chanbg, RI_PIXEL(ri, cell.br.x, cell.br.y), 3);
- FOR_P_RECT(p,*cell) {
+ FOR_P_RECT(p,cell) {
const unsigned char *here_pixel= RI_PIXEL(ri, p.x, p.y);
int i;
for (i=0; i<3; i++) {
else if (here > chanbg[i]) light_count += (here - chanbg[i])/4 + 1;
}
}
- long total_count= RECT_W(*cell) * RECT_H(*cell) * 3;
+ long total_count= RECT_W(cell) * RECT_H(cell) * 3;
MUST( bg_count > total_count / 2,
- MR(*cell);MIL(total_count);MIL(bg_count);
+ MR(cell);MIL(total_count);MIL(bg_count);
MIL(light_count);MIL(dark_count) );
if (bg_count == total_count)
fg_extra= -1;
} else {
MUST( !"tell light from dark",
- MR(*cell);MIL(total_count);MIL(bg_count);
+ MR(cell);MIL(total_count);MIL(bg_count);
MIL(light_count);MIL(dark_count);MRGB(background); );
}
debugf("TABLEENTRY col=%d %d,%d..%d,%d bg=%ld light=%ld dark=%ld\n",
- colno, cell->tl.x,cell->tl.y, cell->br.x,cell->br.y,
+ colno, cell.tl.x,cell.tl.y, cell.br.x,cell.br.y,
bg_count, light_count, dark_count);
-
+
int monochrome= 1;
- FOR_P_RECT(p,*cell) {
+ FOR_P_RECT(p,cell) {
double alpha= find_aa_density(ri,p,background,foreground,fg_extra);
int here_int= floor((AAMAXVAL+1)*alpha);
assert(here_int <= AAMAXVAL);
if (!(here_int==0 || here_int==AAMAXVAL)) monochrome=0;
- cim->d[p.y * cim->w + p.x]= '0' + here_int;
+ ci->d[p.y * ci->w + p.x]= '0' + here_int;
}
- debug_rect("cell0M", colno, *cell);
+ debug_rect("cell0M", colno, cell);
- require_rectangle_r(*cell, "0123456789", __LINE__);
+ require_rectangle_r(cell, "0123456789", __LINE__);
}
-static void ocr_rectangle(Rect r, const OcrCellType ct, FILE *tsv_output) {
- OcrResultGlyph *results, *res;
+void adjust_colours(CanonImage *ci, const RgbImage *ri) { /* runs adjust_colours_cell() on every table cell of ci, using ri as the pixel source */
+ if (!(o_mode & mf_analyse)) /* does nothing unless the mf_analyse bit of o_mode is set */
+ return;
- int w= r.br.x - r.tl.x + 1;
- Pixcol cols[w+1];
- int x,y;
- for (x=0; x<w; x++) {
- FILLZERO(cols[x]);
- for (y=0; y<text_h; y++) {
- Point here= { x+r.tl.x, y+r.tl.y };
- int pixel= get_p(here);
- if (pixel==' ') pixel= '0';
- MUST( pixel >= '0' && pixel <= '0'+AAMAXVAL,
- MC(pixel);MP(here);MSB(ocr_celltype_name(ct));MR(r); );
- pixcol_p_add(&cols[x], y, pixel-'0');
- }
- }
- FILLZERO(cols[w]);
+ cim= ci; /* the row iteration below reads the image through cim and the layout through s, so s must already describe ci */
- results= ocr(rd,ct,w,cols);
- for (res=results; res->s; res++)
- fputs(res->s,tsv_output);
+ FOR_COMMODITY_CELL({},({ /* no per-row work; only the per-cell body is used */
+ adjust_colours_cell(ci,ri,colno,cell);
+ }),{});
}
void analyse(FILE *tsv_output) {
- Rect thisr, entryr;
- int page, tryrect, colno;
+ int page; /* the row and column cursors are declared inside FOR_COMMODITY_CELL */
for (page=0; page<npages; page++) {
- find_structure(page_images[page], 0);
+ select_page(page); /* loads the image and layout saved for this page rather than calling find_structure() again */
if (!page)
check_correct_commodities();
progress("Scanning page %d...",page);
- for (tryrect= +cim->h; tryrect >= -cim->h; tryrect--) {
- find_commodity(tryrect, &thisr);
- if (thisr.tl.x < 0)
- continue;
- debug_rect("commod",tryrect, thisr);
-
- const char *tab= "";
- for (colno=0; colno<columns; colno++) {
- find_table_entry(thisr,colno,&entryr);
- fputs(tab, tsv_output);
- ocr_rectangle(entryr,
- colno<TEXT_COLUMNS
- ? &ocr_celltype_text
- : &ocr_celltype_number,
- tsv_output);
- tab= "\t";
- }
+ const char *tab= ""; /* separator written before the next cell */
+
+ FOR_COMMODITY_CELL({
+ tab= ""; /* row start: nothing is written before the first cell */
+ },{
+ fputs(tab, tsv_output);
+ ocr_rectangle(cell,
+ colno<TEXT_COLUMNS
+ ? &ocr_celltype_text
+ : &ocr_celltype_number, /* the first TEXT_COLUMNS columns are read as text, the rest as numbers */
+ tsv_output);
+ tab= "\t";
+ },{
fputs("\n", tsv_output);
sysassert(!ferror(tsv_output));
sysassert(!fflush(tsv_output));
- }
+ }) /* no ';' here: the macro expansion already ends with one */
}
progress("Commodity table scan complete.");
}
}
void find_islandname(void) {
- find_structure(page_images[0], 0);
+ const RgbImage *rgbsrc= page0_rgbimage;
+ select_page(0);
- RgbImage *ri= alloc_rgb_image(page_images[0]->rgb->w,
- page_images[0]->rgb->h);
- memcpy(ri->data, page_images[0]->rgb->data, ri->w * ri->h * 3);
+ RgbImage *ri= alloc_rgb_image(rgbsrc->w, rgbsrc->h);
+ memcpy(ri->data, rgbsrc->data, ri->w * ri->h * 3);
Rect sunshiner= find_sunshine_widget();
char sunshine[MAXIMGIDENT], archisland[MAXIMGIDENT];
const unsigned char *srcp;
unsigned char *destp, *endp;
- for (srcp=page_images[0]->rgb->data, destp=ri->data,
+ for (srcp= rgbsrc->data, destp=ri->data,
endp= ri->data + 3 * ri->w * ri->h;
destp < endp;
srcp++, destp++) {
might_be_colon=1;
goto ok_might_be_colon;
}
+ } else if (nruns==1 && runs[0]==1 && might_be_colon) {
+ goto colon_found;
}
might_be_colon=0;
ok_might_be_colon: