X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-test.git;a=blobdiff_plain;f=pctb%2Fstructure.c;h=9d92877c6e5b6a6fc23c42ce4332ed27d0c8112b;hp=bf4fcbd6399c496e0e800b3d05f5f16ad12d7129;hb=fdb2c7e1f2211fe4328ee6390f40a55446ecd078;hpb=9334b48a221df6f96a88933be8fde35e2ef41b35 diff --git a/pctb/structure.c b/pctb/structure.c index bf4fcbd..9d92877 100644 --- a/pctb/structure.c +++ b/pctb/structure.c @@ -27,7 +27,7 @@ #include "structure.h" -static const CanonImage *cim; +static CanonImage *cim; static inline char get(int x, int y) { return cim->d[y * cim->w + x]; } static inline char get_p(Point p) { return get(p.x,p.y); } @@ -123,8 +123,11 @@ static void mustfail2(void) { #define MP(v) fprintf(stderr," %s=%d,%d",#v,(v).x,(v).y) #define MI(v) fprintf(stderr," %s=%d", #v,(v)) +#define MIL(v) fprintf(stderr," %s=%ld", #v,(v)) +#define MRGB(v) fprintf(stderr," %s=%06lx", #v,(v)) #define MC(v) fprintf(stderr," %s='%c'", #v,(v)) #define MS(v) fprintf(stderr," %s=\"%s\"", #v,(v)) +#define MF(v) fprintf(stderr," %s=\"%f\"", #v,(v)) #define MSB(v) fprintf(stderr," %s", (v)) #define MR(v) fprintf(stderr," %s=%d,%d..%d,%d",\ #v,(v).tl.x,(v).tl.y,(v).br.x,(v).br.y) @@ -133,7 +136,7 @@ static void mustfail2(void) { #define REQUIRE_RECTANGLE(tlx,tly,brx,bry,ok) \ require_rectangle(tlx, tly, brx, bry, ok, __LINE__); -#define FOR_P_RECT(p,rect) \ +#define FOR_P_RECT(p,rr) \ for ((p).x=(rr).tl.x; (p).x<=(rr).br.x; (p).x++) \ for ((p).y=(rr).tl.y; (p).y<=(rr).br.y; (p).y++) @@ -224,7 +227,7 @@ static int commod_selector_matches(Rect search, const char *const *all, (search).TLBR.XY += increm; \ } -void find_structure(const CanonImage *im, int *max_relevant_y_r) { +void find_structure(CanonImage *im, int *max_relevant_y_r) { cim= im; Rect whole = { {0,0}, {cim->w-1,cim->h-1} }; @@ -317,6 +320,8 @@ void find_structure(const CanonImage *im, int *max_relevant_y_r) { SET_ONCE(text_h, comminty - 1); if (max_relevant_y_r) SET_ONCE(*max_relevant_y_r, mainr.br.y + 10); + + MUST( text_h <= OCR_MAX_H, MI(text_h) ); } void check_correct_commodities(void) { @@ -447,13 +452,76 @@ static void find_commodity(int offset, Rect *rr) { REQUIRE_RECTANGLE(rr->tl.x,rr->br.y+1, rr->br.x,rr->br.y+1, "+"); } -static void find_table_entry(Rect commod, int colno, Rect *cellr) { - cellr->tl.y= commod.tl.y; - cellr->br.y= commod.br.y; - cellr->tl.x= !colno ? commod.tl.x : colrightx[colno-1]+2; - cellr->br.x= colrightx[colno]; - debug_rect("cell", colno, *cellr); - require_rectangle_r(*cellr, " o", __LINE__); +static void find_table_entry(Rect commod, int colno, Rect *cell) { + cell->tl.y= commod.tl.y; + cell->br.y= commod.br.y; + cell->tl.x= !colno ? commod.tl.x : colrightx[colno-1]+2; + cell->br.x= colrightx[colno]; + debug_rect("cell", colno, *cell); + + const RgbImage *ri= cim->rgb; + + Rgb background= ri_rgb(ri, cell->br.x, cell->br.y); + long bg_count=0, light_count=0, dark_count=0; + Point p; + FOR_P_RECT(p,*cell) { + Rgb here= ri_rgb(ri, p.x, p.y); + if (here == background) bg_count++; + else if (here < background) dark_count++; + else if (here > background) light_count++; + } + long total_count= RECT_W(*cell) * RECT_H(*cell); + MUST( bg_count > total_count / 2, + MR(*cell);MIL(total_count);MIL(bg_count); + MIL(light_count);MIL(dark_count) ); + if (bg_count == total_count) + return; + + MUST( !!dark_count != !!light_count, + MR(*cell);MIL(total_count);MIL(bg_count); + MIL(light_count);MIL(dark_count) ); + + debugf("TABLEENTRY col=%d %d,%d..%d,%d bg=%ld light=%ld dark=%ld\n", + colno, cell->tl.x,cell->tl.y, cell->br.x,cell->br.y, + bg_count, light_count, dark_count); + + Rgb foreground= light_count ? 0xffffffU : 0; + int monochrome= 1; + + FOR_P_RECT(p,*cell) { + Rgb here= ri_rgb(ri, p.x, p.y); + double alpha[3], alpha_mean=0; + int i; + for (i=0; i<3; i++) { + unsigned char here_chan= here >> (i*8); + unsigned char bg_chan= background >> (i*8); + unsigned char fg_chan= foreground >> (i*8); + double alpha_chan= + ((double)here_chan - (double)bg_chan) / + ((double)fg_chan - (double)bg_chan); + alpha[i]= alpha_chan; + alpha_mean += alpha_chan * (1/3.0); + } + + double thresh= 1.0/AAMAXVAL; + double alpha_min= alpha_mean - thresh; + double alpha_max= alpha_mean + thresh; + for (i=0; i<3; i++) + MUST( alpha_min <= alpha[i] && alpha[i] <= alpha_max, + MI(i);MRGB(here);MRGB(background);MRGB(foreground); + MF(alpha_min);MF(alpha[i]);MF(alpha_max) ); + + MUST( 0 <= alpha_mean && alpha_mean <= 1, + MRGB(here);MRGB(background);MRGB(foreground); + MF(alpha_mean) ); + int here_int= AAMAXVAL*alpha_mean; + if (!(here_int==0 || here_int==AAMAXVAL)) monochrome=0; + cim->d[p.y * cim->w + p.x]= '0' + here_int; + } + + debug_rect("cell0M", colno, *cell); + + require_rectangle_r(*cell, "0123456789", __LINE__); } static void ocr_rectangle(Rect r, const OcrCellType ct, FILE *tsv_output) { @@ -463,21 +531,17 @@ static void ocr_rectangle(Rect r, const OcrCellType ct, FILE *tsv_output) { Pixcol cols[w+1]; int x,y; for (x=0; x= '0' && pixel <= '0'+AAMAXVAL, + MC(pixel);MP(here);MSB(ocr_celltype_name(ct));MR(r); ); + pixcol_p_add(&cols[x], y, pixel-'0'); } - cols[x]= cx; } - cols[w]= 0; + FILLZERO(cols[w]); results= ocr(rd,ct,w,cols); for (res=results; res->s; res++) @@ -689,8 +753,7 @@ void find_islandname(RgbImage *ri) { char *delim= strstr(archisland," - "); assert(delim); - archipelago= masprintf("%.*s", delim-archisland, archisland); + archipelago= masprintf("%.*s", (int)(delim-archisland), archisland); island= masprintf("%s", delim+3); - free(ri); }