From: Ian Jackson Date: Sat, 4 Jul 2009 10:09:03 +0000 (+0100) Subject: antialiasing text conversion: canonimage has 0..7 X-Git-Tag: 1.9.2~47 X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-test.git;a=commitdiff_plain;h=9b746cc0a0512dd570aa5d4b6185bc1007f74b03 antialiasing text conversion: canonimage has 0..7 --- diff --git a/pctb/convert.h b/pctb/convert.h index b61563f..0ab09db 100644 --- a/pctb/convert.h +++ b/pctb/convert.h @@ -69,7 +69,7 @@ static inline Rgb ri_rgb(const RgbImage *ri, int x, int y) { /*----- from structure.c -----*/ -void find_structure(const CanonImage *im, int *max_relevant_y_r); +void find_structure(CanonImage *im, int *max_relevant_y_r); Rect find_sunshine_widget(void); void find_islandname(RgbImage *ri); diff --git a/pctb/structure.c b/pctb/structure.c index 1f3c743..a3cca0b 100644 --- a/pctb/structure.c +++ b/pctb/structure.c @@ -27,7 +27,7 @@ #include "structure.h" -static const CanonImage *cim; +static CanonImage *cim; static inline char get(int x, int y) { return cim->d[y * cim->w + x]; } static inline char get_p(Point p) { return get(p.x,p.y); } @@ -123,8 +123,11 @@ static void mustfail2(void) { #define MP(v) fprintf(stderr," %s=%d,%d",#v,(v).x,(v).y) #define MI(v) fprintf(stderr," %s=%d", #v,(v)) +#define MIL(v) fprintf(stderr," %s=%ld", #v,(v)) +#define MRGB(v) fprintf(stderr," %s=%06lx", #v,(v)) #define MC(v) fprintf(stderr," %s='%c'", #v,(v)) #define MS(v) fprintf(stderr," %s=\"%s\"", #v,(v)) +#define MF(v) fprintf(stderr," %s=\"%f\"", #v,(v)) #define MSB(v) fprintf(stderr," %s", (v)) #define MR(v) fprintf(stderr," %s=%d,%d..%d,%d",\ #v,(v).tl.x,(v).tl.y,(v).br.x,(v).br.y) @@ -133,7 +136,7 @@ static void mustfail2(void) { #define REQUIRE_RECTANGLE(tlx,tly,brx,bry,ok) \ require_rectangle(tlx, tly, brx, bry, ok, __LINE__); -#define FOR_P_RECT(p,rect) \ +#define FOR_P_RECT(p,rr) \ for ((p).x=(rr).tl.x; (p).x<=(rr).br.x; (p).x++) \ for ((p).y=(rr).tl.y; (p).y<=(rr).br.y; (p).y++) @@ -224,7 +227,7 @@ static int commod_selector_matches(Rect search, const char *const *all, (search).TLBR.XY += increm; \ } -void find_structure(const CanonImage *im, int *max_relevant_y_r) { +void find_structure(CanonImage *im, int *max_relevant_y_r) { cim= im; Rect whole = { {0,0}, {cim->w-1,cim->h-1} }; @@ -447,13 +450,73 @@ static void find_commodity(int offset, Rect *rr) { REQUIRE_RECTANGLE(rr->tl.x,rr->br.y+1, rr->br.x,rr->br.y+1, "+"); } -static void find_table_entry(Rect commod, int colno, Rect *cellr) { - cellr->tl.y= commod.tl.y; - cellr->br.y= commod.br.y; - cellr->tl.x= !colno ? commod.tl.x : colrightx[colno-1]+2; - cellr->br.x= colrightx[colno]; - debug_rect("cell", colno, *cellr); - require_rectangle_r(*cellr, " o", __LINE__); +static void find_table_entry(Rect commod, int colno, Rect *cell) { + cell->tl.y= commod.tl.y; + cell->br.y= commod.br.y; + cell->tl.x= !colno ? commod.tl.x : colrightx[colno-1]+2; + cell->br.x= colrightx[colno]; + debug_rect("cell", colno, *cell); + + const RgbImage *ri= cim->rgb; + + Rgb background= ri_rgb(ri, cell->br.x, cell->br.y); + long bg_count=0, white_count=0, black_count=0; + Point p; + FOR_P_RECT(p,*cell) { + Rgb here= ri_rgb(ri, p.x, p.y); + if (here == background) bg_count++; + else if (here == 0) black_count++; + else if (here == 0xffffffU) white_count++; + } + long total_count= RECT_W(*cell) * RECT_H(*cell); + MUST( bg_count > 8*total_count / 10, + MR(*cell);MIL(bg_count);MIL(white_count);MIL(black_count) ); + if (bg_count == total_count) + return; + + MUST( !!black_count != !!white_count, + MR(*cell);MIL(bg_count);MIL(white_count);MIL(black_count) ); + + debugf("TABLEENTRY col=%d %d,%d..%d,%d bg=%ld white=%ld black=%ld\n", + colno, cell->tl.x,cell->tl.y, cell->br.x,cell->br.y, + bg_count, white_count, black_count); + + Rgb foreground= white_count ? 0xffffffU : 0; + int monochrome= 1; + + FOR_P_RECT(p,*cell) { + Rgb here= ri_rgb(ri, p.x, p.y); + double alpha[3], alpha_mean=0; + int i; + for (i=0; i<3; i++) { + unsigned char here_chan= here >> (i*8); + unsigned char bg_chan= background >> (i*8); + unsigned char fg_chan= foreground >> (i*8); + double alpha_chan= + ((double)here_chan - (double)bg_chan) / + ((double)fg_chan - (double)bg_chan); + alpha[i]= alpha_chan; + alpha_mean += alpha_chan / 3; + } + + double alpha_min = alpha_mean - 0.13; + double alpha_max = alpha_mean + 0.13; + for (i=0; i<3; i++) + MUST( alpha_min <= alpha[i] && alpha[i] <= alpha_max, + MI(i);MRGB(here);MRGB(background);MRGB(foreground); + MF(alpha_min);MF(alpha[i]);MF(alpha_max) ); + + MUST( 0 <= alpha_mean && alpha_mean <= 1, + MRGB(here);MRGB(background);MRGB(foreground); + MF(alpha_mean) ); + int here_int= 7*alpha_mean; + if (!(here_int==0 || here_int==7)) monochrome=0; + cim->d[p.y * cim->w + p.x]= '0' + here_int; + } + + debug_rect("cell07", colno, *cell); + + require_rectangle_r(*cell, "0123456789", __LINE__); } static void ocr_rectangle(Rect r, const OcrCellType ct, FILE *tsv_output) { @@ -469,7 +532,8 @@ static void ocr_rectangle(Rect r, const OcrCellType ct, FILE *tsv_output) { int pixel= get_p(here); switch (pixel) { case ' ': break; - case 'o': cx |= rv; break; + case '0': break; + case '7': cx |= rv; break; default: MUST(!"wrong pixel", MC(pixel);MP(here);MSB(ocr_celltype_name(ct));MR(r); ); @@ -692,5 +756,4 @@ void find_islandname(RgbImage *ri) { archipelago= masprintf("%.*s", (int)(delim-archisland), archisland); island= masprintf("%s", delim+3); - free(ri); }