X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.web-live.git;a=blobdiff_plain;f=pctb%2Fconvert.c;h=4784c653994e8e4bd0c675389075cca391b23cf4;hp=a0ecca16703749cd47ef6dbd81c03437e2868a33;hb=5d58a08423953756871493042a9cfff032b66e18;hpb=59acfe7ae6e8c81462dce00da5006166efb63d22 diff --git a/pctb/convert.c b/pctb/convert.c index a0ecca1..4784c65 100644 --- a/pctb/convert.c +++ b/pctb/convert.c @@ -1,22 +1,9 @@ -#include -#include -#include -#include -#include -#include -#define eassert assert -#define debug stdout +#include "ocr.h" -typedef struct { - unsigned long rgb; /* on screen */ - char c; /* canonical */ -} CanonColourInfo; - -static int height, width; -static char *image; +static CanonImage *cim; -static void debug_flush(void) { +void debug_flush(void) { eassert(!fflush(debug)); eassert(!ferror(debug)); } @@ -30,29 +17,52 @@ typedef struct { /* both inclusive */ Point br; } Rect; -static inline char get(int x, int y) { return image[y * width + x]; } +static inline char get(int x, int y) { return cim->d[y * cim->w + x]; } static inline char get_p(Point p) { return get(p.x,p.y); } #define START_MAIN {200,200} #define MIN_COLUMNS 6 #define INTERESTING_COLUMNS 6 +#define TEXT_COLUMNS 2 #define MAX_COLUMNS 7 static Rect mainr = { START_MAIN,START_MAIN }; static int commbasey, comminty; static int colrightx[INTERESTING_COLUMNS]; +static int text_h; +static OcrReader *rd; - -static const CanonColourInfo canoncolourinfos[]= { +const CanonColourInfo canoncolourinfos[]= { { 0x475A5E, '*' }, /* edge */ { 0x2C5F7A, '*' }, /* edge just under box heading shadow */ { 0x7D9094, '+' }, /* interbox */ - { 0xBDC5BF, ' ' }, /* background - pale */ - { 0xADB5AF, ' ' }, /* background - dark */ + + { 0xBDC5BF, ' ' }, /* background - pale Sugar cane, etc. */ + { 0xADB5AF, ' ' }, /* background - dark */ + { 0xC7E1C3, ' ' }, /* background - pale Swill, etc. */ + { 0xB5CFB1, ' ' }, /* background - dark */ + { 0xD6CEB0, ' ' }, /* background - pale Madder, etc. */ + { 0xC8C0A2, ' ' }, /* background - dark */ + { 0xE0E1D3, ' ' }, /* background - pale Lorandite, etc. */ + { 0xD0D1C3, ' ' }, /* background - dark */ + { 0xE5E6C1, ' ' }, /* background - pale Cloth */ + { 0xD7D8B3, ' ' }, /* background - dark */ + { 0xEDDED9, ' ' }, /* background - pale Dye */ + { 0xDACBC6, ' ' }, /* background - dark */ + { 0xD3DEDF, ' ' }, /* background - pale Paint */ + { 0xC5D0D1, ' ' }, /* background - dark */ + { 0xDCD1CF, ' ' }, /* background - pale Enamel */ + { 0xCEC3C1, ' ' }, /* background - dark */ + { 0xF3F6F5, ' ' }, /* background - pale fruit */ + { 0xE2E7E5, ' ' }, /* background - dark */ + { 0x000000, 'o' }, /* foreground */ { 0xD4B356, ' ' }, /* background (cursor) */ { 0xFFFFFF, 'o' }, /* foreground (cursor) */ + + { 0x5B93BF, '_' }, /* selector dropdown background */ + { 0xD7C94F, 'X' }, /* selector dropdown foreground */ { 0,0 } }; @@ -70,17 +80,19 @@ static void require_rectangle_r(Rect rr, const char *ok) { } static void debug_rect(const char *what, int whati, Rect rr) { +#ifdef DEBUG_RECTANGLES int y,r,w; fprintf(debug, "%s %d: %d,%d..%d,%d:\n", what, whati, rr.tl.x,rr.tl.y, rr.br.x,rr.br.y); w= rr.br.x - rr.tl.x + 1; for (y=rr.tl.y; y<=rr.br.y; y++) { fprintf(debug, "%4d%*s|", y, rr.tl.x,""); - r= fwrite(image + y*width + rr.tl.x, 1, w, debug); + r= fwrite(cim->d + y*cim->w + rr.tl.x, 1, w, debug); eassert(r==w); fputc('|',debug); fputc('\n',debug); } +#endif debug_flush(); } @@ -98,7 +110,7 @@ static void debug_rect(const char *what, int whati, Rect rr) { } while(0) static void find_structure(void) { - Rect whole = { {0,0}, {width-1,height-1} }; + Rect whole = { {0,0}, {cim->w-1,cim->h-1} }; WALK_UNTIL_MUST(mainr.tl, x,-1, whole.tl.x, '*'); WALK_UNTIL_MUST(mainr.tl, y,-1, whole.tl.y, '*'); @@ -141,20 +153,20 @@ static void find_structure(void) { down.y++; WALK_UNTIL_MUST(down, y,+1, mainr.br.y, '+'); +#ifdef DEBUG_RECTANGLES int xscaleunit, y,x; for (y=0, xscaleunit=1; y<4; y++, xscaleunit*=10) { fprintf(debug," "); - for (x=0; x<=width; x++) { + for (x=0; x<=cim->w; x++) { if (x % xscaleunit) fputc(' ',debug); else fprintf(debug,"%d",(x / xscaleunit)%10); } fputc('\n',debug); } +#endif commbasey= up.y; comminty= down.y - up.y + 2; - fprintf(debug, "up.y=%d down.y=%d commbasey=%d comminty=%d\n", - up.y,down.y, commbasey,comminty); Point across= { mainr.tl.x, commbasey }; int colno=0; @@ -164,12 +176,8 @@ static void find_structure(void) { eassert(colno < MAX_COLUMNS); int colrx= across.x; if (colrx > mainr.br.x) colrx= mainr.br.x; - if (colno < INTERESTING_COLUMNS) { + if (colno < INTERESTING_COLUMNS) colrightx[colno]= colrx; - fprintf(debug,"colrightx[%d]= %d\n",colno,colrx); - } else { - fprintf(debug,"extra colr %d %d\n",colno,colrx); - } colno++; @@ -181,6 +189,8 @@ static void find_structure(void) { across.x++; } eassert(colno >= MIN_COLUMNS); + + text_h = comminty - 1; } static void find_commodity(int offset, Rect *rr) { @@ -206,131 +216,49 @@ static void find_table_entry(Rect commod, int colno, Rect *cellr) { require_rectangle_r(*cellr, " o"); } -static void load_image_and_canonify(void) { +CanonImage *alloc_canon_image(int w, int h) { + CanonImage *im= malloc(sizeof(CanonImage) + w*h); + eassert(im); + im->w= w; + im->h= h; + memset(im->d,'?',w*h); + return im; +} + +CanonImage *file_read_image(FILE *f) { struct pam inpam; - unsigned char rgb[3]; - int x,y,r; - const CanonColourInfo *cci; + unsigned char rgb_buf[3]; + CanonImage *im; - pnm_readpaminit(stdin, &inpam, sizeof(inpam)); - height= inpam.height; - width= inpam.width; + pnm_readpaminit(f, &inpam, sizeof(inpam)); eassert(inpam.maxval == 255); eassert(inpam.bytes_per_sample == 1); - image= malloc(width*height); - eassert(image); - memset(image,'?',width*height); - - for (y=0; yc; cci++) - if (cci->rgb == rgb_l) { - image[y*width + x]= cci->c; - break; - } - } - fprintf(debug, "%4d ",y); - r= fwrite(image + y*width, 1,width, debug); eassert(r==width); - fputc('\n',debug); - } - debug_flush(); -} - -typedef uint32_t Pixcol; -#define PSPIXCOL(priscan) priscan##32 - -typedef struct { - Pixcol col; - struct OCRDatabaseNode *then; -} OCRDatabaseLink; - -#define MAXGLYPHCHRS 3 - -typedef struct OCRDatabaseNode { - char s[MAXGLYPHCHRS+1]; /* null-terminated; "" means no match here */ - int nlinks, alinks; - OCRDatabaseLink *links; -} OCRDatabaseNode; - -#define N_OCR_CONTEXTS 2 -static OCRDatabaseNode ocr_contexts[N_OCR_CONTEXTS]; + CANONICALISE_IMAGE(im, inpam.width, inpam.height, { + r= fread(&rgb_buf,1,3,f); eassert(r==3); -static void load_ocr_database(void) { - int ctx,nchrs; - OCRDatabaseNode *current, *additional; - char chrs[MAXGLYPHCHRS+1]; - Pixcol cv; - int r,i,j; + rgb= + ((unsigned long)rgb_buf[0]<<16) | + ((unsigned long)rgb_buf[1]<<8) | + (rgb_buf[2]); + }); - FILE *db= fopen("database","r"); eassert(db); - - for (;;) { - r= fscanf(db, "%d %d", &ctx, &nchrs); - if (r==EOF) break; - eassert(r==2); - eassert(ctx>=0 && ctx0 && nchrs<=MAXGLYPHCHRS); - - for (i=0; i0 && c<=255); - chrs[i]= c; - } - chrs[nchrs]= 0; - - int twidth; - r= fscanf(db, "%d", &twidth); eassert(r==1); - current= &ocr_contexts[ctx]; - for (i=0; inlinks; j++) - if (current->links[j].col == cv) { - current= current->links[j].then; - goto found_link; - } - - additional= malloc(sizeof(*additional)); eassert(additional); - additional->s[0]= 0; - additional->nlinks= additional->alinks= 0; - additional->links= 0; - if (current->nlinks==current->alinks) { - current->alinks++; - current->alinks<<=1; - current->links= realloc(current->links, - sizeof(*current->links) * current->alinks); - eassert(current->links); - } - current->links[current->nlinks].col= cv; - current->links[current->nlinks].then= additional; - current->nlinks++; - current= additional; + return im; +} - found_link:; - } +static void load_image_and_canonify(void) { + cim= file_read_image(stdin); +} - eassert(!current->s[0]); - strcpy(current->s, chrs); - } - eassert(!ferror(db)); - eassert(feof(db)); - fclose(db); -} +static void ocr_rectangle(Rect r, const OcrCellType ct) { + OcrResultGlyph *results, *res; -static void ocr_rectangle(Rect r) { int w= r.br.x - r.tl.x + 1; - int h= r.br.y - r.tl.y + 1; Pixcol cols[w+1]; int x,y; for (x=0; xw) break; - - if (!cols[x]) { - nspaces++; - x++; - if (nspaces>3) ctx=1; - continue; - } - - OCRDatabaseNode *current=0, *lastmatch=0; - int startx=x; - int afterlastmatchx=-1; - current= &ocr_contexts[ctx]; - for (;;) { - if (x>w) break; - Pixcol cv= cols[x]; - for (i=0; inlinks; i++) - if (current->links[i].col == cv) - goto found; - /* not found */ - break; - found: - x++; - current= current->links[i].then; - if (current->s[0]) { lastmatch=current; afterlastmatchx=x; } - } - - if (!lastmatch) { - int x2; - for (x2=x+1; x2s); - x= afterlastmatchx; - ctx= 0; - } - } + results= ocr(rd,ct,w,cols); + printf("YES! \""); + for (res=results; res->s; res++) + printf("%s",res->s); + printf("\"\n"); + eassert(!ferror(stdout)); + eassert(!fflush(stdout)); } -int main(void) { +int main_test(void) { Rect thisr, entryr; int tryrect, colno; - load_ocr_database(); load_image_and_canonify(); find_structure(); + rd= ocr_init(text_h); - for (tryrect= +height; tryrect >= -height; tryrect--) { + for (tryrect= +cim->h; tryrect >= -cim->h; tryrect--) { find_commodity(tryrect, &thisr); if (thisr.tl.x < 0) continue; @@ -402,8 +294,13 @@ int main(void) { for (colno=0; colno