X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?a=blobdiff_plain;f=pctb%2Fconvert.c;h=1d516f9cbfdb5ca5cb9ae2c0babb27d032ed47a5;hb=d3eaa28d304c5b0639bc0f7e29b3285317d5c1e7;hp=a0ecca16703749cd47ef6dbd81c03437e2868a33;hpb=59acfe7ae6e8c81462dce00da5006166efb63d22;p=ypp-sc-tools.db-test.git diff --git a/pctb/convert.c b/pctb/convert.c index a0ecca1..1d516f9 100644 --- a/pctb/convert.c +++ b/pctb/convert.c @@ -1,12 +1,5 @@ -#include -#include -#include -#include -#include -#include -#define eassert assert -#define debug stdout +#include "ocr.h" typedef struct { unsigned long rgb; /* on screen */ @@ -16,7 +9,7 @@ typedef struct { static int height, width; static char *image; -static void debug_flush(void) { +void debug_flush(void) { eassert(!fflush(debug)); eassert(!ferror(debug)); } @@ -37,12 +30,14 @@ static inline char get_p(Point p) { return get(p.x,p.y); } #define START_MAIN {200,200} #define MIN_COLUMNS 6 #define INTERESTING_COLUMNS 6 +#define TEXT_COLUMNS 2 #define MAX_COLUMNS 7 static Rect mainr = { START_MAIN,START_MAIN }; static int commbasey, comminty; static int colrightx[INTERESTING_COLUMNS]; - +static int text_h; +static OcrReader *rd; static const CanonColourInfo canoncolourinfos[]= { { 0x475A5E, '*' }, /* edge */ @@ -70,6 +65,7 @@ static void require_rectangle_r(Rect rr, const char *ok) { } static void debug_rect(const char *what, int whati, Rect rr) { +#ifdef DEBUG_RECTANGLES int y,r,w; fprintf(debug, "%s %d: %d,%d..%d,%d:\n", what, whati, rr.tl.x,rr.tl.y, rr.br.x,rr.br.y); @@ -81,6 +77,7 @@ static void debug_rect(const char *what, int whati, Rect rr) { fputc('|',debug); fputc('\n',debug); } +#endif debug_flush(); } @@ -141,6 +138,7 @@ static void find_structure(void) { down.y++; WALK_UNTIL_MUST(down, y,+1, mainr.br.y, '+'); +#ifdef DEBUG_RECTANGLES int xscaleunit, y,x; for (y=0, xscaleunit=1; y<4; y++, xscaleunit*=10) { fprintf(debug," "); @@ -150,11 +148,10 @@ static void find_structure(void) { } fputc('\n',debug); } +#endif commbasey= up.y; comminty= down.y - up.y + 2; - fprintf(debug, "up.y=%d down.y=%d commbasey=%d comminty=%d\n", - up.y,down.y, commbasey,comminty); Point across= { mainr.tl.x, commbasey }; int colno=0; @@ -164,12 +161,8 @@ static void find_structure(void) { eassert(colno < MAX_COLUMNS); int colrx= across.x; if (colrx > mainr.br.x) colrx= mainr.br.x; - if (colno < INTERESTING_COLUMNS) { + if (colno < INTERESTING_COLUMNS) colrightx[colno]= colrx; - fprintf(debug,"colrightx[%d]= %d\n",colno,colrx); - } else { - fprintf(debug,"extra colr %d %d\n",colno,colrx); - } colno++; @@ -181,6 +174,8 @@ static void find_structure(void) { across.x++; } eassert(colno >= MIN_COLUMNS); + + text_h = comminty - 1; } static void find_commodity(int offset, Rect *rr) { @@ -235,102 +230,24 @@ static void load_image_and_canonify(void) { break; } } +#ifdef DEBUG_RECTANGLES fprintf(debug, "%4d ",y); r= fwrite(image + y*width, 1,width, debug); eassert(r==width); fputc('\n',debug); +#endif } debug_flush(); } -typedef uint32_t Pixcol; -#define PSPIXCOL(priscan) priscan##32 - -typedef struct { - Pixcol col; - struct OCRDatabaseNode *then; -} OCRDatabaseLink; - -#define MAXGLYPHCHRS 3 - -typedef struct OCRDatabaseNode { - char s[MAXGLYPHCHRS+1]; /* null-terminated; "" means no match here */ - int nlinks, alinks; - OCRDatabaseLink *links; -} OCRDatabaseNode; - -#define N_OCR_CONTEXTS 2 -static OCRDatabaseNode ocr_contexts[N_OCR_CONTEXTS]; - -static void load_ocr_database(void) { - int ctx,nchrs; - OCRDatabaseNode *current, *additional; - char chrs[MAXGLYPHCHRS+1]; - Pixcol cv; - int r,i,j; - - FILE *db= fopen("database","r"); eassert(db); - - for (;;) { - r= fscanf(db, "%d %d", &ctx, &nchrs); - if (r==EOF) break; - eassert(r==2); - eassert(ctx>=0 && ctx0 && nchrs<=MAXGLYPHCHRS); - - for (i=0; i0 && c<=255); - chrs[i]= c; - } - chrs[nchrs]= 0; - - int twidth; - r= fscanf(db, "%d", &twidth); eassert(r==1); - current= &ocr_contexts[ctx]; - for (i=0; inlinks; j++) - if (current->links[j].col == cv) { - current= current->links[j].then; - goto found_link; - } - - additional= malloc(sizeof(*additional)); eassert(additional); - additional->s[0]= 0; - additional->nlinks= additional->alinks= 0; - additional->links= 0; - if (current->nlinks==current->alinks) { - current->alinks++; - current->alinks<<=1; - current->links= realloc(current->links, - sizeof(*current->links) * current->alinks); - eassert(current->links); - } - current->links[current->nlinks].col= cv; - current->links[current->nlinks].then= additional; - current->nlinks++; - current= additional; - - found_link:; - } - - eassert(!current->s[0]); - strcpy(current->s, chrs); - } - eassert(!ferror(db)); - eassert(feof(db)); - fclose(db); -} +static void ocr_rectangle(Rect r, const OcrCellType ct) { + OcrResultGlyph *results, *res; -static void ocr_rectangle(Rect r) { int w= r.br.x - r.tl.x + 1; - int h= r.br.y - r.tl.y + 1; Pixcol cols[w+1]; int x,y; for (x=0; xw) break; - - if (!cols[x]) { - nspaces++; - x++; - if (nspaces>3) ctx=1; - continue; - } - - OCRDatabaseNode *current=0, *lastmatch=0; - int startx=x; - int afterlastmatchx=-1; - current= &ocr_contexts[ctx]; - for (;;) { - if (x>w) break; - Pixcol cv= cols[x]; - for (i=0; inlinks; i++) - if (current->links[i].col == cv) - goto found; - /* not found */ - break; - found: - x++; - current= current->links[i].then; - if (current->s[0]) { lastmatch=current; afterlastmatchx=x; } - } - - if (!lastmatch) { - int x2; - for (x2=x+1; x2s); - x= afterlastmatchx; - ctx= 0; - } - } + results= ocr(rd,ct,w,cols); + printf("YES! \""); + for (res=results; res->s; res++) + printf("%s",res->s); + printf("\"\n"); + eassert(!ferror(stdout)); + eassert(!fflush(stdout)); } int main(void) { Rect thisr, entryr; int tryrect, colno; - load_ocr_database(); load_image_and_canonify(); find_structure(); + rd= ocr_init(text_h); for (tryrect= +height; tryrect >= -height; tryrect--) { find_commodity(tryrect, &thisr); @@ -402,8 +283,13 @@ int main(void) { for (colno=0; colno