X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?a=blobdiff_plain;f=pctb%2Focr.c;h=bb9576769e10a1d633ed6ebb7786d75e3bfcf585;hb=8b3b006869bfdc6c2ddbf58d6709a73433abe2f9;hp=04aecc0d48a0431642d73c85f8b65144fa74b491;hpb=5a2c03e2e4f52b8329f45cf67afc3edec1f2c65b;p=ypp-sc-tools.db-test.git diff --git a/pctb/ocr.c b/pctb/ocr.c index 04aecc0..bb95767 100644 --- a/pctb/ocr.c +++ b/pctb/ocr.c @@ -24,6 +24,8 @@ static const char *context_names[]= { #define NCONTEXTS (sizeof(context_names)/sizeof(context_names[0])) +#define SPACE_SPACES 3 + struct OcrReader { int h; DatabaseNode contexts[NCONTEXTS]; @@ -61,12 +63,19 @@ static void readdb(OcrReader *rd) { char lbuf[100]; FILE *db; + for (ctxi=0; ctxicontexts[ctxi]); + char *dbfname=0; asprintf(&dbfname,"%s/charset-%d.txt",get_vardir(),rd->h); eassert(dbfname); - db= fopen(dbfname,"r"); eassert(db); + db= fopen(dbfname,"r"); free(dbfname); + if (!db) { + eassert(errno==ENOENT); + return; + } FGETSLINE(db,lbuf); eassert(!strcmp(lbuf,"# ypp-sc-tools pctb font v1")); @@ -75,9 +84,6 @@ static void readdb(OcrReader *rd) { eassert(r==1); eassert(h==rd->h); - for (ctxi=0; ctxicontexts[ctxi]); - for (;;) { FGETSLINE(db,lbuf); if (!lbuf || lbuf[0]=='#') continue; @@ -305,13 +311,16 @@ OcrResultGlyph *ocr(OcrReader *rd, OcrCellType ct, int w, Pixcol cols[]) { if (!cols[x]) { nspaces++; x++; - if (nspaces==3) { + if (nspaces==SPACE_SPACES) { fprintf(debug,"OCR x=%x nspaces=%d space\n",x,nspaces); - add_result(rd," ",x-nspaces,x+1,0); ctxmap= ct->nextword; } continue; } + + /* something here, so we need to add the spaces */ + if (nspaces>=SPACE_SPACES) + add_result(rd," ",x-nspaces,x+1,0); nspaces=0; /* find character */