From: Ian Jackson Date: Thu, 4 Jun 2009 19:25:01 +0000 (+0100) Subject: fix space handling X-Git-Tag: 1.9.2~184 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.main.git;a=commitdiff_plain;h=cd6a4f773c32f73aff27f97e8994c6b7c1019bb7 fix space handling --- diff --git a/pctb/convert.c b/pctb/convert.c index c8082e6..163f814 100644 --- a/pctb/convert.c +++ b/pctb/convert.c @@ -257,7 +257,7 @@ static void ocr_rectangle(Rect r) { results= ocr(w,h,cols); printf("YES! \""); - for (res=0; res->s; res++) + for (res=results; res->s; res++) printf("%s",res->s); printf("\"\n"); eassert(!ferror(stdout)); diff --git a/pctb/database b/pctb/database index 65c84ff..847b618 100644 --- a/pctb/database +++ b/pctb/database @@ -1,3 +1,11 @@ +0 1 61 +6 +640 +920 +920 +920 +fc0 +800 0 1 67 5 27c0 @@ -5,6 +13,12 @@ 2820 2420 1fe0 +0 1 72 +4 +fe0 +40 +20 +20 0 1 75 5 7e0 @@ -20,37 +34,3 @@ fe0 888 708 8 -1 1 61 -6 -640 -920 -920 -920 -fc0 -800 -1 1 63 -4 -7c0 -820 -820 -820 -1 1 65 -5 -7c0 -920 -920 -920 -9c0 -1 1 6e -5 -fe0 -40 -20 -20 -fc0 -1 1 72 -4 -fe0 -40 -20 -20 diff --git a/pctb/ocr.c b/pctb/ocr.c index df4aacf..be479f8 100644 --- a/pctb/ocr.c +++ b/pctb/ocr.c @@ -121,6 +121,7 @@ static void callout_unknown(int w, int h, Pixcol cols[], } fprintf(resolver,"%d %d %d",unk_l,unk_r,unk_ctx); for (i=0, s=sofar; is," ")) continue; fprintf(resolver," %d %d %d ",s->l,s->r,s->ctx); for (p=s->s; (c= *p); p++) { if (c=='\\') fprintf(resolver,"\\%c",c); @@ -198,7 +199,7 @@ static void add_result(const char *s, int l, int r, int ctx) { } OcrResultGlyph *ocr(int w, int h, Pixcol cols[]) { - int nspaces=0; + int nspaces=-w; int ctx=1,i, x; nresults=0; @@ -220,9 +221,14 @@ OcrResultGlyph *ocr(int w, int h, Pixcol cols[]) { if (!cols[x]) { nspaces++; x++; - if (nspaces>3) ctx=1; + if (nspaces==3) { + fprintf(debug,"OCR x=%x nspaces=%d space\n",x,nspaces); + add_result(" ",x-nspaces,x+1,0); + ctx=1; + } continue; } + nspaces=0; /* find character */ OCRDatabaseNode *current=0, *bestmatch=0;