X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-test.git;a=blobdiff_plain;f=pctb%2Focr.c;h=df4aacfbb05e299e09642f66115abfbe0bd8b584;hp=a90ecea64e590641911a3faa5e250373ce16863c;hb=1ae1abc15aae822e996b23e2dc73612b4d401044;hpb=ad09cd7cce6584c63c275d7ed1106e66959b3f9d diff --git a/pctb/ocr.c b/pctb/ocr.c index a90ecea..df4aacf 100644 --- a/pctb/ocr.c +++ b/pctb/ocr.c @@ -25,6 +25,7 @@ static int aresults, nresults; static FILE *resolver; static pid_t resolver_pid; +static int resolver_done; static void ocr_readdb(void) { int ctx,nchrs; @@ -88,27 +89,37 @@ static void ocr_readdb(void) { eassert(feof(db)); } -static void callout_unknown(int w, int h, Pixcol cols[], int unk_l, int unk_r, +static void callout_unknown(int w, int h, Pixcol cols[], + int unk_l, int unk_r, int unk_ctx, const OcrResultGlyph *sofar, int nsofar) { - int pfd[2], c, r,i, x,y; + int jobpipe[2],donepipe[2], c, r,i, x,y; const OcrResultGlyph *s; const char *p; + char cb; Pixcol pv; if (!resolver) { - r= pipe(pfd); eassert(!r); + r= pipe(jobpipe); eassert(!r); + r= pipe(donepipe); eassert(!r); resolver_pid= fork(); eassert(resolver_pid!=-1); if (!resolver_pid) { - r= dup2(pfd[0],0); eassert(!r); - r= close(pfd[1]); eassert(!r); - execlp("./show-thing.tcl", "./show-thing.tcl",(char*)0); + r= dup2(jobpipe[0],0); eassert(r==0); + r= close(jobpipe[1]); eassert(!r); + r= close(donepipe[0]); eassert(!r); + /* we know donepipe[1] is >= 4 and we have dealt with all the others + * so we aren't in any danger of overwriting some other fd 4: */ + r= dup2(donepipe[1],4); eassert(r==4); + execlp("./show-thing.tcl", "./show-thing.tcl", + "--automatic","1",(char*)0); eassert(!"execlp failed"); } - r= close(pfd[0]); eassert(!r); - resolver= fdopen(pfd[1],"w"); eassert(resolver); + r= close(jobpipe[0]); eassert(!r); + r= close(donepipe[1]); eassert(!r); + resolver= fdopen(jobpipe[1],"w"); eassert(resolver); + resolver_done= donepipe[0]; } - fprintf(resolver,"%d %d",unk_l,unk_r); + fprintf(resolver,"%d %d %d",unk_l,unk_r,unk_ctx); for (i=0, s=sofar; il,s->r,s->ctx); for (p=s->s; (c= *p); p++) { @@ -137,30 +148,37 @@ static void callout_unknown(int w, int h, Pixcol cols[], int unk_l, int unk_r, eassert(!ferror(resolver)); eassert(!fflush(resolver)); + eassert(resolver); + for (;;) { - eassert(resolver); - pid_t pid= waitpid(resolver_pid, &r, WUNTRACED); - if (pid==-1) { eassert(errno==EINTR); continue; } + r= read(resolver_done,&cb,1); + if (r==-1) { eassert(errno==EINTR); continue; } + break; + } + + if (r==0) { + pid_t pid; + for (;;) { + pid= waitpid(resolver_pid, &r, 0); + if (pid==-1) { eassert(errno==EINTR); continue; } + break; + } eassert(pid==resolver_pid); if (WIFEXITED(r)) { eassert(!WEXITSTATUS(r)); fclose(resolver); + close(resolver_done); resolver= 0; - } else if (WIFSTOPPED(r)) { - r= kill(resolver_pid,SIGCONT); - eassert(!r); } else if (WIFSIGNALED(r)) { eassert(!"resolver child died due to signal"); } else { eassert(!"weird wait status"); } - struct stat stab, fstab; - r= stat("database",&stab); eassert(!r); - r= fstat(fileno(db),&fstab); eassert(!r); - if (stab.st_ino != fstab.st_ino || - stab.st_dev != fstab.st_dev) - break; + } else { + eassert(r==1); + eassert(cb==0); } + fclose(db); db= 0; ocr_readdb(); @@ -186,11 +204,18 @@ OcrResultGlyph *ocr(int w, int h, Pixcol cols[]) { nresults=0; assert(db); + fprintf(debug,"OCR h=%d w=%d",w,h); + for (x=0; x=w) break; + if (x>=w) + break; if (!cols[x]) { nspaces++; @@ -204,34 +229,50 @@ OcrResultGlyph *ocr(int w, int h, Pixcol cols[]) { int lx=x; int bestmatch_rx=-1; current= &ocr_contexts[ctx]; + fprintf(debug,"OCR lx=%d ctx=%d ",lx,ctx); + for (;;) { + debug_flush(); + fprintf(debug,"| x=%d",x); if (x>w) break; Pixcol cv= cols[x]; + fprintf(debug," cv=%"PSPIXCOL(PRIx),x); for (i=0; inlinks; i++) if (current->links[i].col == cv) goto found; /* not found */ + fprintf(debug," ?"); break; + found: current= current->links[i].then; - if (current->s[0]) { bestmatch=current; bestmatch_rx=x; } + if (current->s[0]) { + fprintf(debug," \"%s\"",current->s); + bestmatch=current; bestmatch_rx=x; + } else { + fprintf(debug," ..."); + } x++; } - if (!bestmatch) { + if (bestmatch) { + fprintf(debug," YES\n"); add_result(bestmatch->s, lx, bestmatch_rx, ctx); x= bestmatch_rx+1; ctx= 0; } else { - int rx; - for (rx=lx+1; rx