struct DatabaseNode *then;
} DatabaseLink;
-#define MAXGLYPHCHRS 3
+#define MAXGLYPHCHRS 7
typedef struct DatabaseNode {
char s[MAXGLYPHCHRS+1]; /* null-terminated; "" means no match here */
static const char *context_names[]= {
"Lower",
"Upper",
-/* "Digit"*/
+ "Digit"
};
#define NCONTEXTS (sizeof(context_names)/sizeof(context_names[0]))
+#define SPACE_SPACES 4
+
struct OcrReader {
int h;
DatabaseNode contexts[NCONTEXTS];
static pid_t resolver_pid;
static int resolver_done;
+DEBUG_DEFINE_DEBUGF(ocr)
+
static void fgetsline(FILE *f, char *lbuf, size_t lbufsz) {
char *s= fgets(lbuf,lbufsz,f);
eassert(s);
char lbuf[100];
FILE *db;
+ for (ctxi=0; ctxi<NCONTEXTS; ctxi++)
+ cleardb_node(&rd->contexts[ctxi]);
+
char *dbfname=0;
asprintf(&dbfname,"%s/charset-%d.txt",get_vardir(),rd->h);
eassert(dbfname);
- db= fopen(dbfname,"r"); eassert(db);
+ db= fopen(dbfname,"r");
free(dbfname);
+ if (!db) {
+ eassert(errno==ENOENT);
+ return;
+ }
FGETSLINE(db,lbuf);
eassert(!strcmp(lbuf,"# ypp-sc-tools pctb font v1"));
eassert(r==1);
eassert(h==rd->h);
- for (ctxi=0; ctxi<NCONTEXTS; ctxi++)
- cleardb_node(&rd->contexts[ctxi]);
-
for (;;) {
FGETSLINE(db,lbuf);
if (!lbuf || lbuf[0]=='#') continue;
}
eassert(!ferror(db));
eassert(!fclose(db));
-}
+}
+
+static void cu_pr_ctxmap(unsigned ctxmap) { /* print to resolver the names of the contexts set in ctxmap, as a {...} list */
+ fprintf(resolver,"{");
+ const char *spc=""; /* separator: empty before the first name printed */
+ int ctxi;
+ for (ctxi=0; ctxi<NCONTEXTS; ctxi++) {
+ if (!(ctxmap & (1u << ctxi))) continue; /* bit ctxi clear: context not in the map */
+ fprintf(resolver,"%s%s",spc,context_names[ctxi]);
+ spc=" "; /* every later name is preceded by one space */
+ }
+ fprintf(resolver,"}");
+}
static void callout_unknown(OcrReader *rd, int w, Pixcol cols[],
int unk_l, int unk_r, unsigned unk_ctxmap) {
* so we aren't in any danger of overwriting some other fd 4: */
r= dup2(donepipe[1],4); eassert(r==4);
execlp("./show-thing.tcl", "./show-thing.tcl",
- "--automatic","1",(char*)0);
+ DEBUGP(callout) ? "--debug" : "--noop-arg",
+ "--automatic-1",
+ (char*)0);
eassert(!"execlp failed");
}
r= close(jobpipe[0]); eassert(!r);
resolver= fdopen(jobpipe[1],"w"); eassert(resolver);
resolver_done= donepipe[0];
}
- fprintf(resolver,"%d %d {",unk_l,unk_r);
- const char *spc="";
- int ctxi;
- for (ctxi=0; ctxi<NCONTEXTS; ctxi++) {
- if (!(unk_ctxmap & (1u << ctxi))) continue;
- fprintf(resolver,"%s%s",spc,context_names[ctxi]);
- spc=" ";
- }
- fprintf(resolver,"}");
+ fprintf(resolver,"%d %d ",unk_l,unk_r);
+ cu_pr_ctxmap(unk_ctxmap);
for (i=0, s=rd->results; i<rd->nresults; i++, s++) {
if (!strcmp(s->s," ")) continue;
- fprintf(resolver," %d %d %s ",s->l,s->r,context_names[s->ctx]);
+ fprintf(resolver," %d %d ",s->l,s->r);
+ cu_pr_ctxmap(s->ctxmap);
+ fprintf(resolver," ");
for (p=s->s; (c= *p); p++) {
if (c=='\\') fprintf(resolver,"\\%c",c);
else if (c>=33 && c<=126) fputc(c,resolver);
readdb(rd);
}
-static void add_result(OcrReader *rd, const char *s, int l, int r, int ctx) {
+static void add_result(OcrReader *rd, const char *s, int l, int r,
+ unsigned ctxmap) {
if (rd->nresults >= rd->aresults) {
rd->aresults++; rd->aresults<<=1;
rd->results= realloc(rd->results,sizeof(*rd->results)*rd->aresults);
rd->results[rd->nresults].s= s;
rd->results[rd->nresults].l= l;
rd->results[rd->nresults].r= r;
- rd->results[rd->nresults].ctx= ctx;
+ rd->results[rd->nresults].ctxmap= ctxmap;
rd->nresults++;
}
-OcrResultGlyph *ocr(OcrReader *rd, int w, Pixcol cols[]) {
- int nspaces=-w;
- unsigned ctxmap=2; /* uppercase */
+struct OcrCellTypeInfo { /* context bitmaps (bit i selects context_names[i]) that ocr() loads into ctxmap */
+ unsigned initial, nextword, midword; /* used at (re)start, after SPACE_SPACES blank columns, and after a matched glyph */
+};
+const struct OcrCellTypeInfo ocr_celltype_number= {
+ 4,4,4 /* initial, nextword, midword: 4 == 1u<<2, i.e. only context_names[2] ("Digit") */
+};
+const struct OcrCellTypeInfo ocr_celltype_text= {
+ .initial=2 /* Uppercase */,
+ .nextword=3 /* Either */,
+ .midword=1 /* Lower only */
+};
+
+OcrResultGlyph *ocr(OcrReader *rd, OcrCellType ct, int w, Pixcol cols[]) {
+ int nspaces;
+ unsigned ctxmap;
int ctxi, i, x;
- rd->nresults=0;
+ restart:
- fprintf(debug,"OCR h=%d w=%d",rd->h,w);
- for (x=0; x<w; x++) fprintf(debug," %"PSPIXCOL(PRIx),cols[x]);
- fprintf(debug,"\n");
+ nspaces=- w;
+ ctxmap= ct->initial;
+ rd->nresults=0;
+ debugf("OCR h=%d w=%d",rd->h,w);
+ for (x=0; x<w; x++) debugf(" %"PSPIXCOL(PRIx),cols[x]);
+ debugf("\n");
debug_flush();
- restart:
x=0;
for (;;) {
debug_flush();
if (!cols[x]) {
nspaces++;
x++;
- if (nspaces==3) {
- fprintf(debug,"OCR x=%x nspaces=%d space\n",x,nspaces);
- add_result(rd," ",x-nspaces,x+1,0);
- ctxmap=3; /* either */
+ if (nspaces==SPACE_SPACES) {
+ debugf("OCR x=%x nspaces=%d space\n",x,nspaces);
+ ctxmap= ct->nextword;
}
continue;
}
+
+ /* something here, so we need to add the spaces */
+ if (nspaces>=SPACE_SPACES)
+ add_result(rd," ",x-nspaces,x+1,0);
nspaces=0;
/* find character */
int lx=x;
DatabaseNode *uniquematch= 0;
- int uniquematch_rx=-1, uniquematch_ctxi=-1;
+ int uniquematch_rx=-1;
- fprintf(debug,"OCR lx=%d ctxmap=%x ",lx,ctxmap);
+ debugf("OCR lx=%d ctxmap=%x ",lx,ctxmap);
for (ctxi=0; ctxi<NCONTEXTS; ctxi++) {
DatabaseNode *current= &rd->contexts[ctxi];;
x= lx;
if (!(ctxmap & (1u << ctxi))) continue;
- fprintf(debug," || %s",context_names[ctxi]);
+ debugf(" || %s",context_names[ctxi]);
for (;;) {
debug_flush();
- fprintf(debug," | x=%d",x);
+ debugf(" | x=%d",x);
if (x>w) break;
Pixcol cv= cols[x];
- fprintf(debug," cv=%"PSPIXCOL(PRIx),cv);
+ debugf(" cv=%"PSPIXCOL(PRIx),cv);
for (i=0; i<current->nlinks; i++)
if (current->links[i].col == cv)
goto found;
/* not found */
- fprintf(debug," ?");
+ debugf(" ?");
break;
found:
current= current->links[i].then;
if (current->s[0]) {
- fprintf(debug," \"%s\"",current->s);
+ debugf(" \"%s\"",current->s);
bestmatch= current;
bestmatch_rx= x;
} else {
- fprintf(debug," ...");
+ debugf(" ...");
}
x++;
}
if (bestmatch) {
- if (uniquematch) {
- fprintf(debug, " ambiguous");
+ if (uniquematch && strcmp(bestmatch->s, uniquematch->s)) {
+ debugf( " ambiguous");
uniquematch= 0;
break;
}
uniquematch= bestmatch;
uniquematch_rx= bestmatch_rx;
- uniquematch_ctxi= ctxi;
}
}
if (uniquematch) {
- fprintf(debug," || YES\n");
- add_result(rd, uniquematch->s, lx, uniquematch_rx, uniquematch_ctxi);
+ debugf(" || YES\n");
+ add_result(rd, uniquematch->s, lx, uniquematch_rx, ctxmap);
x= uniquematch_rx+1;
- ctxmap= 1; /* Lower only */
+ ctxmap= ct->midword;
} else {
int rx;
- fprintf(debug," || UNKNOWN");
+ debugf(" || UNKNOWN");
for (rx=lx; rx<w && cols[rx]; rx++);
- fprintf(debug," x=%d ctxmap=%x %d..%d\n",x, ctxmap, lx,rx);
+ debugf(" x=%d ctxmap=%x %d..%d\n",x, ctxmap, lx,rx);
debug_flush();
callout_unknown(rd, w,cols, lx,rx-1, ctxmap);
goto restart;
}
}
add_result(rd, 0,-1,-1,0);
- fprintf(debug,"OCR finished %d glyphs\n",rd->nresults);
+ debugf("OCR finished %d glyphs\n",rd->nresults);
debug_flush();
return rd->results;
}