"Upper",
"Digit"
};
+struct OcrCellTypeInfo { /* OCR parameters for one kind of cell; read through an OcrCellType (ct->...) */
+ /* bitmaps of indices into context_names: */
+ unsigned initial, nextword, midword; /* nextword: ctxmap after a space or a word-ending match; midword: ctxmap after a non-empty match; initial: presumably ctxmap at the start of a cell - TODO confirm */
+ int space_spaces; /* number of consecutive empty pixel columns that counts as a space */
+ const char *name; /* returned by ocr_celltype_name() */
+};
+const struct OcrCellTypeInfo ocr_celltype_number= { /* cell type for numeric cells */
+ 4,4,4, /* initial, nextword, midword: bit 2 only in every position (presumably the "Digit" context - confirm against context_names) */
+ .space_spaces= 5, /* 5 empty columns before a space is emitted (text cells use 4) */
+ .name= "number"
+};
+const struct OcrCellTypeInfo ocr_celltype_text= { /* cell type for textual cells */
+ .initial=2, /* Uppercase */
+ .nextword=3, /* Either */
+ .midword=1, /* Lower only */
+ .space_spaces= 4, /* 4 consecutive empty columns are treated as a space */
+ .name= "text" /* string handed back by ocr_celltype_name() */
+};
-#define NCONTEXTS (sizeof(context_names)/sizeof(context_names[0]))
-#define SPACE_SPACES 4
+#define NCONTEXTS (sizeof(context_names)/sizeof(context_names[0])) /* number of entries in the context_names array */
struct OcrReader {
int h;
fatal("Error in character set database.\n" \
" Requirement not met: %s:%d: %s", __FILE__,__LINE__, #x))
-static void fgetsline(FILE *f, char *lbuf, size_t lbufsz) {
- errno=0;
- char *s= fgets(lbuf,lbufsz,f);
- sysassert(!ferror(f));
- dbassert(!feof(f));
- assert(s);
- int l= strlen(lbuf);
- dbassert(l>0); dbassert(lbuf[--l]='\n');
- lbuf[l]= 0;
-}
-#define FGETSLINE(f,buf) (fgetsline(f,buf,sizeof(buf)))
+#define FGETSLINE (fgetsline(db,lbuf,sizeof(lbuf))) /* reads one line into lbuf; takes no arguments, so the caller must have `db` and an array `lbuf` in scope (sizeof on a pointer would give the wrong size) */
static void cleardb_node(DatabaseNode *n) {
int i;
return;
}
- FGETSLINE(db,lbuf);
+ FGETSLINE;
dbassert(!strcmp(lbuf,"# ypp-sc-tools pctb font v1"));
r= fscanf(db, "%d", &h);
dbassert(h==rd->h);
for (;;) {
- FGETSLINE(db,lbuf);
+ FGETSLINE;
if (!lbuf || lbuf[0]=='#') continue;
if (!strcmp(lbuf,".")) break;
if (!strcmp(lbuf,context_names[ctxi]))
goto found_ctx;
/* not found, just skip */
- for (;;) { FGETSLINE(db,lbuf); if (!lbuf[0]) break; }
+ for (;;) { FGETSLINE; if (!lbuf[0]) break; }
continue;
found_ctx:
current= &rd->contexts[ctxi];
for (;;) {
- FGETSLINE(db,lbuf);
+ FGETSLINE;
if (!lbuf[0]) { dbassert(current != &rd->contexts[ctxi]); break; }
char *ep;
cv= strtoul(lbuf,&ep,16); dbassert(!*ep);
const char *p;
char cb;
Pixcol pv;
+
+ if (!o_resolver)
+ fatal("OCR failed - unrecognised characters or ligatures.\n"
+ "Character set database needs to be updated or augmented.\n"
+ "See README.charset.\n");
if (!resolver) {
sysassert(! pipe(jobpipe) );
/* we know donepipe[1] is >= 4 and we have dealt with all the others
* so we aren't in any danger of overwriting some other fd 4: */
sysassert( dup2(donepipe[1],4) ==4 );
- execlp("./yppsc-ocr-resolver", "yppsc-ocr-resolver",
+ execlp(o_resolver, o_resolver,
DEBUGP(callout) ? "--debug" : "--noop-arg",
"--automatic-1",
(char*)0);
- sysassert(!"execlp failed");
+ sysassert(!"execlp ocr-resolver failed");
}
sysassert(! close(jobpipe[0]) );
sysassert(! close(donepipe[1]) );
}
if (r==0) {
- pid_t pid;
- int st;
- for (;;) {
- pid= waitpid(resolver_pid, &st, 0);
- if (pid==-1) { sysassert(errno==EINTR); continue; }
- break;
- }
- sysassert(pid==resolver_pid);
- if (WIFEXITED(st)) {
- if (WEXITSTATUS(st))
- fatal("character resolver failed with nonzero exit status %d",
- WEXITSTATUS(st));
- fclose(resolver);
- close(resolver_done);
- resolver= 0;
- } else if (WIFSIGNALED(st)) {
- fatal("character resolver died due to signal %s%s",
- strsignal(WTERMSIG(st)), WCOREDUMP(st)?" (core dumped)":"");
- } else {
- fatal("character resolver gave strange wait status %d",st);
- }
+ waitpid_check_exitstatus(resolver_pid, "character resolver");
+ fclose(resolver);
+ close(resolver_done);
+ resolver= 0;
} else {
assert(r==1);
sysassert(cb==0);
rd->nresults++;
}
-struct OcrCellTypeInfo {
- unsigned initial, nextword, midword;
- const char *name;
-};
-const struct OcrCellTypeInfo ocr_celltype_number= {
- 4,4,4,
- .name= "number"
-};
-const struct OcrCellTypeInfo ocr_celltype_text= {
- .initial=2, /* Uppercase */
- .nextword=3, /* Either */
- .midword=1, /* Lower only */
- .name= "text"
-};
const char *ocr_celltype_name(OcrCellType ct) { return ct->name; }
if (!cols[x]) {
nspaces++;
x++;
- if (nspaces==SPACE_SPACES) {
+ if (nspaces == ct->space_spaces) {
debugf("OCR x=%x nspaces=%d space\n",x,nspaces);
ctxmap= ct->nextword;
}
}
/* something here, so we need to add the spaces */
- if (nspaces>=SPACE_SPACES)
+ if (nspaces >= ct->space_spaces)
add_result(rd," ",x-nspaces,x+1,0);
nspaces=0;
if (uniquematch->s[0]) ctxmap= ct->midword;
else debugf(" (empty)");
if (uniquematch->endsword) {
- nspaces= SPACE_SPACES;
+ nspaces= ct->space_spaces;
debugf("_");
ctxmap= ct->nextword;
}