chiark
/
gitweb
/
~yarrgweb
/
ypp-sc-tools.main.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
made its first tsv!
[ypp-sc-tools.main.git]
/
pctb
/
ocr.c
diff --git
a/pctb/ocr.c
b/pctb/ocr.c
index 266ede2a32fa197299aaff620e974e6a3cc4c834..712f90df77c21e239b8f47c256564aae512acaf4 100644
(file)
--- a/
pctb/ocr.c
+++ b/
pctb/ocr.c
@@ -13,6 +13,7 @@ typedef struct {
typedef struct DatabaseNode {
char s[MAXGLYPHCHRS+1]; /* null-terminated; "" means no match here */
int nlinks, alinks;
typedef struct DatabaseNode {
char s[MAXGLYPHCHRS+1]; /* null-terminated; "" means no match here */
int nlinks, alinks;
+ unsigned endsword:1;
DatabaseLink *links;
} DatabaseNode;
DatabaseLink *links;
} DatabaseNode;
@@ -61,7 +62,7 @@ static void readdb(OcrReader *rd) {
char chrs[MAXGLYPHCHRS+1];
Pixcol cv;
int r,j,ctxi;
char chrs[MAXGLYPHCHRS+1];
Pixcol cv;
int r,j,ctxi;
- int h;
+ int h, endsword;
char lbuf[100];
FILE *db;
char lbuf[100];
FILE *db;
@@ -112,6 +113,11 @@ static void readdb(OcrReader *rd) {
}
chrs[nchrs++]= c;
}
}
chrs[nchrs++]= c;
}
+ endsword= 0;
+ if (nchrs>1 && chrs[nchrs-1]==' ') {
+ endsword= 1;
+ nchrs--;
+ }
chrs[nchrs]= 0;
current= &rd->contexts[ctxi];
chrs[nchrs]= 0;
current= &rd->contexts[ctxi];
@@ -149,6 +155,7 @@ static void readdb(OcrReader *rd) {
eassert(!current->s[0]);
strcpy(current->s, chrs);
eassert(!current->s[0]);
strcpy(current->s, chrs);
+ current->endsword= endsword;
}
eassert(!ferror(db));
eassert(!fclose(db));
}
eassert(!ferror(db));
eassert(!fclose(db));
@@ -360,7 +367,7 @@ OcrResultGlyph *ocr(OcrReader *rd, OcrCellType ct, int w, Pixcol cols[]) {
found:
current= current->links[i].then;
if (current->s[0]) {
found:
current= current->links[i].then;
if (current->s[0]) {
- debugf(" \"%s\"",current->s);
+ debugf(" \"%s\"%s",current->s,current->endsword?"_":"");
bestmatch= current;
bestmatch_rx= x;
} else {
bestmatch= current;
bestmatch_rx= x;
} else {
@@ -382,10 +389,17 @@ OcrResultGlyph *ocr(OcrReader *rd, OcrCellType ct, int w, Pixcol cols[]) {
}
if (uniquematch) {
}
if (uniquematch) {
- debugf(" || YES\n");
+ debugf(" || YES");
add_result(rd, uniquematch->s, lx, uniquematch_rx, ctxmap);
x= uniquematch_rx+1;
add_result(rd, uniquematch->s, lx, uniquematch_rx, ctxmap);
x= uniquematch_rx+1;
- ctxmap= ct->midword;
+ if (uniquematch->s[0]) ctxmap= ct->midword;
+ else debugf(" (empty)");
+ if (uniquematch->endsword) {
+ nspaces= SPACE_SPACES;
+ debugf("_");
+ ctxmap= ct->nextword;
+ }
+ debugf("\n");
} else {
int rx;
debugf(" || UNKNOWN");
} else {
int rx;
debugf(" || UNKNOWN");