chiark
/
gitweb
/
~yarrgweb
/
ypp-sc-tools.web-live.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
swap ct_Word and ct_Lower to make Word be default when applicable
[ypp-sc-tools.web-live.git]
/
pctb
/
ocr.c
diff --git
a/pctb/ocr.c
b/pctb/ocr.c
index f6104452dc1c1854e6ccd19aac77882caae0aba9..ef0364c9ba93b5b0d1b91b00e20f8378ae428b59 100644
(file)
--- a/
pctb/ocr.c
+++ b/
pctb/ocr.c
@@
-53,11
+53,11
@@
typedef struct {
int rx;
} FindCharResults;
int rx;
} FindCharResults;
-enum { ct_
Lower, ct_Upper, ct_Word
, ct_Digit };
+enum { ct_
Word, ct_Upper, ct_Lower
, ct_Digit };
static const char *context_names[]= {
static const char *context_names[]= {
- "
Lower",
/* bit 0, value 001 */
+ "
Word",
/* bit 0, value 001 */
"Upper", /* bit 1, value 002 */
"Upper", /* bit 1, value 002 */
- "
Word",
/* bit 2, value 004 */
+ "
Lower",
/* bit 2, value 004 */
"Digit", /* bit 3, value 010 */
};
struct OcrCellTypeInfo {
"Digit", /* bit 3, value 010 */
};
struct OcrCellTypeInfo {
@@
-223,7
+223,7
@@
static void callout_unknown(OcrReader *rd, int w, Pixcol cols[],
Pixcol pv;
FILE *resolver= resolve_start();
Pixcol pv;
FILE *resolver= resolve_start();
- if (!resolver)
+ if (!resolver
|| !(o_flags & ff_editcharset)
)
fatal("OCR failed - unrecognised characters or ligatures.\n"
"Character set database needs to be updated or augmented.\n"
"See README.charset.\n");
fatal("OCR failed - unrecognised characters or ligatures.\n"
"Character set database needs to be updated or augmented.\n"
"See README.charset.\n");
@@
-235,7
+235,7
@@
static void callout_unknown(OcrReader *rd, int w, Pixcol cols[],
for (i=0, s=rd->results; i<rd->nresults; i++, s++) {
if (!strcmp(s->s," ")) continue;
fprintf(resolver," %d %d ",s->l,s->r);
for (i=0, s=rd->results; i<rd->nresults; i++, s++) {
if (!strcmp(s->s," ")) continue;
fprintf(resolver," %d %d ",s->l,s->r);
- cu_pr_ctxmap(resolver,
s->ctxmap
);
+ cu_pr_ctxmap(resolver,
1u << s->ctxi
);
fprintf(resolver," ");
for (p=s->s; (c= *p); p++) {
if (c=='\\') fprintf(resolver,"\\%c",c);
fprintf(resolver," ");
for (p=s->s; (c= *p); p++) {
if (c=='\\') fprintf(resolver,"\\%c",c);
@@
-265,8
+265,7
@@
static void callout_unknown(OcrReader *rd, int w, Pixcol cols[],
readdb(rd);
}
readdb(rd);
}
-static void add_result(OcrReader *rd, const char *s, int l, int r,
- unsigned ctxmap) {
+static void add_result(OcrReader *rd, const char *s, int l, int r, int ctxi) {
if (rd->nresults >= rd->aresults) {
rd->aresults++; rd->aresults<<=1;
rd->results= mrealloc(rd->results, sizeof(*rd->results)*rd->aresults);
if (rd->nresults >= rd->aresults) {
rd->aresults++; rd->aresults<<=1;
rd->results= mrealloc(rd->results, sizeof(*rd->results)*rd->aresults);
@@
-274,7
+273,7
@@
static void add_result(OcrReader *rd, const char *s, int l, int r,
rd->results[rd->nresults].s= s;
rd->results[rd->nresults].l= l;
rd->results[rd->nresults].r= r;
rd->results[rd->nresults].s= s;
rd->results[rd->nresults].l= l;
rd->results[rd->nresults].r= r;
- rd->results[rd->nresults].ctx
map= ctxmap
;
+ rd->results[rd->nresults].ctx
i= ctxi
;
rd->nresults++;
}
rd->nresults++;
}
@@
-314,7
+313,8
@@
static DatabaseNode *findchar_1ctx(const FindCharArgs *fca,
return bestmatch;
}
return bestmatch;
}
-static DatabaseNode *findchar(const FindCharArgs *fca, int *match_rx) {
+static DatabaseNode *findchar(const FindCharArgs *fca,
+ int *match_rx, int *match_rctxi) {
FindCharResults results[NCONTEXTS];
int ctxi, match=-1, nmatches=0;
FindCharResults results[NCONTEXTS];
int ctxi, match=-1, nmatches=0;
@@
-342,8
+342,9
@@
static DatabaseNode *findchar(const FindCharArgs *fca, int *match_rx) {
if (match<0)
return 0;
if (match<0)
return 0;
- *match_rx= results[ctxi].rx;
- return results[ctxi].match;
+ *match_rx= results[match].rx;
+ if (match_rctxi) *match_rctxi= match;
+ return results[match].match;
}
static int findchar_select_text(const FindCharArgs *fca,
}
static int findchar_select_text(const FindCharArgs *fca,
@@
-415,17
+416,18
@@
OcrResultGlyph *ocr(OcrReader *rd, OcrCellType ct, int w, Pixcol cols[]) {
/* something here, so we need to add the spaces */
if (nspaces >= ct->space_spaces)
/* something here, so we need to add the spaces */
if (nspaces >= ct->space_spaces)
- add_result(rd," ",x-nspaces,x+1,
0
);
+ add_result(rd," ",x-nspaces,x+1,
-1
);
nspaces=0;
fca.x= x;
int match_rx=-1;
nspaces=0;
fca.x= x;
int match_rx=-1;
- DatabaseNode *match= findchar(&fca, &match_rx);
+ int match_ctxi=-1;
+ DatabaseNode *match= findchar(&fca, &match_rx, &match_ctxi);
if (match) {
debugf(" || YES");
if (match) {
debugf(" || YES");
- add_result(rd, match->str, x, match_rx,
fca.ctxmap
);
+ add_result(rd, match->str, x, match_rx,
match_ctxi
);
x= match_rx+1;
if (match->match) fca.ctxmap= ct->midword;
else debugf(" (empty)");
x= match_rx+1;
if (match->match) fca.ctxmap= ct->midword;
else debugf(" (empty)");
@@
-446,7
+448,7
@@
OcrResultGlyph *ocr(OcrReader *rd, OcrCellType ct, int w, Pixcol cols[]) {
}
}
}
}
- add_result(rd, 0,-1,-1,
0
);
+ add_result(rd, 0,-1,-1,
-1
);
debugf("OCR finished %d glyphs\n",rd->nresults);
debug_flush();
return rd->results;
debugf("OCR finished %d glyphs\n",rd->nresults);
debug_flush();
return rd->results;