#define FOR_EACH_CONTEXT(EACH) \
EACH(Word) \
- EACH(Upper) \
EACH(Lower) \
+ EACH(Upper) /* entries expand in the order listed; via FEC_ENUM that is the order of the ct_* names (presumably enum values -- confirm) */ \
EACH(Digit)
#define FEC_ENUM(Context) ct_##Context,
static int findchar_select_text(const FindCharArgs *fca,
const FindCharResults results[]) {
- if (fca->ctxmap != 017) return -1;
+ /* Pick which context's match to accept for the given ctxmap: returns a ct_* index, or -1 when no choice is made. */
+ dbassert(! results[ct_Digit].match ); /* digits are supposedly unambiguous, so a Digit match is not expected to reach here */
+
+ switch (fca->ctxmap) {
+ /* RETURN_IF_LONGER(this,that): return ct_<this> from the enclosing function when its .rx is strictly greater than ct_<that>'s. */
+#define RETURN_IF_LONGER(this,that) do{ \
+ if (results[ct_##this].rx > results[ct_##that].rx) \
+ return ct_##this; \
+ }while(0)
+ /* NOTE(review): RETURN_IF_LONGER is not #undef'd in the visible code, so it stays defined after this function -- confirm that is intended. */
+ case ctf_Digit | ctf_Upper | ctf_Lower | ctf_Word:
+ /* Start of word. Prefer Word match; failing that, take the longest */
+ if (results[ct_Word].match) return ct_Word;
+ RETURN_IF_LONGER(Lower,Upper);
+ RETURN_IF_LONGER(Upper,Lower);
+ break; /* Lower and Upper have equal rx: leave the switch and return -1 below */
+
+ case ctf_Digit | ctf_Upper | ctf_Lower:
+ /* Mid-word. Prefer longer match; failing that, match lower. */
+ RETURN_IF_LONGER(Upper,Lower);
+ return ct_Lower; /* reached when Upper is not strictly longer, so Lower also wins ties */
+ } /* no default case: any other ctxmap value drops through to the -1 below */
- dbassert(! results[ct_Digit].match );
- if (results[ct_Word].match) return ct_Word;
- if (results[ct_Lower].rx > results[ct_Upper].rx) return ct_Lower;
- if (results[ct_Upper].rx > results[ct_Lower].rx) return ct_Upper;
+ /* oh well: unhandled ctxmap, or an Upper/Lower tie at start of word */
return -1;
}
const struct OcrCellTypeInfo ocr_celltype_text= {
.initial= ctf_Digit | ctf_Upper,
.nextword= ctf_Digit | ctf_Upper | ctf_Lower | ctf_Word,
- .midword= ctf_Digit | ctf_Lower,
+ .midword= ctf_Digit | ctf_Upper | ctf_Lower,
.space_spaces= 4,
.name= "text",
.findchar_select= findchar_select_text