From f5bf5f990ae25df9d9495c6aec00ebdbee6a703c Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 3 Jul 2009 18:02:09 +0100 Subject: [PATCH] Permit matching Upper in mid of word --- pctb/ocr.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/pctb/ocr.c b/pctb/ocr.c index f174c68..6482b81 100644 --- a/pctb/ocr.c +++ b/pctb/ocr.c @@ -55,8 +55,8 @@ typedef struct { #define FOR_EACH_CONTEXT(EACH) \ EACH(Word) \ - EACH(Upper) \ EACH(Lower) \ + EACH(Upper) \ EACH(Digit) #define FEC_ENUM(Context) ct_##Context, @@ -363,12 +363,30 @@ static DatabaseNode *findchar(const FindCharArgs *fca, static int findchar_select_text(const FindCharArgs *fca, const FindCharResults results[]) { - if (fca->ctxmap != 017) return -1; + + dbassert(! results[ct_Digit].match ); /* digits are supposedly unambiguous */ + + switch (fca->ctxmap) { + +#define RETURN_IF_LONGER(this,that) do{ \ + if (results[ct_##this].rx > results[ct_##that].rx) \ + return ct_##this; \ + }while(0) + + case ctf_Digit | ctf_Upper | ctf_Lower | ctf_Word: + /* Start of word. Prefer Word match; failing that, take the longest */ + if (results[ct_Word].match) return ct_Word; + RETURN_IF_LONGER(Lower,Upper); + RETURN_IF_LONGER(Upper,Lower); + break; + + case ctf_Digit | ctf_Upper | ctf_Lower: + /* Mid-word. Prefer longer match; failing that, match lower. */ + RETURN_IF_LONGER(Upper,Lower); + return ct_Lower; + } - dbassert(! results[ct_Digit].match ); - if (results[ct_Word].match) return ct_Word; - if (results[ct_Lower].rx > results[ct_Upper].rx) return ct_Lower; - if (results[ct_Upper].rx > results[ct_Lower].rx) return ct_Upper; + /* oh well */ return -1; } @@ -381,7 +399,7 @@ const struct OcrCellTypeInfo ocr_celltype_number= { const struct OcrCellTypeInfo ocr_celltype_text= { .initial= ctf_Digit | ctf_Upper, .nextword= ctf_Digit | ctf_Upper | ctf_Lower | ctf_Word, - .midword= ctf_Digit | ctf_Lower, + .midword= ctf_Digit | ctf_Upper | ctf_Lower, .space_spaces= 4, .name= "text", .findchar_select= findchar_select_text -- 2.30.2