From 344df47a4f31ad647d31152a56a6cc8b004ce087 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Sun, 7 Jun 2009 19:43:31 +0100 Subject: [PATCH] Digit fields have different spacing --- pctb/ocr.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/pctb/ocr.c b/pctb/ocr.c index fd4cba9..d12218f 100644 --- a/pctb/ocr.c +++ b/pctb/ocr.c @@ -46,10 +46,27 @@ static const char *context_names[]= { "Upper", "Digit" }; +struct OcrCellTypeInfo { + /* bitmaps of indices into context_names: */ + unsigned initial, nextword, midword; + int space_spaces; + const char *name; +}; +const struct OcrCellTypeInfo ocr_celltype_number= { + 4,4,4, + .space_spaces= 5, + .name= "number" +}; +const struct OcrCellTypeInfo ocr_celltype_text= { + .initial=2, /* Uppercase */ + .nextword=3, /* Either */ + .midword=1, /* Lower only */ + .space_spaces= 4, + .name= "text" +}; -#define NCONTEXTS (sizeof(context_names)/sizeof(context_names[0])) -#define SPACE_SPACES 4 +#define NCONTEXTS (sizeof(context_names)/sizeof(context_names[0])) struct OcrReader { int h; @@ -320,20 +337,6 @@ static void add_result(OcrReader *rd, const char *s, int l, int r, rd->nresults++; } -struct OcrCellTypeInfo { - unsigned initial, nextword, midword; - const char *name; -}; -const struct OcrCellTypeInfo ocr_celltype_number= { - 4,4,4, - .name= "number" -}; -const struct OcrCellTypeInfo ocr_celltype_text= { - .initial=2, /* Uppercase */ - .nextword=3, /* Either */ - .midword=1, /* Lower only */ - .name= "text" -}; const char *ocr_celltype_name(OcrCellType ct) { return ct->name; } @@ -362,7 +365,7 @@ OcrResultGlyph *ocr(OcrReader *rd, OcrCellType ct, int w, Pixcol cols[]) { if (!cols[x]) { nspaces++; x++; - if (nspaces==SPACE_SPACES) { + if (nspaces == ct->space_spaces) { debugf("OCR x=%x nspaces=%d space\n",x,nspaces); ctxmap= ct->nextword; } @@ -370,7 +373,7 @@ OcrResultGlyph *ocr(OcrReader *rd, OcrCellType ct, int w, Pixcol cols[]) { } /* something here, so we need to add the spaces */ - if (nspaces>=SPACE_SPACES) + if (nspaces >= ct->space_spaces) add_result(rd," ",x-nspaces,x+1,0); nspaces=0; @@ -435,7 +438,7 @@ OcrResultGlyph *ocr(OcrReader *rd, OcrCellType ct, int w, Pixcol cols[]) { if (uniquematch->s[0]) ctxmap= ct->midword; else debugf(" (empty)"); if (uniquematch->endsword) { - nspaces= SPACE_SPACES; + nspaces= ct->space_spaces; debugf("_"); ctxmap= ct->nextword; } -- 2.30.2