From 5a2c03e2e4f52b8329f45cf67afc3edec1f2c65b Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Sat, 6 Jun 2009 00:09:11 +0100 Subject: [PATCH] Seems to be able to do the whole thing --- pctb/charset-15.txt | 557 ++++++++++++++++++++++++++++++++++++++++++++ pctb/convert.c | 10 +- pctb/ocr.c | 71 ++++-- pctb/ocr.h | 10 +- pctb/show-thing.tcl | 55 +++-- 5 files changed, 657 insertions(+), 46 deletions(-) diff --git a/pctb/charset-15.txt b/pctb/charset-15.txt index 873cf51..8c7b13a 100644 --- a/pctb/charset-15.txt +++ b/pctb/charset-15.txt @@ -1,6 +1,188 @@ # ypp-sc-tools pctb font v1 15 +Digit +0 +3e0 +410 +808 +808 +410 +3e0 + +Digit +1 +810 +808 +ff8 +800 +800 + +Digit +2 +c08 +a08 +908 +888 +870 + +Digit +3 +1008 +888 +888 +948 +630 + +Digit +4 +180 +140 +120 +110 +ff8 +100 + +Digit +5 +878 +848 +848 +488 +308 + +Digit +6 +3e0 +490 +848 +848 +488 +308 + +Digit +7 +8 +c08 +308 +c8 +28 +18 + +Digit +8 +730 +8c8 +888 +888 +8c8 +730 + +Digit +9 +860 +890 +908 +908 +490 +3e0 + +Digit +> +820 +440 +440 +280 +100 +100 + +Lower +' +38 + +Lower +- +100 +100 +100 +100 +100 + +Lower +D +ff8 +808 +808 +808 +808 +410 +3e0 + +Lower +F +ff8 +88 +88 +88 + +Lower +J +2000 +2000 +1ff8 + +Lower +K +ff8 +40 +c0 +120 +210 +408 +800 + +Lower +N +ff8 +10 +60 +80 +300 +400 +ff8 + +Lower +R +ff8 +88 +88 +188 +248 +430 +800 + +Lower +W +18 +3e0 +c00 +3e0 +18 +3e0 +c00 +3e0 +18 + +Lower +Y +8 +10 +60 +f80 +60 +10 +8 + Lower a 640 @@ -10,6 +192,46 @@ a fc0 800 +Lower +b +ff8 +440 +820 +820 +820 +7c0 + +Lower +c +7c0 +820 +820 +820 + +Lower +d +7c0 +820 +820 +820 +440 +ff8 + +Lower +e +7c0 +920 +920 +920 +9c0 + +Lower +f +20 +ff0 +28 +8 + Lower g 27c0 @@ -18,6 +240,68 @@ g 2420 1fe0 +Lower +h +ff8 +40 +20 +20 +fc0 + +Lower +i +fe8 + +Lower +k +ff8 +100 +180 +240 +420 +800 + +Lower +l +ff8 + +Lower +m +fe0 +40 +20 +20 +fc0 +40 +20 +20 +fc0 + +Lower +n +fe0 +40 +20 +20 +fc0 + +Lower +o +7c0 +820 +820 +820 +7c0 + +Lower +p +3fe0 +440 +820 +820 +820 +7c0 + Lower r fe0 @@ -25,6 +309,19 @@ fe0 20 20 +Lower +s +8c0 +920 +920 +620 + +Lower +t +20 +7f0 +820 + Lower u 7e0 @@ -33,6 +330,209 @@ u 400 fe0 +Lower +v +20 +c0 +700 +800 +700 +c0 +20 + +Lower +vy +20 +c0 +700 +800 +700 +c0 +60 +180 +2600 +1800 +600 +180 +60 + +Lower +w +60 +380 +c00 +380 +60 +380 +c00 +380 +60 + +Lower +y +60 +180 +2600 +1800 +600 +180 +60 + +Lower +yw +60 +180 +2600 +1800 +600 +180 +60 +380 +c00 +380 +60 +380 +c00 +380 +60 + +Lower +z +c20 +a20 +920 +8a0 +860 + +Upper +B +ff8 +888 +888 +888 +770 + +Upper +C +3e0 +410 +808 +808 +808 +808 + +Upper +D +ff8 +808 +808 +808 +808 +410 +3e0 + +Upper +E +ff8 +888 +888 +888 +888 +808 + +Upper +F +ff8 +88 +88 +88 + +Upper +G +3e0 +410 +808 +808 +808 +808 +f08 + +Upper +H +ff8 +80 +80 +80 +80 +80 +ff8 + +Upper +I +ff8 + +Upper +J +2000 +2000 +1ff8 + +Upper +K +ff8 +40 +c0 +120 +210 +408 +800 + +Upper +L +ff8 +800 +800 +800 +800 + +Upper +M +ff8 +30 +1c0 +600 +1c0 +30 +ff8 + +Upper +N +ff8 +10 +60 +80 +300 +400 +ff8 + +Upper +P +ff8 +88 +88 +88 +70 + +Upper +R +ff8 +88 +88 +188 +248 +430 +800 + Upper S 830 @@ -42,4 +542,61 @@ S 708 8 +Upper +T +8 +8 +8 +ff8 +8 +8 +8 + +Upper +V +18 +60 +380 +c00 +380 +60 +18 + +Upper +W +18 +3e0 +c00 +3e0 +18 +3e0 +c00 +3e0 +18 + +Upper +Y +8 +10 +60 +f80 +60 +10 +8 + +Upper +c +7c0 +820 +820 +820 + +Upper +o +7c0 +820 +820 +820 +7c0 + . diff --git a/pctb/convert.c b/pctb/convert.c index 93c7137..f90ca4a 100644 --- a/pctb/convert.c +++ b/pctb/convert.c @@ -30,6 +30,7 @@ static inline char get_p(Point p) { return get(p.x,p.y); } #define START_MAIN {200,200} #define MIN_COLUMNS 6 #define INTERESTING_COLUMNS 6 +#define TEXT_COLUMNS 2 #define MAX_COLUMNS 7 static Rect mainr = { START_MAIN,START_MAIN }; @@ -238,7 +239,7 @@ static void load_image_and_canonify(void) { debug_flush(); } -static void ocr_rectangle(Rect r) { +static void ocr_rectangle(Rect r, const OcrCellType ct) { OcrResultGlyph *results, *res; int w= r.br.x - r.tl.x + 1; @@ -257,7 +258,7 @@ static void ocr_rectangle(Rect r) { } cols[w]= 0; - results= ocr(rd,w,cols); + results= ocr(rd,ct,w,cols); printf("YES! \""); for (res=results; res->s; res++) printf("%s",res->s); @@ -282,7 +283,10 @@ int main(void) { for (colno=0; colnoresults; inresults; i++, s++) { if (!strcmp(s->s," ")) continue; - fprintf(resolver," %d %d %s ",s->l,s->r,context_names[s->ctx]); + fprintf(resolver," %d %d ",s->l,s->r); + cu_pr_ctxmap(s->ctxmap); + fprintf(resolver," "); for (p=s->s; (c= *p); p++) { if (c=='\\') fprintf(resolver,"\\%c",c); else if (c>=33 && c<=126) fputc(c,resolver); @@ -247,7 +254,8 @@ static void callout_unknown(OcrReader *rd, int w, Pixcol cols[], readdb(rd); } -static void add_result(OcrReader *rd, const char *s, int l, int r, int ctx) { +static void add_result(OcrReader *rd, const char *s, int l, int r, + unsigned ctxmap) { if (rd->nresults >= rd->aresults) { rd->aresults++; rd->aresults<<=1; rd->results= realloc(rd->results,sizeof(*rd->results)*rd->aresults); @@ -256,23 +264,37 @@ static void add_result(OcrReader *rd, const char *s, int l, int r, int ctx) { rd->results[rd->nresults].s= s; rd->results[rd->nresults].l= l; rd->results[rd->nresults].r= r; - rd->results[rd->nresults].ctx= ctx; + rd->results[rd->nresults].ctxmap= ctxmap; rd->nresults++; } -OcrResultGlyph *ocr(OcrReader *rd, int w, Pixcol cols[]) { - int nspaces=-w; - unsigned ctxmap=2; /* uppercase */ +struct OcrCellTypeInfo { + unsigned initial, nextword, midword; +}; +const struct OcrCellTypeInfo ocr_celltype_number= { + 4,4,4 +}; +const struct OcrCellTypeInfo ocr_celltype_text= { + .initial=2 /* Uppercase */, + .nextword=3 /* Either */, + .midword=1 /* Lower only */ +}; + +OcrResultGlyph *ocr(OcrReader *rd, OcrCellType ct, int w, Pixcol cols[]) { + int nspaces; + unsigned ctxmap; int ctxi, i, x; - rd->nresults=0; + restart: + nspaces=- w; + ctxmap= ct->initial; + rd->nresults=0; fprintf(debug,"OCR h=%d w=%d",rd->h,w); for (x=0; xnextword; } continue; } @@ -296,7 +318,7 @@ OcrResultGlyph *ocr(OcrReader *rd, int w, Pixcol cols[]) { int lx=x; DatabaseNode *uniquematch= 0; - int uniquematch_rx=-1, uniquematch_ctxi=-1; + int uniquematch_rx=-1; fprintf(debug,"OCR lx=%d ctxmap=%x ",lx,ctxmap); @@ -336,22 +358,21 @@ OcrResultGlyph *ocr(OcrReader *rd, int w, Pixcol cols[]) { } if (bestmatch) { - if (uniquematch) { + if (uniquematch && strcmp(bestmatch->s, uniquematch->s)) { fprintf(debug, " ambiguous"); uniquematch= 0; break; } uniquematch= bestmatch; uniquematch_rx= bestmatch_rx; - uniquematch_ctxi= ctxi; } } if (uniquematch) { fprintf(debug," || YES\n"); - add_result(rd, uniquematch->s, lx, uniquematch_rx, uniquematch_ctxi); + add_result(rd, uniquematch->s, lx, uniquematch_rx, ctxmap); x= uniquematch_rx+1; - ctxmap= 1; /* Lower only */ + ctxmap= ct->midword; } else { int rx; fprintf(debug," || UNKNOWN"); diff --git a/pctb/ocr.h b/pctb/ocr.h index d08ae15..a03f1f5 100644 --- a/pctb/ocr.h +++ b/pctb/ocr.h @@ -19,17 +19,17 @@ typedef uint32_t Pixcol; typedef struct { const char *s; /* valid until next call to ocr() */ int l,r; /* column numbers */ - int ctx; /* match context index */ + unsigned ctxmap; /* match context index */ } OcrResultGlyph; -typedef const struct OcrGlyphContextDeveloperInfo *OcrCellContext; -extern const struct OcrGlyphContextDeveloperInfo *ocr_celltype_text; -extern const struct OcrGlyphContextDeveloperInfo *ocr_celltype_number; +typedef const struct OcrCellTypeInfo *OcrCellType; +extern const struct OcrCellTypeInfo ocr_celltype_text; +extern const struct OcrCellTypeInfo ocr_celltype_number; typedef struct OcrReader OcrReader; OcrReader *ocr_init(int h); -OcrResultGlyph *ocr(OcrReader *rd, int w, Pixcol cols[]); +OcrResultGlyph *ocr(OcrReader *rd, OcrCellType, int w, Pixcol cols[]); /* return value is array terminated by {0,-1,-1} * array is valid until next call to ocr() */ diff --git a/pctb/show-thing.tcl b/pctb/show-thing.tcl index 440f9c6..d71c36e 100755 --- a/pctb/show-thing.tcl +++ b/pctb/show-thing.tcl @@ -69,8 +69,8 @@ proc show_context {maxhv x ctxs} { upvar 1 $maxhv maxh set w .d.ctx.at$x if {[llength $ctxs]==1} { set fg blue } { set fg yellow } - label $w -bg black -fg $fg -text [join $ctxs "/\n "] - place $w -x [expr {$x*$mul}] -y 0 + label $w -bg black -fg $fg -text [join $ctxs "/\n"] -justify left + place $w -x [expr {($x-1)*$mul}] -y 0 set wh [winfo reqheight $w] if {$wh > $maxh} { set maxh $wh } } @@ -90,8 +90,8 @@ proc resize_widgets {} { eval destroy [winfo children .d.ctx] set maxh 0 - foreach {min max context got} $glyphsdone { - show_context maxh $min [list $context] + foreach {min max contexts got} $glyphsdone { + show_context maxh $min $contexts } show_context maxh $unk_l $unk_contexts .d.ctx configure -height $maxh @@ -115,8 +115,28 @@ proc read_xpm {f} { } if {$y==-3} { manyset $l cols rows colours cpp - #assert {$colours==2} - #assert {$cpp==1} + if {$colours!=2 || $cpp!=1} { error "$l ?" } + + set chop_l [expr {$unk_l - 80}] + set chop_r [expr {$cols - $unk_l - 100}] + if {$chop_l<0} { set chop_l 0 } + + set unk_l [expr {$unk_l - $chop_l}] + set unk_r [expr {$unk_r - $chop_l}] + set ngd {} + foreach {min max contexts got} $glyphsdone { + lappend ngd \ + [expr {$min-$chop_l}] \ + [expr {$max-$chop_l}] \ + $contexts $got + } + set glyphsdone $ngd + + set realcols $cols + set cols [expr {$cols - $chop_l - $chop_r}] + puts stderr "NOW cols=$cols chop_l,r=$chop_l,$chop_r rows=$rows\ + $unk_l $unk_r $ngd" + set mulcols [expr {$cols*$mul+$inter}] set mulrows [expr {$rows*$mul+$inter}] append o "\"$mulcols $mulrows 9 1\",\n" @@ -138,13 +158,19 @@ proc read_xpm {f} { set x 0 set ol "\"+" set olh $ol + if {$chop_r>=0} { + set l [string range $l $chop_l end-$chop_r] + } else { + set l [string range $l $chop_l end] + append l [string repeat " " [expr -$chop_r]] + } foreach c [split $l ""] { set how "u" if {$x >= $unk_l && $x <= $unk_r} { set how q } else { set ab 0 - foreach {min max context got} $glyphsdone { + foreach {min max contexts got} $glyphsdone { set rhsmost_max $max if {$x >= $min && $x <= $max} { set how [lindex {a b} $ab] @@ -186,7 +212,7 @@ proc read_xpm {f} { proc draw_glyphsdone {} { global glyphsdone mul inter eval destroy [winfo children .d.got] - foreach {min max context got} $glyphsdone { + foreach {min max contexts got} $glyphsdone { frame .d.got.m$min -bd 0 -background \#888 label .d.got.m$min.l -text "$got" -fg white -bg black -bd 0 pack .d.got.m$min.l -padx 1 -pady 1 @@ -397,6 +423,7 @@ proc write_database {} { foreach o [lsort $ol] { puts $f $o } + puts $f "." close $f file rename -force $database_fn.new $database_fn } @@ -416,8 +443,8 @@ proc update_database/DEFINE {c0 c1 strq} { if {$c0 == $unk_l} { set ncontexts $unk_contexts } else { - foreach {l r context got} $glyphsdone { - if {$l==$c0} { set ncontexts [list $context]; break } + foreach {l r contexts got} $glyphsdone { + if {$l==$c0} { set ncontexts $contexts; break } } if {![info exists ncontexts]} { puts stderr "must start at letter LHS!" @@ -432,10 +459,12 @@ proc update_database/DEFINE {c0 c1 strq} { write_database } -proc update_database/DELETE {l r ctx} { +proc update_database/DELETE {l r ctxs} { global database - set bm [dbkey $ctx $l $r] - unset database($bm) + foreach ctx $ctxs { + set bm [dbkey $ctx $l $r] + catch { unset database($bm) } + } write_database } -- 2.30.2