#!/usr/bin/wish
#
# pctb/yppsc-ocr-resolver
#
# Interactive helper for an OCR run: displays a magnified strip of a
# bitmap containing an unrecognised region, lets the user either mark
# out and name a new glyph, or delete a previously recognised glyph,
# and updates the glyph database file accordingly.

# usage:
#  run show-thing without args
#  then on stdin write
#     one line which is a Tcl list for unk_{l,r} unk_contexts glyphsdone
#     the xpm in the format expected
#  then expect child to raise SIGSTOP or exit 0 or exit nonzero
#  if child raised SIGSTOP, check database was updated
#
# NOTE(review): the code below selects the stdin protocol only when
# given --automatic-1, and signals completion by writing a NUL byte to
# file descriptor 4 (see done/automatic); no SIGSTOP is raised anywhere
# in this file.  The usage text above may be out of date - confirm
# against the caller.


# manyset LIST VAR...
#   Assigns successive elements of LIST to the named variables in the
#   caller's scope.
proc manyset {list args} {
    foreach val $list var $args {
        upvar 1 $var my
        set my $val
    }
}


#---------- display core ----------

# Magnification: each source pixel becomes a $mul x $mul cell, of which
# $inter pixels on each axis are grid line.
set mul 6
set inter 1

# Nominal heights of the "already recognised", cursor and context bars.
set gotsh 20
set csrh 20
set ctxh 20

# init_widgets
#   Creates the static widget tree.  Idempotent: returns immediately if
#   the widgets already exist.
proc init_widgets {} {
    global csrh gotsh ctxh

    if {[winfo exists .d]} return

    frame .d

    image create bitmap image/main
    label .d.mi -image image/main -borderwidth 0

    frame .d.csr -bg black -height $csrh
    frame .d.got -bg black -height $gotsh
    frame .d.ctx -bg black

    image create bitmap image/cursor -data \
{#define csr_width 11
#define csr_height 11
static unsigned char csr_bits[] = {
   0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x21, 0x04, 0x22, 0x02, 0x25, 0x05,
   0xaa, 0x02, 0x74, 0x01, 0xa8, 0x00, 0x70, 0x00, 0x20, 0x00};
}

    frame .d.csr.csr
    label .d.csr.csr.l -image image/cursor -compound left
    entry .d.csr.csr.e -bd 0
    pack .d.csr.csr.l -side left

    # The two vertical cursor lines drawn over the main image.
    frame .d.mi.csr_0 -bg white -width 1
    frame .d.mi.csr_1 -bg white -width 1

    pack .d.csr .d.mi .d.got .d.ctx -side top
    pack .d

    frame .help
    pack .help
}

# show_context MAXHV X CTXS
#   Places a label listing the contexts CTXS in the context bar at
#   source column X.  MAXHV names a variable in the caller which is
#   raised to the label's requested height if that is larger.
proc show_context {maxhv x ctxs} {
    global mul
    upvar 1 $maxhv maxh
    set w .d.ctx.at$x
    # A single context is shown in blue, several in yellow.
    if {[llength $ctxs]==1} { set fg blue } { set fg yellow }
    label $w -bg black -fg $fg -text [join $ctxs "/\n"] -justify left
    place $w -x [expr {($x-1)*$mul}] -y 0
    set wh [winfo reqheight $w]
    if {$wh > $maxh} { set maxh $wh }
}

# resize_widgets
#   Sizes the bars and cursor lines to the current magnified image and
#   rebuilds the context labels from glyphsdone and unk_contexts.
proc resize_widgets {} {
    global mulcols mulrows csrh gotsh ctxh glyphsdone
    global unk_l unk_contexts

    foreach w {.d.csr .d.got .d.ctx} {
        $w configure -width $mulcols
    }
    #.d configure -height [expr {$csrh+$mulrows+$gotsh+$ctxh}]
    foreach w {0 1} {
        .d.mi.csr_$w configure -height $mulrows
    }

    eval destroy [winfo children .d.ctx]

    set maxh 0
    foreach {min max contexts got} $glyphsdone {
        show_context maxh $min $contexts
    }
    show_context maxh $unk_l $unk_contexts
    .d.ctx configure -height $maxh
}


#---------- xpm input processor ----------

# read_xpm F
#   Reads a 2-colour, 1-char-per-pixel XPM from channel F (pixels are
#   " " and "o"), crops it horizontally to a window starting 80 columns
#   left of unk_l and ending 100 columns right of it, and rewrites it as
#   a magnified, colour-coded XPM which is converted with xpmtoppm and
#   installed as image/main.
#
#   Side effects: rebases unk_l, unk_r and glyphsdone to the cropped
#   coordinates; sets cols, rows, mulcols, mulrows; fills wordmap(x)
#   with a bitmask of the set pixels in column x (bit y = row y).
proc read_xpm {f} {
    global glyphsdone mul inter rhsmost_max unk_l unk_r mulcols mulrows
    global cols rows wordmap

    set o {}
    # y counts quoted XPM lines: -3 is the dimensions line, -2 and -1
    # are the two colour definitions, 0.. are pixel rows.
    set y -3
    while 1 {
        if {[gets $f l] < 0} { error "huh? "}
        if {![regexp {^"(.*)",$} $l dummy l]} {
            # Not a quoted data line: copy through; "};" ends the image.
            append o "$l\n"
            if {[regexp {^\}\;$} $l]} break
            continue
        }
        if {$y==-3} {
            manyset $l cols rows colours cpp
            if {$colours!=2 || $cpp!=1} { error "$l ?" }

            set chop_l [expr {$unk_l - 80}]
            set chop_r [expr {$cols - $unk_l - 100}]
            if {$chop_l<0} { set chop_l 0 }

            set unk_l [expr {$unk_l - $chop_l}]
            set unk_r [expr {$unk_r - $chop_l}]
            set ngd {}
            foreach {min max contexts got} $glyphsdone {
                lappend ngd \
                    [expr {$min-$chop_l}] \
                    [expr {$max-$chop_l}] \
                    $contexts $got
            }
            set glyphsdone $ngd

            set realcols $cols
            set cols [expr {$cols - $chop_l - $chop_r}]
            debug "NOW cols=$cols chop_l,r=$chop_l,$chop_r rows=$rows\
                $unk_l $unk_r $ngd"

            set mulcols [expr {$cols*$mul+$inter}]
            set mulrows [expr {$rows*$mul+$inter}]
            append o "\"$mulcols $mulrows 9 1\",\n"
            for {set x 0} {$x<$cols} {incr x} { set wordmap($x) 0 }
        } elseif {$y==-2} {
            # First colour line of the input: emit our own 9-colour
            # table instead.  Lower case is used for " " pixels and
            # upper case for "o" pixels; a/b alternate between
            # successive recognised glyphs, q is the unknown region,
            # u is everything else, + is the grid.
            append o \
"\"+ c #111\",
\"a c #800\",
\"A c #fcc\",
\"b c #00c\",
\"B c #fff\",
\"u c #000\",
\"U c #ff0\",
\"q c #000\",
\"Q c #ff0\",\n"
        } elseif {$y==-1} {
            # Second colour line of the input; ours is already printed.
        } else {
            set ybit [expr {1<<$y}]
            set x 0
            set ol "\"+"
            set olh $ol
            if {$chop_r>=0} {
                set l [string range $l $chop_l end-$chop_r]
            } else {
                # Window extends past the right edge: pad with blanks.
                set l [string range $l $chop_l end]
                append l [string repeat " " [expr {-$chop_r}]]
            }
            foreach c [split $l ""] {
                set how "u"
                if {$x >= $unk_l && $x <= $unk_r} {
                    set how q
                } else {
                    set ab 0
                    foreach {min max contexts got} $glyphsdone {
                        set rhsmost_max $max
                        if {$x >= $min && $x <= $max} {
                            set how [lindex {a b} $ab]
                            break
                        }
                        set ab [expr {!$ab}]
                    }
                }
                switch -exact $c {
                    " " { set p $how }
                    "o" {
                        set p [string toupper $how]
                        incr wordmap($x) $ybit
                    }
                    default { error "$c ?" }
                }
                append ol "[string repeat $p [expr {$mul-$inter}]][
                    string repeat + $inter]"
                append olh [string repeat + $mul]
                incr x
            }
            set ole "\",\n"
            append ol $ole
            append olh $ole
            # olh is a full-width grid row; ol is one magnified pixel row.
            set olhn [string repeat $olh $inter]
            if {!$y} { append o $olhn }
            append o [string repeat $ol [expr {$mul-1}]]
            append o $olhn
        }
        incr y
    }
    set data [exec xpmtoppm << $o]
    image create photo image/main -data $data
}


#---------- per-invocation display ----------

# draw_glyphsdone
#   Rebuilds the bar showing the text already recognised for each
#   entry of glyphsdone, positioned at that glyph's left column.
proc draw_glyphsdone {} {
    global glyphsdone mul inter
    eval destroy [winfo children .d.got]
    foreach {min max contexts got} $glyphsdone {
        frame .d.got.m$min -bd 0 -background \#888
        label .d.got.m$min.l -text "$got" -fg white -bg black -bd 0
        pack .d.got.m$min.l -padx 1 -pady 1
        place .d.got.m$min -x [expr {$min*$mul+$inter}] -y 0
    }
}

# startup_cursor
#   Initialises the cursor state for a new image: the two cursors
#   bracket the unknown region, cursor 1 is the active one, and the
#   "already" selection points at the last recognised glyph.
proc startup_cursor {} {
    global cur_already cur_mode cur_0 cur_1 last_ht
    global glyphsdone unk_l unk_r

    set cur_already [expr {[llength $glyphsdone]/4-1}]
    set cur_mode 1 ;# one of:   0 1 already text

    set cur_0 $unk_l
    set cur_1 [expr {$unk_r+1}]
    set last_ht {}

    recursor
}


#---------- runtime display and keystroke handling ----------

# helptext T
#   Displays T, a list of rows each of which is a list of cell texts,
#   as a grid of labels in .help.  Does nothing if T is what is
#   already displayed.
proc helptext {t} {
    global last_ht
    if {![string compare $t $last_ht]} return
    eval destroy [grid slaves .help]
    set y 0; foreach l $t {
        set x 0; foreach c $l {
            set w .help.at${x}x${y}
            label $w -text $c
            grid $w -row $y -column $x -padx 5
            incr x
        }
        incr y
    }
    set last_ht $t
}

# recursor/0, recursor/1
#   Mode handlers for "adjusting cursor 0" and "adjusting cursor 1".
proc recursor/0 {} { recursor//01 0 }
proc recursor/1 {} { recursor//01 1 }

# recursor//01 Z1
#   Shared implementation of the two adjust modes: positions the cursor
#   marker at cursor Z1 and installs that mode's key bindings.
proc recursor//01 {z1} {
    global mul rhsmost_max cols glyphsdone
    upvar #0 cur_$z1 cur
    .d.csr.csr.l configure -text {adjust}
    place .d.csr.csr -x [expr {$cur*$mul - 7}]
    bind_key space { othercursor }
    bind_leftright_q cur_$z1 0 [expr {$cols-1}]
    if {[llength $glyphsdone]} {
        bind_key Tab { set cur_mode already; recursor }
    } else {
        bind_key Tab {}
    }
    bind_key Return {
        if {$cur_0 != $cur_1} {
            .d.csr.csr.e delete 0 end
            set cur_mode text
            recursor
        }
    }
    helptext {
        {{<- ->}   {move cursor, adjusting area to define}}
        {Space     {switch to moving other cursor}}
        {Return    {confirm location, enter letter(s)}}
        {Tab       {switch to correcting earlier ocr}}
        {Q         {quit and abandon OCR run}}
    }
}

# othercursor
#   Toggles which of the two cursors is being adjusted.
proc othercursor {} {
    global cur_mode
    set cur_mode [expr {!$cur_mode}]
    recursor
}

# recursor/text
#   Mode handler for typing the text of a newly marked glyph.
proc recursor/text {} {
    helptext {
        {Return   {confirm entry of new glyph}}
        {Escape   {abandon entry}}
    }
    unbind_all_keys
    .d.csr.csr.l configure -text {define:}
    pack .d.csr.csr.e -side left
    focus .d.csr.csr.e
    bind_key Return {
        set strq [.d.csr.csr.e get]
        # Accept only a complete string of printable non-space ASCII
        # other than backslash, or the escapes \\ and \xHH.  The result
        # is passed as a proper list so that characters such as [ $ "
        # in the typed text are never re-parsed as Tcl syntax.
        if {[regexp {^(?:[!-[]|[]-~]|\\\\|\\x[0-9a-f]{2})+$} $strq]} {
            RETURN_RESULT DEFINE [list $cur_0 $cur_1 $strq]
        }
    }
    bind_key Escape {
        bind_key Escape {}
        pack forget .d.csr.csr.e
        set cur_mode 1
        recursor
    }
}

# recursor/already
#   Mode handler for selecting a previously recognised glyph so that it
#   can be deleted from the database.
proc recursor/already {} {
    global glyphsdone cur_already mul
    .d.csr.csr.l configure -text {correct}
    # glyphsdone is a flat list of {min max contexts got} groups; this
    # picks the min column of the selected glyph.
    set glyph_min [lindex $glyphsdone [expr {$cur_already*4}]]
    place .d.csr.csr -x [expr {$glyph_min*$mul-3}]
    bind_key Return {}
    bind_key space {}
    bind_leftright_q cur_already 0 [expr {[llength $glyphsdone]/4-1}]
    bind_key Tab { bind_key Delete {}; set cur_mode 1; recursor }
    bind_key Delete {
        RETURN_RESULT DELETE [lrange $glyphsdone \
                                  [expr {$cur_already*4}] \
                                  [expr {$cur_already*4+2}]]
    }
    helptext {
        {{<- ->}   {move cursor, selecting glyph to correct}}
        {Del       {clear this glyph from the recognition database}}
        {Tab       {switch to selecting area to define as new glyph}}
        {Q         {quit and abandon OCR run}}
    }
}

# bind_key K PROC
#   Binds key K (a keysym such as Return, Tab or q) on the toplevel to
#   script PROC, and records in keybindings(K) whether it is bound.  An
#   empty PROC removes the binding.
proc bind_key {k proc} {
    global keybindings
    bind . <Key-$k> $proc
    set keybindings($k) [expr {!![string length $proc]}]
}

# unbind_all_keys
#   Removes every binding ever installed via bind_key.
proc unbind_all_keys {} {
    global keybindings
    foreach k [array names keybindings] { bind_key $k {} }
}

# bind_leftright_q VAR MIN MAX
#   Binds Left/Right to step global variable VAR within [MIN,MAX] and
#   q to abandon the run with exit status 1.
proc bind_leftright_q {var min max} {
    bind_key Left  [list leftright $var $min $max -1]
    bind_key Right [list leftright $var $min $max +1]
    bind_key q {
        puts stderr "\nCharacter resolver quitting as you requested."
        exit 1
    }
}

# leftright VAR MIN MAX INC
#   Adds INC to global variable VAR unless that would leave [MIN,MAX],
#   then refreshes the cursor display.
proc leftright {var min max inc} {
    upvar #0 $var v
    set vnew $v
    incr vnew $inc
    if {$vnew < $min || $vnew > $max} return
    set v $vnew
    recursor
}

# recursor
#   Repositions both cursor lines on the image and dispatches to the
#   handler for the current mode (recursor/0, /1, /already or /text).
proc recursor {} {
    global csrh cur_mode cur_0 cur_1 mul
    foreach z1 {0 1} {
        place .d.mi.csr_$z1 -y 0 -x [expr {[set cur_$z1] * $mul}]
    }
    recursor/$cur_mode
}


#---------- database read and write ----------

# database file format (as read and written below):
#   header line (see database_header)
#   line containing the number of pixel rows
#   series of glyphs, each:
#      line containing the context
#      line containing the glyph's string (strq)
#      one line per pixel column, a hex bitmask of the set pixels
#      empty line
#   line containing just "."
# Between glyphs, empty lines and lines starting with "#" are ignored.
#
# In memory: $database([list $context $hex $hex ...]) = $strq

set database_header {# ypp-sc-tools pctb font v1}

# db_getsl F
#   Reads and returns one line from F; end of file is an error.
proc db_getsl {f} {
    if {[gets $f l] < 0} { error "unexpected db eof" }
    return $l
}

# read_database
#   Discards any database in memory and loads ./charset-$rows.txt into
#   the database array.  A missing file yields an empty database.
proc read_database {} {
    global database database_header rows database_fn
    catch { unset database }
    set database_fn ./charset-$rows.txt
    if {![file exists $database_fn]} return
    set f [open $database_fn r]
    set hdr [db_getsl $f]
    if {[string compare $hdr $database_header]} { error "$hdr ?" }
    if {([db_getsl $f])+0 != $rows} { error "wrong h ?" }
    while 1 {
        set context [db_getsl $f]
        if {![string length $context]} continue
        if {[regexp {^\#} $context]} continue
        if {![string compare . $context]} break

        set bm $context
        set strq [db_getsl $f]
        while 1 {
            set l [db_getsl $f]
            if {![string length $l]} break
            # Normalise the hex so keys compare equal to those from dbkey.
            lappend bm [format %x 0x$l]
        }
        set database($bm) $strq
    }
    close $f
}

# write_database
#   Writes the database array, sorted, to $database_fn.new and then
#   renames that over $database_fn.
proc write_database {} {
    global database rows database_fn database_header
    set ol {}
    foreach bm [array names database] {
        set strq $database($bm)
        set o "[lindex $bm 0]\n$strq\n"
        foreach x [lrange $bm 1 end] { append o "$x\n" }

        lappend ol $o
    }
    set f [open $database_fn.new w]
    puts $f "$database_header\n$rows\n"
    foreach o [lsort $ol] {
        puts $f $o
    }
    puts $f "."
    close $f
    file rename -force $database_fn.new $database_fn
}

# dbkey CTX L R
#   Returns the database key for context CTX and image columns L..R
#   inclusive, using the column bitmasks in wordmap.
proc dbkey {ctx l r} {
    global wordmap
    set bm $ctx
    for {set x $l} {$x <= $r} {incr x} {
        lappend bm [format %x $wordmap($x)]
    }
    return $bm
}

# update_database/DEFINE C0 C1 STRQ
#   Records STRQ as the text for the columns between cursor positions
#   C0 and C1 (in either order; the larger is exclusive), once for each
#   context applicable at the left-hand column, and saves the database.
#   The left-hand column must be unk_l or the start of a known glyph.
proc update_database/DEFINE {c0 c1 strq} {
    global glyphsdone unk_l unk_contexts wordmap database
    if {$c0 > $c1} { manyset [list $c0 $c1] c1 c0 }
    if {$c0 == $unk_l} {
        set ncontexts $unk_contexts
    } else {
        foreach {l r contexts got} $glyphsdone {
            if {$l==$c0} { set ncontexts $contexts; break }
        }
        if {![info exists ncontexts]} {
            puts stderr "must start at letter LHS!"
            return
        }
    }
    # Convert the exclusive right-hand cursor to an inclusive column.
    incr c1 -1
    foreach c $ncontexts {
        set bm [dbkey $c $c0 $c1]
        set database($bm) $strq
    }
    write_database
}

# update_database/DELETE L R CTXS
#   Removes the entries for columns L..R inclusive in each of the
#   contexts CTXS (entries which do not exist are ignored) and saves
#   the database.
proc update_database/DELETE {l r ctxs} {
    global database
    foreach ctx $ctxs {
        set bm [dbkey $ctx $l $r]
        catch { unset database($bm) }
    }
    write_database
}


# RETURN_RESULT HOW WHAT
#   Finishes one interaction: hides the cursor UI, calls
#   update_database/HOW with the elements of the list WHAT as its
#   arguments, then calls done/$mainkind.
proc RETURN_RESULT {how what} {
    global mainkind
    place forget .d.csr.csr
    pack forget .d.csr.csr.e
    helptext {{{ Processing }}}
    unbind_all_keys
    update idletasks
    debug "$how $what"
    # WHAT is a list; keep the command name as its own list element so
    # that eval's concatenation cannot re-parse any of the arguments.
    eval [list update_database/$how] $what
    done/$mainkind
}

#---------- main program ----------

# main/test
#   Stand-alone test mode: uses built-in sample glyph data and reads
#   the image from the file text.xpm.
proc main/test {} {
    global glyphsdone unk_l unk_r unk_contexts

    set glyphsdone {
         7 11 1 M
        13 17 0 a
        19 23 0 n
    }
    set unk_l 25
    set unk_r 29
    set unk_contexts Test

    set f [open text.xpm]
    read_xpm $f
    close $f

    read_database
    resize_widgets
    draw_glyphsdone
    startup_cursor
}
proc done/test {} {
}

# required
#   stdin readable handler for automatic mode.  Reads the request line
#   (a Tcl list: unk_l unk_r unk_contexts followed by the glyphsdone
#   groups), then the XPM, and starts the interaction.  Exits with
#   status 0 on end of input.
proc required {} {
    global glyphsdone unk_l unk_r unk_contexts

    if {[gets stdin l]<0} {
        if {[eof stdin]} { fconfigure stdin -blocking yes; exit 0 }
        return
    }
    init_widgets
    manyset [lrange $l 0 2] unk_l unk_r unk_contexts
    set glyphsdone [lrange $l 3 end]
    debug "GOT $l"

    # The rest of the request is read synchronously.
    fileevent stdin readable {}
    fconfigure stdin -blocking yes

    read_xpm stdin
    resize_widgets
    read_database
    draw_glyphsdone
    startup_cursor
}

# main/automatic
#   Automatic mode: waits for a request to arrive on stdin.
proc main/automatic {} {
    fconfigure stdin -blocking no
    fileevent stdin readable required
}

# done/automatic
#   Reports completion of one request by writing a NUL byte to file
#   descriptor 4, then waits for the next request.
proc done/automatic {} {
    exec sh -c {printf \\0 >&4}
    main/automatic
}

# debug M
#   No-op unless --debug is given, which redefines it to log to stderr.
proc debug {m} { }

set mainkind test
foreach arg $argv {
    # Glob matching so that any other --automatic<something> is caught
    # by the version-mismatch diagnostic rather than the generic error.
    switch -glob -- $arg {
        {--debug} { proc debug {m} { puts stderr "SHOW-THING $m" } }
        {--noop-arg} { }
        {--automatic-1} { set mainkind automatic }
        {--automatic*} { error "incompatible versions - install problem" }
        default { error "huh $argv ?" }
    }
}

main/$mainkind