From: Ian Jackson Date: Wed, 3 Jun 2009 22:48:27 +0000 (+0100) Subject: merge changes made accidentally to wrong copy X-Git-Tag: 1.9.2~192 X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.web-live.git;a=commitdiff_plain;h=24222363faec9b4e3d7074af2df5f39933613c7f;hp=1971b4961a061d32a86a5e08a64f1b960f666802 merge changes made accidentally to wrong copy --- diff --git a/.gitignore b/.gitignore index cfb08ee..8875777 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,15 @@ *~ t +pctb/*.o pctb/t.* pctb/u.* pctb/convert + +pctb/stuff/text.ppm +pctb/stuff/text.png +pctb/stuff/text.pbm +pctb/stuff/text.xbm + +pctb/stuff/database +pctb/stuff/t.* diff --git a/pctb/Makefile b/pctb/Makefile index 10c0be9..3707d81 100644 --- a/pctb/Makefile +++ b/pctb/Makefile @@ -1,5 +1,9 @@ LDLIBS += -lnetpbm CFLAGS += -Wall -Wwrite-strings -Wpointer-arith -Wmissing-prototypes \ - -Wstrict-prototypes -g + -Wstrict-prototypes -Werror -g all: convert + +convert: convert.o ocr.o + +convert.o ocr.o: ocr.h diff --git a/pctb/convert.c b/pctb/convert.c index a0ecca1..c8082e6 100644 --- a/pctb/convert.c +++ b/pctb/convert.c @@ -1,12 +1,5 @@ -#include -#include -#include -#include -#include -#include -#define eassert assert -#define debug stdout +#include "ocr.h" typedef struct { unsigned long rgb; /* on screen */ @@ -16,7 +9,7 @@ typedef struct { static int height, width; static char *image; -static void debug_flush(void) { +void debug_flush(void) { eassert(!fflush(debug)); eassert(!ferror(debug)); } @@ -242,88 +235,9 @@ static void load_image_and_canonify(void) { debug_flush(); } -typedef uint32_t Pixcol; -#define PSPIXCOL(priscan) priscan##32 - -typedef struct { - Pixcol col; - struct OCRDatabaseNode *then; -} OCRDatabaseLink; - -#define MAXGLYPHCHRS 3 - -typedef struct OCRDatabaseNode { - char s[MAXGLYPHCHRS+1]; /* null-terminated; "" means no match here */ - int nlinks, alinks; - OCRDatabaseLink *links; -} OCRDatabaseNode; - -#define N_OCR_CONTEXTS 2 -static OCRDatabaseNode ocr_contexts[N_OCR_CONTEXTS]; - -static void load_ocr_database(void) { - int ctx,nchrs; - OCRDatabaseNode *current, *additional; - char chrs[MAXGLYPHCHRS+1]; - Pixcol cv; - int r,i,j; - - FILE *db= fopen("database","r"); eassert(db); - - for (;;) { - r= fscanf(db, "%d %d", &ctx, &nchrs); - if (r==EOF) break; - eassert(r==2); - eassert(ctx>=0 && ctx0 && nchrs<=MAXGLYPHCHRS); - - for (i=0; i0 && c<=255); - chrs[i]= c; - } - chrs[nchrs]= 0; - - int twidth; - r= fscanf(db, "%d", &twidth); eassert(r==1); - current= &ocr_contexts[ctx]; - for (i=0; inlinks; j++) - if (current->links[j].col == cv) { - current= current->links[j].then; - goto found_link; - } - - additional= malloc(sizeof(*additional)); eassert(additional); - additional->s[0]= 0; - additional->nlinks= additional->alinks= 0; - additional->links= 0; - if (current->nlinks==current->alinks) { - current->alinks++; - current->alinks<<=1; - current->links= realloc(current->links, - sizeof(*current->links) * current->alinks); - eassert(current->links); - } - current->links[current->nlinks].col= cv; - current->links[current->nlinks].then= additional; - current->nlinks++; - current= additional; - - found_link:; - } - - eassert(!current->s[0]); - strcpy(current->s, chrs); - } - eassert(!ferror(db)); - eassert(feof(db)); - fclose(db); -} - static void ocr_rectangle(Rect r) { + OcrResultGlyph *results, *res; + int w= r.br.x - r.tl.x + 1; int h= r.br.y - r.tl.y + 1; Pixcol cols[w+1]; @@ -341,56 +255,20 @@ static void ocr_rectangle(Rect r) { } cols[w]= 0; - int nspaces=0; - int ctx=1,i; - x=0; - - for (;;) { - if (x>w) break; - - if (!cols[x]) { - nspaces++; - x++; - if (nspaces>3) ctx=1; - continue; - } - - OCRDatabaseNode *current=0, *lastmatch=0; - int startx=x; - int afterlastmatchx=-1; - current= &ocr_contexts[ctx]; - for (;;) { - if (x>w) break; - Pixcol cv= cols[x]; - for (i=0; inlinks; i++) - if (current->links[i].col == cv) - goto found; - /* not found */ - break; - found: - x++; - current= current->links[i].then; - if (current->s[0]) { lastmatch=current; afterlastmatchx=x; } - } - - if (!lastmatch) { - int x2; - for (x2=x+1; x2s); - x= afterlastmatchx; - ctx= 0; - } - } + results= ocr(w,h,cols); + printf("YES! \""); + for (res=0; res->s; res++) + printf("%s",res->s); + printf("\"\n"); + eassert(!ferror(stdout)); + eassert(!fflush(stdout)); } int main(void) { Rect thisr, entryr; int tryrect, colno; - load_ocr_database(); + ocr_init(); load_image_and_canonify(); find_structure(); diff --git a/pctb/ocr.c b/pctb/ocr.c new file mode 100644 index 0000000..87bf808 --- /dev/null +++ b/pctb/ocr.c @@ -0,0 +1,263 @@ +/* + */ + +#include "ocr.h" + +typedef struct { + Pixcol col; + struct OCRDatabaseNode *then; +} OCRDatabaseLink; + +#define MAXGLYPHCHRS 3 + +typedef struct OCRDatabaseNode { + char s[MAXGLYPHCHRS+1]; /* null-terminated; "" means no match here */ + int nlinks, alinks; + OCRDatabaseLink *links; +} OCRDatabaseNode; + +#define N_OCR_CONTEXTS 2 + +static OCRDatabaseNode ocr_contexts[N_OCR_CONTEXTS]; +static FILE *db; +static OcrResultGlyph *results; +static int aresults, nresults; + +static FILE *resolver; +static pid_t resolver_pid; + +static void ocr_readdb(void) { + int ctx,nchrs; + OCRDatabaseNode *current, *additional; + char chrs[MAXGLYPHCHRS+1]; + Pixcol cv; + int r,i,j; + + assert(!db); + db= fopen("database","r"); eassert(db); + + for (;;) { + r= fscanf(db, "%d %d", &ctx, &nchrs); + if (r==EOF) break; + eassert(r==2); + eassert(ctx>=0 && ctx0 && nchrs<=MAXGLYPHCHRS); + + for (i=0; i0 && c<=255); + chrs[i]= c; + } + chrs[nchrs]= 0; + + int twidth; + r= fscanf(db, "%d", &twidth); eassert(r==1); + current= &ocr_contexts[ctx]; + for (i=0; inlinks; j++) + if (current->links[j].col == cv) { + current= current->links[j].then; + goto found_link; + } + + additional= malloc(sizeof(*additional)); eassert(additional); + additional->s[0]= 0; + additional->nlinks= additional->alinks= 0; + additional->links= 0; + if (current->nlinks==current->alinks) { + current->alinks++; + current->alinks<<=1; + current->links= realloc(current->links, + sizeof(*current->links) * current->alinks); + eassert(current->links); + } + current->links[current->nlinks].col= cv; + current->links[current->nlinks].then= additional; + current->nlinks++; + current= additional; + + found_link:; + } + + eassert(!current->s[0]); + strcpy(current->s, chrs); + } + eassert(!ferror(db)); + eassert(feof(db)); +} + +static void callout_unknown(int w, int h, Pixcol cols[], int unk_l, int unk_r, + const OcrResultGlyph *sofar, int nsofar) { + int pfd[2], c, r,i, x,y; + const OcrResultGlyph *s; + const char *p; + Pixcol pv; + + if (!resolver) { + r= pipe(pfd); eassert(!r); + resolver_pid= fork(); + eassert(resolver_pid!=-1); + if (!resolver_pid) { + r= dup2(pfd[0],0); eassert(!r); + r= close(pfd[1]); eassert(!r); + execlp("./show-thing.tcl", "./show-thing.tcl",(char*)0); + eassert(!"execlp failed"); + } + r= close(pfd[0]); eassert(!r); + resolver= fdopen(pfd[1],"w"); eassert(resolver); + } + fprintf(resolver,"%d %d",unk_l,unk_r); + for (i=0, s=sofar; il,s->r,s->ctx); + for (p=s->s; (c= *p); p++) { + if (c=='\\') fprintf(resolver,"\\%c",c); + else if (c>=33 && c<=126) fputc(c,resolver); + else fprintf(resolver,"\\x%02x",(unsigned char)c); + } + } + fputc('\n',resolver); + + fprintf(resolver, + "/* XPM */\n" + "static char *t[] = {\n" + "/* columns rows colors chars-per-pixel */\n" + "\"%d %d 2 1\",\n" + "\" c black\",\n" + "\"o c white\",\n", + w,h); + for (y=0, pv=1; y= aresults) { + aresults++; aresults<<=1; + results= realloc(results,sizeof(*results)*aresults); + eassert(results); + } + results[nresults].s= s; + results[nresults].l= l; + results[nresults].r= r; + results[nresults].ctx= ctx; + nresults++; +} + +OcrResultGlyph *ocr(int w, int h, Pixcol cols[]) { + int nspaces=0; + int ctx=1,i, x; + + nresults=0; + assert(db); + + fprintf(debug,"OCR h=%d w=%d",w,h); + for (x=0; x=w) + break; + + if (!cols[x]) { + nspaces++; + x++; + if (nspaces>3) ctx=1; + continue; + } + + /* find character */ + OCRDatabaseNode *current=0, *bestmatch=0; + int lx=x; + int bestmatch_rx=-1; + current= &ocr_contexts[ctx]; + fprintf(debug,"OCR lx=%d ctx=%d ",lx,ctx); + + for (;;) { + debug_flush(); + fprintf(debug,"| x=%d",x); + if (x>w) break; + Pixcol cv= cols[x]; + fprintf(debug," cv=%"PSPIXCOL(PRIx),x); + for (i=0; inlinks; i++) + if (current->links[i].col == cv) + goto found; + /* not found */ + fprintf(debug," ?"); + break; + + found: + current= current->links[i].then; + if (current->s[0]) { + fprintf(debug," \"%s\"",current->s); + bestmatch=current; bestmatch_rx=x; + } else { + fprintf(debug," ..."); + } + x++; + } + + if (bestmatch) { + fprintf(debug," YES\n"); + add_result(bestmatch->s, lx, bestmatch_rx, ctx); + x= bestmatch_rx+1; + ctx= 0; + } else { + int rx; + fprintf(debug," UNKNOWN"); + for (rx=lx+1; rx +#include +#include +#include +#include +#include +#include +#include + +typedef uint32_t Pixcol; +#define PSPIXCOL(priscan) priscan##32 + +typedef struct { + const char *s; /* valid until next call to ocr() */ + int l,r; /* column numbers */ + int ctx; /* match context index */ +} OcrResultGlyph; + +OcrResultGlyph *ocr(int w, int h, Pixcol cols[]); + /* return value is array terminated by {0,-1,-1} + * array is valid until next call to ocr() + */ + +void ocr_init(void); + +void debug_flush(void); + +#define eassert assert +#define debug stdout + +#endif /*OCR_H*/ diff --git a/pctb/stuff/show-thing.tcl b/pctb/show-thing.tcl similarity index 93% rename from pctb/stuff/show-thing.tcl rename to pctb/show-thing.tcl index c1e8cb9..f2055f4 100755 --- a/pctb/stuff/show-thing.tcl +++ b/pctb/show-thing.tcl @@ -1,4 +1,12 @@ -#!/usr/bin/tk +#!/usr/bin/wish + +# usage: +# run show-thing without args +# then on stdin write +# one line which is a Tcl list for foolist +# the xpm in the format expected +# then expect child to raise SIGSTOP or exit 0 or exit nonzero +# if child raised SIGSTOP, check database was updated proc manyset {list args} { foreach val $list var $args { @@ -14,9 +22,11 @@ set rhsmost_max -1 proc read_xpm {f} { set o {} set y -3 - while {[gets $f l] >= 0} { + while 1 { + if {[gets $f l] < 0} { error "huh? "} if {![regexp {^"(.*)",$} $l dummy l]} { append o "$l\n" + if {[regexp {^\}\;$} $l]} break continue } if {$y==-3} { @@ -278,11 +288,10 @@ proc read_database {} { global database set f [open database r] while {[gets $f l] >= 0} { - if {![regexp {^(\w+) (\d+) ([0-9a-f]{2}+)$} $l \ + if {![regexp {^(\w+) (\d+) ((?:[0-9a-f]{2})+)$} $l \ dummy context strl strh]} { error "bad syntax" } - binary scan $strw h* strh if {[string length $strh] != $strl*2} { error "$strh $strl" } gets $f l; set width [format %d $l] set bm $context @@ -291,6 +300,7 @@ proc read_database {} { } set database($bm) $strh } + close $f } proc write_database {} { @@ -305,9 +315,12 @@ proc write_database {} { foreach x [lrange $bm 1 end] { append o "$x\n" } lappend ol $o } + set f [open database.new w] foreach o [lsort $ol] { - puts -nonewline $o + puts -nonewline $f $o } + close $f + file rename -force database.new database } proc update_database/DEFINE {c0 c1 strh} { diff --git a/pctb/stuff/old b/pctb/stuff/old new file mode 100644 index 0000000..557b690 --- /dev/null +++ b/pctb/stuff/old @@ -0,0 +1,9 @@ +"\"+ c #111\", +\"a c #600\", +\"A c #fcc\", +\"b c #006\", +\"B c #ccf\", +\"u c #000\", +\"U c #888\", +\"q c #440\", +\"Q c #ff0\",\n" diff --git a/pctb/stuff/text.xpm b/pctb/text.xpm similarity index 100% rename from pctb/stuff/text.xpm rename to pctb/text.xpm diff --git a/pctb/x.gdb b/pctb/x.gdb new file mode 100644 index 0000000..922bd4c --- /dev/null +++ b/pctb/x.gdb @@ -0,0 +1,4 @@ +file convert +set confirm off +set args