chiark / gitweb /
merge changes made accidentally to wrong copy
authorIan Jackson <ian@liberator.relativity.greenend.org.uk>
Wed, 3 Jun 2009 22:48:27 +0000 (23:48 +0100)
committerIan Jackson <ian@liberator.relativity.greenend.org.uk>
Wed, 3 Jun 2009 22:48:27 +0000 (23:48 +0100)
.gitignore
pctb/Makefile
pctb/convert.c
pctb/ocr.c [new file with mode: 0644]
pctb/ocr.h [new file with mode: 0644]
pctb/show-thing.tcl [moved from pctb/stuff/show-thing.tcl with 93% similarity]
pctb/stuff/old [new file with mode: 0644]
pctb/text.xpm [moved from pctb/stuff/text.xpm with 100% similarity]
pctb/x.gdb [new file with mode: 0644]

index cfb08ee..8875777 100644 (file)
@@ -1,6 +1,15 @@
 *~
 t
 
+pctb/*.o
 pctb/t.*
 pctb/u.*
 pctb/convert
+
+pctb/stuff/text.ppm
+pctb/stuff/text.png
+pctb/stuff/text.pbm
+pctb/stuff/text.xbm
+
+pctb/stuff/database
+pctb/stuff/t.*
index 10c0be9..3707d81 100644 (file)
@@ -1,5 +1,9 @@
 LDLIBS += -lnetpbm
 CFLAGS += -Wall -Wwrite-strings -Wpointer-arith -Wmissing-prototypes \
-       -Wstrict-prototypes -g
+       -Wstrict-prototypes -Werror -g
 
 all: convert
+
+convert: convert.o ocr.o
+
+convert.o ocr.o: ocr.h
index a0ecca1..c8082e6 100644 (file)
@@ -1,12 +1,5 @@
-#include <pam.h>
-#include <stdint.h>
-#include <inttypes.h>
-#include <assert.h>
-#include <string.h>
-#include <stdlib.h>
 
-#define eassert assert
-#define debug stdout
+#include "ocr.h"
 
 typedef struct {
   unsigned long rgb; /* on screen */
@@ -16,7 +9,7 @@ typedef struct {
 static int height, width;
 static char *image;
 
-static void debug_flush(void) {
+void debug_flush(void) {
   eassert(!fflush(debug));
   eassert(!ferror(debug));
 }
@@ -242,88 +235,9 @@ static void load_image_and_canonify(void) {
   debug_flush();
 }
 
-typedef uint32_t Pixcol;
-#define PSPIXCOL(priscan) priscan##32
-
-typedef struct {
-  Pixcol col;
-  struct OCRDatabaseNode *then;
-} OCRDatabaseLink;
-
-#define MAXGLYPHCHRS 3
-
-typedef struct OCRDatabaseNode {
-  char s[MAXGLYPHCHRS+1]; /* null-terminated; "" means no match here */
-  int nlinks, alinks;
-  OCRDatabaseLink *links;
-} OCRDatabaseNode;
-
-#define N_OCR_CONTEXTS 2
-static OCRDatabaseNode ocr_contexts[N_OCR_CONTEXTS];
-
-static void load_ocr_database(void) {
-  int ctx,nchrs;
-  OCRDatabaseNode *current, *additional;
-  char chrs[MAXGLYPHCHRS+1];
-  Pixcol cv;
-  int r,i,j;
-
-  FILE *db= fopen("database","r");  eassert(db);
-
-  for (;;) {
-    r= fscanf(db, "%d %d", &ctx, &nchrs);
-    if (r==EOF) break;
-    eassert(r==2);
-    eassert(ctx>=0 && ctx<N_OCR_CONTEXTS);
-    eassert(nchrs>0 && nchrs<=MAXGLYPHCHRS);
-
-    for (i=0; i<nchrs; i++) {
-      int c;
-      r= fscanf(db, "%x", &c);  eassert(r==1);
-      eassert(c>0 && c<=255);
-      chrs[i]= c;
-    }
-    chrs[nchrs]= 0;
-
-    int twidth;
-    r= fscanf(db, "%d", &twidth);  eassert(r==1);
-    current= &ocr_contexts[ctx];
-    for (i=0; i<twidth; i++) {
-      r= fscanf(db, "%"PSPIXCOL(SCNx), &cv);  eassert(r==1);
-      for (j=0; j<current->nlinks; j++)
-       if (current->links[j].col == cv) {
-         current= current->links[j].then;
-         goto found_link;
-       }
-
-      additional= malloc(sizeof(*additional)); eassert(additional);
-      additional->s[0]= 0;
-      additional->nlinks= additional->alinks= 0;
-      additional->links= 0;
-      if (current->nlinks==current->alinks) {
-       current->alinks++;
-       current->alinks<<=1;
-       current->links= realloc(current->links,
-            sizeof(*current->links) * current->alinks);
-       eassert(current->links);
-      }
-      current->links[current->nlinks].col= cv;
-      current->links[current->nlinks].then= additional;
-      current->nlinks++;
-      current= additional;
-
-    found_link:;
-    }
-
-    eassert(!current->s[0]);
-    strcpy(current->s, chrs);
-  }
-  eassert(!ferror(db));
-  eassert(feof(db));
-  fclose(db);
-}      
-
 static void ocr_rectangle(Rect r) {
+  OcrResultGlyph *results, *res;
+
   int w= r.br.x - r.tl.x + 1;
   int h= r.br.y - r.tl.y + 1;
   Pixcol cols[w+1];
@@ -341,56 +255,20 @@ static void ocr_rectangle(Rect r) {
   }
   cols[w]= 0;
 
-  int nspaces=0;
-  int ctx=1,i;
-  x=0;
-
-  for (;;) {
-    if (x>w) break;
-
-    if (!cols[x]) {
-      nspaces++;
-      x++;
-      if (nspaces>3) ctx=1;
-      continue;
-    }
-      
-    OCRDatabaseNode *current=0, *lastmatch=0;
-    int startx=x;
-    int afterlastmatchx=-1;
-    current= &ocr_contexts[ctx];
-    for (;;) {
-      if (x>w) break;
-      Pixcol cv= cols[x];
-      for (i=0; i<current->nlinks; i++)
-       if (current->links[i].col == cv)
-         goto found;
-      /* not found */
-      break;
-    found:
-      x++;
-      current= current->links[i].then;
-      if (current->s[0]) { lastmatch=current; afterlastmatchx=x; }
-    }
-
-    if (!lastmatch) {
-      int x2;
-      for (x2=x+1; x2<w && cols[x2]; x2++);
-      printf("UNKNOWN x=%d ctx=%d %d..%d\n",x, ctx, startx,x2);
-      x++;
-    } else {
-      printf("OUTPUT x=%d `%s'\n", x, lastmatch->s);
-      x= afterlastmatchx;
-      ctx= 0;
-    }
-  }
+  results= ocr(w,h,cols);
+  printf("YES! \"");
+  for (res=results; res->s; res++) /* the array ends at the entry whose s is null */
+    printf("%s",res->s);
+  printf("\"\n");
+  eassert(!ferror(stdout));
+  eassert(!fflush(stdout));
 }
 
 int main(void) {
   Rect thisr, entryr;
   int tryrect, colno;
 
-  load_ocr_database();
+  ocr_init();
   load_image_and_canonify();
   find_structure();
 
diff --git a/pctb/ocr.c b/pctb/ocr.c
new file mode 100644 (file)
index 0000000..87bf808
--- /dev/null
@@ -0,0 +1,263 @@
+/*
+  */
+
+#include "ocr.h"
+
+typedef struct {
+  Pixcol col;
+  struct OCRDatabaseNode *then;
+} OCRDatabaseLink;
+
+#define MAXGLYPHCHRS 3
+
+typedef struct OCRDatabaseNode {
+  char s[MAXGLYPHCHRS+1]; /* null-terminated; "" means no match here */
+  int nlinks, alinks;
+  OCRDatabaseLink *links;
+} OCRDatabaseNode;
+
+#define N_OCR_CONTEXTS 2
+
+static OCRDatabaseNode ocr_contexts[N_OCR_CONTEXTS];
+static FILE *db;
+static OcrResultGlyph *results;
+static int aresults, nresults;
+
+static FILE *resolver;
+static pid_t resolver_pid;
+
+/*
+ * Reads the file "database" (relative to the current directory) into
+ * the glyph tries rooted at ocr_contexts[], and leaves the stream open
+ * in `db' so that callout_unknown() can fstat it later.
+ *
+ * Each entry is whitespace-separated:
+ *    <ctx> <nchrs>  <nchrs character codes, hex>  <width, decimal>
+ *    <width column bitmaps, hex>
+ * The column bitmaps are the path through the trie for context <ctx>;
+ * the node at the end of the path is labelled with the glyph's text.
+ *
+ * This is also called to re-read the database after the resolver has
+ * rewritten it.  The tries are not cleared in between, so on a re-read
+ * most glyphs are already present: an identical definition is accepted,
+ * and only a conflicting one is treated as an error.
+ *
+ * Any I/O or syntax problem fails an assertion.
+ */
+static void ocr_readdb(void) {
+  int ctx,nchrs;
+  OCRDatabaseNode *current, *additional;
+  char chrs[MAXGLYPHCHRS+1];
+  Pixcol cv;
+  int r,i,j;
+
+  assert(!db);
+  db= fopen("database","r");  eassert(db);
+
+  for (;;) {
+    r= fscanf(db, "%d %d", &ctx, &nchrs);
+    if (r==EOF) break;
+    eassert(r==2);
+    eassert(ctx>=0 && ctx<N_OCR_CONTEXTS);
+    eassert(nchrs>0 && nchrs<=MAXGLYPHCHRS);
+
+    for (i=0; i<nchrs; i++) {
+      unsigned c; /* %x stores through an unsigned int pointer */
+      r= fscanf(db, "%x", &c);  eassert(r==1);
+      eassert(c>0 && c<=255);
+      chrs[i]= c;
+    }
+    chrs[nchrs]= 0;
+
+    int twidth;
+    r= fscanf(db, "%d", &twidth);  eassert(r==1);
+    current= &ocr_contexts[ctx];
+    for (i=0; i<twidth; i++) {
+      r= fscanf(db, "%"PSPIXCOL(SCNx), &cv);  eassert(r==1);
+
+      /* follow the existing link for this column value, if there is one */
+      for (j=0; j<current->nlinks; j++)
+        if (current->links[j].col == cv) {
+          current= current->links[j].then;
+          goto found_link;
+        }
+
+      /* otherwise hang a fresh, unlabelled node off the current one */
+      additional= malloc(sizeof(*additional)); eassert(additional);
+      additional->s[0]= 0;
+      additional->nlinks= additional->alinks= 0;
+      additional->links= 0;
+      if (current->nlinks==current->alinks) {
+        current->alinks++;
+        current->alinks<<=1;
+        current->links= realloc(current->links,
+             sizeof(*current->links) * current->alinks);
+        eassert(current->links);
+      }
+      current->links[current->nlinks].col= cv;
+      current->links[current->nlinks].then= additional;
+      current->nlinks++;
+      current= additional;
+
+    found_link:;
+    }
+
+    /* A node may already be labelled if we are re-reading the database;
+     * that is fine provided the label has not changed. */
+    eassert(!current->s[0] || !strcmp(current->s, chrs));
+    strcpy(current->s, chrs);
+  }
+  eassert(!ferror(db));
+  eassert(feof(db));
+}
+
+/*
+ * Hands an unrecognised region to the interactive resolver
+ * (./show-thing.tcl), waits for it to act, then re-reads the database.
+ *
+ *  w, h           dimensions of the image being recognised
+ *  cols[0..w-1]   one bitmap per column; bit y of cols[x] selects 'o'
+ *                 (white) rather than ' ' (black) for row y
+ *  unk_l, unk_r   column range of the unrecognised region, as computed
+ *                 by the caller
+ *  sofar, nsofar  glyphs already recognised in this image
+ *
+ * The resolver is started on first use, with its stdin connected to a
+ * pipe held in `resolver'.  Each request is one line (a Tcl list: the
+ * unknown range, then l r ctx text for each glyph so far) followed by
+ * the image as an XPM.
+ *
+ * Any failure, including the resolver dying on a signal or exiting
+ * nonzero, fails an assertion.
+ */
+static void callout_unknown(int w, int h, Pixcol cols[], int unk_l, int unk_r,
+                            const OcrResultGlyph *sofar, int nsofar) {
+  int pfd[2], c, r,i, x,y;
+  const OcrResultGlyph *s;
+  const char *p;
+  Pixcol pv;
+
+  if (!resolver) {
+    r= pipe(pfd);  eassert(!r);
+    resolver_pid= fork();
+    eassert(resolver_pid!=-1);
+    if (!resolver_pid) {
+      /* child: the read end of the pipe becomes stdin */
+      r= dup2(pfd[0],0); eassert(!r);
+      /* don't leak the original read end into the resolver; the guard
+       * covers the case where pipe() handed us fd 0 itself */
+      if (pfd[0]!=0) { r= close(pfd[0]); eassert(!r); }
+      r= close(pfd[1]); eassert(!r);
+      execlp("./show-thing.tcl", "./show-thing.tcl",(char*)0);
+      eassert(!"execlp failed");
+    }
+    r= close(pfd[0]); eassert(!r);
+    resolver= fdopen(pfd[1],"w"); eassert(resolver);
+  }
+  fprintf(resolver,"%d %d",unk_l,unk_r);
+  for (i=0, s=sofar; i<nsofar; i++, s++) {
+    fprintf(resolver," %d %d %d ",s->l,s->r,s->ctx);
+    for (p=s->s; (c= *p); p++) {
+      /* The line is parsed as a Tcl list, so besides backslash we must
+       * not send a raw quote or brace (they would start a quoted or
+       * braced element).  Escapes are three-digit octal because Tcl
+       * bounds those at three digits, whereas \x could swallow a
+       * following character that happens to be a hex digit. */
+      if (c=='\\') fprintf(resolver,"\\%c",c);
+      else if (c>=33 && c<=126 && c!='"' && c!='{' && c!='}')
+        fputc(c,resolver);
+      else fprintf(resolver,"\\%03o",(unsigned)(unsigned char)c);
+    }
+  }
+  fputc('\n',resolver);
+
+  fprintf(resolver,
+          "/* XPM */\n"
+          "static char *t[] = {\n"
+          "/* columns rows colors chars-per-pixel */\n"
+          "\"%d %d 2 1\",\n"
+          "\"  c black\",\n"
+          "\"o c white\",\n",
+          w,h);
+  for (y=0, pv=1; y<h; y++, pv<<=1) {
+    fputc('"',resolver);
+    for (x=0; x<w; x++)
+      fputc(cols[x] & pv ? 'o' : ' ', resolver);
+    fputs("\",\n",resolver);
+  }
+  fputs("};\n",resolver);
+  eassert(!ferror(resolver));
+  eassert(!fflush(resolver));
+
+  /* Wait until the resolver has replaced the database file. */
+  for (;;) {
+    /* if the resolver already exited without replacing the database,
+     * there is nothing left to wait for */
+    eassert(resolver);
+    pid_t pid= waitpid(resolver_pid, &r, WUNTRACED);
+    if (pid==-1) { eassert(errno==EINTR); continue; }
+    eassert(pid==resolver_pid);
+    if (WIFEXITED(r)) {
+      eassert(!WEXITSTATUS(r));
+      fclose(resolver);
+      resolver= 0; /* a new resolver will be started next time */
+    } else if (WIFSTOPPED(r)) {
+      /* the resolver stops itself to signal us; let it carry on */
+      r= kill(resolver_pid,SIGCONT);
+      eassert(!r);
+    } else if (WIFSIGNALED(r)) {
+      eassert(!"resolver child died due to signal");
+    } else {
+      eassert(!"weird wait status");
+    }
+    /* The name "database" now referring to a different file than the
+     * one we have open means it has been replaced. */
+    struct stat stab, fstab;
+    r= stat("database",&stab);  eassert(!r);
+    r= fstat(fileno(db),&fstab);  eassert(!r);
+    if (stab.st_ino != fstab.st_ino ||
+        stab.st_dev != fstab.st_dev)
+      break;
+  }
+  fclose(db);
+  db= 0;
+  ocr_readdb();
+}
+
+/*
+ * Appends one glyph record to the results array, enlarging the array
+ * first if it is full.  Allocation failure fails an assertion.
+ */
+static void add_result(const char *s, int l, int r, int ctx) {
+  OcrResultGlyph *slot;
+
+  if (nresults >= aresults) {
+    aresults= (aresults+1) << 1;
+    results= realloc(results, aresults * sizeof(*results));
+    eassert(results);
+  }
+
+  slot= &results[nresults++];
+  slot->s= s;
+  slot->l= l;
+  slot->r= r;
+  slot->ctx= ctx;
+}
+
+/*
+ * Recognises the glyphs in an image supplied as column bitmaps.
+ *
+ *  w, h   image dimensions
+ *  cols   column bitmaps.  The matching loop reads cols[w] as well as
+ *         cols[0..w-1], so the array must have w+1 entries.
+ *
+ * Columns are scanned left to right; blank (zero) columns are skipped
+ * and each run of non-blank columns is matched against the trie for the
+ * current context, taking the longest match.  When nothing matches, the
+ * region is handed to callout_unknown(), which reloads the database,
+ * and the whole image is then rescanned from scratch.
+ *
+ * Returns the file's static results array, terminated by an entry with
+ * s=0, l=-1, r=-1.  It is valid until the next call to ocr().
+ * ocr_init() must have been called first.
+ */
+OcrResultGlyph *ocr(int w, int h, Pixcol cols[]) {
+  int nspaces, ctx, i, x;
+
+  assert(db);
+
+  fprintf(debug,"OCR h=%d w=%d",h,w);
+  for (x=0; x<w; x++) fprintf(debug," %"PSPIXCOL(PRIx),cols[x]);
+  fprintf(debug,"\n");
+  debug_flush();
+
+ restart:
+  /* Every pass starts from a clean slate; otherwise a rescan would
+   * append to the glyphs found by the previous pass and begin in
+   * whatever context that pass stopped in. */
+  nresults=0;
+  nspaces=0;
+  ctx=1;
+  x=0;
+  for (;;) {
+    debug_flush();
+    /* skip spaces */
+    if (x>=w)
+      break;
+
+    if (!cols[x]) {
+      /* NOTE(review): nspaces is never reset once a glyph is found, so
+       * this counts blank columns over the whole pass rather than in
+       * the current gap - confirm whether that is intended. */
+      nspaces++;
+      x++;
+      if (nspaces>3) ctx=1;
+      continue;
+    }
+
+    /* find character */
+    OCRDatabaseNode *current=0, *bestmatch=0;
+    int lx=x;
+    int bestmatch_rx=-1;
+    current= &ocr_contexts[ctx];
+    fprintf(debug,"OCR  lx=%d ctx=%d  ",lx,ctx);
+
+    for (;;) {
+      debug_flush();
+      fprintf(debug,"| x=%d",x);
+      if (x>w) break;
+      Pixcol cv= cols[x];
+      fprintf(debug," cv=%"PSPIXCOL(PRIx),cv);
+      for (i=0; i<current->nlinks; i++)
+        if (current->links[i].col == cv)
+          goto found;
+      /* not found */
+      fprintf(debug," ?");
+      break;
+
+    found:
+      current= current->links[i].then;
+      if (current->s[0]) {
+        /* remember the longest labelled node seen so far */
+        fprintf(debug," \"%s\"",current->s);
+        bestmatch=current; bestmatch_rx=x;
+      } else {
+        fprintf(debug," ...");
+      }
+      x++;
+    }
+
+    if (bestmatch) {
+      fprintf(debug," YES\n");
+      add_result(bestmatch->s, lx, bestmatch_rx, ctx);
+      x= bestmatch_rx+1; /* resume just after the matched glyph */
+      ctx= 0;
+    } else {
+      int rx;
+      fprintf(debug," UNKNOWN");
+      /* the unknown region runs up to the next blank column */
+      for (rx=lx+1; rx<w && cols[rx]; rx++);
+      fprintf(debug," x=%d ctx=%d %d..%d\n",x, ctx, lx,rx);
+      debug_flush();
+      callout_unknown(w,h,cols, lx,rx, results,nresults);
+      goto restart;
+    }
+  }
+  add_result(0,-1,-1,0); /* terminator */
+  fprintf(debug,"OCR  finished %d glyphs\n",nresults);
+  debug_flush();
+  return results;
+}
+
+void ocr_init(void) {
+  ocr_readdb(); /* loads the glyph database and leaves db open;
+                 * ocr() asserts that this has been done */
+}
diff --git a/pctb/ocr.h b/pctb/ocr.h
new file mode 100644 (file)
index 0000000..26bccbc
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef OCR_H
+#define OCR_H
+
+#include <pam.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+typedef uint32_t Pixcol;
+#define PSPIXCOL(priscan) priscan##32
+
+typedef struct {
+  const char *s; /* glyph text, null in the terminating entry; valid until next call to ocr() */
+  int l,r; /* leftmost and rightmost column numbers of the glyph; -1,-1 in the terminating entry */
+  int ctx; /* match context index the glyph was recognised in */
+} OcrResultGlyph;
+
+OcrResultGlyph *ocr(int w, int h, Pixcol cols[]);
+  /* return value is array terminated by {0,-1,-1}
+   * array is valid until next call to ocr()
+   */
+
+void ocr_init(void);
+
+void debug_flush(void);
+
+#define eassert assert
+#define debug stdout
+
+#endif /*OCR_H*/
similarity index 93%
rename from pctb/stuff/show-thing.tcl
rename to pctb/show-thing.tcl
index c1e8cb9..f2055f4 100755 (executable)
@@ -1,4 +1,12 @@
-#!/usr/bin/tk
+#!/usr/bin/wish
+
+# usage:
+#  run show-thing without args
+#  then on stdin write
+#     one line which is a Tcl list for foolist
+#     the xpm, in the format read_xpm expects (ending with a "};" line)
+#  the parent should then expect this process to raise SIGSTOP, or exit 0 or nonzero
+#  if this process raised SIGSTOP, the parent should check the database was updated
 
 proc manyset {list args} {
     foreach val $list var $args {
@@ -14,9 +22,11 @@ set rhsmost_max -1
 proc read_xpm {f} {
     set o {}
     set y -3
-    while {[gets $f l] >= 0} {
+    while 1 {
+       if {[gets $f l] < 0} { error "huh? "}
        if {![regexp {^"(.*)",$} $l dummy l]} {
            append o "$l\n"
+           if {[regexp {^\}\;$} $l]} break
            continue
        }
        if {$y==-3} {
@@ -278,11 +288,10 @@ proc read_database {} {
     global database
     set f [open database r]
     while {[gets $f l] >= 0} {
-       if {![regexp {^(\w+) (\d+) ([0-9a-f]{2}+)$} $l \
+       if {![regexp {^(\w+) (\d+) ((?:[0-9a-f]{2})+)$} $l \
                  dummy context strl strh]} {
            error "bad syntax"
        }
-        binary scan $strw h* strh
        if {[string length $strh] != $strl*2} { error "$strh $strl" }
        gets $f l; set width [format %d $l]
        set bm $context
@@ -291,6 +300,7 @@ proc read_database {} {
        }
        set database($bm) $strh
     }
+    close $f
 }
 
 proc write_database {} {
@@ -305,9 +315,12 @@ proc write_database {} {
        foreach x [lrange $bm 1 end] { append o "$x\n" }
        lappend ol $o
     }
+    set f [open database.new w]
     foreach o [lsort $ol] {
-       puts -nonewline $o
+       puts -nonewline $f $o
     }
+    close $f
+    file rename -force database.new database
 }
 
 proc update_database/DEFINE {c0 c1 strh} {
diff --git a/pctb/stuff/old b/pctb/stuff/old
new file mode 100644 (file)
index 0000000..557b690
--- /dev/null
@@ -0,0 +1,9 @@
+"\"+ c #111\",
+\"a c #600\",
+\"A c #fcc\",
+\"b c #006\",
+\"B c #ccf\",
+\"u c #000\",
+\"U c #888\",
+\"q c #440\",
+\"Q c #ff0\",\n"
similarity index 100%
rename from pctb/stuff/text.xpm
rename to pctb/text.xpm
diff --git a/pctb/x.gdb b/pctb/x.gdb
new file mode 100644 (file)
index 0000000..922bd4c
--- /dev/null
@@ -0,0 +1,4 @@
+file convert
+set confirm off
+set args <u.pnm
+run