12 unsigned long rgb; /* on screen */
13 char c; /* canonical */
/* Dimensions of the canonified image buffer: image[] holds width*height
 * cells and is indexed as y*width + x. */
16 static int height, width;
/* Flush the debug stream, asserting both that the flush succeeds and that
 * the stream has no error indicator set afterwards. */
19 static void debug_flush(void) {
20 eassert(!fflush(debug));
21 eassert(!ferror(debug));
28 typedef struct { /* both inclusive */
33 static inline char get(int x, int y) { return image[y * width + x]; }
34 static inline char get_p(Point p) { return get(p.x,p.y); }
/* Seed pixel from which find_structure walks outwards to locate the table.
 * NOTE(review): presumably chosen to lie inside the main table area on every
 * expected screenshot - confirm. */
37 #define START_MAIN {200,200}
/* Number of leading columns whose right-hand x is recorded in colrightx[]. */
39 #define INTERESTING_COLUMNS 6
/* Main table area (corners inclusive).  Starts as the single point
 * START_MAIN and is grown/trimmed by find_structure. */
42 static Rect mainr = { START_MAIN,START_MAIN };
/* Row geometry used by find_commodity: row `offset` has its top at
 * commbasey - offset*comminty. */
43 static int commbasey, comminty;
/* Right-hand x coordinate of each interesting column, filled in by
 * find_structure and consumed by find_table_entry. */
44 static int colrightx[INTERESTING_COLUMNS];
/* Maps exact on-screen RGB values (0xRRGGBB) to canonical characters.
 * load_image_and_canonify scans this table from the start and stops at the
 * first entry whose c is 0, so the table must end with such an entry
 * (the terminator is not among the lines shown here). */
47 static const CanonColourInfo canoncolourinfos[]= {
48 { 0x475A5E, '*' }, /* edge */
49 { 0x2C5F7A, '*' }, /* edge just under box heading shadow */
50 { 0x7D9094, '+' }, /* interbox */
51 { 0xBDC5BF, ' ' }, /* background - pale */
52 { 0xADB5AF, ' ' }, /* background - dark */
53 { 0x000000, 'o' }, /* foreground */
54 { 0xD4B356, ' ' }, /* background (cursor) */
55 { 0xFFFFFF, 'o' }, /* foreground (cursor) */
/* Visits every pixel of the inclusive rectangle (tlx,tly)..(brx,bry),
 * column by column.
 * NOTE(review): callers pass a string such as "*", "+" or " o" as the final
 * argument; presumably each pixel's canonical character must appear in that
 * string - confirm against the loop body. */
59 static void require_rectangle(int tlx, int tly, int brx, int bry,
62 for (x=tlx; x<=brx; x++)
63 for (y=tly; y<=bry; y++) {
68 static void require_rectangle_r(Rect rr, const char *ok) {
69 require_rectangle(rr.tl.x,rr.tl.y, rr.br.x,rr.br.y, ok);
/* Dump the canonical characters of rectangle rr to the debug stream.
 *   what, whati - label and index printed in the heading line
 *   rr          - rectangle to dump, corners inclusive
 */
72 static void debug_rect(const char *what, int whati, Rect rr) {
74 fprintf(debug, "%s %d: %d,%d..%d,%d:\n", what, whati,
75 rr.tl.x,rr.tl.y, rr.br.x,rr.br.y);
/* Number of columns in the rectangle (corners are inclusive). */
76 w= rr.br.x - rr.tl.x + 1;
77 for (y=rr.tl.y; y<=rr.br.y; y++) {
/* Row number, then rr.tl.x spaces of padding so that the dumped cells line
 * up with their real x position, then a '|' marker. */
78 fprintf(debug, "%4d%*s|", y, rr.tl.x,"");
/* Write the w cells of this row straight out of the image buffer. */
79 r= fwrite(image + y*width + rr.tl.x, 1, w, debug);
/* WALK_UNTIL(point,coord,increm,last,edge): step (point).coord by increm.
 *  - Stops, leaving the point where it is, once coord equals last+increm,
 *    i.e. one step beyond `last`.
 *  - Stops when the pixel at point equals edge, first stepping back by
 *    increm so that point is left on the last pixel before the edge.
 * NOTE(review): the enclosing loop construct is not among the lines shown. */
87 #define WALK_UNTIL(point,coord,increm,last,edge) \
89 if ((point).coord == (last)+(increm)) break; \
90 if (get_p((point)) == (edge)) { (point).coord -= (increm); break; } \
91 (point).coord += (increm); \
/* As WALK_UNTIL, but afterwards asserts that the walk did not end one step beyond `last`, i.e. that edge was actually found. */
94 #define WALK_UNTIL_MUST(point,coord,increm,last,edge) \
96 WALK_UNTIL(point,coord,increm,last,edge); \
97 eassert((point).coord != (last)+(increm)); \
/* Locate the table layout in the canonified image.  Establishes mainr (the main table area), commbasey/comminty (row position and pitch) and colrightx[] (right-hand x of the leading columns), asserting along the way that the image has the expected structure. */
100 static void find_structure(void) {
101 Rect whole = { {0,0}, {width-1,height-1} };
/* Grow mainr outwards from START_MAIN in each direction until the next
 * pixel is a '*' edge; reaching the image boundary first is an error. */
103 WALK_UNTIL_MUST(mainr.tl, x,-1, whole.tl.x, '*');
104 WALK_UNTIL_MUST(mainr.tl, y,-1, whole.tl.y, '*');
105 WALK_UNTIL_MUST(mainr.br, x,+1, whole.br.x, '*');
106 WALK_UNTIL_MUST(mainr.br, y,+1, whole.br.y, '*');
/* The one-pixel strips just outside the left, right, top and bottom of
 * mainr must each consist entirely of '*'. */
108 require_rectangle(mainr.tl.x-1, mainr.tl.y, mainr.tl.x-1, mainr.br.y, "*");
109 require_rectangle(mainr.br.x+1, mainr.tl.y, mainr.br.x+1, mainr.br.y, "*");
110 require_rectangle(mainr.tl.x, mainr.tl.y-1, mainr.br.x, mainr.tl.y-1, "*");
111 require_rectangle(mainr.tl.x, mainr.br.y+1, mainr.br.x, mainr.br.y+1, "*");
/* CHECK_STRIP_BORDER(tlbr,xy,increm): if the probed pixel on the given side
 * of mainr is '+', require a '+' strip at that coordinate and move that side
 * of mainr inwards by increm.  (Parts of the macro are not shown here.) */
113 #define CHECK_STRIP_BORDER(tlbr,xy,increm) \
118 csb_p.xy= mainr.tlbr.xy; \
119 if (get_p(csb_p)=='+') { \
121 csb_r.tl.xy= csb_p.xy; \
122 csb_r.br.xy= csb_p.xy; \
123 require_rectangle_r(csb_r, "+"); \
124 mainr.tlbr.xy += increm; \
128 debug_rect("mainr",0, mainr);
/* Strip any '+' border from each of the four sides in turn. */
130 CHECK_STRIP_BORDER(tl,x,+1);
131 CHECK_STRIP_BORDER(tl,y,+1);
132 CHECK_STRIP_BORDER(br,x,-1);
133 CHECK_STRIP_BORDER(br,y,-1);
135 debug_rect("mainr",1, mainr);
/* Find the vertical extent of the table row containing START_MAIN: walk up
 * and down to the nearest '+' separator lines, which must exist inside
 * mainr.  up/down end on the last pixel before each separator. */
137 Point up = START_MAIN;
138 WALK_UNTIL_MUST(up, y,-1, mainr.tl.y, '+');
140 Point down = START_MAIN;
142 WALK_UNTIL_MUST(down, y,+1, mainr.br.y, '+');
/* Print a column ruler to the debug stream: one line per decimal digit
 * position (units, tens, hundreds, thousands), with a digit at every
 * multiple of xscaleunit and spaces elsewhere.
 * NOTE(review): the inner loop runs to x<=width, one column more than the
 * image has - debug output only, but confirm it is intended. */
145 for (y=0, xscaleunit=1; y<4; y++, xscaleunit*=10) {
147 for (x=0; x<=width; x++) {
148 if (x % xscaleunit) fputc(' ',debug);
149 else fprintf(debug,"%d",(x / xscaleunit)%10);
/* Row pitch: the number of pixel rows from up.y to down.y inclusive, plus
 * one (find_commodity treats a row as comminty-1 pixels tall). */
155 comminty= down.y - up.y + 2;
156 fprintf(debug, "up.y=%d down.y=%d commbasey=%d comminty=%d\n",
157 up.y,down.y, commbasey,comminty);
/* Scan left to right along y=commbasey looking for '+' column separators.
 * Each column's right-hand x (clamped to mainr.br.x) is stored in
 * colrightx[] for the first INTERESTING_COLUMNS columns and merely logged
 * for any further ones.  (Loop control and the computation of colrx are
 * among the lines not shown.) */
159 Point across= { mainr.tl.x, commbasey };
162 eassert(get_p(across) != '+');
163 WALK_UNTIL(across, x,+1, mainr.br.x, '+');
164 eassert(colno < MAX_COLUMNS);
166 if (colrx > mainr.br.x) colrx= mainr.br.x;
167 if (colno < INTERESTING_COLUMNS) {
168 colrightx[colno]= colrx;
169 fprintf(debug,"colrightx[%d]= %d\n",colno,colrx);
171 fprintf(debug,"extra colr %d %d\n",colno,colrx);
/* Test for having reached the right-hand side of mainr (the action taken is
 * not shown); a separator column at across.x must be '+' over the full
 * height of mainr. */
176 if (across.x >= mainr.br.x-1)
180 require_rectangle(across.x,mainr.tl.y, across.x,mainr.br.y, "+");
/* At least MIN_COLUMNS columns must have been found. */
183 eassert(colno >= MIN_COLUMNS);
186 static void find_commodity(int offset, Rect *rr) {
187 /* rr->tl.x==-1 if offset out of range */
188 rr->tl.y= commbasey - offset*comminty;
189 rr->br.y= rr->tl.y + comminty-2;
190 if (rr->tl.y < mainr.tl.y || rr->br.y > mainr.br.y) { rr->tl.x=-1; return; }
191 if (rr->tl.y > mainr.tl.y)
192 require_rectangle(rr->tl.x,rr->tl.y-1, rr->br.x,rr->tl.y-1, "+");
193 if (rr->br.y < mainr.tl.y)
194 require_rectangle(rr->tl.x,rr->br.y+1, rr->br.x,rr->br.y+1, "+");
196 rr->tl.x= mainr.tl.x;
197 rr->br.x= mainr.br.x;
200 static void find_table_entry(Rect commod, int colno, Rect *cellr) {
201 cellr->tl.y= commod.tl.y;
202 cellr->br.y= commod.br.y;
203 cellr->tl.x= !colno ? commod.tl.x : colrightx[colno-1]+2;
204 cellr->br.x= colrightx[colno];
205 debug_rect("cell", colno, *cellr);
206 require_rectangle_r(*cellr, " o");
/* Read an image from stdin (header parsed with libnetpbm's
 * pnm_readpaminit) and build image[]: one canonical character per pixel,
 * looked up by exact RGB value in canoncolourinfos.  Each finished row is
 * also echoed to the debug stream. */
209 static void load_image_and_canonify(void) {
211 unsigned char rgb[3];
213 const CanonColourInfo *cci;
215 pnm_readpaminit(stdin, &inpam, sizeof(inpam));
216 height= inpam.height;
/* Only 8-bit samples are supported. */
218 eassert(inpam.maxval == 255);
219 eassert(inpam.bytes_per_sample == 1);
/* One cell per pixel; every cell starts out as '?'. */
221 image= malloc(width*height);
223 memset(image,'?',width*height);
225 for (y=0; y<height; y++) {
226 for (x=0; x<width; x++) {
/* Raw pixel data is read directly, three bytes per pixel.
 * NOTE(review): this assumes three samples per pixel - check whether
 * inpam.depth is asserted in the lines not shown. */
227 r= fread(&rgb,1,3,stdin); eassert(r==3);
229 ((unsigned long)rgb[0]<<16) |
230 ((unsigned long)rgb[1]<<8) |
/* Linear search of the colour table, which ends at an entry with c==0. */
232 for (cci=canoncolourinfos; cci->c; cci++)
233 if (cci->rgb == rgb_l) {
234 image[y*width + x]= cci->c;
/* Echo the canonified row, prefixed with its y coordinate. */
238 fprintf(debug, "%4d ",y);
239 r= fwrite(image + y*width, 1,width, debug); eassert(r==width);
/* Value describing one pixel column of a glyph.
 * NOTE(review): presumably the per-column bitmask built in ocr_rectangle
 * (bit y set for an 'o' pixel in row y) - confirm. */
245 typedef uint32_t Pixcol;
/* Pastes the 32-bit suffix onto an <inttypes.h> macro prefix, e.g.
 * PSPIXCOL(SCNx) expands to SCNx32. */
246 #define PSPIXCOL(priscan) priscan##32
/* Node reached when this link's column value matches. */
250 struct OCRDatabaseNode *then;
/* Maximum number of characters a single database entry may stand for. */
253 #define MAXGLYPHCHRS 3
/* One node of the OCR lookup structure: an optional result string plus an
 * array of outgoing links keyed by column value. */
255 typedef struct OCRDatabaseNode {
256 char s[MAXGLYPHCHRS+1]; /* null-terminated; "" means no match here */
258 OCRDatabaseLink *links;
/* One root node per OCR context; indexed by the ctx value read from the
 * database and by the ctx chosen in ocr_rectangle. */
261 #define N_OCR_CONTEXTS 2
262 static OCRDatabaseNode ocr_contexts[N_OCR_CONTEXTS];
/* Load the OCR database from the file "database" in the current directory
 * into the trees rooted at ocr_contexts[].  Any malformed input or
 * allocation failure trips an eassert.  (The outer per-entry loop and some
 * bookkeeping are among the lines not shown.) */
264 static void load_ocr_database(void) {
266 OCRDatabaseNode *current, *additional;
267 char chrs[MAXGLYPHCHRS+1];
271 FILE *db= fopen("database","r"); eassert(db);
/* Entry header: context index and number of characters in the result. */
274 r= fscanf(db, "%d %d", &ctx, &nchrs);
277 eassert(ctx>=0 && ctx<N_OCR_CONTEXTS);
278 eassert(nchrs>0 && nchrs<=MAXGLYPHCHRS);
/* The result characters follow as hex codes, each in the range 1..255. */
280 for (i=0; i<nchrs; i++) {
282 r= fscanf(db, "%x", &c); eassert(r==1);
283 eassert(c>0 && c<=255);
/* Then the glyph width in columns, followed by one hex value per column. */
289 r= fscanf(db, "%d", &twidth); eassert(r==1);
290 current= &ocr_contexts[ctx];
291 for (i=0; i<twidth; i++) {
292 r= fscanf(db, "%"PSPIXCOL(SCNx), &cv); eassert(r==1);
/* Follow an existing link for this column value if there is one ... */
293 for (j=0; j<current->nlinks; j++)
294 if (current->links[j].col == cv) {
295 current= current->links[j].then;
/* ... otherwise create a fresh node with no links of its own. */
299 additional= malloc(sizeof(*additional)); eassert(additional);
301 additional->nlinks= additional->alinks= 0;
302 additional->links= 0;
/* Grow the link array when it is full (the new value of alinks is computed
 * in lines not shown). */
303 if (current->nlinks==current->alinks) {
306 current->links= realloc(current->links,
307 sizeof(*current->links) * current->alinks);
308 eassert(current->links);
310 current->links[current->nlinks].col= cv;
311 current->links[current->nlinks].then= additional;
/* The node reached after the last column must not already carry a result;
 * it now stands for chrs. */
318 eassert(!current->s[0]);
319 strcpy(current->s, chrs);
321 eassert(!ferror(db));
/* Recognise the text in rectangle r by matching its pixel columns against
 * the OCR database, printing "OUTPUT" lines for recognised glyphs and
 * "UNKNOWN" lines for column runs that could not be matched.  (Substantial
 * parts of the control flow are among the lines not shown.) */
326 static void ocr_rectangle(Rect r) {
327 int w= r.br.x - r.tl.x + 1;
328 int h= r.br.y - r.tl.y + 1;
/* Reduce each pixel column to a bitmask: bit y is set when the pixel in row
 * y of the rectangle is 'o'.
 * NOTE(review): rv is shifted left once per row, so h must not exceed the
 * bit width of rv/cx - their declarations are not shown; confirm. */
331 for (x=0; x<w; x++) {
333 for (y=0, cx=0, rv=1; y<h; y++, rv<<=1) {
334 switch (get(x+r.tl.x, y+r.tl.y)) {
336 case 'o': cx |= rv; break;
337 default: eassert(!"wrong pixel");
/* NOTE(review): presumably nspaces counts consecutive blank columns, so a
 * gap wider than three columns switches to context 1 - confirm. */
354 if (nspaces>3) ctx=1;
358 OCRDatabaseNode *current=0, *lastmatch=0;
360 int afterlastmatchx=-1;
/* Walk the tree for the current context one column at a time, remembering
 * the most recent node that carries a result string. */
361 current= &ocr_contexts[ctx];
365 for (i=0; i<current->nlinks; i++)
366 if (current->links[i].col == cv)
372 current= current->links[i].then;
373 if (current->s[0]) { lastmatch=current; afterlastmatchx=x; }
/* Find the end of the run (x2 stops at the first zero entry of cols[] or at
 * w) and report it as unrecognised. */
378 for (x2=x+1; x2<w && cols[x2]; x2++);
379 printf("UNKNOWN x=%d ctx=%d %d..%d\n",x, ctx, startx,x2);
/* Report the recognised glyph string. */
382 printf("OUTPUT x=%d `%s'\n", x, lastmatch->s);
/* Driver: load and canonify the image, then visit candidate table rows. */
394 load_image_and_canonify();
/* Try every row offset from +height down to -height.  find_commodity marks
 * offsets that fall outside the table by setting tl.x to -1 (the test that
 * skips them is not among the lines shown). */
397 for (tryrect= +height; tryrect >= -height; tryrect--) {
398 find_commodity(tryrect, &thisr);
401 debug_rect("commod",tryrect, thisr);
/* OCR the first MIN_COLUMNS cells of the row.
 * NOTE(review): find_table_entry indexes colrightx[colno], which has
 * INTERESTING_COLUMNS entries - confirm MIN_COLUMNS <= INTERESTING_COLUMNS. */
403 for (colno=0; colno<MIN_COLUMNS; colno++) {
404 find_table_entry(thisr,colno,&entryr);
405 ocr_rectangle(entryr);