/*
 * Retrieved from chiark gitweb: [ypp-sc-tools.db-test.git] / pctb / ocr.h
 * Page heading: "Much faster"
 */
1 #ifndef OCR_H
2 #define OCR_H
3
4
/*
 * Compile-time debug switches.
 *
 * DEBUG_RECTANGLES selects the variant of CANIMG_DEBUG_RECTANGLE_1LINE
 * that writes each image row to the debug stream.
 * DEBUG_OCR is not consulted anywhere in this header and is left
 * disabled; remove the comment markers to define it.
 */
#define DEBUG_RECTANGLES
/* #define DEBUG_OCR */
7
8
9
/*
 * Request GNU extensions from the C library.
 *
 * Guarded so that a build which already defines _GNU_SOURCE (for
 * example with -D_GNU_SOURCE on the command line) does not get a
 * macro-redefinition diagnostic from this header.
 *
 * Feature-test macros only take effect when they are defined before
 * the first system header is included, so this header has to be the
 * first thing a translation unit includes for the request to work.
 * NOTE(review): confirm that every .c file includes ocr.h first.
 */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
11
12 #include <pam.h>
13 #include <stdint.h>
14 #include <inttypes.h>
15 #include <assert.h>
16 #include <string.h>
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <stdarg.h>
20 #include <time.h>
21 #include <limits.h>
22
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <sys/time.h>
26
27
/*
 * Image held as one char per pixel.
 *
 * d is a flexible array member, so a CanonImage has to be allocated
 * with room for the pixel characters after the fixed part (see
 * alloc_canon_image).  CANONICALISE_IMAGE addresses the pixels
 * row by row, as d[y*w + x].
 */
typedef struct {
  int w;      /* presumably the width in pixels */
  int h;      /* presumably the height in pixels */
  char d[];   /* pixel characters */
} CanonImage;
32
33
/* Element type of the cols[] array given to ocr(); exactly 32 bits. */
typedef uint32_t Pixcol;

/*
 * Builds the <inttypes.h> format macro that matches Pixcol by pasting
 * the width onto a PRI.. or SCN.. stem: PSPIXCOL(PRIx) is PRIx32.
 */
#define PSPIXCOL(fmtstem) fmtstem##32
36
/*
 * One element of the array returned by ocr().  The array ends with an
 * element whose s is 0 and whose l and r are both -1.
 */
typedef struct {
  const char *s;     /* valid until next call to ocr() */
  int l;             /* column number; presumably the glyph's first column */
  int r;             /* column number; presumably the glyph's last column */
  unsigned ctxmap;   /* match context index */
} OcrResultGlyph;
42
/*
 * A cell type is a pointer to a constant OcrCellTypeInfo.  The
 * structure's contents are not declared in this header, so users can
 * only pass such pointers around.
 */
struct OcrCellTypeInfo;
typedef const struct OcrCellTypeInfo *OcrCellType;

/* The cell types defined elsewhere; presumably the values given to ocr(). */
extern const struct OcrCellTypeInfo ocr_celltype_number;
extern const struct OcrCellTypeInfo ocr_celltype_text;
46
47 typedef struct OcrReader OcrReader;
48 OcrReader *ocr_init(int h);
49
50 OcrResultGlyph *ocr(OcrReader *rd, OcrCellType, int w, Pixcol cols[]);
51   /* return value is array terminated by {0,-1,-1}
52    * array is valid until next call to ocr()
53    */
54
/* Stream that debug output is written to. */
#define debug stdout

/* Checking assertion; at present simply the standard assert(). */
#define eassert assert

/*
 * Defined elsewhere.  CANONICALISE_IMAGE calls it once it has finished
 * writing its rows; presumably it flushes the debug stream.
 */
void debug_flush(void);
59
60 const char *get_vardir(void);
61
62 CanonImage *file_read_image(FILE *f);
63 int main_test(void);
64
65 #define MAX_PAGES 100
66 extern CanonImage *page_images[MAX_PAGES];
67 extern int npages;
68
69
/* Pairs one colour with the character used for it in a CanonImage. */
typedef struct {
  unsigned long rgb;   /* on screen */
  char c;              /* canonical */
} CanonColourInfo;

/*
 * Table of known colours, defined elsewhere.  CANONICALISE_IMAGE scans
 * it from the start and stops at the first entry whose c is 0, so the
 * table has to end with such an entry.
 */
extern const CanonColourInfo canoncolourinfos[];
76
77 CanonImage *alloc_canon_image(int w, int h);
78
/*
 * CANIMG_DEBUG_RECTANGLE_1LINE(im,w,h)
 *
 * With DEBUG_RECTANGLES defined, writes one row of im to the debug
 * stream: the row number y in a 4-wide field, a space, the w pixel
 * characters of that row, and a newline.  It asserts that all w
 * characters were written.  Without DEBUG_RECTANGLES it expands to
 * nothing.
 *
 * The macro is anaphoric: it uses the caller's `int y' (row index)
 * and assigns the fwrite count to the caller's `int r'.  h is
 * accepted but not used.
 *
 * im and w are parenthesised in the expansion so that arguments such
 * as `&img' or `cols+1' expand with the intended precedence.  The
 * expansion supplies its own final semicolon, so uses written without
 * a trailing semicolon keep working.
 */
#ifdef DEBUG_RECTANGLES
# define CANIMG_DEBUG_RECTANGLE_1LINE(im,w,h)           \
      fprintf(debug, "%4d ",y);                         \
      r= fwrite((im)->d + y*(w), 1,(w), debug);         \
      eassert(r==(w));                                  \
      fputc('\n',debug);
#else
# define CANIMG_DEBUG_RECTANGLE_1LINE(im,w,h) /* nothing */
#endif
88
/*
 * CANONICALISE_IMAGE(im, w, h, COMPUTE_RGB)
 *
 * Allocates a w by h image with alloc_canon_image(), stores the
 * pointer in im, and fills in its pixels.
 *
 * COMPUTE_RGB is expanded once per pixel.  It should be a number of
 * statements, or a block, which assigns to
 *   unsigned long rgb;
 * given the values of
 *   int x,y;
 * all of which are anaphoric: this macro declares them and the
 * caller's code refers to them by name.
 *
 * rgb is then looked up in canoncolourinfos[], which is scanned up to
 * the first entry whose c is 0.  On a match that entry's c is stored
 * at d[y*w + x].  A pixel whose rgb matches no entry is not written,
 * so it keeps whatever alloc_canon_image() left there.
 *
 * w and h are each evaluated exactly once, before x, y and r come
 * into scope, so they may have side effects and may mention the
 * caller's own variables of those names.  im is evaluated several
 * times and must be a side-effect-free lvalue.
 *
 * CANIMG_DEBUG_RECTANGLE_1LINE is expanded after every row and
 * debug_flush() is called once at the end.
 */
#define CANONICALISE_IMAGE(im,w,h, COMPUTE_RGB) do{             \
    const int canonicalise_w_= (w);                             \
    const int canonicalise_h_= (h);                             \
    (im)= alloc_canon_image(canonicalise_w_, canonicalise_h_);  \
                                                                \
    int x,y;                                                    \
    int r= 0; /* only the debug row dump assigns this */        \
    (void)r;  /* so it is not "unused" when debug is off */     \
    for (y=0; y<canonicalise_h_; y++) {                         \
      for (x=0; x<canonicalise_w_; x++) {                       \
        const CanonColourInfo *cci;                             \
        unsigned long rgb;                                      \
        COMPUTE_RGB;                                            \
        for (cci=canoncolourinfos; cci->c; cci++)               \
          if (cci->rgb == rgb) {                                \
            (im)->d[y*canonicalise_w_ + x]= cci->c;             \
            break;                                              \
          }                                                     \
      }                                                         \
      CANIMG_DEBUG_RECTANGLE_1LINE((im),                        \
                                   canonicalise_w_,             \
                                   canonicalise_h_)             \
    }                                                           \
    debug_flush();                                              \
  }while(0)
115
116
117 #endif /*OCR_H*/