chiark / gitweb /
Check we're on the right page
[ypp-sc-tools.db-live.git] / pctb / ocr.h
1 #ifndef OCR_H
2 #define OCR_H
3
4
/* Compile-time debug switches.
 *  DEBUG_RECTANGLES  makes CANIMG_DEBUG_RECTANGLE_1LINE (below) dump each
 *                    canonicalised image row to the `debug' stream.
 *  DEBUG_OCR         not referenced in this header.
 *                    NOTE(review): presumably read by the OCR
 *                    implementation - confirm before enabling.
 */
#define DEBUG_RECTANGLES
// #define DEBUG_OCR

/* Feature-test macro; it has to be seen before the first system header
 * is included.  Guarded so that a build which already passes
 * -D_GNU_SOURCE does not get a macro-redefinition diagnostic.
 */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
11
12 #include <pam.h>
13 #include <stdint.h>
14 #include <inttypes.h>
15 #include <assert.h>
16 #include <string.h>
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <stdarg.h>
20 #include <time.h>
21 #include <limits.h>
22
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <sys/time.h>
26
27
/* Canonicalised image: one char per pixel, held in the flexible array d.
 * CANONICALISE_IMAGE stores the pixel at column x, row y in d[y*w + x],
 * for 0 <= x < w and 0 <= y < h.
 */
typedef struct {
  int w;
  int h;
  char d[];
} CanonImage;
32
33
/* Element type of the cols[] array handed to ocr().
 * NOTE(review): presumably one value per pixel column - confirm.
 */
typedef uint32_t Pixcol;

/* Builds the <inttypes.h> format macro that matches Pixcol by pasting
 * "32" onto the given prefix: PSPIXCOL(PRIx) becomes PRIx32.
 */
#define PSPIXCOL(fmtprefix) fmtprefix##32
36
/* One entry of the array returned by ocr(). */
typedef struct {
  const char *s;    /* only valid until ocr() is called again */
  int l;            /* column number (l and r) */
  int r;
  unsigned ctxmap;  /* match context index */
} OcrResultGlyph;
42
/* A cell type is a pointer to an OcrCellTypeInfo, which stays opaque in
 * this header.  Two instances are exported; ocr() takes one as its
 * second argument.
 */
struct OcrCellTypeInfo;
typedef const struct OcrCellTypeInfo *OcrCellType;

extern const struct OcrCellTypeInfo ocr_celltype_text,
                                    ocr_celltype_number;
46
47 typedef struct OcrReader OcrReader;
48 OcrReader *ocr_init(int h);
49
50 OcrResultGlyph *ocr(OcrReader *rd, OcrCellType, int w, Pixcol cols[]);
51   /* return value is array terminated by {0,-1,-1}
52    * array is valid until next call to ocr()
53    */
54
55 void debug_flush(void);
56
57 void find_structure(CanonImage *im);
58
59 #define eassert assert
60 #define debug stdout
61
62 const char *get_vardir(void);
63
64 CanonImage *file_read_image(FILE *f);
65 int main_test(void);
66
67 #define MAX_PAGES 100
68 extern CanonImage *page_images[MAX_PAGES];
69 extern int npages;
70
71
/* Maps one colour to the character that represents it in a CanonImage. */
typedef struct {
  unsigned long rgb;  /* the colour as it is on screen */
  char c;             /* its canonical character */
} CanonColourInfo;

/* Colour table.  CANONICALISE_IMAGE walks it from the start and stops at
 * the first entry whose c is 0, so that entry acts as the terminator.
 */
extern const CanonColourInfo canoncolourinfos[];
78
79 CanonImage *alloc_canon_image(int w, int h);
80
/* CANIMG_DEBUG_RECTANGLE_1LINE(im,w,h)
 *
 * Debug helper for CANONICALISE_IMAGE: writes row y of im to `debug' as
 * a "%4d " row number, then the w raw chars of the row, then a newline.
 * Expands to nothing unless DEBUG_RECTANGLES is defined.
 *
 * Anamorphic: uses the caller's  int y  (row to print) and assigns the
 * fwrite() result to the caller's  int r.  h is accepted but unused.
 *
 * The expansion is a brace block, not do{}while(0), because the existing
 * caller invokes the macro without a trailing semicolon.
 */
#ifdef DEBUG_RECTANGLES
# define CANIMG_DEBUG_RECTANGLE_1LINE(im,w,h) {         \
      fprintf(debug, "%4d ", y);                        \
      r= fwrite((im)->d + y*(w), 1, (w), debug);        \
      eassert(r==(w));                                  \
      fputc('\n', debug);                               \
    }
#else
# define CANIMG_DEBUG_RECTANGLE_1LINE(im,w,h) /* nothing */
#endif
90
/* CANONICALISE_IMAGE(im,w,h, COMPUTE_RGB)
 *
 * Allocates a w x h CanonImage with alloc_canon_image(), stores it in the
 * lvalue im, and sets each pixel whose colour appears in canoncolourinfos
 * to that entry's canonical character.  Pixels whose colour is not in the
 * table are left as alloc_canon_image() returned them.  Ends by calling
 * debug_flush().
 *
 * w and h are evaluated exactly once, before x and y come into scope.
 * im is evaluated more than once and must be a side-effect-free lvalue.
 */
#define CANONICALISE_IMAGE(im,w,h, COMPUTE_RGB) do{             \
    /* COMPUTE_RGB should be a number of statements, or         \
     * a block, which assigns to                                \
     *   unsigned long rgb;                                     \
     * given the values of                                      \
     *   int x,y;                                               \
     * all of which are anamorphic.  Result is stored in im.    \
     */                                                         \
    const int canon_w_= (w), canon_h_= (h);                     \
    int x,y,r=0;                                                \
    (void)r; /* only used if DEBUG_RECTANGLES is enabled */     \
                                                                \
    (im)= alloc_canon_image(canon_w_, canon_h_);                \
                                                                \
    for (y=0; y<canon_h_; y++) {                                \
      for (x=0; x<canon_w_; x++) {                              \
        const CanonColourInfo *cci;                             \
        unsigned long rgb;                                      \
        COMPUTE_RGB;                                            \
        /* table ends at the first entry with c == 0 */         \
        for (cci=canoncolourinfos; cci->c; cci++)               \
          if (cci->rgb == rgb) {                                \
            (im)->d[y*canon_w_ + x]= cci->c;                    \
            break;                                              \
          }                                                     \
      }                                                         \
      CANIMG_DEBUG_RECTANGLE_1LINE((im),canon_w_,canon_h_)      \
    }                                                           \
    debug_flush();                                              \
  }while(0)
117
118
119 #endif /*OCR_H*/