chiark / gitweb /
Much faster
[ypp-sc-tools.db-test.git] / pctb / ocr.h
index 26bccbc7b83b81bb693b27995063ed7cedbecc1d..792110466f4dfe05cde96b469697321ea56b1f1d 100644 (file)
@@ -1,14 +1,35 @@
 #ifndef OCR_H
 #define OCR_H
 
+
+#define DEBUG_RECTANGLES
+// #define DEBUG_OCR
+
+
+
+#define _GNU_SOURCE
+
 #include <pam.h>
 #include <stdint.h>
 #include <inttypes.h>
 #include <assert.h>
 #include <string.h>
 #include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <time.h>
+#include <limits.h>
+
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <sys/time.h>
+
+
+typedef struct { /* image holding one canonical colour char per pixel */
+  int w,h; /* presumably width and height in pixels, as given to alloc_canon_image -- confirm */
+  char d[]; /* pixel data; CANONICALISE_IMAGE stores pixel (x,y) at d[y*w + x] */
+} CanonImage;
+
 
 typedef uint32_t Pixcol;
 #define PSPIXCOL(priscan) priscan##32
@@ -16,19 +37,81 @@ typedef uint32_t Pixcol;
 typedef struct {
   const char *s; /* valid until next call to ocr() */
   int l,r; /* column numbers */
-  int ctx; /* match context index */
+  unsigned ctxmap; /* match context(s); presumably a bitmap now rather than an index -- TODO confirm */
 } OcrResultGlyph;
 
-OcrResultGlyph *ocr(int w, int h, Pixcol cols[]);
+typedef const struct OcrCellTypeInfo *OcrCellType; /* handle: pointer to a cell type descriptor */
+extern const struct OcrCellTypeInfo ocr_celltype_text; /* descriptor; presumably for text cells */
+extern const struct OcrCellTypeInfo ocr_celltype_number; /* descriptor; presumably for numeric cells */
+
+typedef struct OcrReader OcrReader; /* opaque here; the struct is defined elsewhere */
+OcrReader *ocr_init(int h); /* h: presumably the cell height that ocr() used to take -- confirm */
+
+OcrResultGlyph *ocr(OcrReader *rd, OcrCellType, int w, Pixcol cols[]); /* rd comes from ocr_init() */
   /* return value is array terminated by {0,-1,-1}
    * array is valid until next call to ocr()
    */
 
-void ocr_init(void);
-
 void debug_flush(void); /* invoked by CANONICALISE_IMAGE once all rows are done */
 
 #define eassert assert /* plain assert(), so the check vanishes under NDEBUG */
 #define debug stdout /* stream that the fprintf(debug,...) diagnostics go to */
 
+const char *get_vardir(void); /* NOTE(review): ownership of the returned string is not visible here */
+
+CanonImage *file_read_image(FILE *f); /* reads an image from f; file format not visible here */
+int main_test(void);
+
+#define MAX_PAGES 100 /* capacity of page_images[] */
+extern CanonImage *page_images[MAX_PAGES];
+extern int npages; /* presumably the number of page_images[] entries in use -- confirm */
+
+
+typedef struct { /* maps one on-screen RGB value to its canonical char */
+  unsigned long rgb; /* on screen */
+  char c; /* canonical; c==0 marks the end of canoncolourinfos[] */
+} CanonColourInfo;
+
+extern const CanonColourInfo canoncolourinfos[]; /* searched linearly by CANONICALISE_IMAGE */
+
+CanonImage *alloc_canon_image(int w, int h); /* CANONICALISE_IMAGE obtains its result image here */
+
+#ifdef DEBUG_RECTANGLES /* row dump; expands to bare statements, use only inside { } */
+# define CANIMG_DEBUG_RECTANGLE_1LINE(im,w,h) /* uses caller's int y,r */ \
+      fprintf(debug, "%4d ",y);                        \
+      r= fwrite((im)->d + y*(w), 1,(w), debug);        \
+      eassert(r==(w));                         \
+      fputc('\n',debug);
+#else /* same parameter list as the real macro; expands to nothing */
+# define CANIMG_DEBUG_RECTANGLE_1LINE(im,w,h) /* nothing */
+#endif
+
+#define CANONICALISE_IMAGE(im,w,h, COMPUTE_RGB) do{            \
+    /* COMPUTE_RGB should be a number of statements, or        \
+     * a block, which assigns to   unsigned long rgb;          \
+     * given the values of   int x,y;                          \
+     * All three are declared by this macro and captured by    \
+     * name.  The result is stored in im, which is assigned a  \
+     * new image from alloc_canon_image(w,h).                  \
+     */                                                                \
+    (im)= alloc_canon_image((w), (h));                         \
+                                                               \
+    int x,y,r; /* r is used only by the debug row dump */      \
+    for (y=0; y<(h); y++) { /* w and h are evaluated repeatedly */ \
+      for (x=0; x<(w); x++) {                                  \
+        const CanonColourInfo *cci;                            \
+        unsigned long rgb;                                     \
+       COMPUTE_RGB;                                            \
+       for (cci=canoncolourinfos; cci->c; cci++) /* table ends at c==0 */ \
+         if (cci->rgb == rgb) {                                \
+           (im)->d[y*(w) + x]= cci->c;                         \
+           break;                                              \
+         } /* no match for rgb: d[] entry is left as allocated */ \
+      }                                                                \
+      CANIMG_DEBUG_RECTANGLE_1LINE((im),(w),(h))               \
+    }                                                          \
+    debug_flush();                                             \
+  }while(0)
+
+
 #endif /*OCR_H*/