From 5d58a08423953756871493042a9cfff032b66e18 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Sat, 6 Jun 2009 15:22:39 +0100 Subject: [PATCH] Much faster --- pctb/Makefile | 2 +- pctb/convert.c | 58 +++++++++-------------- pctb/ocr.h | 55 +++++++++++++++++++++- pctb/pages.c | 122 ++++++++++++++++++------------------------------- 4 files changed, 120 insertions(+), 117 deletions(-) diff --git a/pctb/Makefile b/pctb/Makefile index 3a4fff3..55f4d53 100644 --- a/pctb/Makefile +++ b/pctb/Makefile @@ -5,7 +5,7 @@ all: convert x-manip-window CONVERT_OBJS= convert.o ocr.o pages.o -convert: $(CONVERT_OBJS) -lnetpbm -lXpm -lXtst -lX11 +convert: $(CONVERT_OBJS) -lnetpbm -lXtst -lX11 $(CONVERT_OBJS): ocr.h x-manip-window: -lXtst -lX11 diff --git a/pctb/convert.c b/pctb/convert.c index 3a2b79f..4784c65 100644 --- a/pctb/convert.c +++ b/pctb/convert.c @@ -1,11 +1,6 @@ #include "ocr.h" -typedef struct { - unsigned long rgb; /* on screen */ - char c; /* canonical */ -} CanonColourInfo; - static CanonImage *cim; void debug_flush(void) { @@ -38,7 +33,7 @@ static int colrightx[INTERESTING_COLUMNS]; static int text_h; static OcrReader *rd; -static const CanonColourInfo canoncolourinfos[]= { +const CanonColourInfo canoncolourinfos[]= { { 0x475A5E, '*' }, /* edge */ { 0x2C5F7A, '*' }, /* edge just under box heading shadow */ { 0x7D9094, '+' }, /* interbox */ @@ -221,44 +216,33 @@ static void find_table_entry(Rect commod, int colno, Rect *cellr) { require_rectangle_r(*cellr, " o"); } +CanonImage *alloc_canon_image(int w, int h) { + CanonImage *im= malloc(sizeof(CanonImage) + w*h); + eassert(im); + im->w= w; + im->h= h; + memset(im->d,'?',w*h); + return im; +} + CanonImage *file_read_image(FILE *f) { struct pam inpam; - unsigned char rgb[3]; - int x,y,r; - const CanonColourInfo *cci; + unsigned char rgb_buf[3]; + CanonImage *im; pnm_readpaminit(f, &inpam, sizeof(inpam)); eassert(inpam.maxval == 255); eassert(inpam.bytes_per_sample == 1); - CanonImage *im= malloc(sizeof(CanonImage) + inpam.width*inpam.height); - eassert(im); - im->h= inpam.height; - im->w= inpam.width; - - memset(im->d,'?',inpam.width*inpam.height); - - for (y=0; yc; cci++) - if (cci->rgb == rgb_l) { - im->d[y*inpam.width + x]= cci->c; - break; - } - } -#ifdef DEBUG_RECTANGLES - fprintf(debug, "%4d ",y); - r= fwrite(im->d + y*inpam.width, 1,inpam.width, debug); - eassert(r==inpam.width); - fputc('\n',debug); -#endif - } - debug_flush(); + CANONICALISE_IMAGE(im, inpam.width, inpam.height, { + r= fread(&rgb_buf,1,3,f); eassert(r==3); + + rgb= + ((unsigned long)rgb_buf[0]<<16) | + ((unsigned long)rgb_buf[1]<<8) | + (rgb_buf[2]); + }); + return im; } diff --git a/pctb/ocr.h b/pctb/ocr.h index d9a25f7..7921104 100644 --- a/pctb/ocr.h +++ b/pctb/ocr.h @@ -1,6 +1,12 @@ #ifndef OCR_H #define OCR_H + +#define DEBUG_RECTANGLES +// #define DEBUG_OCR + + + #define _GNU_SOURCE #include @@ -12,6 +18,7 @@ #include #include #include +#include #include #include @@ -60,7 +67,51 @@ extern CanonImage *page_images[MAX_PAGES]; extern int npages; -#define DEBUG_RECTANGLES -// #define DEBUG_OCR +typedef struct { + unsigned long rgb; /* on screen */ + char c; /* canonical */ +} CanonColourInfo; + +extern const CanonColourInfo canoncolourinfos[]; + +CanonImage *alloc_canon_image(int w, int h); + +#ifdef DEBUG_RECTANGLES +# define CANIMG_DEBUG_RECTANGLE_1LINE(im,w,h) \ + fprintf(debug, "%4d ",y); \ + r= fwrite(im->d + y*w, 1,w, debug); \ + eassert(r==w); \ + fputc('\n',debug); +#else +# define CANIMG_DEBUG_RECTANGLE_1LINE(im,y,h) /* nothing */ +#endif + +#define CANONICALISE_IMAGE(im,w,h, COMPUTE_RGB) do{ \ + /* compute_rgb should be a number of statements, or \ + * a block, which assigns to \ + * unsigned long rgb; \ + * given the values of \ + * int x,y; \ + * all of which are anamorphic. Result is stored in im. \ + */ \ + (im)= alloc_canon_image((w), (h)); \ + \ + int x,y,r; \ + for (y=0; y<(h); y++) { \ + for (x=0; x<(w); x++) { \ + const CanonColourInfo *cci; \ + unsigned long rgb; \ + COMPUTE_RGB; \ + for (cci=canoncolourinfos; cci->c; cci++) \ + if (cci->rgb == rgb) { \ + (im)->d[y*(w) + x]= cci->c; \ + break; \ + } \ + } \ + CANIMG_DEBUG_RECTANGLE_1LINE((im),(w),(h)) \ + } \ + debug_flush(); \ + }while(0) + #endif /*OCR_H*/ diff --git a/pctb/pages.c b/pctb/pages.c index 3eb33ce..5978f74 100644 --- a/pctb/pages.c +++ b/pctb/pages.c @@ -8,8 +8,6 @@ #include #include -#include - CanonImage *page_images[MAX_PAGES]; int npages; @@ -18,8 +16,6 @@ static Display *disp; static struct timeval tv_startup; static unsigned wwidth, wheight; -static XpmAttributes xpma; - static KeyCode keycode(KeySym sym) { return XKeysymToKeycode(disp,sym); } @@ -73,7 +69,7 @@ typedef XImage Snapshot; //static size_t snapshot_alloc= 1024; static double last_input; -static const double min_update_allowance= 0.5; +static const double min_update_allowance= 0.25; static double timestamp(void) { struct timeval tv; @@ -222,83 +218,55 @@ static void raise_and_set_focus(void) { fprintf(stderr,"PAGING raise_and_set_focus done.\n"); } -static void store_page(int pageno, Snapshot *sn) { - pid_t converter, paster; - eassert(pageno < MAX_PAGES); - int paste[2], results[2]; - FILE *err; - int r; +typedef struct { + unsigned long mask; + int lshift, rshift; +} ShMask; - eassert(!fflush(stdout)); - eassert(!fflush(stderr)); +static void compute_shift_mask(ShMask *sm, int targshift, + unsigned long ximage_mask) { + unsigned long below; - r= pipe(paste); eassert(!r); - r= pipe(results); eassert(!r); - err= tmpfile(); eassert(err); - - converter= fork(); eassert(converter!=-1); - if (!converter) { - r= dup2(paste[0],0); eassert(r==0); - r= dup2(results[1],1); eassert(r==1); - r= dup2(2,4); eassert(r==4); /* fileno(errn) > 4, see above */ - r= dup2(fileno(err),2); eassert(r==2); - close(paste[0]); - close(paste[1]); - close(results[0]); - close(results[1]); - execlp("xpmtoppm", "xpmtoppm",(char*)0); - dup2(4,2); - eassert(!"xpmtoppm exec failure"); - } - - char *xpmdata=0; - r= XpmCreateBufferFromImage(disp, &xpmdata, sn, 0, &xpma); - eassert(!r); - eassert(xpmdata); - - paster= fork(); eassert(paster!=-1); - if (!paster) { - FILE *f= fdopen(paste[1],"w"); eassert(f); - close(paste[0]); - close(results[0]); - close(results[1]); - r= fputs(xpmdata,f); eassert(r!=EOF); - //size_t did= fwrite(sn->d, 1, sn->len, f); - //eassert(did==sn->len); - eassert(!fclose(f)); - exit(0); - } - - close(paste[0]); - close(paste[1]); - close(results[1]); - - XpmFree(xpmdata); + sm->lshift= 0; + sm->rshift= 0; + sm->mask= 0xfful << targshift; + below= ~0ul << targshift; - FILE *f= fdopen(results[0],"r"); - int c1= fgetc(f); - if (c1!=EOF) { - ungetc(c1,f); - page_images[pageno]= file_read_image(f); - r= fgetc(f); eassert(r==EOF); eassert(!ferror(f)); eassert(feof(f)); - fclose(f); + for (;;) { + if (ximage_mask < sm->mask) { + sm->lshift++; ximage_mask <<= 1; + } else if ((ximage_mask & ~below) > sm->mask) { + sm->rshift++; ximage_mask >>= 1; + } else { + break; + } + assert(!(sm->lshift && sm->rshift)); } + assert(sm->lshift < LONG_BIT); + assert(sm->rshift < LONG_BIT); +} - pid_t got_conv,got_paste; - int st_conv, st_paste; - - got_conv= waitpid(converter,&st_conv,0); eassert(got_conv==converter); - got_paste= waitpid(paster,&st_paste,0); eassert(got_paste==paster); - - if (!st_conv && - (!st_paste || (WIFSIGNALED(st_paste) && WTERMSIG(st_paste)==SIGPIPE)) - && c1!=EOF) { - fclose(err); - return; - } - rewind(err); int c; while ((c=getc(err))!=EOF) fputc(c,stderr); - fprintf(stderr, "convert: subprocess statuses: %d %d\n", st_conv, st_paste); - _exit(127); +static void store_page(int pageno, Snapshot *sn) { + eassert(pageno < MAX_PAGES); + ShMask shiftmasks[3]; + CanonImage *im; + +#define COMPUTE_SHIFT_MASK(ix, targshift, rgb) \ + compute_shift_mask(&shiftmasks[ix], targshift, sn->rgb##_mask) + COMPUTE_SHIFT_MASK(0, 16, red); + COMPUTE_SHIFT_MASK(1, 8, green); + COMPUTE_SHIFT_MASK(2, 0, blue); + + CANONICALISE_IMAGE(im, sn->width, sn->height, { + long xrgb= XGetPixel(sn, x, y); + int i; + rgb= 0; + for (i=0; i<3; i++) + rgb |= ((xrgb << shiftmasks[i].lshift) + >> shiftmasks[i].rshift) & shiftmasks[i].mask; + }); + + page_images[pageno]= im; } static void read_pages(void) { -- 2.30.2