From 0487d41d9c7796c6efebb8235c9aa39fe580ccf9 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Sun, 21 Jun 2009 18:36:44 +0100 Subject: [PATCH] WIP new dictionary arrangements --- pctb/README | 4 +- pctb/convert.c | 72 ++++++++++++++++++++++++++-------- pctb/convert.h | 11 ++++-- pctb/dictionary-manager | 10 +++-- pctb/dictionary-pixmap-options | 3 +- pctb/ocr.c | 4 +- pctb/pages.c | 19 +++++---- pctb/rgbimage.c | 26 ++++++++++-- pctb/structure.c | 23 ++++++----- pctb/x.gdb | 4 +- 10 files changed, 123 insertions(+), 53 deletions(-) diff --git a/pctb/README b/pctb/README index 003df0c..888d533 100644 --- a/pctb/README +++ b/pctb/README @@ -28,8 +28,10 @@ Options to vary the processing: --screenshot-file F Store or read screenshots in F rather than #pages#.pnm --window-id ID Specified X window is the YPP client - do not search --edit-charset Enable character set editing. See README.dictionary. + --find-island Find and print the ocean and island. Suppresses OCR + and output unless used with result processing option. 
-Controlling what happens to the results: +Controlling what happens to the results - only one at a time: --upload (default) Upload to the PCTB server --tsv Print data as clean tab-separated-values file --raw-tsv Dump the raw (not deduped, unsorted) OCR'd data diff --git a/pctb/convert.c b/pctb/convert.c index bb1073f..2685e87 100644 --- a/pctb/convert.c +++ b/pctb/convert.c @@ -49,14 +49,20 @@ enum mode { mode_all= 0111, }; +enum outmodekind { + omk_unset, omk_upload, omk_str, omk_raw, omk_none +}; +static enum outmodekind o_outmode_kind; +static const char *o_outmode_str= 0; + static enum mode o_mode= mode_all; static char *o_screenshot_fn; static int o_quiet; -static const char *o_outputmode= "upload"; static const char *o_serv_pctb, *o_serv_dict_fetch, *o_serv_dict_submit; const char *o_resolver= "./dictionary-manager"; FILE *screenshot_file; +const char *o_ocean, *o_pirate; enum flags o_flags= ff_dict_fetch|ff_dict_submit|ff_dict_pirate; @@ -91,13 +97,13 @@ static void run_analysis(void) { sysassert( fseek(tf,0,SEEK_SET) == 0); - progress_log("processing results (--%s)...", o_outputmode); + progress_log("processing results (--%s)...", o_outmode_str); pid_t processor; sysassert( (processor= fork()) != -1 ); if (!processor) { sysassert( dup2(fileno(tf),0) ==0 ); - EXECLP_HELPER("commod-results-processor", o_outputmode, (char*)0); + EXECLP_HELPER("commod-results-processor", o_outmode_str, (char*)0); } waitpid_check_exitstatus(processor, "output processor/uploader"); @@ -122,7 +128,7 @@ void fetch_with_rsync(const char *stem) { sysassert(!"exec rsync failed"); } - waitpid_check_exitstatus(fetcher, "dictionary-manager --update"); + waitpid_check_exitstatus(fetcher, "rsync"); } static void set_server(const char *envname, const char *defprotocol, @@ -151,7 +157,10 @@ static void set_server(const char *envname, const char *defprotocol, /* rsync :: protocol specification - anyway, adding scheme:// won't help */ goto ok; - value= masprintf("%s%s", defprotocol, value); + 
int vallen= strlen(value); + + value= masprintf("%s%s%s", defprotocol, value, + vallen && value[vallen-1]=='/' ? "" : "/"); ok: sysassert(! setenv(envname,value,1) ); @@ -175,6 +184,7 @@ int main(int argc, char **argv) { else if (IS("--analyse-only") || IS("--same")) o_mode= mode_analyse; else if (IS("--everything")) o_mode= mode_all; + else if (IS("--find-island")) o_flags |= ffs_printisland; else if (IS("--single-page")) o_flags |= ff_singlepage; else if (IS("--quiet")) o_quiet= 1; else if (IS("--edit-charset")) o_flags |= ff_editcharset; @@ -182,15 +192,19 @@ int main(int argc, char **argv) { else if (IS("--dict-read-only")) o_flags &= (~ffs_dict | ff_dict_fetch); else if (IS("--dict-anon")) o_flags &= ~ff_dict_pirate; else if (IS("--dict-submit")) o_flags |= ff_dict_fetch|ff_dict_submit; - else if (IS("--upload") || - IS("--arbitrage") || + else if (IS("--raw-tsv")) o_outmode_kind= omk_raw; + else if (IS("--upload")) o_outmode_kind= omk_upload; + else if (IS("--arbitrage") || IS("--tsv") || - IS("--best-prices")) o_outputmode= arg+2; - else if (IS("--raw-tsv")) o_outputmode= 0; + IS("--best-prices")) o_outmode_kind=omk_str, + o_outmode_str=arg+2; + else if (IS("--screenshot-file")) o_screenshot_fn= ARGVAL; else if (IS("--pctb-server")) o_serv_pctb= ARGVAL; else if (IS("--dict-submit-server")) o_serv_dict_submit= ARGVAL; else if (IS("--dict-update-server")) o_serv_dict_fetch= ARGVAL; + else if (IS("--ocean")) o_ocean= ARGVAL; + else if (IS("--pirate")) o_pirate= ARGVAL; #define DF(f) \ else if (IS("-D" #f)) \ debug_flags |= dbg_##f; @@ -206,9 +220,16 @@ int main(int argc, char **argv) { } /* Consequential changes to options */ - - if (!strcmp("upload",o_outputmode)) - o_flags |= ffs_upload; + + if (o_mode & mf_analyse) { + if (!o_outmode_kind) + o_outmode_kind= (o_flags & ff_printisland) ? 
omk_none : omk_upload; /* --upload is the documented default; omk_str would leave o_outmode_str NULL */ + + if (o_outmode_kind==omk_upload) { + o_flags |= ffs_upload; + o_outmode_str= "upload"; + } + } /* Defaults */ @@ -233,6 +254,17 @@ int main(int argc, char **argv) { screenshot_startup(); find_yppclient_window(); } + if (!ocean) ocean= o_ocean; + if (!pirate) pirate= o_pirate; + + if (o_flags & ff_needisland) { + if (!ocean) + badusage("need --ocean option when replaying images" + " (consider supplying --pirate too)"); + sysassert(! setenv("YPPSC_OCEAN",ocean,1) ); + if (pirate && (o_flags & ff_dict_pirate)) + sysassert(! setenv("YPPSC_PIRATE",pirate,1) ); + } if (o_mode & mf_screenshot) { open_screenshot_file("w"); if (o_flags & ff_singlepage) take_one_screenshot(); @@ -244,11 +276,17 @@ int main(int argc, char **argv) { else read_screenshots(); } if (o_mode & mf_analyse) { - find_islandname(); - if (o_outputmode) - run_analysis(); - else - analyse(stdout); + if (o_flags & ff_needisland) { + find_islandname(); + if (o_flags & ff_printisland) + printf("%s, %s\n", archipelago, island); + } + switch (o_outmode_kind) { + case omk_upload: case omk_str: run_analysis(); break; + case omk_raw: analyse(stdout); break; + case omk_none: break; + default: abort(); + } } return 0; } diff --git a/pctb/convert.h b/pctb/convert.h index 354a6f1..0231ca9 100644 --- a/pctb/convert.h +++ b/pctb/convert.h @@ -94,11 +94,15 @@ enum flags { ffs_dict= 00070, ff_needisland= 00100, - ff_upload= 00200, - ffs_upload= 00300, + ff_printisland= 00200, + ff_upload= 00400, + ffs_printisland= 00300, + ffs_upload= 00500, }; extern enum flags o_flags; +extern const char *o_ocean, *o_pirate; + /*----- from pages.c -----*/ void screenshot_startup(void); @@ -112,7 +116,8 @@ extern CanonImage *page_images[MAX_PAGES]; extern int npages; RgbImage *page0_rgbimage; -extern char *ocean, *pirate; +extern const char *ocean, *pirate; +extern char *archipelago, *island; #endif /*CONVERT_H*/ diff --git a/pctb/dictionary-manager b/pctb/dictionary-manager index 0037a2a..49156ad 100755 --- 
a/pctb/dictionary-manager +++ b/pctb/dictionary-manager @@ -237,7 +237,7 @@ proc required/char {} { pack .d.csr -side top -before .d.mi pack .d.got .d.ctx -side top -after .d.mi - read_database ./charset-$rows.txt + read_database "./#local-char$rows#.txt" draw_glyphsdone startup_cursor } @@ -294,11 +294,14 @@ proc pixmap_maybe_ok {} { set nsel 0 foreach_pixmap_col col { set cs [.d.pe.grid.l$col curselection] - incr nsel [llength $cs] + set lcs [llength $cs] + if {!$lcs} continue + incr nsel $lcs set pixmap_selcol $col set pixmap_selrow [lindex $cs 0] } if {$nsel==1} { + debug "MAYBE_OK YES col=$pixmap_selcol row=$pixmap_selrow." .d.pe.ok configure -state normal -command pixmap_ok } else { .d.pe.ok configure -state disabled -command {} @@ -337,7 +340,7 @@ proc required/pixmap {} { set alloptions [exec ./dictionary-pixmap-options $unk_what] - read_database ./pixmaps.txt + read_database "./#local-pixmap#.txt" set mulcols [image width image/main] set mulrows [image height image/main] @@ -1063,4 +1066,5 @@ if {$debug} { } set argv [lrange $argv $ai end] +debug [exec printenv] main/$mainkind diff --git a/pctb/dictionary-pixmap-options b/pctb/dictionary-pixmap-options index 5b7a825..67ca722 100755 --- a/pctb/dictionary-pixmap-options +++ b/pctb/dictionary-pixmap-options @@ -69,7 +69,8 @@ sub ptcl ($) { } sub main__island () { - my $url= "$pctb/islands.php?oceanName=SAGE"; + my $ocean= $ENV{'YPPSC_OCEAN'}; die unless $ocean; + my $url= "$pctb/islands.php?oceanName=".uc $ocean; my $resp= $ua->get($url); die $resp->status_line unless $resp->is_success; $jsonresp= $resp->content; diff --git a/pctb/ocr.c b/pctb/ocr.c index 5828bd0..2c3a167 100644 --- a/pctb/ocr.c +++ b/pctb/ocr.c @@ -400,8 +400,8 @@ OcrReader *ocr_init(int h) { OcrReader *rd; if (o_flags & ff_dict_fetch) { - char *fetchfile= masprintf("master-char%d",rd->h); - progress("Updating %s",fetchfile); + char *fetchfile= masprintf("char%d",h); + progress("Updating %s...",fetchfile); fetch_with_rsync(fetchfile); 
free(fetchfile); } diff --git a/pctb/pages.c b/pctb/pages.c index 50a652c..32e429a 100644 --- a/pctb/pages.c +++ b/pctb/pages.c @@ -45,7 +45,7 @@ CanonImage *page_images[MAX_PAGES]; int npages; RgbImage *page0_rgbimage; -char *ocean, *pirate; +const char *ocean, *pirate; static XWindowAttributes attr; static Window id; @@ -694,11 +694,13 @@ void find_yppclient_window(void) { REQUIRE( !memcmp(title + len - S(suffix), suffix, S(suffix)) ); REQUIRE( !memcmp(spc1, onthe, S(onthe)) ); -#define ASSIGN(what, start, end) do { \ - sysassert( asprintf(&what, "%.*s", (end)-(start), start) >0 ); \ - }while(0) - ASSIGN(pirate, title + S(prefix), spc1); +#define ASSIGN(what, start, end) \ + what= masprintf("%.*s", (int)((end)-(start)), start); /* %.*s precision must be int */ \ + if (o_##what) REQUIRE( !strcmp(o_##what, what) ); \ + else + ASSIGN(ocean, spc1 + S(onthe), (title + len) - S(suffix)); + ASSIGN(pirate, title + S(prefix), spc1); debugfind(" YES!\n"); id= w2; @@ -712,9 +714,10 @@ void find_yppclient_window(void) { if (children1) XFree(children1); } if (nfound>1) - fatal("Found several YPP clients." + fatal("Found several possible YPP clients." " Close one, or specify the windowid with --window-id.\n"); if (nfound<1) - fatal("Did not find YPP client." - " Use --window-id and/or report this as a fault.\n"); + fatal("Did not find %sYPP client." + " Use --window-id and/or report this as a fault.\n", + o_ocean || o_pirate ? 
"matching ": ""); } diff --git a/pctb/rgbimage.c b/pctb/rgbimage.c index 798a104..d0ba6b7 100644 --- a/pctb/rgbimage.c +++ b/pctb/rgbimage.c @@ -48,9 +48,12 @@ #include "convert.h" -static int identify(const RgbImage *base, Rect portion, - char result[MAXIMGIDENT], const char *what) { - sysassert( dbfile_open("pixmaps.txt") ); +static int identify1(const RgbImage *base, Rect portion, + char result[MAXIMGIDENT], const char *what, + const char *which) { + char *dbfile_name= masprintf("#%s-pixmap#.txt",which); + if (!dbfile_open(dbfile_name)) + goto not_found; #define FGETSLINE (dbfile_getsline(result,MAXIMGIDENT,__FILE__,__LINE__)) @@ -95,13 +98,21 @@ static int identify(const RgbImage *base, Rect portion, goto found; } } + not_found: result[0]= 0; -found: + found: dbfile_close(); + free(dbfile_name); return !!result[0]; } +static int identify(const RgbImage *base, Rect portion, + char result[MAXIMGIDENT], const char *what) { + return identify1(base,portion,result,what, "master") || + identify1(base,portion,result,what, "local"); +} + static void fwrite_ppm(FILE *f, const RgbImage *base, Rect portion) { int x,y,i; fprintf(f,"P3\n%d %d\n255\n", RECT_W(portion), RECT_H(portion)); @@ -119,6 +130,13 @@ static void fwrite_ppm(FILE *f, const RgbImage *base, Rect portion) { void identify_rgbimage(const RgbImage *base, Rect portion, char result[MAXIMGIDENT], const char *what) { + static int synced; + + if (!synced) { + fetch_with_rsync("pixmap"); + synced++; + } + for (;;) { int ok= identify(base, portion, result, what); if (ok) return; diff --git a/pctb/structure.c b/pctb/structure.c index 0c9750c..97cbb07 100644 --- a/pctb/structure.c +++ b/pctb/structure.c @@ -46,6 +46,8 @@ static int colrightx[INTERESTING_COLUMNS]; static int text_h=-1, columns=-1; static OcrReader *rd; +char *archipelago, *island; + #define OTHERCOORD_x y #define OTHERCOORD_y x @@ -468,6 +470,8 @@ void analyse(FILE *tsv_output) { if (!rd) rd= ocr_init(text_h); + progress("OCRing page %d...",page); + for 
(tryrect= +cim->h; tryrect >= -cim->h; tryrect--) { find_commodity(tryrect, &thisr); if (thisr.tl.x < 0) @@ -528,7 +532,6 @@ void find_islandname(void) { debug_rect("sunshiner",1, sunshiner); identify_rgbimage(ri, sunshiner, sunshine, "sunshine widget"); - fprintf(stderr,"sunshine: `%s'\n",sunshine); if (!memcmp(sunshine,"Ship ",5)) { Rect islandnamer; @@ -538,11 +541,8 @@ void find_islandname(void) { islandnamer.tl.y= 128; islandnamer.br.y= 156; -#define IR_VSHRINK_MUST(CONDMUST,PRWHY) \ - do{ if (!(CONDMUST)) goto not_in_radar; }while(0) - - ADJUST_BOX(islandnamer,"o",5, 0, IR_VSHRINK_MUST, tl,y,+1); - ADJUST_BOX(islandnamer,"o",5, cim->h, IR_VSHRINK_MUST, br,y,-1); + ADJUST_BOX(islandnamer,"o",5, 0, MUST, tl,y,+1); + ADJUST_BOX(islandnamer,"o",5, cim->h, MUST, br,y,-1); debug_rect("islandnamer",0, islandnamer); // int larger_islandnamebry= islandnamer.tl.y + 25; @@ -562,15 +562,14 @@ void find_islandname(void) { } identify_rgbimage(ri, islandnamer, archisland, "island"); - fprintf(stderr,"radar: `%s'\n",archisland); - - assert(!"radar ok"); - - not_in_radar: - assert(!"not in radar?"); } else { assert(!"not vessel"); } + char *delim= strstr(archisland," - "); + assert(delim); + archipelago= masprintf("%.*s", (int)(delim-archisland), archisland); /* %.*s precision must be int */ + island= masprintf("%s", delim+3); + free(ri); } diff --git a/pctb/x.gdb b/pctb/x.gdb index 1484ddb..86aca3d 100644 --- a/pctb/x.gdb +++ b/pctb/x.gdb @@ -1,5 +1,5 @@ file ypp-commodities set confirm off -set args -Dpages --window-id 0x1c228b9 -break pages.c:267 +set args --same --raw-tsv >raw.tsv +break ocr_init run -- 2.30.2