X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~yarrgweb/git?p=ypp-sc-tools.db-test.git;a=blobdiff_plain;f=pctb%2Fconvert.c;h=8101fb920a40adafad02ab92ed4efd5181c2ae44;hp=4cc45b4a7981d09821b76136490fad3bf5c3bb83;hb=fdb2c7e1f2211fe4328ee6390f40a55446ecd078;hpb=27ae0b4c6d571104f553d5bf863998fd478feb45 diff --git a/pctb/convert.c b/pctb/convert.c index 4cc45b4..8101fb9 100644 --- a/pctb/convert.c +++ b/pctb/convert.c @@ -33,27 +33,40 @@ void debug_flush(void) { } const char *get_vardir(void) { return "."; } +const char *get_libdir(void) { return "."; } -static enum { - mf_findwindow= 0001, - mf_screenshot= 0010, - mf_readscreenshot= 0020, - mf_analyse= 0100, + +enum mode { + mf_findwindow= 00001, + mf_screenshot= 00010, + mf_readscreenshot= 00020, + mf_analyse= 00100, + mfm_special= 07000, - mode_findwindow= 0001, - mode_screenshot= 0011, - mode_analyse= 0120, + mode_findwindow= 00001, + mode_screenshot= 00011, + mode_analyse= 00120, + mode_showcharset= 01000, + + mode_all= 00111, +}; - mode_all= 0111, -} o_mode= mode_all; +enum outmodekind { + omk_unset, omk_upload, omk_str, omk_raw, omk_none +}; +static enum outmodekind o_outmode_kind; +static const char *o_outmode_str= 0; +static enum mode o_mode= mode_all; static char *o_screenshot_fn; -static int o_single_page, o_quiet; -static const char *o_outputmode= "upload"; +static const char *o_serv_pctb, *o_serv_dict_fetch, *o_serv_dict_submit; -const char *o_resolver; +const char *o_resolver= "./dictionary-manager"; FILE *screenshot_file; +const char *o_ocean, *o_pirate; +int o_quiet; +enum flags o_flags= ff_dict_fetch|ff_dict_submit|ff_dict_pirate; static void vbadusage(const char *fmt, va_list) FMT(1,0) NORET; static void vbadusage(const char *fmt, va_list al) { @@ -78,66 +91,135 @@ static void run_analysis(void) { progress("running recognition..."); analyse(tf); - if (o_single_page && !strcmp(o_outputmode,"upload")) - fatal("Recognition successful, but refusing to upload partial data\n" - " (--single-page specified). Specify an output mode?"); + if (o_flags & ff_upload) { + if (npages<=1) + fatal("Recognition successful, but refusing to upload partial data\n" + " (--single-page specified). Specify an output mode?"); + } sysassert( fseek(tf,0,SEEK_SET) == 0); - progress_log("processing results (--%s)...", o_outputmode); + progress_log("processing results (--%s)...", o_outmode_str); pid_t processor; sysassert( (processor= fork()) != -1 ); if (!processor) { sysassert( dup2(fileno(tf),0) ==0 ); - execlp("./yppsc-commod-processor", "yppsc-commod-processor", - o_outputmode, (char*)0); - sysassert(!"execlp commod-processor failed"); + EXECLP_HELPER("commod-results-processor", o_outmode_str, (char*)0); } waitpid_check_exitstatus(processor, "output processor/uploader"); fclose(tf); progress_log("all complete."); -} +} + +void fetch_with_rsync(const char *stem) { + pid_t fetcher; + + sysassert( (fetcher= fork()) != -1 ); + if (!fetcher) { + const char *rsync= getenv("YPPSC_PCTB_RSYNC"); + if (!rsync) rsync= "rsync"; + + const char *src= getenv("YPPSC_PCTB_DICT_UPDATE"); + char *remote= masprintf("%s/master-%s.txt", src, stem); + char *local= masprintf("#master-%s#.txt", stem); + if (DEBUGP(rsync)) + fprintf(stderr,"executing rsync to fetch %s to %s\n",remote,local); + execlp(rsync, "rsync", + DEBUGP(rsync) ? "-zvLt" : "-zLt", + "--",remote,local,(char*)0); + sysassert(!"exec rsync failed"); + } + + waitpid_check_exitstatus(fetcher, "rsync"); +} + +static void set_server(const char *envname, const char *defprotocol, + const char *defvalue, const char *defvalue_test, + const char *userspecified, + int enable) { + const char *value; + + if (!enable) { value= "0"; goto ok; } + + if (userspecified) + value= userspecified; + else if ((value= getenv(envname))) + ; + else if (o_flags & ff_testservers) + value= defvalue_test; + else + value= defvalue; + + if (value[0]=='/' || (value[0]=='.' && value[1]=='/')) + /* absolute or relative pathname - or anyway, something with no hostname */ + goto ok; + + const char *colon= strchr(value, ':'); + const char *slash= strchr(value, '/'); + + if (colon && (!slash || colon < slash)) + /* colon before the first slash, if any */ + /* rsync :: protocol specification - anyway, adding scheme:// won't help */ + goto ok; + + int vallen= strlen(value); + + value= masprintf("%s%s%s", defprotocol, value, + vallen && value[vallen-1]=='/' ? "" : "/"); + + ok: + sysassert(! setenv(envname,value,1) ); +} int main(int argc, char **argv) { const char *arg; - int r; + + sysassert( setlocale(LC_MESSAGES,"") ); + sysassert( setlocale(LC_CTYPE,"en_GB.UTF-8") || + setlocale(LC_CTYPE,"en.UTF-8") ); #define ARGVAL ((*++argv) ? *argv : \ (badusage("missing value for option %s",arg),(char*)0)) +#define IS(s) (!strcmp(arg,(s))) + while ((arg=*++argv)) { - if (!strcmp(arg,"--find-window-only")) - o_mode= mode_findwindow; - else if (!strcmp(arg,"--screenshot-only")) - o_mode= mode_screenshot; - else if (!strcmp(arg,"--analyse-only") || - !strcmp(arg,"--same")) - o_mode= mode_analyse; - else if (!strcmp(arg,"--everything")) - o_mode= mode_all; - else if (!strcmp(arg,"--single-page")) - o_single_page= 1; - else if (!strcmp(arg,"--quiet")) - o_quiet= 1; - else if (!strcmp(arg,"--edit-dictionary")) - o_resolver= "./dictionary-manager"; - else if (!strcmp(arg,"--raw-tsv")) - o_outputmode= 0; - else if (!strcmp(arg,"--upload") || - !strcmp(arg,"--arbitrage") || - !strcmp(arg,"--tsv") || - !strcmp(arg,"--best-prices")) - o_outputmode= arg+2; - else if (!strcmp(arg,"--screenshot-file")) - o_screenshot_fn= ARGVAL; + if (IS("--find-window-only")) o_mode= mode_findwindow; + else if (IS("--screenshot-only")) o_mode= mode_screenshot; + else if (IS("--show-charset")) o_mode= mode_showcharset; + else if (IS("--analyse-only") || + IS("--same")) o_mode= mode_analyse; + else if (IS("--everything")) o_mode= mode_all; + else if (IS("--find-island")) o_flags |= ffs_printisland; + else if (IS("--single-page")) o_flags |= ff_singlepage; + else if (IS("--quiet")) o_quiet= 1; + else if (IS("--edit-charset")) o_flags |= ff_editcharset; + else if (IS("--test-servers")) o_flags |= ff_testservers; + else if (IS("--dict-local-only")) o_flags &= ~ffs_dict; + else if (IS("--dict-read-only")) o_flags &= (~ffs_dict | ff_dict_fetch); + else if (IS("--dict-anon")) o_flags &= ~ff_dict_pirate; + else if (IS("--dict-submit")) o_flags |= ff_dict_fetch|ff_dict_submit; + else if (IS("--raw-tsv")) o_outmode_kind= omk_raw; + else if (IS("--upload")) o_outmode_kind= omk_upload; + else if (IS("--arbitrage") || + IS("--tsv") || + IS("--best-prices")) o_outmode_kind=omk_str, + o_outmode_str=arg+2; + + else if (IS("--screenshot-file")) o_screenshot_fn= ARGVAL; + else if (IS("--pctb-server")) o_serv_pctb= ARGVAL; + else if (IS("--dict-submit-server")) o_serv_dict_submit= ARGVAL; + else if (IS("--dict-update-server")) o_serv_dict_fetch= ARGVAL; + else if (IS("--ocean")) o_ocean= ARGVAL; + else if (IS("--pirate")) o_pirate= ARGVAL; #define DF(f) \ - else if (!strcmp(arg,"-D" #f)) \ + else if (IS("-D" #f)) \ debug_flags |= dbg_##f; DEBUG_FLAG_LIST #undef DF - else if (!strcmp(arg,"--window-id")) { + else if (IS("--window-id")) { char *ep; unsigned long windowid= strtoul(ARGVAL,&ep,0); if (*ep) badusage("invalid window id"); @@ -145,33 +227,97 @@ int main(int argc, char **argv) { } else badusage("unknown option `%s'",arg); } - - if (!o_screenshot_fn) { - r= asprintf(&o_screenshot_fn,"%s/#pages#.ppm",get_vardir()); - sysassert(r>=0); + + /* Consequential changes to options */ + + if (o_mode & mf_analyse) { + if (!o_outmode_kind) { + if (o_flags & ff_printisland) { + o_outmode_kind= omk_none; + o_flags |= ff_singlepage; + } else { + o_outmode_kind= omk_upload; + } + } + + if (o_outmode_kind==omk_upload) { + o_flags |= ffs_upload; + o_outmode_str= "upload"; + } } + /* Defaults */ + + set_server("YPPSC_PCTB_PCTB", + "http://", "pctb.ilk.org" /*pctb.crabdance.com*/, + "pctb.ilk.org", + o_serv_pctb, o_flags & (ff_needisland|ff_upload)); + + set_server("YPPSC_PCTB_DICT_UPDATE", + "rsync://", "rsync.pctb.chiark.greenend.org.uk/pctb", + "rsync.pctb.chiark.greenend.org.uk/pctb/test", + o_serv_dict_fetch, o_flags & ff_dict_fetch); + + set_server("YPPSC_PCTB_DICT_SUBMIT", + "http://", "dictup.pctb.chiark.greenend.org.uk", + "dictup.pctb.chiark.greenend.org.uk/test", + o_serv_dict_submit, o_flags & ff_dict_submit); + + if (!o_screenshot_fn) + o_screenshot_fn= masprintf("%s/#pages#.ppm",get_vardir()); + + /* Actually do the work */ + if (o_mode & mf_findwindow) { screenshot_startup(); find_yppclient_window(); } + if (!ocean) ocean= o_ocean; + if (!pirate) pirate= o_pirate; + + if (o_flags & ff_needisland) + if (!ocean) + badusage("need --ocean option when not using actual YPP client window" + " (consider supplying --pirate too)"); + if (ocean) + sysassert(! setenv("YPPSC_OCEAN",ocean,1) ); + if (pirate && (o_flags & ff_dict_pirate)) + sysassert(! setenv("YPPSC_PIRATE",pirate,1) ); + + switch (o_mode & mfm_special) { + case 0: break; + case mode_showcharset: ocr_showcharsets(); exit(0); + default: abort(); + } + if (o_mode & mf_screenshot) { open_screenshot_file("w"); - if (o_single_page) take_one_screenshot(); + if (o_flags & ff_singlepage) take_one_screenshot(); else take_screenshots(); + progress_log("OK for you to move the mouse now."); } if (o_mode & mf_readscreenshot) { + if ((o_flags & ff_upload) && !(o_flags & ff_testservers)) + badusage("must not reuse screenshots for upload to live PCTB database"); open_screenshot_file("r"); - if (o_single_page) read_one_screenshot(); + if (o_flags & ff_singlepage) read_one_screenshot(); else read_screenshots(); } if (o_mode & mf_analyse) { - find_islandname(); - if (o_outputmode) - run_analysis(); - else - analyse(stdout); + if (o_flags & ff_needisland) { + find_islandname(page_images[0]->rgb); + if (o_flags & ff_printisland) + printf("%s, %s\n", archipelago, island); + sysassert(! setenv("YPPSC_ISLAND",island,1) ); + } + switch (o_outmode_kind) { + case omk_upload: case omk_str: run_analysis(); break; + case omk_raw: analyse(stdout); break; + case omk_none: break; + default: abort(); + } } + progress_log("Finished."); return 0; } @@ -280,3 +426,13 @@ void waitpid_check_exitstatus(pid_t pid, const char *what) { fatal("%s gave strange wait status %d", what, st); } } + +char *masprintf(const char *fmt, ...) { + char *r; + va_list al; + va_start(al,fmt); + sysassert( vasprintf(&r,fmt,al) >= 0); + sysassert(r); + va_end(al); + return r; +}