From 2e3f8f6ab168a6824dac1430473c7299018a1b0f Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Thu, 9 Jul 2009 20:16:12 +0100 Subject: [PATCH] Compress _pages.ppm for faster execution (less disk traffic) --- pctb/Makefile | 2 +- pctb/README.files | 7 ++-- pctb/common.c | 88 ++++++++++++++++++++++++++++------------------- pctb/common.h | 5 +++ pctb/convert.c | 39 ++++++--------------- 5 files changed, 74 insertions(+), 67 deletions(-) diff --git a/pctb/Makefile b/pctb/Makefile index 4da4677..baf8760 100644 --- a/pctb/Makefile +++ b/pctb/Makefile @@ -49,7 +49,7 @@ clean: realclean: clean rm -f $(TARGETS) rm -f raw.tsv - rm -f _pages.ppm _upload-*.html _commodmap.tsv + rm -f _pages.ppm _pages.ppm.gz _upload-*.html _commodmap.tsv rm -f _master-*.txt _master-*.txt.gz _local-*.txt rm -f ./#pages#.ppm ./#upload-*#.html ./#commodmap#.tsv rm -f ./#master-*#.txt ./#local-*#.txt raw.tsv diff --git a/pctb/README.files b/pctb/README.files index c8d4928..87a90de 100644 --- a/pctb/README.files +++ b/pctb/README.files @@ -3,7 +3,7 @@ Files we use and update The program reads and writes the following files: - * _pages.ppm + * _pages.ppm.gz Contains one or more images (as raw ppms, end-to-end) which are the screenshots taken in the last run. This is (over)written whenever @@ -14,8 +14,9 @@ The program reads and writes the following files: You can specify a different file with --screenshot-file. If you want to display the contents of this file, `display' can do - it. Don't try `display vid:_pages.ppm' as this will consume - truly stupendous quantities of RAM - it wedged my laptop. + it, although you may have to uncompress it first. Don't try + `display vid:_pages.ppm' as this will consume truly stupendous + quantities of RAM - it wedged my laptop. 
* _master-newcommods.txt _local-newcommods.txt diff --git a/pctb/common.c b/pctb/common.c index 2c2a085..f95e585 100644 --- a/pctb/common.c +++ b/pctb/common.c @@ -49,31 +49,12 @@ int dbfile_gzopen(const char *basepath_spec) { assert(!dbfile); basepath= basepath_spec; - //uncomppath= masprintf("%s (uncompressed)", basepath); char *zpath= masprintf("%s.gz", basepath); - int zfd= open(zpath, O_RDONLY); + int e= gzopen(zpath, O_RDONLY, &dbfile, &dbzcat, 0); free(zpath); - - if (zfd<0) { sysassert(errno==ENOENT); return 0; } - - int pipefds[2]; - sysassert(! pipe(pipefds) ); - - sysassert( (dbzcat=fork()) != -1 ); - if (!dbzcat) { - sysassert( dup2(zfd,0)==0 ); - sysassert( dup2(pipefds[1],1)==1 ); - sysassert(! close(zfd) ); - sysassert(! close(pipefds[0]) ); - sysassert(! close(pipefds[1]) ); - execlp("zcat","zcat",(char*)0); - sysassert(!"execlp zcat"); - } - sysassert(! close(zfd) ); - sysassert(! close(pipefds[1]) ); - sysassert( dbfile= fdopen(pipefds[0], "r") ); - + if (e) { errno=e; sysassert(errno==ENOENT); return 0; } /* a missing .gz is the only tolerated failure */ + return 1; } @@ -89,19 +70,7 @@ int dbfile_open(const char *tpath) { } void dbfile_close(void) { - if (!dbfile) return; - - sysassert(!ferror(dbfile)); - sysassert(!fclose(dbfile)); - - if (dbzcat != -1) { - char *zcatstr= masprintf("zcat %s.gz", basepath); - waitpid_check_exitstatus(dbzcat,zcatstr,1); - free(zcatstr); - dbzcat= -1; - } - - dbfile= 0; + gzclose(&dbfile, &dbzcat, basepath); } #define dbassertgl(x) ((x) ? (void)0 : dbfile_assertfail(file,line,#x)) @@ -148,3 +117,52 @@ void dbfile_assertfail(const char *file, int line, const char *m) { " %s", file,line, m); } + +int gzopen(const char *zpath, int oflags, FILE **f_r, pid_t *pid_r, + const char *gziplevel /* 0 for read; may be 0, or "-1" etc. */) { + + int zfd= open(zpath, oflags, 0666); + if (zfd<0) return errno; /* open() signals failure with -1; 0 is a valid descriptor */ + + int pipefds[2]; + sysassert(! 
pipe(pipefds) ); + + int oi,io; const char *cmd; const char *stdiomode; /* oi: child's std fd tied to the file, and index of the parent's pipe end; io: child's std fd tied to the pipe, and index of the child's pipe end */ + switch ((oflags & O_ACCMODE)) { + case O_RDONLY: oi=0; io=1; cmd="gunzip"; stdiomode="r"; break; + case O_WRONLY: oi=1; io=0; cmd="gzip"; stdiomode="w"; break; + default: abort(); + } + + sysassert( (*pid_r=fork()) != -1 ); + if (!*pid_r) { + sysassert( dup2(zfd,oi)==oi ); + sysassert( dup2(pipefds[io],io)==io ); + sysassert(! close(zfd) ); + sysassert(! close(pipefds[0]) ); + sysassert(! close(pipefds[1]) ); + execlp(cmd,cmd,gziplevel,(char*)0); /* a null gziplevel simply ends the argument list early */ + sysassert(!"execlp gzip/gunzip"); + } + sysassert(! close(zfd) ); + sysassert(! close(pipefds[io]) ); + sysassert( *f_r= fdopen(pipefds[oi], stdiomode) ); + + return 0; +} + +void gzclose(FILE **f, pid_t *p, const char *what) { + if (!*f) return; + + sysassert(!ferror(*f)); + sysassert(!fclose(*f)); + + if (*p != -1) { + char *process= masprintf("%s (de)compressor",what); + waitpid_check_exitstatus(*p,process,1); + free(process); + *p= -1; + } + + *f= 0; +} diff --git a/pctb/common.h b/pctb/common.h index 699d161..1fb7e11 100644 --- a/pctb/common.h +++ b/pctb/common.h @@ -156,6 +156,11 @@ void dbfile_close(void); /* idempotent */ int dbfile_scanf(const char *fmt, ...) SCANFMT(1,2); int dbfile_vscanf(const char *fmt, va_list al) SCANFMT(1,0); +int gzopen(const char *zpath, int oflags, FILE **f_r, pid_t *pid_r, + const char *gziplevel /* 0 for read; may be 0, or "-1" etc. */); + /* returns 0 on success, else the errno value from open */ +void gzclose(FILE **f, pid_t *p, const char *what); + /* also OK with *f==0, or *p==-1 */ char *masprintf(const char *fmt, ...) 
FMT(1,2); diff --git a/pctb/convert.c b/pctb/convert.c index 366ea72..4ded228 100644 --- a/pctb/convert.c +++ b/pctb/convert.c @@ -81,28 +81,13 @@ static void vbadusage(const char *fmt, va_list al) { } DEFINE_VWRAPPERF(static, badusage, NORET); -static void open_screenshot_file(const char *mode) { +static void open_screenshot_file(int for_write) { if (!fnmatch("*.gz",o_screenshot_fn,0)) { - int zfd, pipefds[2]; - sysassert( (zfd= open(o_screenshot_fn, O_WRONLY|O_CREAT|O_TRUNC, - 0666)) >= 0); - sysassert(! pipe(pipefds) ); - sysassert( (screenshot_compressor=fork()) != -1 ); - if (!screenshot_compressor) { - sysassert( dup2(pipefds[0],0)==0 ); - sysassert( dup2(zfd,1)==1 ); - sysassert(! close(zfd) ); - sysassert(! close(pipefds[0]) ); - sysassert(! close(pipefds[1]) ); - execlp("gzip","gzip","-1",(char*)0); - sysassert(!"execlp gzip for screenshots"); - } - sysassert(! close(zfd) ); - sysassert(! close(pipefds[0]) ); - sysassert( screenshot_file= fdopen(pipefds[1], "w") ); - + int mode= for_write ? O_WRONLY|O_CREAT|O_TRUNC : O_RDONLY; + sysassert(! gzopen(o_screenshot_fn, mode, &screenshot_file, + &screenshot_compressor, for_write ? "-1" : 0) ); /* compression level only when writing; gzopen wants 0 for read */ } else { - screenshot_file= fopen(o_screenshot_fn, mode); + screenshot_file= fopen(o_screenshot_fn, for_write ? "w" : "r"); if (!screenshot_file) fatal("could not open screenshots file `%s': %s", o_screenshot_fn, strerror(errno)); @@ -298,8 +283,7 @@ int main(int argc, char **argv) { o_serv_dict_submit, o_flags & ff_dict_submit); if (!o_screenshot_fn) - o_screenshot_fn= masprintf("%s/_pages.ppm%s", get_vardir(), - o_mode & mf_readscreenshot ? 
"" : ".gz"); + o_screenshot_fn= masprintf("%s/_pages.ppm.gz", get_vardir()); /* Actually do the work */ @@ -328,18 +312,21 @@ int main(int argc, char **argv) { } if (o_mode & mf_screenshot) { - open_screenshot_file("w"); + open_screenshot_file(1); if (o_flags & ff_singlepage) take_one_screenshot(); else take_screenshots(); progress_log("OK for you to move the mouse now, and you can" " use the YPP client again."); + progress("Finishing handling screenshots..."); + gzclose(&screenshot_file,&screenshot_compressor,"screenshots output"); } if (o_mode & mf_readscreenshot) { if ((o_flags & ff_upload) && !(o_flags & ff_testservers)) badusage("must not reuse screenshots for upload to live PCTB database"); - open_screenshot_file("r"); + open_screenshot_file(0); if (o_flags & ff_singlepage) read_one_screenshot(); else read_screenshots(); + gzclose(&screenshot_file,&screenshot_compressor,"screenshots input"); } if (o_mode & mf_analyse) { if (o_flags & ff_needisland) { @@ -355,10 +342,6 @@ int main(int argc, char **argv) { default: abort(); } } - if (screenshot_compressor!=-1) { - progress("Finishing compressing screenshots..."); - waitpid_check_exitstatus(screenshot_compressor,"screenshots gzip",0); - } progress_log("Finished."); return 0; } -- 2.30.2