From ffac51d73ab97c4c9c1e6c43b18d54d716000bf6 Mon Sep 17 00:00:00 2001 Message-Id: From: Mark Wooding Date: Mon, 31 Dec 2007 16:13:44 +0000 Subject: [PATCH] disorder-rescan can now suppress the check phase, which on first startup with lots of tracks is rather time consuming. Organization: Straylight/Edgeware From: Richard Kettlewell Additionally, it avoids doing most of the work inside a transaction, by the rather brute expedient of pulling the entire track list into memory and then iterating over that. The server takes advantage of this by making the initial blocking rescan not do the check phase, so it's reasonably quick, and then issuing a second non-blocking rescan immediately. It might be better all round to calculate track lengths on demand but this arrangement should be better than previously. --- lib/trackdb.c | 40 ++++++++++++----- lib/trackdb.h | 2 +- server/disorderd.c | 12 +++-- server/rescan.c | 107 +++++++++++++++++++++++++++++++++++---------- server/server.c | 2 +- server/state.c | 2 +- 6 files changed, 123 insertions(+), 42 deletions(-) diff --git a/lib/trackdb.c b/lib/trackdb.c index 8239a69..eafcfc1 100644 --- a/lib/trackdb.c +++ b/lib/trackdb.c @@ -259,10 +259,23 @@ static int reap_db_deadlock(ev_source attribute((unused)) *ev, return 0; } -static pid_t subprogram(ev_source *ev, const char *prog, - int outputfd) { +static pid_t subprogram(ev_source *ev, int outputfd, const char *prog, + ...) { pid_t pid; - + va_list ap; + const char *args[1024], **argp, *a; + + argp = args; + *argp++ = prog; + *argp++ = "--config"; + *argp++ = configfile; + *argp++ = debugging ? "--debug" : "--no-debug"; + *argp++ = log_default == &log_syslog ? "--syslog" : "--no-syslog"; + va_start(ap, prog); + while((a = va_arg(ap, const char *))) + *argp++ = a; + va_end(ap); + *argp = 0; /* If we're in the background then trap subprocess stdout/stderr */ if(!(pid = xfork())) { exitfn = _exit; @@ -279,10 +292,7 @@ static pid_t subprogram(ev_source *ev, const char *prog, /* If we were negatively niced, undo it. We don't bother checking for * error, it's not that important. */ setpriority(PRIO_PROCESS, 0, 0); - execlp(prog, prog, "--config", configfile, - debugging ? "--debug" : "--no-debug", - log_default == &log_syslog ? "--syslog" : "--no-syslog", - (char *)0); + execvp(prog, (char **)args); fatal(errno, "error invoking %s", prog); } return pid; @@ -291,7 +301,7 @@ static pid_t subprogram(ev_source *ev, const char *prog, /* start deadlock manager */ void trackdb_master(ev_source *ev) { assert(db_deadlock_pid == -1); - db_deadlock_pid = subprogram(ev, DEADLOCK, -1); + db_deadlock_pid = subprogram(ev, -1, DEADLOCK, (char *)0); ev_child(ev, db_deadlock_pid, 0, reap_db_deadlock, 0); D(("started deadlock manager")); } @@ -396,7 +406,7 @@ void trackdb_open(int flags) { /* This database needs upgrading */ info("invoking disorder-dbupgrade to upgrade from %ld to %ld", oldversion, config->dbversion); - pid = subprogram(0, "disorder-dbupgrade", -1); + pid = subprogram(0, -1, "disorder-dbupgrade", (char *)0); while(waitpid(pid, &err, 0) == -1 && errno == EINTR) ; if(err) @@ -1353,7 +1363,7 @@ void trackdb_stats_subprocess(ev_source *ev, d->done = done; d->u = u; xpipe(p); - pid = subprogram(ev, "disorder-stats", p[1]); + pid = subprogram(ev, p[1], "disorder-stats", (char *)0); xclose(p[1]); ev_child(ev, pid, 0, stats_finished, d); ev_reader_new(ev, p[0], stats_read, stats_error, d, "disorder-stats reader"); @@ -2146,14 +2156,20 @@ static int reap_rescan(ev_source attribute((unused)) *ev, return 0; } -void trackdb_rescan(ev_source *ev) { +/** @brief Initiate a rescan + * @param ev Event loop or 0 to block + * @param check 1 to recheck lengths, 0 to suppress check + */ +void trackdb_rescan(ev_source *ev, int check) { int w; if(rescan_pid != -1) { error(0, "rescan already underway"); return; } - rescan_pid = subprogram(ev, RESCAN, -1); + rescan_pid = subprogram(ev, -1, RESCAN, + check ? "--check" : "--no-check", + (char *)0); if(ev) { ev_child(ev, rescan_pid, 0, reap_rescan, 0); D(("started rescanner")); diff --git a/lib/trackdb.h b/lib/trackdb.h index f1aae33..b0f601e 100644 --- a/lib/trackdb.h +++ b/lib/trackdb.h @@ -136,7 +136,7 @@ char **trackdb_search(char **wordlist, int nwordlist, int *ntracks); /* return a list of tracks containing all of the words given. If you * ask for only stopwords you get no tracks. */ -void trackdb_rescan(struct ev_source *ev); +void trackdb_rescan(struct ev_source *ev, int check); /* Start a rescan, if one is not running already */ int trackdb_rescan_cancel(void); diff --git a/server/disorderd.c b/server/disorderd.c index f7bc378..15e3572 100644 --- a/server/disorderd.c +++ b/server/disorderd.c @@ -128,7 +128,7 @@ static int handle_sigterm(ev_source attribute((unused)) *ev_, static int rescan_again(ev_source *ev_, const struct timeval attribute((unused)) *now, void attribute((unused)) *u) { - trackdb_rescan(ev_); + trackdb_rescan(ev_, 1/*check*/); rescan_after(86400); return 0; } @@ -290,13 +290,17 @@ int main(int argc, char **argv) { if(ev_signal(ev, SIGTERM, handle_sigterm, 0)) fatal(0, "ev_signal failed"); /* ignore SIGPIPE */ signal(SIGPIPE, SIG_IGN); - /* start a rescan straight away if this is a new installation */ + /* Start a rescan straight away if this is a new installation. This rescan + * blocks; the point is that when it is finished we are in a good position to + * choose a random track. */ if(!trackdb_existing_database) { - trackdb_rescan(0/*ev*/); + trackdb_rescan(0/*ev*/, 0/*check*/); /* No ev -> the rescan will block. Since we called reconfigure() already * any clients will also be forced to block. */ } - rescan_after(86400); + /* Start a second rescan, with length checking enabled, immediately after + * startup. */ + rescan_after(1); /* periodically tidy up the database */ dbgc_after(60); /* periodically check the volume */ diff --git a/server/rescan.c b/server/rescan.c index dab56df..47dcf99 100644 --- a/server/rescan.c +++ b/server/rescan.c @@ -63,6 +63,8 @@ static const struct option options[] = { { "no-debug", no_argument, 0, 'D' }, { "syslog", no_argument, 0, 's' }, { "no-syslog", no_argument, 0, 'S' }, + { "check", no_argument, 0, 'K' }, + { "no-check", no_argument, 0, 'C' }, { 0, 0, 0, 0 } }; @@ -75,7 +77,8 @@ static void help(void) { " --version, -V Display version number\n" " --config PATH, -c PATH Set configuration file\n" " --debug, -d Turn on debugging\n" - " --[no-]syslog Force logging\n" + " --[no-]syslog Enable/disable logging to syslog\n" + " --[no-]check Enable/disable track length check\n" "\n" "Rescanner for DisOrder. Not intended to be run\n" "directly.\n"); @@ -199,52 +202,74 @@ done: struct recheck_state { const struct collection *c; long nobsolete, nnocollection, nlength; + struct recheck_track *tracks; +}; + +struct recheck_track { + struct recheck_track *next; + const char *track; }; /* called for each non-alias track */ -static int recheck_callback(const char *track, - struct kvp *data, - void *u, - DB_TXN *tid) { +static int recheck_list_callback(const char *track, + struct kvp attribute((unused)) *data, + void *u, + DB_TXN attribute((unused)) *tid) { struct recheck_state *cs = u; + struct recheck_track *t = xmalloc(sizeof *t); + + t->next = cs->tracks; + t->track = track; + cs->tracks = t; + return 0; +} + +static int recheck_track_tid(struct recheck_state *cs, + const struct recheck_track *t, + DB_TXN *tid) { const struct collection *c = cs->c; - const char *path = kvp_get(data, "_path"); + const char *path; char buffer[20]; int err, n; long length; + struct kvp *data; - if(aborted()) return EINTR; - D(("rechecking %s", track)); + if((err = trackdb_getdata(trackdb_tracksdb, t->track, &data, tid))) + return err; + path = kvp_get(data, "_path"); + D(("rechecking %s", t->track)); /* if we're not checking a specific collection, find the right collection */ if(!c) { - if(!(c = find_track_collection(track))) { - D(("obsoleting %s", track)); - if((err = trackdb_obsolete(track, tid))) return err; + if(!(c = find_track_collection(t->track))) { + D(("obsoleting %s", t->track)); + if((err = trackdb_obsolete(t->track, tid))) + return err; ++cs->nnocollection; return 0; } } /* see if the track has evaporated */ if(check(c->module, c->root, path) == 0) { - D(("obsoleting %s", track)); - if((err = trackdb_obsolete(track, tid))) return err; + D(("obsoleting %s", t->track)); + if((err = trackdb_obsolete(t->track, tid))) + return err; ++cs->nobsolete; return 0; } /* make sure we know the length */ if(!kvp_get(data, "_length")) { - D(("recalculating length of %s", track)); + D(("recalculating length of %s", t->track)); for(n = 0; n < config->tracklength.n; ++n) - if(fnmatch(config->tracklength.s[n].s[0], track, 0) == 0) + if(fnmatch(config->tracklength.s[n].s[0], t->track, 0) == 0) break; if(n >= config->tracklength.n) - error(0, "no tracklength plugin found for %s", track); + error(0, "no tracklength plugin found for %s", t->track); else { - length = tracklength(config->tracklength.s[n].s[1], track, path); + length = tracklength(config->tracklength.s[n].s[1], t->track, path); if(length > 0) { byte_snprintf(buffer, sizeof buffer, "%ld", length); kvp_set(&data, "_length", buffer); - if((err = trackdb_putdata(trackdb_tracksdb, track, data, tid, 0))) + if((err = trackdb_putdata(trackdb_tracksdb, t->track, data, tid, 0))) return err; ++cs->nlength; } @@ -253,20 +278,38 @@ static int recheck_callback(const char *track, return 0; } +static int recheck_track(struct recheck_state *cs, + const struct recheck_track *t) { + int e; + + WITH_TRANSACTION(recheck_track_tid(cs, t, tid)); + return e; +} + /* recheck a collection */ static void recheck_collection(const struct collection *c) { struct recheck_state cs; + const struct recheck_track *t; + long nrc; if(c) info("rechecking %s", c->root); else info("rechecking all tracks"); + /* Doing the checking inside a transaction locks up the server for much too + * long (because it spends lots of time thinking about each track). So we + * pull the full track list into memory and work from that. + * + * 100,000 tracks at, say, 80 bytes per track name, gives 8MB, which is quite + * reasonable. + */ for(;;) { checkabort(); + info("getting track list"); global_tid = trackdb_begin_transaction(); memset(&cs, 0, sizeof cs); cs.c = c; - if(trackdb_scan(c ? c->root : 0, recheck_callback, &cs, global_tid)) + if(trackdb_scan(c ? c->root : 0, recheck_list_callback, &cs, global_tid)) goto fail; break; fail: @@ -285,6 +328,19 @@ static void recheck_collection(const struct collection *c) { } trackdb_commit_transaction(global_tid); global_tid = 0; + nrc = 0; + for(t = cs.tracks; t; t = t->next) { + if(aborted()) + return; + recheck_track(&cs, t); + ++nrc; + if(nrc % 100 == 0) { + if(c) + info("rechecking %s, %ld tracks so far", c->root, nrc); + else + info("rechecking all tracks, %ld tracks so far", nrc); + } + } if(c) info("rechecked %s, %ld obsoleted, %ld lengths calculated", c->root, cs.nobsolete, cs.nlength); @@ -334,11 +390,12 @@ static void expire_noticed(void) { int main(int argc, char **argv) { int n, logsyslog = !isatty(2); struct sigaction sa; + int do_check = 1; set_progname(argv); mem_init(); if(!setlocale(LC_CTYPE, "")) fatal(errno, "error calling setlocale"); - while((n = getopt_long(argc, argv, "hVc:dDSs", options, 0)) >= 0) { + while((n = getopt_long(argc, argv, "hVc:dDSsKC", options, 0)) >= 0) { switch(n) { case 'h': help(); case 'V': version(); @@ -347,6 +404,8 @@ int main(int argc, char **argv) { case 'D': debugging = 0; break; case 'S': logsyslog = 0; break; case 's': logsyslog = 1; break; + case 'K': do_check = 1; break; + case 'C': do_check = 0; break; default: fatal(0, "invalid option"); } } @@ -368,7 +427,8 @@ int main(int argc, char **argv) { /* Rescan all collections */ do_all(rescan_collection); /* Check that every track still exists */ - recheck_collection(0); + if(do_check) + recheck_collection(0); /* Expire noticed.db */ expire_noticed(); } @@ -377,8 +437,9 @@ int main(int argc, char **argv) { for(n = optind; n < argc; ++n) do_directory(argv[n], rescan_collection); /* Check specified collections for tracks that have gone */ - for(n = optind; n < argc; ++n) - do_directory(argv[n], recheck_collection); + if(do_check) + for(n = optind; n < argc; ++n) + do_directory(argv[n], recheck_collection); } trackdb_close(); trackdb_deinit(); diff --git a/server/server.c b/server/server.c index 44580c9..a7dee99 100644 --- a/server/server.c +++ b/server/server.c @@ -356,7 +356,7 @@ static int c_rescan(struct conn *c, char attribute((unused)) **vec, int attribute((unused)) nvec) { info("S%x rescan by %s", c->tag, c->who); - trackdb_rescan(c->ev); + trackdb_rescan(c->ev, 1/*check*/); sink_writes(ev_writer_sink(c->w), "250 initiated rescan\n"); return 1; /* completed */ } diff --git a/server/state.c b/server/state.c index d6b6699..16fc1a3 100644 --- a/server/state.c +++ b/server/state.c @@ -157,7 +157,7 @@ int reconfigure(ev_source *ev, int reload) { /* We only allow for upgrade at startup */ trackdb_open(TRACKDB_CAN_UPGRADE); if(need_another_rescan) - trackdb_rescan(ev); + trackdb_rescan(ev, 1/*check*/); if(!ret) { queue_read(); recent_read(); -- [mdw]