* site.name_backlog.lock lock taken out by innxmit wrapper
* holder and its child are "xmit"
* site.name_backlog_<date>.<inum>
- * 431'd articles, ready for innxmit
+ * 431'd articles, ready for innxmit or duct
* created (link/mv) by duct
- * read by xmit
- * unlinked by xmit
- * site.name_backlog_<letters> eg
+ * site.name_backlog_<anything-else> eg
* site.name_backlog_manual
* anything the sysadmin likes (eg, feed files
* from old feeds to be merged into this one)
* created (link/mv) by admin
- * read by xmit
- * unlinked by xmit
-
-
-
-
- BLATHER ABOUT NOT USING INNXMIT
-
- scan for backlog files
- take "old enough" backlog files one at a time and reprocess them
- so need another input file - the current backlog file
- inhibit scanning for backlog files only when
- - last file not "old enough"
- - no backlog files found and we haven't made one
-
- also we have to start a new backlog file every (some interval)
-
- TODO for backlog file inputs
- - all code to search for and open these files
- - write proper algorithm comment
-
+ *
+ * Backlog files are also processed by innduct.  We find the oldest
+ * backlog file that is at least backlog_retry_minperiods periods old,
+ * and feed it back into our processing.  When every article in it has
+ * been read and processed, we unlink it and look for another backlog file.
+ *
+ * If we don't have a backlog file that we're reading, we close the
+ * defer file that we're writing and make it into a backlog file at
+ * the first convenient opportunity.
-
OVERALL STATES:
START
static int inndcomm_flush_timeout=100, quiet_if_locked=0;
static const char *remote_host;
static int reconnect_delay_periods, flushfail_retry_periods, open_wait_periods;
+static int backlog_retry_minperiods, backlog_spontaneous_rescan_periods;
static const char *inndconffile;
static double accept_proportion;
return status;
}
+/* Dies unless STAB describes a regular file.  PATH and WHAT are used
+ * only to build the error message. */
+static void check_isreg(const struct stat *stab, const char *path,
+			const char *what) {
+  if (S_ISREG(stab->st_mode))
+    return;
+
+  die("%s %s not a plain file (mode 0%lo)",
+      what, path, (unsigned long)stab->st_mode);
+}
+
+/* fstat(2) wrapper: fills in *STAB_R for the open descriptor FD and
+ * reports a system error naming WHAT if the call fails.  There is no
+ * pathname available here, so the message mentions only WHAT. */
+static void xfstat(int fd, struct stat *stab_r, const char *what) {
+  int r= fstat(fd, stab_r);
+  if (r) sysdie("could not fstat %s", what);
+}
+
+/* Like xfstat, but additionally dies unless FD refers to a regular file.
+ * The caller supplies no pathname, so the check is done here on *STAB_R
+ * and the message names only WHAT. */
+static void xfstat_isreg(int fd, struct stat *stab_r, const char *what) {
+  xfstat(fd, stab_r, what);
+  if (!S_ISREG(stab_r->st_mode))
+    die("%s not a plain file (mode 0%lo)",
+	what, (unsigned long)stab_r->st_mode);
+}
+
+/* lstat(2) wrapper which insists on a regular file.
+ *
+ * If ENOENT_R is nonnull, *ENOENT_R is set to 1 when PATH does not exist
+ * (in which case *STAB is not meaningful) and to 0 otherwise.  If
+ * ENOENT_R is null, a missing file is reported like any other lstat
+ * failure.  WHAT describes the file in error messages. */
+static void xlstat_isreg(const char *path, struct stat *stab,
+			 int *enoent_r /* 0 means ENOENT is fatal */,
+			 const char *what) {
+  int r= lstat(path, stab);
+  if (r) {
+    if (errno==ENOENT && enoent_r) { *enoent_r=1; return; }
+    sysdie("could not lstat %s %s", what, path);
+  }
+  if (enoent_r) *enoent_r= 0;
+  check_isreg(stab, path, what);
+}
+
+/* Nonzero iff A and B refer to the same inode on the same device.
+ * Both must describe regular files (checked with assert). */
+static int samefile(const struct stat *a, const struct stat *b) {
+  assert(S_ISREG(a->st_mode));
+  assert(S_ISREG(b->st_mode));
+
+  if (a->st_dev != b->st_dev)
+    return 0;
+  return a->st_ino == b->st_ino;
+}
+
/*========== logging ==========*/
static void logcore(int sysloglevel, const char *fmt, ...)
/*========== overall control of article flow ==========*/
static void check_master_queue(void) {
- try reading current feed file;
-
if (!queue.count)
return;
OOP_RD_SHORTREC_FORBID
};
+/* Read-error callback for the peer connection.  CONN_V is the Conn that
+ * was registered with the reader; we hand it to connfail together with
+ * ERRMSG and return OOP_CONTINUE. */
+static void *peer_rd_err(oop_source *lp, oop_read *oread, oop_event ev,
+			 const char *errmsg, int errnoval,
+			 const char *data, size_t recsz, void *conn_v) {
+  Conn *failed= conn_v;
+
+  connfail(failed, "error receiving from peer: %s", errmsg);
+  return OOP_CONTINUE;
+}
+
static Article *article_reply_check(Connection *conn, const char *response,
int code_indicates_streaming,
const char *sanitised_response) {
free(art);
}
-static void *peer_rd_err(oop_source *lp, oop_read *oread, oop_event ev,
- const char *errmsg, int errnoval,
- const char *data, size_t recsz, void *conn_v) {
- Conn *conn= conn_v;
- connfail(conn, "error receiving from peer: %s", errmsg);
- return OOP_CONTINUE;
-}
-
static void *peer_rd_ok(oop_source *lp, oop_read *oread, oop_event ev,
const char *errmsg, int errnoval,
const char *data, size_t recsz, void *conn_v) {
code_streaming= 1;
case 436: /* IHAVE says try later */
GET_ARTICLE;
+ open_defer();
if (fprintf(defer, "%s %s\n", TokenToText(art->token), art->messageid) <0
|| fflush(defer))
sysfatal("write to defer file %s",path_ductdefer);
ipf->fd= -1;
assert(sms==sm_SEPARATED || sms==sm_DROPPING);
-
+
if (main_input_file)
inputfile_tailing_start(main_input_file);
}
static void inputfile_tailing_stop(InputFile *ipf) {
assert(ipf->fd);
+ oop_rd_cancel(ipf->rd);
oop_rd_delete(ipf->rd);
ipf->rd= 0;
assert(!ipf->filemon); /* we shouldn't be monitoring it now */
}
-/*========== interaction with innd ==========*/
+/*========== interaction with innd - state machine ==========*/
/* See official state diagram at top of file. We implement
* this as follows:
| V install defer as backlog V install defer as backlog
^ | close D | close D
| | unlink D | unlink D
- | | start new defer | exit
- | | V
- `----------' ==========
+ | | | exit
+ `----------' V
+ ==========
(ESRCH)
[Droppped]
==========
*/
-static void open_defer(void) {
- struct stat stab;
-
- assert(!defer);
- defer= fopen(path_ductdefer, "a+");
- if (!defer) sysfatal("could not open defer file %s", path_ductdefer);
-
- /* truncate away any half-written records */
-
- r= fstat(fileno(defer), &stab);
- if (r) sysdie("could not stat newly opened defer file %s", path_ductdefer);
-
- if (stab.st_size > LONG_MAX)
- die("defer file %s size is far too large", path_ductdefer);
+static void statemc_init(void) {
+ struct stat stab, stabf;
+ int noent;
- if (!stab.st_size)
- return;
+ path_ductlock= xasprintf("%s_duct.lock", feedfile);
+ path_duct= xasprintf("%s_duct", feedfile);
+ path_ductdefer= xasprintf("%s_duct.defer", feedfile);
+ globpat_backlog= xasprintf("%s_backlog_*", feedfile);
- long orgsize= stab.st_size;
- long truncto= stab.st_size;
for (;;) {
- if (!truncto) break; /* was only (if anything) one half-truncated record */
- if (fseek(defer, truncto-1, SEEK_SET) < 0)
- sysdie("seek in defer file %s while truncating partial", path_ductdefer);
-
- r= getc(defer);
- if (r==EOF) {
- if (ferror(defer))
- sysdie("failed read from defer file %s", path_ductdefer);
- else
- die("defer file %s shrank while we were checking it!", path_ductdefer);
+ int lockfd= open(path_ductlock, O_CREAT|O_RDWR, 0600);
+ if (lockfd<0) sysfatal("open lockfile %s", path_ductlock);
+
+ struct flock fl;
+ memset(&fl,0,sizeof(fl));
+ fl.l_type= F_WRLCK;
+ fl.l_whence= SEEK_SET;
+ r= fcntl(lockfd, F_SETLK, &fl);
+ if (r==-1) {
+ if (errno==EACCES || errno==EAGAIN) {
+ if (quiet_if_locked) exit(0);
+ fatal("another duct holds the lockfile");
+ }
+ sysdie("fcntl F_SETLK lockfile %s", path_ductlock);
}
- if (r=='\n') break;
- truncto--;
- }
-
- if (stab.st_size != truncto) {
- warn("truncating half-record at end of defer file %s -"
- " shrinking by %ld bytes from %ld to %ld",
- path_ductdefer, orgsize - truncto, orgsize, truncto);
- if (fflush(defer))
- sysfatal("could not flush defer file %s", path_ductdefer);
- if (ftruncate(fileno(defer), truncto))
- sysdie("could not truncate defer file %s", path_ductdefer);
+ xfstat_isreg(lockfd, &stabf, "lockfile");
+ xlstat_isreg(path_ductlock, &stab, &noent, "lockfile");
+ if (!noent && samefile(&stab, &stabf))
+ break;
- } else {
- info("continuing existing defer file %s (%ld bytes)",
- path_ductdefer, orgsize);
+ if (close(lockfd))
+ sysdie("could not close stale lockfile %s", path_ductlock);
}
- if (fseek(defer, truncto, SEEK_SET))
- sysdie("could not seek to new end of defer file %s", path_ductdefer);
-}
-
-static void statemc_init(void) {
- struct stat stab;
-
- path_ductlock= xasprintf("%s_duct.lock", feedfile);
- path_duct= xasprintf("%s_duct", feedfile);
- path_ductdefer= xasprintf("%s_duct.defer", feedfile);
+ debug("startup: locked");
- if (lstat(path_ductdefer, &stab)) {
- if (errno!=ENOENT) sysdie("could not check defer file %s", path_defer);
+ xlstat_isreg(path_ductdefer, &stab, &noent, "defer file");
+ if (noent) {
+ debug("startup: ductdefer ENOENT");
} else {
- if (!S_ISREG(stab.st_mode))
- die("defer file %s not a plain file (mode 0%lo)",
- path_defer, (unsigned long)stab.st_mode);
+ debug("startup: ductdefer nlink=%ld", (long)stab.st_nlink);
switch (stab.st_nlink==1) {
case 1: /* ok */ break;
case 2:
path_defer, stab.st_nlink);
}
}
- open_defer();
-
- int lockfd= open(path_ductlock, O_CREAT|O_RDWR, 0600);
- if (lockfd<0) sysfatal("open lockfile %s", path_ductlock);
-
- struct flock fl;
- memset(&fl,0,sizeof(fl));
- fl.l_type= F_WRLCK;
- fl.l_whence= SEEK_SET;
- r= fcntl(lockfd, F_SETLK, &fl);
- if (r==-1) {
- if (errno==EACCES || errno==EAGAIN) {
- if (quiet_if_locked) exit(0);
- fatal("another duct holds the lockfile");
- }
- sysdie("fcntl F_SETLK lockfile %s", path_ductlock);
- }
InputFile *file_d= open_input_file(path_duct);
if (file_d) {
struct stat stab_f, stab_d;
- r= stat(feedfile, &stab_f);
- if (r) {
- if (errno!=ENOENT) sysdie("check feed file %s", feedfile);
- /* D exists, F ENOENT => Moved */
+ xlstat_isreg(feedfile, &stab_f, &noent, "feed file");
+ if (noent) {
+ debug("startup: D exists, F ENOENT => Moved");
goto found_moved;
}
- /* F and D both exist */
+ debug("startup: F and D both exist");
- r= fstat(file_d->fd, &stab_d);
- if (r) sysdie("check duct file %s", ductfile);
+ xfstat_isreg(file_d->fd, &stab_d, "ductfile");
- if (stab_d.st_ino == stab_f.st_ino &&
- stab_d.st_dev == stab_f.st_dev) {
- /* F==D => Hardlinked*/
+ if (samefile(&stab_d, &stab_f)) {
+ debug("startup: F==D => Hardlinked");
r= unlink(path_duct);
if (r) sysdie("unlink feed file %s during startup", feedfile);
found_moved:
- /* => Moved */
+ debug(" => Moved");
startup_set_input_file(file_d);
spawn_inndcomm_flush(); /* => Flushing, sets sms to sm_FLUSHING */
} else {
- /* F!=D => Separated */
+ debug("F!=D => Separated");
SMS(SEPARATED, 0, "found both old and current feed files");
startup_set_input_file(file_d);
}
- } else { /*!file_d*/
+ } else {
+ debug("startup: D ENOENT => Nothing");
SMS(WAITING, open_wait_periods, "no feed file currently exists");
}
}
if (ipf == backlog_input_file) {
notice_processed(ipf,"backlog file",ipf->path);
+ close_input_file(ipf);
if (unlink(ipf->path))
sysdie("could not unlink done backlog file %s", ipf->path);
- close_input_file(ipf);
- fixme trigger search for new backlog file;
+ backlog_input_file= 0;
+ search_backlog_file();
+ return;
}
assert(ipf == old_input_file);
notice_processed(ipf,"feed file",0);
- r= fstat(fileno(defer), &stab);
- if (r) sysdie("check defer file %s", path_defer);
-
- if (fclose(defer)) sysfatal("could not close defer file %s", path_defer);
- defer= 0;
-
- char *backlog= xasprintf("%s_backlog_%lu.%lu", feedfile,
- (unsigned long)now.tv_sec,
- (unsigned long)stab.st_ino);
- if (link(path_defer, path_backlog))
- sysfatal("could not install defer file %s as backlog file %s",
- path_defer, backlog);
- if (unlink(path_defer))
- sysdie("could not unlink old defer link %s to backlog file %s",
- path_defer, backlog);
+ close_defer();
if (unlink(path_duct))
sysdie("could not unlink old duct file %s", path_duct);
}
}
+/*---------- defer and backlog files ----------*/
+
+/* Opens the defer file (path_ductdefer) in "a+" mode unless it is
+ * already open.
+ *
+ * A pre-existing defer file may end in a partially written record.  We
+ * scan backwards from the end for the last newline and truncate away
+ * anything after it, warning about the bytes discarded.  Finally the
+ * stream is seeked to the (possibly new) end of the file. */
+static void open_defer(void) {
+  struct stat stab;
+
+  if (defer) return;
+
+  defer= fopen(path_ductdefer, "a+");
+  if (!defer) sysfatal("could not open defer file %s", path_ductdefer);
+
+  /* truncate away any half-written records */
+
+  xfstat_isreg(fileno(defer), &stab, "newly opened defer file");
+
+  if (stab.st_size > LONG_MAX)
+    die("defer file %s size is far too large", path_ductdefer);
+
+  if (!stab.st_size)
+    return;
+
+  long orgsize= stab.st_size;
+  long truncto= stab.st_size;
+  for (;;) {
+    if (!truncto) break; /* was only (if anything) one half-truncated record */
+    if (fseek(defer, truncto-1, SEEK_SET) < 0)
+      sysdie("seek in defer file %s while truncating partial", path_ductdefer);
+
+    /* r must be an int so that EOF can be told apart from a data byte */
+    int r= getc(defer);
+    if (r==EOF) {
+      if (ferror(defer))
+	sysdie("failed read from defer file %s", path_ductdefer);
+      else
+	die("defer file %s shrank while we were checking it!", path_ductdefer);
+    }
+    if (r=='\n') break;
+    truncto--;
+  }
+
+  if (stab.st_size != truncto) {
+    warn("truncating half-record at end of defer file %s -"
+	 " shrinking by %ld bytes from %ld to %ld",
+	 path_ductdefer, orgsize - truncto, orgsize, truncto);
+
+    if (fflush(defer))
+      sysfatal("could not flush defer file %s", path_ductdefer);
+    if (ftruncate(fileno(defer), truncto))
+      sysdie("could not truncate defer file %s", path_ductdefer);
+
+  } else {
+    info("continuing existing defer file %s (%ld bytes)",
+	 path_ductdefer, orgsize);
+  }
+  if (fseek(defer, truncto, SEEK_SET))
+    sysdie("could not seek to new end of defer file %s", path_ductdefer);
+}
+
+/* Closes the defer file, if one is open, and installs it as a backlog
+ * file named <feedfile>_backlog_<seconds>.<inode>, by hard-linking it to
+ * the new name and then removing the old name.
+ *
+ * Afterwards makes sure that a backlog scan is scheduled no later than
+ * backlog_retry_minperiods+1 periods from now. */
+static void close_defer(void) {
+  struct stat stab;
+
+  if (!defer)
+    return;
+
+  /* we need the inode number for the backlog filename */
+  xfstat(fileno(defer), &stab, "defer file");
+
+  if (fclose(defer))
+    sysfatal("could not close defer file %s", path_ductdefer);
+  defer= 0;
+
+  char *backlog= xasprintf("%s_backlog_%lu.%lu", feedfile,
+			   (unsigned long)now.tv_sec,
+			   (unsigned long)stab.st_ino);
+  if (link(path_ductdefer, backlog))
+    sysfatal("could not install defer file %s as backlog file %s",
+	     path_ductdefer, backlog);
+  if (unlink(path_ductdefer))
+    sysdie("could not unlink old defer link %s to backlog file %s",
+	   path_ductdefer, backlog);
+
+  /* NOTE(review): assumes xasprintf returns malloc'd storage owned by
+   * the caller (its results are kept side by side elsewhere) - confirm */
+  free(backlog);
+
+  if (backlog_nextscan_periods < 0 ||
+      backlog_nextscan_periods > backlog_retry_minperiods + 1)
+    backlog_nextscan_periods= backlog_retry_minperiods + 1;
+}
+
+/* Countdown towards the next backlog scan.  A negative
+ * backlog_nextscan_periods means no scan is scheduled.  Otherwise the
+ * counter is decremented on every call, and the scan is run on the call
+ * which finds it already at zero. */
+static void poll_backlog_file(void) {
+  if (backlog_nextscan_periods < 0)
+    return;
+
+  int remaining= backlog_nextscan_periods;
+  backlog_nextscan_periods= remaining - 1;
+
+  if (remaining > 0)
+    return;
+
+  search_backlog_file();
+}
+
+/* Scans for backlog files matching globpat_backlog and picks the one
+ * with the oldest mtime.
+ *
+ *  - No usable file found: schedule a rescan after
+ *    backlog_spontaneous_rescan_periods.
+ *  - Oldest file is at least backlog_retry_minperiods periods old: open
+ *    it as backlog_input_file, start reading it, and cancel scheduled
+ *    scans (backlog_nextscan_periods= -1).
+ *  - Oldest file is still too young: schedule the next scan for when it
+ *    should have become old enough, capped by the spontaneous rescan
+ *    interval if that is nonnegative.
+ *
+ * Must not be called while a backlog input file is already open. */
+static void search_backlog_file(void) {
+  glob_t gl;
+  int r;
+  size_t i;
+  struct stat stab;
+  const char *oldest_path=0; /* points into gl; valid until globfree */
+  time_t oldest_mtime=0, now;
+  double age;
+  long age_deficiency;
+
+  assert(!backlog_input_file);
+
+  r= glob(globpat_backlog, GLOB_ERR|GLOB_MARK|GLOB_NOSORT, 0, &gl);
+
+  switch (r) {
+  case GLOB_ABORTED:
+    sysdie("failed to expand backlog pattern %s", globpat_backlog);
+  case GLOB_NOSPACE:
+    die("out of memory expanding backlog pattern %s", globpat_backlog);
+  case 0:
+    for (i=0; i<gl.gl_pathc; i++) {
+      const char *path= gl.gl_pathv[i];
+      r= stat(path, &stab);
+      if (r) {
+	syswarn("failed to stat backlog file %s", path);
+	continue;
+      }
+      if (!S_ISREG(stab.st_mode)) {
+	warn("backlog file %s is not a plain file (or link to one)", path);
+	continue;
+      }
+      if (!oldest_path || stab.st_mtime < oldest_mtime) {
+	oldest_path= path;
+	oldest_mtime= stab.st_mtime;
+      }
+    }
+    break;
+  case GLOB_NOMATCH:
+    break;
+  default:
+    sysdie("glob expansion of backlog pattern %s gave unexpected"
+	   " nonzero (error?) return value %d", globpat_backlog, r);
+  }
+
+  /* gl must stay allocated until we have finished with oldest_path,
+   * so every exit from here on goes via `done'. */
+
+  if (!oldest_path) {
+    debug("backlog scan: none");
+    backlog_nextscan_periods= backlog_spontaneous_rescan_periods;
+    goto done;
+  }
+
+  now= time(0);  if (now==(time_t)-1) sysdie("time(2) failed");
+  age= difftime(now, oldest_mtime);
+  age_deficiency= (backlog_retry_minperiods * PERIOD_SECONDS) - age;
+
+  if (age_deficiency <= 0) {
+    debug("backlog scan: found age=%f deficiency=%ld oldest=%s",
+	  age, age_deficiency, oldest_path);
+
+    backlog_input_file= open_input_file(oldest_path);
+    if (!backlog_input_file) {
+      /* open_input_file gives null when the file does not exist; it
+       * must have gone away since the glob, so look again next period */
+      warn("backlog file %s vanished before we could open it", oldest_path);
+      backlog_nextscan_periods= 0;
+      goto done;
+    }
+    inputfile_tailing_start(backlog_input_file);
+    backlog_nextscan_periods= -1;
+    goto done;
+  }
+
+  backlog_nextscan_periods= age_deficiency / PERIOD_SECONDS;
+
+  if (backlog_spontaneous_rescan_periods >= 0 &&
+      backlog_nextscan_periods > backlog_spontaneous_rescan_periods)
+    backlog_nextscan_periods= backlog_spontaneous_rescan_periods;
+
+  debug("backlog scan: young age=%f deficiency=%ld nextscan=%d oldest=%s",
+	age, age_deficiency, backlog_nextscan_periods, oldest_path);
+
+ done:
+  globfree(&gl);
+}
+
/*========== flushing the feed ==========*/
static pid_t inndcomm_child;
if (WIFEXITED(status)) {
switch (WEXITSTATUS(status)) {
-
+
case INNDCOMMCHILD_ESTATUS_FAIL:
goto failed;
default:
goto unexpected_exitstatus;
-
+
}
} else if (WIFSIGNALED(status) && WTERMSIG(status) == SIGALRM) {
warn("flush timed out trying to talk to innd");
postfork_stdio(defer);
}
-
#define EVERY(what, interval, body) \
static const struct timeval what##_timeout = { 5, 0 }; \
static void what##_schedule(void); \
loop->on_time(loop, what##_timeout, what##_timedout, 0); \
}
-EVERY(filepoll, {5,0}, { check_master_queue(); })
+EVERY(filepoll, {5,0}, {
+ if (main_input_file && main_input_file->readable_callback)
+ filemon_callback(main_input_file);
+});
EVERY(period, {PERIOD_SECONDS,0}, {
if (connect_delay) connect_delay--;
+ poll_backlog_file();
+ if (!backlog_input_file) close_defer(); /* want to start on a new backlog */
statemc_poll();
check_master_queue();
});
void op_seconds(const Option *o, const char *val) {
int *store= o->store;
char *ep;
-
+
double v= strtod(val,&ep);
if (ep==val) badusage("bad time/duration value for %s",o->long);
int main(int argc, char **argv) {
const char *arg;
-
+
for (;;) {
arg= *++argv;
if (!arg) break;
if (nocheck_decay_articles < 0.1)
badusage("nocheck decay articles must be at least 0.1");
nocheck_decay= 1 - 1/nocheck_decay_articles;
-
+
innconf_read(inndconffile);
if (!feedfile)
badusage("feed filename must be nonempty");
else if (feedfile[strlen(feedfile)-1]=='/')
feedfile= xasprintf("%s%s",feedfile,sitename);
-
+
+ const char *feedfile_forbidden= "?*[";
+ int c;
+ while ((c= *feedfile_forbidden++))
+ if (strchr(feedfile, c))
+ badusage("feed filename may not contain glob metacharacter %c",c);
+
if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
sysdie("could not ignore SIGPIPE");
dup2(null,1);
dup2(null,2);
close(null);
-
+
pid_t child1= xfork("daemonise first fork");
if (child1) _exit(0);