2 * "Interpreter" that you can put in #! like this
3 * #!/usr/bin/prefork-interp [<options>] <interpreter>
6 * prefork-interp [<option> ..] <interpreter> [<script> [<args> ...]]
7 * prefork-interp [<option>,..],<interpreter> <script> [<args> ...]
8 * prefork-interp '[<option> ..] <interpreter>' <script> [<args> ...]
10 * Options must specify argument laundering mode.
11 * Currently the only mode supported is:
12 * -U unlaundered: setup and executor both get all arguments and env vars
13 * ident covers only env vars specified with -E
14 * ident covers only arguments interpreter and (if present) script
18 ***************************************************************************
20 State during service execution, process parentage and key fds
26 && session leader (daemon)
27 & process group leader
33 || listen watch-err/in
34 || call (accept) \ ,------2
35 || ,-----------------------------. SERVER -----0 WATCHER(C)
36 CLIENT 2--=fdpassed>=---------. \ || && | &&
37 (C) 1--=fdpassed>=---------. \ \ || inotify
38 0--=fdpassed>=---------. \ \ \ || sockpath
53 ***************************************************************************
55 Control flow and causality
57 | - \ / process control flow
58 ... < > causes mediated by fds or other IPC etc.
59 && session leader (daemon)
60 & process group leader
61 # language/implementation boundary
62 *1 line continued elsewhere
64 ______ process termination (after reaping, if shown)
73 attempt to connect, and read greeting
76 tidy up stale /run entries *1 (continue from send_fds, below)
79 retry attempt to connect, and read greeting
82 create listening socket release lock
85 | `------------------.
88 make "fake" initial call socketpair (C)
90 fork/exec #########################################################
91 | `-------------. application
94 | # script initialisation
96 | # ########|#############################################
98 | # identify fds from environment (Perl)
103 waitpid # fork for initial service
106 | # | SCRIPT [server] &&
108 | # | ** accept / event loop **
109 | # | accepted? \ \ \
110 | # | / \ watch\ \idle
111 | # | fork child \stderr\ \timeout?
112 | # | _________/ | | |
114 | # SCRIPT [monitor] | eof?| |
117 read ,....<.....send greeting | | |
118 greeting # | ___________________
126 | # fork for executor (Perl)
127 | # |parent? \child? prefork-interp
128 | # | ######\############################
129 | # | # SCRIPT (executor) application
130 | # | # execute service
131 | # wait for read # |
132 | # (select) # terminates
134 | # | # kernel closes execterm
135 | # | ,......<....../|
138 | # | | ,......<...../
139 | # waitpid # _______________
141 | ,....<....,..send status #
142 read status # ________________ #
146 ********** Or, if client is killed **********
148 | # | # execute service
149 terminates # wait for read # |
154 _____________ # \|call? # |
156 # kill whole pgrp... # killed
158 # | | ,......<....../
159 # waitpid # _______________
162 # _____SIGPIPE______ #
164 ***************************************************************************
166 Sequence of events and fd plumbing.
167 NB INCOMPLETE - does not cover execterm, cleanup
169 client (C wrapper) connects to server
170 (including reading ack byte)
172 === acquires lock ===
173 makes new listening socket
175 forks watcher and awaits
176 makes first-instance socketpair
177 forks setup (script, sock fds indicated in env)
178 fd0, fd1, fd2: from-outer
179 other fd: call(client-end)(fake)
180 reaps setup (and reports error)
181 (implicitly releases lock)
183 watcher fd[012]: watcher pipes
184 starts watch on socket path
185 sets stderr to line buffered
186 sets stdin to nonblocking
187 daemonises (one fork, becomes session leader)
188 when socket stat changes, quit
190 setup (pre-exec) fd0: null,
191 fd[12]: fd2-from-outer
192 env fds: listener, call(server-end)(fake),
193 watcher read, watcher write
195 possibly clean env, argv
197 setup (script) runs initialisation parts of the script
198 at prefork establishment point:
199 setup (pm) [1] opens syslog
203 server (pm) [1] [fd0: null],
204 [fd[12]: fd2-from-outer]
206 right away, forks init monitor
207 [2] closes outer caller fds and call(fake)
208 [server (pm)] fd[012]: null
209 other fds: listener, syslog
210 runs in loop accepting and forking,
211 reaping and limiting children (incl init monitor)
212 reports failures of monitors to syslog
214 [client (C wrapper)] if client connect succeeds:
215 now fd: call(client-end)
216 sends message with: cmdline, env
219 [server (script)] accepts, forks subseq monitor
221 monitor [1] [fd0: null]
222 (init [fd[12]: init: fd2-from-outer; subseq: null]
223 or errors: init: fd2; subseq: syslog
224 subseq) other fds: syslog, call(server-end)
226 receives args, env, fds
229 executor sorts out fds:
230 fd0, fd1, fd2: from-outer
231 close fds: call(server-end)
235 runs main part of script
238 [monitor] [fd[012]: null]
239 [fd[12]: init: fd2-from-outer; subseq: null]
240 [errors: init: fd2; subseq: syslog]
242 reports status via socket
244 [client (C wrapper)] [fd0, fd1, fd2: from-outer]
245 [other fd: call(client-end)]
246 receives status, exits appropriately
247 (if was bad signal, reports to stderr, exits 127)
249 ***************************************************************************
253 #include <arpa/inet.h>
// Program name.  Non-static; presumably referenced by shared helper code
// declared elsewhere -- TODO confirm.
259 const char our_name[] = "prefork-interp";
// AF_UNIX address of the service socket.  main() fills it in from
// socket_path; it is used for both connect() and bind().
261 static struct sockaddr_un sockaddr_sun;
// stdio stream wrapping the call socket; set by connect_or_spawn().
262 static FILE *call_sock;
264 #define ACK_BYTE '\n'
// Command line to exec, built by make_executor_argv() and used by
// become_setup().
266 static const char *const *executor_argv;
// Protocol magic compared against the start of the greeting in
// read_greeting().  The array is exactly 4 bytes, so it holds no
// terminating NUL; always use sizeof, never string functions, on it.
268 static const char header_magic[4] = "PFI\n";
// Write the usage summary to stream f.
270 void fusagemessage(FILE *f) {
271 fprintf(f, "usage: #!/usr/bin/prefork-interp [<options>]\n");
// Argument laundering mode.  The 'U' entry in cmdinfos points .iassignto
// at this variable, and make_executor_argv() switches on its value.
274 static int laundering;
275 static int max_sockets = 100; // maximum entries in the run dir is 2x this
// stat() of the freshly bound socket, taken in connect_or_spawn(); the
// watcher compares later stat() results against it.
277 static struct stat initial_stab;
// Option table (struct cmdinfo is declared elsewhere).
279 const struct cmdinfo cmdinfos[]= {
281 { 0, 'U', 0, .iassignto= &laundering, .arg= 'U' },
// Feed this program's fixed one-byte magic (a single zero byte) into the
// ident hash context identsc.
285 void ident_addinit(void) {
286 char ident_magic[1] = { 0 };
287 sha256_update(&identsc, sizeof(ident_magic), ident_magic);
// Act on a wait status obtained for a child process.
//   status  wait status as returned by waitpid()
//   what    short description of the child, used in error messages
// Distinguishes normal exit, death by signal, and anything else.  For a
// signal death that did not dump core (subject to a further condition on
// the same test), the signal's disposition is reset to SIG_DFL and the
// signal is unblocked, apparently so that this process can die of the same
// signal.  Other signal deaths, and unrecognised statuses, are reported
// with die().
290 static void propagate_exit_status(int status, const char *what) {
293 if (WIFEXITED(status)) {
297 if (WIFSIGNALED(status)) {
298 int sig = WTERMSIG(status);
299 const char *signame = strsignal(sig);
// Guard against strsignal() yielding a null pointer.
300 if (signame == 0) signame = "unknown signal";
302 if (! WCOREDUMP(status) &&
// Make sure nothing in this process catches or ignores the signal ...
309 sa.sa_handler = SIG_DFL;
310 r = sigaction(sig, &sa, 0);
311 if (r) diee("failed to reset signal handler while propagating %s",
// ... and that it is not blocked either.
316 sigaddset(&sset, sig);
317 r = sigprocmask(SIG_UNBLOCK, &sset, 0);
318 if (r) diee("failed to reset signal block while propagating %s",
// Reaching this point means the signal did not terminate us.
322 die("unexpectedly kept running after raising (to propagate) %s",
// Signal deaths that are not propagated are reported instead.
326 die("%s failed due to signal %d %s%s", what, sig, signame,
327 WCOREDUMP(status) ? " (core dumped)" : "");
// Neither WIFEXITED nor WIFSIGNALED.
330 die("%s failed with weird wait status %d 0x%x", what, status, status);
// qsort() comparator: orders PrecleanEntry items by name_hash using
// strcmp(), so entries with equal names end up adjacent.
338 static int preclean_entry_compar_name(const void *av, const void *bv) {
339 const PrecleanEntry *a = av;
340 const PrecleanEntry *b = bv;
341 return strcmp(a->name_hash, b->name_hash);
// qsort() comparator keyed on the atime field of PrecleanEntry.
344 static int preclean_entry_compar_atime(const void *av, const void *bv) {
345 const PrecleanEntry *ae = av; time_t a = ae->atime;
346 const PrecleanEntry *be = bv; time_t b = be->atime;
// lstat() s_path (so a symlink is not followed) and return its st_atime.
// An errno other than ENOENT is fatal; a vanished path is tolerated.
351 static time_t preclean_stat_atime(const char *s_path) {
353 int r= lstat(s_path, &stab);
355 if (errno!=ENOENT) diee("pre-cleanup: stat socket (%s)", s_path);
358 return stab.st_atime;
// Pre-cleanup of the run directory (run_base).
// Collects directory entries whose names start with 'l' or 's', keyed by
// the rest of the name, dedupes them, and -- if more than max_sockets
// remain -- sorts by atime and removes socket and lock files for some of
// them.  A missing run directory is not an error.
361 static void preclean(void) {
362 DIR *dir = opendir(run_base);
364 if (errno == ENOENT) return;
365 diee("pre-cleanup: open run dir (%s)", run_base);
368 PrecleanEntry *entries=0;
369 size_t avail_entries=0;
370 size_t used_entries=0;
// errno is cleared before each readdir() so that a null return meaning
// end-of-directory can be told apart from a read error (checked after
// the loop).
373 while ((errno = 0, de = readdir(dir))) {
374 char c0 = de->d_name[0];
375 if (!(c0 == 'l' || c0 == 's')) continue;
// The key is the name without its one-letter prefix; the atime is always
// taken from the "s" path, whichever of the two files this entry is.
376 char *name_hash = m_asprintf("%s", de->d_name+1);
377 char *s_path = m_asprintf("%s/s%s", run_base, name_hash);
378 time_t atime = preclean_stat_atime(s_path);
// Grow the array when full.
380 if (avail_entries == used_entries) {
381 assert(avail_entries < INT_MAX / 4 / sizeof(PrecleanEntry));
// NOTE(review): the realloc() result is assigned straight to entries;
// confirm that allocation failure is checked before entries is used.
384 entries = realloc(entries, avail_entries * sizeof(PrecleanEntry));
386 entries[used_entries].name_hash = name_hash;
387 entries[used_entries].atime = atime;
390 if (errno) diee("pre-cleanup: read run dir (%s)", run_base);
392 // First we dedupe (after sorting by path)
393 qsort(entries, used_entries, sizeof(PrecleanEntry),
394 preclean_entry_compar_name);
// p scans every entry; q is one past the last entry kept so far.
395 PrecleanEntry *p, *q;
396 for (p=entries, q=entries; p < entries + used_entries; p++) {
397 if (q > entries && !strcmp(p->name_hash, (q-1)->name_hash))
401 used_entries = q - entries;
403 // Now maybe delete some things
405 // Actually this has an off-by-one error since we are about
406 // to create a socket, so the actual number of sockets is one more.
407 // But, *actually*, since there might be multiple of us running at once,
408 // we might have even more than that. This doesn't really matter.
409 if (used_entries > max_sockets) {
410 qsort(entries, used_entries, sizeof(PrecleanEntry),
411 preclean_entry_compar_atime);
// NOTE(review): this walks the first max_sockets entries of the sorted
// array, not the excess (used_entries - max_sockets) -- confirm that this
// is the intended number of entries to delete.
412 for (p=entries; p < entries + max_sockets; p++) {
413 char *l_path = m_asprintf("%s/l%s", run_base, p->name_hash);
414 char *s_path = m_asprintf("%s/s%s", run_base, p->name_hash);
415 int lock_fd = flock_file(l_path);
416 // Recheck atime - we might have raced!
417 time_t atime = preclean_stat_atime(s_path);
418 if (atime != p->atime) {
419 // Raced. This will leave us deleting too few things. Whatever.
421 int r= unlink(s_path);
422 if (r && errno!=ENOENT) diee("preclean: delete stale (%s)", s_path);
// NOTE(review): this message is about the lock file but prints s_path --
// confirm whether l_path was intended.
424 if (r) diee("preclean: delete stale lock (%s)", s_path);
425 // NB we don't hold the lock any more now.
433 for (p=entries; p < entries + used_entries; p++)
// Fatal error for data too long to be encoded with a 32-bit length.
// Does not return.
438 static __attribute((noreturn)) void die_data_overflow(void) {
439 die("cannot handle data with length >2^32");
// Append dl bytes from data to the message under construction.
//   len  running total of message bytes
//   buf  write cursor into the output buffer
// send_request() calls the prepare_* family twice: once passing only len
// (to measure) and once passing only buf (to fill), so each of the two is
// presumably optional here -- the guards are expected around the uses.
442 static void prepare_data(size_t *len, char **buf,
443 const void *data, size_t dl) {
// Refuse a length whose addition would overflow the size_t total.
445 if (dl >= SIZE_MAX - *len)
450 memcpy(*buf, data, dl);
// Append dl_sz to the message as a 32-bit length in network byte order.
// A value above UINT32_MAX is fatal.
455 static void prepare_length(size_t *len, char **buf, size_t dl_sz) {
456 if (dl_sz > UINT32_MAX) die_data_overflow();
457 uint32_t dl = htonl(dl_sz);
458 prepare_data(len, buf, &dl, sizeof(dl));
// Append the string s to the message, including its terminating NUL.
461 static void prepare_string(size_t *len, char **buf, const char *s) {
462 size_t sl = strlen(s);
463 prepare_data(len, buf, s, sl+1);
// Serialise the request body as a sequence of NUL-terminated strings:
// strings taken from environ, then an empty string as a separator, then a
// second group of strings (NOTE(review): presumably the command line
// arguments -- confirm).
466 static void prepare_message(size_t *len, char **buf) {
469 const char *const *p = (void*)environ;
472 prepare_string(len, buf, s);
// The empty string marks the end of the first group.
475 prepare_string(len, buf, "");
479 prepare_string(len, buf, s);
// Pass the descriptor payload_fd to the peer over the call socket, as
// SCM_RIGHTS ancillary data on a sendmsg() whose ordinary payload is a
// dummy byte.
482 static void send_fd(int payload_fd) {
483 int via_fd = fileno(call_sock);
// Control buffer sized for one int; the struct cmsghdr member is
// presumably there to give the buffer suitable alignment.
486 struct cmsghdr align;
487 char buf[CMSG_SPACE(sizeof(payload_fd))];
497 iov.iov_base = &dummy_byte;
503 msg.msg_control = cmsg_buf.buf;
504 msg.msg_controllen = sizeof(cmsg_buf.buf);
// Fill in the single control message carrying the descriptor.
506 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
507 cmsg->cmsg_level = SOL_SOCKET;
508 cmsg->cmsg_type = SCM_RIGHTS;
509 cmsg->cmsg_len = CMSG_LEN(sizeof(payload_fd));
510 *(int*)CMSG_DATA(cmsg) = payload_fd;
// NOTE(review): msg_controllen was already set to this same value above;
// confirm whether this second assignment is needed.
512 msg.msg_controllen = sizeof(cmsg_buf.buf);
515 ssize_t r = sendmsg(via_fd, &msg, 0);
// Retry if interrupted by a signal.
517 if (errno == EINTR) continue;
// Build the request message and write it to the call socket.
// prepare_message() is run twice: first with only a length accumulator to
// measure the body, then with only a buffer cursor to fill an exactly
// sized allocation.  Any failure is fatal.
525 static void send_request(void) {
526 // Sending these first makes it easier for the script to
527 // use buffered IO for the message.
// Pass 1: measure.
533 prepare_message(&len, 0);
// 4 extra bytes for the 32-bit length prefix written by prepare_length().
535 size_t tlen = len + 4;
536 char *m = xmalloc(tlen);
// Pass 2: fill.  The cursor must land exactly at the end of the buffer.
538 prepare_length(0, &p, len);
539 prepare_message(0, &p);
540 assert(p == m + tlen);
// The whole message is written as one fwrite() item, so success is 1.
542 ssize_t sr = fwrite(m, tlen, 1, call_sock);
543 if (sr != 1) diee("write request (buffer)");
545 if (fflush(call_sock)) diee("write request");
// Wrap the socket descriptor fd in an unbuffered read/write stdio stream.
// Failure of fdopen() or setvbuf() is fatal.
// Note that the local variable call_sock shadows the file-scope one.
548 static FILE *call_sock_from_fd(int fd) {
551 FILE *call_sock = fdopen(fd, "r+");
552 if (!call_sock) diee("fdopen socket");
554 r = setvbuf(call_sock, 0, _IONBF, 0);
555 if (r) die("setvbuf socket");
// True if the stream hit end-of-file, or if errno is ECONNRESET (a reset
// connection is treated the same as EOF).  Relies on errno still holding
// the value from the failed read.
560 static bool was_eof(FILE *call_sock) {
561 return feof(call_sock) || errno==ECONNRESET;
// Read exactly sz bytes from call_sock into data, as a single fread() item.
// Returns -1 if the peer went away (see was_eof()); any other read
// failure is fatal.  Callers treat a negative return as "peer gone".
565 static int protocol_read_maybe(void *data, size_t sz) {
567 size_t sr = fread(data, sz, 1, call_sock);
569 if (was_eof(call_sock)) return -1;
// NOTE(review): sz is a size_t, for which the matching conversion is %zu
// rather than %zd.
570 diee("read() on monitor call socket (%zd)", sz);
// As protocol_read_maybe(), but the peer going away is fatal too.
575 static void protocol_read(void *data, size_t sz) {
576 if (protocol_read_maybe(data, sz) < 0)
577 die("monitor process quit unexpectedly");
580 // Returns 0 if OK, error msg if peer was garbage.
// Reads the greeting from call_sock: the protocol magic, then a length and
// that many bytes of extension data (xdata).  EOF while reading the magic
// gives the "garbage" return; a wrong magic, or EOF later on, is fatal.
581 static const char *read_greeting(void) {
582 char got_magic[sizeof(header_magic)];
584 if (protocol_read_maybe(&got_magic, sizeof(got_magic)) < 0)
585 return "initial monitor process quit";
587 if (memcmp(got_magic, header_magic, sizeof(header_magic)))
588 die("got unexpected protocol magic 0x%02x%02x%02x%02x",
589 got_magic[0], got_magic[1], got_magic[2], got_magic[3]);
// NOTE(review): lengths sent by this program go through htonl() in
// prepare_length(); confirm that xdata_len is converted from network byte
// order before it is used as a size below.
592 protocol_read(&xdata_len, sizeof(xdata_len));
593 void *xdata = xmalloc(xdata_len);
594 protocol_read(xdata, xdata_len);
599 // Returns: call(client-end), or 0 to mean "is garbage"
600 // find_socket_path must have been called
// Creates an AF_UNIX stream socket and connects it to sockaddr_sun.
// ECONNREFUSED and ENOENT from connect() mean there is no usable server
// and lead to the "garbage" exit path; other errors are fatal.
601 static FILE *connect_existing(void) {
605 fd = socket(AF_UNIX, SOCK_STREAM, 0);
606 if (fd==-1) diee("socket() for client");
608 socklen_t salen = sizeof(sockaddr_sun);
609 r = connect(fd, (const struct sockaddr*)&sockaddr_sun, salen);
611 if (errno==ECONNREFUSED || errno==ENOENT) goto x_garbage;
612 diee("connect() %s", socket_path);
// From here on the stream owns the descriptor.
615 call_sock = call_sock_from_fd(fd);
// Garbage path: release whatever was acquired.
// NOTE(review): fclose() also closes the underlying descriptor; confirm
// fd is invalidated once the stream owns it, so that the close() below
// cannot close the same descriptor a second time.
624 if (call_sock) { fclose(call_sock); call_sock=0; }
625 if (fd >= 0) close(fd);
// libuv poll callback for the watcher's stdin.
// A non-zero status (a negated errno value) is fatal.  A read error on
// stdin is fatal unless it is EINTR, EWOULDBLOCK or EAGAIN.
629 static void watcher_cb_stdin(uv_poll_t *handle, int status, int events) {
633 if ((errno = -status)) diee("watcher: poll stdin");
637 if (!(errno==EINTR || errno==EWOULDBLOCK || errno==EAGAIN))
638 diee("watcher: read sentinel stdin");
// libuv fs-event callback for the socket path.
// Re-stat()s socket_path: if it no longer exists the watcher exits with
// status 0; otherwise the result is compared with initial_stab to see
// whether the path still refers to the same inode.
642 static void watcher_cb_sockpath(uv_fs_event_t *handle, const char *filename,
643 int events, int status) {
645 struct stat now_stab;
// NOTE(review): the message says "poll stdin" but this is the socket-path
// callback -- looks copied from watcher_cb_stdin; confirm.
647 if ((errno = -status)) diee("watcher: poll stdin");
649 r= stat(socket_path, &now_stab);
651 if (errno==ENOENT) _exit(0);
// Retry the stat() if it was interrupted.
652 if (errno==EINTR) continue;
653 diee("stat socket: %s", socket_path);
655 if (!stabs_same_inode(&now_stab, &initial_stab))
660 // On entry, stderr is still inherited, but 0 and 1 are the pipes
// Turns this process into the socket watcher; never returns.
// Sets up a libuv loop with a poll handle on fd 0 and a filesystem event
// handle dispatching to watcher_cb_sockpath, then daemonises and runs the
// loop.  libuv calls return negated errno values, hence the "errno= -..."
// idiom so that diee() can report them.
661 static __attribute__((noreturn))
662 void become_watcher(void) {
664 uv_poll_t uvhandle_stdin;
665 uv_fs_event_t uvhandle_sockpath;
670 errno= -uv_loop_init(&loop);
671 if (errno) diee("watcher: uv_loop_init");
673 errno= -uv_poll_init(&loop, &uvhandle_stdin, 0);
674 if (errno) diee("watcher: uv_poll_init");
675 errno= -uv_poll_start(&uvhandle_stdin,
676 UV_READABLE | UV_WRITABLE | UV_DISCONNECT,
678 if (errno) diee("watcher: uv_poll_start");
680 errno= -uv_fs_event_init(&loop, &uvhandle_sockpath);
681 if (errno) diee("watcher: uv_fs_event_init");
683 errno= -uv_fs_event_start(&uvhandle_sockpath, watcher_cb_sockpath,
685 if (errno) diee("watcher: uv_fs_event_start");
687 // OK everything is set up, let us daemonise
// From now on errors go down the pipe on fd 1 rather than to the
// inherited stderr.
688 if (dup2(1,2) != 2) diee("watcher: set daemonised stderr");
689 r= setvbuf(stderr, 0, _IOLBF, BUFSIZ);
690 if (r) diee("watcher: setvbuf stderr");
692 pid_t child = fork();
693 if (child == (pid_t)-1) diee("watcher: fork");
696 if (setsid() == (pid_t)-1) diee("watcher: setsid");
// uv_run() returning at all is treated as a fatal error.
698 r= uv_run(&loop, UV_RUN_DEFAULT);
699 die("uv_run returned (%d)", r);
// Turns this (forked) process into the setup process; never returns.
//   sfd             listening socket
//   fake_pair       socketpair for the fake initial call; element [1] is
//                   the end handed to the script
//   watcher_stdin   descriptor number advertised as the third fd
//   watcher_stderr  descriptor number advertised as the fourth fd
// Points fd 0 at /dev/null and fd 1 at whatever fd 2 is, publishes the
// descriptor numbers in the PREFORK_INTERP environment variable, and
// exec()s executor_argv.
702 static __attribute__((noreturn))
703 void become_setup(int sfd, int fake_pair[2],
704 int watcher_stdin, int watcher_stderr) {
706 int call_fd = fake_pair[1];
// dup2() onto fd 0 returns 0 on success, so any non-zero result is a
// failure.
708 int null_0 = open("/dev/null", O_RDONLY); if (null_0 < 0) diee("open null");
709 if (dup2(null_0, 0)) diee("dup2 /dev/null onto stdin");
// NOTE(review): this uses die() rather than diee(), so errno is not
// reported on failure -- confirm that is intended.
711 if (dup2(2, 1) != 1) die("dup2 stderr onto stdout");
715 // Extension could work like this:
717 // We advertise a new protocol (perhaps one which is nearly entirely
718 // different after the connect) by putting a name for it comma-separated
719 // next to "v1". Simple extension can be done by having the script
720 // side say something about it in the ack xdata, which we currently ignore.
// The string is handed to putenv(), which keeps the pointer, so it must
// not be freed.
721 putenv(m_asprintf("PREFORK_INTERP=v1 %d,%d,%d,%d",
722 sfd, call_fd, watcher_stdin, watcher_stderr));
724 execvp(executor_argv[0], (char**)executor_argv);
// Only reached if execvp() failed.
725 diee("execute %s", executor_argv[0]);
// Establish call_sock: connect to an existing server if there is one,
// otherwise start a new one.
// Starting a new one means: take the lock, retry the connect, replace any
// stale socket with a fresh listening socket, fork the watcher, fork the
// setup process with a fake initial call socketpair, wait for setup to
// finish, and read the greeting over the fake call socket.  All failures
// are fatal.
728 static void connect_or_spawn(void) {
731 call_sock = connect_existing();
732 if (call_sock) return;
734 // We're going to make a new one, so clean out old ones
// Retry under the lock: another client may have started the server
// between the first attempt and acquiring the lock.
737 int lockfd = acquire_lock();
738 call_sock = connect_existing();
739 if (call_sock) { close(lockfd); return; }
741 // We must start a fresh one, and we hold the lock
743 r = unlink(socket_path);
744 if (r<0 && errno!=ENOENT)
745 diee("failed to remove stale socket %s", socket_path);
747 int sfd = socket(AF_UNIX, SOCK_STREAM, 0);
748 if (sfd<0) diee("socket() for new listener");
750 socklen_t salen = sizeof(sockaddr_sun);
751 r= bind(sfd, (const struct sockaddr*)&sockaddr_sun, salen);
752 if (r<0) diee("bind() on new listener");
// Record the new socket's identity for the watcher's later comparisons.
754 r= stat(socket_path, &initial_stab);
755 if (r<0) diee("stat() fresh socket");
757 // We never want callers to get ECONNREFUSED. But:
758 // There is a race here: from my RTFM they may get ECONNREFUSED
759 // if they try between our bind() and listen(). But if they do, they'll
760 // acquire the lock (serialising with us) and retry, and then it will work.
761 r = listen(sfd, INT_MAX);
762 if (r<0) diee("listen() for new listener");
// Two pipes for the watcher: one becomes its fd 0, the other its fd 1.
766 int watcher_stdin[2];
767 int watcher_stderr[2];
768 if (pipe(watcher_stdin) || pipe(watcher_stderr))
769 diee("pipe() for socket inode watcher");
771 pid_t watcher = fork();
772 if (watcher == (pid_t)-1) diee("fork for watcher");
// Watcher side: keep the read end of "stdin" and the write end of
// "stderr", move them onto fds 0 and 1, and drop the originals.
776 close(watcher_stdin[1]);
777 close(watcher_stderr[0]);
778 if (dup2(watcher_stdin[0], 0) != 0 ||
779 dup2(watcher_stderr[1], 1) != 1)
780 diee("initial dup2() for watcher");
781 close(watcher_stdin[0]);
782 close(watcher_stderr[1]);
// Our side: keep the opposite ends.
786 close(watcher_stdin[0]);
787 close(watcher_stderr[1]);
788 nonblock(watcher_stderr[0]);
793 r = socketpair(AF_UNIX, SOCK_STREAM, 0, fake_pair);
794 if (r<0) diee("socketpair() for fake initial connection");
796 pid_t setup_pid = fork();
797 if (setup_pid == (pid_t)-1) diee("fork for spawn setup");
798 if (!setup_pid) become_setup(sfd, fake_pair,
799 watcher_stdin[1], watcher_stderr[0]);
// Parent keeps end [0] of the fake pair as its call socket.
803 call_sock = call_sock_from_fd(fake_pair[0]);
806 pid_t got = waitpid(setup_pid, &status, 0);
807 if (got == (pid_t)-1) diee("waitpid setup [%ld]", (long)setup_pid);
808 if (got != setup_pid) diee("waitpid setup [%ld] gave [%ld]!",
809 (long)setup_pid, (long)got);
// Any non-zero wait status from setup is handed on (fatal).
810 if (status != 0) propagate_exit_status(status, "setup");
812 const char *emsg = read_greeting();
813 if (emsg) die("setup failed: %s", emsg);
// Build executor_argv from the interpreter, the script (if any) and the
// remaining arguments in argv.  Requires a laundering mode to have been
// chosen; anything unrecognised is fatal.
// The argument list is walked twice with the same macro: once to count,
// once to store.
819 static void make_executor_argv(const char *const *argv) {
820 switch (laundering) {
822 default: die("need -U (specifying unlaundered argument handling)");
// EACH_NEW_ARG(EACH) runs the statements EACH once per output argument,
// with `arg` set to: interp, then script if non-null, then each remaining
// element of argv up to its terminating null pointer.
826 #define EACH_NEW_ARG(EACH) { \
827 arg = interp; { EACH } \
828 if ((arg = script)) { EACH } \
829 const char *const *walk = argv; \
830 while ((arg = *walk++)) { EACH } \
// Pass 1: count.
834 EACH_NEW_ARG( (void)arg; count++; );
// calloc() zero-fills, so unwritten slots are null pointers.
// NOTE(review): confirm count leaves room for the terminating null
// pointer that execvp() requires.
836 const char **out = calloc(count, sizeof(char*));
837 executor_argv = (const char* const*)out;
838 if (!executor_argv) diee("allocate for arguments");
// Pass 2: store.
840 EACH_NEW_ARG( *out++ = arg; );
// Entry point: builds the executor command line, fills in the socket
// address, then reads the final status from the call socket and
// propagates it as this process's own fate.
844 int main(int argc_unused, const char *const *argv) {
851 // which ought to be passed on to the actual executor.
852 make_executor_argv(argv);
855 FILLZERO(sockaddr_sun);
856 sockaddr_sun.sun_family = AF_UNIX;
// A path of exactly sizeof(sun_path) bytes passes the assert and is then
// stored by strncpy() without a terminating NUL.
857 assert(strlen(socket_path) <= sizeof(sockaddr_sun.sun_path));
858 strncpy(sockaddr_sun.sun_path, socket_path, sizeof(sockaddr_sun.sun_path));
862 // We're committed now, send the request (or bail out)
// The status arrives in network byte order.
866 protocol_read(&status, sizeof(status));
868 status = ntohl(status);
// It is about to be used as an int wait status, so it must fit in one.
869 if (status > INT_MAX) die("status 0x%lx does not fit in an int",
870 (unsigned long)status);
872 propagate_exit_status(status, "invocation");