1 # Copyright 2022 Ian Jackson and contributors to chiark-utils
2 # SPDX-License-Identifier: GPL-3.0-or-later
3 # There is NO WARRANTY.
5 package Proc::Prefork::Interp;
7 our @ISA = qw(Exporter);
9 prefork_initialisation_complete
10 prefork_autoreload_also_check
16 use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
18 use POSIX qw(_exit setsid :sys_wait_h :errno_h :signal_h);
19 use Sys::Syslog qw(openlog syslog LOG_INFO LOG_ERR LOG_WARNING);
24 our $env_name = 'PREFORK_INTERP';
31 our @autoreload_extra_files = ();
33 sub prefork_autoreload_also_check {
34 push @autoreload_extra_files, @_;
40 syslog(LOG_ERR, "$0: prefork: error: $m");
42 carp "$0: prefork: initialisation error: $m";
49 syslog(LOG_INFO, "$0 prefork: $m, quitting");
53 # Returns in the executor process
54 sub become_monitor () {
59 # Make a process group for this call
60 setpgrp or fail_log("setpgrp failed: $!");
62 eval { protocol_exchange(); 1; }
63 or fail_log("protocol exchange failed: $@");
65 pipe EXECTERM, EXECTERMW or fail_log("pipe: $!");
67 my $child = fork // fail_log("fork executor: $!");
70 open ::STDIN , "<& $call_fds[0]" or fail_log("dup for fd0");
71 open ::STDOUT, ">& $call_fds[1]" or fail_log("dup for fd1");
72 open ::STDERR, ">& $call_fds[2]" or fail_log("dup for fd2");
80 #---- monitor [2] ----
84 vec($rbits, fileno(CALL), 1) = 1;
85 vec($rbits, fileno(EXECTERM), 1) = 1;
87 my $nfound = select($rbits, '', $ebits, undef);
90 fail_log("monitor select() failed: $!");
93 # Either the child has just died, or the caller has gone away
96 kill 'INT', 0 or fail_log("kill executor [$child]: $!");
98 my $got = waitpid $child, 0;
99 $got >= 0 // fail_log("wait for executor [$child] (2): $!");
100 $got == $child or fail_log("wait for esecutor [$child] gave [$got]");
102 protocol_write(pack "N", $?);
106 sub close_call_fds () {
107 foreach (@call_fds) {
113 sub protocol_write ($) {
115 return if (print CALL $d and flush CALL);
116 _exit(0) if $!==EPIPE || $!==ECONNRESET;
117 fail_log("protocol write: $!");
120 sub eintr_retry ($) {
124 return $r if defined $r;
130 sub protocol_read_fail ($) {
132 _exit(0) if $!==ECONNRESET;
133 die("recv $what: $!");
136 sub protocol_exchange () {
137 my $greeting = "PFI\n\0\0\0\0";
138 protocol_write($greeting);
143 $r = sysread CALL, $ibyte, 1;
145 $!==EINTR or protocol_read_fail("signalling byte");
149 $ibyte and die(sprintf "signalling byte is 0x%02x, not zero", $ibyte);
155 $r = IO::FDPass::recv(fileno(CALL));
158 protocol_read_fail("fd $_");
164 $r = read(CALL, $len, 4) // protocol_read_fail("message length");
167 $len = unpack "N", $len;
169 $r = read(CALL, $data, $len) // protocol_read_fail("message data ($len)");
170 $r == $len or _exit(0);
172 @ARGV = split /\0/, $data, -1;
173 @ARGV >= 2 or die("message data has too few strings (".(scalar @ARGV).")");
174 length(pop(@ARGV)) and die("message data missing trailing nul");
176 while (my $s = shift @ARGV) {
178 $s =~ m/=/ or die("message data env var missing equals");
183 sub autoreload_check ($) {
185 my @s = Time::HiRes::stat($f);
187 $!==ENOENT or fail_log("autoreload check: stat failed: $f: $!");
190 if ($s[9] > $startup_mtime) {
191 syslog(LOG_INFO, "$0 prefork: reloading; due to $f");
196 sub prefork_initialisation_complete {
199 push @autoreload_extra_files, $0;
201 # if env var not set, we're not running under prefork-interp
202 my @env_data = split / /, ($ENV{$env_name} // return);
203 croak "$env_name has too few words" unless @env_data >= 2;
204 my (@vsns) = split /,/, $env_data[0];
205 croak "$env_name doesn't specify v1" unless @vsns >= 2 && $vsns[0] eq 'v1';
206 $startup_mtime = $vsns[1];
207 my @env_fds = split /,/, $env_data[1];
208 croak "$env_name has too few fds" unless @env_fds >= 4;;
211 my $num_servers = $opts{max_servers} // 4;
213 #---- setup (pm) [1] ----
216 $_ eq ($_+0) or croak "$env_name contains $_, not a number";
218 open LISTEN, "+>&=$env_fds[0]" or croak "listen fd: $!";
219 open CALL, "+>&=$env_fds[1]" or croak "call fd: $!";
220 open WATCHI, "+>&=$env_fds[2]" or croak "call fd: $!";
221 open WATCHE, "+>&=$env_fds[3]" or croak "watch stderr fd: $!";
223 my $log_facility = $opts{log_facility} // 'LOG_USER';
224 if (length $log_facility) {
225 openlog("prefork-interp $0", 'ndelay,nofatal,pid', $log_facility);
228 open NULL, "+>/dev/null" or croak "open /dev/null: $!";
230 #---- fork for server ----
232 my $child = fork // croak "first fork failed: $!";
234 #---- setup (pm) [2], exits ----
237 setsid() > 0 or fail_log("setsid: $!");
238 # The server will be a session leader, but it won't open ttys,
241 #---- server(pm) [1] ----
243 $child = fork // croak "second fork failed: $!";
245 # we are the child, i.e. the one fa-monitor
246 local $0 = "$0 [monitor(init)]";
247 return become_monitor();
252 $children{$child} = 1;
254 # --- server(pm) [2] ----
256 local $0 = "$0 [server]";
259 open STDIN, "<&NULL" or fail_log("dup null onto stdin: $!");
260 open STDOUT, ">&NULL" or fail_log("dup null onto stdout: $!");
261 open STDERR, ">&NULL" or fail_log("dup null onto stderr: $!");
269 my $full = $num_servers >= 0 ? %children >= $num_servers : 0;
270 my $got = waitpid -1, ($full ? 0 : WNOHANG);
271 $got >= 0 or fail_log("failed to wait for monitor(s): $!");
273 if ($? && $? != SIGPIPE) {
275 "$0 prefork: monitor process [$got] failed with wait status $?");
277 if (!exists $children{$got}) {
279 "$0 prefork: monitor process [$got] wasn't one of ours?!");
281 delete $children{$got};
286 # select for accepting or housekeeping timeout
288 vec($rbits, fileno(LISTEN), 1) = 1;
289 vec($rbits, fileno(WATCHE), 1) = 1;
291 my $idle_timeout = $opts{idle_timeout} // 1000000;
292 $idle_timeout = undef if $idle_timeout < 0;
293 my $nfound = select($rbits, '', $ebits, $idle_timeout);
296 last if $nfound == 0;
299 fail_log("select failed: $!");
302 # Has the watcher told us to shut down, or died with a message ?
304 my $r = sysread WATCHE, $msgbuf, 2048;
307 fail_log("watcher: $msgbuf");
308 } elsif (defined $r) {
310 "$0 prefork: lost socket (fresh start or cleanup?), quitting");
312 } elsif ($! == EINTR || $! == EAGAIN || $! == EWOULDBLOCK) {
314 fail_log("watcher stderr read: $!");
317 if (%opts{autoreload_inc} // 1) {
318 foreach my $f (values %INC) {
319 autoreload_check($f);
322 foreach my $f (@autoreload_extra_files) {
323 autoreload_check($f);
325 foreach my $f (@{ %opts{autoreload_extra} // [] }) {
326 autoreload_check($f);
329 # Anything to accept ?
330 if (accept(CALL, LISTEN)) {
331 $child = fork // fail_log("fork for accepted call failed: $!");
333 #---- monitor [1] ----
334 $0 =~ s{ \[server\]$}{ [monitor]};
335 return become_monitor();
339 $children{$child} = 1;
340 } elsif ($! == EINTR || $! == EAGAIN || $! == EWOULDBLOCK) {
342 syslog(LOG_WARNING, "$0 prefork: accept failed: $!");
343 if ($errcount > ($opts{max_errors} // 100)) {
344 fail_log("too many accept failures, quitting");
357 Proc::Prefork::Interp - script-side handler for prefork-interp
361 #!/usr/bin/prefork-interp -U,perl,-w
364 use Proc::Prefork::Interp;
366 ... generic initialisation code, use statements ...
368 prefork_initialisation_complete();
370 ... per-execution code ...
374 Proc::Prefork::Interp implements the script-side protocol
375 expected by the preform-interp C wrapper program.
377 The combination arranges that the startup overhead of your script
378 is paid once, and then the initialised script can service multiple
379 requests, quickly and in parallel.
381 C<prefork_initialisation_complete> actually daemonises the program,
382 forking twice, and returning in the grandchild.
384 It returns once for each associated invocation of C<prefork-interp>
385 (ie, each invocation of the script which starts C<#!/usr/bin/prefork-interp>),
386 each time in a separate process.
388 =head1 PRE-INITIALISATION STATE, CONTEXT AND ACTIONS
390 During initialisation, the program may load perl modules, and do
391 other kinds of pre-computation and pre-loading.
393 Where files are read during pre-loading, consider calling
394 C<prefork_autoreload_also_check> to arrange that the script will
395 automatically be restarted when the files change.
396 See L</"AUTOMATIC RELOADING">.
398 Before C<prefork_initialisation_complete>,
399 the script will stdin connected to /dev/null,
400 and stdout connected to its stderr.
402 It should avoid accessing its command line arguments
403 - or, at least, those which will vary from call to call.
405 Likewise it should not pay attention to environment variables
406 which are expected to change from one invocation to the next.
407 For example, if the program is a CGI script, it ought not to
408 read the CGI environment variables until after initialisation.
410 It is I<NOT> safe to open a connection to a database,
411 or other kind of server, before initialisation is complete.
412 This is because the db connection would end up being shared
413 by all of the individual executions.
415 =head1 POST-INITIALISATION STATE, CONTEXT AND ACTIONS
417 Each time C<prefork_initialisation_complete> returns,
418 corresponds to one invocation of C<prefork-interp>.
420 On return the script will have its stdin, stdout and stderr
421 connected to those provided by C<prefork-interp>'s caller
423 Likewise C<@ARGV> and C<%ENV> will have been adjusted to
424 copy the arguments and environment of the particular call.
426 By this time, the process has forked twice.
427 Its parent is not the original caller,
428 and it is in a session and a process group
429 set up for this shared script and this particular invocation,
432 Signals sent to the C<prefork-interp> will not be received
434 if C<prefork-interp> is killed, the script will receive a C<SIGINT>;
435 when that happens it ought to die promptly,
436 without doing further IO on stdin/stdout/stderr.
438 The exit status of the script will be reproduced
439 as the exit status of C<prefork-interp>,
440 so that the caller sees the right exit status.
442 =head1 DESCRIPTORS AND OTHER INHERITED PROCESS PROPERTIES
444 The per-invocation child inherits everything that is
445 set up before C<prefork_initialisation_complete>.
447 This includes ulimits, signal dispositions, uids and gids,
448 and of course file descriptors (other than 0/1/2).
450 The prefork-interp system
451 uses C<SIGINT> to terminate services when needed
452 and relies on C<SIGPIPE> to have a default disposition.
453 Do not mess with these.
455 It is not generally safe to open a connection to some kind of service
456 during initialisation.
457 Each invocation will share the socket,
458 which can cause terrible confusion (even security holes).
459 For example, do not open a database handle during initialisation.
461 =head1 AUTOMATIC RELOADING
463 The prefork-interp system supports automatic reloading/restarting,
464 when a script, or files it loads, are modified.
466 Files mentioned in C<$0> and C<%INC> will automatically be checked;
467 if any are found to be newer than the original invocation,
468 a fressh "server" will created -
469 re-running the script again from the top, as for an initial call.
471 The set of files checked in this way can be modified
472 via initialisation-complete options,
473 or by calling C<prefork_autoreload_also_check>.
475 =head1 STANDALONE OPERATION
477 A script which loads Proc::Prefork::Interp
478 and calls C<prefork_initialisation_complete>
479 can also be run standalone.
480 This can be useful for testing.
482 When not run under C<prefork-interp>, C<prefork_initialisation_complete>
483 does nothing and returns in the same process.
489 =item C<< prefork_initialisation_complete( I<%options> ) >>
491 Turns this script into a server,
492 which can be reused for subsequent invocations.
493 Returns multiple times,
494 each time in a different process,
497 When not run under C<prefork-interp>, this is a no-op.
499 C<%options> is an even-length list of options,
500 in the format used for initalising a Perl hash:
504 =item C<< max_servers => I<MAX> >>
506 Allow I<MAX> (an integer) concurrent invocations at once.
507 If too many invocations arrive at once,
508 new ones won't be served until some of them complete.
510 If I<MAX> is negative, there is no limit.
511 The limit is only applied somewhat approximately.
514 =item C<< idle_timeout => I<TIMEOUT> >>
516 If no invocations occur for this length of time, we quit;
517 future invocations would involve a restart.
519 If I<TIMEOUT> is negative, we don't time out.
521 =item C<< autoreload_inc => I<BOOL> >>
524 we don't automatically check files in C<%INC> for reloads.
525 See L</"AUTOMATIC RELOADING">.
527 =item C<< autoreload_extra => [ I<PATHS> ] >>
529 Additional paths to check for reloads
530 (as an arrayref of strings).
531 (This is in addition to paths passed to C<prefork_autoreload_also_check>.)
532 See L</"AUTOMATIC RELOADING">.
533 Default is 1 megasecond.
535 =item C<< max_errors => I<NUMBER> >>
537 If our server loop experiences more errors than this, we quit.
539 a future invocation would restart the script from the top.)
542 =item C<< log_facility => I<BOOL> >>
544 The syslog facility to use,
545 for messages from the persistent server.
547 The value is in the format expected by C<Sys::Syslog::openlog>;
548 the empty string means not to use syslog at all,
549 in which case errors experienced by the psersistent server
550 will not be reported anywhere, impeding debugging.
552 Default is C<LOG_USER>.
556 =item C<< prefork_autoreload_also_check I<PATHS> >>
558 Also check each path in I<PATHS> for being out of date;
559 if any exists and has an mtime after our setup,
560 we consider ourselves out of date and arrange for a reload.
562 It is not an error for a I<PATH> to not exist,
563 but it is an error if it can't be checked.
567 =head1 AUTHORS AND COPYRIGHT
569 The prefork-interp system was designed and implemented by Ian Jackson
570 and is distributed as part of chiark-utils.
572 prefork-interp and Proc::Prefork::Interp are
573 Copyright 2022 Ian Jackson and contributors to chiark-utils.
577 A function which works and returns in the grant parent,
578 having readjusted many important process properties,
579 is inherently rather weird.
580 Scripts using this facility must take some care.
582 Signal propagation, from caller to actual service, is lacking.
584 If the service continues to access its descriptors after receiving SIGINT,
585 the ultimate callers can experience maulfunctions
586 (eg, stolen terminal keystrokes!)
588 =head1 FUTURE POSSIBILITIES
590 This system should work for Python too.
591 I would welcome contribution of the approriate Python code.
592 Please get in touch so I can help you.
598 =item C<prefork-interp.txt>
600 Usage and options for the C<prefork-interp>
601 invocation wrapper program.
603 =item C<prefork-interp.c>
605 Design and protocol information is in the comments
606 at the top of the source file.