From: Ian Jackson Date: Sun, 21 Aug 2022 20:18:10 +0000 (+0100) Subject: prefork-interp: docs X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?a=commitdiff_plain;h=baec4f384d7f30e4e73f46aa5479ecd23dcc1cdf;p=chiark-utils.git prefork-interp: docs Signed-off-by: Ian Jackson --- diff --git a/cprogs/prefork-interp.c b/cprogs/prefork-interp.c index cfab9f8..56d6040 100644 --- a/cprogs/prefork-interp.c +++ b/cprogs/prefork-interp.c @@ -11,12 +11,12 @@ * The script must load a corresponding library (eg Proc::Prefork::Interp * for Perl) and call its preform_initialisation_complete routine. * - * Options must specify argument mediation approach. - * Currently the only argument mediation supported is: + * Options must specify argument/environment mediation approach. + * Currently the only args/env mediation supported is: * * -U unlaundered: setup and executor both get all arguments and env vars - * ident covers only env vars specified with -E - * ident covers only arguments interpreter and (if present) script + * ident covers only env vars specified with -E + * ident covers only two arguments: interpreter and (if present) script * * Options for setting the operation mode: * diff --git a/scripts/Proc/Prefork/Interp.pm b/scripts/Proc/Prefork/Interp.pm index ae0199f..77a6d62 100644 --- a/scripts/Proc/Prefork/Interp.pm +++ b/scripts/Proc/Prefork/Interp.pm @@ -217,9 +217,9 @@ sub prefork_initialisation_complete { open WATCHI, "+>&=$env_fds[2]" or croak "call fd: $!"; open WATCHE, "+>&=$env_fds[3]" or croak "watch stderr fd: $!"; - if (!$opts{no_openlog}) { - openlog("prefork-interp $0", 'ndelay,nofatal,pid', - $opts{log_facility} // 'log_user'); + my $log_facility = $opts{log_facility} // 'LOG_USER'; + if (length $log_facility) { + openlog("prefork-interp $0", 'ndelay,nofatal,pid', $log_facility); } open NULL, "+>/dev/null" or croak "open /dev/null: $!"; @@ -263,7 +263,7 @@ sub prefork_initialisation_complete { for (;;) { # reap children if (%children) { - my $full = 
%children >= $num_servers; + my $full = $num_servers >= 0 ? %children >= $num_servers : 0; my $got = waitpid -1, ($full ? 0 : WNOHANG); $got >= 0 or fail_log("failed to wait for monitor(s): $!"); if ($got) { @@ -285,7 +285,9 @@ sub prefork_initialisation_complete { vec($rbits, fileno(LISTEN), 1) = 1; vec($rbits, fileno(WATCHE), 1) = 1; my $ebits = $rbits; - my $nfound = select($rbits, '', $ebits, ($opts{idle_timeout} // 1000000)); + my $idle_timeout = $opts{idle_timeout} // 1000000; + $idle_timeout = undef if $idle_timeout < 0; + my $nfound = select($rbits, '', $ebits, $idle_timeout); # Idle timeout? last if $nfound == 0; @@ -346,3 +348,258 @@ sub prefork_initialisation_complete { 1; __END__ + +=head1 NAME + +Proc::Prefork::Interp - script-side handler for prefork-interp + +=head1 SYNOPSIS + + #!/usr/bin/prefork-interp -U,perl,-w + # -*- perl -*- + use strict; + use Proc::Prefork::Interp; + + ... generic initialisation code, use statements ... + + prefork_initialisation_complete(); + + ... per-execution code ... + +=head1 DESCRIPTION + +Proc::Prefork::Interp implements the script-side protocol +expected by the prefork-interp C wrapper program. + +The combination arranges that the startup overhead of your script +is paid once, and then the initialised script can service multiple +requests, quickly and in parallel. + +C<prefork_initialisation_complete> actually daemonises the program, +forking twice, and returning in the grandchild. + +It returns once for each associated invocation of C<prefork-interp> +(ie, each invocation of the script which starts C<#!/usr/bin/prefork-interp>), +each time in a separate process. + +=head1 PRE-INITIALISATION STATE, CONTEXT AND ACTIONS + +During initialisation, the program may load perl modules, and do +other kinds of pre-computation and pre-loading. + +Where files are read during pre-loading, consider calling +C<prefork_autoreload_also_check> to arrange that the script will +automatically be restarted when the files change. +See L</AUTOMATIC RELOADING>. 
+ +Before C<prefork_initialisation_complete>, +the script will have stdin connected to /dev/null, +and stdout connected to its stderr. + +It should avoid accessing its command line arguments +- or, at least, those which will vary from call to call. + +Likewise it should not pay attention to environment variables +which are expected to change from one invocation to the next. +For example, if the program is a CGI script, it ought not to +read the CGI environment variables until after initialisation. + +It is I<NOT> safe to open a connection to a database, +or other kind of server, before initialisation is complete. +This is because the db connection would end up being shared +by all of the individual executions. + +=head1 POST-INITIALISATION STATE, CONTEXT AND ACTIONS + +Each time C<prefork_initialisation_complete> returns, +this corresponds to one invocation of C<prefork-interp>. + +On return the script will have its stdin, stdout and stderr +connected to those provided by C<prefork-interp>'s caller +for this invocation. +Likewise C<@ARGV> and C<%ENV> will have been adjusted to +copy the arguments and environment of the particular call. + +By this time, the process has forked twice. +Its parent is not the original caller, +and it is in a session and a process group +set up for this shared script and this particular invocation, +respectively. + +Signals sent to C<prefork-interp> will not be received +by the script. +If C<prefork-interp> is killed, the script will receive a C<SIGINT>; +when that happens it ought to die promptly, +without doing further IO on stdin/stdout/stderr. + +The exit status of the script will be reproduced +as the exit status of C<prefork-interp>, +so that the caller sees the right exit status. + +=head1 DESCRIPTORS AND OTHER INHERITED PROCESS PROPERTIES + +The per-invocation child inherits everything that is +set up before C<prefork_initialisation_complete>. + +This includes ulimits, signal dispositions, uids and gids, +and of course file descriptors (other than 0/1/2). + +The prefork-interp system +uses C<SIGINT> to terminate services when needed +and relies on C<SIGCHLD> to have a default disposition. +Do not mess with these. 
+ +It is not generally safe to open a connection to some kind of service +during initialisation. +Each invocation will share the socket, +which can cause terrible confusion (even security holes). +For example, do not open a database handle during initialisation. + +=head1 AUTOMATIC RELOADING + +The prefork-interp system supports automatic reloading/restarting, +when a script, or files it loads, are modified. + +Files mentioned in C<$0> and C<%INC> will automatically be checked; +if any are found to be newer than the original invocation, +a fresh "server" will be created - +re-running the script again from the top, as for an initial call. + +The set of files checked in this way can be modified +via initialisation-complete options, +or by calling C<prefork_autoreload_also_check>. + +=head1 STANDALONE OPERATION + +A script which loads Proc::Prefork::Interp +and calls C<prefork_initialisation_complete> +can also be run standalone. +This can be useful for testing. + +When not run under C<prefork-interp>, C<prefork_initialisation_complete> +does nothing and returns in the same process. + +=head1 FUNCTIONS + +=over + +=item C<< prefork_initialisation_complete( I<%options> ) >> + +Turns this script into a server, +which can be reused for subsequent invocations. +Returns multiple times, +each time in a different process, +one per invocation. + +When not run under C<prefork-interp>, this is a no-op. + +C<%options> is an even-length list of options, +in the format used for initialising a Perl hash: + +=over + +=item C<< max_servers => I<N> >> + +Allow I<N> (an integer) concurrent invocations at once. +If too many invocations arrive at once, +new ones won't be served until some of them complete. + +If I<N> is negative, there is no limit. +The limit is only applied somewhat approximately. +Default is 4. + +=item C<< idle_timeout => I<SECONDS> >> + +If no invocations occur for this length of time, we quit; +future invocations would involve a restart. + +If I<SECONDS> is negative, we don't time out. + +=item C<< autoreload_inc => I<BOOL> >> + +If set falseish, +we don't automatically check files in C<%INC> for reloads. +See L</AUTOMATIC RELOADING>. 
+ +=item C<< autoreload_extra => [ I<PATHS> ] >> + +Additional paths to check for reloads +(as an arrayref of strings). +(This is in addition to paths passed to C<prefork_autoreload_also_check>.) +See L</AUTOMATIC RELOADING>. +(The default C<idle_timeout> is 1 megasecond.) + +=item C<< max_errors => I<N> >> + +If our server loop experiences more errors than this, we quit. +(If this happens, +a future invocation would restart the script from the top.) +Default is 100. + +=item C<< log_facility => I<FACILITY> >> + +The syslog facility to use, +for messages from the persistent server. + +The value is in the format expected by C<openlog>; +the empty string means not to use syslog at all, +in which case errors experienced by the persistent server +will not be reported anywhere, impeding debugging. + +Default is C<LOG_USER>. + +=back + +=item C<< prefork_autoreload_also_check I<PATHS> >> + +Also check each path in I<PATHS> for being out of date; +if any exists and has an mtime after our setup, +we consider ourselves out of date and arrange for a reload. + +It is not an error for a I<PATH> to not exist, +but it is an error if it can't be checked. + +=back + +=head1 AUTHORS AND COPYRIGHT + +The prefork-interp system was designed and implemented by Ian Jackson +and is distributed as part of chiark-utils. + +prefork-interp and Proc::Prefork::Interp are +Copyright 2022 Ian Jackson and contributors to chiark-utils. + +=head1 LIMITATIONS + +A function which works and returns in the grandchild, +having readjusted many important process properties, +is inherently rather weird. +Scripts using this facility must take some care. + +Signal propagation, from caller to actual service, is lacking. + +If the service continues to access its descriptors after receiving SIGINT, +the ultimate callers can experience malfunctions +(eg, stolen terminal keystrokes!) + +=head1 FUTURE POSSIBILITIES + +This system should work for Python too. +I would welcome contribution of the appropriate Python code. +Please get in touch so I can help you. 
+ +=head1 SEE ALSO + +=over + +=item C + +Usage and options for the C +invocation wrapper program. + +=item C + +Design and protocol information is in the comments +at the top of the source file. + +=back