scripts/Proc/Prefork/Interp.pm

   1
   2 package Proc::Prefork::Interp;
   3 require Exporter;
   4 our @ISA = qw(Exporter);
   5 our @EXPORT = qw(
   6                   prefork_initialisation_complete
   7                   prefork_autoreload_also_check
   8                );
   9
  10 use strict;
  11
  12 use Carp;
  13 use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
  14 use IO::FDPass;
  15 use POSIX qw(_exit setsid :sys_wait_h :errno_h :signal_h);
  16 use Sys::Syslog qw(openlog syslog LOG_INFO LOG_ERR LOG_WARNING);
  17 use Time::HiRes qw();
  18
  19 our $logger;
  20
  21 our $env_name = 'PREFORK_INTERP';
  22
  23 our @call_fds;
  24 our $socket_path;
  25 our $fail_log = 0;
  26 our $startup_mtime;
  27
  28 our @autoreload_extra_files = ();
  29
  30 sub prefork_autoreload_also_check {
  31   push @autoreload_extra_files, @_;
  32 }
  33
  34 sub fail_log ($) {
  35   my ($m) = @_;
  36   if ($fail_log) {
  37     syslog(LOG_ERR, "$0: prefork: error: $m");
  38   } else {
  39     carp "$0: prefork: initialisation error: $m";
  40   }
  41   _exit 127;
  42 }
  43
  44 sub server_quit ($) {
  45   my ($m) = @_;
  46   syslog(LOG_INFO, "$0 prefork: $m, quitting");
  47   _exit(0);
  48 }
  49
  50 # Returns in the executor process
  51 sub become_monitor () {
  52   close LISTEN;
  53   close WATCHI;
  54   close WATCHE;
  55
  56   # Make a process group for this call
  57   setpgrp or fail_log("setpgrp failed: $!");
  58
  59   eval { protocol_exchange(); 1; }
  60     or fail_log("protocol exchange failed: $@");
  61
  62   pipe EXECTERM, EXECTERMW or fail_log("pipe: $!");
  63
  64   my $child = fork // fail_log("fork executor: $!");
  65   if (!$child) {
  66     #---- executor ----
  67     open ::STDIN , "<& $call_fds[0]" or fail_log("dup for fd0");
  68     open ::STDOUT, ">& $call_fds[1]" or fail_log("dup for fd1");
  69     open ::STDERR, ">& $call_fds[2]" or fail_log("dup for fd2");
  70     close EXECTERM;
  71     close_call_fds();
  72     $! = 0;
  73     return;
  74   }
  75   close EXECTERMW;
  76
  77   #---- monitor [2] ----
  78
  79   for (;;) {
  80     my $rbits = '';
  81     vec($rbits, fileno(CALL), 1) = 1;
  82     vec($rbits, fileno(EXECTERM), 1) = 1;
  83     my $ebits = $rbits;
  84     my $nfound = select($rbits, '', $ebits, undef);
  85     last if $nfound > 0;
  86     next if $! == EINTR;
  87     fail_log("monitor select() failed: $!");
  88   }
  89
  90   # Either the child has just died, or the caller has gone away
  91
  92   $SIG{INT} = 'IGN';
  93   kill 'INT', 0 or fail_log("kill executor [$child]: $!");
  94
  95   my $got = waitpid $child, 0;
  96   $got >= 0 // fail_log("wait for executor [$child] (2): $!");
  97   $got == $child or fail_log("wait for esecutor [$child] gave [$got]");
  98
  99   protocol_write(pack "N", $?);
 100   _exit(0);
 101 }
 102
 103 sub close_call_fds () {
 104   foreach (@call_fds) {
 105     POSIX::close($_);
 106   }
 107   close CALL;
 108 }
 109
 110 sub protocol_write ($) {
 111   my ($d) = @_;
 112   return if (print CALL $d and flush CALL);
 113   _exit(0) if $!==EPIPE || $!==ECONNRESET;
 114   fail_log("protocol write: $!");
 115 }
 116
 117 sub eintr_retry ($) {
 118   my ($f) = @_;
 119   for (;;) {
 120     my $r = $f->();
 121     return $r if defined $r;
 122     next if $!==EINTR;
 123     return $r;
 124   }
 125 }
 126
 127 sub protocol_read_fail ($) {
 128   my ($what) = @_;
 129   _exit(0) if $!==ECONNRESET;
 130   die("recv $what: $!");
 131 }
 132
 133 sub protocol_exchange () {
 134   my $greeting = "PFI\n\0\0\0\0";
 135   protocol_write($greeting);
 136
 137   my $ibyte = 0;
 138   my $r;
 139   for (;;) {
 140     $r = sysread CALL, $ibyte, 1;
 141     last if $r > 0;
 142     $!==EINTR or protocol_read_fail("signalling byte");
 143   }
 144   $r == 1 or _exit(0);
 145   $ibyte = ord $ibyte;
 146   $ibyte and die(sprintf "signalling byte is 0x%02x, not zero", $ibyte);
 147
 148   @call_fds = map {
 149     my $r;
 150     for (;;) {
 151       $! = 0;
 152       $r = IO::FDPass::recv(fileno(CALL));
 153       last if $r >= 0;
 154       _exit(0) if $!==0;
 155       protocol_read_fail("fd $_");
 156     }
 157     $r;
 158   } 0..2;
 159
 160   my $len;
 161   $r = read(CALL, $len, 4) // protocol_read_fail("message length");
 162   $r == 4 or _exit(0);
 163
 164   $len = unpack "N", $len;
 165   my $data;
 166   $r = read(CALL, $data, $len) // protocol_read_fail("message data ($len)");
 167   $r == $len or _exit(0);
 168
 169   @ARGV = split /\0/, $data, -1;
 170   @ARGV >= 2 or die("message data has too few strings (".(scalar @ARGV).")");
 171   length(pop(@ARGV)) and die("message data missing trailing nul");
 172   %ENV = ();
 173   while (my $s = shift @ARGV) {
 174     last if !length $s;
 175     $s =~ m/=/ or die("message data env var missing equals");
 176     $ENV{$`} = $';
 177   }
 178 }
 179
 180 sub autoreload_check ($) {
 181   my ($f) = @_;
 182   my @s = Time::HiRes::stat($f);
 183   if (!@s) {
 184     $!==ENOENT or fail_log("autoreload check: stat failed: $f: $!");
 185     return;
 186   }
 187   if ($s[9] > $startup_mtime) {
 188     syslog(LOG_INFO, "$0 prefork: reloading; due to $f");
 189     _exit(0);
 190   }
 191 }
 192
 193 sub prefork_initialisation_complete {
 194   my %opts = @_;
 195
 196   push @autoreload_extra_files, $0;
 197
 198   # if env var not set, we're not running under prefork-interp
 199   my @env_data = split / /, ($ENV{$env_name} // return);
 200   croak "$env_name has too few words" unless @env_data >= 2;
 201   my (@vsns) = split /,/, $env_data[0];
 202   croak "$env_name doesn't specify v1" unless @vsns >= 2 && $vsns[0] eq 'v1';
 203   $startup_mtime = $vsns[1];
 204   my @env_fds = split /,/, $env_data[1];
 205   croak "$env_name has too few fds" unless @env_fds >= 4;;
 206   $#env_fds = 3;
 207
 208   my $num_servers = $opts{max_servers} // 4;
 209
 210   #---- setup (pm) [1] ----
 211
 212   foreach (@env_fds) {
 213     $_ eq ($_+0) or croak "$env_name contains $_, not a number";
 214   }
 215   open LISTEN, "+>&=$env_fds[0]" or croak "listen fd: $!";
 216   open CALL,   "+>&=$env_fds[1]" or croak "call fd: $!";
 217   open WATCHI, "+>&=$env_fds[2]" or croak "call fd: $!";
 218   open WATCHE, "+>&=$env_fds[3]" or croak "watch stderr fd: $!";
 219
 220   my $log_facility = $opts{log_facility} // 'LOG_USER';
 221   if (length $log_facility) {
 222     openlog("prefork-interp $0", 'ndelay,nofatal,pid', $log_facility);
 223   }
 224
 225   open NULL, "+>/dev/null" or croak "open /dev/null: $!";
 226
 227   #---- fork for server ----
 228
 229   my $child = fork // croak "first fork failed: $!";
 230   if ($child) {
 231     #---- setup (pm) [2], exits ----
 232     _exit(0);
 233   }
 234   setsid() > 0 or fail_log("setsid: $!");
 235   # The server will be a session leader, but it won't open ttys,
 236   # so that is ok.
 237
 238   #---- server(pm) [1] ----
 239
 240   $child = fork // croak "second fork failed: $!";
 241   if (!$child) {
 242     # we are the child, i.e. the one fa-monitor
 243     local $0 = "$0 [monitor(init)]";
 244     return become_monitor();
 245   }
 246   close CALL;
 247
 248   our %children;
 249   $children{$child} = 1;
 250
 251   # --- server(pm) [2] ----
 252
 253   local $0 = "$0 [server]";
 254
 255   $fail_log = 1;
 256   open STDIN, "<&NULL" or fail_log("dup null onto stdin: $!");
 257   open STDOUT, ">&NULL" or fail_log("dup null onto stdout: $!");
 258   open STDERR, ">&NULL" or fail_log("dup null onto stderr: $!");
 259   close NULL;
 260
 261   my $errcount = 0;
 262
 263   for (;;) {
 264     # reap children
 265     if (%children) {
 266       my $full = $num_servers >= 0 ? %children >= $num_servers : 0;
 267       my $got = waitpid -1, ($full ? 0 : WNOHANG);
 268       $got >= 0 or fail_log("failed to wait for monitor(s): $!");
 269       if ($got) {
 270         if ($? && $? != SIGPIPE) {
 271           syslog(LOG_WARNING,
 272  "$0 prefork: monitor process [$got] failed with wait status $?");
 273         }
 274         if (!exists $children{$got}) {
 275           syslog(LOG_WARNING,
 276  "$0 prefork: monitor process [$got] wasn't one of ours?!");
 277         }
 278         delete $children{$got};
 279         next;
 280       }
 281     }
 282
 283     # select for accepting or housekeeping timeout
 284     my $rbits = '';
 285     vec($rbits, fileno(LISTEN), 1) = 1;
 286     vec($rbits, fileno(WATCHE), 1) = 1;
 287     my $ebits = $rbits;
 288     my $idle_timeout = $opts{idle_timeout} // 1000000;
 289     $idle_timeout = undef if $idle_timeout < 0;
 290     my $nfound = select($rbits, '', $ebits, $idle_timeout);
 291
 292     # Idle timeout?
 293     last if $nfound == 0;
 294     if ($nfound < 0) {
 295       next if $! == EINTR;
 296       fail_log("select failed: $!");
 297     }
 298
 299     # Has the watcher told us to shut down, or died with a message ?
 300     my $msgbuf = '';
 301     my $r = sysread WATCHE, $msgbuf, 2048;
 302     if ($r > 0) {
 303       chomp $msgbuf;
 304       fail_log("watcher: $msgbuf");
 305     } elsif (defined $r) {
 306       syslog(LOG_INFO,
 307  "$0 prefork: lost socket (fresh start or cleanup?), quitting");
 308       last;
 309     } elsif ($! == EINTR || $! == EAGAIN || $! == EWOULDBLOCK) {
 310     } else {
 311       fail_log("watcher stderr read: $!");
 312     }
 313
 314     if (%opts{autoreload_inc} // 1) {
 315       foreach my $f (values %INC) {
 316         autoreload_check($f);
 317       }
 318     }
 319     foreach my $f (@autoreload_extra_files) {
 320       autoreload_check($f);
 321     }
 322     foreach my $f (@{ %opts{autoreload_extra} // [] }) {
 323       autoreload_check($f);
 324     }
 325
 326     # Anything to accept ?
 327     if (accept(CALL, LISTEN)) {
 328       $child = fork // fail_log("fork for accepted call failed: $!");
 329       if (!$child) {
 330         #---- monitor [1] ----
 331         $0 =~ s{ \[server\]$}{ [monitor]};
 332         return become_monitor();
 333       }
 334       close(CALL);
 335       $errcount = 0;
 336       $children{$child} = 1;
 337     } elsif ($! == EINTR || $! == EAGAIN || $! == EWOULDBLOCK) {
 338     } else {
 339       syslog(LOG_WARNING, "$0 prefork: accept failed: $!");
 340       if ($errcount > ($opts{max_errors} // 100)) {
 341         fail_log("too many accept failures, quitting");
 342       }
 343     }
 344   }
 345   _exit(0);
 346 }
 347
 348 1;
 349
 350 __END__
 351
 352 =head1 NAME
 353
 354 Proc::Prefork::Interp - script-side handler for prefork-interp
 355
 356 =head1 SYNOPSYS
 357
 358     #!/usr/bin/prefork-interp -U,perl,-w
 359     # -*- perl -*-
 360     use strict;
 361     use Proc::Prefork::Interp;
 362
 363     ... generic initialisation code, use statements ...
 364
 365     prefork_initialisation_complete();
 366
 367     ... per-execution code ...
 368
 369 =head1 DESCRIPTION
 370
 371 Proc::Prefork::Interp implements the script-side protocol
 372 expected by the preform-interp C wrapper program.
 373
 374 The combination arranges that the startup overhead of your script
 375 is paid once, and then the initialised script can service multiple
 376 requests, quickly and in parallel.
 377
 378 C<prefork_initialisation_complete> actually daemonises the program,
 379 forking twice, and returning in the grandchild.
 380
 381 It returns once for each associated invocation of C<prefork-interp>
 382 (ie, each invocation of the script which starts C<#!/usr/bin/prefork-interp>),
 383 each time in a separate process.
 384
 385 =head1 PRE-INITIALISATION STATE, CONTEXT AND ACTIONS
 386
 387 During initialisation, the program may load perl modules, and do
 388 other kinds of pre-computation and pre-loading.
 389
 390 Where files are read during pre-loading, consider calling
 391 C<prefork_autoreload_also_check> to arrange that the script will
 392 automatically be restarted when the files change.
 393 See L</"AUTOMATIC RELOADING">.
 394
 395 Before C<prefork_initialisation_complete>,
 396 the script will stdin connected to /dev/null,
 397 and stdout connected to its stderr.
 398
 399 It should avoid accessing its command line arguments
 400 - or, at least, those which will vary from call to call.
 401
 402 Likewise it should not pay attention to environment variables
 403 which are expected to change from one invocation to the next.
 404 For example, if the program is a CGI script, it ought not to
 405 read the CGI environment variables until after initialisation.
 406
 407 It is I<NOT> safe to open a connection to a database,
 408 or other kind of server, before initialisation is complete.
 409 This is because the db connection would end up being shared
 410 by all of the individual executions.
 411
 412 =head1 POST-INITIALISATION STATE, CONTEXT AND ACTIONS
 413
 414 Each time C<prefork_initialisation_complete> returns,
 415 corresponds to one invocation of C<prefork-interp>.
 416
 417 On return the script will have its stdin, stdout and stderr
 418 connected to those provided by C<prefork-interp>'s caller
 419 for this invocation.
 420 Likewise C<@ARGV> and C<%ENV> will have been adjusted to
 421 copy the arguments and environment of the particular call.
 422
 423 By this time, the process has forked twice.
 424 Its parent is not the original caller,
 425 and it is in a session and a process group
 426 set up for this shared script and this particular invocation,
 427 respectively.
 428
 429 Signals sent to the C<prefork-interp> will not be received
 430 by the script.
 431 if C<prefork-interp> is killed, the script will receive a C<SIGINT>;
 432 when that happens it ought to die promptly,
 433 without doing further IO on stdin/stdout/stderr.
 434
 435 The exit status of the script will be reproduced
 436 as the exit status of C<prefork-interp>,
 437 so that the caller sees the right exit status.
 438
 439 =head1 DESCRIPTORS AND OTHER INHERITED PROCESS PROPERTIES
 440
 441 The per-invocation child inherits everything that is
 442 set up before C<prefork_initialisation_complete>.
 443
 444 This includes ulimits, signal dispositions, uids and gids,
 445 and of course file descriptors (other than 0/1/2).
 446
 447 The prefork-interp system
 448 uses C<SIGINT> to terminate services when needed
 449 and relies on C<SIGPIPE> to have a default disposition.
 450 Do not mess with these.
 451
 452 It is not generally safe to open a connection to some kind of service
 453 during initialisation.
 454 Each invocation will share the socket,
 455 which can cause terrible confusion (even security holes).
 456 For example, do not open a database handle during initialisation.
 457
 458 =head1 AUTOMATIC RELOADING
 459
 460 The prefork-interp system supports automatic reloading/restarting,
 461 when a script, or files it loads, are modified.
 462
 463 Files mentioned in C<$0> and C<%INC> will automatically be checked;
 464 if any are found to be newer than the original invocation,
 465 a fressh "server" will created -
 466 re-running the script again from the top, as for an initial call.
 467
 468 The set of files checked in this way can be modified
 469 via initialisation-complete options,
 470 or by calling C<prefork_autoreload_also_check>.
 471
 472 =head1 STANDALONE OPERATION
 473
 474 A script which loads Proc::Prefork::Interp
 475 and calls C<prefork_initialisation_complete>
 476 can also be run standalone.
 477 This can be useful for testing.
 478
 479 When not run under C<prefork-interp>, C<prefork_initialisation_complete>
 480 does nothing and returns in the same process.
 481
 482 =head1 FUNCTIONS
 483
 484 =over
 485
 486 =item C<< prefork_initialisation_complete( I<%options> ) >>
 487
 488 Turns this script into a server,
 489 which can be reused for subsequent invocations.
 490 Returns multiple times,
 491 each time in a different process,
 492 one per invocation.
 493
 494 When not run under C<prefork-interp>, this is a no-op.
 495
 496 C<%options> is an even-length list of options,
 497 in the format used for initalising a Perl hash:
 498
 499 =over
 500
 501 =item C<< max_servers => I<MAX> >>
 502
 503 Allow I<MAX> (an integer) concurrent invocations at once.
 504 If too many invocations arrive at once,
 505 new ones won't be served until some of them complete.
 506
 507 If I<MAX> is negative, there is no limit.
 508 The limit is only applied somewhat approximately.
 509 Default is 4.
 510
 511 =item C<< idle_timeout => I<TIMEOUT> >>
 512
 513 If no invocations occur for this length of time, we quit;
 514 future invocations would involve a restart.
 515
 516 If I<TIMEOUT> is negative, we don't time out.
 517
 518 =item C<< autoreload_inc => I<BOOL> >>
 519
 520 If set falseish,
 521 we don't automatically check files in C<%INC> for reloads.
 522 See L</"AUTOMATIC RELOADING">.
 523
 524 =item C<< autoreload_extra => [ I<PATHS> ] >>
 525
 526 Additional paths to check for reloads
 527 (as an arrayref of strings).
 528 (This is in addition to paths passed to C<prefork_autoreload_also_check>.)
 529 See L</"AUTOMATIC RELOADING">.
 530 Default is 1 megasecond.
 531
 532 =item C<< max_errors => I<NUMBER> >>
 533
 534 If our server loop experiences more errors than this, we quit.
 535 (If this happens,
 536 a future invocation would restart the script from the top.)
 537 Default is 100.
 538
 539 =item C<< log_facility => I<BOOL> >>
 540
 541 The syslog facility to use,
 542 for messages from the persistent server.
 543
 544 The value is in the format expected by C<Sys::Syslog::openlog>;
 545 the empty string means not to use syslog at all,
 546 in which case errors experienced by the psersistent server
 547 will not be reported anywhere, impeding debugging.
 548
 549 Default is C<LOG_USER>.
 550
 551 =back
 552
 553 =item C<< prefork_autoreload_also_check I<PATHS> >>
 554
 555 Also check each path in I<PATHS> for being out of date;
 556 if any exists and has an mtime after our setup,
 557 we consider ourselves out of date and arrange for a reload.
 558
 559 It is not an error for a I<PATH> to not exist,
 560 but it is an error if it can't be checked.
 561
 562 =back
 563
 564 =head1 AUTHORS AND COPYRIGHT
 565
 566 The prefork-interp system was designed and implemented by Ian Jackson
 567 and is distributed as part of chiark-utils.
 568
 569 prefork-interp and Proc::Prefork::Interp are
 570 Copyright 2022 Ian Jackson and contributors to chiark-utils.
 571
 572 =head1 LIMITATIONS
 573
 574 A function which works and returns in the grant parent,
 575 having readjusted many important process properties,
 576 is inherently rather weird.
 577 Scripts using this facility must take some care.
 578
 579 Signal propagation, from caller to actual service, is lacking.
 580
 581 If the service continues to access its descriptors after receiving SIGINT,
 582 the ultimate callers can experience maulfunctions
 583 (eg, stolen terminal keystrokes!)
 584
 585 =head1 FUTURE POSSIBILITIES
 586
 587 This system should work for Python too.
 588 I would welcome contribution of the approriate Python code.
 589 Please get in touch so I can help you.
 590
 591 =head1 SEE ALSO
 592
 593 =over
 594
 595 =item C<prefork-interp.txt>
 596
 597 Usage and options for the C<prefork-interp>
 598 invocation wrapper program.
 599
 600 =item C<prefork-interp.c>
 601
 602 Design and protocol information is in the comments
 603 at the top of the source file.
 604
 605 =back