/*
* userv service (or standalone program) for per-user IP subranges.
*
- * When invoked appropriately, it creates a point-to-point network
- * interface with specified parameters. It arranges for packets sent out
- * via that interface by the kernel to appear on its own stdout in SLIP or
- * CSLIP encoding, and packets injected into its own stdin to be given to
- * the kernel as if received on that interface. Optionally, additional
- * routes can be set up to arrange for traffic for other address ranges to
- * be routed through the new interface.
- *
* This is the service program, which is invoked as root from userv (or may
* be invoked directly).
*
*
* The remaining arguments are supplied by the (untrusted) caller:
*
- * <local-addr>,<peer-addr>,<mtu>,<proto>
+ * <local-addr>,<peer-addr>,<mtu>[,[<proto>][,[<ifnamepat>]]]
*
- * As for slattach. Supported protocols are slip, cslip, and
- * adaptive. Alternatively, set to `debug' to print debugging info
- * and exit. <local-addr> is address of the interface to be created
+ * As for slattach. The only supported protocol is slip.
+ * Alternatively, set to `debug' to print debugging info and
+ * exit. <local-addr> is address of the interface to be created
* on the local system; <peer-addr> is the address of the
* point-to-point peer. They must be actual addresses (not
* hostnames).
* not supported). If no additional routes are to be set up, use `-'
* or supply an empty argument.
*
- * Each <config> item - whether a line file such as
- * /etc/userv/ipif-networks, or supplied on the service program
- * command line - is one of:
+ * Each <config> item - whether a line in a file such as
+ * /etc/userv/ipif-networks, or the single trusted argument supplied
+ * on the service program command line - is one of:
*
* /<config-file-name>
* ./<config-file-name>
* service program directly (not via userv), without needing to set up
* permissions in /etc/userv/ipif-networks.
*
+ * Only `*' permits interface name patterns other than the default
+ * value of `userv%d'.
+ *
* #...
*
* Comment. Blank lines are also ignored.
* The service program should be run from userv with no-disconnect-hup.
*/
/*
- * Copyright (C) 1999-2000 Ian Jackson
+ * This file is part of ipif, part of userv-utils
+ *
+ * Copyright 1996-2013 Ian Jackson <ijackson@chiark.greenend.org.uk>
+ * Copyright 1998 David Damerell <damerell@chiark.greenend.org.uk>
+ * Copyright 1999,2003
+ * Chancellor Masters and Scholars of the University of Cambridge
+ * Copyright 2010 Tony Finch <fanf@dotat.at>
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
+ * the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with userv-utils; if not, write to the Free Software
- * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * $Id$
+ * along with userv-utils; if not, see http://www.gnu.org/licenses/.
*/
#include <stdio.h>
#include <limits.h>
#include <signal.h>
#include <unistd.h>
+#include <stdint.h>
+#include <poll.h>
+#include <stddef.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <linux/if.h>
+#include <linux/if_tun.h>
+
#define NARGS 4
-#define MAXEXROUTES 5
+#define MAXEXROUTES 50
#define ATXTLEN 16
static const unsigned long gidmaxval= (unsigned long)((gid_t)-2);
-static const char *const protos_ok[]= { "slip", "cslip", "adaptive", 0 };
-static const int signals[]= { SIGHUP, SIGINT, SIGTERM, 0 };
+static const char *const protos_ok[]= { "slip", 0 };
+static const char default_ifnamepat[]= "userv%d";
static const char *configstr, *proto;
static unsigned long localaddr, peeraddr, mtu;
static int localpming, peerpming;
-static int localallow, peerallow, allallow;
+static int localallow, peerallow, ifnameallow, allallow;
+static char *ifnamepat;
static int nexroutes;
static struct exroute {
unsigned long prefix, mask;
} *cpplace;
-static int slpipe[2], ptmaster, undoslattach;
-static const char *ifname;
-static const char *ptyname;
-
-#define NPIDS 4
-
-static union {
- struct { pid_t sl, cout, cin, task; } byname;
- pid_t bynumber[NPIDS];
-} pids;
-sigset_t emptyset, fullset;
-
+static int tunfd;
+static char *ifname;
-static int cleantask(void) {
- pid_t pid;
-
- pid= fork();
- if (!pid) return 1;
- if (pid == (pid_t)-1)
- perror("userv-ipif: fork for undo slattach failed - cannot clean up properly");
- return 0;
-}
+/* Terminate the service program: exits immediately with status estatus. */
static void terminate(int estatus) {
- int i, status;
- pid_t pid;
-
- for (i=0; i<NPIDS; i++)
- if (pids.bynumber[i]) kill(pids.bynumber[i], SIGTERM);
-
- if (undoslattach) {
- if (cleantask()) {
- execlp("slattach", "slattach", "-p", "tty", ptyname, (char*)0);
- perror("userv-ipif: exec slattach for undo slattach failed");
- exit(-1);
- }
- if (ifname && cleantask()) {
- execlp("ifconfig", "ifconfig", ifname, "down", (char*)0);
- perror("userv-ipif: exec ifconfig for undo ifconfig failed");
- exit(-1);
- }
- }
-
- for (;;) {
- pid= waitpid(-1,&status,0);
- if (pid == (pid_t)-1) break;
- }
exit(estatus);
}
if (len_r) *len_r= len;
}
+/*
+ * Consume one optional comma-delimited field from *argp and return a
+ * newly malloc'd, NUL-terminated copy of it.
+ *
+ * argp: in/out cursor into the argument string.  If a comma is found,
+ * *argp is advanced to just past it; if not, the rest of the string
+ * is the field and *argp is set to null.  A null *argp on entry
+ * means there is no field at all.
+ * what: description of the field; not used by this function's body.
+ * def: value to copy when the field is absent or empty.
+ *
+ * Calls sysfatal() if malloc fails.  This function never frees the
+ * returned string.
+ */
+static char *eat_optionalstr(const char **argp,
+ const char *what,
+ const char *def) {
+ ptrdiff_t len;
+ const char *start= *argp;
+ if (!start) {
+ len = 0;
+ } else {
+ const char *comma= strchr(start, ',');
+ if (comma) {
+ len= comma - start;
+ *argp= comma + 1;
+ } else {
+ len= strlen(start);
+ *argp= 0;
+ }
+ }
+ /* absent and empty fields both fall back to the default */
+ if (!len) {
+ start= def;
+ len= strlen(def);
+ }
+ char *r = malloc(len+1);
+ if (!r) sysfatal("malloc for command line string");
+ memcpy(r,start,len);
+ r[len]= 0;
+ return r;
+}
+
static int addrnet_isin(unsigned long prefix, unsigned long mask,
unsigned long mprefix, unsigned long mmask) {
return !(~mask & mmask) && (prefix & mmask) == mprefix;
badusage("failed while reading configuration file: %s", strerror(errno));
cpplace= npp.parent;
+ fclose(file);
}
static void pconfig(const char *configstr, int truncated) {
case '*':
permit_begin();
permit_range(0UL,0UL,1,0);
+ ifnameallow= 1;
return;
case '#':
peeraddr= eat_addr(&carg,"peer-addr", ",",0);
mtu= eat_number(&carg,"mtu", 576,65536, ",",0);
localallow= peerallow= 0;
-
- if (!strcmp(carg,"debug")) {
+
+ char *protostr= eat_optionalstr(&carg,"protocol","slip");
+ if (!strcmp(protostr,"debug")) {
proto= 0;
} else {
for (cprotop= protos_ok;
- (proto= *cprotop) && strcmp(proto,carg);
+ (proto= *cprotop) && strcmp(proto,protostr);
cprotop++);
if (!proto) fatal("invalid protocol");
}
+
+ ifnamepat= eat_optionalstr(&carg,"ifname pattern",default_ifnamepat);
addrnet_mustdiffer("local-addr",localaddr,~0UL, "peer-addr",peeraddr,~0UL);
sprintf(erwhatbuf, "route#%d", i);
checkallow(exroutes[i].allow, erwhatbuf, exroutes[i].prefixtxt, exroutes[i].masktxt);
}
+ if (!strcmp(ifnamepat,default_ifnamepat))
+ ifnameallow= 1;
+ if (!ifnameallow) {
+ fprintf(stderr,
+ "userv-ipif service: access denied for interface name %s\n",
+ ifnamepat);
+ allallow= 0;
+ }
if (!allallow) fatal("access denied");
}
}
-static void setsigmask(const sigset_t *ss) {
- int r;
-
- r= sigprocmask(SIG_SETMASK, ss, 0);
- if (r) sysfatal("[un]block signals");
-}
-
-static void setsignals(void (*handler)(int), struct sigaction *sa, int chldflags) {
- const int *signalp;
- int r, sig;
-
- sa->sa_handler= handler;
- sa->sa_flags= 0;
- for (signalp=signals; (sig=*signalp); signalp++) {
- r= sigaction(sig, sa, 0); if (r) sysfatal("uncatch signal");
- }
- sa->sa_flags= chldflags;
- r= sigaction(SIGCHLD, sa, 0); if (r) sysfatal("uncatch children");
-}
-
-static void infork(void) {
- struct sigaction sa;
-
- memset(&pids,0,sizeof(pids));
- sigemptyset(&sa.sa_mask);
- setsignals(SIG_DFL,&sa,0);
- setsigmask(&emptyset);
- undoslattach= 0;
-}
-
-static pid_t makesubproc(void (*entry)(void)) {
- pid_t pid;
-
- pid= fork(); if (pid == (pid_t)-1) sysfatal("fork for subprocess");
- if (pid) return pid;
-
- infork();
- entry();
- abort();
-}
-
-static int task(void) {
- pid_t pid;
+/*
+ * Fork a child to run a helper command.
+ *
+ * Returns 1 in the child, straight after the fork.  In the parent it
+ * waits for that child (retrying waitpid on EINTR) and returns 0 only
+ * if the child exited with status 0; a nonzero exit status, death by
+ * signal, or any other wait status is reported through fatal(), with
+ * desc naming the helper in the message.  Fork or waitpid failure goes
+ * to sysfatal().
+ *
+ * NOTE(review): the fatal() messages below carry a "userv-ipif service: "
+ * prefix and a trailing "\n", unlike the other fatal() calls in this
+ * file - confirm whether fatal() already adds those, in which case they
+ * are duplicated here.
+ */
+static int task(const char *desc) {
+ pid_t pid, pidr;
+ int status;
pid= fork();
if (pid == (pid_t)-1) sysfatal("fork for task");
- if (!pid) { infork(); return 1; }
-
- pids.byname.task= pid;
- while (pids.byname.task) sigsuspend(&emptyset);
- return 0;
-}
-
-static void mdup2(int fd1, int fd2, const char *what) {
- int r;
+ if (!pid) return 1;
for (;;) {
- r= dup2(fd1,fd2); if (r==fd2) return;
- if (r!=-1) fatal("dup2 in %s gave wrong answer %d instead of %d",what,r,fd2);
- if (errno != EINTR) sysfatal("dup2 failed in %s",what);
+ pidr= waitpid(pid,&status,0);
+ if (pidr!=(pid_t)-1) break;
+ if (errno==EINTR) continue;
+ sysfatal("waitpid for task");
}
-}
-
-static void sl_entry(void) {
- mdup2(slpipe[1],1,"slattach child");
- execlp("slattach", "slattach", "-v", "-L", "-p",proto, ptyname, (char*)0);
- sysfatal("cannot exec slattach");
-}
-
-static void cin_entry(void) {
- mdup2(ptmaster,1,"cat input child");
- execlp("cat", "cat", (char*)0);
- sysfatal("cannot exec cat input");
-}
-
-static void cout_entry(void) {
- mdup2(ptmaster,0,"cat output child");
- execlp("cat", "cat", (char*)0);
- sysfatal("cannot exec cat output");
-}
-
-static void sighandler(int signum) {
- pid_t pid;
- int estatus, status;
- const char *taskfail;
-
- estatus= 4;
-
- if (signum == SIGCHLD) {
- for (;;) {
- pid= waitpid(-1,&status,WNOHANG);
- if (!pid || pid == (pid_t)-1) return;
-
- if (pid == pids.byname.task) {
- pids.byname.task= 0;
- if (!status) return;
- taskfail= "task";
- } else if (pid == pids.byname.cin) {
- pids.byname.cin= 0;
- if (status) {
- taskfail= "input cat";
- } else {
- taskfail= 0;
- estatus= 0;
- }
- } else if (pid == pids.byname.cout) {
- pids.byname.cout= 0;
- taskfail= "output cat";
- } else if (pid == pids.byname.sl) {
- pids.byname.sl= 0;
- taskfail= "slattach";
- } else {
- continue;
- }
- break;
- }
- if (taskfail) {
- if (WIFEXITED(status)) {
- fprintf(stderr,
- "userv-ipif service: %s unexpectedly exited with exit status %d\n",
- taskfail, WEXITSTATUS(status));
- } else if (WIFSIGNALED(status)) {
- fprintf(stderr,
- "userv-ipif service: %s unexpectedly killed by signal %s%s\n",
- taskfail, strsignal(WTERMSIG(status)),
- WCOREDUMP(status) ? " (core dumped)" : "");
- } else {
- fprintf(stderr, "userv-ipif service: %s unexpectedly terminated"
- " with unknown status code %d\n", taskfail, status);
- }
- }
+ assert(pidr==pid);
+
+ if (WIFEXITED(status)) {
+ if (WEXITSTATUS(status))
+ fatal("userv-ipif service: %s exited with error exit status %d\n",
+ desc, WEXITSTATUS(status));
+ } else if (WIFSIGNALED(status)) {
+ fatal("userv-ipif service: %s died due to signal %s%s\n",
+ desc, strsignal(WTERMSIG(status)),
+ WCOREDUMP(status) ? " (core dumped)" : "");
} else {
- fprintf(stderr,
- "userv-ipif service: received signal %d, terminating\n",
- signum);
+ fatal("userv-ipif service: %s unexpectedly terminated"
+ " with unknown status code %d\n", desc, status);
}
- terminate(estatus);
+ return 0;
}
-static void startup(void) {
+/*
+ * Create the tun network interface.
+ *
+ * Opens /dev/net/tun, marks the descriptor close-on-exec, and issues
+ * TUNSETIFF with IFF_TUN|IFF_NO_PI and the name pattern in ifnamepat.
+ * On return the global tunfd holds the open descriptor and the global
+ * ifname holds a malloc'd, NUL-terminated copy of the name left in
+ * ifr.ifr_name by the ioctl.
+ *
+ * An over-long name pattern is rejected with fatal(); every system call
+ * or allocation failure goes to sysfatal().
+ */
+static void createif(void) {
+ struct ifreq ifr;
int r;
- struct sigaction sa;
-
- sigfillset(&fullset);
- sigemptyset(&emptyset);
-
- ptmaster= getpt(); if (ptmaster==-1) sysfatal("allocate pty master");
- r= grantpt(ptmaster); if (r) sysfatal("grab/grant pty slave");
- ptyname= ptsname(ptmaster); if (!ptyname) sysfatal("get pty slave name");
- r= chmod(ptyname,0600); if (r) sysfatal("chmod pty slave");
- r= unlockpt(ptmaster); if (r) sysfatal("unlock pty");
-
- sigfillset(&sa.sa_mask);
- setsignals(sighandler,&sa,SA_NOCLDSTOP);
- setsigmask(&fullset);
-}
-static void startslattach(void) {
- static char ifnbuf[200];
+ memset(&ifr,0,sizeof(ifr));
+ ifr.ifr_flags= IFF_TUN | IFF_NO_PI;
- FILE *piper;
- int r, l, k;
+ /* ifnamepat comes from the command line, so check its length with a
+ * real test: an assert() disappears under NDEBUG and would then let
+ * the strcpy below overrun ifr.ifr_name. */
+ if (strlen(ifnamepat) >= sizeof(ifr.ifr_name))
+ fatal("interface name pattern too long");
+ strcpy(ifr.ifr_name, ifnamepat);
- r= pipe(slpipe); if (r) sysfatal("create pipe");
- piper= fdopen(slpipe[0],"r"); if (!piper) sysfatal("fdopen pipe");
+ tunfd= open("/dev/net/tun", O_RDWR);
+ /* open() signals failure with -1; 0 is a valid descriptor */
+ if (tunfd<0) sysfatal("open /dev/net/tun");
- undoslattach= 1;
- pids.byname.sl= makesubproc(sl_entry);
+ r= fcntl(tunfd, F_GETFD);
+ if (r==-1) sysfatal("fcntl(tunfd,F_GETFD)");
+ r= fcntl(tunfd, F_SETFD, r|FD_CLOEXEC);
+ if (r==-1) sysfatal("fcntl(tunfd,F_SETFD,|FD_CLOEXEC)");
- close(slpipe[1]);
- setsigmask(&emptyset);
- if (!fgets(ifnbuf,sizeof(ifnbuf),piper)) {
- if (ferror(piper)) sysfatal("cannot read ifname from slattach");
- else fatal("cannot read ifname from slattach");
- }
- setsigmask(&fullset);
- l= strlen(ifnbuf);
- if (l<=0 || ifnbuf[l-1] != '\n') fatal("slattach gave strange output `%s'",ifnbuf);
- ifnbuf[l-1]= 0;
- for (k=l; k>0 && ifnbuf[k-1]!=' '; k--);
- ifname= ifnbuf+k;
+ r= ioctl(tunfd, TUNSETIFF, (void*)&ifr);
+ if (r) sysfatal("ioctl TUNSETIFF");
+
+ /* ifr.ifr_name might not be null-terminated. crazy abi. */
+ ifname= malloc(sizeof(ifr.ifr_name)+1);
+ if (!ifname) sysfatal("malloc for interface name");
+ memcpy(ifname, ifr.ifr_name, sizeof(ifr.ifr_name));
+ ifname[sizeof(ifr.ifr_name)]= 0;
}
static void netconfigure(void) {
char mtutxt[100];
int i;
- if (task()) {
+ if (task("ifconfig")) {
sprintf(mtutxt,"%lu",mtu);
execlp("ifconfig", "ifconfig", ifname, localtxt,
- "netmask","255.255.255.255", "-broadcast", "pointopoint",peertxt,
+ "netmask","255.255.255.255", "pointopoint",peertxt, "-broadcast",
"mtu",mtutxt, "up", (char*)0);
sysfatal("cannot exec ifconfig");
}
for (i=0; i<nexroutes; i++) {
- if (task()) {
+ if (task("route")) {
execlp("route","route", "add", "-net",exroutes[i].prefixtxt,
"netmask",exroutes[i].masktxt,
"gw",peertxt, "dev",ifname, (char*)0);
}
}
-static void copydata(void) __attribute__((noreturn));
-static void copydata(void) {
+/*
+ * Add O_NONBLOCK to the file status flags of fd, keeping the flags that
+ * F_GETFL reports as already set.  Either fcntl failing is fatal via
+ * sysfatal().
+ */
+static void setnonblock(int fd) {
int r;
+ r= fcntl(fd,F_GETFL);
+ if (r==-1) sysfatal("fcntl F_GETFL");
+ r= fcntl(fd,F_SETFL, r|O_NONBLOCK);
+ if (r==-1) sysfatal("fcntl F_SETFL O_NONBLOCK");
+}
+
+/*
+ * Hand one decoded packet (len bytes at packet) to the kernel by
+ * writing it to tunfd.
+ *
+ * A zero-length packet is ignored.  The write is retried on EINTR; on
+ * EAGAIN or ENOMEM the packet is silently dropped; any other error is
+ * fatal via sysfatal().  A successful write is asserted to have taken
+ * the whole packet.
+ */
+static void rx_packet(const uint8_t *packet, int len) {
+ if (!len)
+ return;
+ for (;;) {
+ int r= write(tunfd, packet, len);
+ if (r<0) {
+ if (errno==EINTR) continue;
+ if (errno==EAGAIN || errno==ENOMEM) return; /* oh well */
+ sysfatal("error writing packet to tun (transmitting)");
+ }
+ assert(r==len);
+ return;
+ }
+}
+
+/* output_waiting: bytes of SLIP data in copydata's output_buf that have
+ * not yet been written to fd 1.
+ * input_waiting: bytes currently held in copydata's input_buf. */
+static int output_waiting, input_waiting;
+
+/* SLIP framing bytes (octal): END delimits packets; ESC followed by
+ * ESC_END or ESC_ESC stands for a literal END or ESC byte in the data. */
+#define SLIP_END 0300
+#define SLIP_ESC 0333
+#define SLIP_ESC_END 0334
+#define SLIP_ESC_ESC 0335
+
+/*
+ * Decode the SLIP bytes in input_buf that have not been scanned yet.
+ *
+ * input_buf: raw input; input_waiting bytes of it are valid.
+ * output_buf: where the packet being decoded is assembled; it must have
+ * room for mtu bytes.
+ *
+ * Decoding state is kept in statics between calls: scanned is how far
+ * into input_buf decoding has got, and output_len is how many decoded
+ * bytes of a still-unfinished packet sit in output_buf.  Every SLIP_END
+ * passes the bytes assembled so far to rx_packet().  Input up to the
+ * last SLIP_END seen is then dropped from the front of input_buf, with
+ * input_waiting and scanned adjusted to match.
+ *
+ * Calls fatal() on an invalid byte after SLIP_ESC, or when a packet
+ * would decode to more than mtu bytes.
+ */
+static void more_rx_data(uint8_t *input_buf, uint8_t *output_buf) {
+ /* we make slip_data never contain continuation of a packet */
+ /* input_buf is passed as a parameter since it's in copydata's stack frame */
+ static int scanned;
+ static int output_len;
+
+ uint8_t *op= output_buf + output_len;
+ const uint8_t *ip= input_buf + scanned;
+ const uint8_t *ip_end= input_buf + input_waiting;
+ int eaten= 0;
- pids.byname.cin= makesubproc(cin_entry);
for (;;) {
- r= write(1, "\300", 1); if (r==1) break;
- assert(r==-1); if (errno != EINTR) sysfatal("send initial delim to confirm");
+ if (ip>=ip_end) break;
+ uint8_t c= *ip++;
+ if (c==SLIP_END) {
+ /* end of packet: deliver it, and note that the input up to and
+ * including this END can be discarded */
+ rx_packet(output_buf, op-output_buf);
+ op= output_buf;
+ eaten= ip - input_buf;
+ continue;
+ }
+ if (c==SLIP_ESC) {
+ if (ip>=ip_end) { /* rescan this when there's more */ ip--; break; }
+ c= *ip++;
+ if (c==SLIP_ESC_END) c=SLIP_END;
+ else if (c==SLIP_ESC_ESC) c=SLIP_ESC;
+ else fatal("unexpected byte 0%o after SLIP_ESC",c);
+ }
+ if (op == output_buf+mtu)
+ fatal("SLIP packet exceeds mtu");
+ *op++= c;
}
- pids.byname.cout= makesubproc(cout_entry);
- for (;;) sigsuspend(&emptyset);
+ /* save the decoder position for the next call */
+ output_len= op - output_buf;
+ scanned= ip - input_buf;
+
+ /* drop the consumed input and rebase scanned onto the shifted buffer */
+ input_waiting -= eaten;
+ memmove(input_buf, input_buf+eaten, input_waiting);
+ scanned -= eaten;
+}
+
+/*
+ * SLIP-encode one packet (inlen bytes at ip) into output_buf and set
+ * output_waiting to the encoded length.
+ *
+ * The frame is SLIP_END, the data with each SLIP_END or SLIP_ESC byte
+ * replaced by its two-byte escape, then SLIP_END.  output_buf must be
+ * empty on entry (output_waiting == 0, asserted), and the encoded
+ * length is asserted not to exceed mtu*2+2.
+ */
+static void tx_packet(uint8_t *output_buf, const uint8_t *ip, int inlen) {
+ /* output_buf is passed as a parameter since it's in copydata's stack frame */
+ assert(!output_waiting);
+ uint8_t *op= output_buf;
+
+ *op++= SLIP_END;
+ while (inlen-- >0) {
+ uint8_t c= *ip++;
+ if (c==SLIP_END) { *op++= SLIP_ESC; *op++= SLIP_ESC_END; }
+ else if (c==SLIP_ESC) { *op++= SLIP_ESC; *op++= SLIP_ESC_ESC; }
+ else *op++= c;
+ }
+ *op++= SLIP_END;
+ assert(op <= output_buf + mtu*2+2);
+
+ output_waiting= op - output_buf;
+}
+
+/*
+ * Main data loop; never returns.
+ *
+ * Shuttles packets between the SLIP byte stream on fd 0 / fd 1 and the
+ * tun device tunfd.  SLIP input read from fd 0 is decoded by
+ * more_rx_data(); packets read from tunfd are encoded by tx_packet()
+ * into output_buf and written to fd 1.  End of file on fd 0 ends the
+ * program through terminate(0); unexpected I/O errors are fatal.
+ */
+static void copydata(void) __attribute__((noreturn));
+static void copydata(void) {
+ uint8_t output_buf[mtu*2+2];
+ uint8_t input_buf[mtu*2+2];
+ uint8_t rx_packet_buf[mtu];
+
+ int r, i;
+
+ struct pollfd polls[3];
+ memset(polls, 0, sizeof(polls));
+
+ polls[0].fd= 0; polls[0].events= POLLIN;
+ polls[1].fd= 1;
+ polls[2].fd= tunfd;
+
+ /* We don't do flow control on input packets; instead, we just throw
+ * away ones which the kernel doesn't accept. So we always poll for
+ * those.
+ *
+ * Output packets we buffer, so we poll only as appropriate for those.
+ */
+
+ /* Start by transmitting one END byte to say we're ready. */
+ output_buf[0]= SLIP_END;
+ output_waiting= 1;
+
+ for (;;) {
+ /* try to flush pending output before polling */
+ if (output_waiting) {
+ r= write(1, output_buf, output_waiting);
+ if (r<0) {
+ if (errno==EINTR) continue;
+ if (errno!=EAGAIN)
+ sysfatal("error writing SLIP output (packets being received)");
+ } else {
+ assert(r>0);
+ output_waiting -= r;
+ memmove(output_buf, output_buf+r, output_waiting);
+ }
+ }
+ /* while output is still pending, wait for fd 1 to become writable
+ * and stop taking packets from tun; otherwise the reverse */
+ if (output_waiting) {
+ polls[1].events |= POLLOUT;
+ polls[2].events &= ~POLLIN;
+ } else {
+ polls[1].events &= ~POLLOUT;
+ polls[2].events |= POLLIN;
+ }
+ r= poll(polls,3,-1);
+
+ if (r<0) {
+ if (errno==EINTR) continue;
+ sysfatal("poll() failed");
+ }
+ assert(r>0); /* we used an infinite timeout */
+
+ /* any reported condition we did not ask for is fatal */
+ for (i=0; i<sizeof(polls)/sizeof(polls[0]); i++)
+ if (polls[i].revents & ~polls[i].events)
+ fatal("unexpected revents 0x%x for fd=%d",
+ polls[i].revents, polls[i].fd);
+
+ /* This tests events, not revents, so the read is attempted on every
+ * pass; EAGAIN and EINTR are tolerated below. */
+ if (polls[0].events & POLLIN) {
+ int want= sizeof(input_buf) - input_waiting;
+ if (want<0) fatal("incoming packet necessarily exceeds MTU");
+ r= read(0, input_buf + input_waiting, want);
+ if (r>0) {
+ input_waiting += r;
+ assert(input_waiting <= sizeof(input_buf));
+ more_rx_data(input_buf, rx_packet_buf);
+ } else if (r==0) {
+ terminate(0);
+ } else {
+ if (!(errno==EINTR || errno==EAGAIN))
+ sysfatal("error reading input SLIP data (packets to transmit)");
+ }
+ }
+
+ /* We handle what would be (polls[1].events & POLLOUT) above,
+ * unconditionally. That eliminates the need to poll in the usual case */
+
+ /* POLLIN is only set in events for tun when output_buf is empty, so
+ * tx_packet's precondition holds here */
+ if (polls[2].events & POLLIN) {
+ uint8_t packet_buf[mtu];
+ r= read(tunfd, packet_buf, mtu);
+ if (r>0) {
+ tx_packet(output_buf, packet_buf, r);
+ } else {
+ assert(r<0);
+ if (!(errno==EAGAIN || errno==EWOULDBLOCK))
+ sysfatal("error reading packet (being transmitted) from tun");
+ }
+ }
+ }
}
int main(int argc, const char *const *argv) {
checkpermit();
if (!proto) dumpdebug();
- startup();
- startslattach();
+ createif();
netconfigure();
+ setnonblock(tunfd);
+ setnonblock(0);
+ setnonblock(1);
copydata();
}