chiark / gitweb /
socket: Allow selection of TCP Congestion Avoidance algorithm to socket
[elogind.git] / src / socket.c
index 7ff2927aff984152251ffd5de28895bf77c822e0..2da3215a3d3ed12d76615bd2c35ad87818391ed5 100644 (file)
 #include <sys/epoll.h>
 #include <signal.h>
 #include <arpa/inet.h>
+#include <selinux/selinux.h>
 
 #include "unit.h"
 #include "socket.h"
+#include "netinet/tcp.h"
 #include "log.h"
 #include "load-dropin.h"
 #include "load-fragment.h"
@@ -115,6 +117,9 @@ static void socket_done(Unit *u) {
 
         s->service = NULL;
 
+        free(s->tcp_congestion);
+        s->tcp_congestion = NULL;
+
         free(s->bind_to_device);
         s->bind_to_device = NULL;
 
@@ -129,6 +134,42 @@ static void socket_done(Unit *u) {
         }
 }
 
+static int socket_instantiate_service(Socket *s) {
+        char *prefix, *name;
+        int r;
+        Unit *u;
+
+        assert(s);
+
+        /* This fills in s->service if it isn't filled in yet. For
+         * Accept=yes sockets we create the next connection service
+         * here. For Accept=no this is mostly a NOP since the service
+         * is figured out at load time anyway. */
+
+        if (s->service)
+                return 0;
+
+        assert(s->accept);
+
+        if (!(prefix = unit_name_to_prefix(s->meta.id)))
+                return -ENOMEM;
+
+        r = asprintf(&name, "%s@%u.service", prefix, s->n_accepted);
+        free(prefix);
+
+        if (r < 0)
+                return -ENOMEM;
+
+        r = manager_load_unit(s->meta.manager, name, NULL, NULL, &u);
+        free(name);
+
+        if (r < 0)
+                return r;
+
+        s->service = SERVICE(u);
+        return 0;
+}
+
 static bool have_non_accept_socket(Socket *s) {
         SocketPort *p;
 
@@ -160,6 +201,11 @@ static int socket_verify(Socket *s) {
                 return -EINVAL;
         }
 
+        if (s->accept && have_non_accept_socket(s)) {
+                log_error("%s configured for accepting sockets, but sockets are non-accepting. Refusing.", s->meta.id);
+                return -EINVAL;
+        }
+
         if (s->accept && s->max_connections <= 0) {
                 log_error("%s's MaxConnection setting too small. Refusing.", s->meta.id);
                 return -EINVAL;
@@ -329,14 +375,16 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
                 "%sSocketMode: %04o\n"
                 "%sDirectoryMode: %04o\n"
                 "%sKeepAlive: %s\n"
-                "%sFreeBind: %s\n",
+                "%sFreeBind: %s\n"
+                "%sTCPCongestion: %s\n",
                 prefix, socket_state_to_string(s->state),
                 prefix, socket_address_bind_ipv6_only_to_string(s->bind_ipv6_only),
                 prefix, s->backlog,
                 prefix, s->socket_mode,
                 prefix, s->directory_mode,
                 prefix, yes_no(s->keep_alive),
-                prefix, yes_no(s->free_bind));
+                prefix, yes_no(s->free_bind),
+                prefix, s->tcp_congestion);
 
         if (s->control_pid > 0)
                 fprintf(f,
@@ -455,8 +503,7 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) {
                         b = ntohl(remote.in.sin_addr.s_addr);
 
                 if (asprintf(&r,
-                             "%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
-                             nr,
+                             "%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
                              a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF,
                              ntohs(local.in.sin_port),
                              b >> 24, (b >> 16) & 0xFF, (b >> 8) & 0xFF, b & 0xFF,
@@ -478,8 +525,7 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) {
                                 *b = remote.in6.sin6_addr.s6_addr+12;
 
                         if (asprintf(&r,
-                                     "%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
-                                     nr,
+                                     "%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
                                      a[0], a[1], a[2], a[3],
                                      ntohs(local.in6.sin6_port),
                                      b[0], b[1], b[2], b[3],
@@ -489,8 +535,7 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) {
                         char a[INET6_ADDRSTRLEN], b[INET6_ADDRSTRLEN];
 
                         if (asprintf(&r,
-                                     "%u-%s:%u-%s:%u",
-                                     nr,
+                                     "%s:%u-%s:%u",
                                      inet_ntop(AF_INET6, &local.in6.sin6_addr, a, sizeof(a)),
                                      ntohs(local.in6.sin6_port),
                                      inet_ntop(AF_INET6, &remote.in6.sin6_addr, b, sizeof(b)),
@@ -593,9 +638,13 @@ static void socket_apply_socket_options(Socket *s, int fd) {
                 if (r < 0 && x < 0)
                         log_warning("IP_TTL/IPV6_UNICAST_HOPS failed: %m");
         }
+
+        if (s->tcp_congestion)
+                if (setsockopt(fd, SOL_TCP, TCP_CONGESTION, s->tcp_congestion, strlen(s->tcp_congestion)+1) < 0)
+                        log_warning("TCP_CONGESTION failed: %m");
 }
 
-static void socket_apply_pipe_options(Socket *s, int fd) {
+static void socket_apply_fifo_options(Socket *s, int fd) {
         assert(s);
         assert(fd >= 0);
 
@@ -604,12 +653,153 @@ static void socket_apply_pipe_options(Socket *s, int fd) {
                         log_warning("F_SETPIPE_SZ: %m");
 }
 
+static int selinux_getconfromexe(
+                const char *exe,
+                security_context_t *newcon) {
+
+        security_context_t mycon = NULL, fcon = NULL;
+        security_class_t sclass;
+        int r = 0;
+
+        r = getcon(&mycon);
+        if (r < 0)
+                goto fail;
+
+        r = getfilecon(exe, &fcon);
+        if (r < 0)
+                goto fail;
+
+        sclass = string_to_security_class("process");
+        r = security_compute_create(mycon, fcon, sclass, newcon);
+
+fail:
+        if (r < 0)
+                r = -errno;
+
+        freecon(mycon);
+        freecon(fcon);
+        return r;
+}
+
+static int selinux_getfileconfrompath(
+                const security_context_t scon,
+                const char *path,
+                const char *class,
+                security_context_t *fcon) {
+
+        security_context_t dir_con = NULL;
+        security_class_t sclass;
+        int r = 0;
+
+        r = getfilecon(path, &dir_con);
+        if (r >= 0) {
+                r = -1;
+                if ((sclass = string_to_security_class(class)) != 0)
+                        r = security_compute_create(scon, dir_con, sclass, fcon);
+        }
+        if (r < 0)
+                r = -errno;
+
+        freecon(dir_con);
+        return r;
+}
+
+static int fifo_address_create(
+                const char *path,
+                mode_t directory_mode,
+                mode_t socket_mode,
+                security_context_t scon,
+                int *_fd) {
+
+        int fd = -1, r = 0;
+        struct stat st;
+        mode_t old_mask;
+        security_context_t filecon = NULL;
+
+        assert(path);
+        assert(_fd);
+
+        mkdir_parents(path, directory_mode);
+
+        if (scon) {
+                if (scon && ((r = selinux_getfileconfrompath(scon, path, "fifo_file", &filecon)) == 0)) {
+                        r = setfscreatecon(filecon);
+
+                        if (r < 0) {
+                                log_error("Failed to set SELinux file context (%s) on %s: %m", scon, path);
+                                r = -errno;
+                        }
+
+                        freecon(filecon);
+                }
+
+                if (r < 0  && security_getenforce() == 1)
+                        goto fail;
+        }
+
+        /* Enforce the right access mode for the fifo */
+        old_mask = umask(~ socket_mode);
+
+        /* Include the original umask in our mask */
+        umask(~socket_mode | old_mask);
+
+        r = mkfifo(path, socket_mode);
+        umask(old_mask);
+
+        if (r < 0) {
+                r = -errno;
+                goto fail;
+        }
+
+        if ((fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW)) < 0) {
+                r = -errno;
+                goto fail;
+        }
+
+        setfscreatecon(NULL);
+
+        if (fstat(fd, &st) < 0) {
+                r = -errno;
+                goto fail;
+        }
+
+        if (!S_ISFIFO(st.st_mode) ||
+            (st.st_mode & 0777) != (socket_mode & ~old_mask) ||
+            st.st_uid != getuid() ||
+            st.st_gid != getgid()) {
+
+                r = -EEXIST;
+                goto fail;
+        }
+
+        *_fd = fd;
+        return 0;
+
+fail:
+        setfscreatecon(NULL);
+        if (fd >= 0)
+                close_nointr_nofail(fd);
+
+        return r;
+}
+
 static int socket_open_fds(Socket *s) {
         SocketPort *p;
         int r;
+        security_context_t scon = NULL;
 
         assert(s);
 
+        if ((r = socket_instantiate_service(s)) < 0)
+                return r;
+
+        if (selinux_getconfromexe(s->service->exec_command[SERVICE_EXEC_START]->path, &scon) < 0) {
+                log_error("Failed to get SELinux exec context for %s \n", s->service->exec_command[SERVICE_EXEC_START]->path);
+                if (security_getenforce() == 1)
+                        return -errno;
+        }
+
+        log_debug("SELinux Socket context for %s set to %s\n", s->service->exec_command[SERVICE_EXEC_START]->path, scon);
         LIST_FOREACH(port, p, s->ports) {
 
                 if (p->fd >= 0)
@@ -625,47 +815,34 @@ static int socket_open_fds(Socket *s) {
                                              s->free_bind,
                                              s->directory_mode,
                                              s->socket_mode,
+                                             scon,
                                              &p->fd)) < 0)
                                 goto rollback;
 
                         socket_apply_socket_options(s, p->fd);
 
-                } else {
-                        struct stat st;
-                        assert(p->type == SOCKET_FIFO);
-
-                        mkdir_parents(p->path, s->directory_mode);
-
-                        if (mkfifo(p->path, s->socket_mode) < 0 && errno != EEXIST) {
-                                r = -errno;
-                                goto rollback;
-                        }
-
-                        if ((p->fd = open(p->path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW)) < 0) {
-                                r = -errno;
-                                goto rollback;
-                        }
+                } else  if (p->type == SOCKET_FIFO) {
 
-                        if (fstat(p->fd, &st) < 0) {
-                                r = -errno;
+                        if ((r = fifo_address_create(
+                                             p->path,
+                                             s->directory_mode,
+                                             s->socket_mode,
+                                             scon,
+                                             &p->fd)) < 0)
                                 goto rollback;
-                        }
 
-                        /* FIXME verify user, access mode */
+                        socket_apply_fifo_options(s, p->fd);
 
-                        if (!S_ISFIFO(st.st_mode)) {
-                                r = -EEXIST;
-                                goto rollback;
-                        }
-
-                        socket_apply_pipe_options(s, p->fd);
-                }
+                } else
+                        assert_not_reached("Unknown port type");
         }
 
+        freecon(scon);
         return 0;
 
 rollback:
         socket_close_fds(s);
+        freecon(scon);
         return r;
 }
 
@@ -1030,14 +1207,32 @@ static void socket_enter_running(Socket *s, int cfd) {
         assert(s);
         dbus_error_init(&error);
 
+        /* We don't take connections anymore if we are supposed to
+         * shut down anyway */
+        if (s->meta.job && s->meta.job->type == JOB_STOP) {
+                if (cfd >= 0)
+                        close_nointr_nofail(cfd);
+                else  {
+                        /* Flush all sockets by closing and reopening them */
+                        socket_close_fds(s);
+
+                        if ((r = socket_watch_fds(s)) < 0) {
+                                log_warning("%s failed to watch sockets: %s", s->meta.id, strerror(-r));
+                                socket_enter_stop_pre(s, false);
+                        }
+                }
+
+                return;
+        }
+
         if (cfd < 0) {
                 if ((r = manager_add_job(s->meta.manager, JOB_START, UNIT(s->service), JOB_REPLACE, true, &error, NULL)) < 0)
                         goto fail;
 
                 socket_set_state(s, SOCKET_RUNNING);
         } else {
-                Unit *u;
                 char *prefix, *instance = NULL, *name;
+                Service *service;
 
                 if (s->n_connections >= s->max_connections) {
                         log_warning("Too many incoming connections (%u)", s->n_connections);
@@ -1045,7 +1240,10 @@ static void socket_enter_running(Socket *s, int cfd) {
                         return;
                 }
 
-                if ((r = instance_from_socket(cfd, s->n_accepted++, &instance)) < 0)
+                if ((r = socket_instantiate_service(s)) < 0)
+                        goto fail;
+
+                if ((r = instance_from_socket(cfd, s->n_accepted, &instance)) < 0)
                         goto fail;
 
                 if (!(prefix = unit_name_to_prefix(s->meta.id))) {
@@ -1063,20 +1261,25 @@ static void socket_enter_running(Socket *s, int cfd) {
                         goto fail;
                 }
 
-                r = manager_load_unit(s->meta.manager, name, NULL, NULL, &u);
-                free(name);
-
-                if (r < 0)
+                if ((r = unit_add_name(UNIT(s->service), name)) < 0) {
+                        free(name);
                         goto fail;
+                }
 
-                if ((r = service_set_socket_fd(SERVICE(u), cfd, s)) < 0)
+                service = s->service;
+                s->service = NULL;
+                s->n_accepted ++;
+
+                unit_choose_id(UNIT(service), name);
+                free(name);
+
+                if ((r = service_set_socket_fd(service, cfd, s)) < 0)
                         goto fail;
 
                 cfd = -1;
-
                 s->n_connections ++;
 
-                if ((r = manager_add_job(u->meta.manager, JOB_START, u, JOB_REPLACE, true, &error, NULL)) < 0)
+                if ((r = manager_add_job(s->meta.manager, JOB_START, UNIT(service), JOB_REPLACE, true, &error, NULL)) < 0)
                         goto fail;
         }
 
@@ -1400,12 +1603,17 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
         s->control_pid = 0;
 
         success = is_clean_exit(code, status);
-        s->failure = s->failure || !success;
 
-        if (s->control_command)
+        if (s->control_command) {
                 exec_status_exit(&s->control_command->exec_status, pid, code, status);
 
-        log_debug("%s control process exited, code=%s status=%i", u->meta.id, sigchld_code_to_string(code), status);
+                if (s->control_command->ignore)
+                        success = true;
+        }
+
+        log_full(success ? LOG_DEBUG : LOG_NOTICE,
+                 "%s control process exited, code=%s status=%i", u->meta.id, sigchld_code_to_string(code), status);
+        s->failure = s->failure || !success;
 
         if (s->control_command && s->control_command->command_next && success) {
                 log_debug("%s running next command for state %s", u->meta.id, socket_state_to_string(s->state));
@@ -1566,6 +1774,17 @@ void socket_connection_unref(Socket *s) {
         log_debug("%s: One connection closed, %u left.", s->meta.id, s->n_connections);
 }
 
+static void socket_reset_maintenance(Unit *u) {
+        Socket *s = SOCKET(u);
+
+        assert(s);
+
+        if (s->state == SOCKET_MAINTENANCE)
+                socket_set_state(s, SOCKET_DEAD);
+
+        s->failure = false;
+}
+
 static const char* const socket_state_table[_SOCKET_STATE_MAX] = {
         [SOCKET_DEAD] = "dead",
         [SOCKET_START_PRE] = "start-pre",
@@ -1618,5 +1837,7 @@ const UnitVTable socket_vtable = {
         .sigchld_event = socket_sigchld_event,
         .timer_event = socket_timer_event,
 
+        .reset_maintenance = socket_reset_maintenance,
+
         .bus_message_handler = bus_socket_message_handler
 };