chiark / gitweb /
socket: Allow selection of TCP Congestion Avoidance algorithm to socket
[elogind.git] / src / socket.c
index 00fb568b02506817d1edd1be969e79a662c6323d..2da3215a3d3ed12d76615bd2c35ad87818391ed5 100644 (file)
 #include <sys/epoll.h>
 #include <signal.h>
 #include <arpa/inet.h>
+#include <selinux/selinux.h>
 
 #include "unit.h"
 #include "socket.h"
+#include "netinet/tcp.h"
 #include "log.h"
 #include "load-dropin.h"
 #include "load-fragment.h"
@@ -37,6 +39,8 @@
 #include "unit-name.h"
 #include "dbus-socket.h"
 #include "missing.h"
+#include "special.h"
+#include "bus-errors.h"
 
 static const UnitActiveState state_translation_table[_SOCKET_STATE_MAX] = {
         [SOCKET_DEAD] = UNIT_INACTIVE,
@@ -66,15 +70,10 @@ static void socket_init(Unit *u) {
 
         s->max_connections = 64;
 
-        s->keep_alive = false;
         s->priority = -1;
-        s->receive_buffer = 0;
-        s->send_buffer = 0;
         s->ip_tos = -1;
         s->ip_ttl = -1;
-        s->pipe_size = 0;
         s->mark = -1;
-        s->free_bind = false;
 
         exec_context_init(&s->exec_context);
 
@@ -118,6 +117,9 @@ static void socket_done(Unit *u) {
 
         s->service = NULL;
 
+        free(s->tcp_congestion);
+        s->tcp_congestion = NULL;
+
         free(s->bind_to_device);
         s->bind_to_device = NULL;
 
@@ -132,6 +134,42 @@ static void socket_done(Unit *u) {
         }
 }
 
+/* Makes sure s->service is filled in. If it is not yet (which is only
+ * expected for accepting sockets, see the assert), the unit named
+ * "<prefix>@<n_accepted>.service" is loaded and stored there. Returns 0 on
+ * success, -ENOMEM if the name cannot be built, or manager_load_unit()'s error. */
+static int socket_instantiate_service(Socket *s) {
+        char *unit_prefix, *unit_name;
+        Unit *loaded;
+        int k;
+
+        assert(s);
+
+        /* A service is already attached, nothing to instantiate. */
+        if (s->service)
+                return 0;
+
+        assert(s->accept);
+
+        unit_prefix = unit_name_to_prefix(s->meta.id);
+        if (!unit_prefix)
+                return -ENOMEM;
+
+        /* The prefix is only needed to format the name; release it right away. */
+        k = asprintf(&unit_name, "%s@%u.service", unit_prefix, s->n_accepted);
+        free(unit_prefix);
+        if (k < 0)
+                return -ENOMEM;
+
+        k = manager_load_unit(s->meta.manager, unit_name, NULL, NULL, &loaded);
+        free(unit_name);
+        if (k < 0)
+                return k;
+
+        s->service = SERVICE(loaded);
+        return 0;
+}
+
 static bool have_non_accept_socket(Socket *s) {
         SocketPort *p;
 
@@ -163,12 +201,17 @@ static int socket_verify(Socket *s) {
                 return -EINVAL;
         }
 
+        if (s->accept && have_non_accept_socket(s)) {
+                log_error("%s configured for accepting sockets, but sockets are non-accepting. Refusing.", s->meta.id);
+                return -EINVAL;
+        }
+
         if (s->accept && s->max_connections <= 0) {
                 log_error("%s's MaxConnection setting too small. Refusing.", s->meta.id);
                 return -EINVAL;
         }
 
-        if (s->exec_context.pam_name && s->kill_mode != KILL_CONTROL_GROUP) {
+        if (s->exec_context.pam_name && s->exec_context.kill_mode != KILL_CONTROL_GROUP) {
                 log_error("%s has PAM enabled. Kill mode must be set to 'control-group'. Refusing.", s->meta.id);
                 return -EINVAL;
         }
@@ -209,10 +252,7 @@ int socket_add_one_mount_link(Socket *s, Mount *m) {
         if (!socket_needs_mount(s, m->where))
                 return 0;
 
-        if ((r = unit_add_dependency(UNIT(m), UNIT_BEFORE, UNIT(s), true)) < 0)
-                return r;
-
-        if ((r = unit_add_dependency(UNIT(s), UNIT_REQUIRES, UNIT(m), true)) < 0)
+        if ((r = unit_add_two_dependencies(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, UNIT(m), true)) < 0)
                 return r;
 
         return 0;
@@ -249,6 +289,17 @@ static int socket_add_device_link(Socket *s) {
         return r;
 }
 
+/* Adds After+Requires on the sysinit target (system manager only) and Before+Conflicts on the shutdown target. */
+static int socket_add_default_dependencies(Socket *s) {
+        int err = 0;
+        assert(s);
+        if (s->meta.manager->running_as == MANAGER_SYSTEM)
+                err = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, NULL, true);
+        if (err < 0)
+                return err;
+        return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, NULL, true);
+}
+
 static int socket_load(Unit *u) {
         Socket *s = SOCKET(u);
         int r;
@@ -263,7 +314,7 @@ static int socket_load(Unit *u) {
         if (u->meta.load_state == UNIT_LOADED) {
 
                 if (have_non_accept_socket(s)) {
-                        if ((r = unit_load_related_unit(u, ".service", (Unit**) &s->service)))
+                        if ((r = unit_load_related_unit(u, ".service", (Unit**) &s->service)) < 0)
                                 return r;
 
                         if ((r = unit_add_dependency(u, UNIT_BEFORE, UNIT(s->service), true)) < 0)
@@ -281,6 +332,10 @@ static int socket_load(Unit *u) {
 
                 if ((r = unit_add_default_cgroup(u)) < 0)
                         return r;
+
+                if (s->meta.default_dependencies)
+                        if ((r = socket_add_default_dependencies(s)) < 0)
+                                return r;
         }
 
         return socket_verify(s);
@@ -317,19 +372,19 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
                 "%sSocket State: %s\n"
                 "%sBindIPv6Only: %s\n"
                 "%sBacklog: %u\n"
-                "%sKillMode: %s\n"
                 "%sSocketMode: %04o\n"
                 "%sDirectoryMode: %04o\n"
                 "%sKeepAlive: %s\n"
-                "%sFreeBind: %s\n",
+                "%sFreeBind: %s\n"
+                "%sTCPCongestion: %s\n",
                 prefix, socket_state_to_string(s->state),
                 prefix, socket_address_bind_ipv6_only_to_string(s->bind_ipv6_only),
                 prefix, s->backlog,
-                prefix, kill_mode_to_string(s->kill_mode),
                 prefix, s->socket_mode,
                 prefix, s->directory_mode,
                 prefix, yes_no(s->keep_alive),
-                prefix, yes_no(s->free_bind));
+                prefix, yes_no(s->free_bind),
+                prefix, s->tcp_congestion);
 
         if (s->control_pid > 0)
                 fprintf(f,
@@ -448,8 +503,7 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) {
                         b = ntohl(remote.in.sin_addr.s_addr);
 
                 if (asprintf(&r,
-                             "%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
-                             nr,
+                             "%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
                              a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF,
                              ntohs(local.in.sin_port),
                              b >> 24, (b >> 16) & 0xFF, (b >> 8) & 0xFF, b & 0xFF,
@@ -471,8 +525,7 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) {
                                 *b = remote.in6.sin6_addr.s6_addr+12;
 
                         if (asprintf(&r,
-                                     "%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
-                                     nr,
+                                     "%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
                                      a[0], a[1], a[2], a[3],
                                      ntohs(local.in6.sin6_port),
                                      b[0], b[1], b[2], b[3],
@@ -482,8 +535,7 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) {
                         char a[INET6_ADDRSTRLEN], b[INET6_ADDRSTRLEN];
 
                         if (asprintf(&r,
-                                     "%u-%s:%u-%s:%u",
-                                     nr,
+                                     "%s:%u-%s:%u",
                                      inet_ntop(AF_INET6, &local.in6.sin6_addr, a, sizeof(a)),
                                      ntohs(local.in6.sin6_port),
                                      inet_ntop(AF_INET6, &remote.in6.sin6_addr, b, sizeof(b)),
@@ -586,9 +638,13 @@ static void socket_apply_socket_options(Socket *s, int fd) {
                 if (r < 0 && x < 0)
                         log_warning("IP_TTL/IPV6_UNICAST_HOPS failed: %m");
         }
+
+        if (s->tcp_congestion)
+                if (setsockopt(fd, SOL_TCP, TCP_CONGESTION, s->tcp_congestion, strlen(s->tcp_congestion)+1) < 0)
+                        log_warning("TCP_CONGESTION failed: %m");
 }
 
-static void socket_apply_pipe_options(Socket *s, int fd) {
+static void socket_apply_fifo_options(Socket *s, int fd) {
         assert(s);
         assert(fd >= 0);
 
@@ -597,12 +653,153 @@ static void socket_apply_pipe_options(Socket *s, int fd) {
                         log_warning("F_SETPIPE_SZ: %m");
 }
 
+/* Asks libselinux which context a new object of class "process" gets for the
+ * pair (context of the calling process, file context of 'exe') and stores it
+ * in *newcon. Returns >= 0 on success, a negative errno-style code on failure. */
+static int selinux_getconfromexe(
+                const char *exe,
+                security_context_t *newcon) {
+
+        security_context_t mycon = NULL, fcon = NULL;
+        security_class_t sclass;
+        int r;
+
+        if (getcon(&mycon) < 0 || getfilecon(exe, &fcon) < 0) {
+                r = -errno;
+                goto finish;
+        }
+
+        /* Class 0 signals a failed lookup; never feed it to security_compute_create() */
+        if ((sclass = string_to_security_class("process")) == 0)
+                r = -EINVAL;
+        else if ((r = security_compute_create(mycon, fcon, sclass, newcon)) < 0)
+                r = -errno;
+
+finish:
+        freecon(mycon);
+        freecon(fcon);
+        return r;
+}
+
+/* Computes, via security_compute_create(), the context for a new object of
+ * class 'class' from scon and the file context of 'path'; stored in *fcon. */
+static int selinux_getfileconfrompath(
+                const security_context_t scon,
+                const char *path,
+                const char *class,
+                security_context_t *fcon) {
+
+        security_context_t path_con = NULL;
+        security_class_t cls;
+        int rc = getfilecon(path, &path_con);
+
+        if (rc >= 0) {
+                cls = string_to_security_class(class);
+                /* Class 0 is handled exactly like a failed libselinux call. */
+                rc = cls != 0 ? security_compute_create(scon, path_con, cls, fcon) : -1;
+        }
+        /* Every failure is reported to the caller as the negated errno. */
+        rc = rc < 0 ? -errno : rc;
+        freecon(path_con);
+        return rc;
+}
+
+/* Creates the FIFO at 'path' (an already existing one is re-used) and opens it.
+ * If scon is set, an SELinux file creation context for class "fifo_file" is
+ * installed first; problems with that are fatal only when SELinux is enforcing.
+ * The opened node must be a FIFO owned by the caller's uid/gid with permission
+ * bits socket_mode & ~umask, otherwise -EEXIST is returned.
+ * Returns 0 and stores the descriptor in *_fd, or a negative errno-style code. */
+static int fifo_address_create(
+                const char *path,
+                mode_t directory_mode,
+                mode_t socket_mode,
+                security_context_t scon,
+                int *_fd) {
+
+        int fd = -1, r = 0;
+        struct stat st;
+        mode_t old_mask;
+        security_context_t filecon = NULL;
+
+        assert(path);
+        assert(_fd);
+
+        mkdir_parents(path, directory_mode);
+
+        if (scon) {
+                if ((r = selinux_getfileconfrompath(scon, path, "fifo_file", &filecon)) == 0) {
+                        if (setfscreatecon(filecon) < 0) {
+                                /* Save errno before logging can overwrite it */
+                                r = -errno;
+                                log_error("Failed to set SELinux file context (%s) on %s: %m", scon, path);
+                        }
+                        freecon(filecon);
+                }
+
+                if (r < 0  && security_getenforce() == 1)
+                        goto fail;
+        }
+
+        /* Enforce the right access mode for the fifo; keep the original umask bits too */
+        old_mask = umask(~ socket_mode);
+        umask(~socket_mode | old_mask);
+
+        /* A FIFO that already exists is not an error, it is verified below */
+        r = (mkfifo(path, socket_mode) < 0 && errno != EEXIST) ? -errno : 0;
+        umask(old_mask);
+
+        if (r < 0)
+                goto fail;
+
+        if ((fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW)) < 0) {
+                r = -errno;
+                goto fail;
+        }
+
+        setfscreatecon(NULL);
+
+        if (fstat(fd, &st) < 0) {
+                r = -errno;
+                goto fail;
+        }
+
+        if (!S_ISFIFO(st.st_mode) ||
+            (st.st_mode & 0777) != (socket_mode & ~old_mask) ||
+            st.st_uid != getuid() ||
+            st.st_gid != getgid()) {
+                r = -EEXIST;
+                goto fail;
+        }
+
+        *_fd = fd;
+        return 0;
+
+fail:
+        setfscreatecon(NULL);
+        if (fd >= 0)
+                close_nointr_nofail(fd);
+
+        return r;
+}
+
 static int socket_open_fds(Socket *s) {
         SocketPort *p;
         int r;
+        security_context_t scon = NULL;
 
         assert(s);
 
+        if ((r = socket_instantiate_service(s)) < 0)
+                return r;
+
+        if (selinux_getconfromexe(s->service->exec_command[SERVICE_EXEC_START]->path, &scon) < 0) {
+                log_error("Failed to get SELinux exec context for %s \n", s->service->exec_command[SERVICE_EXEC_START]->path);
+                if (security_getenforce() == 1)
+                        return -errno;
+        }
+
+        log_debug("SELinux Socket context for %s set to %s\n", s->service->exec_command[SERVICE_EXEC_START]->path, scon);
         LIST_FOREACH(port, p, s->ports) {
 
                 if (p->fd >= 0)
@@ -618,47 +815,34 @@ static int socket_open_fds(Socket *s) {
                                              s->free_bind,
                                              s->directory_mode,
                                              s->socket_mode,
+                                             scon,
                                              &p->fd)) < 0)
                                 goto rollback;
 
                         socket_apply_socket_options(s, p->fd);
 
-                } else {
-                        struct stat st;
-                        assert(p->type == SOCKET_FIFO);
-
-                        mkdir_parents(p->path, s->directory_mode);
+                } else  if (p->type == SOCKET_FIFO) {
 
-                        if (mkfifo(p->path, s->socket_mode) < 0 && errno != EEXIST) {
-                                r = -errno;
-                                goto rollback;
-                        }
-
-                        if ((p->fd = open(p->path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW)) < 0) {
-                                r = -errno;
-                                goto rollback;
-                        }
-
-                        if (fstat(p->fd, &st) < 0) {
-                                r = -errno;
+                        if ((r = fifo_address_create(
+                                             p->path,
+                                             s->directory_mode,
+                                             s->socket_mode,
+                                             scon,
+                                             &p->fd)) < 0)
                                 goto rollback;
-                        }
-
-                        /* FIXME verify user, access mode */
 
-                        if (!S_ISFIFO(st.st_mode)) {
-                                r = -EEXIST;
-                                goto rollback;
-                        }
+                        socket_apply_fifo_options(s, p->fd);
 
-                        socket_apply_pipe_options(s, p->fd);
-                }
+                } else
+                        assert_not_reached("Unknown port type");
         }
 
+        freecon(scon);
         return 0;
 
 rollback:
         socket_close_fds(s);
+        freecon(scon);
         return r;
 }
 
@@ -813,6 +997,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
                        s->meta.manager->environment,
                        true,
                        true,
+                       true,
                        s->meta.manager->confirm_spawn,
                        s->meta.cgroup_bondings,
                        &pid);
@@ -881,10 +1066,10 @@ static void socket_enter_signal(Socket *s, SocketState state, bool success) {
         if (!success)
                 s->failure = true;
 
-        if (s->kill_mode != KILL_NONE) {
-                int sig = (state == SOCKET_STOP_PRE_SIGTERM || state == SOCKET_FINAL_SIGTERM) ? SIGTERM : SIGKILL;
+        if (s->exec_context.kill_mode != KILL_NONE) {
+                int sig = (state == SOCKET_STOP_PRE_SIGTERM || state == SOCKET_FINAL_SIGTERM) ? s->exec_context.kill_signal : SIGKILL;
 
-                if (s->kill_mode == KILL_CONTROL_GROUP) {
+                if (s->exec_context.kill_mode == KILL_CONTROL_GROUP) {
 
                         if ((r = cgroup_bonding_kill_list(s->meta.cgroup_bondings, sig)) < 0) {
                                 if (r != -EAGAIN && r != -ESRCH)
@@ -894,7 +1079,7 @@ static void socket_enter_signal(Socket *s, SocketState state, bool success) {
                 }
 
                 if (!sent && s->control_pid > 0)
-                        if (kill(s->kill_mode == KILL_PROCESS ? s->control_pid : -s->control_pid, sig) < 0 && errno != ESRCH) {
+                        if (kill(s->exec_context.kill_mode == KILL_PROCESS ? s->control_pid : -s->control_pid, sig) < 0 && errno != ESRCH) {
                                 r = -errno;
                                 goto fail;
                         }
@@ -1017,17 +1202,37 @@ fail:
 
 static void socket_enter_running(Socket *s, int cfd) {
         int r;
+        DBusError error;
 
         assert(s);
+        dbus_error_init(&error);
+
+        /* We don't take connections anymore if we are supposed to
+         * shut down anyway */
+        if (s->meta.job && s->meta.job->type == JOB_STOP) {
+                if (cfd >= 0)
+                        close_nointr_nofail(cfd);
+                else  {
+                        /* Flush all sockets by closing and reopening them */
+                        socket_close_fds(s);
+
+                        if ((r = socket_watch_fds(s)) < 0) {
+                                log_warning("%s failed to watch sockets: %s", s->meta.id, strerror(-r));
+                                socket_enter_stop_pre(s, false);
+                        }
+                }
+
+                return;
+        }
 
         if (cfd < 0) {
-                if ((r = manager_add_job(s->meta.manager, JOB_START, UNIT(s->service), JOB_REPLACE, true, NULL)) < 0)
+                if ((r = manager_add_job(s->meta.manager, JOB_START, UNIT(s->service), JOB_REPLACE, true, &error, NULL)) < 0)
                         goto fail;
 
                 socket_set_state(s, SOCKET_RUNNING);
         } else {
-                Unit *u;
-                char *prefix, *instance, *name;
+                char *prefix, *instance = NULL, *name;
+                Service *service;
 
                 if (s->n_connections >= s->max_connections) {
                         log_warning("Too many incoming connections (%u)", s->n_connections);
@@ -1035,7 +1240,10 @@ static void socket_enter_running(Socket *s, int cfd) {
                         return;
                 }
 
-                if ((r = instance_from_socket(cfd, s->n_accepted++, &instance)) < 0)
+                if ((r = socket_instantiate_service(s)) < 0)
+                        goto fail;
+
+                if ((r = instance_from_socket(cfd, s->n_accepted, &instance)) < 0)
                         goto fail;
 
                 if (!(prefix = unit_name_to_prefix(s->meta.id))) {
@@ -1053,31 +1261,38 @@ static void socket_enter_running(Socket *s, int cfd) {
                         goto fail;
                 }
 
-                r = manager_load_unit(s->meta.manager, name, NULL, &u);
-                free(name);
-
-                if (r < 0)
+                if ((r = unit_add_name(UNIT(s->service), name)) < 0) {
+                        free(name);
                         goto fail;
+                }
+
+                service = s->service;
+                s->service = NULL;
+                s->n_accepted ++;
 
-                if ((r = service_set_socket_fd(SERVICE(u), cfd, s)) < 0)
+                unit_choose_id(UNIT(service), name);
+                free(name);
+
+                if ((r = service_set_socket_fd(service, cfd, s)) < 0)
                         goto fail;
 
                 cfd = -1;
-
                 s->n_connections ++;
 
-                if ((r = manager_add_job(u->meta.manager, JOB_START, u, JOB_REPLACE, true, NULL)) < 0)
+                if ((r = manager_add_job(s->meta.manager, JOB_START, UNIT(service), JOB_REPLACE, true, &error, NULL)) < 0)
                         goto fail;
         }
 
         return;
 
 fail:
-        log_warning("%s failed to queue socket startup job: %s", s->meta.id, strerror(-r));
+        log_warning("%s failed to queue socket startup job: %s", s->meta.id, bus_error(&error, r));
         socket_enter_stop_pre(s, false);
 
         if (cfd >= 0)
                 close_nointr_nofail(cfd);
+
+        dbus_error_free(&error);
 }
 
 static void socket_run_next(Socket *s, bool success) {
@@ -1154,21 +1369,23 @@ static int socket_stop(Unit *u) {
 
         assert(s);
 
-        /* We cannot fulfill this request right now, try again later
-         * please! */
-        if (s->state == SOCKET_START_PRE ||
-            s->state == SOCKET_START_POST)
-                return -EAGAIN;
-
         /* Already on it */
         if (s->state == SOCKET_STOP_PRE ||
             s->state == SOCKET_STOP_PRE_SIGTERM ||
             s->state == SOCKET_STOP_PRE_SIGKILL ||
             s->state == SOCKET_STOP_POST ||
             s->state == SOCKET_FINAL_SIGTERM ||
-            s->state == SOCKET_FINAL_SIGTERM)
+            s->state == SOCKET_FINAL_SIGKILL)
                 return 0;
 
+        /* If there's already something running we go directly into
+         * kill mode. */
+        if (s->state == SOCKET_START_PRE ||
+            s->state == SOCKET_START_POST) {
+                socket_enter_signal(s, SOCKET_STOP_PRE_SIGTERM, true);
+                return -EAGAIN;
+        }
+
         assert(s->state == SOCKET_LISTENING || s->state == SOCKET_RUNNING);
 
         socket_enter_stop_pre(s, true);
@@ -1386,12 +1603,17 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
         s->control_pid = 0;
 
         success = is_clean_exit(code, status);
-        s->failure = s->failure || !success;
 
-        if (s->control_command)
-                exec_status_fill(&s->control_command->exec_status, pid, code, status);
+        if (s->control_command) {
+                exec_status_exit(&s->control_command->exec_status, pid, code, status);
+
+                if (s->control_command->ignore)
+                        success = true;
+        }
 
-        log_debug("%s control process exited, code=%s status=%i", u->meta.id, sigchld_code_to_string(code), status);
+        log_full(success ? LOG_DEBUG : LOG_NOTICE,
+                 "%s control process exited, code=%s status=%i", u->meta.id, sigchld_code_to_string(code), status);
+        s->failure = s->failure || !success;
 
         if (s->control_command && s->control_command->command_next && success) {
                 log_debug("%s running next command for state %s", u->meta.id, socket_state_to_string(s->state));
@@ -1552,6 +1774,17 @@ void socket_connection_unref(Socket *s) {
         log_debug("%s: One connection closed, %u left.", s->meta.id, s->n_connections);
 }
 
+/* Moves a socket out of SOCKET_MAINTENANCE to SOCKET_DEAD and clears its failure flag. */
+static void socket_reset_maintenance(Unit *u) {
+        Socket *sock = SOCKET(u);
+        assert(sock);
+
+        /* Other states are left alone; only the failure flag is always reset. */
+        if (sock->state == SOCKET_MAINTENANCE)
+                socket_set_state(sock, SOCKET_DEAD);
+        sock->failure = false;
+}
+
 static const char* const socket_state_table[_SOCKET_STATE_MAX] = {
         [SOCKET_DEAD] = "dead",
         [SOCKET_START_PRE] = "start-pre",
@@ -1604,5 +1837,7 @@ const UnitVTable socket_vtable = {
         .sigchld_event = socket_sigchld_event,
         .timer_event = socket_timer_event,
 
+        .reset_maintenance = socket_reset_maintenance,
+
         .bus_message_handler = bus_socket_message_handler
 };