chiark / gitweb /
socket: Add support for TCP defer accept
authorSusant Sahani <susant@redhat.com>
Thu, 14 Aug 2014 17:36:12 +0000 (23:06 +0530)
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Thu, 14 Aug 2014 23:55:44 +0000 (19:55 -0400)
TCP_DEFER_ACCEPT Allow a listener to be awakened only when data
arrives on the socket. If TCP_DEFER_ACCEPT set on a server-side
listening socket, the TCP/IP stack will not to wait for the final
ACK packet and not to initiate the process until the first packet
of real data has arrived. After sending the SYN/ACK, the server will
then wait for a data packet from a client. Now, only three packets
will be sent over the network, and the connection establishment delay
will be significantly reduced.

man/systemd.socket.xml
src/core/dbus-socket.c
src/core/load-fragment-gperf.gperf.m4
src/core/socket.c
src/core/socket.h

index 5efb398b09d3f165dc91106b25a62c1c690b9d07..4483905832efe5feafe756515cd3253b90f39859 100644 (file)
                                 for details.).</para></listitem>
                         </varlistentry>
 
                                 for details.).</para></listitem>
                         </varlistentry>
 
+                        <varlistentry>
+                                <term><varname>DeferAccept=</varname></term>
+
+                                <listitem><para>Takes time (in
+                                seconds) as argument. If set, the
+                                listening process will be awakened
+                                only when data arrives on the socket,
+                                and not immediately when connection is
+                                established. When this option is set,
+                                the
+                                <constant>TCP_DEFER_ACCEPT</constant>
+                                socket option will be used (see
+                                <citerefentry><refentrytitle>tcp</refentrytitle><manvolnum>7</manvolnum></citerefentry>),
+                                and the kernel will ignore initial ACK
+                                packets without any data. The argument
+                                specifies the approximate amount of
+                                time the kernel should wait for
+                                incoming data before falling back to
+                                the normal behaviour of honouring
+                                empty ACK packets. This option is
+                                beneficial for protocols where the
+                                client sends the data first (e.g.
+                                HTTP, in contrast to SMTP), because
+                                the server process will not be woken
+                                up unnecessarily before it can take
+                                any action.
+                                </para>
+
+                                <para>If the client also uses the
+                                <constant>TCP_DEFER_ACCEPT</constant>
+                                option, the latency of the initial
+                                connection may be reduced, because the
+                                kernel will send data in the final
+                                packet establishing the connection
+                                (the third packet in the "three-way
+                                handshake").</para>
+
+                                <para>Disabled by default.</para>
+                                </listitem>
+                        </varlistentry>
+
                         <varlistentry>
                                 <term><varname>ReceiveBuffer=</varname></term>
                                 <term><varname>SendBuffer=</varname></term>
                         <varlistentry>
                                 <term><varname>ReceiveBuffer=</varname></term>
                                 <term><varname>SendBuffer=</varname></term>
index bdf111c9e3344e1a1b20d436c5e5095dfaf47870..cc55b8d71b31a4887ffdc9c0c9a9b4b538053fc1 100644 (file)
@@ -100,6 +100,7 @@ const sd_bus_vtable bus_socket_vtable[] = {
         SD_BUS_PROPERTY("KeepAliveTime", "t", bus_property_get_usec, offsetof(Socket, keep_alive_time), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("KeepAliveInterval", "t", bus_property_get_usec, offsetof(Socket, keep_alive_interval), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("KeepAliveProbes", "u", bus_property_get_unsigned, offsetof(Socket, keep_alive_cnt), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("KeepAliveTime", "t", bus_property_get_usec, offsetof(Socket, keep_alive_time), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("KeepAliveInterval", "t", bus_property_get_usec, offsetof(Socket, keep_alive_interval), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("KeepAliveProbes", "u", bus_property_get_unsigned, offsetof(Socket, keep_alive_cnt), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("DeferAccept" , "t", bus_property_get_usec, offsetof(Socket, defer_accept), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Priority", "i", bus_property_get_int, offsetof(Socket, priority), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ReceiveBuffer", "t", bus_property_get_size, offsetof(Socket, receive_buffer), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SendBuffer", "t", bus_property_get_size, offsetof(Socket, send_buffer), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Priority", "i", bus_property_get_int, offsetof(Socket, priority), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ReceiveBuffer", "t", bus_property_get_size, offsetof(Socket, receive_buffer), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SendBuffer", "t", bus_property_get_size, offsetof(Socket, send_buffer), SD_BUS_VTABLE_PROPERTY_CONST),
index 67bd0e520911cc416d52d72135607006ad514338..b4e2b257431c7f8fb10b48cd498026d0b2981438 100644 (file)
@@ -234,6 +234,7 @@ Socket.KeepAlive,                config_parse_bool,                  0,
 Socket.KeepAliveTime,            config_parse_sec,                   0,                             offsetof(Socket, keep_alive_time)
 Socket.KeepAliveInterval,        config_parse_sec,                   0,                             offsetof(Socket, keep_alive_interval)
 Socket.KeepAliveProbes,          config_parse_unsigned,              0,                             offsetof(Socket, keep_alive_cnt)
 Socket.KeepAliveTime,            config_parse_sec,                   0,                             offsetof(Socket, keep_alive_time)
 Socket.KeepAliveInterval,        config_parse_sec,                   0,                             offsetof(Socket, keep_alive_interval)
 Socket.KeepAliveProbes,          config_parse_unsigned,              0,                             offsetof(Socket, keep_alive_cnt)
+Socket.DeferAccept,              config_parse_sec,                   0,                             offsetof(Socket, defer_accept)
 Socket.NoDelay,                  config_parse_bool,                  0,                             offsetof(Socket, no_delay)
 Socket.Priority,                 config_parse_int,                   0,                             offsetof(Socket, priority)
 Socket.ReceiveBuffer,            config_parse_iec_size,              0,                             offsetof(Socket, receive_buffer)
 Socket.NoDelay,                  config_parse_bool,                  0,                             offsetof(Socket, no_delay)
 Socket.Priority,                 config_parse_int,                   0,                             offsetof(Socket, priority)
 Socket.ReceiveBuffer,            config_parse_iec_size,              0,                             offsetof(Socket, receive_buffer)
index d6d9821f572334834f73b4ce890a5c5053cf2855..a16b20d739239dde9b0975f8a78d0c0dc48067be 100644 (file)
@@ -613,6 +613,12 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
                         "%sKeepAliveProbes: %u\n",
                         prefix, s->keep_alive_cnt);
 
                         "%sKeepAliveProbes: %u\n",
                         prefix, s->keep_alive_cnt);
 
+        if(s->defer_accept)
+                fprintf(f,
+                        "%sDeferAccept: %s\n",
+                        prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX,
+                                                s->defer_accept, USEC_PER_SEC));
+
         LIST_FOREACH(port, p, s->ports) {
 
                 if (p->type == SOCKET_SOCKET) {
         LIST_FOREACH(port, p, s->ports) {
 
                 if (p->type == SOCKET_SOCKET) {
@@ -828,6 +834,12 @@ static void socket_apply_socket_options(Socket *s, int fd) {
                         log_warning_unit(UNIT(s)->id, "TCP_KEEPCNT failed: %m");
         }
 
                         log_warning_unit(UNIT(s)->id, "TCP_KEEPCNT failed: %m");
         }
 
+        if (s->defer_accept) {
+                int value = s->defer_accept / USEC_PER_SEC;
+                if (setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &value, sizeof(value)) < 0)
+                        log_warning_unit(UNIT(s)->id, "TCP_DEFER_ACCEPT failed: %m");
+        }
+
         if (s->no_delay) {
                 int b = s->no_delay;
                 if (setsockopt(fd, SOL_TCP, TCP_NODELAY, &b, sizeof(b)) < 0)
         if (s->no_delay) {
                 int b = s->no_delay;
                 if (setsockopt(fd, SOL_TCP, TCP_NODELAY, &b, sizeof(b)) < 0)
index 8871eb1ca39d372c08a356fa070bd8620765e6fc..eede70564a4f4808a29cb590cf6c0351b72ff42c 100644 (file)
@@ -105,6 +105,7 @@ struct Socket {
         usec_t timeout_usec;
         usec_t keep_alive_time;
         usec_t keep_alive_interval;
         usec_t timeout_usec;
         usec_t keep_alive_time;
         usec_t keep_alive_interval;
+        usec_t defer_accept;
 
         ExecCommand* exec_command[_SOCKET_EXEC_COMMAND_MAX];
         ExecContext exec_context;
 
         ExecCommand* exec_command[_SOCKET_EXEC_COMMAND_MAX];
         ExecContext exec_context;