1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2014 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <netinet/tcp.h>
25 #include "resolved-dns-stream.h"
/* Offset added to now(CLOCK_MONOTONIC) when dns_stream_new() arms the
 * per-stream timeout event source. */
27 #define DNS_STREAM_TIMEOUT_USEC (10 * USEC_PER_SEC)
/* Threshold that dns_stream_new() compares the manager's stream counter
 * (n_dns_streams) against before creating another stream. */
28 #define DNS_STREAMS_MAX 128
/* Shut down the stream's event handling and its socket: drops the
 * references to the I/O and timeout event sources and closes the file
 * descriptor. Each field is overwritten with whatever the respective helper
 * returns (presumably NULL for the event sources and an invalid fd for
 * safe_close() -- confirm against those helpers). */
30 static void dns_stream_stop(DnsStream *s) {
33 s->io_event_source = sd_event_source_unref(s->io_event_source);
34 s->timeout_event_source = sd_event_source_unref(s->timeout_event_source);
35 s->fd = safe_close(s->fd);
/* Recompute the set of I/O events the stream is interested in and apply it
 * to the stream's I/O event source. Returns the result of
 * sd_event_source_set_io_events().
 *
 * NOTE(review): the declaration of the mask "f" and the statements guarded
 * by the two conditions are not visible in this view; presumably they OR
 * EPOLLOUT resp. EPOLLIN into "f" -- confirm. */
38 static int dns_stream_update_io(DnsStream *s) {
/* True while a write packet is queued and fewer bytes than the size prefix
 * plus the packet payload have been written. */
43 if (s->write_packet && s->n_written < sizeof(s->write_size) + s->write_packet->size)
/* True while no read packet exists yet, or fewer bytes than the size prefix
 * plus the packet payload have been read. */
45 if (!s->read_packet || s->n_read < sizeof(s->read_size) + s->read_packet->size)
48 return sd_event_source_set_io_events(s->io_event_source, f);
/* Report the end of the stream's transaction to its owner by invoking the
 * stream's "complete" callback with the given error code. Callers in this
 * file pass positive errno-style values (ETIMEDOUT, ECONNRESET, EBADMSG,
 * errno, or a negated negative return value) and 0 once both directions
 * have finished.
 *
 * NOTE(review): only the callback invocation is visible here; any
 * surrounding guards and the function's return value are not shown. */
51 static int stream_complete(DnsStream *s, int error) {
57 s->complete(s, error);
/* Timer callback installed by dns_stream_new() via sd_event_add_time().
 * "userdata" is the DnsStream the timer belongs to; the stream is completed
 * with ETIMEDOUT and the result of stream_complete() is returned. */
64 static int on_stream_timeout(sd_event_source *es, usec_t usec, void *userdata) {
65 DnsStream *s = userdata;
69 return stream_complete(s, ETIMEDOUT);
/* I/O callback installed by dns_stream_new() via sd_event_add_io().
 * "userdata" is the DnsStream, "fd" its socket and "revents" the epoll
 * events that fired.
 *
 * On the wire each packet is preceded by a 16-bit big-endian size field
 * (write_size / read_size). The function first pushes out any pending part
 * of the outgoing size prefix + packet, then reads the incoming size prefix
 * followed by the packet payload, and finally completes the stream with 0
 * once both directions are fully processed. Failures are reported through
 * stream_complete() with a positive errno-style code.
 *
 * NOTE(review): several guards (e.g. the checks on the return values of
 * writev()/read() and of dns_stream_update_io()) are not visible in this
 * view; comments on the affected lines are hedged accordingly. */
72 static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
73 DnsStream *s = userdata;
/* --- Write side: socket is writable and the outgoing data is incomplete --- */
78 if ((revents & EPOLLOUT) &&
80 s->n_written < sizeof(s->write_size) + s->write_packet->size) {
/* Gather the size prefix and the packet payload into one vectored write. */
85 iov[0].iov_base = &s->write_size;
86 iov[0].iov_len = sizeof(s->write_size);
87 iov[1].iov_base = DNS_PACKET_DATA(s->write_packet);
88 iov[1].iov_len = s->write_packet->size;
/* Presumably advances the iovec array past the n_written bytes already
 * sent by earlier calls -- confirm against IOVEC_INCREMENT(). */
90 IOVEC_INCREMENT(iov, 2, s->n_written);
92 ss = writev(fd, iov, 2);
/* EINTR/EAGAIN are transient and not reported; any other errno ends the
 * stream. */
94 if (errno != EINTR && errno != EAGAIN)
95 return stream_complete(s, errno);
99 /* Are we done? If so, disable the event source for EPOLLOUT */
100 if (s->n_written >= sizeof(s->write_size) + s->write_packet->size) {
101 r = dns_stream_update_io(s);
/* r is negated to turn it into the positive code stream_complete() is
 * given elsewhere (presumably only reached when r < 0). */
103 return stream_complete(s, -r);
/* --- Read side: data or hang-up pending and the incoming packet is
 * incomplete --- */
107 if ((revents & (EPOLLIN|EPOLLHUP|EPOLLRDHUP)) &&
109 s->n_read < sizeof(s->read_size) + s->read_packet->size)) {
/* Phase 1: the size prefix itself has not been read completely yet. */
111 if (s->n_read < sizeof(s->read_size)) {
/* Continue filling read_size at the offset where the last read stopped. */
114 ss = read(fd, (uint8_t*) &s->read_size + s->n_read, sizeof(s->read_size) - s->n_read);
116 if (errno != EINTR && errno != EAGAIN)
117 return stream_complete(s, errno);
/* Presumably the end-of-stream case (read() returned 0) -- confirm. */
119 return stream_complete(s, ECONNRESET);
/* Phase 2: size prefix is known, now read the payload. */
124 if (s->n_read >= sizeof(s->read_size)) {
/* Reject announced sizes that cannot even hold a DNS packet header. */
126 if (be16toh(s->read_size) < DNS_PACKET_HEADER_SIZE)
127 return stream_complete(s, EBADMSG);
129 if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size)) {
/* Allocate the receive packet on first use, sized from the prefix. */
132 if (!s->read_packet) {
133 r = dns_packet_new(&s->read_packet, s->protocol, be16toh(s->read_size));
135 return stream_complete(s, -r);
/* Fill in the packet metadata from what is known about the connection. */
137 s->read_packet->size = be16toh(s->read_size);
138 s->read_packet->ipproto = IPPROTO_TCP;
139 s->read_packet->family = s->peer.sa.sa_family;
140 s->read_packet->ttl = s->ttl;
141 s->read_packet->ifindex = s->ifindex;
/* Sender is the peer, destination is our local end; ports are stored in
 * host byte order. */
143 if (s->read_packet->family == AF_INET) {
144 s->read_packet->sender.in = s->peer.in.sin_addr;
145 s->read_packet->sender_port = be16toh(s->peer.in.sin_port);
146 s->read_packet->destination.in = s->local.in.sin_addr;
147 s->read_packet->destination_port = be16toh(s->local.in.sin_port);
149 assert(s->read_packet->family == AF_INET6);
150 s->read_packet->sender.in6 = s->peer.in6.sin6_addr;
151 s->read_packet->sender_port = be16toh(s->peer.in6.sin6_port);
152 s->read_packet->destination.in6 = s->local.in6.sin6_addr;
153 s->read_packet->destination_port = be16toh(s->local.in6.sin6_port);
/* For IPv6, fall back to the scope id of the peer address, then of the
 * local address, if no interface index is known yet. */
155 if (s->read_packet->ifindex == 0)
156 s->read_packet->ifindex = s->peer.in6.sin6_scope_id;
157 if (s->read_packet->ifindex == 0)
158 s->read_packet->ifindex = s->local.in6.sin6_scope_id;
/* Arguments of the payload read (the call itself is not visible here):
 * the target is the packet data at the payload offset already received
 * (n_read minus the prefix size), the length is what is still missing. */
163 (uint8_t*) DNS_PACKET_DATA(s->read_packet) + s->n_read - sizeof(s->read_size),
164 sizeof(s->read_size) + be16toh(s->read_size) - s->n_read);
166 if (errno != EINTR && errno != EAGAIN)
167 return stream_complete(s, errno);
/* Presumably the end-of-stream case (read() returned 0) -- confirm. */
169 return stream_complete(s, ECONNRESET);
174 /* Are we done? If so, disable the event source for EPOLLIN */
175 if (s->n_read >= sizeof(s->read_size) + be16toh(s->read_size)) {
176 r = dns_stream_update_io(s);
178 return stream_complete(s, -r);
180 /* If there's a packet handler
181 * installed, call that. Note that
182 * this is optional... */
184 return s->on_packet(s);
/* Both the outgoing and the incoming packet have been transferred in full:
 * report success. */
189 if ((s->write_packet && s->n_written >= sizeof(s->write_size) + s->write_packet->size) &&
190 (s->read_packet && s->n_read >= sizeof(s->read_size) + s->read_packet->size))
191 return stream_complete(s, 0);
/* Tear down a stream: unlink it from its manager's list of streams,
 * decrement the manager's stream counter and drop the references to the
 * write and read packets.
 *
 * NOTE(review): the remaining statements (any NULL guards, stopping the
 * event sources, releasing the object, the return value) are not visible
 * in this view. */
196 DnsStream *dns_stream_free(DnsStream *s) {
203 LIST_REMOVE(streams, s->manager->dns_streams, s);
204 s->manager->n_dns_streams--;
207 dns_packet_unref(s->write_packet);
208 dns_packet_unref(s->read_packet);
/* Presumably generates dns_stream_freep(), the helper named in the
 * _cleanup_() attribute inside dns_stream_new() -- confirm against the
 * macro definition. */
215 DEFINE_TRIVIAL_CLEANUP_FUNC(DnsStream*, dns_stream_free);
/* Create a DnsStream for the already-connected socket "fd".
 *
 * The function records the peer and local socket addresses, derives the
 * interface index and TTL from the connection's packet options, enables
 * TCP_NODELAY, pins LLMNR connections to their interface, registers the I/O
 * and timeout event sources on the manager's event loop and links the
 * stream into the manager's list.
 *
 *   m         manager that owns the event loop and the list of streams
 *   ret       output parameter for the new stream (the assignment is not
 *             visible in this view)
 *   protocol  stored in the stream and later used when allocating packets
 *   fd        connected stream socket
 *
 * NOTE(review): the error checks after the individual calls and the return
 * statements are not visible here; presumably the function returns 0 on
 * success and a negative errno-style value on failure -- confirm. */
217 int dns_stream_new(Manager *m, DnsStream **ret, DnsProtocol protocol, int fd) {
218 static const int one = 1;
/* Receive buffer for the packet options; the cmsghdr member only forces
 * suitable alignment for the byte array. */
220 struct cmsghdr header; /* For alignment */
221 uint8_t buffer[CMSG_SPACE(MAX(sizeof(struct in_pktinfo), sizeof(struct in6_pktinfo)))
222 + EXTRA_CMSG_SPACE /* kernel appears to require extra space */];
224 struct msghdr mh = {};
225 struct cmsghdr *cmsg;
/* Freed automatically on early exit through the cleanup attribute. */
226 _cleanup_(dns_stream_freep) DnsStream *s = NULL;
/* NOTE(review): the comparison is a strict ">", so this check only trips
 * once the counter already exceeds DNS_STREAMS_MAX; confirm whether ">="
 * was intended. */
233 if (m->n_dns_streams > DNS_STREAMS_MAX)
236 s = new0(DnsStream, 1);
241 s->protocol = protocol;
243 /* Query the remote side */
244 s->peer_salen = sizeof(s->peer);
245 r = getpeername(fd, &s->peer.sa, &s->peer_salen);
/* First guess for the interface index: the scope id of the IPv6 peer. */
248 if (s->peer.sa.sa_family == AF_INET6)
249 s->ifindex = s->peer.in6.sin6_scope_id;
251 /* Query the local side */
252 s->local_salen = sizeof(s->local);
253 r = getsockname(fd, &s->local.sa, &s->local_salen);
/* Second guess: the scope id of the local IPv6 address, if still unset. */
256 if (s->local.sa.sa_family == AF_INET6 && s->ifindex <= 0)
257 s->ifindex = s->local.in6.sin6_scope_id;
259 /* Check consistency */
260 assert(s->peer.sa.sa_family == s->local.sa.sa_family);
261 assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6));
263 /* Query connection meta information */
264 sl = sizeof(control);
265 if (s->peer.sa.sa_family == AF_INET) {
266 r = getsockopt(fd, IPPROTO_IP, IP_PKTOPTIONS, &control, &sl);
270 assert(s->peer.sa.sa_family == AF_INET6);
272 r = getsockopt(fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, &control, &sl);
/* Wrap the returned options in a msghdr so the CMSG_* macros can walk
 * them. */
277 mh.msg_control = &control;
278 mh.msg_controllen = sl;
279 for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg)) {
281 if (cmsg->cmsg_level == IPPROTO_IPV6) {
282 assert(s->peer.sa.sa_family == AF_INET6);
/* The case labels are not visible here; the visible bodies take the
 * interface index from an in6_pktinfo and the TTL from an int payload. */
284 switch (cmsg->cmsg_type) {
287 struct in6_pktinfo *i = (struct in6_pktinfo*) CMSG_DATA(cmsg);
290 s->ifindex = i->ipi6_ifindex;
295 s->ttl = *(int *) CMSG_DATA(cmsg);
299 } else if (cmsg->cmsg_level == IPPROTO_IP) {
300 assert(s->peer.sa.sa_family == AF_INET);
/* Same as above for IPv4: in_pktinfo for the interface index, int payload
 * for the TTL. */
302 switch (cmsg->cmsg_type) {
305 struct in_pktinfo *i = (struct in_pktinfo*) CMSG_DATA(cmsg);
308 s->ifindex = i->ipi_ifindex;
313 s->ttl = *(int *) CMSG_DATA(cmsg);
319 /* The Linux kernel sets the interface index to the loopback
320 * device if the connection came from the local host since it
321 * avoids the routing table in such a case. Let's unset the
322 * interface index in such a case. */
/* NOTE(review): "!= 0" treats every non-zero result of
 * manager_ifindex_is_loopback() the same way, which would include a
 * negative error return if that helper can produce one -- confirm. */
323 if (s->ifindex > 0 && manager_ifindex_is_loopback(m, s->ifindex) != 0)
326 /* If we don't know the interface index still, we look for the
327 * first local interface with a matching address. Yuck! */
329 s->ifindex = manager_find_ifindex(m, s->local.sa.sa_family, s->local.sa.sa_family == AF_INET ? (union in_addr_union*) &s->local.in.sin_addr : (union in_addr_union*) &s->local.in6.sin6_addr);
/* Enable TCP_NODELAY on the socket. */
331 r = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));
335 if (s->protocol == DNS_PROTOCOL_LLMNR && s->ifindex > 0) {
/* The UNICAST_IF socket options are handed the index in big-endian
 * (network) byte order. */
336 uint32_t ifindex = htobe32(s->ifindex);
338 /* Make sure all packets for this connection are sent on the same interface */
339 if (s->local.sa.sa_family == AF_INET) {
340 r = setsockopt(fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex, sizeof(ifindex));
343 } else if (s->local.sa.sa_family == AF_INET6) {
344 r = setsockopt(fd, IPPROTO_IPV6, IPV6_UNICAST_IF, &ifindex, sizeof(ifindex));
/* Start by watching for incoming data only; dns_stream_update_io() adjusts
 * the event mask later. */
350 r = sd_event_add_io(m->event, &s->io_event_source, fd, EPOLLIN, on_stream_io, s);
/* Arm the timer DNS_STREAM_TIMEOUT_USEC from now on the monotonic clock. */
354 r = sd_event_add_time(m->event, &s->timeout_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + DNS_STREAM_TIMEOUT_USEC, 0, on_stream_timeout, s);
/* Link the stream into the manager's list of streams. */
358 LIST_PREPEND(streams, m->dns_streams, s);
/* Queue packet "p" for transmission on stream "s": takes a reference on the
 * packet, stores its size as the 16-bit big-endian prefix that precedes the
 * payload on the wire, and refreshes the I/O event mask. Returns the result
 * of dns_stream_update_io().
 *
 * NOTE(review): any precondition checks and the reset of the write offset
 * are not visible in this view. */
369 int dns_stream_write_packet(DnsStream *s, DnsPacket *p) {
375 s->write_packet = dns_packet_ref(p);
376 s->write_size = htobe16(p->size);
379 return dns_stream_update_io(s);