chiark / gitweb /
pathmtu/pathmtu.c: Support IPv6 addresses in the generic code.
[tripe] / pathmtu / pathmtu.c
CommitLineData
c64d8cd5
MW
/* -*-c-*-
 *
 * Report MTU on path to specified host
 *
 * (c) 2008 Straylight/Edgeware
 */
7
/*----- Licensing notice --------------------------------------------------*
 *
 * This file is part of Trivial IP Encryption (TrIPE).
 *
 * TrIPE is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your
 * option) any later version.
 *
 * TrIPE is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with TrIPE.  If not, see <https://www.gnu.org/licenses/>.
 */
25
26/*----- Header files ------------------------------------------------------*/
27
28#include "config.h"
29
d245350a 30#include <assert.h>
c64d8cd5 31#include <errno.h>
88510d86 32#include <stddef.h>
c64d8cd5
MW
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36#include <time.h>
37
38#include <sys/types.h>
39#include <sys/time.h>
40#include <unistd.h>
41
42#include <sys/socket.h>
43#include <netinet/in.h>
44#include <arpa/inet.h>
45#include <netdb.h>
46
88510d86
MW
47#include <netinet/in_systm.h>
48#include <netinet/ip.h>
49#include <netinet/ip_icmp.h>
50#include <netinet/udp.h>
51
52#include <net/if.h>
53#include <ifaddrs.h>
54#include <sys/ioctl.h>
55
56#include <mLib/alloc.h>
57#include <mLib/bits.h>
c64d8cd5
MW
58#include <mLib/dstr.h>
59#include <mLib/hex.h>
60#include <mLib/mdwopt.h>
61#include <mLib/quis.h>
62#include <mLib/report.h>
63#include <mLib/tv.h>
64
/*----- Static variables --------------------------------------------------*/

/* Shared packet payload buffer. */
static unsigned char buf[65536];

/* Feedback polynomial used by `STEP'. */
#define POLY 0x1002d

/*----- Utility functions -------------------------------------------------*/

/* Step a value according to a simple LFSR.  Q must be a modifiable lvalue;
 * it is evaluated more than once, so it must not have side effects.
 */
#define STEP(q)                                                         \
  do (q) = ((q) & 0x8000) ? ((q) << 1) ^ POLY : ((q) << 1); while (0)
76
c64d8cd5
MW
/* Fill the SZ bytes at P with a constant but pseudorandom string.  Each
 * output byte is the low byte of the LFSR state, which is then advanced by
 * eight steps.
 */
static void fillbuffer(unsigned char *p, size_t sz)
{
  unsigned int y = 0xbc20;
  const unsigned char *l = p + sz;
  int i;

  while (p < l) {
    *p++ = y & 0xff;
    for (i = 0; i < 8; i++) STEP(y);
  }
}
91
88510d86
MW
/* Convert the string S to floating point.  WHAT names the quantity for the
 * error message.  Reports a fatal error (via `die') if nothing could be
 * converted, if there is trailing junk, or if `strtod' sets `errno'.
 */
static double s2f(const char *s, const char *what)
{
  double f;
  char *q;

  errno = 0;
  f = strtod(s, &q);
  /* `strtod' leaves Q == S when it found nothing to convert; without this
   * check, an empty argument would be silently accepted as zero.
   */
  if (errno || q == s || *q) die(EXIT_FAILURE, "bad %s", what);
  return (f);
}
c64d8cd5 103
88510d86
MW
104/* Convert a floating-point value into a struct timeval. */
105static void f2tv(struct timeval *tv, double t)
106 { tv->tv_sec = t; tv->tv_usec = (t - tv->tv_sec)*MILLION; }
107
454f5a1a
MW
/* A socket address of any family we handle.  Look at `sa.sa_family' to find
 * out which of the other members is live.
 */
union addr {
  struct sockaddr sa;
  struct sockaddr_in sin;
  struct sockaddr_in6 sin6;
};
113
22062fb6
MW
/* Check whether an address family is even slightly supported.  Returns
 * nonzero for AF_INET and AF_INET6, and zero for anything else.
 */
static int addrfamok(int af)
{
  switch (af) {
    case AF_INET: case AF_INET6: return (1);
    default: return (0);
  }
}
122
454f5a1a
MW
123/* Return the size of a socket address. */
124static size_t addrsz(const union addr *a)
125{
126 switch (a->sa.sa_family) {
127 case AF_INET: return (sizeof(a->sin));
22062fb6 128 case AF_INET6: return (sizeof(a->sin6));
454f5a1a
MW
129 default: abort();
130 }
131}
132
133/* Compare two addresses. Maybe compare the port numbers too. */
134#define AEF_PORT 1u
135static int addreq(const union addr *a, const union addr *b, unsigned f)
136{
137 switch (a->sa.sa_family) {
138 case AF_INET:
139 return (a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr &&
140 (!(f&AEF_PORT) || a->sin.sin_port == b->sin.sin_port));
22062fb6
MW
141 case AF_INET6:
142 return (!memcmp(a->sin6.sin6_addr.s6_addr,
143 b->sin6.sin6_addr.s6_addr, 16) &&
144 (!(f&AEF_PORT) || a->sin6.sin6_port == b->sin6.sin6_port));
454f5a1a
MW
145 default:
146 abort();
147 }
148}
149
88510d86
MW
150/*----- Main algorithm skeleton -------------------------------------------*/
151
152struct param {
153 unsigned f; /* Various flags */
154#define F_VERBOSE 1u /* Give a running commentary */
155 double retx; /* Initial retransmit interval */
156 double regr; /* Retransmit growth factor */
157 double timeout; /* Retransmission timeout */
158 int seqoff; /* Offset to write sequence number */
159 const struct probe_ops *pops; /* Probe algorithm description */
454f5a1a 160 union addr a; /* Destination address */
88510d86
MW
161};
162
/* State shared between the probe driver and the probe methods. */
struct probestate {
  const struct param *pp;               /* The parameters for this run */
  unsigned q;                           /* Current probe sequence number */
};
167
struct param;
struct probestate;

/* Description of a probe method.  Each method's state is an opaque block of
 * `statesz' bytes, passed to the functions as their first argument.
 */
struct probe_ops {
  const char *name;                     /* Name, for the `-m' option */
  const struct probe_ops *next;         /* Next method in the chain */
  size_t statesz;                       /* Size of the state block */

  /* Initialize the state, given the connected UDP socket and parameters.
   * Return an initial MTU upper bound, or a negative value on error.
   */
  int (*setup)(void *, int, const struct param *);

  /* Release anything that `setup' acquired. */
  void (*finish)(void *);

  /* Add interesting file descriptors to a read set, updating the maximum
   * descriptor.
   */
  void (*selprep)(void *, int *, fd_set *);

  /* Send a probe of the given size.  Return an `RC_...' code. */
  int (*xmit)(void *, int);

  /* Examine ready descriptors.  Return an `RC_...' code or a positive MTU
   * upper bound.
   */
  int (*selproc)(void *, fd_set *, struct probestate *);
};

/* Head of the chain of probe methods defined so far. */
#define OPS_CHAIN 0
180
/* Return codes from probe methods and from `probe'. */
enum {
  RC_FAIL = -99,                        /* Something went wrong */
  RC_OK = 0,                            /* Nothing to report yet */
  RC_LOWER = -1,                        /* Try a lower MTU */
  RC_HIGHER = -2,                       /* Try a higher MTU */
  RC_NOREPLY = -3                       /* Gave up waiting for a reply */
  /* or a positive MTU upper-bound */
};
189
/* Add a file descriptor FD to the set `fd_in', updating `*maxfd'.  Both
 * `fd_in' and `maxfd' must be in scope at the point of use.  FD is evaluated
 * more than once.
 */
#define ADDFD(fd)                                                       \
  do { FD_SET((fd), fd_in); if (*maxfd < (fd)) *maxfd = (fd); } while (0)
193
194/* Check whether a buffer contains a packet from our current probe. */
195static int mypacketp(struct probestate *ps,
196 const unsigned char *p, size_t sz)
197{
198 const struct param *pp = ps->pp;
c64d8cd5 199
88510d86
MW
200 return (sz >= pp->seqoff + 2 && LOAD16(p + pp->seqoff) == ps->q);
201}
202
203/* See whether MTU is an acceptable MTU value. Return an appropriate
204 * RC_... code or a new suggested MTU.
205 */
206static int probe(struct probestate *ps, void *st, int mtu)
c64d8cd5 207{
88510d86 208 const struct param *pp = ps->pp;
c64d8cd5 209 fd_set fd_in;
88510d86
MW
210 struct timeval tv, now, when, done;
211 double timer = pp->retx;
212 int rc, maxfd;
213
214 /* Set up the first retransmit and give-up timers. */
215 gettimeofday(&now, 0);
216 f2tv(&tv, pp->timeout); TV_ADD(&done, &now, &tv);
217 f2tv(&tv, timer); TV_ADD(&when, &now, &tv);
218 if (TV_CMP(&when, >, &done)) when = done;
219
220 /* Send the initial probe. */
221 if (pp->f & F_VERBOSE)
222 moan("sending probe of size %d (seq = %04x)", mtu, ps->q);
223 STEP(ps->q);
224 STORE16(buf + pp->seqoff, ps->q);
225 if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc);
226
227 for (;;) {
228
229 /* Wait for something interesting to happen. */
230 maxfd = 0; FD_ZERO(&fd_in);
231 pp->pops->selprep(st, &maxfd, &fd_in);
232 TV_SUB(&tv, &when, &now);
233 if (select(maxfd + 1, &fd_in, 0, 0, &tv) < 0) return (RC_FAIL);
234 gettimeofday(&now, 0);
235
236 /* See whether the probe method has any answers for us. */
237 if ((rc = pp->pops->selproc(st, &fd_in, ps)) != RC_OK) return (rc);
238
239 /* If we've waited too long, give up. If we should retransmit, do
240 * that.
241 */
242 if (TV_CMP(&now, >, &done))
243 return (RC_NOREPLY);
244 else if (TV_CMP(&now, >, &when)) {
245 if (pp->f & F_VERBOSE) moan("re-sending probe of size %d", mtu);
246 if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc);
247 do {
248 timer *= pp->regr; f2tv(&tv, timer); TV_ADD(&when, &when, &tv);
249 } while (TV_CMP(&when, <, &now));
250 if (TV_CMP(&when, >, &done)) when = done;
251 }
252 }
253}
c64d8cd5 254
88510d86
MW
255/* Discover the path MTU to the destination address. */
256static int pathmtu(const struct param *pp)
257{
258 int sk;
259 int mtu, lo, hi;
260 int rc, droppy = -1;
261 void *st;
262 struct probestate ps;
263
264 /* Build and connect a UDP socket. We'll need this to know the local port
265 * number to use if nothing else. Set other stuff up.
266 */
454f5a1a
MW
267 if ((sk = socket(pp->a.sa.sa_family, SOCK_DGRAM, IPPROTO_UDP)) < 0)
268 goto fail_0;
269 if (connect(sk, &pp->a.sa, addrsz(&pp->a))) goto fail_1;
88510d86
MW
270 st = xmalloc(pp->pops->statesz);
271 if ((mtu = pp->pops->setup(st, sk, pp)) < 0) goto fail_2;
272 ps.pp = pp; ps.q = rand() & 0xffff;
22062fb6
MW
273 switch (pp->a.sa.sa_family) {
274 case AF_INET: lo = 576; break;
275 case AF_INET6: lo = 1280; break;
276 default: abort();
277 }
278 hi = mtu;
279 if (hi < lo) { errno = EMSGSIZE; return (-1); }
88510d86
MW
280
281 /* And now we do a thing which is sort of like a binary search, except that
282 * we also take explicit clues as establishing a new upper bound, and we
283 * try to hug that initially.
284 */
c64d8cd5 285 for (;;) {
d245350a
MW
286 assert(lo <= mtu && mtu <= hi);
287 if (pp->f & F_VERBOSE) moan("probe: %d <= %d <= %d", lo, mtu, hi);
88510d86
MW
288 rc = probe(&ps, st, mtu);
289 switch (rc) {
290
291 case RC_FAIL:
292 if (pp->f & F_VERBOSE) moan("probe failed");
293 goto fail_3;
294
295 case RC_NOREPLY:
296 /* If we've not seen a dropped packet before then we don't know what
297 * this means yet -- in particular, we don't know which bit of the
298 * network is swallowing packets. Send a minimum-size probe. If
299 * that doesn't come back then assume that the remote host is
300 * swallowing our packets. If it does, then we assume that dropped
301 * packets are a result of ICMP fragmentation-needed reports being
302 * lost or suppressed.
303 */
304 if (pp->f & F_VERBOSE) moan("gave up: black hole detected");
305 if (droppy == -1) {
306 if (pp->f & F_VERBOSE) moan("sending minimum-size probe");
307 switch (probe(&ps, st, lo)) {
308 case RC_FAIL:
309 goto fail_3;
310 case RC_NOREPLY:
311 if (pp->f & F_VERBOSE) {
312 moan("no reply from min-size probe: "
313 "assume black hole at target");
314 }
315 droppy = 1;
316 break;
317 case RC_HIGHER:
318 if (pp->f & F_VERBOSE) {
319 moan("reply from min-size probe OK: "
320 "assume black hole in network");
321 }
322 droppy = 0;
323 break;
324 default:
325 if (pp->f & F_VERBOSE)
326 moan("unexpected return code from probe");
327 errno = ENOTCONN;
328 goto fail_3;
329 }
330 }
331
332 if (droppy) goto higher; else goto lower;
333
334 case RC_HIGHER:
335 higher:
336 if (droppy == -1) {
337 if (pp->f & F_VERBOSE)
338 moan("probe returned: remote host is not a black hole");
339 droppy = 0;
340 }
341 if (mtu == hi) {
342 if (pp->f & F_VERBOSE) moan("probe returned: found correct MTU");
343 goto done;
344 }
88510d86 345 lo = mtu;
d245350a
MW
346
347 /* Now we must make a new guess, between lo and hi. We know that lo
348 * is good; but we're not so sure about hi here. We know that hi >
349 * lo, so this will find an approximate midpoint, greater than lo and
350 * no more than hi.
351 */
352 if (pp->f & F_VERBOSE) moan("probe returned: guessing higher");
88510d86
MW
353 mtu += (hi - lo + 1)/2;
354 break;
355
356 case RC_LOWER:
357 lower:
d245350a
MW
358 /* If this didn't work, and we're already at the bottom of our
359 * possible range, then something has gone horribly wrong.
360 */
361 assert(lo < mtu);
362 hi = mtu - 1;
363 if (lo == hi) {
88510d86 364 if (pp->f & F_VERBOSE) moan("error returned: found correct MTU");
d245350a 365 mtu = lo;
88510d86
MW
366 goto done;
367 }
d245350a
MW
368
369 /* We must make a new guess, between lo and hi. We're probably
370 * fairly sure that lo will succeed, since either it's the minimum
371 * MTU or we've tested it already; but we're not quite sure about hi,
372 * so we want to aim high.
373 */
88510d86 374 if (pp->f & F_VERBOSE) moan("error returned: guessing lower");
88510d86
MW
375 mtu -= (hi - lo + 1)/2;
376 break;
377
378 default:
379 if (pp->f & F_VERBOSE) moan("error returned with new MTU estimate");
380 mtu = hi = rc;
381 break;
382 }
c64d8cd5 383 }
88510d86
MW
384
385done:
386 /* Clean up and return our result. */
387 pp->pops->finish(st);
388 xfree(st);
c64d8cd5
MW
389 close(sk);
390 return (mtu);
391
88510d86
MW
392fail_3:
393 pp->pops->finish(st);
394fail_2:
395 xfree(st);
c64d8cd5
MW
396fail_1:
397 close(sk);
398fail_0:
399 return (-1);
400}
401
88510d86
MW
402/*----- Doing it the hard way ---------------------------------------------*/
403
/* `IPHDR_SANE' is defined on systems where we hand the raw IP header's
 * length and fragment-offset fields over in network byte order; elsewhere
 * the `sane_...' macros expand to nothing and the values are passed through
 * unchanged.  Check `__linux__' as well as `linux', since the latter is not
 * predefined when compiling in a strict ISO C mode.
 */
#if defined(linux) || defined(__linux__) || defined(__OpenBSD__)
#  define IPHDR_SANE
#endif

#ifdef IPHDR_SANE
#  define sane_htons htons
#  define sane_htonl htonl
#else
#  define sane_htons
#  define sane_htonl
#endif

/* Raw sockets for receiving ICMP and sending UDP; and the `errno' value to
 * report if they are not available (zero if they are).
 */
static int rawicmp = -1, rawudp = -1, rawerr = 0;
417
#define IPCK_INIT 0xffff

/* Compute an IP checksum over the N bytes at BUF.  This is a restartable
 * interface: initialize A to `IPCK_INIT' for the first call, and pass the
 * previous result as A to continue.  Every piece except the last must have
 * even length.
 *
 * Returns the ones-complement of the ones-complement sum, as a host-order
 * integer; a result which would be zero is returned as 0xffff instead.
 */
static unsigned ipcksum(const void *buf, size_t n, unsigned a)
{
  unsigned long aa = a ^ 0xffff;        /* recover the running sum */
  const unsigned char *p = buf, *l = p + n;

  /* Add up the big-endian 16-bit words.  Compare the remaining length
   * rather than computing `l - 1', which would point before the buffer if
   * N is zero.
   */
  while (l - p >= 2) { aa += ((unsigned)p[0] << 8) | p[1]; p += 2; }

  /* An odd trailing byte counts as the high half of a final word. */
  if (p < l) { aa += (unsigned)(*p) << 8; }

  /* Fold the carries back in. */
  do aa = (aa & 0xffff) + (aa >> 16); while (aa >= 0x10000);
  return (aa == 0xffff ? aa : aa ^ 0xffff);
}
433
/* TCP/UDP pseudoheader structure, as fed to the checksum calculation. */
struct phdr {
  struct in_addr ph_src, ph_dst;        /* Source and destination address */
  unsigned char ph_z, ph_p;             /* Zero byte, and protocol number */
  unsigned short ph_len;                /* Length, as in the UDP header */
};
440
441struct raw_state {
454f5a1a 442 union addr me, a;
88510d86
MW
443 int sk, rawicmp, rawudp;
444 unsigned q;
445};
446
447static int raw_setup(void *stv, int sk, const struct param *pp)
448{
449 struct raw_state *st = stv;
cb160b86 450 socklen_t sz;
88510d86
MW
451 int i, mtu = -1;
452 struct ifaddrs *ifa, *ifaa, *ifap;
453 struct ifreq ifr;
454
454f5a1a
MW
455 /* Check that the address is OK, and that we have the necessary raw
456 * sockets.
457 */
458 switch (pp->a.sa.sa_family) {
459 case AF_INET:
460 if (rawerr) { errno = rawerr; goto fail_0; }
461 st->rawicmp = rawicmp; st->rawudp = rawudp; st->sk = sk;
462 break;
463 default:
464 errno = EPFNOSUPPORT; goto fail_0;
465 }
88510d86
MW
466
467 /* Initialize the sequence number. */
468 st->q = rand() & 0xffff;
469
470 /* Snaffle the local and remote address and port number. */
454f5a1a 471 st->a = pp->a;
88510d86 472 sz = sizeof(st->me);
454f5a1a 473 if (getsockname(sk, &st->me.sa, &sz))
88510d86
MW
474 goto fail_0;
475
476 /* There isn't a portable way to force the DF flag onto a packet through
477 * UDP, or even through raw IP, unless we write the entire IP header
478 * ourselves. This is somewhat annoying, especially since we have an
479 * uphill struggle keeping track of which systems randomly expect which
480 * header fields to be presented in host byte order. Oh, well.
481 */
482 i = 1;
483 if (setsockopt(rawudp, IPPROTO_IP, IP_HDRINCL, &i, sizeof(i))) goto fail_0;
484
485 /* Find an upper bound on the MTU. Do two passes over the interface
486 * list. If we can find matches for our local address then use the
487 * highest one of those; otherwise do a second pass and simply take the
488 * highest MTU of any network interface.
489 */
490 if (getifaddrs(&ifaa)) goto fail_0;
491 for (i = 0; i < 2; i++) {
492 for (ifap = 0, ifa = ifaa; ifa; ifa = ifa->ifa_next) {
493 if (!(ifa->ifa_flags & IFF_UP) || !ifa->ifa_addr ||
454f5a1a 494 ifa->ifa_addr->sa_family != st->me.sa.sa_family ||
88510d86 495 (i == 0 &&
454f5a1a 496 !addreq((union addr *)ifa->ifa_addr, &st->me, 0)) ||
88510d86
MW
497 (i == 1 && ifap && strcmp(ifap->ifa_name, ifa->ifa_name) == 0) ||
498 strlen(ifa->ifa_name) >= sizeof(ifr.ifr_name))
499 continue;
500 ifap = ifa;
501 strcpy(ifr.ifr_name, ifa->ifa_name);
502 if (ioctl(sk, SIOCGIFMTU, &ifr)) goto fail_1;
503 if (mtu < ifr.ifr_mtu) mtu = ifr.ifr_mtu;
504 }
505 if (mtu > 0) break;
506 }
507 if (mtu < 0) { errno = ENOTCONN; goto fail_1; }
508 freeifaddrs(ifaa);
509
510 /* Done. */
511 return (mtu);
512
513fail_1:
514 freeifaddrs(ifaa);
515fail_0:
516 return (-1);
517}
518
/* Finish with the raw-sockets probe method: there is nothing to release. */
static void raw_finish(void *stv) { (void)stv; }
520
521static void raw_selprep(void *stv, int *maxfd, fd_set *fd_in)
522 { struct raw_state *st = stv; ADDFD(st->sk); ADDFD(st->rawicmp); }
523
524static int raw_xmit(void *stv, int mtu)
525{
526 struct raw_state *st = stv;
527 unsigned char b[65536], *p;
528 struct ip *ip;
529 struct udphdr *udp;
530 struct phdr ph;
531 unsigned ck;
532
533 /* Build the IP header. */
534 ip = (struct ip *)b;
535 ip->ip_v = 4;
536 ip->ip_hl = sizeof(*ip)/4;
537 ip->ip_tos = IPTOS_RELIABILITY;
538 ip->ip_len = sane_htons(mtu);
539 STEP(st->q); ip->ip_id = htons(st->q);
540 ip->ip_off = sane_htons(0 | IP_DF);
541 ip->ip_ttl = 64;
542 ip->ip_p = IPPROTO_UDP;
543 ip->ip_sum = 0;
454f5a1a
MW
544 ip->ip_src = st->me.sin.sin_addr;
545 ip->ip_dst = st->a.sin.sin_addr;
88510d86
MW
546
547 /* Build a UDP packet in the output buffer. */
548 udp = (struct udphdr *)(ip + 1);
454f5a1a
MW
549 udp->uh_sport = st->me.sin.sin_port;
550 udp->uh_dport = st->a.sin.sin_port;
88510d86
MW
551 udp->uh_ulen = htons(mtu - sizeof(*ip));
552 udp->uh_sum = 0;
553
554 /* Copy the payload. */
555 p = (unsigned char *)(udp + 1);
556 memcpy(p, buf, mtu - (p - b));
557
558 /* Calculate the UDP checksum. */
559 ph.ph_src = ip->ip_src;
560 ph.ph_dst = ip->ip_dst;
561 ph.ph_z = 0;
562 ph.ph_p = IPPROTO_UDP;
563 ph.ph_len = udp->uh_ulen;
564 ck = IPCK_INIT;
565 ck = ipcksum(&ph, sizeof(ph), ck);
566 ck = ipcksum(udp, mtu - sizeof(*ip), ck);
567 udp->uh_sum = htons(ck);
568
569 /* Send the whole thing off. If we're too big for the interface then we
570 * might need to trim immediately.
571 */
454f5a1a 572 if (sendto(st->rawudp, b, mtu, 0, &st->a.sa, addrsz(&st->a)) < 0) {
88510d86
MW
573 if (errno == EMSGSIZE) return (RC_LOWER);
574 else goto fail_0;
575 }
576
577 /* Done. */
578 return (RC_OK);
579
580fail_0:
581 return (RC_FAIL);
582}
583
584static int raw_selproc(void *stv, fd_set *fd_in, struct probestate *ps)
585{
586 struct raw_state *st = stv;
587 unsigned char b[65536];
588 struct ip *ip;
589 struct icmp *icmp;
590 struct udphdr *udp;
9ad20ce0 591 const unsigned char *payload;
88510d86
MW
592 ssize_t n;
593
594 /* An ICMP packet: see what's inside. */
595 if (FD_ISSET(st->rawicmp, fd_in)) {
596 if ((n = read(st->rawicmp, b, sizeof(b))) < 0) goto fail_0;
597
598 ip = (struct ip *)b;
599 if (n < sizeof(*ip) || n < sizeof(4*ip->ip_hl) ||
600 ip->ip_v != 4 || ip->ip_p != IPPROTO_ICMP)
601 goto skip_icmp;
602 n -= sizeof(4*ip->ip_hl);
603
604 icmp = (struct icmp *)(b + 4*ip->ip_hl);
605 if (n < sizeof(*icmp) || icmp->icmp_type != ICMP_UNREACH)
606 goto skip_icmp;
607 n -= offsetof(struct icmp, icmp_ip);
608
609 ip = &icmp->icmp_ip;
610 if (n < sizeof(*ip) ||
611 ip->ip_p != IPPROTO_UDP || ip->ip_hl != sizeof(*ip)/4 ||
612 ip->ip_id != htons(st->q) ||
454f5a1a
MW
613 ip->ip_src.s_addr != st->me.sin.sin_addr.s_addr ||
614 ip->ip_dst.s_addr != st->a.sin.sin_addr.s_addr)
88510d86
MW
615 goto skip_icmp;
616 n -= sizeof(*ip);
617
618 udp = (struct udphdr *)(ip + 1);
454f5a1a
MW
619 if (n < sizeof(udp) || udp->uh_sport != st->me.sin.sin_port ||
620 udp->uh_dport != st->a.sin.sin_port)
88510d86
MW
621 goto skip_icmp;
622 n -= sizeof(*udp);
623
9ad20ce0
MW
624 payload = (const unsigned char *)(udp + 1);
625 if (!mypacketp(ps, payload, n)) goto skip_icmp;
626
88510d86
MW
627 if (icmp->icmp_code == ICMP_UNREACH_PORT) return (RC_HIGHER);
628 else if (icmp->icmp_code != ICMP_UNREACH_NEEDFRAG) goto skip_icmp;
629 else if (icmp->icmp_nextmtu) return (htons(icmp->icmp_nextmtu));
630 else return (RC_LOWER);
631 }
632skip_icmp:;
633
634 /* If we got a reply to the current probe then we're good. If we got an
635 * error, or the packet's sequence number is wrong, then ignore it.
636 */
637 if (FD_ISSET(st->sk, fd_in)) {
638 if ((n = read(st->sk, b, sizeof(b))) < 0) return (RC_OK);
639 else if (mypacketp(ps, b, n)) return (RC_HIGHER);
640 else return (RC_OK);
641 }
642
643 return (RC_OK);
644
645fail_0:
646 return (RC_FAIL);
647}
648
649static const struct probe_ops raw_ops = {
650 "raw", OPS_CHAIN, sizeof(struct raw_state),
651 raw_setup, raw_finish,
652 raw_selprep, raw_xmit, raw_selproc
653};
654
655#undef OPS_CHAIN
656#define OPS_CHAIN &raw_ops
657
658/*----- Doing the job on Linux --------------------------------------------*/
659
660#if defined(linux)
661
/* Fallback value in case the system headers don't define `IP_MTU'. */
#ifndef IP_MTU
#  define IP_MTU 14                     /* Blech! */
#endif

/* State for the Linux probe method. */
struct linux_state {
  int sk;                               /* The connected UDP socket */
};
669
670static int linux_setup(void *stv, int sk, const struct param *pp)
671{
672 struct linux_state *st = stv;
673 int i, mtu;
cb160b86 674 socklen_t sz;
88510d86 675
454f5a1a
MW
676 /* Check that the address is OK. */
677 switch (pp->a.sa.sa_family) {
678 case AF_INET: break;
679 default: errno = EPFNOSUPPORT; return (-1);
680 }
681
88510d86
MW
682 /* Snaffle the UDP socket. */
683 st->sk = sk;
684
685 /* Turn on kernel path-MTU discovery and force DF on. */
18d5f6eb 686 i = IP_PMTUDISC_PROBE;
88510d86
MW
687 if (setsockopt(st->sk, IPPROTO_IP, IP_MTU_DISCOVER, &i, sizeof(i)))
688 return (-1);
689
690 /* Read the initial MTU guess back and report it. */
691 sz = sizeof(mtu);
692 if (getsockopt(st->sk, IPPROTO_IP, IP_MTU, &mtu, &sz))
693 return (-1);
694
695 /* Done. */
696 return (mtu);
697}
698
/* Finish with the Linux probe method: there is nothing to release. */
static void linux_finish(void *stv) { (void)stv; }
700
701static void linux_selprep(void *stv, int *maxfd, fd_set *fd_in)
702 { struct linux_state *st = stv; ADDFD(st->sk); }
703
704static int linux_xmit(void *stv, int mtu)
705{
706 struct linux_state *st = stv;
707
708 /* Write the packet. */
709 if (write(st->sk, buf, mtu - 28) >= 0) return (RC_OK);
710 else if (errno == EMSGSIZE) return (RC_LOWER);
711 else return (RC_FAIL);
712}
713
714static int linux_selproc(void *stv, fd_set *fd_in, struct probestate *ps)
715{
716 struct linux_state *st = stv;
717 int mtu;
cb160b86 718 socklen_t sz;
88510d86
MW
719 ssize_t n;
720 unsigned char b[65536];
721
722 /* Read an answer. If it looks like the right kind of error then report a
723 * success. This is potentially wrong, since we can't tell whether an
724 * error was delayed from an earlier probe. However, we never return
725 * RC_LOWER from this method, so the packet sizes ought to be monotonically
726 * decreasing and this won't cause trouble. Otherwise update from the
727 * kernel's idea of the right MTU.
728 */
729 if (FD_ISSET(st->sk, fd_in)) {
730 n = read(st->sk, &buf, sizeof(buf));
731 if (n >= 0 ?
732 mypacketp(ps, b, n) :
733 errno == ECONNREFUSED || errno == EHOSTUNREACH)
734 return (RC_HIGHER);
735 sz = sizeof(mtu);
736 if (getsockopt(st->sk, IPPROTO_IP, IP_MTU, &mtu, &sz))
737 return (RC_FAIL);
738 return (mtu);
739 }
740 return (RC_OK);
741}
742
743static const struct probe_ops linux_ops = {
744 "linux", OPS_CHAIN, sizeof(struct linux_state),
745 linux_setup, linux_finish,
746 linux_selprep, linux_xmit, linux_selproc
747};
c64d8cd5 748
88510d86
MW
749#undef OPS_CHAIN
750#define OPS_CHAIN &linux_ops
c64d8cd5
MW
751
752#endif
753
754/*----- Help options ------------------------------------------------------*/
755
88510d86
MW
756static const struct probe_ops *probe_ops = OPS_CHAIN;
757
c64d8cd5
MW
758static void version(FILE *fp)
759 { pquis(fp, "$, TrIPE version " VERSION "\n"); }
760
/* Write a brief usage summary to FP. */
static void usage(FILE *fp)
{
  pquis(fp,
        "Usage: $ [-46v] [-H HEADER] [-m METHOD]\n"
        " [-r SECS] [-g FACTOR] [-t SECS] HOST [PORT]\n");
}
c64d8cd5
MW
766
767static void help(FILE *fp)
768{
88510d86
MW
769 const struct probe_ops *ops;
770
c64d8cd5
MW
771 version(fp);
772 fputc('\n', fp);
773 usage(fp);
774 fputs("\
775\n\
776Options in full:\n\
777\n\
778-h, --help Show this help text.\n\
b13c3272 779-V, --version Show version number.\n\
c64d8cd5
MW
780-u, --usage Show brief usage message.\n\
781\n\
22062fb6
MW
782-4, --ipv4 Restrict to IPv4 only.\n\
783-6, --ipv6 Restrict to IPv6 only.\n\
88510d86
MW
784-g, --growth=FACTOR Growth factor for retransmit interval.\n\
785-m, --method=METHOD Use METHOD to probe for MTU.\n\
786-r, --retransmit=SECS Retransmit if no reply after SEC.\n\
787-t, --timeout=SECS Give up expecting a reply after SECS.\n\
a8f70fe1 788-v, --verbose Write a running commentary to stderr.\n\
c64d8cd5 789-H, --header=HEX Packet header, in hexadecimal.\n\
88510d86
MW
790\n\
791Probe methods:\n\
c64d8cd5 792", fp);
88510d86
MW
793 for (ops = probe_ops; ops; ops = ops->next)
794 printf("\t%s\n", ops->name);
c64d8cd5
MW
795}
796
797/*----- Main code ---------------------------------------------------------*/
798
/* Main program: parse the command line, resolve the destination, discover
 * the path MTU, and print it on standard output.
 */
int main(int argc, char *argv[])
{
  struct param pp = { 0, 0.333, 3.0, 8.0, 0, OPS_CHAIN };
  hex_ctx hc;
  dstr d = DSTR_INIT;
  size_t sz;
  int i, err;
  struct addrinfo aihint = { 0 }, *ailist, *ai;
  const char *host, *svc = "7";
  unsigned f = 0;

#define f_bogus 1u

  /* Open the raw sockets before doing anything else, remembering the error
   * if that fails; then switch to the real user id.
   */
  if ((rawicmp = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0 ||
      (rawudp = socket(PF_INET, SOCK_RAW, IPPROTO_UDP)) < 0)
    rawerr = errno;
  if (setuid(getuid()))
    abort();

  ego(argv[0]);
  fillbuffer(buf, sizeof(buf));

  aihint.ai_family = AF_UNSPEC;
  aihint.ai_protocol = IPPROTO_UDP;
  aihint.ai_socktype = SOCK_DGRAM;
  aihint.ai_flags = AI_ADDRCONFIG;

  /* Parse the command-line options. */
  for (;;) {
    static const struct option opts[] = {
      { "help",         0,              0,      'h' },
      { "version",      0,              0,      'V' },
      { "usage",        0,              0,      'u' },
      { "ipv4",         0,              0,      '4' },
      { "ipv6",         0,              0,      '6' },
      { "header",       OPTF_ARGREQ,    0,      'H' },
      { "growth",       OPTF_ARGREQ,    0,      'g' },
      { "method",       OPTF_ARGREQ,    0,      'm' },
      { "retransmit",   OPTF_ARGREQ,    0,      'r' },
      { "timeout",      OPTF_ARGREQ,    0,      't' },
      { "verbose",      0,              0,      'v' },
      { 0,              0,              0,      0 }
    };

    i = mdwopt(argc, argv, "hVu" "46H:g:m:r:t:v", opts, 0, 0, 0);
    if (i < 0) break;
    switch (i) {
      case 'h': help(stdout); exit(0);
      case 'V': version(stdout); exit(0);
      case 'u': usage(stdout); exit(0);

      case 'H':
        /* Decode the header, copy at most 532 bytes of it to the start of
         * the payload, and put the sequence number just after it.
         */
        DRESET(&d);
        hex_init(&hc);
        hex_decode(&hc, optarg, strlen(optarg), &d);
        hex_decode(&hc, 0, 0, &d);
        sz = d.len < 532 ? d.len : 532;
        memcpy(buf, d.buf, sz);
        pp.seqoff = sz;
        break;

      case '4': aihint.ai_family = AF_INET; break;
      case '6': aihint.ai_family = AF_INET6; break;
      case 'g': pp.regr = s2f(optarg, "retransmit growth factor"); break;
      case 'r': pp.retx = s2f(optarg, "retransmit interval"); break;
      case 't': pp.timeout = s2f(optarg, "timeout"); break;

      case 'm':
        for (pp.pops = OPS_CHAIN; pp.pops; pp.pops = pp.pops->next)
          if (strcmp(pp.pops->name, optarg) == 0) goto found_alg;
        die(EXIT_FAILURE, "unknown probe algorithm `%s'", optarg);
      found_alg:
        break;

      case 'v': pp.f |= F_VERBOSE; break;

      default:
        f |= f_bogus;
        break;
    }
  }
  argv += optind; argc -= optind;
  if ((f & f_bogus) || 1 > argc || argc > 2) {
    usage(stderr);
    exit(EXIT_FAILURE);
  }

  /* Resolve the destination, and pick the first address of a family we can
   * handle.
   */
  host = argv[0];
  if (argv[1]) svc = argv[1];
  if ((err = getaddrinfo(host, svc, &aihint, &ailist)) != 0) {
    die(EXIT_FAILURE, "unknown host `%s' or service `%s': %s",
        host, svc, gai_strerror(err));
  }
  for (ai = ailist; ai && !addrfamok(ai->ai_family); ai = ai->ai_next);
  if (!ai) die(EXIT_FAILURE, "no supported address families for `%s'", host);
  assert(ai->ai_addrlen <= sizeof(pp.a));
  memcpy(&pp.a, ai->ai_addr, ai->ai_addrlen);

  /* We've copied what we need, so release the resolver's list. */
  freeaddrinfo(ailist);

  /* Do the job and report the answer. */
  i = pathmtu(&pp);
  if (i < 0)
    die(EXIT_FAILURE, "failed to discover MTU: %s", strerror(errno));
  printf("%d\n", i);
  if (ferror(stdout) || fflush(stdout) || fclose(stdout))
    die(EXIT_FAILURE, "failed to write result: %s", strerror(errno));
  return (0);
}
904
905/*----- That's all, folks -------------------------------------------------*/