chiark / gitweb /
server/admin.c: Remove spurious `ping' in usage message.
[tripe] / pathmtu / pathmtu.c
CommitLineData
c64d8cd5
MW
1/* -*-c-*-
2 *
3 * Report MTU on path to specified host
4 *
5 * (c) 2008 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Trivial IP Encryption (TrIPE).
11 *
11ad66c2
MW
12 * TrIPE is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 3 of the License, or (at your
15 * option) any later version.
c64d8cd5 16 *
11ad66c2
MW
17 * TrIPE is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 * for more details.
c64d8cd5
MW
21 *
22 * You should have received a copy of the GNU General Public License
11ad66c2 23 * along with TrIPE. If not, see <https://www.gnu.org/licenses/>.
c64d8cd5
MW
24 */
25
26/*----- Header files ------------------------------------------------------*/
27
28#include "config.h"
29
d245350a 30#include <assert.h>
c64d8cd5 31#include <errno.h>
88510d86 32#include <stddef.h>
c64d8cd5
MW
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36#include <time.h>
37
38#include <sys/types.h>
39#include <sys/time.h>
40#include <unistd.h>
41
42#include <sys/socket.h>
43#include <netinet/in.h>
44#include <arpa/inet.h>
45#include <netdb.h>
46
88510d86
MW
47#include <netinet/in_systm.h>
48#include <netinet/ip.h>
49#include <netinet/ip_icmp.h>
102fa2f0
MW
50#include <netinet/ip6.h>
51#include <netinet/icmp6.h>
88510d86
MW
52#include <netinet/udp.h>
53
85ccca5f
MW
54#ifdef HAVE_GETIFADDRS
55# include <net/if.h>
56# include <ifaddrs.h>
57# include <sys/ioctl.h>
58#endif
88510d86
MW
59
60#include <mLib/alloc.h>
61#include <mLib/bits.h>
c64d8cd5
MW
62#include <mLib/dstr.h>
63#include <mLib/hex.h>
64#include <mLib/mdwopt.h>
65#include <mLib/quis.h>
66#include <mLib/report.h>
67#include <mLib/tv.h>
68
69/*----- Static variables --------------------------------------------------*/
70
71static unsigned char buf[65536];
72
88510d86
MW
73#define POLY 0x1002d
74
c64d8cd5
MW
75/*----- Utility functions -------------------------------------------------*/
76
88510d86
MW
77/* Step a value according to a simple LFSR. */
78#define STEP(q) \
79 do (q) = ((q) & 0x8000) ? ((q) << 1) ^ POLY : ((q) << 1); while (0)
80
c64d8cd5
MW
/* Fill the SZ-byte buffer at P with a fixed pseudorandom byte string.
 *
 * The bytes are taken from a 16-bit LFSR which always starts in the same
 * state, so every call produces the same sequence.  The register is
 * stepped eight times between output bytes.
 */
static void fillbuffer(unsigned char *p, size_t sz)
{
  enum { LFSR_SEED = 0xbc20, LFSR_POLY = 0x1002d };
  unsigned int state = LFSR_SEED;
  size_t n;
  int bit;

  for (n = 0; n < sz; n++) {
    p[n] = state & 0xff;
    for (bit = 0; bit < 8; bit++) {
      if (state & 0x8000) state = (state << 1) ^ LFSR_POLY;
      else state <<= 1;
    }
  }
}
95
88510d86
MW
/* Convert the string S to a floating-point number.
 *
 * WHAT describes the quantity being parsed and is used in the error
 * message.  The whole of S must be a valid number: an empty string, a
 * string containing no number at all, trailing junk, or an error reported
 * by `strtod' through `errno' makes the program exit through `die'.
 */
static double s2f(const char *s, const char *what)
{
  double f;
  char *q;

  errno = 0;
  f = strtod(s, &q);

  /* `strtod' signals `no conversion performed' only by leaving Q equal to
   * S, so check for that explicitly as well as for trailing junk.
   */
  if (errno || q == s || *q) die(EXIT_FAILURE, "bad %s", what);
  return (f);
}
c64d8cd5 107
88510d86
MW
108/* Convert a floating-point value into a struct timeval. */
109static void f2tv(struct timeval *tv, double t)
110 { tv->tv_sec = t; tv->tv_usec = (t - tv->tv_sec)*MILLION; }
111
454f5a1a
MW
/* A socket address in any of the families this program handles.  Look at
 * `sa.sa_family' to find out which of the other members is live.
 */
union addr {
  struct sockaddr sa;                   /* Generic view */
  struct sockaddr_in sin;               /* IPv4 address and port */
  struct sockaddr_in6 sin6;             /* IPv6 address and port */
};
117
22062fb6
MW
/* Return nonzero if AF is an address family we know how to deal with
 * (IPv4 or IPv6), or zero if it isn't.
 */
static int addrfamok(int af)
  { return (af == AF_INET || af == AF_INET6); }
126
454f5a1a
MW
127/* Return the size of a socket address. */
128static size_t addrsz(const union addr *a)
129{
130 switch (a->sa.sa_family) {
131 case AF_INET: return (sizeof(a->sin));
22062fb6 132 case AF_INET6: return (sizeof(a->sin6));
454f5a1a
MW
133 default: abort();
134 }
135}
136
88510d86
MW
137/*----- Main algorithm skeleton -------------------------------------------*/
138
139struct param {
140 unsigned f; /* Various flags */
141#define F_VERBOSE 1u /* Give a running commentary */
142 double retx; /* Initial retransmit interval */
143 double regr; /* Retransmit growth factor */
144 double timeout; /* Retransmission timeout */
145 int seqoff; /* Offset to write sequence number */
146 const struct probe_ops *pops; /* Probe algorithm description */
454f5a1a 147 union addr a; /* Destination address */
88510d86
MW
148};
149
/* State shared between the main search and the probe methods. */
struct probestate {
  const struct param *pp;               /* Parameters for this run */
  unsigned q;                           /* Sequence number of latest probe */
};
154
/* Description of a probe method.  The methods form a list linked through
 * `next'; the first `void *' argument of each function is the method's
 * private state, a block of `statesz' bytes.
 */
struct probe_ops {
  const char *name;                     /* Name, as given to `-m' */
  const struct probe_ops *next;         /* Next method in the chain */
  size_t statesz;                       /* Size of private state block */

  /* Initialize the state for a UDP socket and parameters; return an
   * initial MTU upper bound, or a negative value on failure.
   */
  int (*setup)(void *, int, const struct param *);

  /* Release anything which `setup' acquired. */
  void (*finish)(void *);

  /* Add the descriptors to wait on to the set, updating the maximum. */
  void (*selprep)(void *, int *, fd_set *);

  /* Send a probe of the given size; return RC_OK or another RC_... code. */
  int (*xmit)(void *, int);

  /* Deal with readable descriptors; return RC_OK to keep waiting, some
   * other RC_... code, or a positive MTU upper bound.
   */
  int (*selproc)(void *, fd_set *, struct probestate *);
};
165
166#define OPS_CHAIN 0
167
/* Return codes from probe operations.  Any positive value is a newly
 * discovered upper bound on the MTU.
 */
enum {
  RC_OK = 0,                            /* Nothing to report (yet) */
  RC_LOWER = -1,                        /* Probe was too big: go lower */
  RC_HIGHER = -2,                       /* Probe got through: go higher */
  RC_NOREPLY = -3,                      /* Nothing came back in time */
  RC_FAIL = -99                         /* Something went wrong */
};
176
177/* Add a file descriptor FD to the set `fd_in', updating `*maxfd'. */
178#define ADDFD(fd) \
179 do { FD_SET(fd, fd_in); if (*maxfd < fd) *maxfd = fd; } while (0)
180
181/* Check whether a buffer contains a packet from our current probe. */
182static int mypacketp(struct probestate *ps,
183 const unsigned char *p, size_t sz)
184{
185 const struct param *pp = ps->pp;
c64d8cd5 186
88510d86
MW
187 return (sz >= pp->seqoff + 2 && LOAD16(p + pp->seqoff) == ps->q);
188}
189
190/* See whether MTU is an acceptable MTU value. Return an appropriate
191 * RC_... code or a new suggested MTU.
192 */
193static int probe(struct probestate *ps, void *st, int mtu)
c64d8cd5 194{
88510d86 195 const struct param *pp = ps->pp;
c64d8cd5 196 fd_set fd_in;
88510d86
MW
197 struct timeval tv, now, when, done;
198 double timer = pp->retx;
199 int rc, maxfd;
200
201 /* Set up the first retransmit and give-up timers. */
202 gettimeofday(&now, 0);
203 f2tv(&tv, pp->timeout); TV_ADD(&done, &now, &tv);
204 f2tv(&tv, timer); TV_ADD(&when, &now, &tv);
205 if (TV_CMP(&when, >, &done)) when = done;
206
207 /* Send the initial probe. */
208 if (pp->f & F_VERBOSE)
209 moan("sending probe of size %d (seq = %04x)", mtu, ps->q);
210 STEP(ps->q);
211 STORE16(buf + pp->seqoff, ps->q);
212 if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc);
213
214 for (;;) {
215
216 /* Wait for something interesting to happen. */
217 maxfd = 0; FD_ZERO(&fd_in);
218 pp->pops->selprep(st, &maxfd, &fd_in);
219 TV_SUB(&tv, &when, &now);
220 if (select(maxfd + 1, &fd_in, 0, 0, &tv) < 0) return (RC_FAIL);
221 gettimeofday(&now, 0);
222
223 /* See whether the probe method has any answers for us. */
224 if ((rc = pp->pops->selproc(st, &fd_in, ps)) != RC_OK) return (rc);
225
226 /* If we've waited too long, give up. If we should retransmit, do
227 * that.
228 */
229 if (TV_CMP(&now, >, &done))
230 return (RC_NOREPLY);
231 else if (TV_CMP(&now, >, &when)) {
232 if (pp->f & F_VERBOSE) moan("re-sending probe of size %d", mtu);
233 if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc);
234 do {
235 timer *= pp->regr; f2tv(&tv, timer); TV_ADD(&when, &when, &tv);
236 } while (TV_CMP(&when, <, &now));
237 if (TV_CMP(&when, >, &done)) when = done;
238 }
239 }
240}
c64d8cd5 241
88510d86
MW
242/* Discover the path MTU to the destination address. */
243static int pathmtu(const struct param *pp)
244{
245 int sk;
246 int mtu, lo, hi;
247 int rc, droppy = -1;
248 void *st;
249 struct probestate ps;
250
251 /* Build and connect a UDP socket. We'll need this to know the local port
252 * number to use if nothing else. Set other stuff up.
253 */
454f5a1a
MW
254 if ((sk = socket(pp->a.sa.sa_family, SOCK_DGRAM, IPPROTO_UDP)) < 0)
255 goto fail_0;
256 if (connect(sk, &pp->a.sa, addrsz(&pp->a))) goto fail_1;
88510d86
MW
257 st = xmalloc(pp->pops->statesz);
258 if ((mtu = pp->pops->setup(st, sk, pp)) < 0) goto fail_2;
259 ps.pp = pp; ps.q = rand() & 0xffff;
22062fb6
MW
260 switch (pp->a.sa.sa_family) {
261 case AF_INET: lo = 576; break;
262 case AF_INET6: lo = 1280; break;
263 default: abort();
264 }
265 hi = mtu;
266 if (hi < lo) { errno = EMSGSIZE; return (-1); }
88510d86
MW
267
268 /* And now we do a thing which is sort of like a binary search, except that
269 * we also take explicit clues as establishing a new upper bound, and we
270 * try to hug that initially.
271 */
c64d8cd5 272 for (;;) {
d245350a
MW
273 assert(lo <= mtu && mtu <= hi);
274 if (pp->f & F_VERBOSE) moan("probe: %d <= %d <= %d", lo, mtu, hi);
88510d86
MW
275 rc = probe(&ps, st, mtu);
276 switch (rc) {
277
278 case RC_FAIL:
279 if (pp->f & F_VERBOSE) moan("probe failed");
280 goto fail_3;
281
282 case RC_NOREPLY:
283 /* If we've not seen a dropped packet before then we don't know what
284 * this means yet -- in particular, we don't know which bit of the
285 * network is swallowing packets. Send a minimum-size probe. If
286 * that doesn't come back then assume that the remote host is
287 * swallowing our packets. If it does, then we assume that dropped
288 * packets are a result of ICMP fragmentation-needed reports being
289 * lost or suppressed.
290 */
291 if (pp->f & F_VERBOSE) moan("gave up: black hole detected");
292 if (droppy == -1) {
293 if (pp->f & F_VERBOSE) moan("sending minimum-size probe");
294 switch (probe(&ps, st, lo)) {
295 case RC_FAIL:
296 goto fail_3;
297 case RC_NOREPLY:
298 if (pp->f & F_VERBOSE) {
299 moan("no reply from min-size probe: "
300 "assume black hole at target");
301 }
302 droppy = 1;
303 break;
304 case RC_HIGHER:
305 if (pp->f & F_VERBOSE) {
306 moan("reply from min-size probe OK: "
307 "assume black hole in network");
308 }
309 droppy = 0;
310 break;
311 default:
312 if (pp->f & F_VERBOSE)
313 moan("unexpected return code from probe");
314 errno = ENOTCONN;
315 goto fail_3;
316 }
317 }
318
319 if (droppy) goto higher; else goto lower;
320
321 case RC_HIGHER:
322 higher:
323 if (droppy == -1) {
324 if (pp->f & F_VERBOSE)
325 moan("probe returned: remote host is not a black hole");
326 droppy = 0;
327 }
328 if (mtu == hi) {
329 if (pp->f & F_VERBOSE) moan("probe returned: found correct MTU");
330 goto done;
331 }
88510d86 332 lo = mtu;
d245350a
MW
333
334 /* Now we must make a new guess, between lo and hi. We know that lo
335 * is good; but we're not so sure about hi here. We know that hi >
336 * lo, so this will find an approximate midpoint, greater than lo and
337 * no more than hi.
338 */
339 if (pp->f & F_VERBOSE) moan("probe returned: guessing higher");
88510d86
MW
340 mtu += (hi - lo + 1)/2;
341 break;
342
343 case RC_LOWER:
344 lower:
d245350a
MW
345 /* If this didn't work, and we're already at the bottom of our
346 * possible range, then something has gone horribly wrong.
347 */
348 assert(lo < mtu);
349 hi = mtu - 1;
350 if (lo == hi) {
88510d86 351 if (pp->f & F_VERBOSE) moan("error returned: found correct MTU");
d245350a 352 mtu = lo;
88510d86
MW
353 goto done;
354 }
d245350a
MW
355
356 /* We must make a new guess, between lo and hi. We're probably
357 * fairly sure that lo will succeed, since either it's the minimum
358 * MTU or we've tested it already; but we're not quite sure about hi,
359 * so we want to aim high.
360 */
88510d86 361 if (pp->f & F_VERBOSE) moan("error returned: guessing lower");
88510d86
MW
362 mtu -= (hi - lo + 1)/2;
363 break;
364
365 default:
366 if (pp->f & F_VERBOSE) moan("error returned with new MTU estimate");
367 mtu = hi = rc;
368 break;
369 }
c64d8cd5 370 }
88510d86
MW
371
372done:
373 /* Clean up and return our result. */
374 pp->pops->finish(st);
375 xfree(st);
c64d8cd5
MW
376 close(sk);
377 return (mtu);
378
88510d86
MW
379fail_3:
380 pp->pops->finish(st);
381fail_2:
382 xfree(st);
c64d8cd5
MW
383fail_1:
384 close(sk);
385fail_0:
386 return (-1);
387}
388
88510d86
MW
389/*----- Doing it the hard way ---------------------------------------------*/
390
85ccca5f
MW
391#ifdef HAVE_GETIFADDRS
392
88510d86 393#if defined(linux) || defined(__OpenBSD__)
4fcb3e51 394# define IPHDR_SANE
88510d86
MW
395#endif
396
397#ifdef IPHDR_SANE
398# define sane_htons htons
399# define sane_htonl htonl
c64d8cd5 400#else
88510d86
MW
401# define sane_htons
402# define sane_htonl
403#endif
404
405static int rawicmp = -1, rawudp = -1, rawerr = 0;
102fa2f0 406static int rawicmp6 = -1, rawudp6 = -1, rawerr6 = 0;
88510d86
MW
407
408#define IPCK_INIT 0xffff
409
f03efaf5
MW
410/* Compare two addresses. Maybe compare the port numbers too. */
411#define AEF_PORT 1u
412static int addreq(const union addr *a, const union addr *b, unsigned f)
413{
414 switch (a->sa.sa_family) {
415 case AF_INET:
416 return (a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr &&
417 (!(f&AEF_PORT) || a->sin.sin_port == b->sin.sin_port));
418 case AF_INET6:
419 return (!memcmp(a->sin6.sin6_addr.s6_addr,
420 b->sin6.sin6_addr.s6_addr, 16) &&
421 (!(f&AEF_PORT) || a->sin6.sin6_port == b->sin6.sin6_port));
422 default:
423 abort();
424 }
425}
426
88510d86
MW
/* Compute an IP checksum over the N bytes at BUF.  This is a restartable
 * interface: initialize A to `IPCK_INIT' for the first call, and pass the
 * result of each call as A to the next.
 *
 * The data is summed as big-endian 16-bit words; a trailing odd byte is
 * treated as the high half of a final word.  The result is the one's
 * complement of the folded sum, except that a result of zero is returned
 * as 0xffff instead.  An empty buffer (N = 0) is acceptable.
 */
static unsigned ipcksum(const void *buf, size_t n, unsigned a)
{
  unsigned long aa = a ^ 0xffff;
  const unsigned char *p = buf;

  /* Count bytes remaining rather than comparing against an end pointer:
   * `end - 1' would point before the buffer if it were empty.
   */
  for (; n >= 2; n -= 2, p += 2) aa += ((unsigned)p[0] << 8) | p[1];
  if (n) aa += (unsigned)p[0] << 8;

  /* Fold the carries back in, and complement. */
  do aa = (aa & 0xffff) + (aa >> 16); while (aa >= 0x10000);
  return (aa == 0xffff ? aa : aa ^ 0xffff);
}
440
/* TCP/UDP pseudoheader structure for IPv4: this is what gets fed into the
 * UDP checksum ahead of the UDP header and payload.
 */
struct phdr {
  struct in_addr ph_src, ph_dst;        /* Source and destination */
  uint8_t ph_z, ph_p;                   /* Zero byte; protocol number */
  uint16_t ph_len;                      /* UDP length */
};
102fa2f0
MW
/* The corresponding pseudoheader structure for IPv6. */
struct phdr6 {
  struct in6_addr ph6_src, ph6_dst;     /* Source and destination */
  uint32_t ph6_len;                     /* Upper-layer packet length */
  uint8_t ph6_z0, ph6_z1, ph6_z2;       /* Three zero bytes */
  uint8_t ph6_nxt;                      /* Next-header value */
};
88510d86
MW
452
453struct raw_state {
454f5a1a 454 union addr me, a;
88510d86 455 int sk, rawicmp, rawudp;
5854b1cc 456 uint16_t srcport, dstport;
88510d86
MW
457 unsigned q;
458};
459
460static int raw_setup(void *stv, int sk, const struct param *pp)
461{
462 struct raw_state *st = stv;
cb160b86 463 socklen_t sz;
88510d86
MW
464 int i, mtu = -1;
465 struct ifaddrs *ifa, *ifaa, *ifap;
466 struct ifreq ifr;
102fa2f0 467 struct icmp6_filter f6;
88510d86 468
454f5a1a
MW
469 /* Check that the address is OK, and that we have the necessary raw
470 * sockets.
102fa2f0
MW
471 *
472 * For IPv6, also set the filter so we don't get too many useless wakeups.
454f5a1a
MW
473 */
474 switch (pp->a.sa.sa_family) {
475 case AF_INET:
476 if (rawerr) { errno = rawerr; goto fail_0; }
477 st->rawicmp = rawicmp; st->rawudp = rawudp; st->sk = sk;
102fa2f0
MW
478 /* IPv4 filtering is available on Linux but isn't portable. */
479 break;
480 case AF_INET6:
481 if (rawerr6) { errno = rawerr6; goto fail_0; }
482 st->rawicmp = rawicmp6; st->rawudp = rawudp6; st->sk = sk;
483 ICMP6_FILTER_SETBLOCKALL(&f6);
484 ICMP6_FILTER_SETPASS(ICMP6_PACKET_TOO_BIG, &f6);
485 ICMP6_FILTER_SETPASS(ICMP6_DST_UNREACH, &f6);
486 if (setsockopt(st->rawicmp, IPPROTO_ICMPV6, ICMP6_FILTER,
487 &f6, sizeof(f6))) {
488 die(EXIT_FAILURE, "failed to set icmpv6 filter: %s",
489 strerror(errno));
490 }
454f5a1a
MW
491 break;
492 default:
493 errno = EPFNOSUPPORT; goto fail_0;
494 }
88510d86
MW
495
496 /* Initialize the sequence number. */
497 st->q = rand() & 0xffff;
498
499 /* Snaffle the local and remote address and port number. */
454f5a1a 500 st->a = pp->a;
88510d86 501 sz = sizeof(st->me);
454f5a1a 502 if (getsockname(sk, &st->me.sa, &sz))
88510d86
MW
503 goto fail_0;
504
102fa2f0
MW
505 /* Only now do some fiddling because Linux doesn't like port numbers in
506 * IPv6 raw destination addresses...
507 */
b9e97e20
MW
508 switch (pp->a.sa.sa_family) {
509 case AF_INET:
510 st->srcport = st->me.sin.sin_port; st->me.sin.sin_port = 0;
511 st->dstport = st->a.sin.sin_port; st->a.sin.sin_port = 0;
512 break;
102fa2f0
MW
513 case AF_INET6:
514 st->srcport = st->me.sin6.sin6_port; st->me.sin6.sin6_port = 0;
515 st->dstport = st->a.sin6.sin6_port; st->a.sin6.sin6_port = 0;
516 break;
b9e97e20
MW
517 default:
518 abort();
519 }
5854b1cc 520
88510d86
MW
521 /* There isn't a portable way to force the DF flag onto a packet through
522 * UDP, or even through raw IP, unless we write the entire IP header
523 * ourselves. This is somewhat annoying, especially since we have an
524 * uphill struggle keeping track of which systems randomly expect which
525 * header fields to be presented in host byte order. Oh, well.
526 */
527 i = 1;
528 if (setsockopt(rawudp, IPPROTO_IP, IP_HDRINCL, &i, sizeof(i))) goto fail_0;
529
530 /* Find an upper bound on the MTU. Do two passes over the interface
531 * list. If we can find matches for our local address then use the
532 * highest one of those; otherwise do a second pass and simply take the
533 * highest MTU of any network interface.
534 */
535 if (getifaddrs(&ifaa)) goto fail_0;
536 for (i = 0; i < 2; i++) {
537 for (ifap = 0, ifa = ifaa; ifa; ifa = ifa->ifa_next) {
538 if (!(ifa->ifa_flags & IFF_UP) || !ifa->ifa_addr ||
454f5a1a 539 ifa->ifa_addr->sa_family != st->me.sa.sa_family ||
88510d86 540 (i == 0 &&
454f5a1a 541 !addreq((union addr *)ifa->ifa_addr, &st->me, 0)) ||
88510d86
MW
542 (i == 1 && ifap && strcmp(ifap->ifa_name, ifa->ifa_name) == 0) ||
543 strlen(ifa->ifa_name) >= sizeof(ifr.ifr_name))
544 continue;
545 ifap = ifa;
546 strcpy(ifr.ifr_name, ifa->ifa_name);
547 if (ioctl(sk, SIOCGIFMTU, &ifr)) goto fail_1;
548 if (mtu < ifr.ifr_mtu) mtu = ifr.ifr_mtu;
549 }
550 if (mtu > 0) break;
551 }
552 if (mtu < 0) { errno = ENOTCONN; goto fail_1; }
553 freeifaddrs(ifaa);
554
555 /* Done. */
556 return (mtu);
557
558fail_1:
559 freeifaddrs(ifaa);
560fail_0:
561 return (-1);
562}
563
/* Tear down the `raw' probe method.  There's nothing to do. */
static void raw_finish(void *stv)
  { (void)stv; }
565
566static void raw_selprep(void *stv, int *maxfd, fd_set *fd_in)
567 { struct raw_state *st = stv; ADDFD(st->sk); ADDFD(st->rawicmp); }
568
569static int raw_xmit(void *stv, int mtu)
570{
571 struct raw_state *st = stv;
572 unsigned char b[65536], *p;
573 struct ip *ip;
102fa2f0 574 struct ip6_hdr *ip6;
88510d86
MW
575 struct udphdr *udp;
576 struct phdr ph;
102fa2f0 577 struct phdr6 ph6;
88510d86
MW
578 unsigned ck;
579
b9e97e20
MW
580 switch (st->a.sa.sa_family) {
581
582 case AF_INET:
583
584 /* Build the IP header. */
585 ip = (struct ip *)b;
586 ip->ip_v = 4;
587 ip->ip_hl = sizeof(*ip)/4;
588 ip->ip_tos = IPTOS_RELIABILITY;
589 ip->ip_len = sane_htons(mtu);
590 STEP(st->q); ip->ip_id = htons(st->q);
591 ip->ip_off = sane_htons(0 | IP_DF);
592 ip->ip_ttl = 64;
593 ip->ip_p = IPPROTO_UDP;
594 ip->ip_sum = 0;
595 ip->ip_src = st->me.sin.sin_addr;
596 ip->ip_dst = st->a.sin.sin_addr;
597
598 /* Build a UDP packet in the output buffer. */
599 udp = (struct udphdr *)(ip + 1);
600 udp->uh_sport = st->srcport;
601 udp->uh_dport = st->dstport;
602 udp->uh_ulen = htons(mtu - sizeof(*ip));
603 udp->uh_sum = 0;
604
605 /* Copy the payload. */
606 p = (unsigned char *)(udp + 1);
607 memcpy(p, buf, mtu - (p - b));
608
609 /* Calculate the UDP checksum. */
610 ph.ph_src = ip->ip_src;
611 ph.ph_dst = ip->ip_dst;
612 ph.ph_z = 0;
613 ph.ph_p = IPPROTO_UDP;
614 ph.ph_len = udp->uh_ulen;
615 ck = IPCK_INIT;
616 ck = ipcksum(&ph, sizeof(ph), ck);
617 ck = ipcksum(udp, mtu - sizeof(*ip), ck);
618 udp->uh_sum = htons(ck);
619
620 break;
621
102fa2f0
MW
622 case AF_INET6:
623
624 /* Build the IP header. */
625 ip6 = (struct ip6_hdr *)b;
626 STEP(st->q); ip6->ip6_flow = htonl(0x60000000 | st->q);
627 ip6->ip6_plen = htons(mtu - sizeof(*ip6));
628 ip6->ip6_nxt = IPPROTO_UDP;
629 ip6->ip6_hlim = 64;
630 ip6->ip6_src = st->me.sin6.sin6_addr;
631 ip6->ip6_dst = st->a.sin6.sin6_addr;
632
633 /* Build a UDP packet in the output buffer. */
634 udp = (struct udphdr *)(ip6 + 1);
635 udp->uh_sport = st->srcport;
636 udp->uh_dport = st->dstport;
637 udp->uh_ulen = htons(mtu - sizeof(*ip6));
638 udp->uh_sum = 0;
639
640 /* Copy the payload. */
641 p = (unsigned char *)(udp + 1);
642 memcpy(p, buf, mtu - (p - b));
643
644 /* Calculate the UDP checksum. */
645 ph6.ph6_src = ip6->ip6_src;
646 ph6.ph6_dst = ip6->ip6_dst;
647 ph6.ph6_len = udp->uh_ulen;
648 ph6.ph6_z0 = ph6.ph6_z1 = ph6.ph6_z2 = 0;
649 ph6.ph6_nxt = IPPROTO_UDP;
650 ck = IPCK_INIT;
651 ck = ipcksum(&ph6, sizeof(ph6), ck);
652 ck = ipcksum(udp, mtu - sizeof(*ip6), ck);
653 udp->uh_sum = htons(ck);
654
655 break;
656
b9e97e20
MW
657 default:
658 abort();
659 }
88510d86
MW
660
661 /* Send the whole thing off. If we're too big for the interface then we
662 * might need to trim immediately.
663 */
454f5a1a 664 if (sendto(st->rawudp, b, mtu, 0, &st->a.sa, addrsz(&st->a)) < 0) {
88510d86
MW
665 if (errno == EMSGSIZE) return (RC_LOWER);
666 else goto fail_0;
667 }
668
669 /* Done. */
670 return (RC_OK);
671
672fail_0:
673 return (RC_FAIL);
674}
675
676static int raw_selproc(void *stv, fd_set *fd_in, struct probestate *ps)
677{
678 struct raw_state *st = stv;
679 unsigned char b[65536];
680 struct ip *ip;
102fa2f0 681 struct ip6_hdr *ip6;
88510d86 682 struct icmp *icmp;
102fa2f0 683 struct icmp6_hdr *icmp6;
88510d86 684 struct udphdr *udp;
9ad20ce0 685 const unsigned char *payload;
88510d86
MW
686 ssize_t n;
687
688 /* An ICMP packet: see what's inside. */
689 if (FD_ISSET(st->rawicmp, fd_in)) {
690 if ((n = read(st->rawicmp, b, sizeof(b))) < 0) goto fail_0;
691
b9e97e20
MW
692 switch (st->me.sa.sa_family) {
693
694 case AF_INET:
695
696 ip = (struct ip *)b;
697 if (n < sizeof(*ip) || n < sizeof(4*ip->ip_hl) ||
698 ip->ip_v != 4 || ip->ip_p != IPPROTO_ICMP)
699 goto skip_icmp;
700 n -= sizeof(4*ip->ip_hl);
701
702 icmp = (struct icmp *)(b + 4*ip->ip_hl);
703 if (n < sizeof(*icmp) || icmp->icmp_type != ICMP_UNREACH)
704 goto skip_icmp;
705 n -= offsetof(struct icmp, icmp_ip);
706
707 ip = &icmp->icmp_ip;
708 if (n < sizeof(*ip) ||
709 ip->ip_p != IPPROTO_UDP || ip->ip_hl != sizeof(*ip)/4 ||
710 ip->ip_id != htons(st->q) ||
711 ip->ip_src.s_addr != st->me.sin.sin_addr.s_addr ||
712 ip->ip_dst.s_addr != st->a.sin.sin_addr.s_addr)
713 goto skip_icmp;
714 n -= sizeof(*ip);
715
716 udp = (struct udphdr *)(ip + 1);
717 if (n < sizeof(*udp) || udp->uh_sport != st->srcport ||
718 udp->uh_dport != st->dstport)
719 goto skip_icmp;
720 n -= sizeof(*udp);
721
722 payload = (const unsigned char *)(udp + 1);
723 if (!mypacketp(ps, payload, n)) goto skip_icmp;
724
725 if (icmp->icmp_code == ICMP_UNREACH_PORT) return (RC_HIGHER);
726 else if (icmp->icmp_code != ICMP_UNREACH_NEEDFRAG) goto skip_icmp;
727 else if (icmp->icmp_nextmtu) return (htons(icmp->icmp_nextmtu));
728 else return (RC_LOWER);
729
730 break;
731
102fa2f0
MW
732 case AF_INET6:
733 icmp6 = (struct icmp6_hdr *)b;
734 if (n < sizeof(*icmp6) ||
735 (icmp6->icmp6_type != ICMP6_PACKET_TOO_BIG &&
736 icmp6->icmp6_type != ICMP6_DST_UNREACH))
737 goto skip_icmp;
738 n -= sizeof(*icmp6);
739
740 ip6 = (struct ip6_hdr *)(icmp6 + 1);
741 if (n < sizeof(*ip6) || ip6->ip6_nxt != IPPROTO_UDP ||
742 memcmp(ip6->ip6_src.s6_addr,
743 st->me.sin6.sin6_addr.s6_addr, 16) ||
744 memcmp(ip6->ip6_dst.s6_addr,
745 st->a.sin6.sin6_addr.s6_addr, 16) ||
746 (ntohl(ip6->ip6_flow)&0xffff) != st->q)
747 goto skip_icmp;
748 n -= sizeof(*ip6);
749
750 udp = (struct udphdr *)(ip6 + 1);
751 if (n < sizeof(*udp) || udp->uh_sport != st->srcport ||
752 udp->uh_dport != st->dstport)
753 goto skip_icmp;
754 n -= sizeof(*udp);
755
756 payload = (const unsigned char *)(udp + 1);
757 if (!mypacketp(ps, payload, n)) goto skip_icmp;
758
759 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG)
760 return (ntohs(icmp6->icmp6_mtu));
761 else switch (icmp6->icmp6_code) {
762 case ICMP6_DST_UNREACH_ADMIN:
763 case ICMP6_DST_UNREACH_NOPORT:
764 return (RC_HIGHER);
765 default:
766 goto skip_icmp;
767 }
768 break;
769
b9e97e20
MW
770 default:
771 abort();
772 }
88510d86 773 }
b9e97e20 774
88510d86
MW
775skip_icmp:;
776
777 /* If we got a reply to the current probe then we're good. If we got an
778 * error, or the packet's sequence number is wrong, then ignore it.
779 */
780 if (FD_ISSET(st->sk, fd_in)) {
781 if ((n = read(st->sk, b, sizeof(b))) < 0) return (RC_OK);
782 else if (mypacketp(ps, b, n)) return (RC_HIGHER);
783 else return (RC_OK);
784 }
785
786 return (RC_OK);
787
788fail_0:
789 return (RC_FAIL);
790}
791
792static const struct probe_ops raw_ops = {
793 "raw", OPS_CHAIN, sizeof(struct raw_state),
794 raw_setup, raw_finish,
795 raw_selprep, raw_xmit, raw_selproc
796};
797
798#undef OPS_CHAIN
799#define OPS_CHAIN &raw_ops
800
85ccca5f
MW
801#endif
802
88510d86
MW
803/*----- Doing the job on Linux --------------------------------------------*/
804
805#if defined(linux)
806
807#ifndef IP_MTU
808# define IP_MTU 14 /* Blech! */
809#endif
810
/* Private state for the `linux' probe method. */
struct linux_state {
  int sol;                              /* Socket option level */
  int so_mtu_discover;                  /* Option: path-MTU discovery mode */
  int so_mtu;                           /* Option: read current path MTU */
  int sk;                               /* The connected UDP socket */
  size_t hdrlen;                        /* Header bytes counted in the MTU */
};
816
817static int linux_setup(void *stv, int sk, const struct param *pp)
818{
819 struct linux_state *st = stv;
820 int i, mtu;
cb160b86 821 socklen_t sz;
88510d86 822
454f5a1a
MW
823 /* Check that the address is OK. */
824 switch (pp->a.sa.sa_family) {
10583b59
MW
825 case AF_INET:
826 st->sol = IPPROTO_IP;
827 st->so_mtu_discover = IP_MTU_DISCOVER;
828 st->so_mtu = IP_MTU;
829 st->hdrlen = 28;
830 break;
831 case AF_INET6:
832 st->sol = IPPROTO_IPV6;
833 st->so_mtu_discover = IPV6_MTU_DISCOVER;
834 st->so_mtu = IPV6_MTU;
835 st->hdrlen = 48;
836 break;
837 default:
838 errno = EPFNOSUPPORT;
839 return (-1);
454f5a1a
MW
840 }
841
88510d86
MW
842 /* Snaffle the UDP socket. */
843 st->sk = sk;
844
845 /* Turn on kernel path-MTU discovery and force DF on. */
18d5f6eb 846 i = IP_PMTUDISC_PROBE;
10583b59 847 if (setsockopt(st->sk, st->sol, st->so_mtu_discover, &i, sizeof(i)))
88510d86
MW
848 return (-1);
849
850 /* Read the initial MTU guess back and report it. */
851 sz = sizeof(mtu);
10583b59 852 if (getsockopt(st->sk, st->sol, st->so_mtu, &mtu, &sz))
88510d86
MW
853 return (-1);
854
855 /* Done. */
856 return (mtu);
857}
858
/* Tear down the `linux' probe method.  There's nothing to do. */
static void linux_finish(void *stv)
  { (void)stv; }
860
861static void linux_selprep(void *stv, int *maxfd, fd_set *fd_in)
862 { struct linux_state *st = stv; ADDFD(st->sk); }
863
864static int linux_xmit(void *stv, int mtu)
865{
866 struct linux_state *st = stv;
867
868 /* Write the packet. */
10583b59 869 if (write(st->sk, buf, mtu - st->hdrlen) >= 0) return (RC_OK);
88510d86
MW
870 else if (errno == EMSGSIZE) return (RC_LOWER);
871 else return (RC_FAIL);
872}
873
874static int linux_selproc(void *stv, fd_set *fd_in, struct probestate *ps)
875{
876 struct linux_state *st = stv;
877 int mtu;
cb160b86 878 socklen_t sz;
88510d86
MW
879 ssize_t n;
880 unsigned char b[65536];
881
882 /* Read an answer. If it looks like the right kind of error then report a
883 * success. This is potentially wrong, since we can't tell whether an
884 * error was delayed from an earlier probe. However, we never return
885 * RC_LOWER from this method, so the packet sizes ought to be monotonically
886 * decreasing and this won't cause trouble. Otherwise update from the
887 * kernel's idea of the right MTU.
888 */
889 if (FD_ISSET(st->sk, fd_in)) {
890 n = read(st->sk, &buf, sizeof(buf));
891 if (n >= 0 ?
892 mypacketp(ps, b, n) :
893 errno == ECONNREFUSED || errno == EHOSTUNREACH)
894 return (RC_HIGHER);
895 sz = sizeof(mtu);
10583b59 896 if (getsockopt(st->sk, st->sol, st->so_mtu, &mtu, &sz))
88510d86
MW
897 return (RC_FAIL);
898 return (mtu);
899 }
900 return (RC_OK);
901}
902
903static const struct probe_ops linux_ops = {
904 "linux", OPS_CHAIN, sizeof(struct linux_state),
905 linux_setup, linux_finish,
906 linux_selprep, linux_xmit, linux_selproc
907};
c64d8cd5 908
88510d86
MW
909#undef OPS_CHAIN
910#define OPS_CHAIN &linux_ops
c64d8cd5
MW
911
912#endif
913
914/*----- Help options ------------------------------------------------------*/
915
88510d86
MW
916static const struct probe_ops *probe_ops = OPS_CHAIN;
917
c64d8cd5
MW
918static void version(FILE *fp)
919 { pquis(fp, "$, TrIPE version " VERSION "\n"); }
920
/* Write a brief usage message to FP.
 *
 * NOTE(review): the whitespace which begins the second line of the
 * message was lost from the copy of the source this was prepared from,
 * and is assumed to be a single tab -- confirm against the repository.
 */
static void usage(FILE *fp)
{
  pquis(fp,
        "Usage: $ [-46v] [-H HEADER] [-m METHOD]\n"
        "\t[-r SECS] [-g FACTOR] [-t SECS] HOST [PORT]\n");
}
c64d8cd5
MW
926
927static void help(FILE *fp)
928{
88510d86
MW
929 const struct probe_ops *ops;
930
c64d8cd5
MW
931 version(fp);
932 fputc('\n', fp);
933 usage(fp);
934 fputs("\
935\n\
936Options in full:\n\
937\n\
938-h, --help Show this help text.\n\
b13c3272 939-V, --version Show version number.\n\
c64d8cd5
MW
940-u, --usage Show brief usage message.\n\
941\n\
22062fb6
MW
942-4, --ipv4 Restrict to IPv4 only.\n\
943-6, --ipv6 Restrict to IPv6 only.\n\
88510d86
MW
944-g, --growth=FACTOR Growth factor for retransmit interval.\n\
945-m, --method=METHOD Use METHOD to probe for MTU.\n\
946-r, --retransmit=SECS Retransmit if no reply after SEC.\n\
947-t, --timeout=SECS Give up expecting a reply after SECS.\n\
a8f70fe1 948-v, --verbose Write a running commentary to stderr.\n\
c64d8cd5 949-H, --header=HEX Packet header, in hexadecimal.\n\
88510d86
MW
950\n\
951Probe methods:\n\
c64d8cd5 952", fp);
88510d86
MW
953 for (ops = probe_ops; ops; ops = ops->next)
954 printf("\t%s\n", ops->name);
c64d8cd5
MW
955}
956
957/*----- Main code ---------------------------------------------------------*/
958
/* Main program.
 *
 * Open the raw sockets while we might still have the privilege to do so,
 * and then drop privileges.  Parse the command line, resolve the
 * destination HOST and optional PORT (the default service is `7'), run
 * the path-MTU discovery, and print the result on standard output.
 * Returns zero on success; failures are reported through `die'.
 */
int main(int argc, char *argv[])
{
  struct param pp = { 0, 0.333, 3.0, 8.0, 0, OPS_CHAIN };
  hex_ctx hc;
  dstr d = DSTR_INIT;
  size_t sz;
  int i, err;
  struct addrinfo aihint = { 0 }, *ailist, *ai;
  const char *host, *svc = "7";
  unsigned f = 0;

#define f_bogus 1u

  /* Grab the raw sockets first.  Failures are remembered and only
   * reported if the `raw' method is actually used.
   */
#ifdef HAVE_GETIFADDRS
  if ((rawicmp = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0 ||
      (rawudp = socket(PF_INET, SOCK_RAW, IPPROTO_UDP)) < 0)
    rawerr = errno;
  if ((rawicmp6 = socket(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) < 0 ||
      (rawudp6 = socket(PF_INET6, SOCK_RAW, IPPROTO_RAW)) < 0)
    rawerr6 = errno;
#endif
  if (setuid(getuid()))
    abort();

  ego(argv[0]);
  fillbuffer(buf, sizeof(buf));

  aihint.ai_family = AF_UNSPEC;
  aihint.ai_protocol = IPPROTO_UDP;
  aihint.ai_socktype = SOCK_DGRAM;
  aihint.ai_flags = AI_ADDRCONFIG;

  /* Parse the command-line options. */
  for (;;) {
    static const struct option opts[] = {
      { "help",         0,              0,      'h' },
      { "version",      0,              0,      'V' },
      { "usage",        0,              0,      'u' },
      { "ipv4",         0,              0,      '4' },
      { "ipv6",         0,              0,      '6' },
      { "header",       OPTF_ARGREQ,    0,      'H' },
      { "growth",       OPTF_ARGREQ,    0,      'g' },
      { "method",       OPTF_ARGREQ,    0,      'm' },
      { "retransmit",   OPTF_ARGREQ,    0,      'r' },
      { "timeout",      OPTF_ARGREQ,    0,      't' },
      { "verbose",      0,              0,      'v' },
      { 0,              0,              0,      0 }
    };

    i = mdwopt(argc, argv, "hVu" "46H:g:m:r:t:v", opts, 0, 0, 0);
    if (i < 0) break;
    switch (i) {
      case 'h': help(stdout); exit(0);
      case 'V': version(stdout); exit(0);
      case 'u': usage(stdout); exit(0);

      case 'H':
        /* Decode the header and put it at the start of the payload; the
         * sequence number is written just after it.
         */
        DRESET(&d);
        hex_init(&hc);
        hex_decode(&hc, optarg, strlen(optarg), &d);
        hex_decode(&hc, 0, 0, &d);
        sz = d.len < 532 ? d.len : 532;
        memcpy(buf, d.buf, sz);
        pp.seqoff = sz;
        break;

      case '4': aihint.ai_family = AF_INET; break;
      case '6': aihint.ai_family = AF_INET6; break;
      case 'g': pp.regr = s2f(optarg, "retransmit growth factor"); break;
      case 'r': pp.retx = s2f(optarg, "retransmit interval"); break;
      case 't': pp.timeout = s2f(optarg, "timeout"); break;

      case 'm':
        for (pp.pops = OPS_CHAIN; pp.pops; pp.pops = pp.pops->next)
          if (strcmp(pp.pops->name, optarg) == 0) goto found_alg;
        die(EXIT_FAILURE, "unknown probe algorithm `%s'", optarg);
      found_alg:
        break;

      case 'v': pp.f |= F_VERBOSE; break;

      default:
        f |= f_bogus;
        break;
    }
  }
  argv += optind; argc -= optind;
  if ((f & f_bogus) || 1 > argc || argc > 2) {
    usage(stderr);
    exit(EXIT_FAILURE);
  }

  /* Resolve the destination, and take the first address in a family which
   * we understand.
   */
  host = argv[0];
  if (argv[1]) svc = argv[1];
  if ((err = getaddrinfo(host, svc, &aihint, &ailist)) != 0) {
    die(EXIT_FAILURE, "unknown host `%s' or service `%s': %s",
        host, svc, gai_strerror(err));
  }
  for (ai = ailist; ai && !addrfamok(ai->ai_family); ai = ai->ai_next);
  if (!ai) die(EXIT_FAILURE, "no supported address families for `%s'", host);
  assert(ai->ai_addrlen <= sizeof(pp.a));
  memcpy(&pp.a, ai->ai_addr, ai->ai_addrlen);

  /* We've copied the address we wanted, so the list can go now. */
  freeaddrinfo(ailist);

  /* Do the job and report the answer. */
  i = pathmtu(&pp);
  if (i < 0)
    die(EXIT_FAILURE, "failed to discover MTU: %s", strerror(errno));
  printf("%d\n", i);
  if (ferror(stdout) || fflush(stdout) || fclose(stdout))
    die(EXIT_FAILURE, "failed to write result: %s", strerror(errno));
  return (0);
}
1069
1070/*----- That's all, folks -------------------------------------------------*/