Commit | Line | Data |
---|---|---|
c64d8cd5 MW |
1 | /* -*-c-*- |
2 | * | |
3 | * Report MTU on path to specified host | |
4 | * | |
5 | * (c) 2008 Straylight/Edgeware | |
6 | */ | |
7 | ||
8 | /*----- Licensing notice --------------------------------------------------* | |
9 | * | |
10 | * This file is part of Trivial IP Encryption (TrIPE). | |
11 | * | |
11ad66c2 MW |
12 | * TrIPE is free software: you can redistribute it and/or modify it under |
13 | * the terms of the GNU General Public License as published by the Free | |
14 | * Software Foundation; either version 3 of the License, or (at your | |
15 | * option) any later version. | |
c64d8cd5 | 16 | * |
11ad66c2 MW |
17 | * TrIPE is distributed in the hope that it will be useful, but WITHOUT |
18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
20 | * for more details. | |
c64d8cd5 MW |
21 | * |
22 | * You should have received a copy of the GNU General Public License | |
11ad66c2 | 23 | * along with TrIPE. If not, see <https://www.gnu.org/licenses/>. |
c64d8cd5 MW |
24 | */ |
25 | ||
26 | /*----- Header files ------------------------------------------------------*/ | |
27 | ||
28 | #include "config.h" | |
29 | ||
d245350a | 30 | #include <assert.h> |
c64d8cd5 | 31 | #include <errno.h> |
88510d86 | 32 | #include <stddef.h> |
c64d8cd5 MW |
33 | #include <stdio.h> |
34 | #include <stdlib.h> | |
35 | #include <string.h> | |
36 | #include <time.h> | |
37 | ||
38 | #include <sys/types.h> | |
39 | #include <sys/time.h> | |
40 | #include <unistd.h> | |
41 | ||
42 | #include <sys/socket.h> | |
43 | #include <netinet/in.h> | |
44 | #include <arpa/inet.h> | |
45 | #include <netdb.h> | |
46 | ||
88510d86 MW |
47 | #include <netinet/in_systm.h> |
48 | #include <netinet/ip.h> | |
49 | #include <netinet/ip_icmp.h> | |
50 | #include <netinet/udp.h> | |
51 | ||
52 | #include <net/if.h> | |
53 | #include <ifaddrs.h> | |
54 | #include <sys/ioctl.h> | |
55 | ||
56 | #include <mLib/alloc.h> | |
57 | #include <mLib/bits.h> | |
c64d8cd5 MW |
58 | #include <mLib/dstr.h> |
59 | #include <mLib/hex.h> | |
60 | #include <mLib/mdwopt.h> | |
61 | #include <mLib/quis.h> | |
62 | #include <mLib/report.h> | |
63 | #include <mLib/tv.h> | |
64 | ||
65 | /*----- Static variables --------------------------------------------------*/ | |
66 | ||
67 | static unsigned char buf[65536]; | |
68 | ||
88510d86 MW |
69 | #define POLY 0x1002d |
70 | ||
c64d8cd5 MW |
71 | /*----- Utility functions -------------------------------------------------*/ |
72 | ||
88510d86 MW |
73 | /* Step a value according to a simple LFSR. */ |
74 | #define STEP(q) \ | |
75 | do (q) = ((q) & 0x8000) ? ((q) << 1) ^ POLY : ((q) << 1); while (0) | |
76 | ||
c64d8cd5 MW |
/* Fill the SZ-byte buffer at P with a fixed pseudorandom byte string.
 *
 * The bytes come from a small LFSR (see `STEP') started from a constant
 * seed, so every call produces the same sequence.  Each output byte is the
 * low octet of the register, which is then clocked eight times.
 */
static void fillbuffer(unsigned char *p, size_t sz)
{
  unsigned int lfsr = 0xbc20;
  size_t k;
  int bit;

  for (k = 0; k < sz; k++) {
    p[k] = lfsr & 0xff;
    for (bit = 8; bit > 0; bit--) STEP(lfsr);
  }
}
91 | ||
88510d86 MW |
/* Convert the string S to a floating-point number and return it.
 *
 * WHAT names the quantity being parsed, for use in the error message.  The
 * whole of S must be a number: if `strtod' reports an error, consumes no
 * characters at all (e.g., S is empty), or stops before the end of the
 * string, then the program exits through `die'.
 */
static double s2f(const char *s, const char *what)
{
  double f;
  char *q;

  errno = 0;
  f = strtod(s, &q);
  /* `q == s' means that nothing was converted; without this check an empty
   * argument would silently be read as zero.
   */
  if (errno || q == s || *q) die(EXIT_FAILURE, "bad %s", what);
  return (f);
}
c64d8cd5 | 103 | |
88510d86 MW |
104 | /* Convert a floating-point value into a struct timeval. */ |
105 | static void f2tv(struct timeval *tv, double t) | |
106 | { tv->tv_sec = t; tv->tv_usec = (t - tv->tv_sec)*MILLION; } | |
107 | ||
454f5a1a MW |
108 | union addr { |
109 | struct sockaddr sa; | |
110 | struct sockaddr_in sin; | |
22062fb6 | 111 | struct sockaddr_in6 sin6; |
454f5a1a MW |
112 | }; |
113 | ||
22062fb6 MW |
/* Report whether the address family AF is one we have any support for at
 * all.  Returns 1 for IPv4 and IPv6, and 0 for anything else.
 */
static int addrfamok(int af)
  { return (af == AF_INET || af == AF_INET6); }
122 | ||
454f5a1a MW |
123 | /* Return the size of a socket address. */ |
124 | static size_t addrsz(const union addr *a) | |
125 | { | |
126 | switch (a->sa.sa_family) { | |
127 | case AF_INET: return (sizeof(a->sin)); | |
22062fb6 | 128 | case AF_INET6: return (sizeof(a->sin6)); |
454f5a1a MW |
129 | default: abort(); |
130 | } | |
131 | } | |
132 | ||
133 | /* Compare two addresses. Maybe compare the port numbers too. */ | |
134 | #define AEF_PORT 1u | |
135 | static int addreq(const union addr *a, const union addr *b, unsigned f) | |
136 | { | |
137 | switch (a->sa.sa_family) { | |
138 | case AF_INET: | |
139 | return (a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr && | |
140 | (!(f&AEF_PORT) || a->sin.sin_port == b->sin.sin_port)); | |
22062fb6 MW |
141 | case AF_INET6: |
142 | return (!memcmp(a->sin6.sin6_addr.s6_addr, | |
143 | b->sin6.sin6_addr.s6_addr, 16) && | |
144 | (!(f&AEF_PORT) || a->sin6.sin6_port == b->sin6.sin6_port)); | |
454f5a1a MW |
145 | default: |
146 | abort(); | |
147 | } | |
148 | } | |
149 | ||
88510d86 MW |
150 | /*----- Main algorithm skeleton -------------------------------------------*/ |
151 | ||
152 | struct param { | |
153 | unsigned f; /* Various flags */ | |
154 | #define F_VERBOSE 1u /* Give a running commentary */ | |
155 | double retx; /* Initial retransmit interval */ | |
156 | double regr; /* Retransmit growth factor */ | |
157 | double timeout; /* Retransmission timeout */ | |
158 | int seqoff; /* Offset to write sequence number */ | |
159 | const struct probe_ops *pops; /* Probe algorithm description */ | |
454f5a1a | 160 | union addr a; /* Destination address */ |
88510d86 MW |
161 | }; |
162 | ||
163 | struct probestate { | |
164 | const struct param *pp; | |
165 | unsigned q; | |
166 | }; | |
167 | ||
/* Description of a method for probing the path MTU.
 *
 * The methods are linked into a chain through `next' so that they can be
 * listed and looked up by name.  Every operation receives, as its first
 * argument, a pointer to a method-private state block of `statesz' bytes
 * which the caller allocates.
 */
struct probe_ops {
  const char *name;			/* Method name */
  const struct probe_ops *next;		/* Next method in the chain */
  size_t statesz;			/* Size of private state block */

  /* Initialize the state, given the connected UDP socket and the
   * parameters.  Returns an initial upper bound on the MTU, or a negative
   * value on failure.
   */
  int (*setup)(void *, int, const struct param *);

  /* Clean up the state after a successful `setup'. */
  void (*finish)(void *);

  /* Add the descriptors the method wants watched to the set, keeping the
   * maximum descriptor up-to-date.
   */
  void (*selprep)(void *, int *, fd_set *);

  /* Transmit a probe of the given size.  Returns `RC_OK' if the probe was
   * sent; any other code is passed straight back as the probe's result.
   */
  int (*xmit)(void *, int);

  /* Inspect the descriptors reported ready by `select'.  Returns `RC_OK'
   * to carry on waiting; any other code (or a positive MTU) ends the
   * probe with that result.
   */
  int (*selproc)(void *, fd_set *, struct probestate *);
};
178 | ||
179 | #define OPS_CHAIN 0 | |
180 | ||
/* Result codes passed back from the probe operations and from `probe'. */
enum {
  RC_FAIL = -99,			/* Hard failure: abandon the search */
  RC_OK = 0,				/* Sent OK, or nothing to report yet */
  RC_LOWER = -1,			/* Probe was too big: guess lower */
  RC_HIGHER = -2,			/* Probe got through: guess higher */
  RC_NOREPLY = -3			/* Gave up waiting for any answer */
  /* or a positive MTU upper-bound */
};
189 | ||
190 | /* Add a file descriptor FD to the set `fd_in', updating `*maxfd'. */ | |
191 | #define ADDFD(fd) \ | |
192 | do { FD_SET(fd, fd_in); if (*maxfd < fd) *maxfd = fd; } while (0) | |
193 | ||
194 | /* Check whether a buffer contains a packet from our current probe. */ | |
195 | static int mypacketp(struct probestate *ps, | |
196 | const unsigned char *p, size_t sz) | |
197 | { | |
198 | const struct param *pp = ps->pp; | |
c64d8cd5 | 199 | |
88510d86 MW |
200 | return (sz >= pp->seqoff + 2 && LOAD16(p + pp->seqoff) == ps->q); |
201 | } | |
202 | ||
203 | /* See whether MTU is an acceptable MTU value. Return an appropriate | |
204 | * RC_... code or a new suggested MTU. | |
205 | */ | |
206 | static int probe(struct probestate *ps, void *st, int mtu) | |
c64d8cd5 | 207 | { |
88510d86 | 208 | const struct param *pp = ps->pp; |
c64d8cd5 | 209 | fd_set fd_in; |
88510d86 MW |
210 | struct timeval tv, now, when, done; |
211 | double timer = pp->retx; | |
212 | int rc, maxfd; | |
213 | ||
214 | /* Set up the first retransmit and give-up timers. */ | |
215 | gettimeofday(&now, 0); | |
216 | f2tv(&tv, pp->timeout); TV_ADD(&done, &now, &tv); | |
217 | f2tv(&tv, timer); TV_ADD(&when, &now, &tv); | |
218 | if (TV_CMP(&when, >, &done)) when = done; | |
219 | ||
220 | /* Send the initial probe. */ | |
221 | if (pp->f & F_VERBOSE) | |
222 | moan("sending probe of size %d (seq = %04x)", mtu, ps->q); | |
223 | STEP(ps->q); | |
224 | STORE16(buf + pp->seqoff, ps->q); | |
225 | if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc); | |
226 | ||
227 | for (;;) { | |
228 | ||
229 | /* Wait for something interesting to happen. */ | |
230 | maxfd = 0; FD_ZERO(&fd_in); | |
231 | pp->pops->selprep(st, &maxfd, &fd_in); | |
232 | TV_SUB(&tv, &when, &now); | |
233 | if (select(maxfd + 1, &fd_in, 0, 0, &tv) < 0) return (RC_FAIL); | |
234 | gettimeofday(&now, 0); | |
235 | ||
236 | /* See whether the probe method has any answers for us. */ | |
237 | if ((rc = pp->pops->selproc(st, &fd_in, ps)) != RC_OK) return (rc); | |
238 | ||
239 | /* If we've waited too long, give up. If we should retransmit, do | |
240 | * that. | |
241 | */ | |
242 | if (TV_CMP(&now, >, &done)) | |
243 | return (RC_NOREPLY); | |
244 | else if (TV_CMP(&now, >, &when)) { | |
245 | if (pp->f & F_VERBOSE) moan("re-sending probe of size %d", mtu); | |
246 | if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc); | |
247 | do { | |
248 | timer *= pp->regr; f2tv(&tv, timer); TV_ADD(&when, &when, &tv); | |
249 | } while (TV_CMP(&when, <, &now)); | |
250 | if (TV_CMP(&when, >, &done)) when = done; | |
251 | } | |
252 | } | |
253 | } | |
c64d8cd5 | 254 | |
88510d86 MW |
255 | /* Discover the path MTU to the destination address. */ |
256 | static int pathmtu(const struct param *pp) | |
257 | { | |
258 | int sk; | |
259 | int mtu, lo, hi; | |
260 | int rc, droppy = -1; | |
261 | void *st; | |
262 | struct probestate ps; | |
263 | ||
264 | /* Build and connect a UDP socket. We'll need this to know the local port | |
265 | * number to use if nothing else. Set other stuff up. | |
266 | */ | |
454f5a1a MW |
267 | if ((sk = socket(pp->a.sa.sa_family, SOCK_DGRAM, IPPROTO_UDP)) < 0) |
268 | goto fail_0; | |
269 | if (connect(sk, &pp->a.sa, addrsz(&pp->a))) goto fail_1; | |
88510d86 MW |
270 | st = xmalloc(pp->pops->statesz); |
271 | if ((mtu = pp->pops->setup(st, sk, pp)) < 0) goto fail_2; | |
272 | ps.pp = pp; ps.q = rand() & 0xffff; | |
22062fb6 MW |
273 | switch (pp->a.sa.sa_family) { |
274 | case AF_INET: lo = 576; break; | |
275 | case AF_INET6: lo = 1280; break; | |
276 | default: abort(); | |
277 | } | |
278 | hi = mtu; | |
279 | if (hi < lo) { errno = EMSGSIZE; return (-1); } | |
88510d86 MW |
280 | |
281 | /* And now we do a thing which is sort of like a binary search, except that | |
282 | * we also take explicit clues as establishing a new upper bound, and we | |
283 | * try to hug that initially. | |
284 | */ | |
c64d8cd5 | 285 | for (;;) { |
d245350a MW |
286 | assert(lo <= mtu && mtu <= hi); |
287 | if (pp->f & F_VERBOSE) moan("probe: %d <= %d <= %d", lo, mtu, hi); | |
88510d86 MW |
288 | rc = probe(&ps, st, mtu); |
289 | switch (rc) { | |
290 | ||
291 | case RC_FAIL: | |
292 | if (pp->f & F_VERBOSE) moan("probe failed"); | |
293 | goto fail_3; | |
294 | ||
295 | case RC_NOREPLY: | |
296 | /* If we've not seen a dropped packet before then we don't know what | |
297 | * this means yet -- in particular, we don't know which bit of the | |
298 | * network is swallowing packets. Send a minimum-size probe. If | |
299 | * that doesn't come back then assume that the remote host is | |
300 | * swallowing our packets. If it does, then we assume that dropped | |
301 | * packets are a result of ICMP fragmentation-needed reports being | |
302 | * lost or suppressed. | |
303 | */ | |
304 | if (pp->f & F_VERBOSE) moan("gave up: black hole detected"); | |
305 | if (droppy == -1) { | |
306 | if (pp->f & F_VERBOSE) moan("sending minimum-size probe"); | |
307 | switch (probe(&ps, st, lo)) { | |
308 | case RC_FAIL: | |
309 | goto fail_3; | |
310 | case RC_NOREPLY: | |
311 | if (pp->f & F_VERBOSE) { | |
312 | moan("no reply from min-size probe: " | |
313 | "assume black hole at target"); | |
314 | } | |
315 | droppy = 1; | |
316 | break; | |
317 | case RC_HIGHER: | |
318 | if (pp->f & F_VERBOSE) { | |
319 | moan("reply from min-size probe OK: " | |
320 | "assume black hole in network"); | |
321 | } | |
322 | droppy = 0; | |
323 | break; | |
324 | default: | |
325 | if (pp->f & F_VERBOSE) | |
326 | moan("unexpected return code from probe"); | |
327 | errno = ENOTCONN; | |
328 | goto fail_3; | |
329 | } | |
330 | } | |
331 | ||
332 | if (droppy) goto higher; else goto lower; | |
333 | ||
334 | case RC_HIGHER: | |
335 | higher: | |
336 | if (droppy == -1) { | |
337 | if (pp->f & F_VERBOSE) | |
338 | moan("probe returned: remote host is not a black hole"); | |
339 | droppy = 0; | |
340 | } | |
341 | if (mtu == hi) { | |
342 | if (pp->f & F_VERBOSE) moan("probe returned: found correct MTU"); | |
343 | goto done; | |
344 | } | |
88510d86 | 345 | lo = mtu; |
d245350a MW |
346 | |
347 | /* Now we must make a new guess, between lo and hi. We know that lo | |
348 | * is good; but we're not so sure about hi here. We know that hi > | |
349 | * lo, so this will find an approximate midpoint, greater than lo and | |
350 | * no more than hi. | |
351 | */ | |
352 | if (pp->f & F_VERBOSE) moan("probe returned: guessing higher"); | |
88510d86 MW |
353 | mtu += (hi - lo + 1)/2; |
354 | break; | |
355 | ||
356 | case RC_LOWER: | |
357 | lower: | |
d245350a MW |
358 | /* If this didn't work, and we're already at the bottom of our |
359 | * possible range, then something has gone horribly wrong. | |
360 | */ | |
361 | assert(lo < mtu); | |
362 | hi = mtu - 1; | |
363 | if (lo == hi) { | |
88510d86 | 364 | if (pp->f & F_VERBOSE) moan("error returned: found correct MTU"); |
d245350a | 365 | mtu = lo; |
88510d86 MW |
366 | goto done; |
367 | } | |
d245350a MW |
368 | |
369 | /* We must make a new guess, between lo and hi. We're probably | |
370 | * fairly sure that lo will succeed, since either it's the minimum | |
371 | * MTU or we've tested it already; but we're not quite sure about hi, | |
372 | * so we want to aim high. | |
373 | */ | |
88510d86 | 374 | if (pp->f & F_VERBOSE) moan("error returned: guessing lower"); |
88510d86 MW |
375 | mtu -= (hi - lo + 1)/2; |
376 | break; | |
377 | ||
378 | default: | |
379 | if (pp->f & F_VERBOSE) moan("error returned with new MTU estimate"); | |
380 | mtu = hi = rc; | |
381 | break; | |
382 | } | |
c64d8cd5 | 383 | } |
88510d86 MW |
384 | |
385 | done: | |
386 | /* Clean up and return our result. */ | |
387 | pp->pops->finish(st); | |
388 | xfree(st); | |
c64d8cd5 MW |
389 | close(sk); |
390 | return (mtu); | |
391 | ||
88510d86 MW |
392 | fail_3: |
393 | pp->pops->finish(st); | |
394 | fail_2: | |
395 | xfree(st); | |
c64d8cd5 MW |
396 | fail_1: |
397 | close(sk); | |
398 | fail_0: | |
399 | return (-1); | |
400 | } | |
401 | ||
88510d86 MW |
402 | /*----- Doing it the hard way ---------------------------------------------*/ |
403 | ||
404 | #if defined(linux) || defined(__OpenBSD__) | |
405 | #define IPHDR_SANE | |
406 | #endif | |
407 | ||
408 | #ifdef IPHDR_SANE | |
409 | # define sane_htons htons | |
410 | # define sane_htonl htonl | |
c64d8cd5 | 411 | #else |
88510d86 MW |
412 | # define sane_htons |
413 | # define sane_htonl | |
414 | #endif | |
415 | ||
416 | static int rawicmp = -1, rawudp = -1, rawerr = 0; | |
417 | ||
418 | #define IPCK_INIT 0xffff | |
419 | ||
/* Compute an IP checksum over the N bytes at BUF.
 *
 * This is a restartable interface: pass `IPCK_INIT' (0xffff) as A for the
 * first chunk, and the result of the previous call for each later one.  A
 * trailing odd byte is treated as the high octet of a final 16-bit word.
 * If the complemented sum would be zero then 0xffff is returned instead.
 */
static unsigned ipcksum(const void *buf, size_t n, unsigned a)
{
  unsigned long aa = a ^ 0xffff;
  const unsigned char *p = buf;

  /* Count the bytes remaining rather than comparing against a `limit - 1'
   * pointer: for an empty buffer that would point before the start of the
   * object, which isn't allowed.  Words are read big-endian.
   */
  for (; n >= 2; n -= 2, p += 2)
    aa += ((unsigned long)p[0] << 8) | p[1];
  if (n) aa += (unsigned long)p[0] << 8;

  /* Fold the carries back in, and complement. */
  do aa = (aa & 0xffff) + (aa >> 16); while (aa >= 0x10000);
  return (aa == 0xffff ? aa : aa ^ 0xffff);
}
433 | ||
434 | /* TCP/UDP pseudoheader structure. */ | |
435 | struct phdr { | |
436 | struct in_addr ph_src, ph_dst; | |
437 | u_char ph_z, ph_p; | |
438 | u_short ph_len; | |
439 | }; | |
440 | ||
441 | struct raw_state { | |
454f5a1a | 442 | union addr me, a; |
88510d86 MW |
443 | int sk, rawicmp, rawudp; |
444 | unsigned q; | |
445 | }; | |
446 | ||
447 | static int raw_setup(void *stv, int sk, const struct param *pp) | |
448 | { | |
449 | struct raw_state *st = stv; | |
cb160b86 | 450 | socklen_t sz; |
88510d86 MW |
451 | int i, mtu = -1; |
452 | struct ifaddrs *ifa, *ifaa, *ifap; | |
453 | struct ifreq ifr; | |
454 | ||
454f5a1a MW |
455 | /* Check that the address is OK, and that we have the necessary raw |
456 | * sockets. | |
457 | */ | |
458 | switch (pp->a.sa.sa_family) { | |
459 | case AF_INET: | |
460 | if (rawerr) { errno = rawerr; goto fail_0; } | |
461 | st->rawicmp = rawicmp; st->rawudp = rawudp; st->sk = sk; | |
462 | break; | |
463 | default: | |
464 | errno = EPFNOSUPPORT; goto fail_0; | |
465 | } | |
88510d86 MW |
466 | |
467 | /* Initialize the sequence number. */ | |
468 | st->q = rand() & 0xffff; | |
469 | ||
470 | /* Snaffle the local and remote address and port number. */ | |
454f5a1a | 471 | st->a = pp->a; |
88510d86 | 472 | sz = sizeof(st->me); |
454f5a1a | 473 | if (getsockname(sk, &st->me.sa, &sz)) |
88510d86 MW |
474 | goto fail_0; |
475 | ||
476 | /* There isn't a portable way to force the DF flag onto a packet through | |
477 | * UDP, or even through raw IP, unless we write the entire IP header | |
478 | * ourselves. This is somewhat annoying, especially since we have an | |
479 | * uphill struggle keeping track of which systems randomly expect which | |
480 | * header fields to be presented in host byte order. Oh, well. | |
481 | */ | |
482 | i = 1; | |
483 | if (setsockopt(rawudp, IPPROTO_IP, IP_HDRINCL, &i, sizeof(i))) goto fail_0; | |
484 | ||
485 | /* Find an upper bound on the MTU. Do two passes over the interface | |
486 | * list. If we can find matches for our local address then use the | |
487 | * highest one of those; otherwise do a second pass and simply take the | |
488 | * highest MTU of any network interface. | |
489 | */ | |
490 | if (getifaddrs(&ifaa)) goto fail_0; | |
491 | for (i = 0; i < 2; i++) { | |
492 | for (ifap = 0, ifa = ifaa; ifa; ifa = ifa->ifa_next) { | |
493 | if (!(ifa->ifa_flags & IFF_UP) || !ifa->ifa_addr || | |
454f5a1a | 494 | ifa->ifa_addr->sa_family != st->me.sa.sa_family || |
88510d86 | 495 | (i == 0 && |
454f5a1a | 496 | !addreq((union addr *)ifa->ifa_addr, &st->me, 0)) || |
88510d86 MW |
497 | (i == 1 && ifap && strcmp(ifap->ifa_name, ifa->ifa_name) == 0) || |
498 | strlen(ifa->ifa_name) >= sizeof(ifr.ifr_name)) | |
499 | continue; | |
500 | ifap = ifa; | |
501 | strcpy(ifr.ifr_name, ifa->ifa_name); | |
502 | if (ioctl(sk, SIOCGIFMTU, &ifr)) goto fail_1; | |
503 | if (mtu < ifr.ifr_mtu) mtu = ifr.ifr_mtu; | |
504 | } | |
505 | if (mtu > 0) break; | |
506 | } | |
507 | if (mtu < 0) { errno = ENOTCONN; goto fail_1; } | |
508 | freeifaddrs(ifaa); | |
509 | ||
510 | /* Done. */ | |
511 | return (mtu); | |
512 | ||
513 | fail_1: | |
514 | freeifaddrs(ifaa); | |
515 | fail_0: | |
516 | return (-1); | |
517 | } | |
518 | ||
/* Finish with the raw-socket probe state.  There is nothing to do. */
static void raw_finish(void *stv)
{
  (void)stv;
}
520 | ||
521 | static void raw_selprep(void *stv, int *maxfd, fd_set *fd_in) | |
522 | { struct raw_state *st = stv; ADDFD(st->sk); ADDFD(st->rawicmp); } | |
523 | ||
524 | static int raw_xmit(void *stv, int mtu) | |
525 | { | |
526 | struct raw_state *st = stv; | |
527 | unsigned char b[65536], *p; | |
528 | struct ip *ip; | |
529 | struct udphdr *udp; | |
530 | struct phdr ph; | |
531 | unsigned ck; | |
532 | ||
533 | /* Build the IP header. */ | |
534 | ip = (struct ip *)b; | |
535 | ip->ip_v = 4; | |
536 | ip->ip_hl = sizeof(*ip)/4; | |
537 | ip->ip_tos = IPTOS_RELIABILITY; | |
538 | ip->ip_len = sane_htons(mtu); | |
539 | STEP(st->q); ip->ip_id = htons(st->q); | |
540 | ip->ip_off = sane_htons(0 | IP_DF); | |
541 | ip->ip_ttl = 64; | |
542 | ip->ip_p = IPPROTO_UDP; | |
543 | ip->ip_sum = 0; | |
454f5a1a MW |
544 | ip->ip_src = st->me.sin.sin_addr; |
545 | ip->ip_dst = st->a.sin.sin_addr; | |
88510d86 MW |
546 | |
547 | /* Build a UDP packet in the output buffer. */ | |
548 | udp = (struct udphdr *)(ip + 1); | |
454f5a1a MW |
549 | udp->uh_sport = st->me.sin.sin_port; |
550 | udp->uh_dport = st->a.sin.sin_port; | |
88510d86 MW |
551 | udp->uh_ulen = htons(mtu - sizeof(*ip)); |
552 | udp->uh_sum = 0; | |
553 | ||
554 | /* Copy the payload. */ | |
555 | p = (unsigned char *)(udp + 1); | |
556 | memcpy(p, buf, mtu - (p - b)); | |
557 | ||
558 | /* Calculate the UDP checksum. */ | |
559 | ph.ph_src = ip->ip_src; | |
560 | ph.ph_dst = ip->ip_dst; | |
561 | ph.ph_z = 0; | |
562 | ph.ph_p = IPPROTO_UDP; | |
563 | ph.ph_len = udp->uh_ulen; | |
564 | ck = IPCK_INIT; | |
565 | ck = ipcksum(&ph, sizeof(ph), ck); | |
566 | ck = ipcksum(udp, mtu - sizeof(*ip), ck); | |
567 | udp->uh_sum = htons(ck); | |
568 | ||
569 | /* Send the whole thing off. If we're too big for the interface then we | |
570 | * might need to trim immediately. | |
571 | */ | |
454f5a1a | 572 | if (sendto(st->rawudp, b, mtu, 0, &st->a.sa, addrsz(&st->a)) < 0) { |
88510d86 MW |
573 | if (errno == EMSGSIZE) return (RC_LOWER); |
574 | else goto fail_0; | |
575 | } | |
576 | ||
577 | /* Done. */ | |
578 | return (RC_OK); | |
579 | ||
580 | fail_0: | |
581 | return (RC_FAIL); | |
582 | } | |
583 | ||
584 | static int raw_selproc(void *stv, fd_set *fd_in, struct probestate *ps) | |
585 | { | |
586 | struct raw_state *st = stv; | |
587 | unsigned char b[65536]; | |
588 | struct ip *ip; | |
589 | struct icmp *icmp; | |
590 | struct udphdr *udp; | |
9ad20ce0 | 591 | const unsigned char *payload; |
88510d86 MW |
592 | ssize_t n; |
593 | ||
594 | /* An ICMP packet: see what's inside. */ | |
595 | if (FD_ISSET(st->rawicmp, fd_in)) { | |
596 | if ((n = read(st->rawicmp, b, sizeof(b))) < 0) goto fail_0; | |
597 | ||
598 | ip = (struct ip *)b; | |
599 | if (n < sizeof(*ip) || n < sizeof(4*ip->ip_hl) || | |
600 | ip->ip_v != 4 || ip->ip_p != IPPROTO_ICMP) | |
601 | goto skip_icmp; | |
602 | n -= sizeof(4*ip->ip_hl); | |
603 | ||
604 | icmp = (struct icmp *)(b + 4*ip->ip_hl); | |
605 | if (n < sizeof(*icmp) || icmp->icmp_type != ICMP_UNREACH) | |
606 | goto skip_icmp; | |
607 | n -= offsetof(struct icmp, icmp_ip); | |
608 | ||
609 | ip = &icmp->icmp_ip; | |
610 | if (n < sizeof(*ip) || | |
611 | ip->ip_p != IPPROTO_UDP || ip->ip_hl != sizeof(*ip)/4 || | |
612 | ip->ip_id != htons(st->q) || | |
454f5a1a MW |
613 | ip->ip_src.s_addr != st->me.sin.sin_addr.s_addr || |
614 | ip->ip_dst.s_addr != st->a.sin.sin_addr.s_addr) | |
88510d86 MW |
615 | goto skip_icmp; |
616 | n -= sizeof(*ip); | |
617 | ||
618 | udp = (struct udphdr *)(ip + 1); | |
454f5a1a MW |
619 | if (n < sizeof(udp) || udp->uh_sport != st->me.sin.sin_port || |
620 | udp->uh_dport != st->a.sin.sin_port) | |
88510d86 MW |
621 | goto skip_icmp; |
622 | n -= sizeof(*udp); | |
623 | ||
9ad20ce0 MW |
624 | payload = (const unsigned char *)(udp + 1); |
625 | if (!mypacketp(ps, payload, n)) goto skip_icmp; | |
626 | ||
88510d86 MW |
627 | if (icmp->icmp_code == ICMP_UNREACH_PORT) return (RC_HIGHER); |
628 | else if (icmp->icmp_code != ICMP_UNREACH_NEEDFRAG) goto skip_icmp; | |
629 | else if (icmp->icmp_nextmtu) return (htons(icmp->icmp_nextmtu)); | |
630 | else return (RC_LOWER); | |
631 | } | |
632 | skip_icmp:; | |
633 | ||
634 | /* If we got a reply to the current probe then we're good. If we got an | |
635 | * error, or the packet's sequence number is wrong, then ignore it. | |
636 | */ | |
637 | if (FD_ISSET(st->sk, fd_in)) { | |
638 | if ((n = read(st->sk, b, sizeof(b))) < 0) return (RC_OK); | |
639 | else if (mypacketp(ps, b, n)) return (RC_HIGHER); | |
640 | else return (RC_OK); | |
641 | } | |
642 | ||
643 | return (RC_OK); | |
644 | ||
645 | fail_0: | |
646 | return (RC_FAIL); | |
647 | } | |
648 | ||
649 | static const struct probe_ops raw_ops = { | |
650 | "raw", OPS_CHAIN, sizeof(struct raw_state), | |
651 | raw_setup, raw_finish, | |
652 | raw_selprep, raw_xmit, raw_selproc | |
653 | }; | |
654 | ||
655 | #undef OPS_CHAIN | |
656 | #define OPS_CHAIN &raw_ops | |
657 | ||
658 | /*----- Doing the job on Linux --------------------------------------------*/ | |
659 | ||
660 | #if defined(linux) | |
661 | ||
662 | #ifndef IP_MTU | |
663 | # define IP_MTU 14 /* Blech! */ | |
664 | #endif | |
665 | ||
666 | struct linux_state { | |
667 | int sk; | |
668 | }; | |
669 | ||
670 | static int linux_setup(void *stv, int sk, const struct param *pp) | |
671 | { | |
672 | struct linux_state *st = stv; | |
673 | int i, mtu; | |
cb160b86 | 674 | socklen_t sz; |
88510d86 | 675 | |
454f5a1a MW |
676 | /* Check that the address is OK. */ |
677 | switch (pp->a.sa.sa_family) { | |
678 | case AF_INET: break; | |
679 | default: errno = EPFNOSUPPORT; return (-1); | |
680 | } | |
681 | ||
88510d86 MW |
682 | /* Snaffle the UDP socket. */ |
683 | st->sk = sk; | |
684 | ||
685 | /* Turn on kernel path-MTU discovery and force DF on. */ | |
18d5f6eb | 686 | i = IP_PMTUDISC_PROBE; |
88510d86 MW |
687 | if (setsockopt(st->sk, IPPROTO_IP, IP_MTU_DISCOVER, &i, sizeof(i))) |
688 | return (-1); | |
689 | ||
690 | /* Read the initial MTU guess back and report it. */ | |
691 | sz = sizeof(mtu); | |
692 | if (getsockopt(st->sk, IPPROTO_IP, IP_MTU, &mtu, &sz)) | |
693 | return (-1); | |
694 | ||
695 | /* Done. */ | |
696 | return (mtu); | |
697 | } | |
698 | ||
/* Finish with the Linux probe state.  There is nothing to do. */
static void linux_finish(void *stv)
{
  (void)stv;
}
700 | ||
701 | static void linux_selprep(void *stv, int *maxfd, fd_set *fd_in) | |
702 | { struct linux_state *st = stv; ADDFD(st->sk); } | |
703 | ||
704 | static int linux_xmit(void *stv, int mtu) | |
705 | { | |
706 | struct linux_state *st = stv; | |
707 | ||
708 | /* Write the packet. */ | |
709 | if (write(st->sk, buf, mtu - 28) >= 0) return (RC_OK); | |
710 | else if (errno == EMSGSIZE) return (RC_LOWER); | |
711 | else return (RC_FAIL); | |
712 | } | |
713 | ||
714 | static int linux_selproc(void *stv, fd_set *fd_in, struct probestate *ps) | |
715 | { | |
716 | struct linux_state *st = stv; | |
717 | int mtu; | |
cb160b86 | 718 | socklen_t sz; |
88510d86 MW |
719 | ssize_t n; |
720 | unsigned char b[65536]; | |
721 | ||
722 | /* Read an answer. If it looks like the right kind of error then report a | |
723 | * success. This is potentially wrong, since we can't tell whether an | |
724 | * error was delayed from an earlier probe. However, we never return | |
725 | * RC_LOWER from this method, so the packet sizes ought to be monotonically | |
726 | * decreasing and this won't cause trouble. Otherwise update from the | |
727 | * kernel's idea of the right MTU. | |
728 | */ | |
729 | if (FD_ISSET(st->sk, fd_in)) { | |
730 | n = read(st->sk, &buf, sizeof(buf)); | |
731 | if (n >= 0 ? | |
732 | mypacketp(ps, b, n) : | |
733 | errno == ECONNREFUSED || errno == EHOSTUNREACH) | |
734 | return (RC_HIGHER); | |
735 | sz = sizeof(mtu); | |
736 | if (getsockopt(st->sk, IPPROTO_IP, IP_MTU, &mtu, &sz)) | |
737 | return (RC_FAIL); | |
738 | return (mtu); | |
739 | } | |
740 | return (RC_OK); | |
741 | } | |
742 | ||
743 | static const struct probe_ops linux_ops = { | |
744 | "linux", OPS_CHAIN, sizeof(struct linux_state), | |
745 | linux_setup, linux_finish, | |
746 | linux_selprep, linux_xmit, linux_selproc | |
747 | }; | |
c64d8cd5 | 748 | |
88510d86 MW |
749 | #undef OPS_CHAIN |
750 | #define OPS_CHAIN &linux_ops | |
c64d8cd5 MW |
751 | |
752 | #endif | |
753 | ||
754 | /*----- Help options ------------------------------------------------------*/ | |
755 | ||
88510d86 MW |
756 | static const struct probe_ops *probe_ops = OPS_CHAIN; |
757 | ||
c64d8cd5 MW |
758 | static void version(FILE *fp) |
759 | { pquis(fp, "$, TrIPE version " VERSION "\n"); } | |
760 | ||
/* Write a brief usage summary to FP. */
static void usage(FILE *fp)
{
  static const char text[] = "Usage: $ [-46v] [-H HEADER] [-m METHOD]\n\
[-r SECS] [-g FACTOR] [-t SECS] HOST [PORT]\n";

  pquis(fp, text);
}
c64d8cd5 MW |
766 | |
767 | static void help(FILE *fp) | |
768 | { | |
88510d86 MW |
769 | const struct probe_ops *ops; |
770 | ||
c64d8cd5 MW |
771 | version(fp); |
772 | fputc('\n', fp); | |
773 | usage(fp); | |
774 | fputs("\ | |
775 | \n\ | |
776 | Options in full:\n\ | |
777 | \n\ | |
778 | -h, --help Show this help text.\n\ | |
b13c3272 | 779 | -V, --version Show version number.\n\ |
c64d8cd5 MW |
780 | -u, --usage Show brief usage message.\n\ |
781 | \n\ | |
22062fb6 MW |
782 | -4, --ipv4 Restrict to IPv4 only.\n\ |
783 | -6, --ipv6 Restrict to IPv6 only.\n\ | |
88510d86 MW |
784 | -g, --growth=FACTOR Growth factor for retransmit interval.\n\ |
785 | -m, --method=METHOD Use METHOD to probe for MTU.\n\ | |
786 | -r, --retransmit=SECS Retransmit if no reply after SEC.\n\ | |
787 | -t, --timeout=SECS Give up expecting a reply after SECS.\n\ | |
a8f70fe1 | 788 | -v, --verbose Write a running commentary to stderr.\n\ |
c64d8cd5 | 789 | -H, --header=HEX Packet header, in hexadecimal.\n\ |
88510d86 MW |
790 | \n\ |
791 | Probe methods:\n\ | |
c64d8cd5 | 792 | ", fp); |
88510d86 MW |
793 | for (ops = probe_ops; ops; ops = ops->next) |
794 | printf("\t%s\n", ops->name); | |
c64d8cd5 MW |
795 | } |
796 | ||
797 | /*----- Main code ---------------------------------------------------------*/ | |
798 | ||
/* Main program: parse the command line, resolve the target host and
 * service, discover the path MTU to it, and print the answer on stdout.
 *
 * Exits nonzero with a message if the arguments are bad, the host can't
 * be resolved, discovery fails, or the result can't be written.
 */
int main(int argc, char *argv[])
{
  struct param pp = { 0, 0.333, 3.0, 8.0, 0, OPS_CHAIN };
  hex_ctx hc;
  dstr d = DSTR_INIT;
  size_t sz;
  int i, err;
  struct addrinfo aihint = { 0 }, *ailist, *ai;
  const char *host, *svc = "7";
  unsigned f = 0;

#define f_bogus 1u

  /* Open the raw sockets first, remembering the error if that fails, and
   * then set the effective user id back to the real one before doing
   * anything else.
   */
  if ((rawicmp = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0 ||
      (rawudp = socket(PF_INET, SOCK_RAW, IPPROTO_UDP)) < 0)
    rawerr = errno;
  if (setuid(getuid()))
    abort();

  /* NOTE(review): `rand' is used to choose initial sequence numbers but
   * nothing visible here seeds it -- confirm whether that's intended.
   */
  ego(argv[0]);
  fillbuffer(buf, sizeof(buf));

  aihint.ai_family = AF_UNSPEC;
  aihint.ai_protocol = IPPROTO_UDP;
  aihint.ai_socktype = SOCK_DGRAM;
  aihint.ai_flags = AI_ADDRCONFIG;

  /* Parse the command-line options. */
  for (;;) {
    static const struct option opts[] = {
      { "help",		0,		0,	'h' },
      { "version",	0,		0,	'V' },
      { "usage",	0,		0,	'u' },
      { "ipv4",		0,		0,	'4' },
      { "ipv6",		0,		0,	'6' },
      { "header",	OPTF_ARGREQ,	0,	'H' },
      { "growth",	OPTF_ARGREQ,	0,	'g' },
      { "method",	OPTF_ARGREQ,	0,	'm' },
      { "retransmit",	OPTF_ARGREQ,	0,	'r' },
      { "timeout",	OPTF_ARGREQ,	0,	't' },
      { "verbose",	0,		0,	'v' },
      { 0,		0,		0,	0 }
    };

    i = mdwopt(argc, argv, "hVu" "46H:g:m:r:t:v", opts, 0, 0, 0);
    if (i < 0) break;
    switch (i) {
      case 'h': help(stdout); exit(0);
      case 'V': version(stdout); exit(0);
      case 'u': usage(stdout); exit(0);

      case 'H':
	/* Decode the header, and put (at most 532 bytes of) it at the
	 * front of the probe buffer; the sequence number goes just after.
	 */
	DRESET(&d);
	hex_init(&hc);
	hex_decode(&hc, optarg, strlen(optarg), &d);
	hex_decode(&hc, 0, 0, &d);
	sz = d.len < 532 ? d.len : 532;
	memcpy(buf, d.buf, sz);
	pp.seqoff = sz;
	break;

      case '4': aihint.ai_family = AF_INET; break;
      case '6': aihint.ai_family = AF_INET6; break;
      case 'g': pp.regr = s2f(optarg, "retransmit growth factor"); break;
      case 'r': pp.retx = s2f(optarg, "retransmit interval"); break;
      case 't': pp.timeout = s2f(optarg, "timeout"); break;

      case 'm':
	for (pp.pops = OPS_CHAIN; pp.pops; pp.pops = pp.pops->next)
	  if (strcmp(pp.pops->name, optarg) == 0) goto found_alg;
	die(EXIT_FAILURE, "unknown probe algorithm `%s'", optarg);
      found_alg:
	break;

      case 'v': pp.f |= F_VERBOSE; break;

      default:
	f |= f_bogus;
	break;
    }
  }
  argv += optind; argc -= optind;
  if ((f & f_bogus) || 1 > argc || argc > 2) {
    usage(stderr);
    exit(EXIT_FAILURE);
  }

  /* Resolve the host and service, and take the first address of a family
   * we know about.
   */
  host = argv[0];
  if (argv[1]) svc = argv[1];
  if ((err = getaddrinfo(host, svc, &aihint, &ailist)) != 0) {
    die(EXIT_FAILURE, "unknown host `%s' or service `%s': %s",
	host, svc, gai_strerror(err));
  }
  for (ai = ailist; ai && !addrfamok(ai->ai_family); ai = ai->ai_next);
  if (!ai) die(EXIT_FAILURE, "no supported address families for `%s'", host);
  assert(ai->ai_addrlen <= sizeof(pp.a));
  memcpy(&pp.a, ai->ai_addr, ai->ai_addrlen);

  /* We've copied the address we wanted, so release the resolver's list. */
  freeaddrinfo(ailist);

  /* Do the job and report the answer. */
  i = pathmtu(&pp);
  if (i < 0)
    die(EXIT_FAILURE, "failed to discover MTU: %s", strerror(errno));
  printf("%d\n", i);
  if (ferror(stdout) || fflush(stdout) || fclose(stdout))
    die(EXIT_FAILURE, "failed to write result: %s", strerror(errno));
  return (0);
}
904 | ||
905 | /*----- That's all, folks -------------------------------------------------*/ |