Commit | Line | Data |
---|---|---|
c64d8cd5 MW |
1 | /* -*-c-*- |
2 | * | |
3 | * Report MTU on path to specified host | |
4 | * | |
5 | * (c) 2008 Straylight/Edgeware | |
6 | */ | |
7 | ||
8 | /*----- Licensing notice --------------------------------------------------* | |
9 | * | |
10 | * This file is part of Trivial IP Encryption (TrIPE). | |
11 | * | |
12 | * TrIPE is free software; you can redistribute it and/or modify | |
13 | * it under the terms of the GNU General Public License as published by | |
14 | * the Free Software Foundation; either version 2 of the License, or | |
15 | * (at your option) any later version. | |
16 | * | |
17 | * TrIPE is distributed in the hope that it will be useful, | |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | * GNU General Public License for more details. | |
21 | * | |
22 | * You should have received a copy of the GNU General Public License | |
23 | * along with TrIPE; if not, write to the Free Software Foundation, | |
24 | * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | */ | |
26 | ||
27 | /*----- Header files ------------------------------------------------------*/ | |
28 | ||
88510d86 MW |
29 | #if defined(linux) |
30 | # define _BSD_SOURCE | |
31 | #endif | |
32 | ||
c64d8cd5 MW |
33 | #include "config.h" |
34 | ||
35 | #include <errno.h> | |
88510d86 | 36 | #include <stddef.h> |
c64d8cd5 MW |
37 | #include <stdio.h> |
38 | #include <stdlib.h> | |
39 | #include <string.h> | |
40 | #include <time.h> | |
41 | ||
42 | #include <sys/types.h> | |
43 | #include <sys/time.h> | |
44 | #include <unistd.h> | |
45 | ||
46 | #include <sys/socket.h> | |
47 | #include <netinet/in.h> | |
48 | #include <arpa/inet.h> | |
49 | #include <netdb.h> | |
50 | ||
88510d86 MW |
51 | #include <netinet/in_systm.h> |
52 | #include <netinet/ip.h> | |
53 | #include <netinet/ip_icmp.h> | |
54 | #include <netinet/udp.h> | |
55 | ||
56 | #include <net/if.h> | |
57 | #include <ifaddrs.h> | |
58 | #include <sys/ioctl.h> | |
59 | ||
60 | #include <mLib/alloc.h> | |
61 | #include <mLib/bits.h> | |
c64d8cd5 MW |
62 | #include <mLib/dstr.h> |
63 | #include <mLib/hex.h> | |
64 | #include <mLib/mdwopt.h> | |
65 | #include <mLib/quis.h> | |
66 | #include <mLib/report.h> | |
67 | #include <mLib/tv.h> | |
68 | ||
69 | /*----- Static variables --------------------------------------------------*/ | |
70 | ||
/* Shared packet payload buffer.  It is filled with a fixed pseudorandom
 * pattern at startup, may have a user-supplied header copied over its start,
 * and has the probe sequence number stored into it before each probe.
 */
static unsigned char buf[65536];

/* Feedback polynomial for the 16-bit LFSR implemented by `STEP'. */
#define POLY 0x1002d

/*----- Utility functions -------------------------------------------------*/

/* Step a value according to a simple LFSR: shift Q left one bit, and fold
 * POLY back in if the top bit (bit 15) was set.  Q is evaluated more than
 * once, so it must be a side-effect-free lvalue.  Note that zero is a fixed
 * point of this step.
 */
#define STEP(q)                                                         \
  do (q) = ((q) & 0x8000) ? ((q) << 1) ^ POLY : ((q) << 1); while (0)

/* Fill the SZ bytes at P with a constant but pseudorandom string.
 *
 * The generator is the LFSR above, seeded with a fixed value, so the output
 * is the same on every call: each output byte is the low byte of the state,
 * after which the state is stepped eight times.  Nothing is written if SZ
 * is zero.
 */
static void fillbuffer(unsigned char *p, size_t sz)
{
  unsigned int y = 0xbc20;
  const unsigned char *l = p + sz;
  int i;

  while (p < l) {
    *p++ = y & 0xff;
    for (i = 0; i < 8; i++) STEP(y);
  }
}
95 | ||
88510d86 MW |
/* Convert the string S to floating point.
 *
 * WHAT names the quantity being parsed and is used only in the error
 * message.  The whole string must be consumed by `strtod' and at least one
 * character must have been converted; otherwise, or if `strtod' sets
 * `errno', the program exits via `die' with the message `bad WHAT'.
 */
static double s2f(const char *s, const char *what)
{
  double f;
  char *q;

  errno = 0;
  f = strtod(s, &q);
  /* `q == s' means nothing was converted (e.g., an empty argument), which
   * would otherwise be silently accepted as zero.
   */
  if (errno || q == s || *q) die(EXIT_FAILURE, "bad %s", what);
  return (f);
}
c64d8cd5 | 107 | |
88510d86 MW |
108 | /* Convert a floating-point value into a struct timeval. */ |
109 | static void f2tv(struct timeval *tv, double t) | |
110 | { tv->tv_sec = t; tv->tv_usec = (t - tv->tv_sec)*MILLION; } | |
111 | ||
112 | /*----- Main algorithm skeleton -------------------------------------------*/ | |
113 | ||
/* Parameters for a path-MTU discovery run; filled in by `main' and
 * read-only thereafter.
 */
struct param {
  unsigned f;                           /* Various flags */
#define F_VERBOSE 1u                    /*   Give a running commentary */
  double retx;                          /* Initial retransmit interval */
  double regr;                          /* Retransmit growth factor */
  double timeout;                       /* Retransmission timeout */
  int seqoff;                           /* Offset to write sequence number */
  const struct probe_ops *pops;         /* Probe algorithm description */
  struct sockaddr_in sin;               /* Destination address */
};

/* State shared between the search skeleton and the probe method. */
struct probestate {
  const struct param *pp;               /* The run's parameters */
  unsigned q;                           /* Current probe sequence number */
};

/* Description of a probe method.  Methods are chained through `next' so
 * that they can be listed and looked up by name.  Each hook receives the
 * method's private state block (of `statesz' bytes) as its first argument.
 */
struct probe_ops {
  const char *name;                     /* Name, as given to `-m' */
  const struct probe_ops *next;         /* Next method in the chain */
  size_t statesz;                       /* Size of private state block */
  int (*setup)(void *, int, const struct param *);
                                        /* Initialize, given the connected
                                         * UDP socket; return an initial MTU
                                         * guess, or negative on error */
  void (*finish)(void *);               /* Release method resources */
  void (*selprep)(void *, int *, fd_set *);
                                        /* Add descriptors to watch */
  int (*xmit)(void *, int);             /* Send a probe of the given size;
                                         * return an RC_... code */
  int (*selproc)(void *, fd_set *, struct probestate *);
                                        /* Process ready descriptors; return
                                         * RC_OK to keep waiting */
};

/* Head of the chain of probe methods defined so far.  This is redefined
 * after each method's operations table.
 */
#define OPS_CHAIN 0

/* Result codes from probe methods and from `probe'. */
enum {
  RC_FAIL = -99,                        /* Hard failure: give up */
  RC_OK = 0,                            /* Nothing conclusive yet */
  RC_LOWER = -1,                        /* Probe was too large */
  RC_HIGHER = -2,                       /* Probe got through */
  RC_NOREPLY = -3                       /* No answer before the timeout */
  /* or a positive MTU upper-bound */
};

/* Add a file descriptor FD to the set `fd_in', updating `*maxfd'.  This
 * relies on `fd_in' and `maxfd' being in scope at the point of use, as they
 * are in the `selprep' hooks.
 */
#define ADDFD(fd)                                                       \
  do { FD_SET((fd), fd_in); if (*maxfd < (fd)) *maxfd = (fd); } while (0)
155 | ||
156 | /* Check whether a buffer contains a packet from our current probe. */ | |
157 | static int mypacketp(struct probestate *ps, | |
158 | const unsigned char *p, size_t sz) | |
159 | { | |
160 | const struct param *pp = ps->pp; | |
c64d8cd5 | 161 | |
88510d86 MW |
162 | return (sz >= pp->seqoff + 2 && LOAD16(p + pp->seqoff) == ps->q); |
163 | } | |
164 | ||
165 | /* See whether MTU is an acceptable MTU value. Return an appropriate | |
166 | * RC_... code or a new suggested MTU. | |
167 | */ | |
168 | static int probe(struct probestate *ps, void *st, int mtu) | |
c64d8cd5 | 169 | { |
88510d86 | 170 | const struct param *pp = ps->pp; |
c64d8cd5 | 171 | fd_set fd_in; |
88510d86 MW |
172 | struct timeval tv, now, when, done; |
173 | double timer = pp->retx; | |
174 | int rc, maxfd; | |
175 | ||
176 | /* Set up the first retransmit and give-up timers. */ | |
177 | gettimeofday(&now, 0); | |
178 | f2tv(&tv, pp->timeout); TV_ADD(&done, &now, &tv); | |
179 | f2tv(&tv, timer); TV_ADD(&when, &now, &tv); | |
180 | if (TV_CMP(&when, >, &done)) when = done; | |
181 | ||
182 | /* Send the initial probe. */ | |
183 | if (pp->f & F_VERBOSE) | |
184 | moan("sending probe of size %d (seq = %04x)", mtu, ps->q); | |
185 | STEP(ps->q); | |
186 | STORE16(buf + pp->seqoff, ps->q); | |
187 | if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc); | |
188 | ||
189 | for (;;) { | |
190 | ||
191 | /* Wait for something interesting to happen. */ | |
192 | maxfd = 0; FD_ZERO(&fd_in); | |
193 | pp->pops->selprep(st, &maxfd, &fd_in); | |
194 | TV_SUB(&tv, &when, &now); | |
195 | if (select(maxfd + 1, &fd_in, 0, 0, &tv) < 0) return (RC_FAIL); | |
196 | gettimeofday(&now, 0); | |
197 | ||
198 | /* See whether the probe method has any answers for us. */ | |
199 | if ((rc = pp->pops->selproc(st, &fd_in, ps)) != RC_OK) return (rc); | |
200 | ||
201 | /* If we've waited too long, give up. If we should retransmit, do | |
202 | * that. | |
203 | */ | |
204 | if (TV_CMP(&now, >, &done)) | |
205 | return (RC_NOREPLY); | |
206 | else if (TV_CMP(&now, >, &when)) { | |
207 | if (pp->f & F_VERBOSE) moan("re-sending probe of size %d", mtu); | |
208 | if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc); | |
209 | do { | |
210 | timer *= pp->regr; f2tv(&tv, timer); TV_ADD(&when, &when, &tv); | |
211 | } while (TV_CMP(&when, <, &now)); | |
212 | if (TV_CMP(&when, >, &done)) when = done; | |
213 | } | |
214 | } | |
215 | } | |
c64d8cd5 | 216 | |
88510d86 MW |
217 | /* Discover the path MTU to the destination address. */ |
218 | static int pathmtu(const struct param *pp) | |
219 | { | |
220 | int sk; | |
221 | int mtu, lo, hi; | |
222 | int rc, droppy = -1; | |
223 | void *st; | |
224 | struct probestate ps; | |
225 | ||
226 | /* Build and connect a UDP socket. We'll need this to know the local port | |
227 | * number to use if nothing else. Set other stuff up. | |
228 | */ | |
c64d8cd5 | 229 | if ((sk = socket(PF_INET, SOCK_DGRAM, 0)) < 0) goto fail_0; |
88510d86 MW |
230 | if (connect(sk, (struct sockaddr *)&pp->sin, sizeof(pp->sin))) goto fail_1; |
231 | st = xmalloc(pp->pops->statesz); | |
232 | if ((mtu = pp->pops->setup(st, sk, pp)) < 0) goto fail_2; | |
233 | ps.pp = pp; ps.q = rand() & 0xffff; | |
234 | lo = 576; hi = mtu; | |
235 | ||
236 | /* And now we do a thing which is sort of like a binary search, except that | |
237 | * we also take explicit clues as establishing a new upper bound, and we | |
238 | * try to hug that initially. | |
239 | */ | |
c64d8cd5 | 240 | for (;;) { |
88510d86 MW |
241 | rc = probe(&ps, st, mtu); |
242 | switch (rc) { | |
243 | ||
244 | case RC_FAIL: | |
245 | if (pp->f & F_VERBOSE) moan("probe failed"); | |
246 | goto fail_3; | |
247 | ||
248 | case RC_NOREPLY: | |
249 | /* If we've not seen a dropped packet before then we don't know what | |
250 | * this means yet -- in particular, we don't know which bit of the | |
251 | * network is swallowing packets. Send a minimum-size probe. If | |
252 | * that doesn't come back then assume that the remote host is | |
253 | * swallowing our packets. If it does, then we assume that dropped | |
254 | * packets are a result of ICMP fragmentation-needed reports being | |
255 | * lost or suppressed. | |
256 | */ | |
257 | if (pp->f & F_VERBOSE) moan("gave up: black hole detected"); | |
258 | if (droppy == -1) { | |
259 | if (pp->f & F_VERBOSE) moan("sending minimum-size probe"); | |
260 | switch (probe(&ps, st, lo)) { | |
261 | case RC_FAIL: | |
262 | goto fail_3; | |
263 | case RC_NOREPLY: | |
264 | if (pp->f & F_VERBOSE) { | |
265 | moan("no reply from min-size probe: " | |
266 | "assume black hole at target"); | |
267 | } | |
268 | droppy = 1; | |
269 | break; | |
270 | case RC_HIGHER: | |
271 | if (pp->f & F_VERBOSE) { | |
272 | moan("reply from min-size probe OK: " | |
273 | "assume black hole in network"); | |
274 | } | |
275 | droppy = 0; | |
276 | break; | |
277 | default: | |
278 | if (pp->f & F_VERBOSE) | |
279 | moan("unexpected return code from probe"); | |
280 | errno = ENOTCONN; | |
281 | goto fail_3; | |
282 | } | |
283 | } | |
284 | ||
285 | if (droppy) goto higher; else goto lower; | |
286 | ||
287 | case RC_HIGHER: | |
288 | higher: | |
289 | if (droppy == -1) { | |
290 | if (pp->f & F_VERBOSE) | |
291 | moan("probe returned: remote host is not a black hole"); | |
292 | droppy = 0; | |
293 | } | |
294 | if (mtu == hi) { | |
295 | if (pp->f & F_VERBOSE) moan("probe returned: found correct MTU"); | |
296 | goto done; | |
297 | } | |
298 | if (pp->f & F_VERBOSE) moan("probe returned: guessing higher"); | |
299 | lo = mtu; | |
300 | mtu += (hi - lo + 1)/2; | |
301 | break; | |
302 | ||
303 | case RC_LOWER: | |
304 | lower: | |
305 | if (mtu == lo) { | |
306 | if (pp->f & F_VERBOSE) moan("error returned: found correct MTU"); | |
307 | goto done; | |
308 | } | |
309 | if (pp->f & F_VERBOSE) moan("error returned: guessing lower"); | |
310 | hi = mtu - 1; | |
311 | mtu -= (hi - lo + 1)/2; | |
312 | break; | |
313 | ||
314 | default: | |
315 | if (pp->f & F_VERBOSE) moan("error returned with new MTU estimate"); | |
316 | mtu = hi = rc; | |
317 | break; | |
318 | } | |
c64d8cd5 | 319 | } |
88510d86 MW |
320 | |
321 | done: | |
322 | /* Clean up and return our result. */ | |
323 | pp->pops->finish(st); | |
324 | xfree(st); | |
c64d8cd5 MW |
325 | close(sk); |
326 | return (mtu); | |
327 | ||
88510d86 MW |
328 | fail_3: |
329 | pp->pops->finish(st); | |
330 | fail_2: | |
331 | xfree(st); | |
c64d8cd5 MW |
332 | fail_1: |
333 | close(sk); | |
334 | fail_0: | |
335 | return (-1); | |
336 | } | |
337 | ||
88510d86 MW |
338 | /*----- Doing it the hard way ---------------------------------------------*/ |
339 | ||
/* Some systems want every field of a hand-built IP header in network byte
 * order (`sane'); on the others, the fields wrapped in the `sane_...'
 * macros below are left in host byte order.
 */
#if defined(linux) || defined(__OpenBSD__)
#  define IPHDR_SANE
#endif

#ifdef IPHDR_SANE
#  define sane_htons htons
#  define sane_htonl htonl
#else
#  define sane_htons
#  define sane_htonl
#endif

/* Raw sockets for receiving ICMP and for sending hand-built UDP packets.
 * These are opened at the very start of `main'; if that fails, the error
 * code is kept in `rawerr' for the raw probe method to report later.
 */
static int rawicmp = -1, rawudp = -1, rawerr = 0;
353 | ||
/* Initial accumulator value for `ipcksum'. */
#define IPCK_INIT 0xffff

/* Compute an IP checksum over the N bytes at BUF.  This is a restartable
 * interface: initialize A to `IPCK_INIT' for the first call, and pass the
 * previous result as A to continue over further data.
 *
 * The data is summed as big-endian 16-bit words using end-around carry; a
 * trailing odd byte is treated as the high byte of a final word.  The
 * result is the complement of the sum, except that a checksum which would
 * be zero is returned as 0xffff instead.
 */
static unsigned ipcksum(const void *buf, size_t n, unsigned a)
{
  unsigned long aa = a ^ 0xffff;
  const unsigned char *p = buf, *l = p + n;

  /* Compare the remaining length rather than forming `l - 1', which would
   * point before the buffer when N is zero.
   */
  while (l - p >= 2) { aa += ((unsigned)p[0] << 8) | p[1]; p += 2; }
  if (p < l) { aa += (unsigned)(*p) << 8; }
  do aa = (aa & 0xffff) + (aa >> 16); while (aa >= 0x10000);
  return (aa == 0xffff ? aa : aa ^ 0xffff);
}
369 | ||
/* TCP/UDP pseudoheader structure, as fed to the checksum ahead of the UDP
 * header and payload.  The standard type names are used rather than the
 * BSD `u_char' and `u_short', which denote the same types.
 */
struct phdr {
  struct in_addr ph_src, ph_dst;        /* Source and destination address */
  unsigned char ph_z, ph_p;             /* Zero byte; protocol number */
  unsigned short ph_len;                /* UDP length, network byte order */
};

/* Private state for the `raw' probe method. */
struct raw_state {
  struct sockaddr_in me, sin;           /* Local and remote addresses */
  int sk, rawicmp, rawudp;              /* UDP, raw ICMP, raw UDP sockets */
  unsigned q;                           /* Sequence for IP `id' fields */
};
382 | ||
383 | static int raw_setup(void *stv, int sk, const struct param *pp) | |
384 | { | |
385 | struct raw_state *st = stv; | |
386 | size_t sz; | |
387 | int i, mtu = -1; | |
388 | struct ifaddrs *ifa, *ifaa, *ifap; | |
389 | struct ifreq ifr; | |
390 | ||
391 | /* If we couldn't acquire raw sockets, we fail here. */ | |
392 | if (rawerr) { errno = rawerr; goto fail_0; } | |
393 | st->rawicmp = rawicmp; st->rawudp = rawudp; st->sk = sk; | |
394 | ||
395 | /* Initialize the sequence number. */ | |
396 | st->q = rand() & 0xffff; | |
397 | ||
398 | /* Snaffle the local and remote address and port number. */ | |
399 | st->sin = pp->sin; | |
400 | sz = sizeof(st->me); | |
401 | if (getsockname(sk, (struct sockaddr *)&st->me, &sz)) | |
402 | goto fail_0; | |
403 | ||
404 | /* There isn't a portable way to force the DF flag onto a packet through | |
405 | * UDP, or even through raw IP, unless we write the entire IP header | |
406 | * ourselves. This is somewhat annoying, especially since we have an | |
407 | * uphill struggle keeping track of which systems randomly expect which | |
408 | * header fields to be presented in host byte order. Oh, well. | |
409 | */ | |
410 | i = 1; | |
411 | if (setsockopt(rawudp, IPPROTO_IP, IP_HDRINCL, &i, sizeof(i))) goto fail_0; | |
412 | ||
413 | /* Find an upper bound on the MTU. Do two passes over the interface | |
414 | * list. If we can find matches for our local address then use the | |
415 | * highest one of those; otherwise do a second pass and simply take the | |
416 | * highest MTU of any network interface. | |
417 | */ | |
418 | if (getifaddrs(&ifaa)) goto fail_0; | |
419 | for (i = 0; i < 2; i++) { | |
420 | for (ifap = 0, ifa = ifaa; ifa; ifa = ifa->ifa_next) { | |
421 | if (!(ifa->ifa_flags & IFF_UP) || !ifa->ifa_addr || | |
422 | ifa->ifa_addr->sa_family != AF_INET || | |
423 | (i == 0 && | |
424 | ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr != | |
425 | st->me.sin_addr.s_addr) || | |
426 | (i == 1 && ifap && strcmp(ifap->ifa_name, ifa->ifa_name) == 0) || | |
427 | strlen(ifa->ifa_name) >= sizeof(ifr.ifr_name)) | |
428 | continue; | |
429 | ifap = ifa; | |
430 | strcpy(ifr.ifr_name, ifa->ifa_name); | |
431 | if (ioctl(sk, SIOCGIFMTU, &ifr)) goto fail_1; | |
432 | if (mtu < ifr.ifr_mtu) mtu = ifr.ifr_mtu; | |
433 | } | |
434 | if (mtu > 0) break; | |
435 | } | |
436 | if (mtu < 0) { errno = ENOTCONN; goto fail_1; } | |
437 | freeifaddrs(ifaa); | |
438 | ||
439 | /* Done. */ | |
440 | return (mtu); | |
441 | ||
442 | fail_1: | |
443 | freeifaddrs(ifaa); | |
444 | fail_0: | |
445 | return (-1); | |
446 | } | |
447 | ||
448 | static void raw_finish(void *stv) { ; } | |
449 | ||
450 | static void raw_selprep(void *stv, int *maxfd, fd_set *fd_in) | |
451 | { struct raw_state *st = stv; ADDFD(st->sk); ADDFD(st->rawicmp); } | |
452 | ||
453 | static int raw_xmit(void *stv, int mtu) | |
454 | { | |
455 | struct raw_state *st = stv; | |
456 | unsigned char b[65536], *p; | |
457 | struct ip *ip; | |
458 | struct udphdr *udp; | |
459 | struct phdr ph; | |
460 | unsigned ck; | |
461 | ||
462 | /* Build the IP header. */ | |
463 | ip = (struct ip *)b; | |
464 | ip->ip_v = 4; | |
465 | ip->ip_hl = sizeof(*ip)/4; | |
466 | ip->ip_tos = IPTOS_RELIABILITY; | |
467 | ip->ip_len = sane_htons(mtu); | |
468 | STEP(st->q); ip->ip_id = htons(st->q); | |
469 | ip->ip_off = sane_htons(0 | IP_DF); | |
470 | ip->ip_ttl = 64; | |
471 | ip->ip_p = IPPROTO_UDP; | |
472 | ip->ip_sum = 0; | |
473 | ip->ip_src = st->me.sin_addr; | |
474 | ip->ip_dst = st->sin.sin_addr; | |
475 | ||
476 | /* Build a UDP packet in the output buffer. */ | |
477 | udp = (struct udphdr *)(ip + 1); | |
478 | udp->uh_sport = st->me.sin_port; | |
479 | udp->uh_dport = st->sin.sin_port; | |
480 | udp->uh_ulen = htons(mtu - sizeof(*ip)); | |
481 | udp->uh_sum = 0; | |
482 | ||
483 | /* Copy the payload. */ | |
484 | p = (unsigned char *)(udp + 1); | |
485 | memcpy(p, buf, mtu - (p - b)); | |
486 | ||
487 | /* Calculate the UDP checksum. */ | |
488 | ph.ph_src = ip->ip_src; | |
489 | ph.ph_dst = ip->ip_dst; | |
490 | ph.ph_z = 0; | |
491 | ph.ph_p = IPPROTO_UDP; | |
492 | ph.ph_len = udp->uh_ulen; | |
493 | ck = IPCK_INIT; | |
494 | ck = ipcksum(&ph, sizeof(ph), ck); | |
495 | ck = ipcksum(udp, mtu - sizeof(*ip), ck); | |
496 | udp->uh_sum = htons(ck); | |
497 | ||
498 | /* Send the whole thing off. If we're too big for the interface then we | |
499 | * might need to trim immediately. | |
500 | */ | |
501 | if (sendto(st->rawudp, b, mtu, 0, | |
502 | (struct sockaddr *)&st->sin, sizeof(st->sin)) < 0) { | |
503 | if (errno == EMSGSIZE) return (RC_LOWER); | |
504 | else goto fail_0; | |
505 | } | |
506 | ||
507 | /* Done. */ | |
508 | return (RC_OK); | |
509 | ||
510 | fail_0: | |
511 | return (RC_FAIL); | |
512 | } | |
513 | ||
514 | static int raw_selproc(void *stv, fd_set *fd_in, struct probestate *ps) | |
515 | { | |
516 | struct raw_state *st = stv; | |
517 | unsigned char b[65536]; | |
518 | struct ip *ip; | |
519 | struct icmp *icmp; | |
520 | struct udphdr *udp; | |
521 | ssize_t n; | |
522 | ||
523 | /* An ICMP packet: see what's inside. */ | |
524 | if (FD_ISSET(st->rawicmp, fd_in)) { | |
525 | if ((n = read(st->rawicmp, b, sizeof(b))) < 0) goto fail_0; | |
526 | ||
527 | ip = (struct ip *)b; | |
528 | if (n < sizeof(*ip) || n < sizeof(4*ip->ip_hl) || | |
529 | ip->ip_v != 4 || ip->ip_p != IPPROTO_ICMP) | |
530 | goto skip_icmp; | |
531 | n -= sizeof(4*ip->ip_hl); | |
532 | ||
533 | icmp = (struct icmp *)(b + 4*ip->ip_hl); | |
534 | if (n < sizeof(*icmp) || icmp->icmp_type != ICMP_UNREACH) | |
535 | goto skip_icmp; | |
536 | n -= offsetof(struct icmp, icmp_ip); | |
537 | ||
538 | ip = &icmp->icmp_ip; | |
539 | if (n < sizeof(*ip) || | |
540 | ip->ip_p != IPPROTO_UDP || ip->ip_hl != sizeof(*ip)/4 || | |
541 | ip->ip_id != htons(st->q) || | |
542 | ip->ip_src.s_addr != st->me.sin_addr.s_addr || | |
543 | ip->ip_dst.s_addr != st->sin.sin_addr.s_addr) | |
544 | goto skip_icmp; | |
545 | n -= sizeof(*ip); | |
546 | ||
547 | udp = (struct udphdr *)(ip + 1); | |
548 | if (n < sizeof(udp) || udp->uh_sport != st->me.sin_port || | |
549 | udp->uh_dport != st->sin.sin_port) | |
550 | goto skip_icmp; | |
551 | n -= sizeof(*udp); | |
552 | ||
553 | if (icmp->icmp_code == ICMP_UNREACH_PORT) return (RC_HIGHER); | |
554 | else if (icmp->icmp_code != ICMP_UNREACH_NEEDFRAG) goto skip_icmp; | |
555 | else if (icmp->icmp_nextmtu) return (htons(icmp->icmp_nextmtu)); | |
556 | else return (RC_LOWER); | |
557 | } | |
558 | skip_icmp:; | |
559 | ||
560 | /* If we got a reply to the current probe then we're good. If we got an | |
561 | * error, or the packet's sequence number is wrong, then ignore it. | |
562 | */ | |
563 | if (FD_ISSET(st->sk, fd_in)) { | |
564 | if ((n = read(st->sk, b, sizeof(b))) < 0) return (RC_OK); | |
565 | else if (mypacketp(ps, b, n)) return (RC_HIGHER); | |
566 | else return (RC_OK); | |
567 | } | |
568 | ||
569 | return (RC_OK); | |
570 | ||
571 | fail_0: | |
572 | return (RC_FAIL); | |
573 | } | |
574 | ||
575 | static const struct probe_ops raw_ops = { | |
576 | "raw", OPS_CHAIN, sizeof(struct raw_state), | |
577 | raw_setup, raw_finish, | |
578 | raw_selprep, raw_xmit, raw_selproc | |
579 | }; | |
580 | ||
581 | #undef OPS_CHAIN | |
582 | #define OPS_CHAIN &raw_ops | |
583 | ||
584 | /*----- Doing the job on Linux --------------------------------------------*/ | |
585 | ||
586 | #if defined(linux) | |
587 | ||
/* Fall back to a hard-coded option number if the headers don't define
 * `IP_MTU'.
 */
#ifndef IP_MTU
#  define IP_MTU 14                     /* Blech! */
#endif

/* Private state for the `linux' probe method. */
struct linux_state {
  int sk;                               /* The connected UDP socket */
};
595 | ||
596 | static int linux_setup(void *stv, int sk, const struct param *pp) | |
597 | { | |
598 | struct linux_state *st = stv; | |
599 | int i, mtu; | |
600 | size_t sz; | |
601 | ||
602 | /* Snaffle the UDP socket. */ | |
603 | st->sk = sk; | |
604 | ||
605 | /* Turn on kernel path-MTU discovery and force DF on. */ | |
606 | i = IP_PMTUDISC_DO; | |
607 | if (setsockopt(st->sk, IPPROTO_IP, IP_MTU_DISCOVER, &i, sizeof(i))) | |
608 | return (-1); | |
609 | ||
610 | /* Read the initial MTU guess back and report it. */ | |
611 | sz = sizeof(mtu); | |
612 | if (getsockopt(st->sk, IPPROTO_IP, IP_MTU, &mtu, &sz)) | |
613 | return (-1); | |
614 | ||
615 | /* Done. */ | |
616 | return (mtu); | |
617 | } | |
618 | ||
619 | static void linux_finish(void *stv) { ; } | |
620 | ||
621 | static void linux_selprep(void *stv, int *maxfd, fd_set *fd_in) | |
622 | { struct linux_state *st = stv; ADDFD(st->sk); } | |
623 | ||
624 | static int linux_xmit(void *stv, int mtu) | |
625 | { | |
626 | struct linux_state *st = stv; | |
627 | ||
628 | /* Write the packet. */ | |
629 | if (write(st->sk, buf, mtu - 28) >= 0) return (RC_OK); | |
630 | else if (errno == EMSGSIZE) return (RC_LOWER); | |
631 | else return (RC_FAIL); | |
632 | } | |
633 | ||
634 | static int linux_selproc(void *stv, fd_set *fd_in, struct probestate *ps) | |
635 | { | |
636 | struct linux_state *st = stv; | |
637 | int mtu; | |
638 | size_t sz; | |
639 | ssize_t n; | |
640 | unsigned char b[65536]; | |
641 | ||
642 | /* Read an answer. If it looks like the right kind of error then report a | |
643 | * success. This is potentially wrong, since we can't tell whether an | |
644 | * error was delayed from an earlier probe. However, we never return | |
645 | * RC_LOWER from this method, so the packet sizes ought to be monotonically | |
646 | * decreasing and this won't cause trouble. Otherwise update from the | |
647 | * kernel's idea of the right MTU. | |
648 | */ | |
649 | if (FD_ISSET(st->sk, fd_in)) { | |
650 | n = read(st->sk, &buf, sizeof(buf)); | |
651 | if (n >= 0 ? | |
652 | mypacketp(ps, b, n) : | |
653 | errno == ECONNREFUSED || errno == EHOSTUNREACH) | |
654 | return (RC_HIGHER); | |
655 | sz = sizeof(mtu); | |
656 | if (getsockopt(st->sk, IPPROTO_IP, IP_MTU, &mtu, &sz)) | |
657 | return (RC_FAIL); | |
658 | return (mtu); | |
659 | } | |
660 | return (RC_OK); | |
661 | } | |
662 | ||
663 | static const struct probe_ops linux_ops = { | |
664 | "linux", OPS_CHAIN, sizeof(struct linux_state), | |
665 | linux_setup, linux_finish, | |
666 | linux_selprep, linux_xmit, linux_selproc | |
667 | }; | |
c64d8cd5 | 668 | |
88510d86 MW |
669 | #undef OPS_CHAIN |
670 | #define OPS_CHAIN &linux_ops | |
c64d8cd5 MW |
671 | |
672 | #endif | |
673 | ||
674 | /*----- Help options ------------------------------------------------------*/ | |
675 | ||
88510d86 MW |
676 | static const struct probe_ops *probe_ops = OPS_CHAIN; |
677 | ||
c64d8cd5 MW |
678 | static void version(FILE *fp) |
679 | { pquis(fp, "$, TrIPE version " VERSION "\n"); } | |
680 | ||
/* Write a brief usage summary to FP.  The text is passed to `pquis', with
 * `$' standing for the program name.
 */
static void usage(FILE *fp)
{
  pquis(fp, "Usage: $ [-v] [-H HEADER] [-m METHOD]\n"
            "\t[-r SECS] [-g FACTOR] [-t SECS] HOST [PORT]\n");
}
c64d8cd5 MW |
686 | |
687 | static void help(FILE *fp) | |
688 | { | |
88510d86 MW |
689 | const struct probe_ops *ops; |
690 | ||
c64d8cd5 MW |
691 | version(fp); |
692 | fputc('\n', fp); | |
693 | usage(fp); | |
694 | fputs("\ | |
695 | \n\ | |
696 | Options in full:\n\ | |
697 | \n\ | |
698 | -h, --help Show this help text.\n\ | |
699 | -v, --version Show version number.\n\ | |
700 | -u, --usage Show brief usage message.\n\ | |
701 | \n\ | |
88510d86 MW |
702 | -g, --growth=FACTOR Growth factor for retransmit interval.\n\ |
703 | -m, --method=METHOD Use METHOD to probe for MTU.\n\ | |
704 | -r, --retransmit=SECS Retransmit if no reply after SEC.\n\ | |
705 | -t, --timeout=SECS Give up expecting a reply after SECS.\n\ | |
c64d8cd5 | 706 | -H, --header=HEX Packet header, in hexadecimal.\n\ |
88510d86 MW |
707 | \n\ |
708 | Probe methods:\n\ | |
c64d8cd5 | 709 | ", fp); |
88510d86 MW |
710 | for (ops = probe_ops; ops; ops = ops->next) |
711 | printf("\t%s\n", ops->name); | |
c64d8cd5 MW |
712 | } |
713 | ||
714 | /*----- Main code ---------------------------------------------------------*/ | |
715 | ||
716 | int main(int argc, char *argv[]) | |
717 | { | |
88510d86 | 718 | struct param pp = { 0, 0.333, 3.0, 8.0, 0, OPS_CHAIN }; |
c64d8cd5 MW |
719 | hex_ctx hc; |
720 | dstr d = DSTR_INIT; | |
721 | size_t sz; | |
722 | int i; | |
723 | unsigned long u; | |
724 | char *q; | |
725 | struct hostent *h; | |
726 | struct servent *s; | |
c64d8cd5 MW |
727 | unsigned f = 0; |
728 | ||
729 | #define f_bogus 1u | |
730 | ||
88510d86 MW |
731 | if ((rawicmp = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0 || |
732 | (rawudp = socket(PF_INET, SOCK_RAW, IPPROTO_UDP)) < 0) | |
733 | rawerr = errno; | |
734 | if (setuid(getuid())) | |
735 | abort(); | |
736 | ||
c64d8cd5 MW |
737 | ego(argv[0]); |
738 | fillbuffer(buf, sizeof(buf)); | |
88510d86 | 739 | pp.sin.sin_port = htons(7); |
c64d8cd5 MW |
740 | |
741 | for (;;) { | |
742 | static const struct option opts[] = { | |
743 | { "help", 0, 0, 'h' }, | |
88510d86 | 744 | { "version", 0, 0, 'V' }, |
c64d8cd5 MW |
745 | { "usage", 0, 0, 'u' }, |
746 | { "header", OPTF_ARGREQ, 0, 'H' }, | |
88510d86 MW |
747 | { "growth", OPTF_ARGREQ, 0, 'g' }, |
748 | { "method", OPTF_ARGREQ, 0, 'm' }, | |
749 | { "retransmit", OPTF_ARGREQ, 0, 'r' }, | |
c64d8cd5 | 750 | { "timeout", OPTF_ARGREQ, 0, 't' }, |
88510d86 | 751 | { "verbose", 0, 0, 'v' }, |
c64d8cd5 MW |
752 | { 0, 0, 0, 0 } |
753 | }; | |
754 | ||
88510d86 | 755 | i = mdwopt(argc, argv, "hVu" "H:g:m:r:t:v", opts, 0, 0, 0); |
c64d8cd5 MW |
756 | if (i < 0) break; |
757 | switch (i) { | |
758 | case 'h': help(stdout); exit(0); | |
88510d86 | 759 | case 'V': version(stdout); exit(0); |
c64d8cd5 MW |
760 | case 'u': usage(stdout); exit(0); |
761 | ||
762 | case 'H': | |
763 | DRESET(&d); | |
764 | hex_init(&hc); | |
765 | hex_decode(&hc, optarg, strlen(optarg), &d); | |
766 | hex_decode(&hc, 0, 0, &d); | |
88510d86 | 767 | sz = d.len < 532 ? d.len : 532; |
c64d8cd5 | 768 | memcpy(buf, d.buf, sz); |
88510d86 | 769 | pp.seqoff = sz; |
c64d8cd5 MW |
770 | break; |
771 | ||
88510d86 MW |
772 | case 'g': pp.regr = s2f(optarg, "retransmit growth factor"); break; |
773 | case 'r': pp.retx = s2f(optarg, "retransmit interval"); break; | |
774 | case 't': pp.timeout = s2f(optarg, "timeout"); break; | |
775 | ||
776 | case 'm': | |
777 | for (pp.pops = OPS_CHAIN; pp.pops; pp.pops = pp.pops->next) | |
778 | if (strcmp(pp.pops->name, optarg) == 0) goto found_alg; | |
779 | die(EXIT_FAILURE, "unknown probe algorithm `%s'", optarg); | |
780 | found_alg: | |
c64d8cd5 MW |
781 | break; |
782 | ||
88510d86 MW |
783 | case 'v': pp.f |= F_VERBOSE; break; |
784 | ||
c64d8cd5 MW |
785 | default: |
786 | f |= f_bogus; | |
787 | break; | |
788 | } | |
789 | } | |
790 | argv += optind; argc -= optind; | |
791 | if ((f & f_bogus) || 1 > argc || argc > 2) { | |
792 | usage(stderr); | |
793 | exit(EXIT_FAILURE); | |
794 | } | |
795 | ||
796 | if ((h = gethostbyname(*argv)) == 0) | |
797 | die(EXIT_FAILURE, "unknown host `%s': %s", *argv, hstrerror(h_errno)); | |
798 | if (h->h_addrtype != AF_INET) | |
799 | die(EXIT_FAILURE, "unsupported address family for host `%s'", *argv); | |
88510d86 | 800 | memcpy(&pp.sin.sin_addr, h->h_addr, sizeof(struct in_addr)); |
c64d8cd5 MW |
801 | argv++; argc--; |
802 | ||
803 | if (*argv) { | |
804 | errno = 0; | |
805 | u = strtoul(*argv, &q, 0); | |
806 | if (!errno && !*q) | |
88510d86 | 807 | pp.sin.sin_port = htons(u); |
c64d8cd5 MW |
808 | else if ((s = getservbyname(*argv, "udp")) == 0) |
809 | die(EXIT_FAILURE, "unknown UDP service `%s'", *argv); | |
810 | else | |
88510d86 | 811 | pp.sin.sin_port = s->s_port; |
c64d8cd5 MW |
812 | } |
813 | ||
88510d86 MW |
814 | pp.sin.sin_family = AF_INET; |
815 | i = pathmtu(&pp); | |
c64d8cd5 MW |
816 | if (i < 0) |
817 | die(EXIT_FAILURE, "failed to discover MTU: %s", strerror(errno)); | |
818 | printf("%d\n", i); | |
819 | if (ferror(stdout) || fflush(stdout) || fclose(stdout)) | |
820 | die(EXIT_FAILURE, "failed to write result: %s", strerror(errno)); | |
821 | return (0); | |
822 | } | |
823 | ||
824 | /*----- That's all, folks -------------------------------------------------*/ |