Commit | Line | Data |
---|---|---|
c64d8cd5 MW |
1 | /* -*-c-*- |
2 | * | |
3 | * Report MTU on path to specified host | |
4 | * | |
5 | * (c) 2008 Straylight/Edgeware | |
6 | */ | |
7 | ||
8 | /*----- Licensing notice --------------------------------------------------* | |
9 | * | |
10 | * This file is part of Trivial IP Encryption (TrIPE). | |
11 | * | |
12 | * TrIPE is free software; you can redistribute it and/or modify | |
13 | * it under the terms of the GNU General Public License as published by | |
14 | * the Free Software Foundation; either version 2 of the License, or | |
15 | * (at your option) any later version. | |
16 | * | |
17 | * TrIPE is distributed in the hope that it will be useful, | |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | * GNU General Public License for more details. | |
21 | * | |
22 | * You should have received a copy of the GNU General Public License | |
23 | * along with TrIPE; if not, write to the Free Software Foundation, | |
24 | * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | */ | |
26 | ||
27 | /*----- Header files ------------------------------------------------------*/ | |
28 | ||
29 | #include "config.h" | |
30 | ||
d245350a | 31 | #include <assert.h> |
c64d8cd5 | 32 | #include <errno.h> |
88510d86 | 33 | #include <stddef.h> |
c64d8cd5 MW |
34 | #include <stdio.h> |
35 | #include <stdlib.h> | |
36 | #include <string.h> | |
37 | #include <time.h> | |
38 | ||
39 | #include <sys/types.h> | |
40 | #include <sys/time.h> | |
41 | #include <unistd.h> | |
42 | ||
43 | #include <sys/socket.h> | |
44 | #include <netinet/in.h> | |
45 | #include <arpa/inet.h> | |
46 | #include <netdb.h> | |
47 | ||
88510d86 MW |
48 | #include <netinet/in_systm.h> |
49 | #include <netinet/ip.h> | |
50 | #include <netinet/ip_icmp.h> | |
51 | #include <netinet/udp.h> | |
52 | ||
53 | #include <net/if.h> | |
54 | #include <ifaddrs.h> | |
55 | #include <sys/ioctl.h> | |
56 | ||
57 | #include <mLib/alloc.h> | |
58 | #include <mLib/bits.h> | |
c64d8cd5 MW |
59 | #include <mLib/dstr.h> |
60 | #include <mLib/hex.h> | |
61 | #include <mLib/mdwopt.h> | |
62 | #include <mLib/quis.h> | |
63 | #include <mLib/report.h> | |
64 | #include <mLib/tv.h> | |
65 | ||
66 | /*----- Static variables --------------------------------------------------*/ | |
67 | ||
/* Payload buffer shared by the whole program: filled once with a fixed
 * pattern, optionally prefixed with a user-supplied header, and used as the
 * body of every probe packet.
 */
static unsigned char buf[65536];

/* Constant XORed into the LFSR state by STEP when bit 15 is set.  Its bit 16
 * cancels the bit which the shift pushes out of the low 16 bits.
 */
#define POLY 0x1002d

/*----- Utility functions -------------------------------------------------*/

/* Advance the 16-bit LFSR state Q by one step, in place.  Q must be an
 * lvalue; it is evaluated more than once.
 */
#define STEP(q) do {							\
  if ((q) & 0x8000) (q) = ((q) << 1) ^ POLY;				\
  else (q) = ((q) << 1);						\
} while (0)
77 | ||
c64d8cd5 MW |
/* Write SZ bytes of a fixed, repeatable pseudorandom pattern starting at P.
 * Each output byte is the low byte of the LFSR state, after which the state
 * is stepped eight times.
 */
static void fillbuffer(unsigned char *p, size_t sz)
{
  unsigned int state = 0xbc20;
  size_t k;
  int bit;

  for (k = 0; k < sz; k++) {
    p[k] = state & 0xff;
    for (bit = 8; bit > 0; bit--) STEP(state);
  }
}
92 | ||
88510d86 MW |
/* Convert the string S to a floating-point value and return it.
 *
 * WHAT names the quantity being parsed and is used only in the error
 * message.  The program exits with an error if S is not entirely a valid
 * number: this covers conversion errors reported through errno, trailing
 * junk, and input from which no number could be read at all (e.g., an
 * empty string).
 */
static double s2f(const char *s, const char *what)
{
  double f;
  char *q;

  errno = 0;
  f = strtod(s, &q);
  /* `q == s' means strtod consumed nothing; without this check an empty
   * string would be silently accepted as zero.
   */
  if (errno || q == s || *q) die(EXIT_FAILURE, "bad %s", what);
  return (f);
}
c64d8cd5 | 104 | |
88510d86 MW |
105 | /* Convert a floating-point value into a struct timeval. */ |
106 | static void f2tv(struct timeval *tv, double t) | |
107 | { tv->tv_sec = t; tv->tv_usec = (t - tv->tv_sec)*MILLION; } | |
108 | ||
109 | /*----- Main algorithm skeleton -------------------------------------------*/ | |
110 | ||
/* Parameters for a path-MTU search.  The member order matters: `main'
 * initializes this structure positionally.
 */
#define F_VERBOSE 1u			/* Flag bit: give a running commentary */

struct param {
  unsigned f;				/* Flags: F_VERBOSE */
  double retx;				/* Delay before first retransmit */
  double regr;				/* Factor by which the delay grows */
  double timeout;			/* Give up on a probe after this */
  int seqoff;				/* Where in the payload to put the
					 * sequence number */
  const struct probe_ops *pops;		/* Probe method to use */
  struct sockaddr_in sin;		/* Address to probe */
};
121 | ||
/* State carried from one probe to the next. */
struct probestate {
  const struct param *pp;		/* Search parameters */
  unsigned q;				/* Current sequence number (stepped
					 * before each new probe) */
};
126 | ||
/* Description of a probe method.  Each function receives the method's
 * private state (a block of `statesz' bytes) as its first argument.
 */
struct probe_ops {
  const char *name;			/* Name, as given to `-m' */
  const struct probe_ops *next;		/* Next method in the chain */
  size_t statesz;			/* Size of the private state */

  /* Initialize the state given the connected UDP socket and parameters;
   * return an initial MTU upper bound, or negative on failure.
   */
  int (*setup)(void *, int, const struct param *);

  /* Release anything acquired by `setup'. */
  void (*finish)(void *);

  /* Add the descriptors to be watched to the set, updating the maximum. */
  void (*selprep)(void *, int *, fd_set *);

  /* Transmit a probe of the given size; return an RC_... code. */
  int (*xmit)(void *, int);

  /* Examine the descriptors reported ready; return an RC_... code or a
   * new MTU upper bound.
   */
  int (*selproc)(void *, fd_set *, struct probestate *);
};
137 | ||
/* Head of the chain of probe methods defined so far.  Each method
 * definition below links itself to the current value and then redefines
 * this macro to point at itself.
 */
#define OPS_CHAIN 0

/* Result codes from probe operations.  Any positive value is instead a new
 * upper bound on the MTU.
 */
enum {
  RC_OK = 0,				/* Nothing to report (yet) */
  RC_LOWER = -1,			/* Probe was too large */
  RC_HIGHER = -2,			/* Probe got through */
  RC_NOREPLY = -3,			/* Gave up waiting for an answer */
  RC_FAIL = -99				/* Something went wrong */
};
148 | ||
/* Add the file descriptor FD to the set `fd_in', raising `*maxfd' if FD is
 * larger.  Both `fd_in' (an `fd_set *') and `maxfd' (an `int *') are taken
 * from the caller's scope.  FD is evaluated more than once, so it must have
 * no side effects; it is parenthesized so that any expression may be
 * passed safely.
 */
#define ADDFD(fd)							\
  do { FD_SET((fd), fd_in); if (*maxfd < (fd)) *maxfd = (fd); } while (0)
152 | ||
153 | /* Check whether a buffer contains a packet from our current probe. */ | |
154 | static int mypacketp(struct probestate *ps, | |
155 | const unsigned char *p, size_t sz) | |
156 | { | |
157 | const struct param *pp = ps->pp; | |
c64d8cd5 | 158 | |
88510d86 MW |
159 | return (sz >= pp->seqoff + 2 && LOAD16(p + pp->seqoff) == ps->q); |
160 | } | |
161 | ||
162 | /* See whether MTU is an acceptable MTU value. Return an appropriate | |
163 | * RC_... code or a new suggested MTU. | |
164 | */ | |
165 | static int probe(struct probestate *ps, void *st, int mtu) | |
c64d8cd5 | 166 | { |
88510d86 | 167 | const struct param *pp = ps->pp; |
c64d8cd5 | 168 | fd_set fd_in; |
88510d86 MW |
169 | struct timeval tv, now, when, done; |
170 | double timer = pp->retx; | |
171 | int rc, maxfd; | |
172 | ||
173 | /* Set up the first retransmit and give-up timers. */ | |
174 | gettimeofday(&now, 0); | |
175 | f2tv(&tv, pp->timeout); TV_ADD(&done, &now, &tv); | |
176 | f2tv(&tv, timer); TV_ADD(&when, &now, &tv); | |
177 | if (TV_CMP(&when, >, &done)) when = done; | |
178 | ||
179 | /* Send the initial probe. */ | |
180 | if (pp->f & F_VERBOSE) | |
181 | moan("sending probe of size %d (seq = %04x)", mtu, ps->q); | |
182 | STEP(ps->q); | |
183 | STORE16(buf + pp->seqoff, ps->q); | |
184 | if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc); | |
185 | ||
186 | for (;;) { | |
187 | ||
188 | /* Wait for something interesting to happen. */ | |
189 | maxfd = 0; FD_ZERO(&fd_in); | |
190 | pp->pops->selprep(st, &maxfd, &fd_in); | |
191 | TV_SUB(&tv, &when, &now); | |
192 | if (select(maxfd + 1, &fd_in, 0, 0, &tv) < 0) return (RC_FAIL); | |
193 | gettimeofday(&now, 0); | |
194 | ||
195 | /* See whether the probe method has any answers for us. */ | |
196 | if ((rc = pp->pops->selproc(st, &fd_in, ps)) != RC_OK) return (rc); | |
197 | ||
198 | /* If we've waited too long, give up. If we should retransmit, do | |
199 | * that. | |
200 | */ | |
201 | if (TV_CMP(&now, >, &done)) | |
202 | return (RC_NOREPLY); | |
203 | else if (TV_CMP(&now, >, &when)) { | |
204 | if (pp->f & F_VERBOSE) moan("re-sending probe of size %d", mtu); | |
205 | if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc); | |
206 | do { | |
207 | timer *= pp->regr; f2tv(&tv, timer); TV_ADD(&when, &when, &tv); | |
208 | } while (TV_CMP(&when, <, &now)); | |
209 | if (TV_CMP(&when, >, &done)) when = done; | |
210 | } | |
211 | } | |
212 | } | |
c64d8cd5 | 213 | |
88510d86 MW |
214 | /* Discover the path MTU to the destination address. */ |
215 | static int pathmtu(const struct param *pp) | |
216 | { | |
217 | int sk; | |
218 | int mtu, lo, hi; | |
219 | int rc, droppy = -1; | |
220 | void *st; | |
221 | struct probestate ps; | |
222 | ||
223 | /* Build and connect a UDP socket. We'll need this to know the local port | |
224 | * number to use if nothing else. Set other stuff up. | |
225 | */ | |
c64d8cd5 | 226 | if ((sk = socket(PF_INET, SOCK_DGRAM, 0)) < 0) goto fail_0; |
88510d86 MW |
227 | if (connect(sk, (struct sockaddr *)&pp->sin, sizeof(pp->sin))) goto fail_1; |
228 | st = xmalloc(pp->pops->statesz); | |
229 | if ((mtu = pp->pops->setup(st, sk, pp)) < 0) goto fail_2; | |
230 | ps.pp = pp; ps.q = rand() & 0xffff; | |
231 | lo = 576; hi = mtu; | |
232 | ||
233 | /* And now we do a thing which is sort of like a binary search, except that | |
234 | * we also take explicit clues as establishing a new upper bound, and we | |
235 | * try to hug that initially. | |
236 | */ | |
c64d8cd5 | 237 | for (;;) { |
d245350a MW |
238 | assert(lo <= mtu && mtu <= hi); |
239 | if (pp->f & F_VERBOSE) moan("probe: %d <= %d <= %d", lo, mtu, hi); | |
88510d86 MW |
240 | rc = probe(&ps, st, mtu); |
241 | switch (rc) { | |
242 | ||
243 | case RC_FAIL: | |
244 | if (pp->f & F_VERBOSE) moan("probe failed"); | |
245 | goto fail_3; | |
246 | ||
247 | case RC_NOREPLY: | |
248 | /* If we've not seen a dropped packet before then we don't know what | |
249 | * this means yet -- in particular, we don't know which bit of the | |
250 | * network is swallowing packets. Send a minimum-size probe. If | |
251 | * that doesn't come back then assume that the remote host is | |
252 | * swallowing our packets. If it does, then we assume that dropped | |
253 | * packets are a result of ICMP fragmentation-needed reports being | |
254 | * lost or suppressed. | |
255 | */ | |
256 | if (pp->f & F_VERBOSE) moan("gave up: black hole detected"); | |
257 | if (droppy == -1) { | |
258 | if (pp->f & F_VERBOSE) moan("sending minimum-size probe"); | |
259 | switch (probe(&ps, st, lo)) { | |
260 | case RC_FAIL: | |
261 | goto fail_3; | |
262 | case RC_NOREPLY: | |
263 | if (pp->f & F_VERBOSE) { | |
264 | moan("no reply from min-size probe: " | |
265 | "assume black hole at target"); | |
266 | } | |
267 | droppy = 1; | |
268 | break; | |
269 | case RC_HIGHER: | |
270 | if (pp->f & F_VERBOSE) { | |
271 | moan("reply from min-size probe OK: " | |
272 | "assume black hole in network"); | |
273 | } | |
274 | droppy = 0; | |
275 | break; | |
276 | default: | |
277 | if (pp->f & F_VERBOSE) | |
278 | moan("unexpected return code from probe"); | |
279 | errno = ENOTCONN; | |
280 | goto fail_3; | |
281 | } | |
282 | } | |
283 | ||
284 | if (droppy) goto higher; else goto lower; | |
285 | ||
286 | case RC_HIGHER: | |
287 | higher: | |
288 | if (droppy == -1) { | |
289 | if (pp->f & F_VERBOSE) | |
290 | moan("probe returned: remote host is not a black hole"); | |
291 | droppy = 0; | |
292 | } | |
293 | if (mtu == hi) { | |
294 | if (pp->f & F_VERBOSE) moan("probe returned: found correct MTU"); | |
295 | goto done; | |
296 | } | |
88510d86 | 297 | lo = mtu; |
d245350a MW |
298 | |
299 | /* Now we must make a new guess, between lo and hi. We know that lo | |
300 | * is good; but we're not so sure about hi here. We know that hi > | |
301 | * lo, so this will find an approximate midpoint, greater than lo and | |
302 | * no more than hi. | |
303 | */ | |
304 | if (pp->f & F_VERBOSE) moan("probe returned: guessing higher"); | |
88510d86 MW |
305 | mtu += (hi - lo + 1)/2; |
306 | break; | |
307 | ||
308 | case RC_LOWER: | |
309 | lower: | |
d245350a MW |
310 | /* If this didn't work, and we're already at the bottom of our |
311 | * possible range, then something has gone horribly wrong. | |
312 | */ | |
313 | assert(lo < mtu); | |
314 | hi = mtu - 1; | |
315 | if (lo == hi) { | |
88510d86 | 316 | if (pp->f & F_VERBOSE) moan("error returned: found correct MTU"); |
d245350a | 317 | mtu = lo; |
88510d86 MW |
318 | goto done; |
319 | } | |
d245350a MW |
320 | |
321 | /* We must make a new guess, between lo and hi. We're probably | |
322 | * fairly sure that lo will succeed, since either it's the minimum | |
323 | * MTU or we've tested it already; but we're not quite sure about hi, | |
324 | * so we want to aim high. | |
325 | */ | |
88510d86 | 326 | if (pp->f & F_VERBOSE) moan("error returned: guessing lower"); |
88510d86 MW |
327 | mtu -= (hi - lo + 1)/2; |
328 | break; | |
329 | ||
330 | default: | |
331 | if (pp->f & F_VERBOSE) moan("error returned with new MTU estimate"); | |
332 | mtu = hi = rc; | |
333 | break; | |
334 | } | |
c64d8cd5 | 335 | } |
88510d86 MW |
336 | |
337 | done: | |
338 | /* Clean up and return our result. */ | |
339 | pp->pops->finish(st); | |
340 | xfree(st); | |
c64d8cd5 MW |
341 | close(sk); |
342 | return (mtu); | |
343 | ||
88510d86 MW |
344 | fail_3: |
345 | pp->pops->finish(st); | |
346 | fail_2: | |
347 | xfree(st); | |
c64d8cd5 MW |
348 | fail_1: |
349 | close(sk); | |
350 | fail_0: | |
351 | return (-1); | |
352 | } | |
353 | ||
88510d86 MW |
354 | /*----- Doing it the hard way ---------------------------------------------*/ |
355 | ||
/* Decide whether this system is `sane' about raw IP headers.
 *
 * `sane_htons' and `sane_htonl' convert to network byte order when
 * IPHDR_SANE is defined; otherwise they expand to nothing and leave their
 * (parenthesized) argument untouched.
 *
 * Both `linux' and `__linux__' are tested: compilers in strict ISO mode
 * (e.g., GCC with `-std=c11') define only the latter.
 */
#if defined(linux) || defined(__linux__) || defined(__OpenBSD__)
#  define IPHDR_SANE
#endif

#ifdef IPHDR_SANE
#  define sane_htons htons
#  define sane_htonl htonl
#else
#  define sane_htons
#  define sane_htonl
#endif
367 | ||
/* The raw sockets, opened at startup; and, if opening them failed, the
 * errno value explaining why (zero if all went well).
 */
static int rawicmp = -1;
static int rawudp = -1;
static int rawerr = 0;

/* Initial accumulator value for `ipcksum'. */
#define IPCK_INIT 0xffff
371 | ||
/* Compute an IP checksum over the N bytes at BUF.
 *
 * This is a restartable interface: pass `IPCK_INIT' (0xffff) as A for the
 * first call, and the previous result for each later call.  The data is
 * summed as big-endian 16-bit words; if N is odd then the final byte is
 * treated as the high byte of a word whose low byte is zero, so every chunk
 * but the last should have even length.
 *
 * Returns the complement of the folded sum, except that 0xffff is returned
 * in place of zero.  N may be zero, in which case BUF is not examined.
 */
static unsigned ipcksum(const void *buf, size_t n, unsigned a)
{
  unsigned long aa = a ^ 0xffff;	/* recover the running sum */
  const unsigned char *p = buf;
  size_t i;

  /* Index arithmetic rather than an end pointer: there is no need to form
   * an address before the start of the buffer when N is zero.
   */
  for (i = 0; i + 1 < n; i += 2)
    aa += ((unsigned long)p[i] << 8) | p[i + 1];
  if (i < n) aa += (unsigned long)p[i] << 8;

  /* Fold the carries back into the low 16 bits. */
  do aa = (aa & 0xffff) + (aa >> 16); while (aa >= 0x10000);
  return (aa == 0xffff ? aa : aa ^ 0xffff);
}
385 | ||
/* The pseudoheader which is fed into the UDP checksum ahead of the UDP
 * header and payload.
 */
struct phdr {
  struct in_addr ph_src;		/* Source address */
  struct in_addr ph_dst;		/* Destination address */
  unsigned char ph_z;			/* Always zero */
  unsigned char ph_p;			/* Protocol number */
  unsigned short ph_len;		/* UDP length, as in the UDP header */
};
392 | ||
/* Private state for the `raw' probe method. */
struct raw_state {
  struct sockaddr_in me;		/* Local address of the UDP socket */
  struct sockaddr_in sin;		/* Destination address */
  int sk;				/* Connected UDP socket */
  int rawicmp;				/* Raw socket for reading ICMP */
  int rawudp;				/* Raw socket for sending probes */
  unsigned q;				/* LFSR state supplying IP ident
					 * values */
};
398 | ||
399 | static int raw_setup(void *stv, int sk, const struct param *pp) | |
400 | { | |
401 | struct raw_state *st = stv; | |
cb160b86 | 402 | socklen_t sz; |
88510d86 MW |
403 | int i, mtu = -1; |
404 | struct ifaddrs *ifa, *ifaa, *ifap; | |
405 | struct ifreq ifr; | |
406 | ||
407 | /* If we couldn't acquire raw sockets, we fail here. */ | |
408 | if (rawerr) { errno = rawerr; goto fail_0; } | |
409 | st->rawicmp = rawicmp; st->rawudp = rawudp; st->sk = sk; | |
410 | ||
411 | /* Initialize the sequence number. */ | |
412 | st->q = rand() & 0xffff; | |
413 | ||
414 | /* Snaffle the local and remote address and port number. */ | |
415 | st->sin = pp->sin; | |
416 | sz = sizeof(st->me); | |
417 | if (getsockname(sk, (struct sockaddr *)&st->me, &sz)) | |
418 | goto fail_0; | |
419 | ||
420 | /* There isn't a portable way to force the DF flag onto a packet through | |
421 | * UDP, or even through raw IP, unless we write the entire IP header | |
422 | * ourselves. This is somewhat annoying, especially since we have an | |
423 | * uphill struggle keeping track of which systems randomly expect which | |
424 | * header fields to be presented in host byte order. Oh, well. | |
425 | */ | |
426 | i = 1; | |
427 | if (setsockopt(rawudp, IPPROTO_IP, IP_HDRINCL, &i, sizeof(i))) goto fail_0; | |
428 | ||
429 | /* Find an upper bound on the MTU. Do two passes over the interface | |
430 | * list. If we can find matches for our local address then use the | |
431 | * highest one of those; otherwise do a second pass and simply take the | |
432 | * highest MTU of any network interface. | |
433 | */ | |
434 | if (getifaddrs(&ifaa)) goto fail_0; | |
435 | for (i = 0; i < 2; i++) { | |
436 | for (ifap = 0, ifa = ifaa; ifa; ifa = ifa->ifa_next) { | |
437 | if (!(ifa->ifa_flags & IFF_UP) || !ifa->ifa_addr || | |
438 | ifa->ifa_addr->sa_family != AF_INET || | |
439 | (i == 0 && | |
440 | ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr != | |
441 | st->me.sin_addr.s_addr) || | |
442 | (i == 1 && ifap && strcmp(ifap->ifa_name, ifa->ifa_name) == 0) || | |
443 | strlen(ifa->ifa_name) >= sizeof(ifr.ifr_name)) | |
444 | continue; | |
445 | ifap = ifa; | |
446 | strcpy(ifr.ifr_name, ifa->ifa_name); | |
447 | if (ioctl(sk, SIOCGIFMTU, &ifr)) goto fail_1; | |
448 | if (mtu < ifr.ifr_mtu) mtu = ifr.ifr_mtu; | |
449 | } | |
450 | if (mtu > 0) break; | |
451 | } | |
452 | if (mtu < 0) { errno = ENOTCONN; goto fail_1; } | |
453 | freeifaddrs(ifaa); | |
454 | ||
455 | /* Done. */ | |
456 | return (mtu); | |
457 | ||
458 | fail_1: | |
459 | freeifaddrs(ifaa); | |
460 | fail_0: | |
461 | return (-1); | |
462 | } | |
463 | ||
/* Tear down the `raw' probe method.  There is nothing to release. */
static void raw_finish(void *stv)
{
  (void)stv;
}
465 | ||
466 | static void raw_selprep(void *stv, int *maxfd, fd_set *fd_in) | |
467 | { struct raw_state *st = stv; ADDFD(st->sk); ADDFD(st->rawicmp); } | |
468 | ||
469 | static int raw_xmit(void *stv, int mtu) | |
470 | { | |
471 | struct raw_state *st = stv; | |
472 | unsigned char b[65536], *p; | |
473 | struct ip *ip; | |
474 | struct udphdr *udp; | |
475 | struct phdr ph; | |
476 | unsigned ck; | |
477 | ||
478 | /* Build the IP header. */ | |
479 | ip = (struct ip *)b; | |
480 | ip->ip_v = 4; | |
481 | ip->ip_hl = sizeof(*ip)/4; | |
482 | ip->ip_tos = IPTOS_RELIABILITY; | |
483 | ip->ip_len = sane_htons(mtu); | |
484 | STEP(st->q); ip->ip_id = htons(st->q); | |
485 | ip->ip_off = sane_htons(0 | IP_DF); | |
486 | ip->ip_ttl = 64; | |
487 | ip->ip_p = IPPROTO_UDP; | |
488 | ip->ip_sum = 0; | |
489 | ip->ip_src = st->me.sin_addr; | |
490 | ip->ip_dst = st->sin.sin_addr; | |
491 | ||
492 | /* Build a UDP packet in the output buffer. */ | |
493 | udp = (struct udphdr *)(ip + 1); | |
494 | udp->uh_sport = st->me.sin_port; | |
495 | udp->uh_dport = st->sin.sin_port; | |
496 | udp->uh_ulen = htons(mtu - sizeof(*ip)); | |
497 | udp->uh_sum = 0; | |
498 | ||
499 | /* Copy the payload. */ | |
500 | p = (unsigned char *)(udp + 1); | |
501 | memcpy(p, buf, mtu - (p - b)); | |
502 | ||
503 | /* Calculate the UDP checksum. */ | |
504 | ph.ph_src = ip->ip_src; | |
505 | ph.ph_dst = ip->ip_dst; | |
506 | ph.ph_z = 0; | |
507 | ph.ph_p = IPPROTO_UDP; | |
508 | ph.ph_len = udp->uh_ulen; | |
509 | ck = IPCK_INIT; | |
510 | ck = ipcksum(&ph, sizeof(ph), ck); | |
511 | ck = ipcksum(udp, mtu - sizeof(*ip), ck); | |
512 | udp->uh_sum = htons(ck); | |
513 | ||
514 | /* Send the whole thing off. If we're too big for the interface then we | |
515 | * might need to trim immediately. | |
516 | */ | |
517 | if (sendto(st->rawudp, b, mtu, 0, | |
518 | (struct sockaddr *)&st->sin, sizeof(st->sin)) < 0) { | |
519 | if (errno == EMSGSIZE) return (RC_LOWER); | |
520 | else goto fail_0; | |
521 | } | |
522 | ||
523 | /* Done. */ | |
524 | return (RC_OK); | |
525 | ||
526 | fail_0: | |
527 | return (RC_FAIL); | |
528 | } | |
529 | ||
530 | static int raw_selproc(void *stv, fd_set *fd_in, struct probestate *ps) | |
531 | { | |
532 | struct raw_state *st = stv; | |
533 | unsigned char b[65536]; | |
534 | struct ip *ip; | |
535 | struct icmp *icmp; | |
536 | struct udphdr *udp; | |
537 | ssize_t n; | |
538 | ||
539 | /* An ICMP packet: see what's inside. */ | |
540 | if (FD_ISSET(st->rawicmp, fd_in)) { | |
541 | if ((n = read(st->rawicmp, b, sizeof(b))) < 0) goto fail_0; | |
542 | ||
543 | ip = (struct ip *)b; | |
544 | if (n < sizeof(*ip) || n < sizeof(4*ip->ip_hl) || | |
545 | ip->ip_v != 4 || ip->ip_p != IPPROTO_ICMP) | |
546 | goto skip_icmp; | |
547 | n -= sizeof(4*ip->ip_hl); | |
548 | ||
549 | icmp = (struct icmp *)(b + 4*ip->ip_hl); | |
550 | if (n < sizeof(*icmp) || icmp->icmp_type != ICMP_UNREACH) | |
551 | goto skip_icmp; | |
552 | n -= offsetof(struct icmp, icmp_ip); | |
553 | ||
554 | ip = &icmp->icmp_ip; | |
555 | if (n < sizeof(*ip) || | |
556 | ip->ip_p != IPPROTO_UDP || ip->ip_hl != sizeof(*ip)/4 || | |
557 | ip->ip_id != htons(st->q) || | |
558 | ip->ip_src.s_addr != st->me.sin_addr.s_addr || | |
559 | ip->ip_dst.s_addr != st->sin.sin_addr.s_addr) | |
560 | goto skip_icmp; | |
561 | n -= sizeof(*ip); | |
562 | ||
563 | udp = (struct udphdr *)(ip + 1); | |
564 | if (n < sizeof(udp) || udp->uh_sport != st->me.sin_port || | |
565 | udp->uh_dport != st->sin.sin_port) | |
566 | goto skip_icmp; | |
567 | n -= sizeof(*udp); | |
568 | ||
569 | if (icmp->icmp_code == ICMP_UNREACH_PORT) return (RC_HIGHER); | |
570 | else if (icmp->icmp_code != ICMP_UNREACH_NEEDFRAG) goto skip_icmp; | |
571 | else if (icmp->icmp_nextmtu) return (htons(icmp->icmp_nextmtu)); | |
572 | else return (RC_LOWER); | |
573 | } | |
574 | skip_icmp:; | |
575 | ||
576 | /* If we got a reply to the current probe then we're good. If we got an | |
577 | * error, or the packet's sequence number is wrong, then ignore it. | |
578 | */ | |
579 | if (FD_ISSET(st->sk, fd_in)) { | |
580 | if ((n = read(st->sk, b, sizeof(b))) < 0) return (RC_OK); | |
581 | else if (mypacketp(ps, b, n)) return (RC_HIGHER); | |
582 | else return (RC_OK); | |
583 | } | |
584 | ||
585 | return (RC_OK); | |
586 | ||
587 | fail_0: | |
588 | return (RC_FAIL); | |
589 | } | |
590 | ||
591 | static const struct probe_ops raw_ops = { | |
592 | "raw", OPS_CHAIN, sizeof(struct raw_state), | |
593 | raw_setup, raw_finish, | |
594 | raw_selprep, raw_xmit, raw_selproc | |
595 | }; | |
596 | ||
597 | #undef OPS_CHAIN | |
598 | #define OPS_CHAIN &raw_ops | |
599 | ||
600 | /*----- Doing the job on Linux --------------------------------------------*/ | |
601 | ||
602 | #if defined(linux) | |
603 | ||
/* Fallback definition in case the system headers don't provide one. */
#ifndef IP_MTU
#  define IP_MTU 14			/* Blech! */
#endif

/* Private state for the `linux' probe method. */
struct linux_state {
  int sk;				/* Connected UDP socket */
};
611 | ||
612 | static int linux_setup(void *stv, int sk, const struct param *pp) | |
613 | { | |
614 | struct linux_state *st = stv; | |
615 | int i, mtu; | |
cb160b86 | 616 | socklen_t sz; |
88510d86 MW |
617 | |
618 | /* Snaffle the UDP socket. */ | |
619 | st->sk = sk; | |
620 | ||
621 | /* Turn on kernel path-MTU discovery and force DF on. */ | |
18d5f6eb | 622 | i = IP_PMTUDISC_PROBE; |
88510d86 MW |
623 | if (setsockopt(st->sk, IPPROTO_IP, IP_MTU_DISCOVER, &i, sizeof(i))) |
624 | return (-1); | |
625 | ||
626 | /* Read the initial MTU guess back and report it. */ | |
627 | sz = sizeof(mtu); | |
628 | if (getsockopt(st->sk, IPPROTO_IP, IP_MTU, &mtu, &sz)) | |
629 | return (-1); | |
630 | ||
631 | /* Done. */ | |
632 | return (mtu); | |
633 | } | |
634 | ||
/* Tear down the `linux' probe method.  There is nothing to release. */
static void linux_finish(void *stv)
{
  (void)stv;
}
636 | ||
637 | static void linux_selprep(void *stv, int *maxfd, fd_set *fd_in) | |
638 | { struct linux_state *st = stv; ADDFD(st->sk); } | |
639 | ||
640 | static int linux_xmit(void *stv, int mtu) | |
641 | { | |
642 | struct linux_state *st = stv; | |
643 | ||
644 | /* Write the packet. */ | |
645 | if (write(st->sk, buf, mtu - 28) >= 0) return (RC_OK); | |
646 | else if (errno == EMSGSIZE) return (RC_LOWER); | |
647 | else return (RC_FAIL); | |
648 | } | |
649 | ||
650 | static int linux_selproc(void *stv, fd_set *fd_in, struct probestate *ps) | |
651 | { | |
652 | struct linux_state *st = stv; | |
653 | int mtu; | |
cb160b86 | 654 | socklen_t sz; |
88510d86 MW |
655 | ssize_t n; |
656 | unsigned char b[65536]; | |
657 | ||
658 | /* Read an answer. If it looks like the right kind of error then report a | |
659 | * success. This is potentially wrong, since we can't tell whether an | |
660 | * error was delayed from an earlier probe. However, we never return | |
661 | * RC_LOWER from this method, so the packet sizes ought to be monotonically | |
662 | * decreasing and this won't cause trouble. Otherwise update from the | |
663 | * kernel's idea of the right MTU. | |
664 | */ | |
665 | if (FD_ISSET(st->sk, fd_in)) { | |
666 | n = read(st->sk, &buf, sizeof(buf)); | |
667 | if (n >= 0 ? | |
668 | mypacketp(ps, b, n) : | |
669 | errno == ECONNREFUSED || errno == EHOSTUNREACH) | |
670 | return (RC_HIGHER); | |
671 | sz = sizeof(mtu); | |
672 | if (getsockopt(st->sk, IPPROTO_IP, IP_MTU, &mtu, &sz)) | |
673 | return (RC_FAIL); | |
674 | return (mtu); | |
675 | } | |
676 | return (RC_OK); | |
677 | } | |
678 | ||
679 | static const struct probe_ops linux_ops = { | |
680 | "linux", OPS_CHAIN, sizeof(struct linux_state), | |
681 | linux_setup, linux_finish, | |
682 | linux_selprep, linux_xmit, linux_selproc | |
683 | }; | |
c64d8cd5 | 684 | |
88510d86 MW |
685 | #undef OPS_CHAIN |
686 | #define OPS_CHAIN &linux_ops | |
c64d8cd5 MW |
687 | |
688 | #endif | |
689 | ||
690 | /*----- Help options ------------------------------------------------------*/ | |
691 | ||
88510d86 MW |
692 | static const struct probe_ops *probe_ops = OPS_CHAIN; |
693 | ||
c64d8cd5 MW |
694 | static void version(FILE *fp) |
695 | { pquis(fp, "$, TrIPE version " VERSION "\n"); } | |
696 | ||
/* Write a brief usage synopsis to FP.  The synopsis lists every option
 * which affects the probe, including `-v'.
 */
static void usage(FILE *fp)
{
  pquis(fp, "Usage: $ [-v] [-H HEADER] [-m METHOD]\n"
	    "[-r SECS] [-g FACTOR] [-t SECS] HOST [PORT]\n");
}
c64d8cd5 MW |
702 | |
703 | static void help(FILE *fp) | |
704 | { | |
88510d86 MW |
705 | const struct probe_ops *ops; |
706 | ||
c64d8cd5 MW |
707 | version(fp); |
708 | fputc('\n', fp); | |
709 | usage(fp); | |
710 | fputs("\ | |
711 | \n\ | |
712 | Options in full:\n\ | |
713 | \n\ | |
714 | -h, --help Show this help text.\n\ | |
715 | -v, --version Show version number.\n\ | |
716 | -u, --usage Show brief usage message.\n\ | |
717 | \n\ | |
88510d86 MW |
718 | -g, --growth=FACTOR Growth factor for retransmit interval.\n\ |
719 | -m, --method=METHOD Use METHOD to probe for MTU.\n\ | |
720 | -r, --retransmit=SECS Retransmit if no reply after SEC.\n\ | |
721 | -t, --timeout=SECS Give up expecting a reply after SECS.\n\ | |
c64d8cd5 | 722 | -H, --header=HEX Packet header, in hexadecimal.\n\ |
88510d86 MW |
723 | \n\ |
724 | Probe methods:\n\ | |
c64d8cd5 | 725 | ", fp); |
88510d86 MW |
726 | for (ops = probe_ops; ops; ops = ops->next) |
727 | printf("\t%s\n", ops->name); | |
c64d8cd5 MW |
728 | } |
729 | ||
730 | /*----- Main code ---------------------------------------------------------*/ | |
731 | ||
732 | int main(int argc, char *argv[]) | |
733 | { | |
88510d86 | 734 | struct param pp = { 0, 0.333, 3.0, 8.0, 0, OPS_CHAIN }; |
c64d8cd5 MW |
735 | hex_ctx hc; |
736 | dstr d = DSTR_INIT; | |
737 | size_t sz; | |
738 | int i; | |
739 | unsigned long u; | |
740 | char *q; | |
741 | struct hostent *h; | |
742 | struct servent *s; | |
c64d8cd5 MW |
743 | unsigned f = 0; |
744 | ||
745 | #define f_bogus 1u | |
746 | ||
88510d86 MW |
747 | if ((rawicmp = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0 || |
748 | (rawudp = socket(PF_INET, SOCK_RAW, IPPROTO_UDP)) < 0) | |
749 | rawerr = errno; | |
750 | if (setuid(getuid())) | |
751 | abort(); | |
752 | ||
c64d8cd5 MW |
753 | ego(argv[0]); |
754 | fillbuffer(buf, sizeof(buf)); | |
88510d86 | 755 | pp.sin.sin_port = htons(7); |
c64d8cd5 MW |
756 | |
757 | for (;;) { | |
758 | static const struct option opts[] = { | |
759 | { "help", 0, 0, 'h' }, | |
88510d86 | 760 | { "version", 0, 0, 'V' }, |
c64d8cd5 MW |
761 | { "usage", 0, 0, 'u' }, |
762 | { "header", OPTF_ARGREQ, 0, 'H' }, | |
88510d86 MW |
763 | { "growth", OPTF_ARGREQ, 0, 'g' }, |
764 | { "method", OPTF_ARGREQ, 0, 'm' }, | |
765 | { "retransmit", OPTF_ARGREQ, 0, 'r' }, | |
c64d8cd5 | 766 | { "timeout", OPTF_ARGREQ, 0, 't' }, |
88510d86 | 767 | { "verbose", 0, 0, 'v' }, |
c64d8cd5 MW |
768 | { 0, 0, 0, 0 } |
769 | }; | |
770 | ||
88510d86 | 771 | i = mdwopt(argc, argv, "hVu" "H:g:m:r:t:v", opts, 0, 0, 0); |
c64d8cd5 MW |
772 | if (i < 0) break; |
773 | switch (i) { | |
774 | case 'h': help(stdout); exit(0); | |
88510d86 | 775 | case 'V': version(stdout); exit(0); |
c64d8cd5 MW |
776 | case 'u': usage(stdout); exit(0); |
777 | ||
778 | case 'H': | |
779 | DRESET(&d); | |
780 | hex_init(&hc); | |
781 | hex_decode(&hc, optarg, strlen(optarg), &d); | |
782 | hex_decode(&hc, 0, 0, &d); | |
88510d86 | 783 | sz = d.len < 532 ? d.len : 532; |
c64d8cd5 | 784 | memcpy(buf, d.buf, sz); |
88510d86 | 785 | pp.seqoff = sz; |
c64d8cd5 MW |
786 | break; |
787 | ||
88510d86 MW |
788 | case 'g': pp.regr = s2f(optarg, "retransmit growth factor"); break; |
789 | case 'r': pp.retx = s2f(optarg, "retransmit interval"); break; | |
790 | case 't': pp.timeout = s2f(optarg, "timeout"); break; | |
791 | ||
792 | case 'm': | |
793 | for (pp.pops = OPS_CHAIN; pp.pops; pp.pops = pp.pops->next) | |
794 | if (strcmp(pp.pops->name, optarg) == 0) goto found_alg; | |
795 | die(EXIT_FAILURE, "unknown probe algorithm `%s'", optarg); | |
796 | found_alg: | |
c64d8cd5 MW |
797 | break; |
798 | ||
88510d86 MW |
799 | case 'v': pp.f |= F_VERBOSE; break; |
800 | ||
c64d8cd5 MW |
801 | default: |
802 | f |= f_bogus; | |
803 | break; | |
804 | } | |
805 | } | |
806 | argv += optind; argc -= optind; | |
807 | if ((f & f_bogus) || 1 > argc || argc > 2) { | |
808 | usage(stderr); | |
809 | exit(EXIT_FAILURE); | |
810 | } | |
811 | ||
812 | if ((h = gethostbyname(*argv)) == 0) | |
813 | die(EXIT_FAILURE, "unknown host `%s': %s", *argv, hstrerror(h_errno)); | |
814 | if (h->h_addrtype != AF_INET) | |
815 | die(EXIT_FAILURE, "unsupported address family for host `%s'", *argv); | |
88510d86 | 816 | memcpy(&pp.sin.sin_addr, h->h_addr, sizeof(struct in_addr)); |
c64d8cd5 MW |
817 | argv++; argc--; |
818 | ||
819 | if (*argv) { | |
820 | errno = 0; | |
821 | u = strtoul(*argv, &q, 0); | |
822 | if (!errno && !*q) | |
88510d86 | 823 | pp.sin.sin_port = htons(u); |
c64d8cd5 MW |
824 | else if ((s = getservbyname(*argv, "udp")) == 0) |
825 | die(EXIT_FAILURE, "unknown UDP service `%s'", *argv); | |
826 | else | |
88510d86 | 827 | pp.sin.sin_port = s->s_port; |
c64d8cd5 MW |
828 | } |
829 | ||
88510d86 MW |
830 | pp.sin.sin_family = AF_INET; |
831 | i = pathmtu(&pp); | |
c64d8cd5 MW |
832 | if (i < 0) |
833 | die(EXIT_FAILURE, "failed to discover MTU: %s", strerror(errno)); | |
834 | printf("%d\n", i); | |
835 | if (ferror(stdout) || fflush(stdout) || fclose(stdout)) | |
836 | die(EXIT_FAILURE, "failed to write result: %s", strerror(errno)); | |
837 | return (0); | |
838 | } | |
839 | ||
840 | /*----- That's all, folks -------------------------------------------------*/ |