#include "netlink.h"
#include "process.h"
+#ifdef NETLINK_DEBUG
+#define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__) /* forwards its arguments to Message() at level M_DEBUG */
+#else /* !NETLINK_DEBUG */
+#define MDEBUG(...) ((void)0) /* expands to a no-op; the arguments are discarded unevaluated */
+#endif /* !NETLINK_DEBUG */
+
#define ICMP_TYPE_ECHO_REPLY 0
#define ICMP_TYPE_UNREACHABLE 3
return sum;
}
#else
-static inline uint16_t ip_fast_csum(uint8_t *iph, int32_t ihl)
+static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
{
assert(ihl < INT_MAX/4);
return ip_csum(iph,ihl*4);
uint16_t id;
uint16_t seq;
} echo;
+ struct {
+ uint16_t unused;
+ uint16_t mtu;
+ } fragneeded;
} d;
};
static const union icmpinfofield icmp_noinfo;
+static void netlink_client_deliver(struct netlink *st,
+ struct netlink_client *client,
+ uint32_t source, uint32_t dest,
+ struct buffer_if *buf);
+static void netlink_host_deliver(struct netlink *st,
+ struct netlink_client *sender,
+ uint32_t source, uint32_t dest,
+ struct buffer_if *buf);
+
+/* Name to use in log messages for the originator of a packet:
+ * the client's own name, or "(local)" when sender is NULL. */
+static const char *sender_name(struct netlink_client *sender /* or NULL */)
+{
+    if (!sender)
+        return "(local)";
+    return sender->name;
+}
+
static void netlink_packet_deliver(struct netlink *st,
struct netlink_client *client,
struct buffer_if *buf);
settable.
*/
static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
- uint32_t dest,uint16_t len)
+ uint32_t source, uint32_t dest,
+ uint16_t len)
{
struct icmphdr *h;
h->iph.frag=0;
h->iph.ttl=255; /* XXX should be configurable */
h->iph.protocol=1;
- h->iph.saddr=htonl(st->secnet_address);
+ h->iph.saddr=htonl(source);
h->iph.daddr=htonl(dest);
h->iph.check=0;
h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
/* client indicates where the packet we're constructing a response to
comes from. NULL indicates the host. */
-static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
+static void netlink_icmp_simple(struct netlink *st,
+ struct netlink_client *origsender,
+ struct buffer_if *buf,
uint8_t type, uint8_t code,
union icmpinfofield info)
{
if (netlink_icmp_may_reply(buf)) {
struct iphdr *iph=(struct iphdr *)buf->start;
+
+ uint32_t icmpdest = ntohl(iph->saddr);
+ uint32_t icmpsource;
+ const char *icmpsourcedebugprefix;
+ if (!st->ptp) {
+ icmpsource=st->secnet_address;
+ icmpsourcedebugprefix="";
+ } else if (origsender) {
+ /* was from peer, send reply as if from host */
+ icmpsource=st->local_address;
+ icmpsourcedebugprefix="L!";
+ } else {
+ /* was from host, send reply as if from peer */
+ icmpsource=st->secnet_address; /* actually, peer address */
+ icmpsourcedebugprefix="P!";
+ }
+ MDEBUG("%s: generating ICMP re %s[%s]->[%s]:"
+ " from %s%s type=%u code=%u\n",
+ st->name, sender_name(origsender),
+ ipaddr_to_string(ntohl(iph->saddr)),
+ ipaddr_to_string(ntohl(iph->daddr)),
+ icmpsourcedebugprefix,
+ ipaddr_to_string(icmpsource),
+ type, code);
+
len=netlink_icmp_reply_len(buf);
- h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
+ h=netlink_icmp_tmpl(st,icmpsource,icmpdest,len);
h->type=type; h->code=code; h->d=info;
- memcpy(buf_append(&st->icmp,len),buf->start,len);
+ BUF_ADD_BYTES(append,&st->icmp,buf->start,len);
netlink_icmp_csum(h);
- netlink_packet_deliver(st,NULL,&st->icmp);
+
+ if (!st->ptp) {
+ netlink_packet_deliver(st,NULL,&st->icmp);
+ } else if (origsender) {
+ netlink_client_deliver(st,origsender,icmpsource,icmpdest,&st->icmp);
+ } else {
+ netlink_host_deliver(st,NULL,icmpsource,icmpdest,&st->icmp);
+ }
BUF_ASSERT_FREE(&st->icmp);
}
}
#undef BAD
}
+/* Rewrite, in place, the IPv4 header at base so that it keeps only the
+ * options whose "copy" bit (0x80) is set.
+ *
+ * On entry *hlp is the current header length in bytes.  Options are
+ * scanned from just after the fixed header up to that length; copied
+ * options are compacted towards the front, the result is padded with
+ * End of Option List bytes to a multiple of 4, and both the header's
+ * ihl field and *hlp are updated to the new length.
+ *
+ * Returns 0 on success, or a static string describing why the options
+ * could not be parsed.  On failure ihl and *hlp are left unchanged, but
+ * option bytes may already have been moved.
+ *
+ * The option bytes come straight from the packet, so every path through
+ * the loop must advance ip: a NOP is stepped over explicitly, and a
+ * length byte below 2 (which cannot cover the type and length bytes
+ * themselves) is rejected rather than being allowed to stall the scan.
+ */
+static const char *fragment_filter_header(uint8_t *base, long *hlp)
+{
+    const int fixedhl = sizeof(struct iphdr);
+    long hl = *hlp;
+    const uint8_t *ipend = base + hl;
+    uint8_t *op = base + fixedhl;   /* where the next kept option goes */
+    const uint8_t *ip = op;         /* next option to examine */
+
+    while (ip < ipend) {
+        uint8_t opt = ip[0];
+        int remain = ipend - ip;
+        if (opt == 0x00) /* End of Options List */ break;
+        if (opt == 0x01) /* No Operation */ {
+            /* single byte, copy bit clear: drop it, but step past it
+             * so that the scan makes progress */
+            ip++;
+            continue;
+        }
+        if (remain < 2) return "IPv4 options truncated at length";
+        int optlen = ip[1];
+        if (optlen < 2) return "IPv4 option has invalid length";
+        if (remain < optlen) return "IPv4 options truncated in option";
+        if (opt & 0x80) /* copy */ {
+            /* op <= ip always, so the regions may overlap */
+            memmove(op, ip, optlen);
+            op += optlen;
+        }
+        ip += optlen;
+    }
+    while ((hl = (op - base)) & 0x3)
+        *op++ = 0x00 /* End of Option List */;
+    ((struct iphdr*)base)->ihl = hl >> 2;
+    *hlp = hl;
+
+    return 0;
+}
+
+/* Fragment or send ICMP Fragmentation Needed
+ *
+ * If buf->size <= mtu, buf is handed to deliver(deliver_dst, buf)
+ * unchanged.  Otherwise, if the packet has IPHDR_FRAG_DONT set, an ICMP
+ * unreachable / fragmentation-required message carrying mtu is generated
+ * with netlink_icmp_simple() and buf is released with BUF_FREE.  Failing
+ * that, the packet is copied into st->icmp and re-sent as a series of
+ * fragments, each built in buf and passed to deliver().  The first
+ * fragment carries the original header; later ones carry the header as
+ * rewritten by fragment_filter_header().
+ *
+ * sender is only passed on to netlink_icmp_simple().  source and
+ * delivery_name are used only in log messages; dest is not used in
+ * this function.
+ *
+ * NOTE(review): the loop calls BUF_ALLOC(buf) again after each
+ * deliver(), so deliver is presumably expected to leave buf free
+ * - confirm against the netlink_deliver_fn contract.
+ */
+static void netlink_maybe_fragment(struct netlink *st,
+ struct netlink_client *sender,
+ netlink_deliver_fn *deliver,
+ void *deliver_dst,
+ const char *delivery_name,
+ int32_t mtu,
+ uint32_t source, uint32_t dest,
+ struct buffer_if *buf)
+{
+ struct iphdr *iph=(struct iphdr*)buf->start;
+ long hl = iph->ihl*4; /* header length in bytes (ihl counts 32-bit words) */
+ const char *ssource = ipaddr_to_string(source);
+
+ if (buf->size <= mtu) { /* fits: deliver unmodified */
+ deliver(deliver_dst, buf);
+ return;
+ }
+
+ MDEBUG("%s: fragmenting %s->%s org.size=%"PRId32"\n",
+ st->name, ssource, delivery_name, buf->size);
+
+#define BADFRAG(m, ...) \
+ Message(M_WARNING, \
+ "%s: fragmenting packet from source %s" \
+ " for transmission via %s: " m "\n", \
+ st->name, ssource, delivery_name, \
+ ## __VA_ARGS__);
+
+ unsigned orig_frag = ntohs(iph->frag); /* original flags + offset, host order */
+
+ if (orig_frag&IPHDR_FRAG_DONT) { /* must not fragment: report back instead */
+ union icmpinfofield info =
+ { .fragneeded = { .unused = 0, .mtu = htons(mtu) } };
+ netlink_icmp_simple(st,sender,buf,
+ ICMP_TYPE_UNREACHABLE,
+ ICMP_CODE_FRAGMENTATION_REQUIRED,
+ info);
+ BUF_FREE(buf);
+ return;
+ }
+ if (mtu < hl + 8) { /* no room for the header plus one 8-byte unit of payload */
+ BADFRAG("mtu %"PRId32" too small", mtu);
+ BUF_FREE(buf);
+ return;
+ }
+
+ /* we (ab)use the icmp buffer to stash the original packet */
+ struct buffer_if *orig = &st->icmp;
+ BUF_ALLOC(orig,"netlink_client_deliver fragment orig");
+ buffer_copy(orig,buf);
+ BUF_FREE(buf);
+
+ const uint8_t *startindata = orig->start + hl; /* first payload byte */
+ const uint8_t *indata = startindata; /* next payload byte still to send */
+ const uint8_t *endindata = orig->start + orig->size;
+ _Bool filtered = 0; /* becomes true once the stashed header has been filtered */
+
+ for (;;) {
+ /* compute our fragment offset */
+ long dataoffset = indata - startindata
+ + (orig_frag & IPHDR_FRAG_OFF)*8;
+ assert(!(dataoffset & 7));
+ if (dataoffset > IPHDR_FRAG_OFF*8) {
+ BADFRAG("ultimate fragment offset out of range");
+ break;
+ }
+
+ BUF_ALLOC(buf,"netlink_client_deliver fragment frag");
+ buffer_init(buf,calculate_max_start_pad());
+
+ /* copy header (possibly filtered); will adjust in a bit */
+ struct iphdr *fragh = buf_append(buf, hl);
+ memcpy(fragh, orig->start, hl);
+
+ /* decide how much payload to copy and copy it */
+ long avail = mtu - hl;
+ long remain = endindata - indata;
+ long use = avail < remain ? (avail & ~(long)7) : remain; /* non-final pieces are rounded down to a multiple of 8 */
+ BUF_ADD_BYTES(append, buf, indata, use);
+ indata += use;
+
+ _Bool last_frag = indata >= endindata; /* all payload consumed */
+
+ /* adjust the header */
+ fragh->tot_len = htons(buf->size);
+ fragh->frag =
+ htons((orig_frag & ~IPHDR_FRAG_OFF) |
+ (last_frag ? 0 : IPHDR_FRAG_MORE) |
+ (dataoffset >> 3));
+ fragh->check = 0;
+ fragh->check = ip_fast_csum((const void*)fragh, fragh->ihl);
+
+ /* actually send it */
+ deliver(deliver_dst, buf);
+ if (last_frag)
+ break;
+
+ /* after copying the header for the first frag,
+ * we filter the header for the remaining frags */
+ if (!filtered++) { /* true only on the first pass; hl may shrink here */
+ const char *bad = fragment_filter_header(orig->start, &hl);
+ if (bad) { BADFRAG("%s", bad); break; }
+ }
+ }
+
+ BUF_FREE(orig);
+
+#undef BADFRAG
+}
+
/* Deliver a packet _to_ client; used after we have decided
* what to do with it (and just to check that the client has
* actually registered a delivery function with us). */
d=ipaddr_to_string(dest);
Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
st->name,s,d);
- free(s); free(d);
BUF_FREE(buf);
return;
}
- client->deliver(client->dst, buf);
+ netlink_maybe_fragment(st,NULL, client->deliver,client->dst,client->name,
+ client->mtu, source,dest,buf);
client->outcount++;
}
/* Deliver a packet to the host; used after we have decided that that
* is what to do with it. */
static void netlink_host_deliver(struct netlink *st,
+ struct netlink_client *sender,
uint32_t source, uint32_t dest,
struct buffer_if *buf)
{
- st->deliver_to_host(st->dst,buf);
+ netlink_maybe_fragment(st,sender, st->deliver_to_host,st->dst,"(host)",
+ st->mtu, source,dest,buf);
st->outcount++;
}
-/* Deliver a packet. "client" is the _origin_ of the packet, not its
- destination, and is NULL for packets from the host and packets
+/* Deliver a packet. "sender"==NULL for packets from the host and packets
generated internally in secnet. */
static void netlink_packet_deliver(struct netlink *st,
- struct netlink_client *client,
+ struct netlink_client *sender,
struct buffer_if *buf)
{
if (buf->size < (int)sizeof(struct iphdr)) {
Message(M_ERR,"%s: trying to deliver a too-short packet"
- " from %s!\n",st->name, client?client->name:"(local)");
+ " from %s!\n",st->name, sender_name(sender));
BUF_FREE(buf);
return;
}
return;
}
- /* Packets from the host (client==NULL) may always be routed. Packets
+ /* Packets from the host (sender==NULL) may always be routed. Packets
from clients with the allow_route option will also be routed. */
- if (!client || (client && (client->options & OPT_ALLOWROUTE)))
+ if (!sender || (sender && (sender->options & OPT_ALLOWROUTE)))
allow_route=True;
/* If !allow_route, we check the routing table anyway, and if
/* The packet's not going down a tunnel. It might (ought to)
be for the host. */
if (ipset_contains_addr(st->networks,dest)) {
- netlink_host_deliver(st,source,dest,buf);
+ netlink_host_deliver(st,sender,source,dest,buf);
BUF_ASSERT_FREE(buf);
} else {
string_t s,d;
d=ipaddr_to_string(dest);
Message(M_DEBUG,"%s: don't know where to deliver packet "
"(s=%s, d=%s)\n", st->name, s, d);
- free(s); free(d);
- netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
+ netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
BUF_FREE(buf);
}
with destination network administratively prohibited */
Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
st->name,s,d);
- free(s); free(d);
- netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
+ netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
BUF_FREE(buf);
} else {
if (best_quality>0) {
- /* XXX Fragment if required */
netlink_client_deliver(st,st->routes[best_match],
source,dest,buf);
BUF_ASSERT_FREE(buf);
} else {
/* Generate ICMP destination unreachable */
- netlink_icmp_simple(st,buf,
+ netlink_icmp_simple(st,sender,buf,
ICMP_TYPE_UNREACHABLE,
ICMP_CODE_NET_UNREACHABLE,
icmp_noinfo);
}
static void netlink_packet_forward(struct netlink *st,
- struct netlink_client *client,
+ struct netlink_client *sender,
struct buffer_if *buf)
{
if (buf->size < (int)sizeof(struct iphdr)) return;
/* Packet has already been checked */
if (iph->ttl<=1) {
/* Generate ICMP time exceeded */
- netlink_icmp_simple(st,buf,ICMP_TYPE_TIME_EXCEEDED,
+ netlink_icmp_simple(st,sender,buf,ICMP_TYPE_TIME_EXCEEDED,
ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
BUF_FREE(buf);
return;
iph->check=0;
iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
- netlink_packet_deliver(st,client,buf);
+ netlink_packet_deliver(st,sender,buf);
BUF_ASSERT_FREE(buf);
}
/* Deal with packets addressed explicitly to us */
static void netlink_packet_local(struct netlink *st,
- struct netlink_client *client,
+ struct netlink_client *sender,
struct buffer_if *buf)
{
struct icmphdr *h;
}
h=(struct icmphdr *)buf->start;
- if ((ntohs(h->iph.frag)&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
- Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
- "ignoring it\n",st->name);
+ unsigned fraginfo = ntohs(h->iph.frag);
+ if ((fraginfo&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
+ if (!(fraginfo & IPHDR_FRAG_OFF))
+ /* report only for first fragment */
+ Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
+ "ignoring it\n",st->name);
BUF_FREE(buf);
return;
}
Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
} else {
/* Send ICMP protocol unreachable */
- netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
+ netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
BUF_FREE(buf);
return;
/* If cid==NULL packet is from host, otherwise cid specifies which tunnel
it came from. */
-static void netlink_incoming(struct netlink *st, struct netlink_client *client,
+static void netlink_incoming(struct netlink *st, struct netlink_client *sender,
struct buffer_if *buf)
{
uint32_t source,dest;
struct iphdr *iph;
char errmsgbuf[50];
- const char *sourcedesc=client?client->name:"host";
+ const char *sourcedesc=sender?sender->name:"host";
BUF_ASSERT_USED(buf);
BUF_FREE(buf);
return;
}
- assert(buf->size >= (int)sizeof(struct icmphdr));
+ assert(buf->size >= (int)sizeof(struct iphdr));
iph=(struct iphdr *)buf->start;
source=ntohl(iph->saddr);
/* Check source. If we don't like the source, there's no point
generating ICMP because we won't know how to get it to the
source of the packet. */
- if (client) {
+ if (sender) {
/* Check that the packet source is appropriate for the tunnel
it came down */
- if (!ipset_contains_addr(client->networks,source)) {
+ if (!ipset_contains_addr(sender->networks,source)) {
string_t s,d;
s=ipaddr_to_string(source);
d=ipaddr_to_string(dest);
Message(M_WARNING,"%s: packet from tunnel %s with bad "
- "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
- free(s); free(d);
+ "source address (s=%s,d=%s)\n",st->name,sender->name,s,d);
BUF_FREE(buf);
return;
}
d=ipaddr_to_string(dest);
Message(M_WARNING,"%s: outgoing packet with bad source address "
"(s=%s,d=%s)\n",st->name,s,d);
- free(s); free(d);
BUF_FREE(buf);
return;
}
where it came from. It's up to external software to check
address validity and generate ICMP, etc. */
if (st->ptp) {
- if (client) {
- netlink_host_deliver(st,source,dest,buf);
+ if (sender) {
+ netlink_host_deliver(st,sender,source,dest,buf);
} else {
netlink_client_deliver(st,st->clients,source,dest,buf);
}
/* st->secnet_address needs checking before matching destination
addresses */
if (dest==st->secnet_address) {
- netlink_packet_local(st,client,buf);
+ netlink_packet_local(st,sender,buf);
BUF_ASSERT_FREE(buf);
return;
}
- netlink_packet_forward(st,client,buf);
+ netlink_packet_forward(st,sender,buf);
BUF_ASSERT_FREE(buf);
}
for (i=0; i<snets->entries; i++) {
net=subnet_to_string(snets->list[i]);
Message(loglevel,"%s ",net);
- free(net);
}
}
net=ipaddr_to_string(st->secnet_address);
Message(c,"%s: point-to-point (remote end is %s); routes: ",
st->name, net);
- free(net);
netlink_output_subnets(st,c,st->clients->subnets);
Message(c,"\n");
} else {
net=ipaddr_to_string(st->secnet_address);
Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
net,st->name,st->localcount);
- free(net);
for (i=0; i<st->subnets->entries; i++) {
net=subnet_to_string(st->subnets->list[i]);
Message(c,"%s ",net);
- free(net);
}
if (i>0)
Message(c,"-> host (use %d)\n",st->outcount);
}
static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
- void *dst)
+ void *dst, uint32_t *localmtu_r)
{
struct netlink_client *c=sst;
+ struct netlink *st=c->nst;
c->deliver=deliver;
c->dst=dst;
+
+ if (localmtu_r)
+ *localmtu_r=st->mtu;
}
static struct flagstr netlink_option_table[]={
st->remote_networks=ipset_complement(empty);
ipset_free(empty);
}
+ st->local_address=string_item_to_ipaddr(
+ dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
though, and will make the route dump look complicated... */
st->subnets=ipset_to_subnet_list(st->networks);
st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
- buffer_new(&st->icmp,ICMP_BUFSIZE);
+ buffer_new(&st->icmp,MAX(ICMP_BUFSIZE,st->mtu));
st->outcount=0;
st->localcount=0;