chiark / gitweb /
networkd: add FDB support
[elogind.git] / src / libsystemd / sd-rtnl / rtnl-message.c
index 5265184098c32c2e37b1d9715c984f5ef58791da..9099440ad461bf0475eb0a66822c1a1b97be3b5b 100644 (file)
 #include <netinet/ether.h>
 #include <stdbool.h>
 #include <unistd.h>
-#include <linux/netlink.h>
-#include <linux/veth.h>
-#include <linux/if.h>
-#include <linux/ip.h>
-#include <linux/if_tunnel.h>
-#include <linux/if_bridge.h>
 
 #include "util.h"
 #include "refcnt.h"
 #define GET_CONTAINER(m, i) ((i) < (m)->n_containers ? (struct rtattr*)((uint8_t*)(m)->hdr + (m)->container_offsets[i]) : NULL)
 #define PUSH_CONTAINER(m, new) (m)->container_offsets[(m)->n_containers ++] = (uint8_t*)(new) - (uint8_t*)(m)->hdr;
 
-static int message_new_empty(sd_rtnl *rtnl, sd_rtnl_message **ret, size_t initial_size) {
+#define RTA_TYPE(rta) ((rta)->rta_type & NLA_TYPE_MASK)
+
+static int message_new_empty(sd_rtnl *rtnl, sd_rtnl_message **ret) {
         sd_rtnl_message *m;
 
         assert_return(ret, -EINVAL);
-        assert_return(initial_size >= sizeof(struct nlmsghdr), -EINVAL);
 
         /* Note that 'rtnl' is curretly unused, if we start using it internally
            we must take care to avoid problems due to mutual references between
@@ -57,15 +52,8 @@ static int message_new_empty(sd_rtnl *rtnl, sd_rtnl_message **ret, size_t initia
         if (!m)
                 return -ENOMEM;
 
-        m->hdr = malloc0(initial_size);
-        if (!m->hdr) {
-                free(m);
-                return -ENOMEM;
-        }
-
         m->n_ref = REFCNT_INIT;
 
-        m->hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
         m->sealed = false;
 
         *ret = m;
@@ -74,6 +62,7 @@ static int message_new_empty(sd_rtnl *rtnl, sd_rtnl_message **ret, size_t initia
 }
 
 int message_new(sd_rtnl *rtnl, sd_rtnl_message **ret, uint16_t type) {
+        _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
         const NLType *nl_type;
         size_t size;
         int r;
@@ -84,15 +73,25 @@ int message_new(sd_rtnl *rtnl, sd_rtnl_message **ret, uint16_t type) {
 
         assert(nl_type->type == NLA_NESTED);
 
-        size = NLMSG_SPACE(nl_type->size);
-
-        r = message_new_empty(rtnl, ret, size);
+        r = message_new_empty(rtnl, &m);
         if (r < 0)
                 return r;
 
-        (*ret)->container_type_system[0] = nl_type->type_system;
-        (*ret)->hdr->nlmsg_len = size;
-        (*ret)->hdr->nlmsg_type = type;
+        size = NLMSG_SPACE(nl_type->size);
+
+        assert(size >= sizeof(struct nlmsghdr));
+        m->hdr = malloc0(size);
+        if (!m->hdr)
+                return -ENOMEM;
+
+        m->hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+
+        m->container_type_system[0] = nl_type->type_system;
+        m->hdr->nlmsg_len = size;
+        m->hdr->nlmsg_type = type;
+
+        *ret = m;
+        m = NULL;
 
         return 0;
 }
@@ -115,6 +114,24 @@ int sd_rtnl_message_route_set_dst_prefixlen(sd_rtnl_message *m, unsigned char pr
         return 0;
 }
 
+int sd_rtnl_message_route_set_src_prefixlen(sd_rtnl_message *m, unsigned char prefixlen) {
+        struct rtmsg *rtm;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+        rtm = NLMSG_DATA(m->hdr);
+
+        if ((rtm->rtm_family == AF_INET && prefixlen > 32) ||
+            (rtm->rtm_family == AF_INET6 && prefixlen > 128))
+                return -ERANGE;
+
+        rtm->rtm_src_len = prefixlen;
+
+        return 0;
+}
+
 int sd_rtnl_message_route_set_scope(sd_rtnl_message *m, unsigned char scope) {
         struct rtmsg *rtm;
 
@@ -129,13 +146,60 @@ int sd_rtnl_message_route_set_scope(sd_rtnl_message *m, unsigned char scope) {
         return 0;
 }
 
+int sd_rtnl_message_route_get_family(sd_rtnl_message *m, int *family) {
+        struct rtmsg *rtm;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+        assert_return(family, -EINVAL);
+
+        rtm = NLMSG_DATA(m->hdr);
+
+        *family = rtm->rtm_family;
+
+        return 0;
+}
+
+int sd_rtnl_message_route_get_dst_prefixlen(sd_rtnl_message *m, unsigned char *dst_len) {
+        struct rtmsg *rtm;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+        assert_return(dst_len, -EINVAL);
+
+        rtm = NLMSG_DATA(m->hdr);
+
+        *dst_len = rtm->rtm_dst_len;
+
+        return 0;
+}
+
+int sd_rtnl_message_route_get_src_prefixlen(sd_rtnl_message *m, unsigned char *src_len) {
+        struct rtmsg *rtm;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+        assert_return(src_len, -EINVAL);
+
+        rtm = NLMSG_DATA(m->hdr);
+
+        *src_len = rtm->rtm_src_len;
+
+        return 0;
+}
+
 int sd_rtnl_message_new_route(sd_rtnl *rtnl, sd_rtnl_message **ret,
-                              uint16_t nlmsg_type, unsigned char rtm_family) {
+                              uint16_t nlmsg_type, int rtm_family,
+                              unsigned char rtm_protocol) {
         struct rtmsg *rtm;
         int r;
 
         assert_return(rtnl_message_type_is_route(nlmsg_type), -EINVAL);
-        assert_return(rtm_family == AF_INET || rtm_family == AF_INET6, -EINVAL);
+        assert_return((nlmsg_type == RTM_GETROUTE && rtm_family == AF_UNSPEC) ||
+                      rtm_family == AF_INET || rtm_family == AF_INET6, -EINVAL);
         assert_return(ret, -EINVAL);
 
         r = message_new(rtnl, ret, nlmsg_type);
@@ -143,7 +207,7 @@ int sd_rtnl_message_new_route(sd_rtnl *rtnl, sd_rtnl_message **ret,
                 return r;
 
         if (nlmsg_type == RTM_NEWROUTE)
-                (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+                (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND;
 
         rtm = NLMSG_DATA((*ret)->hdr);
 
@@ -151,7 +215,114 @@ int sd_rtnl_message_new_route(sd_rtnl *rtnl, sd_rtnl_message **ret,
         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
         rtm->rtm_type = RTN_UNICAST;
         rtm->rtm_table = RT_TABLE_MAIN;
-        rtm->rtm_protocol = RTPROT_BOOT;
+        rtm->rtm_protocol = rtm_protocol;
+
+        return 0;
+}
+
+int sd_rtnl_message_neigh_set_flags(sd_rtnl_message *m, uint8_t flags) {
+        struct ndmsg *ndm;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+        ndm = NLMSG_DATA(m->hdr);
+        ndm->ndm_flags |= flags;
+
+        return 0;
+}
+
+int sd_rtnl_message_neigh_set_state(sd_rtnl_message *m, uint16_t state) {
+        struct ndmsg *ndm;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+        ndm = NLMSG_DATA(m->hdr);
+        ndm->ndm_state |= state;
+
+        return 0;
+}
+
+int sd_rtnl_message_neigh_get_flags(sd_rtnl_message *m, uint8_t *flags) {
+        struct ndmsg *ndm;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+        ndm = NLMSG_DATA(m->hdr);
+        *flags = ndm->ndm_flags;
+
+        return 0;
+}
+
+int sd_rtnl_message_neigh_get_state(sd_rtnl_message *m, uint16_t *state) {
+        struct ndmsg *ndm;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+        ndm = NLMSG_DATA(m->hdr);
+        *state = ndm->ndm_state;
+
+        return 0;
+}
+
+int sd_rtnl_message_neigh_get_family(sd_rtnl_message *m, int *family) {
+        struct ndmsg *ndm;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+        assert_return(family, -EINVAL);
+
+        ndm = NLMSG_DATA(m->hdr);
+
+        *family = ndm->ndm_family;
+
+        return 0;
+}
+
+int sd_rtnl_message_neigh_get_ifindex(sd_rtnl_message *m, int *index) {
+        struct ndmsg *ndm;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+        assert_return(index, -EINVAL);
+
+        ndm = NLMSG_DATA(m->hdr);
+
+        *index = ndm->ndm_ifindex;
+
+        return 0;
+}
+
+int sd_rtnl_message_new_neigh(sd_rtnl *rtnl, sd_rtnl_message **ret, uint16_t nlmsg_type, int index, int ndm_family) {
+        struct ndmsg *ndm;
+        int r;
+
+        assert_return(rtnl_message_type_is_neigh(nlmsg_type), -EINVAL);
+        assert_return(ndm_family == AF_INET  ||
+                      ndm_family == AF_INET6 ||
+                      ndm_family == PF_BRIDGE, -EINVAL);
+        assert_return(ret, -EINVAL);
+
+        r = message_new(rtnl, ret, nlmsg_type);
+        if (r < 0)
+                return r;
+
+        if (nlmsg_type == RTM_NEWNEIGH)
+                (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND;
+
+        ndm = NLMSG_DATA((*ret)->hdr);
+
+        ndm->ndm_family = ndm_family;
+        ndm->ndm_ifindex = index;
 
         return 0;
 }
@@ -186,6 +357,20 @@ int sd_rtnl_message_link_set_type(sd_rtnl_message *m, unsigned type) {
         return 0;
 }
 
+int sd_rtnl_message_link_set_family(sd_rtnl_message *m, unsigned family) {
+        struct ifinfomsg *ifi;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+
+        ifi = NLMSG_DATA(m->hdr);
+
+        ifi->ifi_family = family;
+
+        return 0;
+}
+
 int sd_rtnl_message_new_link(sd_rtnl *rtnl, sd_rtnl_message **ret,
                              uint16_t nlmsg_type, int index) {
         struct ifinfomsg *ifi;
@@ -210,6 +395,23 @@ int sd_rtnl_message_new_link(sd_rtnl *rtnl, sd_rtnl_message **ret,
         return 0;
 }
 
+int sd_rtnl_message_request_dump(sd_rtnl_message *m, int dump) {
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(m->hdr->nlmsg_type == RTM_GETLINK  ||
+                      m->hdr->nlmsg_type == RTM_GETADDR  ||
+                      m->hdr->nlmsg_type == RTM_GETROUTE ||
+                      m->hdr->nlmsg_type == RTM_GETNEIGH,
+                      -EINVAL);
+
+        if (dump)
+                m->hdr->nlmsg_flags |= NLM_F_DUMP;
+        else
+                m->hdr->nlmsg_flags &= ~NLM_F_DUMP;
+
+        return 0;
+}
+
 int sd_rtnl_message_addr_set_prefixlen(sd_rtnl_message *m, unsigned char prefixlen) {
         struct ifaddrmsg *ifa;
 
@@ -256,22 +458,99 @@ int sd_rtnl_message_addr_set_scope(sd_rtnl_message *m, unsigned char scope) {
         return 0;
 }
 
+int sd_rtnl_message_addr_get_family(sd_rtnl_message *m, int *family) {
+        struct ifaddrmsg *ifa;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+        assert_return(family, -EINVAL);
+
+        ifa = NLMSG_DATA(m->hdr);
+
+        *family = ifa->ifa_family;
+
+        return 0;
+}
+
+int sd_rtnl_message_addr_get_prefixlen(sd_rtnl_message *m, unsigned char *prefixlen) {
+        struct ifaddrmsg *ifa;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+        assert_return(prefixlen, -EINVAL);
+
+        ifa = NLMSG_DATA(m->hdr);
+
+        *prefixlen = ifa->ifa_prefixlen;
+
+        return 0;
+}
+
+int sd_rtnl_message_addr_get_scope(sd_rtnl_message *m, unsigned char *scope) {
+        struct ifaddrmsg *ifa;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+        assert_return(scope, -EINVAL);
+
+        ifa = NLMSG_DATA(m->hdr);
+
+        *scope = ifa->ifa_scope;
+
+        return 0;
+}
+
+int sd_rtnl_message_addr_get_flags(sd_rtnl_message *m, unsigned char *flags) {
+        struct ifaddrmsg *ifa;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+        assert_return(flags, -EINVAL);
+
+        ifa = NLMSG_DATA(m->hdr);
+
+        *flags = ifa->ifa_flags;
+
+        return 0;
+}
+
+int sd_rtnl_message_addr_get_ifindex(sd_rtnl_message *m, int *ifindex) {
+        struct ifaddrmsg *ifa;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+        assert_return(ifindex, -EINVAL);
+
+        ifa = NLMSG_DATA(m->hdr);
+
+        *ifindex = ifa->ifa_index;
+
+        return 0;
+}
+
 int sd_rtnl_message_new_addr(sd_rtnl *rtnl, sd_rtnl_message **ret,
                              uint16_t nlmsg_type, int index,
-                             unsigned char family) {
+                             int family) {
         struct ifaddrmsg *ifa;
         int r;
 
         assert_return(rtnl_message_type_is_addr(nlmsg_type), -EINVAL);
-        assert_return(index > 0, -EINVAL);
-        assert_return(family == AF_INET || family == AF_INET6, -EINVAL);
+        assert_return((nlmsg_type == RTM_GETADDR && index == 0) ||
+                      index > 0, -EINVAL);
+        assert_return((nlmsg_type == RTM_GETADDR && family == AF_UNSPEC) ||
+                      family == AF_INET || family == AF_INET6, -EINVAL);
         assert_return(ret, -EINVAL);
 
         r = message_new(rtnl, ret, nlmsg_type);
         if (r < 0)
                 return r;
 
-        if (nlmsg_type == RTM_GETADDR && family == AF_INET)
+        if (nlmsg_type == RTM_GETADDR)
                 (*ret)->hdr->nlmsg_flags |= NLM_F_DUMP;
 
         ifa = NLMSG_DATA((*ret)->hdr);
@@ -286,6 +565,19 @@ int sd_rtnl_message_new_addr(sd_rtnl *rtnl, sd_rtnl_message **ret,
         return 0;
 }
 
+int sd_rtnl_message_new_addr_update(sd_rtnl *rtnl, sd_rtnl_message **ret,
+                             int index, int family) {
+        int r;
+
+        r = sd_rtnl_message_new_addr(rtnl, ret, RTM_NEWADDR, index, family);
+        if (r < 0)
+                return r;
+
+        (*ret)->hdr->nlmsg_flags |= NLM_F_REPLACE;
+
+        return 0;
+}
+
 sd_rtnl_message *sd_rtnl_message_ref(sd_rtnl_message *m) {
         if (m)
                 assert_se(REFCNT_INC(m->n_ref) >= 2);
@@ -302,6 +594,8 @@ sd_rtnl_message *sd_rtnl_message_unref(sd_rtnl_message *m) {
                 for (i = 0; i <= m->n_containers; i++)
                         free(m->rta_offset_tb[i]);
 
+                sd_rtnl_message_unref(m->next);
+
                 free(m);
         }
 
@@ -353,27 +647,46 @@ int sd_rtnl_message_link_get_flags(sd_rtnl_message *m, unsigned *flags) {
         return 0;
 }
 
+int sd_rtnl_message_link_get_type(sd_rtnl_message *m, unsigned *type) {
+        struct ifinfomsg *ifi;
+
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+        assert_return(type, -EINVAL);
+
+        ifi = NLMSG_DATA(m->hdr);
+
+        *type = ifi->ifi_type;
+
+        return 0;
+}
+
 /* If successful the updated message will be correctly aligned, if
    unsuccessful the old message is untouched. */
 static int add_rtattr(sd_rtnl_message *m, unsigned short type, const void *data, size_t data_length) {
-        uint32_t rta_length, message_length;
+        uint32_t rta_length;
+        size_t message_length, padding_length;
         struct nlmsghdr *new_hdr;
         struct rtattr *rta;
         char *padding;
         unsigned i;
+        int offset;
 
         assert(m);
         assert(m->hdr);
         assert(!m->sealed);
         assert(NLMSG_ALIGN(m->hdr->nlmsg_len) == m->hdr->nlmsg_len);
-        assert(!data || data_length > 0);
-        assert(data || m->n_containers < RTNL_CONTAINER_DEPTH);
+        assert(!data || data_length);
+
+        /* get offset of the new attribute */
+        offset = m->hdr->nlmsg_len;
 
         /* get the size of the new rta attribute (with padding at the end) */
         rta_length = RTA_LENGTH(data_length);
 
         /* get the new message size (with padding at the end) */
-        message_length = m->hdr->nlmsg_len + RTA_ALIGN(rta_length);
+        message_length = offset + RTA_ALIGN(rta_length);
 
         /* realloc to fit the new attribute */
         new_hdr = realloc(m->hdr, message_length);
@@ -382,33 +695,34 @@ static int add_rtattr(sd_rtnl_message *m, unsigned short type, const void *data,
         m->hdr = new_hdr;
 
         /* get pointer to the attribute we are about to add */
-        rta = (struct rtattr *) ((uint8_t *) m->hdr + m->hdr->nlmsg_len);
+        rta = (struct rtattr *) ((uint8_t *) m->hdr + offset);
 
         /* if we are inside containers, extend them */
         for (i = 0; i < m->n_containers; i++)
-                GET_CONTAINER(m, i)->rta_len += message_length - m->hdr->nlmsg_len;
+                GET_CONTAINER(m, i)->rta_len += message_length - offset;
 
         /* fill in the attribute */
         rta->rta_type = type;
         rta->rta_len = rta_length;
-        if (!data) {
-                //TODO: simply return this value rather than check for !data
-                /* this is the start of a new container */
-                m->container_offsets[m->n_containers ++] = m->hdr->nlmsg_len;
-        } else {
+        if (data)
                 /* we don't deal with the case where the user lies about the type
                  * and gives us too little data (so don't do that)
-                */
+                 */
                 padding = mempcpy(RTA_DATA(rta), data, data_length);
-                /* make sure also the padding at the end of the message is initialized */
-                memzero(padding,
-                        (uint8_t *) m->hdr + message_length - (uint8_t *) padding);
+        else {
+                /* if no data was passed, make sure we still initialize the padding
+                   note that we can have data_length > 0 (used by some containers) */
+                padding = RTA_DATA(rta);
         }
 
+        /* make sure also the padding at the end of the message is initialized */
+        padding_length = (uint8_t*)m->hdr + message_length - (uint8_t*)padding;
+        memzero(padding, padding_length);
+
         /* update message size */
         m->hdr->nlmsg_len = message_length;
 
-        return 0;
+        return offset;
 }
 
 static int message_attribute_has_type(sd_rtnl_message *m, uint16_t attribute_type, uint16_t data_type) {
@@ -440,8 +754,8 @@ int sd_rtnl_message_append_string(sd_rtnl_message *m, unsigned short type, const
                 size = (size_t)r;
 
         if (size) {
-                length = strnlen(data, size);
-                if (length >= size)
+                length = strnlen(data, size+1);
+                if (length > size)
                         return -EINVAL;
         } else
                 length = strlen(data);
@@ -559,12 +873,31 @@ int sd_rtnl_message_append_ether_addr(sd_rtnl_message *m, unsigned short type, c
         return 0;
 }
 
+int sd_rtnl_message_append_cache_info(sd_rtnl_message *m, unsigned short type, const struct ifa_cacheinfo *info) {
+        int r;
+
+        assert_return(m, -EINVAL);
+        assert_return(!m->sealed, -EPERM);
+        assert_return(info, -EINVAL);
+
+        r = message_attribute_has_type(m, type, NLA_CACHE_INFO);
+        if (r < 0)
+                return r;
+
+        r = add_rtattr(m, type, info, sizeof(struct ifa_cacheinfo));
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
 int sd_rtnl_message_open_container(sd_rtnl_message *m, unsigned short type) {
         size_t size;
         int r;
 
         assert_return(m, -EINVAL);
         assert_return(!m->sealed, -EPERM);
+        assert_return(m->n_containers < RTNL_CONTAINER_DEPTH, -ERANGE);
 
         r = message_attribute_has_type(m, type, NLA_NESTED);
         if (r < 0)
@@ -578,10 +911,12 @@ int sd_rtnl_message_open_container(sd_rtnl_message *m, unsigned short type) {
         if (r < 0)
                 return r;
 
-        r = add_rtattr(m, type, NULL, size);
+        r = add_rtattr(m, type | NLA_F_NESTED, NULL, size);
         if (r < 0)
                 return r;
 
+        m->container_offsets[m->n_containers ++] = r;
+
         return 0;
 }
 
@@ -611,6 +946,8 @@ int sd_rtnl_message_open_container_union(sd_rtnl_message *m, unsigned short type
         if (r < 0)
                 return r;
 
+        m->container_offsets[m->n_containers ++] = r;
+
         return 0;
 }
 
@@ -646,10 +983,12 @@ int rtnl_message_read_internal(sd_rtnl_message *m, unsigned short type, void **d
         return RTA_PAYLOAD(rta);
 }
 
-int sd_rtnl_message_read_string(sd_rtnl_message *m, unsigned short type, char **data) {
+int sd_rtnl_message_read_string(sd_rtnl_message *m, unsigned short type, const char **data) {
         int r;
         void *attr_data;
 
+        assert_return(m, -EINVAL);
+
         r = message_attribute_has_type(m, type, NLA_STRING);
         if (r < 0)
                 return r;
@@ -660,7 +999,8 @@ int sd_rtnl_message_read_string(sd_rtnl_message *m, unsigned short type, char **
         else if (strnlen(attr_data, r) >= (size_t) r)
                 return -EIO;
 
-        *data = (char *) attr_data;
+        if (data)
+                *data = (const char *) attr_data;
 
         return 0;
 }
@@ -669,6 +1009,8 @@ int sd_rtnl_message_read_u8(sd_rtnl_message *m, unsigned short type, uint8_t *da
         int r;
         void *attr_data;
 
+        assert_return(m, -EINVAL);
+
         r = message_attribute_has_type(m, type, NLA_U8);
         if (r < 0)
                 return r;
@@ -679,7 +1021,8 @@ int sd_rtnl_message_read_u8(sd_rtnl_message *m, unsigned short type, uint8_t *da
         else if ((size_t) r < sizeof(uint8_t))
                 return -EIO;
 
-        *data = *(uint8_t *) attr_data;
+        if (data)
+                *data = *(uint8_t *) attr_data;
 
         return 0;
 }
@@ -688,6 +1031,8 @@ int sd_rtnl_message_read_u16(sd_rtnl_message *m, unsigned short type, uint16_t *
         int r;
         void *attr_data;
 
+        assert_return(m, -EINVAL);
+
         r = message_attribute_has_type(m, type, NLA_U16);
         if (r < 0)
                 return r;
@@ -698,7 +1043,8 @@ int sd_rtnl_message_read_u16(sd_rtnl_message *m, unsigned short type, uint16_t *
         else if ((size_t) r < sizeof(uint16_t))
                 return -EIO;
 
-        *data = *(uint16_t *) attr_data;
+        if (data)
+                *data = *(uint16_t *) attr_data;
 
         return 0;
 }
@@ -707,6 +1053,8 @@ int sd_rtnl_message_read_u32(sd_rtnl_message *m, unsigned short type, uint32_t *
         int r;
         void *attr_data;
 
+        assert_return(m, -EINVAL);
+
         r = message_attribute_has_type(m, type, NLA_U32);
         if (r < 0)
                 return r;
@@ -717,7 +1065,8 @@ int sd_rtnl_message_read_u32(sd_rtnl_message *m, unsigned short type, uint32_t *
         else if ((size_t)r < sizeof(uint32_t))
                 return -EIO;
 
-        *data = *(uint32_t *) attr_data;
+        if (data)
+                *data = *(uint32_t *) attr_data;
 
         return 0;
 }
@@ -726,6 +1075,8 @@ int sd_rtnl_message_read_ether_addr(sd_rtnl_message *m, unsigned short type, str
         int r;
         void *attr_data;
 
+        assert_return(m, -EINVAL);
+
         r = message_attribute_has_type(m, type, NLA_ETHER_ADDR);
         if (r < 0)
                 return r;
@@ -736,7 +1087,30 @@ int sd_rtnl_message_read_ether_addr(sd_rtnl_message *m, unsigned short type, str
         else if ((size_t)r < sizeof(struct ether_addr))
                 return -EIO;
 
-        memcpy(data, attr_data, sizeof(struct ether_addr));
+        if (data)
+                memcpy(data, attr_data, sizeof(struct ether_addr));
+
+        return 0;
+}
+
+int sd_rtnl_message_read_cache_info(sd_rtnl_message *m, unsigned short type, struct ifa_cacheinfo *info) {
+        int r;
+        void *attr_data;
+
+        assert_return(m, -EINVAL);
+
+        r = message_attribute_has_type(m, type, NLA_CACHE_INFO);
+        if (r < 0)
+                return r;
+
+        r = rtnl_message_read_internal(m, type, &attr_data);
+        if (r < 0)
+                return r;
+        else if ((size_t)r < sizeof(struct ifa_cacheinfo))
+                return -EIO;
+
+        if (info)
+                memcpy(info, attr_data, sizeof(struct ifa_cacheinfo));
 
         return 0;
 }
@@ -745,6 +1119,8 @@ int sd_rtnl_message_read_in_addr(sd_rtnl_message *m, unsigned short type, struct
         int r;
         void *attr_data;
 
+        assert_return(m, -EINVAL);
+
         r = message_attribute_has_type(m, type, NLA_IN_ADDR);
         if (r < 0)
                 return r;
@@ -755,7 +1131,8 @@ int sd_rtnl_message_read_in_addr(sd_rtnl_message *m, unsigned short type, struct
         else if ((size_t)r < sizeof(struct in_addr))
                 return -EIO;
 
-        memcpy(data, attr_data, sizeof(struct in_addr));
+        if (data)
+                memcpy(data, attr_data, sizeof(struct in_addr));
 
         return 0;
 }
@@ -764,6 +1141,8 @@ int sd_rtnl_message_read_in6_addr(sd_rtnl_message *m, unsigned short type, struc
         int r;
         void *attr_data;
 
+        assert_return(m, -EINVAL);
+
         r = message_attribute_has_type(m, type, NLA_IN_ADDR);
         if (r < 0)
                 return r;
@@ -774,7 +1153,8 @@ int sd_rtnl_message_read_in6_addr(sd_rtnl_message *m, unsigned short type, struc
         else if ((size_t)r < sizeof(struct in6_addr))
                 return -EIO;
 
-        memcpy(data, attr_data, sizeof(struct in6_addr));
+        if (data)
+                memcpy(data, attr_data, sizeof(struct in6_addr));
 
         return 0;
 }
@@ -803,7 +1183,7 @@ int sd_rtnl_message_enter_container(sd_rtnl_message *m, unsigned short type) {
                         return r;
         } else if (nl_type->type == NLA_UNION) {
                 const NLTypeSystemUnion *type_system_union;
-                char *key;
+                const char *key;
 
                 r = type_system_get_type_system_union(m->container_type_system[m->n_containers],
                                                       &type_system_union,
@@ -868,13 +1248,20 @@ uint32_t rtnl_message_get_serial(sd_rtnl_message *m) {
         return m->hdr->nlmsg_seq;
 }
 
+int sd_rtnl_message_is_error(sd_rtnl_message *m) {
+        assert_return(m, 0);
+        assert_return(m->hdr, 0);
+
+        return m->hdr->nlmsg_type == NLMSG_ERROR;
+}
+
 int sd_rtnl_message_get_errno(sd_rtnl_message *m) {
         struct nlmsgerr *err;
 
         assert_return(m, -EINVAL);
         assert_return(m->hdr, -EINVAL);
 
-        if (m->hdr->nlmsg_type != NLMSG_ERROR)
+        if (!sd_rtnl_message_is_error(m))
                 return 0;
 
         err = NLMSG_DATA(m->hdr);
@@ -882,26 +1269,6 @@ int sd_rtnl_message_get_errno(sd_rtnl_message *m) {
         return err->error;
 }
 
-static int message_receive_need(sd_rtnl *rtnl, size_t *need) {
-        assert(rtnl);
-        assert(need);
-
-        /* ioctl(rtnl->fd, FIONREAD, &need)
-           Does not appear to work on netlink sockets. libnl uses
-           MSG_PEEK instead. I don't know if that is worth the
-           extra roundtrip.
-
-           For now we simply use the maximum message size the kernel
-           may use (NLMSG_GOODSIZE), and then realloc to the actual
-           size after reading the message (hence avoiding huge memory
-           usage in case many small messages are kept around) */
-        *need = page_size();
-        if (*need > 8192UL)
-                *need = 8192UL;
-
-        return 0;
-}
-
 int rtnl_message_parse(sd_rtnl_message *m,
                        size_t **rta_offset_tb,
                        unsigned short *rta_tb_size,
@@ -911,14 +1278,14 @@ int rtnl_message_parse(sd_rtnl_message *m,
         unsigned short type;
         size_t *tb;
 
-        tb = (size_t *) new0(size_t *, max);
+        tb = new0(size_t, max + 1);
         if(!tb)
                 return -ENOMEM;
 
-        *rta_tb_size = max;
+        *rta_tb_size = max + 1;
 
         for (; RTA_OK(rta, rt_len); rta = RTA_NEXT(rta, rt_len)) {
-                type = rta->rta_type;
+                type = RTA_TYPE(rta);
 
                 /* if the kernel is newer than the headers we used
                    when building, we ignore out-of-range attributes
@@ -959,88 +1326,215 @@ int socket_write_message(sd_rtnl *nl, sd_rtnl_message *m) {
         return k;
 }
 
+static int socket_recv_message(int fd, struct iovec *iov, uint32_t *_group, bool peek) {
+        uint8_t cred_buffer[CMSG_SPACE(sizeof(struct ucred)) +
+                            CMSG_SPACE(sizeof(struct nl_pktinfo))];
+        struct msghdr msg = {
+                .msg_iov = iov,
+                .msg_iovlen = 1,
+                .msg_control = cred_buffer,
+                .msg_controllen = sizeof(cred_buffer),
+        };
+        struct cmsghdr *cmsg;
+        uint32_t group = 0;
+        bool auth = false;
+        int r;
+
+        assert(fd >= 0);
+        assert(iov);
+
+        r = recvmsg(fd, &msg, MSG_TRUNC | (peek ? MSG_PEEK : 0));
+        if (r < 0) {
+                /* no data */
+                if (errno == ENOBUFS)
+                        log_debug("rtnl: kernel receive buffer overrun");
+
+                return (errno == EAGAIN) ? 0 : -errno;
+        } else if (r == 0)
+                /* connection was closed by the kernel */
+                return -ECONNRESET;
+
+        for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+                if (cmsg->cmsg_level == SOL_SOCKET &&
+                    cmsg->cmsg_type == SCM_CREDENTIALS &&
+                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
+                        struct ucred *ucred = (void *)CMSG_DATA(cmsg);
+
+                        /* from the kernel */
+                        if (ucred->uid == 0 && ucred->pid == 0)
+                                auth = true;
+                } else if (cmsg->cmsg_level == SOL_NETLINK &&
+                           cmsg->cmsg_type == NETLINK_PKTINFO &&
+                           cmsg->cmsg_len == CMSG_LEN(sizeof(struct nl_pktinfo))) {
+                        struct nl_pktinfo *pktinfo = (void *)CMSG_DATA(cmsg);
+
+                        /* multi-cast group */
+                        group = pktinfo->group;
+                }
+        }
+
+        if (!auth)
+                /* not from the kernel, ignore */
+                return 0;
+
+        if (group)
+                *_group = group;
+
+        return r;
+}
+
 /* On success, the number of bytes received is returned and *ret points to the received message
  * which has a valid header and the correct size.
  * If nothing useful was received 0 is returned.
  * On failure, a negative error code is returned.
  */
-int socket_read_message(sd_rtnl *nl, sd_rtnl_message **ret) {
-        _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
-        const NLType *nl_type;
-        struct nlmsghdr *new_hdr;
-        union {
-                struct sockaddr sa;
-                struct sockaddr_nl nl;
-        } addr;
-        socklen_t addr_len;
-        size_t need, len;
+int socket_read_message(sd_rtnl *rtnl) {
+        _cleanup_rtnl_message_unref_ sd_rtnl_message *first = NULL;
+        struct iovec iov = {};
+        uint32_t group = 0;
+        bool multi_part = false, done = false;
+        struct nlmsghdr *new_msg;
+        size_t len;
         int r;
+        unsigned i = 0;
 
-        assert(nl);
-        assert(ret);
+        assert(rtnl);
+        assert(rtnl->rbuffer);
+        assert(rtnl->rbuffer_allocated >= sizeof(struct nlmsghdr));
 
-        r = message_receive_need(nl, &need);
-        if (r < 0)
+        /* read nothing, just get the pending message size */
+        r = socket_recv_message(rtnl->fd, &iov, &group, true);
+        if (r <= 0)
                 return r;
+        else
+                len = (size_t)r;
 
-        r = message_new_empty(nl, &m, need);
-        if (r < 0)
-                return r;
+        /* make room for the pending message */
+        if (!greedy_realloc((void **)&rtnl->rbuffer,
+                            &rtnl->rbuffer_allocated,
+                            len, sizeof(uint8_t)))
+                return -ENOMEM;
 
-        addr_len = sizeof(addr);
+        iov.iov_base = rtnl->rbuffer;
+        iov.iov_len = rtnl->rbuffer_allocated;
 
-        r = recvfrom(nl->fd, m->hdr, need,
-                        0, &addr.sa, &addr_len);
-        if (r < 0)
-                return (errno == EAGAIN) ? 0 : -errno; /* no data */
-        else if (r == 0)
-                return -ECONNRESET; /* connection was closed by the kernel */
-        else if (addr_len != sizeof(addr.nl) ||
-                        addr.nl.nl_family != AF_NETLINK)
-                return -EIO; /* not a netlink message */
-        else if (addr.nl.nl_pid != 0)
-                return 0; /* not from the kernel */
-        else if ((size_t) r < sizeof(struct nlmsghdr) ||
-                        (size_t) r < m->hdr->nlmsg_len)
-                return -EIO; /* too small (we do accept too big though) */
-        else if (m->hdr->nlmsg_pid && m->hdr->nlmsg_pid != nl->sockaddr.nl.nl_pid)
-                return 0; /* not broadcast and not for us */
+        /* read the pending message */
+        r = socket_recv_message(rtnl->fd, &iov, &group, false);
+        if (r <= 0)
+                return r;
         else
-                len = (size_t) r;
+                len = (size_t)r;
 
-        /* silently drop noop messages */
-        if (m->hdr->nlmsg_type == NLMSG_NOOP)
-                return 0;
+        if (len > rtnl->rbuffer_allocated)
+                /* message did not fit in read buffer */
+                return -EIO;
 
-        /* check that we support this message type */
-        r = type_system_get_type(NULL, &nl_type, m->hdr->nlmsg_type);
-        if (r < 0) {
-                if (r == -ENOTSUP)
-                        log_debug("sd-rtnl: ignored message with unknown type: %u",
-                                  m->hdr->nlmsg_type);
+        if (NLMSG_OK(rtnl->rbuffer, len) && rtnl->rbuffer->nlmsg_flags & NLM_F_MULTI) {
+                multi_part = true;
 
-                return 0;
+                for (i = 0; i < rtnl->rqueue_partial_size; i++) {
+                        if (rtnl_message_get_serial(rtnl->rqueue_partial[i]) ==
+                            rtnl->rbuffer->nlmsg_seq) {
+                                first = rtnl->rqueue_partial[i];
+                                break;
+                        }
+                }
         }
 
-        /* check that the size matches the message type */
-        if (len < NLMSG_LENGTH(nl_type->size))
-                        return -EIO;
+        for (new_msg = rtnl->rbuffer; NLMSG_OK(new_msg, len) && !done; new_msg = NLMSG_NEXT(new_msg, len)) {
+                _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
+                const NLType *nl_type;
 
-        /* we probably allocated way too much memory, give it back */
-        new_hdr = realloc(m->hdr, len);
-        if (!new_hdr)
-                return -ENOMEM;
-        m->hdr = new_hdr;
+                if (!group && new_msg->nlmsg_pid != rtnl->sockaddr.nl.nl_pid)
+                        /* not broadcast and not for us */
+                        continue;
 
-        /* seal and parse the top-level message */
-        r = sd_rtnl_message_rewind(m);
-        if (r < 0)
-                return r;
+                if (new_msg->nlmsg_type == NLMSG_NOOP)
+                        /* silently drop noop messages */
+                        continue;
 
-        *ret = m;
-        m = NULL;
+                if (new_msg->nlmsg_type == NLMSG_DONE) {
+                        /* finished reading multi-part message */
+                        done = true;
+
+                        continue;
+                }
+
+                /* check that we support this message type */
+                r = type_system_get_type(NULL, &nl_type, new_msg->nlmsg_type);
+                if (r < 0) {
+                        if (r == -ENOTSUP)
+                                log_debug("sd-rtnl: ignored message with unknown type: %u",
+                                          new_msg->nlmsg_type);
+
+                        continue;
+                }
+
+                /* check that the size matches the message type */
+                if (new_msg->nlmsg_len < NLMSG_LENGTH(nl_type->size)) {
+                        log_debug("sd-rtnl: message larger than expected, dropping");
+                        continue;
+                }
+
+                r = message_new_empty(rtnl, &m);
+                if (r < 0)
+                        return r;
+
+                m->hdr = memdup(new_msg, new_msg->nlmsg_len);
+                if (!m->hdr)
+                        return -ENOMEM;
+
+                /* seal and parse the top-level message */
+                r = sd_rtnl_message_rewind(m);
+                if (r < 0)
+                        return r;
 
-        return len;
+                /* push the message onto the multi-part message stack */
+                if (first)
+                        m->next = first;
+                first = m;
+                m = NULL;
+        }
+
+        if (len)
+                log_debug("sd-rtnl: discarding %zu bytes of incoming message", len);
+
+        if (!first)
+                return 0;
+
+        if (!multi_part || done) {
+                /* we got a complete message, push it on the read queue */
+                r = rtnl_rqueue_make_room(rtnl);
+                if (r < 0)
+                        return r;
+
+                rtnl->rqueue[rtnl->rqueue_size ++] = first;
+                first = NULL;
+
+                if (multi_part && (i < rtnl->rqueue_partial_size)) {
+                        /* remove the message form the partial read queue */
+                        memmove(rtnl->rqueue_partial + i,rtnl->rqueue_partial + i + 1,
+                                sizeof(sd_rtnl_message*) * (rtnl->rqueue_partial_size - i - 1));
+                        rtnl->rqueue_partial_size --;
+                }
+
+                return 1;
+        } else {
+                /* we only got a partial multi-part message, push it on the
+                   partial read queue */
+                if (i < rtnl->rqueue_partial_size) {
+                        rtnl->rqueue_partial[i] = first;
+                } else {
+                        r = rtnl_rqueue_partial_make_room(rtnl);
+                        if (r < 0)
+                                return r;
+
+                        rtnl->rqueue_partial[rtnl->rqueue_partial_size ++] = first;
+                }
+                first = NULL;
+
+                return 0;
+        }
 }
 
 int sd_rtnl_message_rewind(sd_rtnl_message *m) {
@@ -1101,3 +1595,9 @@ void rtnl_message_seal(sd_rtnl_message *m) {
 
         m->sealed = true;
 }
+
+sd_rtnl_message *sd_rtnl_message_next(sd_rtnl_message *m) {
+        assert_return(m, NULL);
+
+        return m->next;
+}