Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20140410005542.GL26358@brightrain.aerifal.cx>
Date: Wed, 9 Apr 2014 20:55:42 -0400
From: Rich Felker <dalias@...ifal.cx>
To: musl@...ts.openwall.com
Cc: Timo Teras <timo.teras@....fi>, justin@...cialbusservice.com
Subject: Re: if_nameindex/getifaddrs and dhcpcd issue

On Wed, Apr 09, 2014 at 05:02:22PM +0300, Timo Teras wrote:
> On Tue, 8 Apr 2014 19:08:07 +0300
> Timo Teras <timo.teras@....fi> wrote:
> 
> > > > I'm willing to write an alternative getifaddrs() and
> > > > if_nameindex() implementations using netlink. Perhaps let's see
> > > > how they end up?  
> > > 
> > > It wouldn't hurt; if they're not upstreamed they could still be used
> > > as a separate library for the one application which needs this
> > > functionality (dhcpcd).
> > 
> > Yeah. I'll play with this and see what I come up with. I'll also
> > delete the bad kernel #define's and try to do them a bit better - not
> > sure how well I succeed that at, though.
> 
> Posting here the current version of my patch as requested.
> 
> While it has some corner cases still that need cleaning, it's a good
> start. It implements the APIs, and is usable in read world.
> 
> From 4a82a84fb4fbfdae48f14bf86df0fd92086b7556 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@....fi>
> Date: Tue, 8 Apr 2014 14:03:16 +0000
> Subject: [PATCH] reimplement if_nameindex and getifaddrs using netlink
> 
> ---
>  src/network/__netlink.c    |  68 ++++++++++
>  src/network/__netlink.h    | 143 ++++++++++++++++++++
>  src/network/getifaddrs.c   | 322 ++++++++++++++++++++++++---------------------
>  src/network/if_nameindex.c | 105 +++++++++------
>  4 files changed, 451 insertions(+), 187 deletions(-)
>  create mode 100644 src/network/__netlink.c
>  create mode 100644 src/network/__netlink.h
> 
> diff --git a/src/network/__netlink.c b/src/network/__netlink.c
> new file mode 100644
> index 0000000..d0c9fab
> --- /dev/null
> +++ b/src/network/__netlink.c
> @@ -0,0 +1,68 @@
> +#define _GNU_SOURCE
> +#include <errno.h>
> +#include <string.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <sys/socket.h>
> +#include <sys/param.h>
> +#include "__netlink.h"
> +
> +struct __netlink_handle {
> +	int fd;
> +	unsigned int seq;
> +	size_t bufsize;
> +};
> +
> +struct __netlink_handle *__netlink_open(int type)
> +{
> +	struct __netlink_handle *nh;
> +	int bufsize = getpagesize();
                      ^^^^^^^^^^^

getpagesize is a nonstandard function, not in the namespace that can
be used. sysconf could be used, but in musl it's preferable to use
PAGE_SIZE. On archs that don't define it, libc.h (which you can
include) defines a fallback to get the runtime value efficiently.

> +	/* required buffer size is MIN(8192,pagesize)-sizeof(struct skb_shared_info)
> +	 * the estimate for skb_shared_info size is conservative, but gives enough
> +	 * space to fit struct __netlink_handle including malloc overhead in one page . */
> +	if (bufsize > 8192) bufsize = 8192;

But in this case I would juse use 8192 as the fixed size.

> +	bufsize -= 128;
> +	nh = malloc(sizeof(struct __netlink_handle) + bufsize);

Then there's no need to malloc this, just use a normal automatic buffer.

> +	if (!nh) return 0;

And no need to check for failure.

> +	nh->fd = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, type);

socket is a cancellation point, but if_nameindex is not, so you need
to block cancellation. Note: this bug exists in the current code too.

> +	if (nh->fd < 0) { free(nh); return 0; }
> +	nh->seq = 1;
> +	nh->bufsize = bufsize;
> +	return nh;
> +}
> +
> +void __netlink_close(struct __netlink_handle *nh)
> +{
> +	close(nh->fd);
> +	free(nh);
> +}

Same for close, but here it's easy to just do
__syscall(SYS_close,nh->fd).

However, I think the whole idea of having a netlink handle that's
allocated and freed is ugly and inefficient. Just return a struct with
two members, seq and fd, by value.

> +int __netlink_enumerate(struct __netlink_handle *nh, int type, int (*cb)(void *ctx, struct nlmsghdr *h), void *ctx)
> +{
> +	struct nlmsghdr *h;
> +	void *buf = (void*)(nh+1);
> +	struct {
> +		struct nlmsghdr nlh;
> +		struct rtgenmsg g;
> +	} *req = buf;
> +	int r, ret = 0;
> +
> +	memset(req, 0, NETLINK_ALIGN(sizeof(*req)));
> +	req->nlh.nlmsg_len = sizeof(*req);
> +	req->nlh.nlmsg_type = type;
> +	req->nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
> +	req->nlh.nlmsg_seq = nh->seq++;
> +	req->g.rtgen_family = AF_UNSPEC;
> +	r = send(nh->fd, req, sizeof(*req), 0);

send is also a cancellation point. There may be others I'm missing, so
if in doubt, all of these functions should just temporarily block and
restore cancellation state.

> +	if (r < 0) return r;
> +
> +	while (1) {
> +		r = recv(nh->fd, buf, nh->bufsize, MSG_DONTWAIT);
> +		if (r <= 0) return -1;
> +		for (h = (struct nlmsghdr*) buf; NLMSG_OK(h, (void*)((uint8_t*)buf+r)); h = NLMSG_NEXT(h)) {
> +			if (h->nlmsg_type == NLMSG_DONE) return ret;
> +			if (h->nlmsg_type == NLMSG_ERROR) return -1;
> +			if (!ret) ret = cb(ctx, h);
> +		}
> +	}
> +}
> diff --git a/src/network/__netlink.h b/src/network/__netlink.h
> new file mode 100644
> index 0000000..94728f3
> --- /dev/null
> +++ b/src/network/__netlink.h
> @@ -0,0 +1,143 @@
> +#include <stdint.h>
> +
> +/* linux/netlink.h */
> +
> +#define NETLINK_ROUTE 0
> +
> +struct nlmsghdr {
> +	uint32_t	nlmsg_len;
> +	uint16_t	nlmsg_type;
> +	uint16_t	nlmsg_flags;
> +	uint32_t	nlmsg_seq;
> +	uint32_t	nlmsg_pid;
> +};
> +
> +#define NLM_F_REQUEST	1
> +#define NLM_F_MULTI	2
> +#define NLM_F_ACK	4
> +#define NLM_F_ECHO	8
> +#define NLM_F_DUMP_INTR	16
> +
> +#define NLM_F_ROOT	0x100
> +#define NLM_F_MATCH	0x200
> +#define NLM_F_ATOMIC	0x400
> +#define NLM_F_DUMP	(NLM_F_ROOT|NLM_F_MATCH)
> +
> +#define NLMSG_NOOP	0x1
> +#define NLMSG_ERROR	0x2
> +#define NLMSG_DONE	0x3
> +#define NLMSG_OVERRUN	0x4
> +
> +/* linux/rtnetlink.h */
> +
> +#define RTM_GETLINK	18
> +#define RTM_GETADDR	22
> +
> +struct rtattr {
> +	unsigned short	rta_len;
> +	unsigned short	rta_type;
> +};
> +
> +struct rtgenmsg {
> +	unsigned char	rtgen_family;
> +};
> +
> +struct ifinfomsg {
> +	unsigned char	ifi_family;
> +	unsigned char	__ifi_pad;
> +	unsigned short	ifi_type;
> +	int		ifi_index;
> +	unsigned	ifi_flags;
> +	unsigned	ifi_change;
> +};
> +
> +/* linux/if_link.h */
> +
> +enum {
> +	IFLA_UNSPEC,
> +	IFLA_ADDRESS,
> +	IFLA_BROADCAST,
> +	IFLA_IFNAME,
> +	IFLA_MTU,
> +	IFLA_LINK,
> +	IFLA_QDISC,
> +	IFLA_STATS,
> +	IFLA_COST,
> +	IFLA_PRIORITY,
> +	IFLA_MASTER,
> +	IFLA_WIRELESS,
> +	IFLA_PROTINFO,
> +	IFLA_TXQLEN,
> +	IFLA_MAP,
> +	IFLA_WEIGHT,
> +	IFLA_OPERSTATE,
> +	IFLA_LINKMODE,
> +	IFLA_LINKINFO,
> +	IFLA_NET_NS_PID,
> +	IFLA_IFALIAS,
> +	IFLA_NUM_VF,
> +	IFLA_VFINFO_LIST,
> +	IFLA_STATS64,
> +	IFLA_VF_PORTS,
> +	IFLA_PORT_SELF,
> +	IFLA_AF_SPEC,
> +	IFLA_GROUP,
> +	IFLA_NET_NS_FD,
> +	IFLA_EXT_MASK,
> +	IFLA_PROMISCUITY,
> +	IFLA_NUM_TX_QUEUES,
> +	IFLA_NUM_RX_QUEUES,
> +	IFLA_CARRIER,
> +	IFLA_PHYS_PORT_ID,
> +	__IFLA_MAX
> +};
> +
> +/* linux/if_addr.h */
> +
> +struct ifaddrmsg {
> +	uint8_t		ifa_family;
> +	uint8_t		ifa_prefixlen;
> +	uint8_t		ifa_flags;
> +	uint8_t		ifa_scope;
> +	uint32_t	ifa_index;
> +};
> +
> +enum {
> +	IFA_UNSPEC,
> +	IFA_ADDRESS,
> +	IFA_LOCAL,
> +	IFA_LABEL,
> +	IFA_BROADCAST,
> +	IFA_ANYCAST,
> +	IFA_CACHEINFO,
> +	IFA_MULTICAST,
> +	__IFA_MAX
> +};
> +
> +/* musl */
> +
> +#define NETLINK_ALIGN(len)	(((len)+3) & ~3)
> +#define NLMSG_DATA(nlh)		((void*)((char*)(nlh)+NETLINK_ALIGN(sizeof(struct nlmsghdr))))
> +#define NLMSG_DATALEN(nlh)	((nlh)->nlmsg_len-NETLINK_ALIGN(sizeof(struct nlmsghdr)))
> +#define NLMSG_DATAEND(nlh)	((char*)(nlh)+(nlh)->nlmsg_len)
> +#define NLMSG_NEXT(nlh)		(struct nlmsghdr*)((char*)(nlh)+NETLINK_ALIGN((nlh)->nlmsg_len))
> +#define NLMSG_OK(nlh,end)	(NLMSG_DATA(nlh) <= (end) && \
> +				 (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
> +				 (void*)NLMSG_NEXT(nlh) <= (end))
> +
> +#define RTA_DATA(rta)		((void*)((char*)(rta)+NETLINK_ALIGN(sizeof(struct rtattr))))
> +#define RTA_DATALEN(rta)	((rta)->rta_len-NETLINK_ALIGN(sizeof(struct rtattr)))
> +#define RTA_DATAEND(rta)	((char*)(rta)+(rta)->rta_len)
> +#define RTA_NEXT(rta)		(struct rtattr*)((char*)(rta)+NETLINK_ALIGN((rta)->rta_len))
> +#define RTA_OK(rta,end)		(RTA_DATA(rta) <= (void*)(end) && \
> +				 (rta)->rta_len >= sizeof(struct rtattr) && \
> +				 (void*)RTA_NEXT(rta) <= (void*)(end))
> +
> +#define NLMSG_RTA(nlh,len)	((void*)((char*)(nlh)+NETLINK_ALIGN(sizeof(struct nlmsghdr))+NETLINK_ALIGN(len)))
> +#define NLMSG_RTAOK(rta,nlh)	RTA_OK(rta,NLMSG_DATAEND(nlh))
> +
> +struct __netlink_handle;
> +
> +struct __netlink_handle *__netlink_open(int type);
> +void __netlink_close(struct __netlink_handle *h);
> +int __netlink_enumerate(struct __netlink_handle *h, int type, int (*cb)(void *ctx, struct nlmsghdr *h), void *ctx);
> diff --git a/src/network/getifaddrs.c b/src/network/getifaddrs.c
> index 5a94cc7..5b1ebe7 100644
> --- a/src/network/getifaddrs.c
> +++ b/src/network/getifaddrs.c
> @@ -1,181 +1,209 @@
> -/* (C) 2013 John Spencer. released under musl's standard MIT license. */
> -#undef _GNU_SOURCE
>  #define _GNU_SOURCE
> -#include <ifaddrs.h>
> -#include <stdlib.h>
> -#include <net/if.h> /* IFNAMSIZ, ifreq, ifconf */
> -#include <stdio.h>
> -#include <ctype.h>
> -#include <string.h>
>  #include <errno.h>
> -#include <arpa/inet.h> /* inet_pton */
> +#include <string.h>
> +#include <stdlib.h>
>  #include <unistd.h>
> -#include <sys/ioctl.h>
> -#include <sys/socket.h>
> +#include <ifaddrs.h>
> +#include <net/if.h>
> +#include "__netlink.h"
>  
> -typedef union {
> -	struct sockaddr_in6 v6;
> +/* getifaddrs() uses PF_PACKET to relay hardware addresses.
> + * But Infiniband socket address length is longer, so use this hack
> + * (like glibc) to return it anyway. */

This comment suggests a hack is going on...

> +struct sockaddr_ll_hack {
> +	unsigned short sll_family, sll_protocol;
> +	int sll_ifindex;
> +	unsigned short sll_hatype;
> +	unsigned char sll_pkttype, sll_halen;
> +	unsigned char sll_addr[24];
> +};

...but it's not at all obvious if/where there's any hack in this
structure.

I haven't reviewed the rest yet because it looked like I'd need to
actually understand what the code is doing rather than just evaluate
general things like the above. :)

Rich

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.