[iproute2,1/2] lib/libnetlink: re malloc buff if size is not enough

Message ID 1504865697-27274-2-git-send-email-liuhangbin@gmail.com
State Superseded
Delegated to: stephen hemminger
Headers show
Series
  • malloc correct buff at run time
Related show

Commit Message

Hangbin Liu Sept. 8, 2017, 10:14 a.m.
With commit 72b365e8e0fd ("libnetlink: Double the dump buffer size")
we doubled the buffer size to support more VFs. But the VFs number is
increasing all the time. Some customers even use more than 200 VFs now.

We could not double it everytime when the buffer is not enough. Let's just
not hard code the buffer size and malloc the correct number when running.

Introduce a new function rtnl_recvmsg() to get correct buffer.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 lib/libnetlink.c | 98 ++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 66 insertions(+), 32 deletions(-)

Comments

Phil Sutter Sept. 8, 2017, 11:02 a.m. | #1
Hi Hangbin,

On Fri, Sep 08, 2017 at 06:14:56PM +0800, Hangbin Liu wrote:
[...]
> diff --git a/lib/libnetlink.c b/lib/libnetlink.c
> index be7ac86..37cfb5a 100644
> --- a/lib/libnetlink.c
> +++ b/lib/libnetlink.c
> @@ -402,6 +402,59 @@ static void rtnl_dump_error(const struct rtnl_handle *rth,
>  	}
>  }
>  
> +static int rtnl_recvmsg(int fd, struct msghdr *msg, char **buf)
> +{
> +	struct iovec *iov;
> +	int len = -1, buf_len = 32768;
> +	char *buffer = *buf;

Isn't it possible to make 'buffer' static instead of the two 'buf'
variables in rtnl_dump_filter_l() and __rtnl_talk()? Then we would have
only a single buffer which is shared between both functions instead of
two which are independently allocated.

> +
> +	int flag = MSG_PEEK | MSG_TRUNC;
> +
> +	if (buffer == NULL)
> +re_malloc:
> +		buffer = malloc(buf_len);

I think using realloc() here is more appropriate since there is no need
to free the buffer in beforehand and calling realloc(NULL, len) is
equivalent to calling malloc(len). I think 'realloc' is also a better
name for the goto label.

> +	if (buffer == NULL) {
> +		fprintf(stderr, "malloc error: no enough buffer\n");

Minor typo here: s/no/not/

> +		return -1;

Return -ENOMEM?

> +	}
> +
> +	iov = msg->msg_iov;
> +	iov->iov_base = buffer;
> +	iov->iov_len = buf_len;
> +
> +re_recv:

Just call this 'recv'? (Not really important though.)

> +	len = recvmsg(fd, msg, flag);
> +
> +	if (len < 0) {
> +		if (errno == EINTR || errno == EAGAIN)
> +			return 0;

Instead of returning 0 (which makes callers retry), goto re_recv?

> +		fprintf(stderr, "netlink receive error %s (%d)\n",
> +			strerror(errno), errno);
> +		return len;
> +	}
> +
> +	if (len == 0) {
> +		fprintf(stderr, "EOF on netlink\n");
> +		return -1;

Return -ENODATA here? (Initially I though about -EOF, but EOF is -1 so
that would be incorrect).

> +	}
> +
> +	if (len > buf_len) {
> +		free(buffer);

If you use realloc() above, this can be dropped.

> +		buf_len = len;

For this to work you have to make buf_len static too, otherwise you will
unnecessarily reallocate the buffer. Oh, and that also requires the
single buffer (as pointed out above) because you will otherwise use a
common buf_len for both static buffers passed to this function.

> +		flag = 0;
> +		goto re_malloc;
> +	}
> +
> +	if (flag != 0) {
> +		flag = 0;
> +		goto re_recv;
> +	}
> +
> +	*buf = buffer;
> +	return len;
> +}
> +
>  int rtnl_dump_filter_l(struct rtnl_handle *rth,
>  		       const struct rtnl_dump_filter_arg *arg)
>  {
> @@ -413,31 +466,20 @@ int rtnl_dump_filter_l(struct rtnl_handle *rth,
>  		.msg_iov = &iov,
>  		.msg_iovlen = 1,
>  	};
> -	char buf[32768];
> +	static char *buf = NULL;

If you keep the static buffer in rtnl_recvmsg(), there is no need to
assign NULL here.

>  	int dump_intr = 0;
>  
> -	iov.iov_base = buf;
>  	while (1) {
>  		int status;
>  		const struct rtnl_dump_filter_arg *a;
>  		int found_done = 0;
>  		int msglen = 0;
>  
> -		iov.iov_len = sizeof(buf);
> -		status = recvmsg(rth->fd, &msg, 0);
> -
> -		if (status < 0) {
> -			if (errno == EINTR || errno == EAGAIN)
> -				continue;
> -			fprintf(stderr, "netlink receive error %s (%d)\n",
> -				strerror(errno), errno);
> -			return -1;
> -		}
> -
> -		if (status == 0) {
> -			fprintf(stderr, "EOF on netlink\n");
> -			return -1;
> -		}
> +		status = rtnl_recvmsg(rth->fd, &msg, &buf);
> +		if (status < 0)
> +			return status;
> +		else if (status == 0)
> +			continue;

When retrying inside rtnl_recvmsg(), it won't return 0 anymore. I
believe the whole 'while (1)' loop could go away then.

Everything noted for rtnl_dump_filter_l() applies to __rtnl_talk() as
well.

Thanks, Phil
Michal Kubecek Sept. 8, 2017, 12:32 p.m. | #2
On Fri, Sep 08, 2017 at 01:02:47PM +0200, Phil Sutter wrote:
> Hi Hangbin,
> 
> On Fri, Sep 08, 2017 at 06:14:56PM +0800, Hangbin Liu wrote:
> [...]
> > diff --git a/lib/libnetlink.c b/lib/libnetlink.c
> > index be7ac86..37cfb5a 100644
> > --- a/lib/libnetlink.c
> > +++ b/lib/libnetlink.c
> > @@ -402,6 +402,59 @@ static void rtnl_dump_error(const struct rtnl_handle *rth,
> >  	}
> >  }
> >  
> > +static int rtnl_recvmsg(int fd, struct msghdr *msg, char **buf)
> > +{
> > +	struct iovec *iov;
> > +	int len = -1, buf_len = 32768;
> > +	char *buffer = *buf;
> 
> Isn't it possible to make 'buffer' static instead of the two 'buf'
> variables in rtnl_dump_filter_l() and __rtnl_talk()? Then we would have
> only a single buffer which is shared between both functions instead of
> two which are independently allocated.

Do we have to worry about reentrancy? Only arpd is multithreaded in
iproute2 but there might be also other users of libnetlink.

> > +
> > +	int flag = MSG_PEEK | MSG_TRUNC;
> > +
> > +	if (buffer == NULL)
> > +re_malloc:
> > +		buffer = malloc(buf_len);
> 
> I think using realloc() here is more appropriate since there is no need
> to free the buffer in beforehand and calling realloc(NULL, len) is
> equivalent to calling malloc(len). I think 'realloc' is also a better
> name for the goto label.
> 
> > +	if (buffer == NULL) {
> > +		fprintf(stderr, "malloc error: no enough buffer\n");
> 
> Minor typo here: s/no/not/
> 
> > +		return -1;
> 
> Return -ENOMEM?

Hm... the only caller of rtnl_dump_filter_l only checks if the return
value is negative. Even worse, at least some of the functions calling
__rtnl_talk() check errno (or use perror()) instead, even if it's not
always preserved. That's something for a wider cleanup, I would say.

> 
> > +	}
> > +
> > +	iov = msg->msg_iov;
> > +	iov->iov_base = buffer;
> > +	iov->iov_len = buf_len;
> > +
> > +re_recv:
> 
> Just call this 'recv'? (Not really important though.)
> 
> > +	len = recvmsg(fd, msg, flag);
> > +
> > +	if (len < 0) {
> > +		if (errno == EINTR || errno == EAGAIN)
> > +			return 0;
> 
> Instead of returning 0 (which makes callers retry), goto re_recv?
> 
> > +		fprintf(stderr, "netlink receive error %s (%d)\n",
> > +			strerror(errno), errno);
> > +		return len;
> > +	}
> > +
> > +	if (len == 0) {
> > +		fprintf(stderr, "EOF on netlink\n");
> > +		return -1;
> 
> Return -ENODATA here? (Initially I though about -EOF, but EOF is -1 so
> that would be incorrect).
> 
> > +	}
> > +
> > +	if (len > buf_len) {
> > +		free(buffer);
> 
> If you use realloc() above, this can be dropped.
> 
> > +		buf_len = len;
> 
> For this to work you have to make buf_len static too, otherwise you will
> unnecessarily reallocate the buffer. Oh, and that also requires the
> single buffer (as pointed out above) because you will otherwise use a
> common buf_len for both static buffers passed to this function.
> 
> > +		flag = 0;
> > +		goto re_malloc;
> > +	}
> > +
> > +	if (flag != 0) {
> > +		flag = 0;
> > +		goto re_recv;
> > +	}
> > +
> > +	*buf = buffer;
> > +	return len;
> > +}
> > +
> >  int rtnl_dump_filter_l(struct rtnl_handle *rth,
> >  		       const struct rtnl_dump_filter_arg *arg)
> >  {
> > @@ -413,31 +466,20 @@ int rtnl_dump_filter_l(struct rtnl_handle *rth,
> >  		.msg_iov = &iov,
> >  		.msg_iovlen = 1,
> >  	};
> > -	char buf[32768];
> > +	static char *buf = NULL;
> 
> If you keep the static buffer in rtnl_recvmsg(), there is no need to
> assign NULL here.
> 
> >  	int dump_intr = 0;
> >  
> > -	iov.iov_base = buf;
> >  	while (1) {
> >  		int status;
> >  		const struct rtnl_dump_filter_arg *a;
> >  		int found_done = 0;
> >  		int msglen = 0;
> >  
> > -		iov.iov_len = sizeof(buf);
> > -		status = recvmsg(rth->fd, &msg, 0);
> > -
> > -		if (status < 0) {
> > -			if (errno == EINTR || errno == EAGAIN)
> > -				continue;
> > -			fprintf(stderr, "netlink receive error %s (%d)\n",
> > -				strerror(errno), errno);
> > -			return -1;
> > -		}
> > -
> > -		if (status == 0) {
> > -			fprintf(stderr, "EOF on netlink\n");
> > -			return -1;
> > -		}
> > +		status = rtnl_recvmsg(rth->fd, &msg, &buf);
> > +		if (status < 0)
> > +			return status;
> > +		else if (status == 0)
> > +			continue;
> 
> When retrying inside rtnl_recvmsg(), it won't return 0 anymore. I
> believe the whole 'while (1)' loop could go away then.

Doesn't this loop also handle the response divided into multiple
packets?

Michal Kubecek
Hangbin Liu Sept. 8, 2017, 2:01 p.m. | #3
Hi Phil,

Thanks for the comments, see replies bellow.

On Fri, Sep 08, 2017 at 01:02:47PM +0200, Phil Sutter wrote:
> Hi Hangbin,
> 
> On Fri, Sep 08, 2017 at 06:14:56PM +0800, Hangbin Liu wrote:
> [...]
> > diff --git a/lib/libnetlink.c b/lib/libnetlink.c
> > index be7ac86..37cfb5a 100644
> > --- a/lib/libnetlink.c
> > +++ b/lib/libnetlink.c
> > @@ -402,6 +402,59 @@ static void rtnl_dump_error(const struct rtnl_handle *rth,
> >  	}
> >  }
> >  
> > +static int rtnl_recvmsg(int fd, struct msghdr *msg, char **buf)
> > +{
> > +	struct iovec *iov;
> > +	int len = -1, buf_len = 32768;
> > +	char *buffer = *buf;
> 
> Isn't it possible to make 'buffer' static instead of the two 'buf'
> variables in rtnl_dump_filter_l() and __rtnl_talk()? Then we would have
> only a single buffer which is shared between both functions instead of
> two which are independently allocated.

I was also thinking of this before. But in function ipaddrlabel_flush()

	if (rtnl_dump_filter(&rth, flush_addrlabel, NULL) < 0)

It will cal rtnl_dump_filter_l() first via
rtnl_dump_filter() -> rtnl_dump_filter_nc() -> rtnl_dump_filter_l().

Then call rtnl_talk() later via call back
a->filter(&nladdr, h, a->arg1) -> flush_addrlabel() -> rtnl_talk()

So if we only use one static buffer in rtnl_recvmsg(). Then it will be written
at lease twice.

The path looks like bellow in function rtnl_dump_filter_l()

	while (1) {
		status = rtnl_recvmsg(rth->fd, &msg, &buf);	<== write buf

		for (a = arg; a->filter; a++) {
			struct nlmsghdr *h = (struct nlmsghdr *)buf;	<== assign buf to h

			while (NLMSG_OK(h, msglen)) {

				if (!rth->dump_fp) {
					err = a->filter(&nladdr, h, a->arg1);	<== buf changed via rtnl_talk()
				}

				h = NLMSG_NEXT(h, msglen);	<== so h will also be changed
			}
		}
	}

That's why I have to use two static buffers.
> 
> > +
> > +	int flag = MSG_PEEK | MSG_TRUNC;
> > +
> > +	if (buffer == NULL)
> > +re_malloc:
> > +		buffer = malloc(buf_len);
> 
> I think using realloc() here is more appropriate since there is no need
> to free the buffer in beforehand and calling realloc(NULL, len) is
> equivalent to calling malloc(len). I think 'realloc' is also a better
> name for the goto label.

Good idea.
> 
> > +	if (buffer == NULL) {
> > +		fprintf(stderr, "malloc error: no enough buffer\n");
> 
> Minor typo here: s/no/not/
> 
> > +		return -1;
> 
> Return -ENOMEM?
> 
> > +	}
> > +
> > +	iov = msg->msg_iov;
> > +	iov->iov_base = buffer;
> > +	iov->iov_len = buf_len;
> > +
> > +re_recv:
> 
> Just call this 'recv'? (Not really important though.)
> 
> > +	len = recvmsg(fd, msg, flag);
> > +
> > +	if (len < 0) {
> > +		if (errno == EINTR || errno == EAGAIN)
> > +			return 0;
> 
> Instead of returning 0 (which makes callers retry), goto re_recv?

Yes, will fix this.
> 
> > +		fprintf(stderr, "netlink receive error %s (%d)\n",
> > +			strerror(errno), errno);
> > +		return len;
> > +	}
> > +
> > +	if (len == 0) {
> > +		fprintf(stderr, "EOF on netlink\n");
> > +		return -1;
> 
> Return -ENODATA here? (Initially I though about -EOF, but EOF is -1 so
> that would be incorrect).
> 
> > +	}
> > +
> > +	if (len > buf_len) {
> > +		free(buffer);
> 
> If you use realloc() above, this can be dropped.

Yes.
> 
> > +		buf_len = len;
> 
> For this to work you have to make buf_len static too, otherwise you will
> unnecessarily reallocate the buffer. Oh, and that also requires the
> single buffer (as pointed out above) because you will otherwise use a
> common buf_len for both static buffers passed to this function.

Since we have to use two static bufffers. So how about check like

	if (len > strlen(buffer))

> 
> > +		flag = 0;
> > +		goto re_malloc;
> > +	}
> > +
> > +	if (flag != 0) {
> > +		flag = 0;
> > +		goto re_recv;
> > +	}
> > +
> > +	*buf = buffer;
> > +	return len;
> > +}
> > +
> >  int rtnl_dump_filter_l(struct rtnl_handle *rth,
> >  		       const struct rtnl_dump_filter_arg *arg)
> >  {
> > @@ -413,31 +466,20 @@ int rtnl_dump_filter_l(struct rtnl_handle *rth,
> >  		.msg_iov = &iov,
> >  		.msg_iovlen = 1,
> >  	};
> > -	char buf[32768];
> > +	static char *buf = NULL;
> 
> If you keep the static buffer in rtnl_recvmsg(), there is no need to
> assign NULL here.
> 
> >  	int dump_intr = 0;
> >  
> > -	iov.iov_base = buf;
> >  	while (1) {
> >  		int status;
> >  		const struct rtnl_dump_filter_arg *a;
> >  		int found_done = 0;
> >  		int msglen = 0;
> >  
> > -		iov.iov_len = sizeof(buf);
> > -		status = recvmsg(rth->fd, &msg, 0);
> > -
> > -		if (status < 0) {
> > -			if (errno == EINTR || errno == EAGAIN)
> > -				continue;
> > -			fprintf(stderr, "netlink receive error %s (%d)\n",
> > -				strerror(errno), errno);
> > -			return -1;
> > -		}
> > -
> > -		if (status == 0) {
> > -			fprintf(stderr, "EOF on netlink\n");
> > -			return -1;
> > -		}
> > +		status = rtnl_recvmsg(rth->fd, &msg, &buf);
> > +		if (status < 0)
> > +			return status;
> > +		else if (status == 0)
> > +			continue;
> 
> When retrying inside rtnl_recvmsg(), it won't return 0 anymore. I
> believe the whole 'while (1)' loop could go away then.
> 

Like Michal said, there may have multi netlink packets?

Thanks
Hangbin
Phil Sutter Sept. 8, 2017, 2:51 p.m. | #4
Hi,

On Fri, Sep 08, 2017 at 10:01:31PM +0800, Hangbin Liu wrote:
[...]
> > > diff --git a/lib/libnetlink.c b/lib/libnetlink.c
> > > index be7ac86..37cfb5a 100644
> > > --- a/lib/libnetlink.c
> > > +++ b/lib/libnetlink.c
> > > @@ -402,6 +402,59 @@ static void rtnl_dump_error(const struct rtnl_handle *rth,
> > >  	}
> > >  }
> > >  
> > > +static int rtnl_recvmsg(int fd, struct msghdr *msg, char **buf)
> > > +{
> > > +	struct iovec *iov;
> > > +	int len = -1, buf_len = 32768;
> > > +	char *buffer = *buf;
> > 
> > Isn't it possible to make 'buffer' static instead of the two 'buf'
> > variables in rtnl_dump_filter_l() and __rtnl_talk()? Then we would have
> > only a single buffer which is shared between both functions instead of
> > two which are independently allocated.
> 
> I was also thinking of this before. But in function ipaddrlabel_flush()
> 
> 	if (rtnl_dump_filter(&rth, flush_addrlabel, NULL) < 0)
> 
> It will cal rtnl_dump_filter_l() first via
> rtnl_dump_filter() -> rtnl_dump_filter_nc() -> rtnl_dump_filter_l().
> 
> Then call rtnl_talk() later via call back
> a->filter(&nladdr, h, a->arg1) -> flush_addrlabel() -> rtnl_talk()
> 
> So if we only use one static buffer in rtnl_recvmsg(). Then it will be written
> at lease twice.

Oh yes, in that case we really can't have a single buffer.

[...]
> > > +		buf_len = len;
> > 
> > For this to work you have to make buf_len static too, otherwise you will
> > unnecessarily reallocate the buffer. Oh, and that also requires the
> > single buffer (as pointed out above) because you will otherwise use a
> > common buf_len for both static buffers passed to this function.
> 
> Since we have to use two static bufffers. So how about check like
> 
> 	if (len > strlen(buffer))

I don't think that will work. We are reusing the buffer and it contains
binary data, so a NUL byte may appear anywhere. I fear you will have to
change rtnl_recvmsg() to accept a buflen parameter which callers have to
define statically together with the buffer pointer.

Regarding Michal's concern about reentrancy, maybe we should go into a
different direction and make rtnl_recvmsg() return a newly allocated
buffer which the caller has to free.

[...]
> > When retrying inside rtnl_recvmsg(), it won't return 0 anymore. I
> > believe the whole 'while (1)' loop could go away then.
> > 
> 
> Like Michal said, there may have multi netlink packets?

Ah yes, I missed that.

Thanks, Phil
Hangbin Liu Sept. 11, 2017, 7:19 a.m. | #5
Hi Phil and Michal,
On Fri, Sep 08, 2017 at 04:51:13PM +0200, Phil Sutter wrote:
> Hi,
> 
> On Fri, Sep 08, 2017 at 10:01:31PM +0800, Hangbin Liu wrote:
> [...]
> > Since we have to use two static bufffers. So how about check like
> > 
> > 	if (len > strlen(buffer))
> 
> I don't think that will work. We are reusing the buffer and it contains
> binary data, so a NUL byte may appear anywhere. I fear you will have to
> change rtnl_recvmsg() to accept a buflen parameter which callers have to
> define statically together with the buffer pointer.

OK
> 
> Regarding Michal's concern about reentrancy, maybe we should go into a
> different direction and make rtnl_recvmsg() return a newly allocated
> buffer which the caller has to free.

Hmm... But we could not free the buf in __rtnl_talk(). Because in
__rtnl_talk() we assign the answer with the buf address and return to caller.

	for (h = (struct nlmsghdr *)buf; status >= sizeof(*h); ) {
		[...]
		if (answer) {
			*answer= h;
			return 0;
		}
	}

And the caller will keep use it in later code. Since there are plenty of
functions called rtnl_talk. I think it would be much more complex to free
the buffer every time.


Hi Michal,

Would you like to tell me more about your concern with reentrancy? It's looks
arpd doesn't call rtnl_talk() or rtnl_dump_filter_l().

Thanks
Hangbin
Michal Kubecek Sept. 12, 2017, 8:47 a.m. | #6
On Mon, Sep 11, 2017 at 03:19:55PM +0800, Hangbin Liu wrote:
> On Fri, Sep 08, 2017 at 04:51:13PM +0200, Phil Sutter wrote:
> > Regarding Michal's concern about reentrancy, maybe we should go into a
> > different direction and make rtnl_recvmsg() return a newly allocated
> > buffer which the caller has to free.
> 
> Hmm... But we could not free the buf in __rtnl_talk(). Because in
> __rtnl_talk() we assign the answer with the buf address and return to caller.
> 
> 	for (h = (struct nlmsghdr *)buf; status >= sizeof(*h); ) {
> 		[...]
> 		if (answer) {
> 			*answer= h;
> 			return 0;
> 		}
> 	}
> 
> And the caller will keep use it in later code. Since there are plenty of
> functions called rtnl_talk. I think it would be much more complex to free
> the buffer every time.
> 
> 
> Hi Michal,
> 
> Would you like to tell me more about your concern with reentrancy? It's looks
> arpd doesn't call rtnl_talk() or rtnl_dump_filter_l().

I checked again and arpd indeed isn't a problem. It doesn't seem to call
any of the two functions (directly or indirectly) and while it's linked
with "-lpthread", it's not really multithreaded.

But my concern was rather about other potential users of libnetlink
(i.e. those which are not part of iproute2). I must admit, though, that
I'm not sure if libnetlink code is reentrant as of now. (And people are
discouraged from using it in its own manual page.)

That being said, I still like Phil's idea for a different reason. While
investigating the issue with "ip link show dev eth ..." which led me to
commit 6599162b958e ("iplink: check for message truncation in
iplink_get()"), I quickly peeked at some other callers of rtnl_talk()
and I'm afraid there may be others which wouldn't handle truncated
message correctly. I assume the maxlen argument was always chosen to be
sufficient for any expected messages but as the example of iplink_get()
shows, messages returned by kernel my grow over time.

That's why I like the idea of __rtnl_talk() returning a pointer to newly
allocated buffer (of sufficient size) rather than copying the response
into a buffer provided by caller and potentially truncating it.

Michal Kubecek
Michal Kubecek Sept. 12, 2017, 9:09 a.m. | #7
On Tue, Sep 12, 2017 at 10:47:48AM +0200, Michal Kubecek wrote:
> On Mon, Sep 11, 2017 at 03:19:55PM +0800, Hangbin Liu wrote:
> > On Fri, Sep 08, 2017 at 04:51:13PM +0200, Phil Sutter wrote:
> > > Regarding Michal's concern about reentrancy, maybe we should go into a
> > > different direction and make rtnl_recvmsg() return a newly allocated
> > > buffer which the caller has to free.
> > 
> > Hmm... But we could not free the buf in __rtnl_talk(). Because in
> > __rtnl_talk() we assign the answer with the buf address and return to caller.
> > 
> > 	for (h = (struct nlmsghdr *)buf; status >= sizeof(*h); ) {
> > 		[...]
> > 		if (answer) {
> > 			*answer= h;
> > 			return 0;
> > 		}
> > 	}
> > 
> > And the caller will keep use it in later code. Since there are plenty of
> > functions called rtnl_talk. I think it would be much more complex to free
> > the buffer every time.
> > 
> > 
> > Hi Michal,
> > 
> > Would you like to tell me more about your concern with reentrancy? It's looks
> > arpd doesn't call rtnl_talk() or rtnl_dump_filter_l().
> 
> I checked again and arpd indeed isn't a problem. It doesn't seem to call
> any of the two functions (directly or indirectly) and while it's linked
> with "-lpthread", it's not really multithreaded.
> 
> But my concern was rather about other potential users of libnetlink
> (i.e. those which are not part of iproute2). I must admit, though, that
> I'm not sure if libnetlink code is reentrant as of now. (And people are
> discouraged from using it in its own manual page.)
> 
> That being said, I still like Phil's idea for a different reason. While
> investigating the issue with "ip link show dev eth ..." which led me to
> commit 6599162b958e ("iplink: check for message truncation in
> iplink_get()"), I quickly peeked at some other callers of rtnl_talk()
> and I'm afraid there may be others which wouldn't handle truncated
> message correctly. I assume the maxlen argument was always chosen to be
> sufficient for any expected messages but as the example of iplink_get()
> shows, messages returned by kernel my grow over time.
> 
> That's why I like the idea of __rtnl_talk() returning a pointer to newly
> allocated buffer (of sufficient size) rather than copying the response
> into a buffer provided by caller and potentially truncating it.

I'm sorry, I managed to forget that your patch 2 does already address
this problem. But the fact that any caller must keep in mind that he
must not call the same function again until the previous response is no
longer needed still feels like a trap. It's something you need to keep
in mind (where "you" in fact means any future contributor) and it's
easy to forget. That's why I prefer the reentrant functions like
strerror_r() or localtime_r() even in code which is not intended to be
multithreaded. Getting an object which is "mine" to do with whatever
I want until I no longer need it feels like a cleaner interface to me.

Michal Kubecek
Hangbin Liu Sept. 13, 2017, 9:26 a.m. | #8
Hi Michal,

Thanks a lot for all your explains. Phil has helped update the patch to support
return a newly allocated buffer. I will post it soon.

Thanks
Hangbin
On Tue, Sep 12, 2017 at 11:09:26AM +0200, Michal Kubecek wrote:
> > 
> > I checked again and arpd indeed isn't a problem. It doesn't seem to call
> > any of the two functions (directly or indirectly) and while it's linked
> > with "-lpthread", it's not really multithreaded.
> > 
> > But my concern was rather about other potential users of libnetlink
> > (i.e. those which are not part of iproute2). I must admit, though, that
> > I'm not sure if libnetlink code is reentrant as of now. (And people are
> > discouraged from using it in its own manual page.)
> > 
> > That being said, I still like Phil's idea for a different reason. While
> > investigating the issue with "ip link show dev eth ..." which led me to
> > commit 6599162b958e ("iplink: check for message truncation in
> > iplink_get()"), I quickly peeked at some other callers of rtnl_talk()
> > and I'm afraid there may be others which wouldn't handle truncated
> > message correctly. I assume the maxlen argument was always chosen to be
> > sufficient for any expected messages but as the example of iplink_get()
> > shows, messages returned by kernel my grow over time.
> > 
> > That's why I like the idea of __rtnl_talk() returning a pointer to newly
> > allocated buffer (of sufficient size) rather than copying the response
> > into a buffer provided by caller and potentially truncating it.
> 
> I'm sorry, I managed to forget that your patch 2 does already address
> this problem. But the fact that any caller must keep in mind that he
> must not call the same function again until the previous response is no
> longer needed still feels like a trap. It's something you need to keep
> in mind (where "you" in fact means any future contributor) and it's
> easy to forget. That's why I prefer the reentrant functions like
> strerror_r() or localtime_r() even in code which is not intended to be
> multithreaded. Getting an object which is "mine" to do with whatever
> I want until I no longer need it feels like a cleaner interface to me.
> 
> Michal Kubecek
>

Patch

diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index be7ac86..37cfb5a 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -402,6 +402,59 @@  static void rtnl_dump_error(const struct rtnl_handle *rth,
 	}
 }
 
+static int rtnl_recvmsg(int fd, struct msghdr *msg, char **buf)
+{
+	struct iovec *iov;
+	int len = -1, buf_len = 32768;
+	char *buffer = *buf;
+
+	int flag = MSG_PEEK | MSG_TRUNC;
+
+	if (buffer == NULL)
+re_malloc:
+		buffer = malloc(buf_len);
+
+	if (buffer == NULL) {
+		fprintf(stderr, "malloc error: no enough buffer\n");
+		return -1;
+	}
+
+	iov = msg->msg_iov;
+	iov->iov_base = buffer;
+	iov->iov_len = buf_len;
+
+re_recv:
+	len = recvmsg(fd, msg, flag);
+
+	if (len < 0) {
+		if (errno == EINTR || errno == EAGAIN)
+			return 0;
+		fprintf(stderr, "netlink receive error %s (%d)\n",
+			strerror(errno), errno);
+		return len;
+	}
+
+	if (len == 0) {
+		fprintf(stderr, "EOF on netlink\n");
+		return -1;
+	}
+
+	if (len > buf_len) {
+		free(buffer);
+		buf_len = len;
+		flag = 0;
+		goto re_malloc;
+	}
+
+	if (flag != 0) {
+		flag = 0;
+		goto re_recv;
+	}
+
+	*buf = buffer;
+	return len;
+}
+
 int rtnl_dump_filter_l(struct rtnl_handle *rth,
 		       const struct rtnl_dump_filter_arg *arg)
 {
@@ -413,31 +466,20 @@  int rtnl_dump_filter_l(struct rtnl_handle *rth,
 		.msg_iov = &iov,
 		.msg_iovlen = 1,
 	};
-	char buf[32768];
+	static char *buf = NULL;
 	int dump_intr = 0;
 
-	iov.iov_base = buf;
 	while (1) {
 		int status;
 		const struct rtnl_dump_filter_arg *a;
 		int found_done = 0;
 		int msglen = 0;
 
-		iov.iov_len = sizeof(buf);
-		status = recvmsg(rth->fd, &msg, 0);
-
-		if (status < 0) {
-			if (errno == EINTR || errno == EAGAIN)
-				continue;
-			fprintf(stderr, "netlink receive error %s (%d)\n",
-				strerror(errno), errno);
-			return -1;
-		}
-
-		if (status == 0) {
-			fprintf(stderr, "EOF on netlink\n");
-			return -1;
-		}
+		status = rtnl_recvmsg(rth->fd, &msg, &buf);
+		if (status < 0)
+			return status;
+		else if (status == 0)
+			continue;
 
 		if (rth->dump_fp)
 			fwrite(buf, 1, NLMSG_ALIGN(status), rth->dump_fp);
@@ -543,7 +585,7 @@  static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
 		.msg_iov = &iov,
 		.msg_iovlen = 1,
 	};
-	char   buf[32768] = {};
+	static char *buf = NULL;
 
 	n->nlmsg_seq = seq = ++rtnl->seq;
 
@@ -556,22 +598,14 @@  static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
 		return -1;
 	}
 
-	iov.iov_base = buf;
 	while (1) {
-		iov.iov_len = sizeof(buf);
-		status = recvmsg(rtnl->fd, &msg, 0);
+		status = rtnl_recvmsg(rtnl->fd, &msg, &buf);
+
+		if (status < 0)
+			return status;
+		else if (status == 0)
+			continue;
 
-		if (status < 0) {
-			if (errno == EINTR || errno == EAGAIN)
-				continue;
-			fprintf(stderr, "netlink receive error %s (%d)\n",
-				strerror(errno), errno);
-			return -1;
-		}
-		if (status == 0) {
-			fprintf(stderr, "EOF on netlink\n");
-			return -1;
-		}
 		if (msg.msg_namelen != sizeof(nladdr)) {
 			fprintf(stderr,
 				"sender address length == %d\n",