diff mbox series

[PATCHv3,iproute2,1/2] lib/libnetlink: re malloc buff if size is not enough

Message ID 1505871820-31580-2-git-send-email-liuhangbin@gmail.com
State Superseded, archived
Delegated to: stephen hemminger
Headers show
Series libnetlink: malloc correct buff at run time | expand

Commit Message

Hangbin Liu Sept. 20, 2017, 1:43 a.m. UTC
With commit 72b365e8e0fd ("libnetlink: Double the dump buffer size")
we doubled the buffer size to support more VFs. But the number of VFs keeps
increasing; some customers even use more than 200 VFs now.

We cannot keep doubling it every time the buffer turns out to be too small.
Instead of hard-coding the buffer size, let's allocate the correct size at run time.

Introduce function rtnl_recvmsg() to always return a newly allocated buffer.
The caller needs to free it after use.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 lib/libnetlink.c | 114 ++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 80 insertions(+), 34 deletions(-)

Comments

Stephen Hemminger Sept. 20, 2017, 4:56 p.m. UTC | #1
On Wed, 20 Sep 2017 09:43:39 +0800
Hangbin Liu <liuhangbin@gmail.com> wrote:

Thanks for keeping up on this.


> +realloc:
> +	bufp = realloc(buf, buf_len);
> +
> +	if (bufp == NULL) {

Minor personal style issue:
To me, blank lines are like paragraphs in writing.
Code reads better when the assignment and the condition check are next to
each other.

> +recv:
> +	len = recvmsg(fd, msg, flag);
> +
> +	if (len < 0) {
> +		if (errno == EINTR || errno == EAGAIN)
> +			goto recv;
> +		fprintf(stderr, "netlink receive error %s (%d)\n",
> +			strerror(errno), errno);
> +		free(buf);
> +		return -errno;
> +	}
> +
> +	if (len == 0) {
> +		fprintf(stderr, "EOF on netlink\n");
> +		free(buf);
> +		return -ENODATA;
> +	}
> +
> +	if (len > buf_len) {
> +		buf_len = len;
> +		flag = 0;
> +		goto realloc;
> +	}
> +
> +	if (flag != 0) {
> +		flag = 0;
> +		goto recv;

Although I programmed in BASIC years ago, I never liked code
with loops via goto. To me it indicates the logic is not well thought
through.  Not sure exactly how to rearrange the control flow, but it
should be possible to rewrite this so that it reads cleaner.

Still think this needs to go through a few more review cycles
before applying.
Hangbin Liu Sept. 21, 2017, 7:20 a.m. UTC | #2
Hi Stephen,
On Wed, Sep 20, 2017 at 09:56:05AM -0700, Stephen Hemminger wrote:
> > +realloc:
> > +	bufp = realloc(buf, buf_len);
> > +
> > +	if (bufp == NULL) {
> 
> Minor personal style issue:
> To me, blank lines are like paragraphs in writing.
> Code reads better assignment and condition check are next to
> each other.

OK, I will remove the blank lines.
> 
> > +recv:
> > +	len = recvmsg(fd, msg, flag);
> > +
> > +	if (len < 0) {
> > +		if (errno == EINTR || errno == EAGAIN)
> > +			goto recv;
> > +		fprintf(stderr, "netlink receive error %s (%d)\n",
> > +			strerror(errno), errno);
> > +		free(buf);
> > +		return -errno;
> > +	}
> > +
> > +	if (len == 0) {
> > +		fprintf(stderr, "EOF on netlink\n");
> > +		free(buf);
> > +		return -ENODATA;
> > +	}
> > +
> > +	if (len > buf_len) {
> > +		buf_len = len;
> > +		flag = 0;
> > +		goto realloc;
> > +	}
> > +
> > +	if (flag != 0) {
> > +		flag = 0;
> > +		goto recv;
> 
> Although I programmed in BASIC years ago. I never liked code
> with loops via goto. To me it indicates the logic is not well thought
> through.  Not sure exactly how to rearrange the control flow, but it
> should be possible to rewrite this so that it reads cleaner.

Hmm, if we remove the gotos, then the logic would look like this:

	bufp = realloc(buf, buf_len);
	/* check bufp and set msg */

	len = recvmsg(fd, msg, flag);
	/* check len */

	if (len > buf_len) {
		buf_len = len;
		bufp = realloc(buf, buf_len);
		/* check bufp and set msg */

		len = recvmsg(fd, msg, flag);
		/* check len */
	}

	len = recvmsg(fd, msg, flag);
	/* check len */

Or maybe we can set buf_len very small at first, which forces a realloc on
the second pass. The code would then look like this:

	int buf_len = 16;
	bufp = realloc(buf, buf_len);
	/* check bufp and set msg */

	len = recvmsg(fd, msg, flag);
	/* check len */

	buf_len = len;
	bufp = realloc(buf, buf_len);
	/* check bufp and set msg */

	len = recvmsg(fd, msg, flag);
	/* check len */

What do you think?

Thanks
Hangbin
Michal Kubecek Sept. 21, 2017, 7:34 a.m. UTC | #3
On Thu, Sep 21, 2017 at 03:20:02PM +0800, Hangbin Liu wrote:
> 
> Or maybe we can set buf_len very small first. Then it will force to realloc at
> the second time. And the code would like
> 
> 	int buf_len = 16;
> 	bufp = realloc(buf, buf_len);
> 	/* check bufp and set msg */
> 
> 	len = recvmsg(fd, msg, flag);
> 	/* check len */
> 
> 	buf_len = len;
> 	bufp = realloc(buf, buf_len);
> 	/* check bufp and set msg */
> 
> 	len = recvmsg(fd, msg, flag);
> 	/* check len */
> 
> What do you think?

I will have to check but IIRC it might be possible to use zero length
for the peek to only check the length which could help you to avoid both
the reallocation and copying the same data from kernel to userspace
twice.

Michal Kubecek
Hangbin Liu Sept. 28, 2017, 7:13 a.m. UTC | #4
Hi Michal,

On Thu, Sep 21, 2017 at 09:34:20AM +0200, Michal Kubecek wrote:
> I will have to check but IIRC it might be possible to use zero length
> for the peek to only check the length which could help you to avoid both
> the reallocation and copying the same data from kernel to userspace
> twice.

Yes, a msg with a zero-length buffer also works. I will post a new patch once that is fixed.

Thanks
Hangbin
diff mbox series

Patch

diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index be7ac86..ab45b48 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -402,6 +402,64 @@  static void rtnl_dump_error(const struct rtnl_handle *rth,
 	}
 }
 
+static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
+{
+	struct iovec *iov;
+	int len = -1, buf_len = 32768;
+	char *bufp, *buf = NULL;
+
+	int flag = MSG_PEEK | MSG_TRUNC;
+
+realloc:
+	bufp = realloc(buf, buf_len);
+
+	if (bufp == NULL) {
+		fprintf(stderr, "malloc error: not enough buffer\n");
+		free(buf);
+		return -ENOMEM;
+	}
+	buf = bufp;
+	iov = msg->msg_iov;
+	iov->iov_base = buf;
+	iov->iov_len = buf_len;
+
+recv:
+	len = recvmsg(fd, msg, flag);
+
+	if (len < 0) {
+		if (errno == EINTR || errno == EAGAIN)
+			goto recv;
+		fprintf(stderr, "netlink receive error %s (%d)\n",
+			strerror(errno), errno);
+		free(buf);
+		return -errno;
+	}
+
+	if (len == 0) {
+		fprintf(stderr, "EOF on netlink\n");
+		free(buf);
+		return -ENODATA;
+	}
+
+	if (len > buf_len) {
+		buf_len = len;
+		flag = 0;
+		goto realloc;
+	}
+
+	if (flag != 0) {
+		flag = 0;
+		goto recv;
+	}
+
+	if (answer)
+		*answer = buf;
+	else
+		free(buf);
+
+	return len;
+}
+
 int rtnl_dump_filter_l(struct rtnl_handle *rth,
 		       const struct rtnl_dump_filter_arg *arg)
 {
@@ -413,31 +471,18 @@  int rtnl_dump_filter_l(struct rtnl_handle *rth,
 		.msg_iov = &iov,
 		.msg_iovlen = 1,
 	};
-	char buf[32768];
+	char *buf;
 	int dump_intr = 0;
 
-	iov.iov_base = buf;
 	while (1) {
 		int status;
 		const struct rtnl_dump_filter_arg *a;
 		int found_done = 0;
 		int msglen = 0;
 
-		iov.iov_len = sizeof(buf);
-		status = recvmsg(rth->fd, &msg, 0);
-
-		if (status < 0) {
-			if (errno == EINTR || errno == EAGAIN)
-				continue;
-			fprintf(stderr, "netlink receive error %s (%d)\n",
-				strerror(errno), errno);
-			return -1;
-		}
-
-		if (status == 0) {
-			fprintf(stderr, "EOF on netlink\n");
-			return -1;
-		}
+		status = rtnl_recvmsg(rth->fd, &msg, &buf);
+		if (status < 0)
+			return status;
 
 		if (rth->dump_fp)
 			fwrite(buf, 1, NLMSG_ALIGN(status), rth->dump_fp);
@@ -462,8 +507,10 @@  int rtnl_dump_filter_l(struct rtnl_handle *rth,
 
 				if (h->nlmsg_type == NLMSG_DONE) {
 					err = rtnl_dump_done(h);
-					if (err < 0)
+					if (err < 0) {
+						free(buf);
 						return -1;
+					}
 
 					found_done = 1;
 					break; /* process next filter */
@@ -471,19 +518,23 @@  int rtnl_dump_filter_l(struct rtnl_handle *rth,
 
 				if (h->nlmsg_type == NLMSG_ERROR) {
 					rtnl_dump_error(rth, h);
+					free(buf);
 					return -1;
 				}
 
 				if (!rth->dump_fp) {
 					err = a->filter(&nladdr, h, a->arg1);
-					if (err < 0)
+					if (err < 0) {
+						free(buf);
 						return err;
+					}
 				}
 
 skip_it:
 				h = NLMSG_NEXT(h, msglen);
 			}
 		}
+		free(buf);
 
 		if (found_done) {
 			if (dump_intr)
@@ -543,7 +594,7 @@  static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
 		.msg_iov = &iov,
 		.msg_iovlen = 1,
 	};
-	char   buf[32768] = {};
+	char *buf;
 
 	n->nlmsg_seq = seq = ++rtnl->seq;
 
@@ -556,22 +607,12 @@  static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
 		return -1;
 	}
 
-	iov.iov_base = buf;
 	while (1) {
-		iov.iov_len = sizeof(buf);
-		status = recvmsg(rtnl->fd, &msg, 0);
+		status = rtnl_recvmsg(rtnl->fd, &msg, &buf);
+
+		if (status < 0)
+			return status;
 
-		if (status < 0) {
-			if (errno == EINTR || errno == EAGAIN)
-				continue;
-			fprintf(stderr, "netlink receive error %s (%d)\n",
-				strerror(errno), errno);
-			return -1;
-		}
-		if (status == 0) {
-			fprintf(stderr, "EOF on netlink\n");
-			return -1;
-		}
 		if (msg.msg_namelen != sizeof(nladdr)) {
 			fprintf(stderr,
 				"sender address length == %d\n",
@@ -585,6 +626,7 @@  static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
 			if (l < 0 || len > status) {
 				if (msg.msg_flags & MSG_TRUNC) {
 					fprintf(stderr, "Truncated message\n");
+					free(buf);
 					return -1;
 				}
 				fprintf(stderr,
@@ -611,6 +653,7 @@  static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
 					if (answer)
 						memcpy(answer, h,
 						       MIN(maxlen, h->nlmsg_len));
+					free(buf);
 					return 0;
 				}
 
@@ -619,12 +662,14 @@  static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
 					rtnl_talk_error(h, err, errfn);
 
 				errno = -err->error;
+				free(buf);
 				return -1;
 			}
 
 			if (answer) {
 				memcpy(answer, h,
 				       MIN(maxlen, h->nlmsg_len));
+				free(buf);
 				return 0;
 			}
 
@@ -633,6 +678,7 @@  static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
 			status -= NLMSG_ALIGN(len);
 			h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len));
 		}
+		free(buf);
 
 		if (msg.msg_flags & MSG_TRUNC) {
 			fprintf(stderr, "Message truncated\n");