diff mbox series

[net-next,7/7] selftests/net: add zerocopy support for PF_RDS test case

Message ID c25caa53d73057989d40d26789a901fff2962b3e.1516793253.git.sowmini.varadhan@oracle.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series RDS zerocopy support | expand

Commit Message

Sowmini Varadhan Jan. 24, 2018, 11:46 a.m. UTC
Send a cookie with sendmsg() on PF_RDS sockets, and process the
returned batched cookies in do_recv_completion()

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
 tools/testing/selftests/net/msg_zerocopy.c |  119 ++++++++++++++++++++-------
 1 files changed, 88 insertions(+), 31 deletions(-)

Comments

Willem de Bruijn Jan. 28, 2018, 2 p.m. UTC | #1
On Wed, Jan 24, 2018 at 12:46 PM, Sowmini Varadhan
<sowmini.varadhan@oracle.com> wrote:
> Send a cookie with sendmsg() on PF_RDS sockets, and process the
> returned batched cookies in do_recv_completion()
>
> Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
> ---
>  tools/testing/selftests/net/msg_zerocopy.c |  119 ++++++++++++++++++++-------
>  1 files changed, 88 insertions(+), 31 deletions(-)
>
> diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
> index 7a5b353..19d2b1a 100644
> --- a/tools/testing/selftests/net/msg_zerocopy.c
> +++ b/tools/testing/selftests/net/msg_zerocopy.c
> @@ -168,7 +168,26 @@ static int do_accept(int fd)
>         return fd;
>  }
>
> -static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
> +static void add_zcopy_cookie(struct msghdr *msg)
> +{
> +       int olen = msg->msg_controllen;
> +       struct cmsghdr *cm;
> +       static uint32_t cookie;
> +
> +       msg->msg_controllen += CMSG_SPACE(sizeof(cookie));
> +       msg->msg_control = (struct cmsghdr *)realloc(msg->msg_control,
> +                                                    msg->msg_controllen);

Please just allocate ahead of time. And since cookie size is fixed
and small just define a local variable on the stack in do_sendmsg.

  char control[CMSG_SPACE(sizeof(uint32_t)];

> +       if (!msg->msg_control)
> +               error(1, errno, "cannot allocate cmsghdr for cookie");
> +       cm = (void *)msg->msg_control + olen;
> +       cm->cmsg_len = CMSG_SPACE(sizeof(cookie));

CMSG_LEN

> +       cm->cmsg_level = SOL_RDS;
> +       cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
> +       ++cookie;
> +       memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));

cookie is not initialized

> @@ -346,36 +382,57 @@ static bool do_recv_completion(int fd)
>                       cm->cmsg_level, cm->cmsg_type);
>
>         serr = (void *) CMSG_DATA(cm);
> -       if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
> -               error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
> -       if (serr->ee_errno != 0)
> -               error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
>
> -       hi = serr->ee_data;
> -       lo = serr->ee_info;
> -       range = hi - lo + 1;
> +       switch (serr->ee_origin) {
> +       case SO_EE_ORIGIN_ZEROCOPY: {
> +               if (serr->ee_errno != 0)
> +                       error(1, 0, "serr: wrong error code: %u",
> +                             serr->ee_errno);
> +               hi = serr->ee_data;
> +               lo = serr->ee_info;
> +               range = hi - lo + 1;
> +
> +               /* Detect notification gaps. These should not happen often,
> +                * if at all.  Gaps can occur due to drops, reordering and
> +                * retransmissions.
> +                */
> +               if (lo != next_completion)
> +                       fprintf(stderr, "gap: %u..%u does not append to %u\n",
> +                               lo, hi, next_completion);
> +               next_completion = hi + 1;
> +
> +               zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
> +               if (zerocopied == -1)
> +                       zerocopied = zerocopy;
> +               else if (zerocopied != zerocopy) {
> +                       fprintf(stderr, "serr: inconsistent\n");
> +                       zerocopied = zerocopy;
> +               }
> +               if (cfg_verbose >= 2)
> +                       fprintf(stderr, "completed: %u (h=%u l=%u)\n",
> +                               range, hi, lo);
>
> -       /* Detect notification gaps. These should not happen often, if at all.
> -        * Gaps can occur due to drops, reordering and retransmissions.
> -        */
> -       if (lo != next_completion)
> -               fprintf(stderr, "gap: %u..%u does not append to %u\n",
> -                       lo, hi, next_completion);
> -       next_completion = hi + 1;
> -
> -       zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
> -       if (zerocopied == -1)
> -               zerocopied = zerocopy;
> -       else if (zerocopied != zerocopy) {
> -               fprintf(stderr, "serr: inconsistent\n");
> -               zerocopied = zerocopy;

Instead of indenting all this existing code please add a helper
do_recv_completion_zcookie, call that if SO_EE_ORIGIN_ZCOOKIE
and fall through to existing code otherwise.

> +               completions += range;
> +               break;
> +       }
> +       case SO_EE_ORIGIN_ZCOOKIE: {
> +               int ncookies, i;
> +
> +               if (serr->ee_errno != 0)
> +                       error(1, 0, "serr: wrong error code: %u",
> +                             serr->ee_errno);
> +               ncookies = serr->ee_data;

Verify ncookies <= MAX_..
Verify ret == ncookies * sizeof(uint32_t)

> +               for (i = 0; i < ncookies; i++)
> +                       if (cfg_verbose >= 2)
> +                               fprintf(stderr, "%d\n", ckbuf[i]);
> +               completions += ncookies;
> +               zerocopied = 1;

Unused in this path

> +               break;
> +       }
> +       default:
> +               error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
>         }
>
> -       if (cfg_verbose >= 2)
> -               fprintf(stderr, "completed: %u (h=%u l=%u)\n",
> -                       range, hi, lo);
> -
> -       completions += range;
>         return true;
>  }
Sowmini Varadhan Jan. 28, 2018, 4:18 p.m. UTC | #2
Re-ordering review comments for selftests a bit..

> > +       cm->cmsg_level = SOL_RDS;
> > +       cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
> > +       ++cookie;
> > +       memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
> cookie is not initialized

it's a static uint32_t in the function. It will get initialized to 0.
But the whole test program is rather simplistic, since it doesnt
actually verify the value of the cookies (hopefully me pending
updates to rds-stress will provide better testing/examples in this
space, because I actully have something multi-threaded, that must
necesarily use a truly random value for the cookie, and must really
make sure that the returned value from the kernel matches some
cookie that is pending)

> > +static void add_zcopy_cookie(struct msghdr *msg)
> Please just allocate ahead of time. And since cookie size is fixed
> and small just define a local variable on the stack in do_sendmsg.

Ok! I was just trying to make this as future-proof as possible, and 
provide useful example code for the next cut/paste developer to use.

> > +       cm->cmsg_len = CMSG_SPACE(sizeof(cookie));
> CMSG_LEN
  :
> Instead of indenting all this existing code please add a helper
> do_recv_completion_zcookie, call that if SO_EE_ORIGIN_ZCOOKIE
> and fall through to existing code otherwise.
  :
> Verify ncookies <= MAX_..
> Verify ret == ncookies * sizeof(uint32_t)

> > +               zerocopied = 1;
> Unused in this path

Ok, will fix all these.

Also, coalescing some related comments for patch 6/7:

> +       memset(&msg, 0, sizeof(msg));
> +       msg.msg_name = &din;
> +       msg.msg_namelen = sizeof(din);

Not read, so no need to configure. In that case a simpler
recv will do and is more concise than setting up recvmsg.

Real RDS applications actually have to use recvmsg, since they 
can get cmsg info about other things like congestion notification 
so let's leave this as recvmsg - it does no harm, and provides
test coverage for the recvmsg case as well.
Willem de Bruijn Jan. 28, 2018, 6:39 p.m. UTC | #3
On Sun, Jan 28, 2018 at 5:18 PM, Sowmini Varadhan
<sowmini.varadhan@oracle.com> wrote:
>
> Re-ordering review comments for selftests a bit..
>
>> > +       cm->cmsg_level = SOL_RDS;
>> > +       cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
>> > +       ++cookie;
>> > +       memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
>> cookie is not initialized
>
> it's a static uint32_t in the function. It will get initialized to 0.

Oh right. I missed that.

> But the whole test program is rather simplistic, since it doesnt
> actually verify the value of the cookies (hopefully me pending
> updates to rds-stress will provide better testing/examples in this
> space, because I actully have something multi-threaded, that must
> necesarily use a truly random value for the cookie, and must really
> make sure that the returned value from the kernel matches some
> cookie that is pending)

It might be nice to at least increment the variable on each
successful send. The test is single threaded anyway. And
then we can test that the returned values are in the defined
range.

> Also, coalescing some related comments for patch 6/7:
>
>> +       memset(&msg, 0, sizeof(msg));
>> +       msg.msg_name = &din;
>> +       msg.msg_namelen = sizeof(din);
>
> Not read, so no need to configure. In that case a simpler
> recv will do and is more concise than setting up recvmsg.
>
> Real RDS applications actually have to use recvmsg, since they
> can get cmsg info about other things like congestion notification
> so let's leave this as recvmsg - it does no harm, and provides
> test coverage for the recvmsg case as well.

Ah, okay. Yes, let's keep it as, then.
Sowmini Varadhan Jan. 28, 2018, 7:57 p.m. UTC | #4
On (01/28/18 19:39), Willem de Bruijn wrote:
> > But the whole test program is rather simplistic, since it doesnt
> > actually verify the value of the cookies (hopefully me pending
    :
> It might be nice to at least increment the variable on each
> successful send. The test is single threaded anyway. And
> then we can test that the returned values are in the defined
> range.

Yeah one thought that went through my head was that since
I now pick the cookes as consecutive numbers (1..N) anyway,
we could do a very simple-minded checksum on recv_completion
notification to check that the sum of all the cookie values
returned is actually N * (N+1)/2 - and flag errors/warnings
if it is not.. 

this would be a test enhancement I could do later.. first let 
me try to avoid having all this complex timer-triggered code.

--Sowmini
diff mbox series

Patch

diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 7a5b353..19d2b1a 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -168,7 +168,26 @@  static int do_accept(int fd)
 	return fd;
 }
 
-static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
+static void add_zcopy_cookie(struct msghdr *msg)
+{
+	int olen = msg->msg_controllen;
+	struct cmsghdr *cm;
+	static uint32_t cookie;
+
+	msg->msg_controllen += CMSG_SPACE(sizeof(cookie));
+	msg->msg_control = (struct cmsghdr *)realloc(msg->msg_control,
+						     msg->msg_controllen);
+	if (!msg->msg_control)
+		error(1, errno, "cannot allocate cmsghdr for cookie");
+	cm = (void *)msg->msg_control + olen;
+	cm->cmsg_len = CMSG_SPACE(sizeof(cookie));
+	cm->cmsg_level = SOL_RDS;
+	cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+	++cookie;
+	memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
+}
+
+static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
 {
 	int ret, len, i, flags;
 
@@ -177,8 +196,11 @@  static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
 		len += msg->msg_iov[i].iov_len;
 
 	flags = MSG_DONTWAIT;
-	if (do_zerocopy)
+	if (do_zerocopy) {
 		flags |= MSG_ZEROCOPY;
+		if (domain == PF_RDS)
+			add_zcopy_cookie(msg);
+	}
 
 	ret = sendmsg(fd, msg, flags);
 	if (ret == -1 && errno == EAGAIN)
@@ -194,6 +216,11 @@  static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
 		if (do_zerocopy && ret)
 			expected_completions++;
 	}
+	if (do_zerocopy && domain == PF_RDS) {
+		free(msg->msg_control);
+		msg->msg_control = NULL;
+		msg->msg_controllen = 0;
+	}
 
 	return true;
 }
@@ -220,7 +247,9 @@  static void do_sendmsg_corked(int fd, struct msghdr *msg)
 		msg->msg_iov[0].iov_len = payload_len + extra_len;
 		extra_len = 0;
 
-		do_sendmsg(fd, msg, do_zerocopy);
+		do_sendmsg(fd, msg, do_zerocopy,
+			   (cfg_dst_addr.ss_family == AF_INET ?
+			    PF_INET : PF_INET6));
 	}
 
 	do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
@@ -324,10 +353,17 @@  static bool do_recv_completion(int fd)
 	uint32_t hi, lo, range;
 	int ret, zerocopy;
 	char control[100];
+	uint32_t ckbuf[SO_EE_ORIGIN_MAX_ZCOOKIES];
+	struct iovec iov;
 
 	msg.msg_control = control;
 	msg.msg_controllen = sizeof(control);
 
+	iov.iov_base = ckbuf;
+	iov.iov_len = (SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(ckbuf[0]));
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
 	ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
 	if (ret == -1 && errno == EAGAIN)
 		return false;
@@ -346,36 +382,57 @@  static bool do_recv_completion(int fd)
 		      cm->cmsg_level, cm->cmsg_type);
 
 	serr = (void *) CMSG_DATA(cm);
-	if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
-		error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
-	if (serr->ee_errno != 0)
-		error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
 
-	hi = serr->ee_data;
-	lo = serr->ee_info;
-	range = hi - lo + 1;
+	switch (serr->ee_origin) {
+	case SO_EE_ORIGIN_ZEROCOPY: {
+		if (serr->ee_errno != 0)
+			error(1, 0, "serr: wrong error code: %u",
+			      serr->ee_errno);
+		hi = serr->ee_data;
+		lo = serr->ee_info;
+		range = hi - lo + 1;
+
+		/* Detect notification gaps. These should not happen often,
+		 * if at all.  Gaps can occur due to drops, reordering and
+		 * retransmissions.
+		 */
+		if (lo != next_completion)
+			fprintf(stderr, "gap: %u..%u does not append to %u\n",
+				lo, hi, next_completion);
+		next_completion = hi + 1;
+
+		zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
+		if (zerocopied == -1)
+			zerocopied = zerocopy;
+		else if (zerocopied != zerocopy) {
+			fprintf(stderr, "serr: inconsistent\n");
+			zerocopied = zerocopy;
+		}
+		if (cfg_verbose >= 2)
+			fprintf(stderr, "completed: %u (h=%u l=%u)\n",
+				range, hi, lo);
 
-	/* Detect notification gaps. These should not happen often, if at all.
-	 * Gaps can occur due to drops, reordering and retransmissions.
-	 */
-	if (lo != next_completion)
-		fprintf(stderr, "gap: %u..%u does not append to %u\n",
-			lo, hi, next_completion);
-	next_completion = hi + 1;
-
-	zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
-	if (zerocopied == -1)
-		zerocopied = zerocopy;
-	else if (zerocopied != zerocopy) {
-		fprintf(stderr, "serr: inconsistent\n");
-		zerocopied = zerocopy;
+		completions += range;
+		break;
+	}
+	case SO_EE_ORIGIN_ZCOOKIE: {
+		int ncookies, i;
+
+		if (serr->ee_errno != 0)
+			error(1, 0, "serr: wrong error code: %u",
+			      serr->ee_errno);
+		ncookies = serr->ee_data;
+		for (i = 0; i < ncookies; i++)
+			if (cfg_verbose >= 2)
+				fprintf(stderr, "%d\n", ckbuf[i]);
+		completions += ncookies;
+		zerocopied = 1;
+		break;
+	}
+	default:
+		error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
 	}
 
-	if (cfg_verbose >= 2)
-		fprintf(stderr, "completed: %u (h=%u l=%u)\n",
-			range, hi, lo);
-
-	completions += range;
 	return true;
 }
 
@@ -470,7 +527,7 @@  static void do_tx(int domain, int type, int protocol)
 		if (cfg_cork)
 			do_sendmsg_corked(fd, &msg);
 		else
-			do_sendmsg(fd, &msg, cfg_zerocopy);
+			do_sendmsg(fd, &msg, cfg_zerocopy, domain);
 
 		while (!do_poll(fd, POLLOUT)) {
 			if (cfg_zerocopy)