diff mbox

[ovs-dev,v1] netdev-linux: Replace sendmsg with sendmmsg in netdev_linux_send

Message ID 20170606012728.25535-1-sysugaozhenyu@gmail.com
State Changes Requested
Headers show

Commit Message

Gao Zhenyu June 6, 2017, 1:27 a.m. UTC
Sendmmsg can reduce cpu cycles in sending packets to kernel.
Replace sendmsg with sendmmsg in function netdev_linux_send to send
batch packets.

Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
---
 lib/netdev-linux.c | 122 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 73 insertions(+), 49 deletions(-)

Comments

Ben Pfaff June 6, 2017, 3:30 p.m. UTC | #1
On Tue, Jun 06, 2017 at 01:27:28AM +0000, Zhenyu Gao wrote:
> Sendmmsg can reduce cpu cycles in sending packets to kernel.
> Replace sendmsg with sendmmsg in function netdev_linux_send to send
> batch packets.
> 
> Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>

Thanks for the patch!  It's always better to improve performance, if it
can be done without making code harder to understand.

I believe that, earlier, you reported the actual performance
improvement.  Can you please include that information in the commit
message?

sendmmsg was introduced in Linux 3.0, so it's possible that some users
may not have a kernel that supports it.  I suggest that the code should
detect the ENOSYS error that indicates lack of support and fall back to
sendmsg() in that case.

sendmmsg was introduced in glibc 2.14.  Debian "oldstable" has glibc
2.13, so it's possible that some users may not build with a glibc that
supports it.  I suggest that the configure script should detect whether
sendmmsg() is available and fall back to sendmsg() in that case.

"checkpatch" reports:

    warning: 2 lines add whitespace errors.
    WARNING: Line has trailing whitespace
    #68 FILE: lib/netdev-linux.c:1246:
    resend_batch: 

    ERROR: Improper whitespace around control block
    #79 FILE: lib/netdev-linux.c:1257:
            } else if(retval != batch->count - resend_idx) {

    WARNING: Line has trailing whitespace
    #82 FILE: lib/netdev-linux.c:1260:
            } 

    ERROR: Inappropriate bracing around statement
    #88 FILE: lib/netdev-linux.c:1266:
            for (int i = 0; i < batch->count; ) {

Thanks,

Ben.
diff mbox

Patch

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 8ae740a..bc975cd 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1194,10 +1194,14 @@  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
 {
     int error = 0;
     int sock = 0;
+    ssize_t retval;
 
-    struct sockaddr_ll sll;
-    struct msghdr msg;
     if (!is_tap_netdev(netdev_)) {
+        uint32_t resend_idx = 0;
+        struct mmsghdr *msg;
+        struct iovec *iov;
+        struct sockaddr_ll sll;
+
         sock = af_packet_sock();
         if (sock < 0) {
             error = -sock;
@@ -1216,34 +1220,56 @@  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
         sll.sll_family = AF_PACKET;
         sll.sll_ifindex = ifindex;
 
-        msg.msg_name = &sll;
-        msg.msg_namelen = sizeof sll;
-        msg.msg_iovlen = 1;
-        msg.msg_control = NULL;
-        msg.msg_controllen = 0;
-        msg.msg_flags = 0;
-    }
+        msg = xmalloc(sizeof(*msg) * batch->count);
+        iov = xmalloc(sizeof(*iov) * batch->count);
 
-    /* 'i' is incremented only if there's no error */
-    for (int i = 0; i < batch->count; ) {
-        const void *data = dp_packet_data(batch->packets[i]);
-        size_t size = dp_packet_size(batch->packets[i]);
-        ssize_t retval;
+        for (int i = 0; i < batch->count; i++) {
+            const void *data = dp_packet_data(batch->packets[i]);
+            size_t size = dp_packet_size(batch->packets[i]);
 
-        /* Truncate the packet if it is configured. */
-        size -= dp_packet_get_cutlen(batch->packets[i]);
+            /* Truncate the packet if it is configured. */
+            size -= dp_packet_get_cutlen(batch->packets[i]);
 
-        if (!is_tap_netdev(netdev_)) {
             /* Use our AF_PACKET socket to send to this device. */
-            struct iovec iov;
 
-            iov.iov_base = CONST_CAST(void *, data);
-            iov.iov_len = size;
+            msg[i].msg_hdr.msg_name = &sll;
+            msg[i].msg_hdr.msg_namelen = sizeof sll;
+            msg[i].msg_hdr.msg_iovlen = 1;
+            msg[i].msg_hdr.msg_control = NULL;
+            msg[i].msg_hdr.msg_controllen = 0;
+            msg[i].msg_hdr.msg_flags = 0;
+            iov[i].iov_base = CONST_CAST(void *, data);
+            iov[i].iov_len = size;
+            msg[i].msg_hdr.msg_iov = &iov[i];
+        }
 
-            msg.msg_iov = &iov;
+resend_batch: 
+        retval = sendmmsg(sock, msg + resend_idx,
+                          batch->count - resend_idx, 0);
+        if (retval < 0) {
+            if (errno == EINTR) {
+                goto resend_batch;
+            }
+            /* The Linux AF_PACKET implementation never blocks waiting for
+             * room for packets, instead returning ENOBUFS.  Translate this
+             * into EAGAIN for the caller. */
+            error = errno == ENOBUFS ? EAGAIN : errno;
+        } else if(retval != batch->count - resend_idx) {
+            resend_idx += retval;
+            goto resend_batch;
+        } 
+
+        free(msg);
+        free(iov);
+    } else {
+        /* 'i' is incremented only if there's no error */
+        for (int i = 0; i < batch->count; ) {
+            const void *data = dp_packet_data(batch->packets[i]);
+            size_t size = dp_packet_size(batch->packets[i]);
+
+            /* Truncate the packet if it is configured. */
+            size -= dp_packet_get_cutlen(batch->packets[i]);
 
-            retval = sendmsg(sock, &msg, 0);
-        } else {
             /* Use the tap fd to send to this device.  This is essential for
              * tap devices, because packets sent to a tap device with an
              * AF_PACKET socket will loop back to be *received* again on the
@@ -1252,45 +1278,43 @@  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
             struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
             retval = write(netdev->tap_fd, data, size);
-        }
 
-        if (retval < 0) {
-            if (errno == EINTR) {
-                /* The send was interrupted by a signal.  Retry the packet by
-                 * continuing without incrementing 'i'.*/
-                continue;
-            } else if (errno == EIO && is_tap_netdev(netdev_)) {
-                /* The Linux tap driver returns EIO if the device is not up.
-                 * From the OVS side this is not an error, so ignore it. */
-            } else {
-                /* The Linux AF_PACKET implementation never blocks waiting for
-                 * room for packets, instead returning ENOBUFS.  Translate this
-                 * into EAGAIN for the caller. */
-                error = errno == ENOBUFS ? EAGAIN : errno;
+            if (retval < 0) {
+                if (errno == EINTR) {
+                    /* The send was interrupted by a signal.
+                     * Retry the packet by continuing without
+                     * incrementing 'i'.*/
+                    continue;
+                } else if (errno == EIO) {
+                    /* The Linux tap driver returns EIO if the device
+                     * is not up. From the OVS side this is not an error,
+                     * so ignore it. */
+                } else {
+                    break;
+                }
+            } else if (retval != size) {
+                VLOG_WARN_RL(&rl, "sent partial Ethernet packet"
+                                  " (%"PRIuSIZE" bytes"
+                                  " of %"PRIuSIZE") on %s", retval, size,
+                             netdev_get_name(netdev_));
+                error = EMSGSIZE;
                 break;
             }
-        } else if (retval != size) {
-            VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%"PRIuSIZE" bytes"
-                              " of %"PRIuSIZE") on %s", retval, size,
-                         netdev_get_name(netdev_));
-            error = EMSGSIZE;
-            break;
-        }
 
-        /* Process the next packet in the batch */
-        i++;
+            /* Process the next packet in the batch */
+            i++;
+        }
     }
 
     if (error && error != EAGAIN) {
-            VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
-                         netdev_get_name(netdev_), ovs_strerror(error));
+        VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
+                     netdev_get_name(netdev_), ovs_strerror(error));
     }
 
 free_batch:
     dp_packet_delete_batch(batch, may_steal);
 
     return error;
-
 }
 
 /* Registers with the poll loop to wake up from the next call to poll_block()