diff mbox

e1000: NetClientInfo.receive_iov implemented

Message ID 1378731102-4005-1-git-send-email-v.maffione@gmail.com
State New
Headers show

Commit Message

Vincenzo Maffione Sept. 9, 2013, 12:51 p.m. UTC
This patch implements the NetClientInfo.receive_iov method for the
e1000 device emulation. In this way a network backend that uses
qemu_sendv_packet() can deliver the fragmented packet without
requiring an additional copy in the frontend/backend network code
(nc_sendv_compat() function).

The existing method NetClientInfo.receive has been reimplemented
using the new method.

Signed-off-by: Vincenzo Maffione <v.maffione@gmail.com>
---
 hw/net/e1000.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 58 insertions(+), 12 deletions(-)

I propose this patch also because our research group (University of Pisa,
Department of Computer Engineering) is working on the e1000 device
(optimizations and paravirtual extensions) and we have patches to
support the VALE switch as a network backend (see
http://info.iet.unipi.it/~luigi/vale/).
The VALE backend uses qemu_sendv_packet() to send fragmented packets: For
this reason we think it could be interesting to better support these packets
with e1000.

Comments

Stefan Hajnoczi Sept. 10, 2013, 8:31 a.m. UTC | #1
On Mon, Sep 09, 2013 at 02:51:42PM +0200, Vincenzo Maffione wrote:

Just two small style comments:

> @@ -834,11 +837,14 @@ e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
>      unsigned int n, rdt;
>      uint32_t rdh_start;
>      uint16_t vlan_special = 0;
> -    uint8_t vlan_status = 0, vlan_offset = 0;
> +    uint8_t vlan_status = 0;
>      uint8_t min_buf[MIN_BUF_SIZE];
>      size_t desc_offset;
>      size_t desc_size;
>      size_t total_size;
> +    size_t size = iov_size(iov, iovcnt), iov_ofs = 0;

Please keep these initializers on separate lines (not a hard rule, but I
find it clearer especially when calling functions):

size_t size = iov_size(iov, iovcnt);
size_t iov_ofs = 0;

> +    struct iovec iv;

This iovec is for min_buf[].  I suggest moving it below the min_buf[]
declaration and renaming it to make its purpose clearer:

struct iovec min_iovec;

Stefan
diff mbox

Patch

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index f5ebed4..70ab17f 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -32,6 +32,7 @@ 
 #include "hw/loader.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/dma.h"
+#include "qemu/iov.h"
 
 #include "e1000_regs.h"
 
@@ -64,6 +65,8 @@  static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
 /* this is the size past which hardware will drop packets when setting LPE=1 */
 #define MAXIMUM_ETHERNET_LPE_SIZE 16384
 
+#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
+
 /*
  * HW models:
  *  E1000_DEV_ID_82540EM works with Windows and Linux
@@ -825,7 +828,7 @@  static uint64_t rx_desc_base(E1000State *s)
 }
 
 static ssize_t
-e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
+e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
 {
     E1000State *s = qemu_get_nic_opaque(nc);
     PCIDevice *d = PCI_DEVICE(s);
@@ -834,11 +837,14 @@  e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
     unsigned int n, rdt;
     uint32_t rdh_start;
     uint16_t vlan_special = 0;
-    uint8_t vlan_status = 0, vlan_offset = 0;
+    uint8_t vlan_status = 0;
     uint8_t min_buf[MIN_BUF_SIZE];
     size_t desc_offset;
     size_t desc_size;
     size_t total_size;
+    size_t size = iov_size(iov, iovcnt), iov_ofs = 0;
+    struct iovec iv;
+    uint8_t *filter_buf = iov->iov_base;
 
     if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
         return -1;
@@ -850,10 +856,16 @@  e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 
     /* Pad to minimum Ethernet frame length */
     if (size < sizeof(min_buf)) {
-        memcpy(min_buf, buf, size);
+        iov_to_buf(iov, iovcnt, 0, min_buf, size);
         memset(&min_buf[size], 0, sizeof(min_buf) - size);
-        buf = min_buf;
-        size = sizeof(min_buf);
+        iv.iov_base = filter_buf = min_buf;
+        iv.iov_len = size = sizeof(min_buf);
+        iovcnt = 1;
+        iov = &iv;
+    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
+        /* This is very unlikely, but may happen. */
+        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
+        filter_buf = min_buf;
     }
 
     /* Discard oversized packets if !LPE and !SBP. */
@@ -864,14 +876,24 @@  e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
         return size;
     }
 
-    if (!receive_filter(s, buf, size))
+    if (!receive_filter(s, filter_buf, size)) {
         return size;
+    }
 
-    if (vlan_enabled(s) && is_vlan_packet(s, buf)) {
-        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(buf + 14)));
-        memmove((uint8_t *)buf + 4, buf, 12);
+    if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
+        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
+                                                                + 14)));
+        iov_ofs = 4;
+        if (filter_buf == iov->iov_base) {
+            memmove(filter_buf + 4, filter_buf, 12);
+        } else {
+            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
+            while (iov->iov_len <= iov_ofs) {
+                iov_ofs -= iov->iov_len;
+                iov++;
+            }
+        }
         vlan_status = E1000_RXD_STAT_VP;
-        vlan_offset = 4;
         size -= 4;
     }
 
@@ -893,12 +915,24 @@  e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
         if (desc.buffer_addr) {
             if (desc_offset < size) {
+                size_t iov_copy, copied = 0;
+                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                 size_t copy_size = size - desc_offset;
                 if (copy_size > s->rxbuf_size) {
                     copy_size = s->rxbuf_size;
                 }
-                pci_dma_write(d, le64_to_cpu(desc.buffer_addr),
-                              buf + desc_offset + vlan_offset, copy_size);
+                do {
+                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
+                    pci_dma_write(d, ba + copied,
+                                iov->iov_base + iov_ofs, iov_copy);
+                    copy_size -= iov_copy;
+                    copied += iov_copy;
+                    iov_ofs += iov_copy;
+                    if (iov_ofs == iov->iov_len) {
+                        iov++;
+                        iov_ofs = 0;
+                    }
+                } while (copy_size);
             }
             desc_offset += desc_size;
             desc.length = cpu_to_le16(desc_size);
@@ -948,6 +982,17 @@  e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
     return size;
 }
 
+static ssize_t
+e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
+{
+    const struct iovec iov = {
+        .iov_base = (uint8_t *)buf,
+        .iov_len = size
+    };
+
+    return e1000_receive_iov(nc, &iov, 1);
+}
+
 static uint32_t
 mac_readreg(E1000State *s, int index)
 {
@@ -1326,6 +1371,7 @@  static NetClientInfo net_e1000_info = {
     .size = sizeof(NICState),
     .can_receive = e1000_can_receive,
     .receive = e1000_receive,
+    .receive_iov = e1000_receive_iov,
     .cleanup = e1000_cleanup,
     .link_status_changed = e1000_set_link_status,
 };