Patchwork [PATCHv2] e1000: multi-buffer packet support

login
register
mail settings
Submitter Michael S. Tsirkin
Date Feb. 3, 2011, 4:49 p.m.
Message ID <20110203164910.GA10672@redhat.com>
Download mbox | patch
Permalink /patch/81680/
State New
Headers show

Comments

Michael S. Tsirkin - Feb. 3, 2011, 4:49 p.m.
e1000 supports multi-buffer packets larger than rxbuf_size.

This fixes the following (on linux):
- in guest: ifconfig eth1 mtu 16110
- in host: ifconfig tap0 mtu 16110
           ping -s 16082 <guest-ip>

Red Hat bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=602205

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

Changes from v1:
	removed dead code

 hw/e1000.c |   29 +++++++++++++++++++----------
 1 files changed, 19 insertions(+), 10 deletions(-)
Alex Williamson - Feb. 3, 2011, 5:11 p.m.
On Thu, 2011-02-03 at 18:49 +0200, Michael S. Tsirkin wrote:
> e1000 supports multi-buffer packets larger than rxbuf_size.
> 
> This fixes the following (on linux):
> - in guest: ifconfig eth1 mtu 16110
> - in host: ifconfig tap0 mtu 16110
>            ping -s 16082 <guest-ip>
> 
> Red Hat bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=602205
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> 
> Changes from v1:
> 	removed dead code
> 
>  hw/e1000.c |   29 +++++++++++++++++++----------
>  1 files changed, 19 insertions(+), 10 deletions(-)

Acked-by: Alex Williamson <alex.williamson@redhat.com>

> diff --git a/hw/e1000.c b/hw/e1000.c
> index af101bd..3427ff3 100644
> --- a/hw/e1000.c
> +++ b/hw/e1000.c
> @@ -642,6 +642,8 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>      uint16_t vlan_special = 0;
>      uint8_t vlan_status = 0, vlan_offset = 0;
>      uint8_t min_buf[MIN_BUF_SIZE];
> +    size_t desc_offset;
> +    size_t desc_size;
>  
>      if (!(s->mac_reg[RCTL] & E1000_RCTL_EN))
>          return -1;
> @@ -654,12 +656,6 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>          size = sizeof(min_buf);
>      }
>  
> -    if (size > s->rxbuf_size) {
> -        DBGOUT(RX, "packet too large for buffers (%lu > %d)\n",
> -               (unsigned long)size, s->rxbuf_size);
> -        return -1;
> -    }
> -
>      if (!receive_filter(s, buf, size))
>          return size;
>  
> @@ -672,8 +668,15 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>      }
>  
>      rdh_start = s->mac_reg[RDH];
> +    desc_offset = 0;
>      do {
> +        desc_size = size - desc_offset;
> +        if (desc_size > s->rxbuf_size) {
> +            desc_size = s->rxbuf_size;
> +        }
>          if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
> +            /* Discard all data written so far */
> +            s->mac_reg[RDH] = rdh_start;
>              set_ics(s, 0, E1000_ICS_RXO);
>              return -1;
>          }
> @@ -684,9 +687,15 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>          desc.status |= (vlan_status | E1000_RXD_STAT_DD);
>          if (desc.buffer_addr) {
>              cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
> -                                      (void *)(buf + vlan_offset), size);
> -            desc.length = cpu_to_le16(size + fcs_len(s));
> -            desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
> +                                      (void *)(buf + desc_offset + vlan_offset),
> +                                      desc_size);
> +            desc_offset += desc_size;
> +            if (desc_offset >= size) {
> +                desc.length = cpu_to_le16(desc_size + fcs_len(s));
> +                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
> +            } else {
> +                desc.length = cpu_to_le16(desc_size);
> +            }
>          } else { // as per intel docs; skip descriptors with null buf addr
>              DBGOUT(RX, "Null RX descriptor!!\n");
>          }
> @@ -702,7 +711,7 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>              set_ics(s, 0, E1000_ICS_RXO);
>              return -1;
>          }
> -    } while (desc.buffer_addr == 0);
> +    } while (desc_offset < size);
>  
>      s->mac_reg[GPRC]++;
>      s->mac_reg[TPR]++;
Stefan Hajnoczi - Feb. 4, 2011, 6:20 a.m.
On Thu, Feb 3, 2011 at 4:49 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> e1000 supports multi-buffer packets larger than rxbuf_size.
>
> This fixes the following (on linux):
> - in guest: ifconfig eth1 mtu 16110
> - in host: ifconfig tap0 mtu 16110
>           ping -s 16082 <guest-ip>
>
> Red Hat bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=602205
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>
> Changes from v1:
>        removed dead code
>
>  hw/e1000.c |   29 +++++++++++++++++++----------
>  1 files changed, 19 insertions(+), 10 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Kevin Wolf - Feb. 8, 2011, 2:07 p.m.
Am 03.02.2011 17:49, schrieb Michael S. Tsirkin:
> e1000 supports multi-buffer packets larger than rxbuf_size.
> 
> This fixes the following (on linux):
> - in guest: ifconfig eth1 mtu 16110
> - in host: ifconfig tap0 mtu 16110
>            ping -s 16082 <guest-ip>
> 
> Red Hat bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=602205
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> 
> Changes from v1:
> 	removed dead code
> 
>  hw/e1000.c |   29 +++++++++++++++++++----------
>  1 files changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/e1000.c b/hw/e1000.c
> index af101bd..3427ff3 100644
> --- a/hw/e1000.c
> +++ b/hw/e1000.c
> @@ -642,6 +642,8 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>      uint16_t vlan_special = 0;
>      uint8_t vlan_status = 0, vlan_offset = 0;
>      uint8_t min_buf[MIN_BUF_SIZE];
> +    size_t desc_offset;
> +    size_t desc_size;
>  
>      if (!(s->mac_reg[RCTL] & E1000_RCTL_EN))
>          return -1;
> @@ -654,12 +656,6 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>          size = sizeof(min_buf);
>      }
>  
> -    if (size > s->rxbuf_size) {
> -        DBGOUT(RX, "packet too large for buffers (%lu > %d)\n",
> -               (unsigned long)size, s->rxbuf_size);
> -        return -1;
> -    }
> -
>      if (!receive_filter(s, buf, size))
>          return size;
>  
> @@ -672,8 +668,15 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>      }
>  
>      rdh_start = s->mac_reg[RDH];
> +    desc_offset = 0;
>      do {
> +        desc_size = size - desc_offset;
> +        if (desc_size > s->rxbuf_size) {
> +            desc_size = s->rxbuf_size;
> +        }
>          if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
> +            /* Discard all data written so far */
> +            s->mac_reg[RDH] = rdh_start;
>              set_ics(s, 0, E1000_ICS_RXO);
>              return -1;
>          }
> @@ -684,9 +687,15 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>          desc.status |= (vlan_status | E1000_RXD_STAT_DD);
>          if (desc.buffer_addr) {
>              cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
> -                                      (void *)(buf + vlan_offset), size);
> -            desc.length = cpu_to_le16(size + fcs_len(s));
> -            desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
> +                                      (void *)(buf + desc_offset + vlan_offset),
> +                                      desc_size);
> +            desc_offset += desc_size;
> +            if (desc_offset >= size) {
> +                desc.length = cpu_to_le16(desc_size + fcs_len(s));

I think this is not quite right: What happens if
desc_size + fcs_len(s) > s->rxbuf_size? IIUC, we would seemingly overflow
the guest's buffer (in reality we leave the extra bytes untouched, but we
may confuse the guest).

Kevin
Michael S. Tsirkin - Feb. 8, 2011, 4:02 p.m.
On Tue, Feb 08, 2011 at 03:07:58PM +0100, Kevin Wolf wrote:
> Am 03.02.2011 17:49, schrieb Michael S. Tsirkin:
> > e1000 supports multi-buffer packets larger than rxbuf_size.
> > 
> > This fixes the following (on linux):
> > - in guest: ifconfig eth1 mtu 16110
> > - in host: ifconfig tap0 mtu 16110
> >            ping -s 16082 <guest-ip>
> > 
> > Red Hat bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=602205
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > 
> > Changes from v1:
> > 	removed dead code
> > 
> >  hw/e1000.c |   29 +++++++++++++++++++----------
> >  1 files changed, 19 insertions(+), 10 deletions(-)
> > 
> > diff --git a/hw/e1000.c b/hw/e1000.c
> > index af101bd..3427ff3 100644
> > --- a/hw/e1000.c
> > +++ b/hw/e1000.c
> > @@ -642,6 +642,8 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
> >      uint16_t vlan_special = 0;
> >      uint8_t vlan_status = 0, vlan_offset = 0;
> >      uint8_t min_buf[MIN_BUF_SIZE];
> > +    size_t desc_offset;
> > +    size_t desc_size;
> >  
> >      if (!(s->mac_reg[RCTL] & E1000_RCTL_EN))
> >          return -1;
> > @@ -654,12 +656,6 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
> >          size = sizeof(min_buf);
> >      }
> >  
> > -    if (size > s->rxbuf_size) {
> > -        DBGOUT(RX, "packet too large for buffers (%lu > %d)\n",
> > -               (unsigned long)size, s->rxbuf_size);
> > -        return -1;
> > -    }
> > -
> >      if (!receive_filter(s, buf, size))
> >          return size;
> >  
> > @@ -672,8 +668,15 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
> >      }
> >  
> >      rdh_start = s->mac_reg[RDH];
> > +    desc_offset = 0;
> >      do {
> > +        desc_size = size - desc_offset;
> > +        if (desc_size > s->rxbuf_size) {
> > +            desc_size = s->rxbuf_size;
> > +        }
> >          if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
> > +            /* Discard all data written so far */
> > +            s->mac_reg[RDH] = rdh_start;
> >              set_ics(s, 0, E1000_ICS_RXO);
> >              return -1;
> >          }
> > @@ -684,9 +687,15 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
> >          desc.status |= (vlan_status | E1000_RXD_STAT_DD);
> >          if (desc.buffer_addr) {
> >              cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
> > -                                      (void *)(buf + vlan_offset), size);
> > -            desc.length = cpu_to_le16(size + fcs_len(s));
> > -            desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
> > +                                      (void *)(buf + desc_offset + vlan_offset),
> > +                                      desc_size);
> > +            desc_offset += desc_size;
> > +            if (desc_offset >= size) {
> > +                desc.length = cpu_to_le16(desc_size + fcs_len(s));
> 
> I think this is not quite right: What happens if
> desc_size + fcs_len(s) > s->rxbuf_size? IIUC, we would seemingly overflow
> the guest's buffer (in reality we leave the extra bytes untouched, but we
> may confuse the guest).
> 
> Kevin

Hmm, good point.

Patch

diff --git a/hw/e1000.c b/hw/e1000.c
index af101bd..3427ff3 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -642,6 +642,8 @@  e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
     uint16_t vlan_special = 0;
     uint8_t vlan_status = 0, vlan_offset = 0;
     uint8_t min_buf[MIN_BUF_SIZE];
+    size_t desc_offset;
+    size_t desc_size;
 
     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN))
         return -1;
@@ -654,12 +656,6 @@  e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
         size = sizeof(min_buf);
     }
 
-    if (size > s->rxbuf_size) {
-        DBGOUT(RX, "packet too large for buffers (%lu > %d)\n",
-               (unsigned long)size, s->rxbuf_size);
-        return -1;
-    }
-
     if (!receive_filter(s, buf, size))
         return size;
 
@@ -672,8 +668,15 @@  e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
     }
 
     rdh_start = s->mac_reg[RDH];
+    desc_offset = 0;
     do {
+        desc_size = size - desc_offset;
+        if (desc_size > s->rxbuf_size) {
+            desc_size = s->rxbuf_size;
+        }
         if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
+            /* Discard all data written so far */
+            s->mac_reg[RDH] = rdh_start;
             set_ics(s, 0, E1000_ICS_RXO);
             return -1;
         }
@@ -684,9 +687,15 @@  e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
         if (desc.buffer_addr) {
             cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
-                                      (void *)(buf + vlan_offset), size);
-            desc.length = cpu_to_le16(size + fcs_len(s));
-            desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
+                                      (void *)(buf + desc_offset + vlan_offset),
+                                      desc_size);
+            desc_offset += desc_size;
+            if (desc_offset >= size) {
+                desc.length = cpu_to_le16(desc_size + fcs_len(s));
+                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
+            } else {
+                desc.length = cpu_to_le16(desc_size);
+            }
         } else { // as per intel docs; skip descriptors with null buf addr
             DBGOUT(RX, "Null RX descriptor!!\n");
         }
@@ -702,7 +711,7 @@  e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
             set_ics(s, 0, E1000_ICS_RXO);
             return -1;
         }
-    } while (desc.buffer_addr == 0);
+    } while (desc_offset < size);
 
     s->mac_reg[GPRC]++;
     s->mac_reg[TPR]++;