Patchwork e1000: multi-buffer packet support

login
register
mail settings
Submitter Michael S. Tsirkin
Date Feb. 3, 2011, 3:38 p.m.
Message ID <20110203153835.GA28388@redhat.com>
Download mbox | patch
Permalink /patch/81675/
State New
Headers show

Comments

Michael S. Tsirkin - Feb. 3, 2011, 3:38 p.m.
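The e1000 hardware supports multi-buffer packets larger than rxbuf_size,
but the emulation currently rejects any packet that does not fit in a
single receive buffer. Split such packets across multiple receive
descriptors instead.

This fixes the following scenario (on Linux):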
- in guest: ifconfig eth1 mtu 16110
- in host: ifconfig tap0 mtu 16110
           ping -s 16082 <guest-ip>

Red Hat bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=602205

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/e1000.c |   25 ++++++++++++++++++++-----
 1 files changed, 20 insertions(+), 5 deletions(-)
Alex Williamson - Feb. 3, 2011, 4:36 p.m.
On Thu, 2011-02-03 at 17:38 +0200, Michael S. Tsirkin wrote:
> e1000 supports multi-buffer packets larger than rxbuf_size.
> 
> This fixes the following (on linux):
> - in guest: ifconfig eth1 mtu 16110
> - in host: ifconfig tap0 mtu 16110
>            ping -s 16082 <guest-ip>
> 
> Red Hat bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=602205
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/e1000.c |   25 ++++++++++++++++++++-----
>  1 files changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/e1000.c b/hw/e1000.c
> index af101bd..2c133ab 100644
> --- a/hw/e1000.c
> +++ b/hw/e1000.c
> @@ -642,6 +642,8 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>      uint16_t vlan_special = 0;
>      uint8_t vlan_status = 0, vlan_offset = 0;
>      uint8_t min_buf[MIN_BUF_SIZE];
> +    size_t desc_offset;
> +    size_t desc_size;
>  
>      if (!(s->mac_reg[RCTL] & E1000_RCTL_EN))
>          return -1;
> @@ -654,7 +656,7 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>          size = sizeof(min_buf);
>      }
>  
> -    if (size > s->rxbuf_size) {
> +    if (0 && size > s->rxbuf_size) {
>          DBGOUT(RX, "packet too large for buffers (%lu > %d)\n",
>                 (unsigned long)size, s->rxbuf_size);
>          return -1;

Why are we saving this code if it's unreachable?

> @@ -672,8 +674,15 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>      }
>  
>      rdh_start = s->mac_reg[RDH];
> +    desc_offset = 0;
>      do {
> +        desc_size = size - desc_offset;
> +        if (desc_size > s->rxbuf_size) {
> +            desc_size = s->rxbuf_size;
> +        }
>          if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
> +            /* Discard all data written so far */
> +            s->mac_reg[RDH] = rdh_start;
>              set_ics(s, 0, E1000_ICS_RXO);
>              return -1;
>          }
> @@ -684,9 +693,15 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>          desc.status |= (vlan_status | E1000_RXD_STAT_DD);
>          if (desc.buffer_addr) {
>              cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
> -                                      (void *)(buf + vlan_offset), size);
> -            desc.length = cpu_to_le16(size + fcs_len(s));
> -            desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
> +                                      (void *)(buf + desc_offset + vlan_offset),
> +                                      desc_size);
> +            desc_offset += desc_size;
> +            if (desc_offset >= size) {
> +                desc.length = cpu_to_le16(desc_size + fcs_len(s));
> +                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
> +            } else {
> +                desc.length = cpu_to_le16(desc_size);
> +            }
>          } else { // as per intel docs; skip descriptors with null buf addr
>              DBGOUT(RX, "Null RX descriptor!!\n");
>          }
> @@ -702,7 +717,7 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>              set_ics(s, 0, E1000_ICS_RXO);
>              return -1;
>          }
> -    } while (desc.buffer_addr == 0);
> +    } while (desc_offset < size);
>  
>      s->mac_reg[GPRC]++;
>      s->mac_reg[TPR]++;
Stefan Hajnoczi - Feb. 3, 2011, 4:38 p.m.
On Thu, Feb 03, 2011 at 05:38:35PM +0200, Michael S. Tsirkin wrote:
> @@ -654,7 +656,7 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
>          size = sizeof(min_buf);
>      }
> 
> -    if (size > s->rxbuf_size) {
> +    if (0 && size > s->rxbuf_size) {
>          DBGOUT(RX, "packet too large for buffers (%lu > %d)\n",
>                 (unsigned long)size, s->rxbuf_size);
>          return -1;

Please remove completely or enhance this conditional to work in a
multi-buffer world.

Stefan
Michael S. Tsirkin - Feb. 3, 2011, 4:45 p.m.
On Thu, Feb 03, 2011 at 09:36:57AM -0700, Alex Williamson wrote:
> On Thu, 2011-02-03 at 17:38 +0200, Michael S. Tsirkin wrote:
> > e1000 supports multi-buffer packets larger than rxbuf_size.
> > 
> > This fixes the following (on linux):
> > - in guest: ifconfig eth1 mtu 16110
> > - in host: ifconfig tap0 mtu 16110
> >            ping -s 16082 <guest-ip>
> > 
> > Red Hat bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=602205
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  hw/e1000.c |   25 ++++++++++++++++++++-----
> >  1 files changed, 20 insertions(+), 5 deletions(-)
> > 
> > diff --git a/hw/e1000.c b/hw/e1000.c
> > index af101bd..2c133ab 100644
> > --- a/hw/e1000.c
> > +++ b/hw/e1000.c
> > @@ -642,6 +642,8 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
> >      uint16_t vlan_special = 0;
> >      uint8_t vlan_status = 0, vlan_offset = 0;
> >      uint8_t min_buf[MIN_BUF_SIZE];
> > +    size_t desc_offset;
> > +    size_t desc_size;
> >  
> >      if (!(s->mac_reg[RCTL] & E1000_RCTL_EN))
> >          return -1;
> > @@ -654,7 +656,7 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
> >          size = sizeof(min_buf);
> >      }
> >  
> > -    if (size > s->rxbuf_size) {
> > +    if (0 && size > s->rxbuf_size) {
> >          DBGOUT(RX, "packet too large for buffers (%lu > %d)\n",
> >                 (unsigned long)size, s->rxbuf_size);
> >          return -1;
> 
> Why are we saving this code if it's unreachable?

Just a leftover.

> > @@ -672,8 +674,15 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
> >      }
> >  
> >      rdh_start = s->mac_reg[RDH];
> > +    desc_offset = 0;
> >      do {
> > +        desc_size = size - desc_offset;
> > +        if (desc_size > s->rxbuf_size) {
> > +            desc_size = s->rxbuf_size;
> > +        }
> >          if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
> > +            /* Discard all data written so far */
> > +            s->mac_reg[RDH] = rdh_start;
> >              set_ics(s, 0, E1000_ICS_RXO);
> >              return -1;
> >          }
> > @@ -684,9 +693,15 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
> >          desc.status |= (vlan_status | E1000_RXD_STAT_DD);
> >          if (desc.buffer_addr) {
> >              cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
> > -                                      (void *)(buf + vlan_offset), size);
> > -            desc.length = cpu_to_le16(size + fcs_len(s));
> > -            desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
> > +                                      (void *)(buf + desc_offset + vlan_offset),
> > +                                      desc_size);
> > +            desc_offset += desc_size;
> > +            if (desc_offset >= size) {
> > +                desc.length = cpu_to_le16(desc_size + fcs_len(s));
> > +                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
> > +            } else {
> > +                desc.length = cpu_to_le16(desc_size);
> > +            }
> >          } else { // as per intel docs; skip descriptors with null buf addr
> >              DBGOUT(RX, "Null RX descriptor!!\n");
> >          }
> > @@ -702,7 +717,7 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
> >              set_ics(s, 0, E1000_ICS_RXO);
> >              return -1;
> >          }
> > -    } while (desc.buffer_addr == 0);
> > +    } while (desc_offset < size);
> >  
> >      s->mac_reg[GPRC]++;
> >      s->mac_reg[TPR]++;
> 
>

Patch

diff --git a/hw/e1000.c b/hw/e1000.c
index af101bd..2c133ab 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -642,6 +642,8 @@  e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
     uint16_t vlan_special = 0;
     uint8_t vlan_status = 0, vlan_offset = 0;
     uint8_t min_buf[MIN_BUF_SIZE];
+    size_t desc_offset;
+    size_t desc_size;
 
     if (!(s->mac_reg[RCTL] & E1000_RCTL_EN))
         return -1;
@@ -654,7 +656,7 @@  e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
         size = sizeof(min_buf);
     }
 
-    if (size > s->rxbuf_size) {
+    if (0 && size > s->rxbuf_size) {
         DBGOUT(RX, "packet too large for buffers (%lu > %d)\n",
                (unsigned long)size, s->rxbuf_size);
         return -1;
@@ -672,8 +674,15 @@  e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
     }
 
     rdh_start = s->mac_reg[RDH];
+    desc_offset = 0;
     do {
+        desc_size = size - desc_offset;
+        if (desc_size > s->rxbuf_size) {
+            desc_size = s->rxbuf_size;
+        }
         if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
+            /* Discard all data written so far */
+            s->mac_reg[RDH] = rdh_start;
             set_ics(s, 0, E1000_ICS_RXO);
             return -1;
         }
@@ -684,9 +693,15 @@  e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
         if (desc.buffer_addr) {
             cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
-                                      (void *)(buf + vlan_offset), size);
-            desc.length = cpu_to_le16(size + fcs_len(s));
-            desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
+                                      (void *)(buf + desc_offset + vlan_offset),
+                                      desc_size);
+            desc_offset += desc_size;
+            if (desc_offset >= size) {
+                desc.length = cpu_to_le16(desc_size + fcs_len(s));
+                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
+            } else {
+                desc.length = cpu_to_le16(desc_size);
+            }
         } else { // as per intel docs; skip descriptors with null buf addr
             DBGOUT(RX, "Null RX descriptor!!\n");
         }
@@ -702,7 +717,7 @@  e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
             set_ics(s, 0, E1000_ICS_RXO);
             return -1;
         }
-    } while (desc.buffer_addr == 0);
+    } while (desc_offset < size);
 
     s->mac_reg[GPRC]++;
     s->mac_reg[TPR]++;