Patchwork [V2] rtl8139: check the buffer availiability

login
register
mail settings
Submitter Jason Wang
Date Oct. 17, 2011, 2:55 a.m.
Message ID <20111017025557.4572.2621.stgit@dhcp-8-146.nay.redhat.com>
Download mbox | patch
Permalink /patch/120086/
State New
Headers show

Comments

Jason Wang - Oct. 17, 2011, 2:55 a.m.
Reduce spurious packet drops on RX ring empty when in c+ mode by verifying that
we have at least 1 buffer ahead of the time.

Change from v1:
Fix style comments from Stefan.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/rtl8139.c |   44 ++++++++++++++++++++++++++++++--------------
 1 files changed, 30 insertions(+), 14 deletions(-)
Mark Wu - Oct. 18, 2011, 10:09 a.m.
Hi Jason,
Could you please elaborate what problem you try to resolve by this 
patch?  And do you think we need notify I/O thread re-polling tap fd 
when receive descriptor becomes available? Otherwise, tap read polling 
will be disabled until the I/O handlers are updated by other reasons. Am 
I right?


On 10/17/2011 10:55 AM, Jason Wang wrote:
> Reduce spurious packet drops on RX ring empty when in c+ mode by verifying that
> we have at least 1 buffer ahead of the time.
>
> Change from v1:
> Fix style comments from Stefan.
>
> Signed-off-by: Jason Wang<jasowang@redhat.com>
> ---
>   hw/rtl8139.c |   44 ++++++++++++++++++++++++++++++--------------
>   1 files changed, 30 insertions(+), 14 deletions(-)
>
> diff --git a/hw/rtl8139.c b/hw/rtl8139.c
> index 3753950..bcbc5e3 100644
> --- a/hw/rtl8139.c
> +++ b/hw/rtl8139.c
> @@ -84,6 +84,19 @@
>   #define VLAN_TCI_LEN 2
>   #define VLAN_HLEN (ETHER_TYPE_LEN + VLAN_TCI_LEN)
>
> +/* w0 ownership flag */
> +#define CP_RX_OWN (1<<31)
> +/* w0 end of ring flag */
> +#define CP_RX_EOR (1<<30)
> +/* w0 bits 0...12 : buffer size */
> +#define CP_RX_BUFFER_SIZE_MASK ((1<<13) - 1)
> +/* w1 tag available flag */
> +#define CP_RX_TAVA (1<<16)
> +/* w1 bits 0...15 : VLAN tag */
> +#define CP_RX_VLAN_TAG_MASK ((1<<16) - 1)
> +/* w2 low  32bit of Rx buffer ptr */
> +/* w3 high 32bit of Rx buffer ptr */
> +
>   #if defined (DEBUG_RTL8139)
>   #  define DPRINTF(fmt, ...) \
>       do { fprintf(stderr, "RTL8139: " fmt, ## __VA_ARGS__); } while (0)
> @@ -805,6 +818,22 @@ static inline target_phys_addr_t rtl8139_addr64(uint32_t low, uint32_t high)
>   #endif
>   }
>
> +/* Verify that we have at least one available rx buffer */
> +static int rtl8139_cp_has_rxbuf(RTL8139State *s)
> +{
> +    uint32_t val, rxdw0;
> +    target_phys_addr_t cplus_rx_ring_desc = rtl8139_addr64(s->RxRingAddrLO,
> +                                                           s->RxRingAddrHI);
> +    cplus_rx_ring_desc += 16 * s->currCPlusRxDesc;
> +    cpu_physical_memory_read(cplus_rx_ring_desc,&val, 4);
> +    rxdw0 = le32_to_cpu(val);
> +    if (rxdw0&  CP_RX_OWN) {
> +        return 1;
> +    } else {
> +        return 0;
> +    }
> +}
> +
>   static int rtl8139_can_receive(VLANClientState *nc)
>   {
>       RTL8139State *s = DO_UPCAST(NICState, nc, nc)->opaque;
> @@ -819,7 +848,7 @@ static int rtl8139_can_receive(VLANClientState *nc)
>       if (rtl8139_cp_receiver_enabled(s)) {
>           /* ??? Flow control not implemented in c+ mode.
>              This is a hack to work around slirp deficiencies anyway.  */
> -        return 1;
> +        return rtl8139_cp_has_rxbuf(s);
>       } else {
>           avail = MOD2(s->RxBufferSize + s->RxBufPtr - s->RxBufAddr,
>                        s->RxBufferSize);
> @@ -965,19 +994,6 @@ static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_
>
>           /* begin C+ receiver mode */
>
> -/* w0 ownership flag */
> -#define CP_RX_OWN (1<<31)
> -/* w0 end of ring flag */
> -#define CP_RX_EOR (1<<30)
> -/* w0 bits 0...12 : buffer size */
> -#define CP_RX_BUFFER_SIZE_MASK ((1<<13) - 1)
> -/* w1 tag available flag */
> -#define CP_RX_TAVA (1<<16)
> -/* w1 bits 0...15 : VLAN tag */
> -#define CP_RX_VLAN_TAG_MASK ((1<<16) - 1)
> -/* w2 low  32bit of Rx buffer ptr */
> -/* w3 high 32bit of Rx buffer ptr */
> -
>           int descriptor = s->currCPlusRxDesc;
>           target_phys_addr_t cplus_rx_ring_desc;
>
>
>
Michael S. Tsirkin - Oct. 19, 2011, 1:28 a.m.
On Mon, Oct 17, 2011 at 10:55:57AM +0800, Jason Wang wrote:
> Reduce spurious packet drops on RX ring empty when in c+ mode by verifying that
> we have at least 1 buffer ahead of the time.
> 
> Change from v1:
> Fix style comments from Stefan.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  hw/rtl8139.c |   44 ++++++++++++++++++++++++++++++--------------
>  1 files changed, 30 insertions(+), 14 deletions(-)
> 
> diff --git a/hw/rtl8139.c b/hw/rtl8139.c
> index 3753950..bcbc5e3 100644
> --- a/hw/rtl8139.c
> +++ b/hw/rtl8139.c
> @@ -84,6 +84,19 @@
>  #define VLAN_TCI_LEN 2
>  #define VLAN_HLEN (ETHER_TYPE_LEN + VLAN_TCI_LEN)
>  
> +/* w0 ownership flag */
> +#define CP_RX_OWN (1<<31)
> +/* w0 end of ring flag */
> +#define CP_RX_EOR (1<<30)
> +/* w0 bits 0...12 : buffer size */
> +#define CP_RX_BUFFER_SIZE_MASK ((1<<13) - 1)
> +/* w1 tag available flag */
> +#define CP_RX_TAVA (1<<16)
> +/* w1 bits 0...15 : VLAN tag */
> +#define CP_RX_VLAN_TAG_MASK ((1<<16) - 1)
> +/* w2 low  32bit of Rx buffer ptr */
> +/* w3 high 32bit of Rx buffer ptr */
> +
>  #if defined (DEBUG_RTL8139)
>  #  define DPRINTF(fmt, ...) \
>      do { fprintf(stderr, "RTL8139: " fmt, ## __VA_ARGS__); } while (0)
> @@ -805,6 +818,22 @@ static inline target_phys_addr_t rtl8139_addr64(uint32_t low, uint32_t high)
>  #endif
>  }
>  
> +/* Verify that we have at least one available rx buffer */
> +static int rtl8139_cp_has_rxbuf(RTL8139State *s)
> +{
> +    uint32_t val, rxdw0;
> +    target_phys_addr_t cplus_rx_ring_desc = rtl8139_addr64(s->RxRingAddrLO,
> +                                                           s->RxRingAddrHI);
> +    cplus_rx_ring_desc += 16 * s->currCPlusRxDesc;
> +    cpu_physical_memory_read(cplus_rx_ring_desc, &val, 4);

Interesting. Please note that cpu_physical_memory_read is not
done atomically. Can guest be writing the value while
we read it? If yes we'll get a corrupted value here,
because CP_RX_OWN is the high bit so it is read last.
Correct?

I realize we have the same pattern in other places in this
device, probably a bug as well?


> +    rxdw0 = le32_to_cpu(val);
> +    if (rxdw0 & CP_RX_OWN) {
> +        return 1;
> +    } else {
> +        return 0;
> +    }

Do we need to check that buffer size is large enough
to include the packet like we do for non c+ mode?

> +}
> +
>  static int rtl8139_can_receive(VLANClientState *nc)
>  {
>      RTL8139State *s = DO_UPCAST(NICState, nc, nc)->opaque;
> @@ -819,7 +848,7 @@ static int rtl8139_can_receive(VLANClientState *nc)
>      if (rtl8139_cp_receiver_enabled(s)) {
>          /* ??? Flow control not implemented in c+ mode.
>             This is a hack to work around slirp deficiencies anyway.  */
> -        return 1;
> +        return rtl8139_cp_has_rxbuf(s);
>      } else {
>          avail = MOD2(s->RxBufferSize + s->RxBufPtr - s->RxBufAddr,
>                       s->RxBufferSize);
> @@ -965,19 +994,6 @@ static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_
>  
>          /* begin C+ receiver mode */
>  
> -/* w0 ownership flag */
> -#define CP_RX_OWN (1<<31)
> -/* w0 end of ring flag */
> -#define CP_RX_EOR (1<<30)
> -/* w0 bits 0...12 : buffer size */
> -#define CP_RX_BUFFER_SIZE_MASK ((1<<13) - 1)
> -/* w1 tag available flag */
> -#define CP_RX_TAVA (1<<16)
> -/* w1 bits 0...15 : VLAN tag */
> -#define CP_RX_VLAN_TAG_MASK ((1<<16) - 1)
> -/* w2 low  32bit of Rx buffer ptr */
> -/* w3 high 32bit of Rx buffer ptr */
> -
>          int descriptor = s->currCPlusRxDesc;
>          target_phys_addr_t cplus_rx_ring_desc;
>
Jason Wang - Oct. 22, 2011, 4:18 a.m.
On 10/18/2011 06:09 PM, Mark Wu wrote:
> Hi Jason,
> Could you please elaborate what problem you try to resolve by this
> patch?  And do you think we need notify I/O thread re-polling tap fd
> when receive descriptor becomes available? Otherwise, tap read polling
> will be disabled until the I/O handlers are updated by other reasons. Am
> I right?

Hi Mark, sorry for the late reply.

I think this patch may help to reduce packets dropping at the head of
the queue ( tap or netqueue in qemu) which may lead out of order packet
receiving in guest stack.

For the iothread notification, it looks impossible for 8139 as the
ownership was tracked through descriptor it self instead of a dedicated
register and there's no register which could be used to 'kick' 8139 for
packet receiving. When guest have refilled rx buffer of 8139cp,
tap_can_send() would return true and qemu would re-pool it. This may
introduce some latency when qemu is doning a block select() in
main_loop_wait().

Thanks.
> 
> 
> On 10/17/2011 10:55 AM, Jason Wang wrote:
>> Reduce spurious packet drops on RX ring empty when in c+ mode by
>> verifying that
>> we have at least 1 buffer ahead of the time.
>>
>> Change from v1:
>> Fix style comments from Stefan.
>>
>> Signed-off-by: Jason Wang<jasowang@redhat.com>
>> ---
>>   hw/rtl8139.c |   44 ++++++++++++++++++++++++++++++--------------
>>   1 files changed, 30 insertions(+), 14 deletions(-)
>>
>> diff --git a/hw/rtl8139.c b/hw/rtl8139.c
>> index 3753950..bcbc5e3 100644
>> --- a/hw/rtl8139.c
>> +++ b/hw/rtl8139.c
>> @@ -84,6 +84,19 @@
>>   #define VLAN_TCI_LEN 2
>>   #define VLAN_HLEN (ETHER_TYPE_LEN + VLAN_TCI_LEN)
>>
>> +/* w0 ownership flag */
>> +#define CP_RX_OWN (1<<31)
>> +/* w0 end of ring flag */
>> +#define CP_RX_EOR (1<<30)
>> +/* w0 bits 0...12 : buffer size */
>> +#define CP_RX_BUFFER_SIZE_MASK ((1<<13) - 1)
>> +/* w1 tag available flag */
>> +#define CP_RX_TAVA (1<<16)
>> +/* w1 bits 0...15 : VLAN tag */
>> +#define CP_RX_VLAN_TAG_MASK ((1<<16) - 1)
>> +/* w2 low  32bit of Rx buffer ptr */
>> +/* w3 high 32bit of Rx buffer ptr */
>> +
>>   #if defined (DEBUG_RTL8139)
>>   #  define DPRINTF(fmt, ...) \
>>       do { fprintf(stderr, "RTL8139: " fmt, ## __VA_ARGS__); } while (0)
>> @@ -805,6 +818,22 @@ static inline target_phys_addr_t
>> rtl8139_addr64(uint32_t low, uint32_t high)
>>   #endif
>>   }
>>
>> +/* Verify that we have at least one available rx buffer */
>> +static int rtl8139_cp_has_rxbuf(RTL8139State *s)
>> +{
>> +    uint32_t val, rxdw0;
>> +    target_phys_addr_t cplus_rx_ring_desc =
>> rtl8139_addr64(s->RxRingAddrLO,
>> +                                                          
>> s->RxRingAddrHI);
>> +    cplus_rx_ring_desc += 16 * s->currCPlusRxDesc;
>> +    cpu_physical_memory_read(cplus_rx_ring_desc,&val, 4);
>> +    rxdw0 = le32_to_cpu(val);
>> +    if (rxdw0&  CP_RX_OWN) {
>> +        return 1;
>> +    } else {
>> +        return 0;
>> +    }
>> +}
>> +
>>   static int rtl8139_can_receive(VLANClientState *nc)
>>   {
>>       RTL8139State *s = DO_UPCAST(NICState, nc, nc)->opaque;
>> @@ -819,7 +848,7 @@ static int rtl8139_can_receive(VLANClientState *nc)
>>       if (rtl8139_cp_receiver_enabled(s)) {
>>           /* ??? Flow control not implemented in c+ mode.
>>              This is a hack to work around slirp deficiencies anyway.  */
>> -        return 1;
>> +        return rtl8139_cp_has_rxbuf(s);
>>       } else {
>>           avail = MOD2(s->RxBufferSize + s->RxBufPtr - s->RxBufAddr,
>>                        s->RxBufferSize);
>> @@ -965,19 +994,6 @@ static ssize_t rtl8139_do_receive(VLANClientState
>> *nc, const uint8_t *buf, size_
>>
>>           /* begin C+ receiver mode */
>>
>> -/* w0 ownership flag */
>> -#define CP_RX_OWN (1<<31)
>> -/* w0 end of ring flag */
>> -#define CP_RX_EOR (1<<30)
>> -/* w0 bits 0...12 : buffer size */
>> -#define CP_RX_BUFFER_SIZE_MASK ((1<<13) - 1)
>> -/* w1 tag available flag */
>> -#define CP_RX_TAVA (1<<16)
>> -/* w1 bits 0...15 : VLAN tag */
>> -#define CP_RX_VLAN_TAG_MASK ((1<<16) - 1)
>> -/* w2 low  32bit of Rx buffer ptr */
>> -/* w3 high 32bit of Rx buffer ptr */
>> -
>>           int descriptor = s->currCPlusRxDesc;
>>           target_phys_addr_t cplus_rx_ring_desc;
>>
>>
>>
> 
>
Jason Wang - Oct. 22, 2011, 4:30 a.m.
On 10/19/2011 09:28 AM, Michael S. Tsirkin wrote:
> On Mon, Oct 17, 2011 at 10:55:57AM +0800, Jason Wang wrote:
>> Reduce spurious packet drops on RX ring empty when in c+ mode by verifying that
>> we have at least 1 buffer ahead of the time.
>>
>> Change from v1:
>> Fix style comments from Stefan.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>  hw/rtl8139.c |   44 ++++++++++++++++++++++++++++++--------------
>>  1 files changed, 30 insertions(+), 14 deletions(-)
>>
>> diff --git a/hw/rtl8139.c b/hw/rtl8139.c
>> index 3753950..bcbc5e3 100644
>> --- a/hw/rtl8139.c
>> +++ b/hw/rtl8139.c
>> @@ -84,6 +84,19 @@
>>  #define VLAN_TCI_LEN 2
>>  #define VLAN_HLEN (ETHER_TYPE_LEN + VLAN_TCI_LEN)
>>  
>> +/* w0 ownership flag */
>> +#define CP_RX_OWN (1<<31)
>> +/* w0 end of ring flag */
>> +#define CP_RX_EOR (1<<30)
>> +/* w0 bits 0...12 : buffer size */
>> +#define CP_RX_BUFFER_SIZE_MASK ((1<<13) - 1)
>> +/* w1 tag available flag */
>> +#define CP_RX_TAVA (1<<16)
>> +/* w1 bits 0...15 : VLAN tag */
>> +#define CP_RX_VLAN_TAG_MASK ((1<<16) - 1)
>> +/* w2 low  32bit of Rx buffer ptr */
>> +/* w3 high 32bit of Rx buffer ptr */
>> +
>>  #if defined (DEBUG_RTL8139)
>>  #  define DPRINTF(fmt, ...) \
>>      do { fprintf(stderr, "RTL8139: " fmt, ## __VA_ARGS__); } while (0)
>> @@ -805,6 +818,22 @@ static inline target_phys_addr_t rtl8139_addr64(uint32_t low, uint32_t high)
>>  #endif
>>  }
>>  
>> +/* Verify that we have at least one available rx buffer */
>> +static int rtl8139_cp_has_rxbuf(RTL8139State *s)
>> +{
>> +    uint32_t val, rxdw0;
>> +    target_phys_addr_t cplus_rx_ring_desc = rtl8139_addr64(s->RxRingAddrLO,
>> +                                                           s->RxRingAddrHI);
>> +    cplus_rx_ring_desc += 16 * s->currCPlusRxDesc;
>> +    cpu_physical_memory_read(cplus_rx_ring_desc, &val, 4);
> 
> Interesting. Please note that cpu_physical_memory_read is not
> done atomically. Can guest be writing the value while
> we read it? If yes we'll get a corrupted value here,
> because CP_RX_OWN is the high bit so it is read last.
> Correct?
> 

Yes, there's a race, we need use atomic memory access method.

> I realize we have the same pattern in other places in this
> device, probably a bug as well?
> 

Yes, we need use atomic method at least for ownership check during TX/RX.

> 
>> +    rxdw0 = le32_to_cpu(val);
>> +    if (rxdw0 & CP_RX_OWN) {
>> +        return 1;
>> +    } else {
>> +        return 0;
>> +    }
> 
> Do we need to check that buffer size is large enough
> to include the packet like we do for non c+ mode?
> 

Not sure here is the best place, the buffer size is checked during
receiving.

>> +}
>> +
>>  static int rtl8139_can_receive(VLANClientState *nc)
>>  {
>>      RTL8139State *s = DO_UPCAST(NICState, nc, nc)->opaque;
>> @@ -819,7 +848,7 @@ static int rtl8139_can_receive(VLANClientState *nc)
>>      if (rtl8139_cp_receiver_enabled(s)) {
>>          /* ??? Flow control not implemented in c+ mode.
>>             This is a hack to work around slirp deficiencies anyway.  */
>> -        return 1;
>> +        return rtl8139_cp_has_rxbuf(s);
>>      } else {
>>          avail = MOD2(s->RxBufferSize + s->RxBufPtr - s->RxBufAddr,
>>                       s->RxBufferSize);
>> @@ -965,19 +994,6 @@ static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_
>>  
>>          /* begin C+ receiver mode */
>>  
>> -/* w0 ownership flag */
>> -#define CP_RX_OWN (1<<31)
>> -/* w0 end of ring flag */
>> -#define CP_RX_EOR (1<<30)
>> -/* w0 bits 0...12 : buffer size */
>> -#define CP_RX_BUFFER_SIZE_MASK ((1<<13) - 1)
>> -/* w1 tag available flag */
>> -#define CP_RX_TAVA (1<<16)
>> -/* w1 bits 0...15 : VLAN tag */
>> -#define CP_RX_VLAN_TAG_MASK ((1<<16) - 1)
>> -/* w2 low  32bit of Rx buffer ptr */
>> -/* w3 high 32bit of Rx buffer ptr */
>> -
>>          int descriptor = s->currCPlusRxDesc;
>>          target_phys_addr_t cplus_rx_ring_desc;
>>

Patch

diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index 3753950..bcbc5e3 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -84,6 +84,19 @@ 
 #define VLAN_TCI_LEN 2
 #define VLAN_HLEN (ETHER_TYPE_LEN + VLAN_TCI_LEN)
 
+/* w0 ownership flag */
+#define CP_RX_OWN (1<<31)
+/* w0 end of ring flag */
+#define CP_RX_EOR (1<<30)
+/* w0 bits 0...12 : buffer size */
+#define CP_RX_BUFFER_SIZE_MASK ((1<<13) - 1)
+/* w1 tag available flag */
+#define CP_RX_TAVA (1<<16)
+/* w1 bits 0...15 : VLAN tag */
+#define CP_RX_VLAN_TAG_MASK ((1<<16) - 1)
+/* w2 low  32bit of Rx buffer ptr */
+/* w3 high 32bit of Rx buffer ptr */
+
 #if defined (DEBUG_RTL8139)
 #  define DPRINTF(fmt, ...) \
     do { fprintf(stderr, "RTL8139: " fmt, ## __VA_ARGS__); } while (0)
@@ -805,6 +818,22 @@  static inline target_phys_addr_t rtl8139_addr64(uint32_t low, uint32_t high)
 #endif
 }
 
+/* Verify that we have at least one available rx buffer */
+static int rtl8139_cp_has_rxbuf(RTL8139State *s)
+{
+    uint32_t val, rxdw0;
+    target_phys_addr_t cplus_rx_ring_desc = rtl8139_addr64(s->RxRingAddrLO,
+                                                           s->RxRingAddrHI);
+    cplus_rx_ring_desc += 16 * s->currCPlusRxDesc;
+    cpu_physical_memory_read(cplus_rx_ring_desc, &val, 4);
+    rxdw0 = le32_to_cpu(val);
+    if (rxdw0 & CP_RX_OWN) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
 static int rtl8139_can_receive(VLANClientState *nc)
 {
     RTL8139State *s = DO_UPCAST(NICState, nc, nc)->opaque;
@@ -819,7 +848,7 @@  static int rtl8139_can_receive(VLANClientState *nc)
     if (rtl8139_cp_receiver_enabled(s)) {
         /* ??? Flow control not implemented in c+ mode.
            This is a hack to work around slirp deficiencies anyway.  */
-        return 1;
+        return rtl8139_cp_has_rxbuf(s);
     } else {
         avail = MOD2(s->RxBufferSize + s->RxBufPtr - s->RxBufAddr,
                      s->RxBufferSize);
@@ -965,19 +994,6 @@  static ssize_t rtl8139_do_receive(VLANClientState *nc, const uint8_t *buf, size_
 
         /* begin C+ receiver mode */
 
-/* w0 ownership flag */
-#define CP_RX_OWN (1<<31)
-/* w0 end of ring flag */
-#define CP_RX_EOR (1<<30)
-/* w0 bits 0...12 : buffer size */
-#define CP_RX_BUFFER_SIZE_MASK ((1<<13) - 1)
-/* w1 tag available flag */
-#define CP_RX_TAVA (1<<16)
-/* w1 bits 0...15 : VLAN tag */
-#define CP_RX_VLAN_TAG_MASK ((1<<16) - 1)
-/* w2 low  32bit of Rx buffer ptr */
-/* w3 high 32bit of Rx buffer ptr */
-
         int descriptor = s->currCPlusRxDesc;
         target_phys_addr_t cplus_rx_ring_desc;