Patchwork [v2,15/15] net: invoke qemu_can_send_packet only before net queue sending function

login
register
mail settings
Submitter Zhiyong Wu
Date May 23, 2012, 3:14 p.m.
Message ID <1337786045-2277-16-git-send-email-zwu.kernel@gmail.com>
Download mbox | patch
Permalink /patch/160954/
State New
Headers show

Comments

Zhiyong Wu - May 23, 2012, 3:14 p.m.
From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>

Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
---
 net/queue.c      |    4 ++--
 net/slirp.c      |    7 -------
 net/tap.c        |    2 +-
 slirp/if.c       |    5 -----
 slirp/libslirp.h |    1 -
 5 files changed, 3 insertions(+), 16 deletions(-)
Paolo Bonzini - May 23, 2012, 4 p.m.
Il 23/05/2012 17:14, zwu.kernel@gmail.com ha scritto:
> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> 
> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> ---
>  net/queue.c      |    4 ++--
>  net/slirp.c      |    7 -------
>  net/tap.c        |    2 +-
>  slirp/if.c       |    5 -----
>  slirp/libslirp.h |    1 -
>  5 files changed, 3 insertions(+), 16 deletions(-)
> 
> diff --git a/net/queue.c b/net/queue.c
> index 0afd783..d2e57de 100644
> --- a/net/queue.c
> +++ b/net/queue.c
> @@ -176,7 +176,7 @@ ssize_t qemu_net_queue_send(NetQueue *queue,
>  {
>      ssize_t ret;
>  
> -    if (queue->delivering) {
> +    if (queue->delivering || !qemu_can_send_packet(sender)) {
>          return qemu_net_queue_append(queue, sender, flags, data, size, NULL);
>      }
>  
> @@ -200,7 +200,7 @@ ssize_t qemu_net_queue_send_iov(NetQueue *queue,
>  {
>      ssize_t ret;
>  
> -    if (queue->delivering) {
> +    if (queue->delivering || !qemu_can_send_packet(sender)) {
>          return qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, NULL);
>      }
>  
> diff --git a/net/slirp.c b/net/slirp.c
> index a6ede2b..248f7ff 100644
> --- a/net/slirp.c
> +++ b/net/slirp.c
> @@ -96,13 +96,6 @@ static void slirp_smb_cleanup(SlirpState *s);
>  static inline void slirp_smb_cleanup(SlirpState *s) { }
>  #endif
>  
> -int slirp_can_output(void *opaque)
> -{
> -    SlirpState *s = opaque;
> -
> -    return qemu_can_send_packet(&s->nc);
> -}
> -
>  void slirp_output(void *opaque, const uint8_t *pkt, int pkt_len)
>  {
>      SlirpState *s = opaque;
> diff --git a/net/tap.c b/net/tap.c
> index 65f45b8..7b1992b 100644
> --- a/net/tap.c
> +++ b/net/tap.c
> @@ -210,7 +210,7 @@ static void tap_send(void *opaque)
>          if (size == 0) {
>              tap_read_poll(s, 0);
>          }
> -    } while (size > 0 && qemu_can_send_packet(&s->nc));
> +    } while (size > 0);

Can you explain this?  Also, have you benchmarked the change to see what
effect it has?

Also, can you explain why you didn't implement this?

>> If they did, hubs could then do their own flow control via can_receive.
>> When qemu_send_packet returns zero you increment a count of in-flight
>> packets, and a sent-packet callback would decrement the same count. When the
>> count is non-zero, can_receive returns false (and vice versa).  The sent_cb
>> also needs to call qemu_flush_queued_packets when the count drop to zero.
>> With this in place, I think the other TODO about the return value is easily
>> solved; receive/receive_iov callbacks can simply return immediate success,
>> and later block further sends.

Paolo

>  }
>  
>  int tap_has_ufo(NetClientState *nc)
> diff --git a/slirp/if.c b/slirp/if.c
> index 096cf6f..533295d 100644
> --- a/slirp/if.c
> +++ b/slirp/if.c
> @@ -177,11 +177,6 @@ void if_start(Slirp *slirp)
>      }
>  
>      while (ifm_next) {
> -        /* check if we can really output */
> -        if (!slirp_can_output(slirp->opaque)) {
> -            break;
> -        }
> -
>          ifm = ifm_next;
>          from_batchq = next_from_batchq;
>  
> diff --git a/slirp/libslirp.h b/slirp/libslirp.h
> index 77527ad..9b471b5 100644
> --- a/slirp/libslirp.h
> +++ b/slirp/libslirp.h
> @@ -25,7 +25,6 @@ void slirp_select_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds,
>  void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len);
>  
>  /* you must provide the following functions: */
> -int slirp_can_output(void *opaque);
>  void slirp_output(void *opaque, const uint8_t *pkt, int pkt_len);
>  
>  int slirp_add_hostfwd(Slirp *slirp, int is_udp,
Zhiyong Wu - May 24, 2012, 4:05 a.m.
On Thu, May 24, 2012 at 12:00 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 23/05/2012 17:14, zwu.kernel@gmail.com ha scritto:
>> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>
>> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>> ---
>>  net/queue.c      |    4 ++--
>>  net/slirp.c      |    7 -------
>>  net/tap.c        |    2 +-
>>  slirp/if.c       |    5 -----
>>  slirp/libslirp.h |    1 -
>>  5 files changed, 3 insertions(+), 16 deletions(-)
>>
>> diff --git a/net/queue.c b/net/queue.c
>> index 0afd783..d2e57de 100644
>> --- a/net/queue.c
>> +++ b/net/queue.c
>> @@ -176,7 +176,7 @@ ssize_t qemu_net_queue_send(NetQueue *queue,
>>  {
>>      ssize_t ret;
>>
>> -    if (queue->delivering) {
>> +    if (queue->delivering || !qemu_can_send_packet(sender)) {
>>          return qemu_net_queue_append(queue, sender, flags, data, size, NULL);
>>      }
>>
>> @@ -200,7 +200,7 @@ ssize_t qemu_net_queue_send_iov(NetQueue *queue,
>>  {
>>      ssize_t ret;
>>
>> -    if (queue->delivering) {
>> +    if (queue->delivering || !qemu_can_send_packet(sender)) {
>>          return qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, NULL);
>>      }
>>
>> diff --git a/net/slirp.c b/net/slirp.c
>> index a6ede2b..248f7ff 100644
>> --- a/net/slirp.c
>> +++ b/net/slirp.c
>> @@ -96,13 +96,6 @@ static void slirp_smb_cleanup(SlirpState *s);
>>  static inline void slirp_smb_cleanup(SlirpState *s) { }
>>  #endif
>>
>> -int slirp_can_output(void *opaque)
>> -{
>> -    SlirpState *s = opaque;
>> -
>> -    return qemu_can_send_packet(&s->nc);
>> -}
>> -
>>  void slirp_output(void *opaque, const uint8_t *pkt, int pkt_len)
>>  {
>>      SlirpState *s = opaque;
>> diff --git a/net/tap.c b/net/tap.c
>> index 65f45b8..7b1992b 100644
>> --- a/net/tap.c
>> +++ b/net/tap.c
>> @@ -210,7 +210,7 @@ static void tap_send(void *opaque)
>>          if (size == 0) {
>>              tap_read_poll(s, 0);
>>          }
>> -    } while (size > 0 && qemu_can_send_packet(&s->nc));
>> +    } while (size > 0);
>
> Can you explain this?  Also, have you benchmarked the change to see what
Its code execution flow is like below:
tap_send --> qemu_send_packet_async
->qemu_send_packet_async_with_flags ->qemu_net_queue_send

So it will finally invoke qemu_can_send_packet to determine if it can
send packets. this code change delay this determination.

> effect it has?
No, i have not done benchmark testing. When a lot of packets will go
to the guest from outside and this guest' NIC can_receive return
false, this change will cause CPU to execute some additional codes.

>
> Also, can you explain why you didn't implement this?
Hub can now do its own flow control if it provides its can_recieve.
Why need we add some counts to track in-flight packets? Do you think
that it can speed up the packets delivery? otherwise i think that it
complex the code. To be honest, i prefer current solution. Do you
think of this? :)

>
>>> If they did, hubs could then do their own flow control via can_receive.
>>> When qemu_send_packet returns zero you increment a count of in-flight
>>> packets, and a sent-packet callback would decrement the same count. When the
>>> count is non-zero, can_receive returns false (and vice versa).  The sent_cb
>>> also needs to call qemu_flush_queued_packets when the count drop to zero.
>>> With this in place, I think the other TODO about the return value is easily
>>> solved; receive/receive_iov callbacks can simply return immediate success,
>>> and later block further sends.
>
> Paolo
>
>>  }
>>
>>  int tap_has_ufo(NetClientState *nc)
>> diff --git a/slirp/if.c b/slirp/if.c
>> index 096cf6f..533295d 100644
>> --- a/slirp/if.c
>> +++ b/slirp/if.c
>> @@ -177,11 +177,6 @@ void if_start(Slirp *slirp)
>>      }
>>
>>      while (ifm_next) {
>> -        /* check if we can really output */
>> -        if (!slirp_can_output(slirp->opaque)) {
>> -            break;
>> -        }
>> -
>>          ifm = ifm_next;
>>          from_batchq = next_from_batchq;
>>
>> diff --git a/slirp/libslirp.h b/slirp/libslirp.h
>> index 77527ad..9b471b5 100644
>> --- a/slirp/libslirp.h
>> +++ b/slirp/libslirp.h
>> @@ -25,7 +25,6 @@ void slirp_select_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds,
>>  void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len);
>>
>>  /* you must provide the following functions: */
>> -int slirp_can_output(void *opaque);
>>  void slirp_output(void *opaque, const uint8_t *pkt, int pkt_len);
>>
>>  int slirp_add_hostfwd(Slirp *slirp, int is_udp,
>
Paolo Bonzini - May 24, 2012, 10:07 a.m.
Il 24/05/2012 06:05, Zhi Yong Wu ha scritto:
> On Thu, May 24, 2012 at 12:00 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Il 23/05/2012 17:14, zwu.kernel@gmail.com ha scritto:
>>> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>
>>> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>> ---
>>>  net/queue.c      |    4 ++--
>>>  net/slirp.c      |    7 -------
>>>  net/tap.c        |    2 +-
>>>  slirp/if.c       |    5 -----
>>>  slirp/libslirp.h |    1 -
>>>  5 files changed, 3 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/net/queue.c b/net/queue.c
>>> index 0afd783..d2e57de 100644
>>> --- a/net/queue.c
>>> +++ b/net/queue.c
>>> @@ -176,7 +176,7 @@ ssize_t qemu_net_queue_send(NetQueue *queue,
>>>  {
>>>      ssize_t ret;
>>>
>>> -    if (queue->delivering) {
>>> +    if (queue->delivering || !qemu_can_send_packet(sender)) {
>>>          return qemu_net_queue_append(queue, sender, flags, data, size, NULL);
>>>      }
>>>
>>> @@ -200,7 +200,7 @@ ssize_t qemu_net_queue_send_iov(NetQueue *queue,
>>>  {
>>>      ssize_t ret;
>>>
>>> -    if (queue->delivering) {
>>> +    if (queue->delivering || !qemu_can_send_packet(sender)) {
>>>          return qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, NULL);
>>>      }
>>>
>>> diff --git a/net/slirp.c b/net/slirp.c
>>> index a6ede2b..248f7ff 100644
>>> --- a/net/slirp.c
>>> +++ b/net/slirp.c
>>> @@ -96,13 +96,6 @@ static void slirp_smb_cleanup(SlirpState *s);
>>>  static inline void slirp_smb_cleanup(SlirpState *s) { }
>>>  #endif
>>>
>>> -int slirp_can_output(void *opaque)
>>> -{
>>> -    SlirpState *s = opaque;
>>> -
>>> -    return qemu_can_send_packet(&s->nc);
>>> -}
>>> -
>>>  void slirp_output(void *opaque, const uint8_t *pkt, int pkt_len)
>>>  {
>>>      SlirpState *s = opaque;
>>> diff --git a/net/tap.c b/net/tap.c
>>> index 65f45b8..7b1992b 100644
>>> --- a/net/tap.c
>>> +++ b/net/tap.c
>>> @@ -210,7 +210,7 @@ static void tap_send(void *opaque)
>>>          if (size == 0) {
>>>              tap_read_poll(s, 0);
>>>          }
>>> -    } while (size > 0 && qemu_can_send_packet(&s->nc));
>>> +    } while (size > 0);
>>
>> Can you explain this?  Also, have you benchmarked the change to see what
> Its code execution flow is like below:
> tap_send --> qemu_send_packet_async
> ->qemu_send_packet_async_with_flags ->qemu_net_queue_send
> 
> So it will finally invoke qemu_can_send_packet to determine if it can
> send packets. this code change delay this determination.

But you will copy packets uselessly.  The code before the patch simply
left them on the tap file descriptor.  This is better because it
involves the kernel in flow control.  You are introducing "bufferbloat".

>> Also, can you explain why you didn't implement this?
> Hub can now do its own flow control if it provides its can_recieve.

But you didn't add can_receive.

> Why need we add some counts to track in-flight packets?

To implement can_receive.

Paolo
Zhiyong Wu - May 24, 2012, 11:58 a.m.
On Thu, May 24, 2012 at 6:07 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 24/05/2012 06:05, Zhi Yong Wu ha scritto:
>> On Thu, May 24, 2012 at 12:00 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>> Il 23/05/2012 17:14, zwu.kernel@gmail.com ha scritto:
>>>> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>>
>>>> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>>> ---
>>>>  net/queue.c      |    4 ++--
>>>>  net/slirp.c      |    7 -------
>>>>  net/tap.c        |    2 +-
>>>>  slirp/if.c       |    5 -----
>>>>  slirp/libslirp.h |    1 -
>>>>  5 files changed, 3 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/net/queue.c b/net/queue.c
>>>> index 0afd783..d2e57de 100644
>>>> --- a/net/queue.c
>>>> +++ b/net/queue.c
>>>> @@ -176,7 +176,7 @@ ssize_t qemu_net_queue_send(NetQueue *queue,
>>>>  {
>>>>      ssize_t ret;
>>>>
>>>> -    if (queue->delivering) {
>>>> +    if (queue->delivering || !qemu_can_send_packet(sender)) {
>>>>          return qemu_net_queue_append(queue, sender, flags, data, size, NULL);
>>>>      }
>>>>
>>>> @@ -200,7 +200,7 @@ ssize_t qemu_net_queue_send_iov(NetQueue *queue,
>>>>  {
>>>>      ssize_t ret;
>>>>
>>>> -    if (queue->delivering) {
>>>> +    if (queue->delivering || !qemu_can_send_packet(sender)) {
>>>>          return qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, NULL);
>>>>      }
>>>>
>>>> diff --git a/net/slirp.c b/net/slirp.c
>>>> index a6ede2b..248f7ff 100644
>>>> --- a/net/slirp.c
>>>> +++ b/net/slirp.c
>>>> @@ -96,13 +96,6 @@ static void slirp_smb_cleanup(SlirpState *s);
>>>>  static inline void slirp_smb_cleanup(SlirpState *s) { }
>>>>  #endif
>>>>
>>>> -int slirp_can_output(void *opaque)
>>>> -{
>>>> -    SlirpState *s = opaque;
>>>> -
>>>> -    return qemu_can_send_packet(&s->nc);
>>>> -}
>>>> -
>>>>  void slirp_output(void *opaque, const uint8_t *pkt, int pkt_len)
>>>>  {
>>>>      SlirpState *s = opaque;
>>>> diff --git a/net/tap.c b/net/tap.c
>>>> index 65f45b8..7b1992b 100644
>>>> --- a/net/tap.c
>>>> +++ b/net/tap.c
>>>> @@ -210,7 +210,7 @@ static void tap_send(void *opaque)
>>>>          if (size == 0) {
>>>>              tap_read_poll(s, 0);
>>>>          }
>>>> -    } while (size > 0 && qemu_can_send_packet(&s->nc));
>>>> +    } while (size > 0);
>>>
>>> Can you explain this?  Also, have you benchmarked the change to see what
>> Its code execution flow is like below:
>> tap_send --> qemu_send_packet_async
>> ->qemu_send_packet_async_with_flags ->qemu_net_queue_send
>>
>> So it will finally invoke qemu_can_send_packet to determine if it can
>> send packets. this code change delay this determination.
>
> But you will copy packets uselessly.  The code before the patch simply
> left them on the tap file descriptor.  This is better because it
> involves the kernel in flow control.  You are introducing "bufferbloat".
You are correct, but can_send_packet will be invoked twice for one
packet delivery.
>
>>> Also, can you explain why you didn't implement this?
>> Hub can now do its own flow control if it provides its can_recieve.
>
> But you didn't add can_receive.
>
>> Why need we add some counts to track in-flight packets?
>
> To implement can_receive.
Let me try.
>
> Paolo
Paolo Bonzini - May 24, 2012, 12:02 p.m.
Il 24/05/2012 13:58, Zhi Yong Wu ha scritto:
>> > But you will copy packets uselessly.  The code before the patch simply
>> > left them on the tap file descriptor.  This is better because it
>> > involves the kernel in flow control.  You are introducing "bufferbloat".
> You are correct, but can_send_packet will be invoked twice for one
> packet delivery.

Doesn't matter, it's cheap.

Paolo

Patch

diff --git a/net/queue.c b/net/queue.c
index 0afd783..d2e57de 100644
--- a/net/queue.c
+++ b/net/queue.c
@@ -176,7 +176,7 @@  ssize_t qemu_net_queue_send(NetQueue *queue,
 {
     ssize_t ret;
 
-    if (queue->delivering) {
+    if (queue->delivering || !qemu_can_send_packet(sender)) {
         return qemu_net_queue_append(queue, sender, flags, data, size, NULL);
     }
 
@@ -200,7 +200,7 @@  ssize_t qemu_net_queue_send_iov(NetQueue *queue,
 {
     ssize_t ret;
 
-    if (queue->delivering) {
+    if (queue->delivering || !qemu_can_send_packet(sender)) {
         return qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, NULL);
     }
 
diff --git a/net/slirp.c b/net/slirp.c
index a6ede2b..248f7ff 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -96,13 +96,6 @@  static void slirp_smb_cleanup(SlirpState *s);
 static inline void slirp_smb_cleanup(SlirpState *s) { }
 #endif
 
-int slirp_can_output(void *opaque)
-{
-    SlirpState *s = opaque;
-
-    return qemu_can_send_packet(&s->nc);
-}
-
 void slirp_output(void *opaque, const uint8_t *pkt, int pkt_len)
 {
     SlirpState *s = opaque;
diff --git a/net/tap.c b/net/tap.c
index 65f45b8..7b1992b 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -210,7 +210,7 @@  static void tap_send(void *opaque)
         if (size == 0) {
             tap_read_poll(s, 0);
         }
-    } while (size > 0 && qemu_can_send_packet(&s->nc));
+    } while (size > 0);
 }
 
 int tap_has_ufo(NetClientState *nc)
diff --git a/slirp/if.c b/slirp/if.c
index 096cf6f..533295d 100644
--- a/slirp/if.c
+++ b/slirp/if.c
@@ -177,11 +177,6 @@  void if_start(Slirp *slirp)
     }
 
     while (ifm_next) {
-        /* check if we can really output */
-        if (!slirp_can_output(slirp->opaque)) {
-            break;
-        }
-
         ifm = ifm_next;
         from_batchq = next_from_batchq;
 
diff --git a/slirp/libslirp.h b/slirp/libslirp.h
index 77527ad..9b471b5 100644
--- a/slirp/libslirp.h
+++ b/slirp/libslirp.h
@@ -25,7 +25,6 @@  void slirp_select_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds,
 void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len);
 
 /* you must provide the following functions: */
-int slirp_can_output(void *opaque);
 void slirp_output(void *opaque, const uint8_t *pkt, int pkt_len);
 
 int slirp_add_hostfwd(Slirp *slirp, int is_udp,