Patchwork [v2,5/6] net: defer nested call to BH

login
register
mail settings
Submitter pingfan liu
Date June 13, 2013, 9:03 a.m.
Message ID <1371114186-8854-6-git-send-email-qemulist@gmail.com>
Download mbox | patch
Permalink /patch/251014/
State New
Headers show

Comments

pingfan liu - June 13, 2013, 9:03 a.m.
From: Liu Ping Fan <pingfanl@linux.vnet.ibm.com>

Nested call caused by ->receive() will raise issue like deadlock,
so postphone it to BH.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 net/queue.c | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)
Stefan Hajnoczi - June 18, 2013, 12:57 p.m.
On Thu, Jun 13, 2013 at 05:03:05PM +0800, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfanl@linux.vnet.ibm.com>
> 
> Nested call caused by ->receive() will raise issue like deadlock,
> so postphone it to BH.
> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  net/queue.c | 40 ++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 38 insertions(+), 2 deletions(-)

Does this patch belong before the netqueue lock patch?  The commit
history should be bisectable without temporary failures/deadlocks.

> diff --git a/net/queue.c b/net/queue.c
> index 58222b0..9c343ab 100644
> --- a/net/queue.c
> +++ b/net/queue.c
> @@ -24,6 +24,8 @@
>  #include "net/queue.h"
>  #include "qemu/queue.h"
>  #include "net/net.h"
> +#include "block/aio.h"
> +#include "qemu/main-loop.h"
>  
>  /* The delivery handler may only return zero if it will call
>   * qemu_net_queue_flush() when it determines that it is once again able
> @@ -183,6 +185,22 @@ static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue,
>      return ret;
>  }
>  
> +typedef struct NetQueBH {

This file uses "Queue" consistently, please don't add "Que" here.

> @@ -192,8 +210,17 @@ ssize_t qemu_net_queue_send(NetQueue *queue,
>  {
>      ssize_t ret;
>  
> -    if (queue->delivering || !qemu_can_send_packet_nolock(sender)) {
> +    if (queue->delivering || !qemu_can_send_packet_nolock(sender)
> +        || sender->send_queue->delivering) {

Not sure this is safe, we're only holding one NetClientState->peer_lock
and one NetQueue->lock.  How can we access both queue->delivering and
sender->send_queue->delivering safely?
pingfan liu - June 20, 2013, 6:30 a.m.
On Tue, Jun 18, 2013 at 8:57 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Thu, Jun 13, 2013 at 05:03:05PM +0800, Liu Ping Fan wrote:
>> From: Liu Ping Fan <pingfanl@linux.vnet.ibm.com>
>>
>> Nested call caused by ->receive() will raise issue like deadlock,
>> so postphone it to BH.
>>
>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>> ---
>>  net/queue.c | 40 ++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 38 insertions(+), 2 deletions(-)
>
> Does this patch belong before the netqueue lock patch?  The commit
> history should be bisectable without temporary failures/deadlocks.
>
Ok.
>> diff --git a/net/queue.c b/net/queue.c
>> index 58222b0..9c343ab 100644
>> --- a/net/queue.c
>> +++ b/net/queue.c
>> @@ -24,6 +24,8 @@
>>  #include "net/queue.h"
>>  #include "qemu/queue.h"
>>  #include "net/net.h"
>> +#include "block/aio.h"
>> +#include "qemu/main-loop.h"
>>
>>  /* The delivery handler may only return zero if it will call
>>   * qemu_net_queue_flush() when it determines that it is once again able
>> @@ -183,6 +185,22 @@ static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue,
>>      return ret;
>>  }
>>
>> +typedef struct NetQueBH {
>
> This file uses "Queue" consistently, please don't add "Que" here.
>
>> @@ -192,8 +210,17 @@ ssize_t qemu_net_queue_send(NetQueue *queue,
>>  {
>>      ssize_t ret;
>>
>> -    if (queue->delivering || !qemu_can_send_packet_nolock(sender)) {
>> +    if (queue->delivering || !qemu_can_send_packet_nolock(sender)
>> +        || sender->send_queue->delivering) {
>
> Not sure this is safe, we're only holding one NetClientState->peer_lock
> and one NetQueue->lock.  How can we access both queue->delivering and
> sender->send_queue->delivering safely?

Yes, you are right, it is not safely. The queue->delivering is
protected by peer_lock and we do not take the verse direction lock .
So finally the above code can not tell out the nested calling
"A-->B-->A"  from  "A-->B,  B-->A" (where A, B stands for a
NetClientState).
What about using TLS to trace the nested calling?  With it, we can
avoid AB-BA lock problem.

Thx & regards,
Pingfan
Stefan Hajnoczi - June 20, 2013, 7:48 a.m.
On Thu, Jun 20, 2013 at 02:30:56PM +0800, liu ping fan wrote:
> On Tue, Jun 18, 2013 at 8:57 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> > On Thu, Jun 13, 2013 at 05:03:05PM +0800, Liu Ping Fan wrote:
> >> From: Liu Ping Fan <pingfanl@linux.vnet.ibm.com>
> >>
> >> Nested call caused by ->receive() will raise issue like deadlock,
> >> so postphone it to BH.
> >>
> >> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> >> ---
> >>  net/queue.c | 40 ++++++++++++++++++++++++++++++++++++++--
> >>  1 file changed, 38 insertions(+), 2 deletions(-)
> >
> > Does this patch belong before the netqueue lock patch?  The commit
> > history should be bisectable without temporary failures/deadlocks.
> >
> Ok.
> >> diff --git a/net/queue.c b/net/queue.c
> >> index 58222b0..9c343ab 100644
> >> --- a/net/queue.c
> >> +++ b/net/queue.c
> >> @@ -24,6 +24,8 @@
> >>  #include "net/queue.h"
> >>  #include "qemu/queue.h"
> >>  #include "net/net.h"
> >> +#include "block/aio.h"
> >> +#include "qemu/main-loop.h"
> >>
> >>  /* The delivery handler may only return zero if it will call
> >>   * qemu_net_queue_flush() when it determines that it is once again able
> >> @@ -183,6 +185,22 @@ static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue,
> >>      return ret;
> >>  }
> >>
> >> +typedef struct NetQueBH {
> >
> > This file uses "Queue" consistently, please don't add "Que" here.
> >
> >> @@ -192,8 +210,17 @@ ssize_t qemu_net_queue_send(NetQueue *queue,
> >>  {
> >>      ssize_t ret;
> >>
> >> -    if (queue->delivering || !qemu_can_send_packet_nolock(sender)) {
> >> +    if (queue->delivering || !qemu_can_send_packet_nolock(sender)
> >> +        || sender->send_queue->delivering) {
> >
> > Not sure this is safe, we're only holding one NetClientState->peer_lock
> > and one NetQueue->lock.  How can we access both queue->delivering and
> > sender->send_queue->delivering safely?
> 
> Yes, you are right, it is not safely. The queue->delivering is
> protected by peer_lock and we do not take the verse direction lock .
> So finally the above code can not tell out the nested calling
> "A-->B-->A"  from  "A-->B,  B-->A" (where A, B stands for a
> NetClientState).
> What about using TLS to trace the nested calling?  With it, we can
> avoid AB-BA lock problem.

I would take a step back and see if there's a way to avoid reaching into
inspect sender->send_queue->delivering here.

Stefan
Paolo Bonzini - July 3, 2013, 6:20 a.m.
Il 13/06/2013 11:03, Liu Ping Fan ha scritto:
> From: Liu Ping Fan <pingfanl@linux.vnet.ibm.com>
> 
> Nested call caused by ->receive() will raise issue like deadlock,
> so postphone it to BH.

When I read this patch, I am completely puzzled.

All I see is that  a qemu_net_queue_flush is done in a bottom half.  I
have no clue of how you get a nested call of ->receive, and I have no
clue of what this patch does to prevent it (since the bottom half is
asynchronous).

A bottom half is not a synchronization primitive.  If you use a lock, or
a condition variable, or RCU, you can usually assume that the reader
understands how you're using it (though not if you're doing fancy
tricks).  If you are introducing infrastructure, you can use long
comments or docs/ and expect the reviewer to read those.

But if you're doing tricky stuff (such as if you have global/local
locks, or you have parts of the code that run lockless, or if you are
modifying the behavior of an existing subsystem to prevent deadlocks),
you need to document _every single step that you do_.

For example, let's take the patches you had for hostmem.c.  You had a
single patch doing all the work:

   hostmem: make hostmem global and RAM hotunplg safe

   The hostmem listener will translate gpa to hva quickly. Make it
   global, so other component can use it.
   Also this patch adopt MemoryRegionOps's ref/unref interface to
   make it survive the RAM hotunplug.

No reference whatsoever to the reference counting of the hostmem itself,
and how you are using the locks.  The "also" in the commit message is a
big warning sign, too (though I'm guilty of adding many "also"s in the
commit messages).

When I took your ideas and applied them to memory.c, here is how I
explained it:

   memory: use a new FlatView pointer on every topology update

   This is the first step towards converting as->current_map to
   RCU-style updates, where the FlatView updates run concurrently
   with uses of an old FlatView.
   ---
   memory: add reference counting to FlatView

   With this change, a FlatView can be used even after a concurrent
   update has replaced it.  Because we do not have RCU, we use a
   mutex to protect the small critical sections that read/write the
   as->current_map pointer.  Accesses to the FlatView can be done
   outside the mutex.

And in the code:

   /* flat_view_mutex is taken around reading as->current_map; the
    * critical section is extremely short, so I'm using a single mutex
    * for every AS.  We could also RCU for the read-side.
    *
    * The BQL is taken around transaction commits, hence both locks are
    * taken while writing to as->current_map (with the BQL taken
    * outside).
    */

It is still quite concise, but it explains both the concurrency to
expect, and the locking policies.

This patch is changing the behavior of existing code in a complex
scenario and in a tricky way.  If you want your patches accepted (or
even reviewed), you _must_ learn how to explain the scenarios and your
fixes.

And believe me, it is not a language problem; people with much worse
English than yours manage this all the time.  It is more of an attitude
problem, and I've explained it many times.

Paolo

> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  net/queue.c | 40 ++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 38 insertions(+), 2 deletions(-)
> 
> diff --git a/net/queue.c b/net/queue.c
> index 58222b0..9c343ab 100644
> --- a/net/queue.c
> +++ b/net/queue.c
> @@ -24,6 +24,8 @@
>  #include "net/queue.h"
>  #include "qemu/queue.h"
>  #include "net/net.h"
> +#include "block/aio.h"
> +#include "qemu/main-loop.h"
>  
>  /* The delivery handler may only return zero if it will call
>   * qemu_net_queue_flush() when it determines that it is once again able
> @@ -183,6 +185,22 @@ static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue,
>      return ret;
>  }
>  
> +typedef struct NetQueBH {
> +    QEMUBH *bh;
> +    NetClientState *nc;
> +} NetQueBH;
> +
> +static void qemu_net_queue_send_bh(void *opaque)
> +{
> +    NetQueBH *q_bh = opaque;
> +    NetQueue *queue = q_bh->nc->send_queue;
> +
> +    qemu_net_queue_flush(queue);
> +    netclient_unref(q_bh->nc);
> +    qemu_bh_delete(q_bh->bh);
> +    g_slice_free(NetQueBH, q_bh);
> +}
> +
>  ssize_t qemu_net_queue_send(NetQueue *queue,
>                              NetClientState *sender,
>                              unsigned flags,
> @@ -192,8 +210,17 @@ ssize_t qemu_net_queue_send(NetQueue *queue,
>  {
>      ssize_t ret;
>  
> -    if (queue->delivering || !qemu_can_send_packet_nolock(sender)) {
> +    if (queue->delivering || !qemu_can_send_packet_nolock(sender)
> +        || sender->send_queue->delivering) {
>          qemu_net_queue_append(queue, sender, flags, data, size, sent_cb);
> +        /* Nested call will be deferred to BH */
> +        if (sender->send_queue->delivering) {
> +            NetQueBH *que_bh = g_slice_new(NetQueBH);
> +            que_bh->bh = qemu_bh_new(qemu_net_queue_send_bh, que_bh);
> +            que_bh->nc = queue->opaque;
> +            netclient_ref(queue->opaque);
> +            qemu_bh_schedule(que_bh->bh);
> +        }
>          return 0;
>      }
>  
> @@ -217,8 +244,17 @@ ssize_t qemu_net_queue_send_iov(NetQueue *queue,
>  {
>      ssize_t ret;
>  
> -    if (queue->delivering || !qemu_can_send_packet_nolock(sender)) {
> +    if (queue->delivering || !qemu_can_send_packet_nolock(sender)
> +        || sender->send_queue->delivering) {
>          qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, sent_cb);
> +        /* Nested call will be deferred to BH */
> +        if (sender->send_queue->delivering) {
> +            NetQueBH *que_bh = g_slice_new(NetQueBH);
> +            que_bh->bh = qemu_bh_new(qemu_net_queue_send_bh, que_bh);
> +            que_bh->nc = queue->opaque;
> +            netclient_ref(queue->opaque);
> +            qemu_bh_schedule(que_bh->bh);
> +        }
>          return 0;
>      }
>  
>

Patch

diff --git a/net/queue.c b/net/queue.c
index 58222b0..9c343ab 100644
--- a/net/queue.c
+++ b/net/queue.c
@@ -24,6 +24,8 @@ 
 #include "net/queue.h"
 #include "qemu/queue.h"
 #include "net/net.h"
+#include "block/aio.h"
+#include "qemu/main-loop.h"
 
 /* The delivery handler may only return zero if it will call
  * qemu_net_queue_flush() when it determines that it is once again able
@@ -183,6 +185,22 @@  static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue,
     return ret;
 }
 
+typedef struct NetQueBH {
+    QEMUBH *bh;
+    NetClientState *nc;
+} NetQueBH;
+
+static void qemu_net_queue_send_bh(void *opaque)
+{
+    NetQueBH *q_bh = opaque;
+    NetQueue *queue = q_bh->nc->send_queue;
+
+    qemu_net_queue_flush(queue);
+    netclient_unref(q_bh->nc);
+    qemu_bh_delete(q_bh->bh);
+    g_slice_free(NetQueBH, q_bh);
+}
+
 ssize_t qemu_net_queue_send(NetQueue *queue,
                             NetClientState *sender,
                             unsigned flags,
@@ -192,8 +210,17 @@  ssize_t qemu_net_queue_send(NetQueue *queue,
 {
     ssize_t ret;
 
-    if (queue->delivering || !qemu_can_send_packet_nolock(sender)) {
+    if (queue->delivering || !qemu_can_send_packet_nolock(sender)
+        || sender->send_queue->delivering) {
         qemu_net_queue_append(queue, sender, flags, data, size, sent_cb);
+        /* Nested call will be deferred to BH */
+        if (sender->send_queue->delivering) {
+            NetQueBH *que_bh = g_slice_new(NetQueBH);
+            que_bh->bh = qemu_bh_new(qemu_net_queue_send_bh, que_bh);
+            que_bh->nc = queue->opaque;
+            netclient_ref(queue->opaque);
+            qemu_bh_schedule(que_bh->bh);
+        }
         return 0;
     }
 
@@ -217,8 +244,17 @@  ssize_t qemu_net_queue_send_iov(NetQueue *queue,
 {
     ssize_t ret;
 
-    if (queue->delivering || !qemu_can_send_packet_nolock(sender)) {
+    if (queue->delivering || !qemu_can_send_packet_nolock(sender)
+        || sender->send_queue->delivering) {
         qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, sent_cb);
+        /* Nested call will be deferred to BH */
+        if (sender->send_queue->delivering) {
+            NetQueBH *que_bh = g_slice_new(NetQueBH);
+            que_bh->bh = qemu_bh_new(qemu_net_queue_send_bh, que_bh);
+            que_bh->nc = queue->opaque;
+            netclient_ref(queue->opaque);
+            qemu_bh_schedule(que_bh->bh);
+        }
         return 0;
     }