diff mbox

[RFC,v4,14/15] slirp: handle race condition

Message ID 1366187964-14265-15-git-send-email-qemulist@gmail.com
State New
Headers show

Commit Message

pingfan liu April 17, 2013, 8:39 a.m. UTC
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

Slirp and its peer can run on different context at the same time.
Using lock to protect

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 slirp/slirp.c |   16 ++++++++++++++--
 slirp/slirp.h |    3 +++
 2 files changed, 17 insertions(+), 2 deletions(-)

Comments

Jan Kiszka April 18, 2013, 7:13 a.m. UTC | #1
On 2013-04-17 10:39, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> Slirp and its peer can run on different context at the same time.
> Using lock to protect

What are the usage rules for this lock, what precisely is it protecting?
Is it ensured that we do not take the BQL while holding this one?

Jan

> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  slirp/slirp.c |   16 ++++++++++++++--
>  slirp/slirp.h |    3 +++
>  2 files changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/slirp/slirp.c b/slirp/slirp.c
> index 883b7bd..6bfcc67 100644
> --- a/slirp/slirp.c
> +++ b/slirp/slirp.c
> @@ -206,6 +206,7 @@ Slirp *slirp_init(int restricted, struct in_addr vnetwork,
>  
>      slirp_init_once();
>  
> +    qemu_mutex_init(&slirp->lock);
>      slirp->restricted = restricted;
>  
>      if_init(slirp);
> @@ -248,6 +249,7 @@ void slirp_cleanup(Slirp *slirp)
>  
>      ip_cleanup(slirp);
>      m_cleanup(slirp);
> +    qemu_mutex_destroy(&slirp->lock);
>  
>      g_free(slirp->vdnssearch);
>      g_free(slirp->tftp_prefix);
> @@ -411,6 +413,7 @@ gboolean slirp_handler(gpointer data)
>      struct socket *so, *so_next;
>      int ret;
>  
> +    qemu_mutex_lock(&slirp->lock);
>      /*
>       * See if anything has timed out
>       */
> @@ -594,6 +597,7 @@ gboolean slirp_handler(gpointer data)
>      }
>  
>      if_start(slirp);
> +    qemu_mutex_unlock(&slirp->lock);
>      return true;
>  }
>  
> @@ -665,6 +669,7 @@ void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
>          return;
>  
>      proto = ntohs(*(uint16_t *)(pkt + 12));
> +    qemu_mutex_lock(&slirp->lock);
>      switch(proto) {
>      case ETH_P_ARP:
>          arp_input(slirp, pkt, pkt_len);
> @@ -688,6 +693,7 @@ void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
>      default:
>          break;
>      }
> +    qemu_mutex_unlock(&slirp->lock);
>  }
>  
>  /* Output the IP packet to the ethernet device. Returns 0 if the packet must be
> @@ -860,15 +866,21 @@ void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port,
>                         const uint8_t *buf, int size)
>  {
>      int ret;
> -    struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port);
> +    struct socket *so;
> +
> +    qemu_mutex_lock(&slirp->lock);
> +    so = slirp_find_ctl_socket(slirp, guest_addr, guest_port);
>  
> -    if (!so)
> +    if (!so) {
> +        qemu_mutex_unlock(&slirp->lock);
>          return;
> +    }
>  
>      ret = soreadbuf(so, (const char *)buf, size);
>  
>      if (ret > 0)
>          tcp_output(sototcpcb(so));
> +    qemu_mutex_unlock(&slirp->lock);
>  }
>  
>  static void slirp_tcp_save(QEMUFile *f, struct tcpcb *tp)
> diff --git a/slirp/slirp.h b/slirp/slirp.h
> index 008360e..7ab0c70 100644
> --- a/slirp/slirp.h
> +++ b/slirp/slirp.h
> @@ -135,6 +135,7 @@ void free(void *ptr);
>  
>  #include "qemu/queue.h"
>  #include "qemu/sockets.h"
> +#include "qemu/thread.h"
>  
>  #include "libslirp.h"
>  #include "ip.h"
> @@ -207,6 +208,8 @@ struct Slirp {
>      u_int last_slowtimo;
>      int do_slowtimo;
>  
> +    /* lock to protect slirp running both on frontend or SlirpState context */
> +    QemuMutex lock;
>      /* virtual network configuration */
>      struct in_addr vnetwork_addr;
>      struct in_addr vnetwork_mask;
>
pingfan liu April 19, 2013, 12:18 a.m. UTC | #2
On Thu, Apr 18, 2013 at 3:13 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
> On 2013-04-17 10:39, Liu Ping Fan wrote:
>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>
>> Slirp and its peer can run on different context at the same time.
>> Using lock to protect
>
> What are the usage rules for this lock, what precisely is it protecting?
> Is it ensured that we do not take the BQL while holding this one?
>
It protect the slirp state, since slirp can be touched by slrip_input
called by frontend(ex, e1000), also it can be touched by its event
handler.  With this lock, we do not need BQL

Regards,
Pingfan


> Jan
>
>>
>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>> ---
>>  slirp/slirp.c |   16 ++++++++++++++--
>>  slirp/slirp.h |    3 +++
>>  2 files changed, 17 insertions(+), 2 deletions(-)
>>
>> diff --git a/slirp/slirp.c b/slirp/slirp.c
>> index 883b7bd..6bfcc67 100644
>> --- a/slirp/slirp.c
>> +++ b/slirp/slirp.c
>> @@ -206,6 +206,7 @@ Slirp *slirp_init(int restricted, struct in_addr vnetwork,
>>
>>      slirp_init_once();
>>
>> +    qemu_mutex_init(&slirp->lock);
>>      slirp->restricted = restricted;
>>
>>      if_init(slirp);
>> @@ -248,6 +249,7 @@ void slirp_cleanup(Slirp *slirp)
>>
>>      ip_cleanup(slirp);
>>      m_cleanup(slirp);
>> +    qemu_mutex_destroy(&slirp->lock);
>>
>>      g_free(slirp->vdnssearch);
>>      g_free(slirp->tftp_prefix);
>> @@ -411,6 +413,7 @@ gboolean slirp_handler(gpointer data)
>>      struct socket *so, *so_next;
>>      int ret;
>>
>> +    qemu_mutex_lock(&slirp->lock);
>>      /*
>>       * See if anything has timed out
>>       */
>> @@ -594,6 +597,7 @@ gboolean slirp_handler(gpointer data)
>>      }
>>
>>      if_start(slirp);
>> +    qemu_mutex_unlock(&slirp->lock);
>>      return true;
>>  }
>>
>> @@ -665,6 +669,7 @@ void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
>>          return;
>>
>>      proto = ntohs(*(uint16_t *)(pkt + 12));
>> +    qemu_mutex_lock(&slirp->lock);
>>      switch(proto) {
>>      case ETH_P_ARP:
>>          arp_input(slirp, pkt, pkt_len);
>> @@ -688,6 +693,7 @@ void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
>>      default:
>>          break;
>>      }
>> +    qemu_mutex_unlock(&slirp->lock);
>>  }
>>
>>  /* Output the IP packet to the ethernet device. Returns 0 if the packet must be
>> @@ -860,15 +866,21 @@ void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port,
>>                         const uint8_t *buf, int size)
>>  {
>>      int ret;
>> -    struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port);
>> +    struct socket *so;
>> +
>> +    qemu_mutex_lock(&slirp->lock);
>> +    so = slirp_find_ctl_socket(slirp, guest_addr, guest_port);
>>
>> -    if (!so)
>> +    if (!so) {
>> +        qemu_mutex_unlock(&slirp->lock);
>>          return;
>> +    }
>>
>>      ret = soreadbuf(so, (const char *)buf, size);
>>
>>      if (ret > 0)
>>          tcp_output(sototcpcb(so));
>> +    qemu_mutex_unlock(&slirp->lock);
>>  }
>>
>>  static void slirp_tcp_save(QEMUFile *f, struct tcpcb *tp)
>> diff --git a/slirp/slirp.h b/slirp/slirp.h
>> index 008360e..7ab0c70 100644
>> --- a/slirp/slirp.h
>> +++ b/slirp/slirp.h
>> @@ -135,6 +135,7 @@ void free(void *ptr);
>>
>>  #include "qemu/queue.h"
>>  #include "qemu/sockets.h"
>> +#include "qemu/thread.h"
>>
>>  #include "libslirp.h"
>>  #include "ip.h"
>> @@ -207,6 +208,8 @@ struct Slirp {
>>      u_int last_slowtimo;
>>      int do_slowtimo;
>>
>> +    /* lock to protect slirp running both on frontend or SlirpState context */
>> +    QemuMutex lock;
>>      /* virtual network configuration */
>>      struct in_addr vnetwork_addr;
>>      struct in_addr vnetwork_mask;
>>
>
> --
> Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
> Corporate Competence Center Embedded Linux
Jan Kiszka April 19, 2013, 8:21 a.m. UTC | #3
On 2013-04-19 02:18, liu ping fan wrote:
> On Thu, Apr 18, 2013 at 3:13 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>> On 2013-04-17 10:39, Liu Ping Fan wrote:
>>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>>
>>> Slirp and its peer can run on different context at the same time.
>>> Using lock to protect
>>
>> What are the usage rules for this lock, what precisely is it protecting?
>> Is it ensured that we do not take the BQL while holding this one?
>>
> It protect the slirp state, since slirp can be touched by slrip_input
> called by frontend(ex, e1000), also it can be touched by its event
> handler.  With this lock, we do not need BQL

...but the BQL will, at least initially, remain to be everywhere. Every
non-converted device model will hold it while calling into Slirp. So we
have the ordering "BQL before Slirp lock" already. And we must ensure
that there is no "BQL after Slirp lock". Can you guarantee this?

Jan
pingfan liu April 22, 2013, 5:55 a.m. UTC | #4
On Fri, Apr 19, 2013 at 4:21 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
> On 2013-04-19 02:18, liu ping fan wrote:
>> On Thu, Apr 18, 2013 at 3:13 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>>> On 2013-04-17 10:39, Liu Ping Fan wrote:
>>>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>>>
>>>> Slirp and its peer can run on different context at the same time.
>>>> Using lock to protect
>>>
>>> What are the usage rules for this lock, what precisely is it protecting?
>>> Is it ensured that we do not take the BQL while holding this one?
>>>
>> It protect the slirp state, since slirp can be touched by slrip_input
>> called by frontend(ex, e1000), also it can be touched by its event
>> handler.  With this lock, we do not need BQL
>
> ...but the BQL will, at least initially, remain to be everywhere. Every
> non-converted device model will hold it while calling into Slirp. So we
> have the ordering "BQL before Slirp lock" already. And we must ensure
> that there is no "BQL after Slirp lock". Can you guarantee this?
>
Oh, yes, there is a potential ABBA lock problem. Especially for slirp
backend, the NetClientState's receive() can be nested, the scene:
e1000 send packet -> net_slirp_receive() ...->arp_input()..->
->slirp_output() ->e1000_receive()
Although currently, there is no extra lock required by
e1000_receive(), but the nested model does cause the potential of ABBA
lock problem, when beginning to covert the frontend(e1000).
What about introducing  slirp->lockstate  and slirp->lock only used to
protect it. So the users of slirp will protect agaist each other by
slirp->lockstate, meanwhile, we can advoid the potential dead lock.

Regards,
Pingfan
> Jan
>
> --
> Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
> Corporate Competence Center Embedded Linux
pingfan liu April 23, 2013, 7:20 a.m. UTC | #5
On Mon, Apr 22, 2013 at 1:55 PM, liu ping fan <qemulist@gmail.com> wrote:
> On Fri, Apr 19, 2013 at 4:21 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>> On 2013-04-19 02:18, liu ping fan wrote:
>>> On Thu, Apr 18, 2013 at 3:13 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>>>> On 2013-04-17 10:39, Liu Ping Fan wrote:
>>>>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>>>>
>>>>> Slirp and its peer can run on different context at the same time.
>>>>> Using lock to protect
>>>>
>>>> What are the usage rules for this lock, what precisely is it protecting?
>>>> Is it ensured that we do not take the BQL while holding this one?
>>>>
>>> It protect the slirp state, since slirp can be touched by slrip_input
>>> called by frontend(ex, e1000), also it can be touched by its event
>>> handler.  With this lock, we do not need BQL
>>
>> ...but the BQL will, at least initially, remain to be everywhere. Every
>> non-converted device model will hold it while calling into Slirp. So we
>> have the ordering "BQL before Slirp lock" already. And we must ensure
>> that there is no "BQL after Slirp lock". Can you guarantee this?
>>
> Oh, yes, there is a potential ABBA lock problem. Especially for slirp
> backend, the NetClientState's receive() can be nested, the scene:
> e1000 send packet -> net_slirp_receive() ...->arp_input()..->
> ->slirp_output() ->e1000_receive()
> Although currently, there is no extra lock required by
> e1000_receive(), but the nested model does cause the potential of ABBA
> lock problem, when beginning to covert the frontend(e1000).
> What about introducing  slirp->lockstate  and slirp->lock only used to
> protect it. So the users of slirp will protect agaist each other by
> slirp->lockstate, meanwhile, we can advoid the potential dead lock.
>
Drop the bad idea. The slirp->lock should be dropped before calling
out direction method.  And if_start() is need to fix

> Regards,
> Pingfan
>> Jan
>>
>> --
>> Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
>> Corporate Competence Center Embedded Linux
diff mbox

Patch

diff --git a/slirp/slirp.c b/slirp/slirp.c
index 883b7bd..6bfcc67 100644
--- a/slirp/slirp.c
+++ b/slirp/slirp.c
@@ -206,6 +206,7 @@  Slirp *slirp_init(int restricted, struct in_addr vnetwork,
 
     slirp_init_once();
 
+    qemu_mutex_init(&slirp->lock);
     slirp->restricted = restricted;
 
     if_init(slirp);
@@ -248,6 +249,7 @@  void slirp_cleanup(Slirp *slirp)
 
     ip_cleanup(slirp);
     m_cleanup(slirp);
+    qemu_mutex_destroy(&slirp->lock);
 
     g_free(slirp->vdnssearch);
     g_free(slirp->tftp_prefix);
@@ -411,6 +413,7 @@  gboolean slirp_handler(gpointer data)
     struct socket *so, *so_next;
     int ret;
 
+    qemu_mutex_lock(&slirp->lock);
     /*
      * See if anything has timed out
      */
@@ -594,6 +597,7 @@  gboolean slirp_handler(gpointer data)
     }
 
     if_start(slirp);
+    qemu_mutex_unlock(&slirp->lock);
     return true;
 }
 
@@ -665,6 +669,7 @@  void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
         return;
 
     proto = ntohs(*(uint16_t *)(pkt + 12));
+    qemu_mutex_lock(&slirp->lock);
     switch(proto) {
     case ETH_P_ARP:
         arp_input(slirp, pkt, pkt_len);
@@ -688,6 +693,7 @@  void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
     default:
         break;
     }
+    qemu_mutex_unlock(&slirp->lock);
 }
 
 /* Output the IP packet to the ethernet device. Returns 0 if the packet must be
@@ -860,15 +866,21 @@  void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port,
                        const uint8_t *buf, int size)
 {
     int ret;
-    struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port);
+    struct socket *so;
+
+    qemu_mutex_lock(&slirp->lock);
+    so = slirp_find_ctl_socket(slirp, guest_addr, guest_port);
 
-    if (!so)
+    if (!so) {
+        qemu_mutex_unlock(&slirp->lock);
         return;
+    }
 
     ret = soreadbuf(so, (const char *)buf, size);
 
     if (ret > 0)
         tcp_output(sototcpcb(so));
+    qemu_mutex_unlock(&slirp->lock);
 }
 
 static void slirp_tcp_save(QEMUFile *f, struct tcpcb *tp)
diff --git a/slirp/slirp.h b/slirp/slirp.h
index 008360e..7ab0c70 100644
--- a/slirp/slirp.h
+++ b/slirp/slirp.h
@@ -135,6 +135,7 @@  void free(void *ptr);
 
 #include "qemu/queue.h"
 #include "qemu/sockets.h"
+#include "qemu/thread.h"
 
 #include "libslirp.h"
 #include "ip.h"
@@ -207,6 +208,8 @@  struct Slirp {
     u_int last_slowtimo;
     int do_slowtimo;
 
+    /* lock to protect slirp running both on frontend or SlirpState context */
+    QemuMutex lock;
     /* virtual network configuration */
     struct in_addr vnetwork_addr;
     struct in_addr vnetwork_mask;