[ovs-dev,v8,04/13] netdev-dpdk: Serialise non-pmds mbufs' alloc/free.

Message ID 1528734090-220990-5-git-send-email-tiago.lam@intel.com
State Superseded
Headers show
Series
  • Support multi-segment mbufs
Related show

Commit Message

Lam, Tiago June 11, 2018, 4:21 p.m.
A new mutex, 'nonpmd_mp_mutex', has been introduced to serialise
allocation and free operations by non-pmd threads on a given mempool.

free_dpdk_buf() has been modified to make use of the introduced mutex.

Signed-off-by: Tiago Lam <tiago.lam@intel.com>
---
 lib/netdev-dpdk.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

Comments

Eelco Chaudron June 18, 2018, 11:28 a.m. | #1
On 11 Jun 2018, at 18:21, Tiago Lam wrote:

> A new mutex, 'nonpmd_mp_mutex', has been introduced to serialise
> allocation and free operations by non-pmd threads on a given mempool.
>

Can you explain why we need the mutex here? Can't see any reason why 
rte_pktmbuf_free() needs to be protected for non-pmd threads?

> free_dpdk_buf() has been modified to make use of the introduced mutex.
>
> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
> ---
>  lib/netdev-dpdk.c | 21 ++++++++++++++++++++-
>  1 file changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index f546507..efd7c20 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -294,6 +294,10 @@ static struct ovs_mutex dpdk_mp_mutex 
> OVS_ACQ_AFTER(dpdk_mutex)
>  static struct ovs_list dpdk_mp_free_list 
> OVS_GUARDED_BY(dpdk_mp_mutex)
>      = OVS_LIST_INITIALIZER(&dpdk_mp_free_list);
>
> +/* This mutex must be used by non pmd threads when allocating or 
> freeing
> + * mbufs through mempools. */
> +static struct ovs_mutex nonpmd_mp_mutex = OVS_MUTEX_INITIALIZER;
> +
>  /* Wrapper for a mempool released but not yet freed. */
>  struct dpdk_mp {
>       struct rte_mempool *mp;
> @@ -461,6 +465,8 @@ struct netdev_rxq_dpdk {
>      dpdk_port_t port_id;
>  };
>
> +static bool dpdk_thread_is_pmd(void);
> +
>  static void netdev_dpdk_destruct(struct netdev *netdev);
>  static void netdev_dpdk_vhost_destruct(struct netdev *netdev);
>
> @@ -494,6 +500,12 @@ dpdk_buf_size(int mtu)
>                       NETDEV_DPDK_MBUF_ALIGN);
>  }
>
> +static bool
> +dpdk_thread_is_pmd(void)
> +{
> +     return rte_lcore_id() != NON_PMD_CORE_ID;

Will this continue to work with newer DPDK versions? I've not looked at 
it in detail, but I did notice that the vhost threads in the newer DPDK 
now get created with rte_ctrl_thread_create() and does some lcore 
mangling.

> +}
> +
>  /* Allocates an area of 'sz' bytes from DPDK.  The memory is zero'ed.
>   *
>   * Unlike xmalloc(), this function can return NULL on failure. */
> @@ -506,9 +518,16 @@ dpdk_rte_mzalloc(size_t sz)
>  void
>  free_dpdk_buf(struct dp_packet *p)
>  {
> -    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
> +    if (!dpdk_thread_is_pmd()) {
> +        ovs_mutex_lock(&nonpmd_mp_mutex);
> +    }
>
> +    struct rte_mbuf *pkt = (struct rte_mbuf *) p;

Should we not use a container_of macro here?

>      rte_pktmbuf_free(pkt);
> +
> +    if (!dpdk_thread_is_pmd()) {
> +        ovs_mutex_unlock(&nonpmd_mp_mutex);
> +    }
>  }
>
>  static void
> -- 
> 2.7.4
Lam, Tiago June 22, 2018, 7:03 p.m. | #2
On 18/06/2018 12:28, Eelco Chaudron wrote:
> 
> 
> On 11 Jun 2018, at 18:21, Tiago Lam wrote:
> 
>> A new mutex, 'nonpmd_mp_mutex', has been introduced to serialise
>> allocation and free operations by non-pmd threads on a given mempool.
>>
> 
> Can you explain why we need the mutex here? Can't see any reason why 
> rte_pktmbuf_free() needs to be protected for non-pmd threads?

My understanding is that each PMD has a thread-local cache of its own,
which it can access without locking. However, all non-PMD threads have
direct access to the mempool, and thus need to lock in order to access
it. Otherwise memory corruption could ensue.

For OvS this means all these functions which can be called outside of a
PMD context, or without holding the `non_pmd_mutex` mutex, in struct
dp_netdev, need to lock before issue operations that modify a mempool.

Hopefully I've put it succinctly enough without botching up the concept
too much.

> 
>> free_dpdk_buf() has been modified to make use of the introduced mutex.
>>
>> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
>> ---
>>  lib/netdev-dpdk.c | 21 ++++++++++++++++++++-
>>  1 file changed, 20 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index f546507..efd7c20 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -294,6 +294,10 @@ static struct ovs_mutex dpdk_mp_mutex 
>> OVS_ACQ_AFTER(dpdk_mutex)
>>  static struct ovs_list dpdk_mp_free_list 
>> OVS_GUARDED_BY(dpdk_mp_mutex)
>>      = OVS_LIST_INITIALIZER(&dpdk_mp_free_list);
>>
>> +/* This mutex must be used by non pmd threads when allocating or 
>> freeing
>> + * mbufs through mempools. */
>> +static struct ovs_mutex nonpmd_mp_mutex = OVS_MUTEX_INITIALIZER;
>> +
>>  /* Wrapper for a mempool released but not yet freed. */
>>  struct dpdk_mp {
>>       struct rte_mempool *mp;
>> @@ -461,6 +465,8 @@ struct netdev_rxq_dpdk {
>>      dpdk_port_t port_id;
>>  };
>>
>> +static bool dpdk_thread_is_pmd(void);
>> +
>>  static void netdev_dpdk_destruct(struct netdev *netdev);
>>  static void netdev_dpdk_vhost_destruct(struct netdev *netdev);
>>
>> @@ -494,6 +500,12 @@ dpdk_buf_size(int mtu)
>>                       NETDEV_DPDK_MBUF_ALIGN);
>>  }
>>
>> +static bool
>> +dpdk_thread_is_pmd(void)
>> +{
>> +     return rte_lcore_id() != NON_PMD_CORE_ID;
> 
> Will this continue to work with newer DPDK versions? I've not looked at 
> it in detail, but I did notice that the vhost threads in the newer DPDK 
> now get created with rte_ctrl_thread_create() and does some lcore 
> mangling.
> 

I had a look.  This seems to be a wrapper for creating management
threads only, to standardize pthread creation around that in DPDK. Usual
EAL thread creation and assigning of LCORE_ID_ANY to non-EAL threads
seems to still be the same.

>> +}
>> +
>>  /* Allocates an area of 'sz' bytes from DPDK.  The memory is zero'ed.
>>   *
>>   * Unlike xmalloc(), this function can return NULL on failure. */
>> @@ -506,9 +518,16 @@ dpdk_rte_mzalloc(size_t sz)
>>  void
>>  free_dpdk_buf(struct dp_packet *p)
>>  {
>> -    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
>> +    if (!dpdk_thread_is_pmd()) {
>> +        ovs_mutex_lock(&nonpmd_mp_mutex);
>> +    }
>>
>> +    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
> 
> Should we not use a container_of macro here?
> 

I'll use it for the next iteration.

Tiago.
Eelco Chaudron June 26, 2018, 9:19 a.m. | #3
On 22 Jun 2018, at 21:03, Lam, Tiago wrote:

> On 18/06/2018 12:28, Eelco Chaudron wrote:
>>
>>
>> On 11 Jun 2018, at 18:21, Tiago Lam wrote:
>>
>>> A new mutex, 'nonpmd_mp_mutex', has been introduced to serialise
>>> allocation and free operations by non-pmd threads on a given 
>>> mempool.
>>>
>>
>> Can you explain why we need the mutex here? Can't see any reason why
>> rte_pktmbuf_free() needs to be protected for non-pmd threads?
>
> My understanding is that each PMD has a thread-local cache of its own,
> which it can access without locking. However, all non-PMD threads have
> direct access to the mempool, and thus need to lock in order to access
> it. Otherwise memory corruption could ensue.
>
> For OvS this means all these functions which can be called outside of 
> a
> PMD context, or without holding the `non_pmd_mutex` mutex, in struct
> dp_netdev, need to lock before issue operations that modify a mempool.
>
> Hopefully I've put it succinctly enough without botching up the 
> concept
> too much.

I do not think this is a problem, as DPDK uses 
rte_mempool_default_cache() to figure out which cache to use.
As the non PMD threads have a higher than RTE_MAX_LCORE id the cache 
will not be used.

>>
>>> free_dpdk_buf() has been modified to make use of the introduced 
>>> mutex.
>>>
>>> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
>>> ---
>>>  lib/netdev-dpdk.c | 21 ++++++++++++++++++++-
>>>  1 file changed, 20 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>>> index f546507..efd7c20 100644
>>> --- a/lib/netdev-dpdk.c
>>> +++ b/lib/netdev-dpdk.c
>>> @@ -294,6 +294,10 @@ static struct ovs_mutex dpdk_mp_mutex
>>> OVS_ACQ_AFTER(dpdk_mutex)
>>>  static struct ovs_list dpdk_mp_free_list
>>> OVS_GUARDED_BY(dpdk_mp_mutex)
>>>      = OVS_LIST_INITIALIZER(&dpdk_mp_free_list);
>>>
>>> +/* This mutex must be used by non pmd threads when allocating or
>>> freeing
>>> + * mbufs through mempools. */
>>> +static struct ovs_mutex nonpmd_mp_mutex = OVS_MUTEX_INITIALIZER;
>>> +
>>>  /* Wrapper for a mempool released but not yet freed. */
>>>  struct dpdk_mp {
>>>       struct rte_mempool *mp;
>>> @@ -461,6 +465,8 @@ struct netdev_rxq_dpdk {
>>>      dpdk_port_t port_id;
>>>  };
>>>
>>> +static bool dpdk_thread_is_pmd(void);
>>> +
>>>  static void netdev_dpdk_destruct(struct netdev *netdev);
>>>  static void netdev_dpdk_vhost_destruct(struct netdev *netdev);
>>>
>>> @@ -494,6 +500,12 @@ dpdk_buf_size(int mtu)
>>>                       NETDEV_DPDK_MBUF_ALIGN);
>>>  }
>>>
>>> +static bool
>>> +dpdk_thread_is_pmd(void)
>>> +{
>>> +     return rte_lcore_id() != NON_PMD_CORE_ID;
>>
>> Will this continue to work with newer DPDK versions? I've not looked 
>> at
>> it in detail, but I did notice that the vhost threads in the newer 
>> DPDK
>> now get created with rte_ctrl_thread_create() and does some lcore
>> mangling.
>>
>
> I had a look.  This seems to be a wrapper for creating management
> threads only, to standardize pthread creation around that in DPDK. 
> Usual
> EAL thread creation and assigning of LCORE_ID_ANY to non-EAL threads
> seems to still be the same.

Thanks for checking.
>
>>> +}
>>> +
>>>  /* Allocates an area of 'sz' bytes from DPDK.  The memory is 
>>> zero'ed.
>>>   *
>>>   * Unlike xmalloc(), this function can return NULL on failure. */
>>> @@ -506,9 +518,16 @@ dpdk_rte_mzalloc(size_t sz)
>>>  void
>>>  free_dpdk_buf(struct dp_packet *p)
>>>  {
>>> -    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
>>> +    if (!dpdk_thread_is_pmd()) {
>>> +        ovs_mutex_lock(&nonpmd_mp_mutex);
>>> +    }
>>>
>>> +    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
>>
>> Should we not use a container_of macro here?
>>
>
> I'll use it for the next iteration.
>
> Tiago.
Ilya Maximets June 26, 2018, 10:02 a.m. | #4
On 26.06.2018 12:19, Eelco Chaudron wrote:
> 
> 
> On 22 Jun 2018, at 21:03, Lam, Tiago wrote:
> 
>> On 18/06/2018 12:28, Eelco Chaudron wrote:
>>>
>>>
>>> On 11 Jun 2018, at 18:21, Tiago Lam wrote:
>>>
>>>> A new mutex, 'nonpmd_mp_mutex', has been introduced to serialise
>>>> allocation and free operations by non-pmd threads on a given mempool.
>>>>
>>>
>>> Can you explain why we need the mutex here? Can't see any reason why
>>> rte_pktmbuf_free() needs to be protected for non-pmd threads?
>>
>> My understanding is that each PMD has a thread-local cache of its own,
>> which it can access without locking. However, all non-PMD threads have
>> direct access to the mempool, and thus need to lock in order to access
>> it. Otherwise memory corruption could ensue.
>>
>> For OvS this means all these functions which can be called outside of a
>> PMD context, or without holding the `non_pmd_mutex` mutex, in struct
>> dp_netdev, need to lock before issue operations that modify a mempool.
>>
>> Hopefully I've put it succinctly enough without botching up the concept
>> too much.
> 
> I do not think this is a problem, as DPDK uses rte_mempool_default_cache() to figure out which cache to use.
> As the non PMD threads have a higher than RTE_MAX_LCORE id the cache will not be used.


It's not the problem. The main reason is that implementation of ring
buffers on which the mempools based is not preemptible:

  lib/librte_mempool/rte_mempool.h:
  * Note: the mempool implementation is not preemptible. An lcore must not be
  * interrupted by another task that uses the same mempool (because it uses a
  * ring which is not preemptible).

As soon as non-PMD threads are not pinned, they could be scheduled on the same
core and preempted while executing mempool operations.

> 
>>>
>>>> free_dpdk_buf() has been modified to make use of the introduced mutex.
>>>>
>>>> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
>>>> ---
>>>>  lib/netdev-dpdk.c | 21 ++++++++++++++++++++-
>>>>  1 file changed, 20 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>>>> index f546507..efd7c20 100644
>>>> --- a/lib/netdev-dpdk.c
>>>> +++ b/lib/netdev-dpdk.c
>>>> @@ -294,6 +294,10 @@ static struct ovs_mutex dpdk_mp_mutex
>>>> OVS_ACQ_AFTER(dpdk_mutex)
>>>>  static struct ovs_list dpdk_mp_free_list
>>>> OVS_GUARDED_BY(dpdk_mp_mutex)
>>>>      = OVS_LIST_INITIALIZER(&dpdk_mp_free_list);
>>>>
>>>> +/* This mutex must be used by non pmd threads when allocating or
>>>> freeing
>>>> + * mbufs through mempools. */
>>>> +static struct ovs_mutex nonpmd_mp_mutex = OVS_MUTEX_INITIALIZER;
>>>> +
>>>>  /* Wrapper for a mempool released but not yet freed. */
>>>>  struct dpdk_mp {
>>>>       struct rte_mempool *mp;
>>>> @@ -461,6 +465,8 @@ struct netdev_rxq_dpdk {
>>>>      dpdk_port_t port_id;
>>>>  };
>>>>
>>>> +static bool dpdk_thread_is_pmd(void);
>>>> +
>>>>  static void netdev_dpdk_destruct(struct netdev *netdev);
>>>>  static void netdev_dpdk_vhost_destruct(struct netdev *netdev);
>>>>
>>>> @@ -494,6 +500,12 @@ dpdk_buf_size(int mtu)
>>>>                       NETDEV_DPDK_MBUF_ALIGN);
>>>>  }
>>>>
>>>> +static bool
>>>> +dpdk_thread_is_pmd(void)
>>>> +{
>>>> +     return rte_lcore_id() != NON_PMD_CORE_ID;
>>>
>>> Will this continue to work with newer DPDK versions? I've not looked at
>>> it in detail, but I did notice that the vhost threads in the newer DPDK
>>> now get created with rte_ctrl_thread_create() and does some lcore
>>> mangling.
>>>
>>
>> I had a look.  This seems to be a wrapper for creating management
>> threads only, to standardize pthread creation around that in DPDK. Usual
>> EAL thread creation and assigning of LCORE_ID_ANY to non-EAL threads
>> seems to still be the same.
> 
> Thanks for checking.
>>
>>>> +}
>>>> +
>>>>  /* Allocates an area of 'sz' bytes from DPDK.  The memory is zero'ed.
>>>>   *
>>>>   * Unlike xmalloc(), this function can return NULL on failure. */
>>>> @@ -506,9 +518,16 @@ dpdk_rte_mzalloc(size_t sz)
>>>>  void
>>>>  free_dpdk_buf(struct dp_packet *p)
>>>>  {
>>>> -    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
>>>> +    if (!dpdk_thread_is_pmd()) {
>>>> +        ovs_mutex_lock(&nonpmd_mp_mutex);
>>>> +    }
>>>>
>>>> +    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
>>>
>>> Should we not use a container_of macro here?
>>>
>>
>> I'll use it for the next iteration.
>>
>> Tiago.
> 
> 
>
Eelco Chaudron June 26, 2018, 12:32 p.m. | #5
On 26 Jun 2018, at 12:02, Ilya Maximets wrote:

> On 26.06.2018 12:19, Eelco Chaudron wrote:
>>
>>
>> On 22 Jun 2018, at 21:03, Lam, Tiago wrote:
>>
>>> On 18/06/2018 12:28, Eelco Chaudron wrote:
>>>>
>>>>
>>>> On 11 Jun 2018, at 18:21, Tiago Lam wrote:
>>>>
>>>>> A new mutex, 'nonpmd_mp_mutex', has been introduced to serialise
>>>>> allocation and free operations by non-pmd threads on a given 
>>>>> mempool.
>>>>>
>>>>
>>>> Can you explain why we need the mutex here? Can't see any reason 
>>>> why
>>>> rte_pktmbuf_free() needs to be protected for non-pmd threads?
>>>
>>> My understanding is that each PMD has a thread-local cache of its 
>>> own,
>>> which it can access without locking. However, all non-PMD threads 
>>> have
>>> direct access to the mempool, and thus need to lock in order to 
>>> access
>>> it. Otherwise memory corruption could ensue.
>>>
>>> For OvS this means all these functions which can be called outside 
>>> of a
>>> PMD context, or without holding the `non_pmd_mutex` mutex, in struct
>>> dp_netdev, need to lock before issue operations that modify a 
>>> mempool.
>>>
>>> Hopefully I've put it succinctly enough without botching up the 
>>> concept
>>> too much.
>>
>> I do not think this is a problem, as DPDK uses 
>> rte_mempool_default_cache() to figure out which cache to use.
>> As the non PMD threads have a higher than RTE_MAX_LCORE id the cache 
>> will not be used.
>
>
> It's not the problem. The main reason is that implementation of ring
> buffers on which the mempools based is not preemptible:
>
>   lib/librte_mempool/rte_mempool.h:
>   * Note: the mempool implementation is not preemptible. An lcore must 
> not be
>   * interrupted by another task that uses the same mempool (because it 
> uses a
>   * ring which is not preemptible).
>
> As soon as non-PMD threads are not pinned, they could be scheduled on 
> the same
> core and preempted while executing mempool operations.

Thanks for the additional clarification! Tiago, maybe its good to add 
this info some where in the code as a comment?

>>
>>>>
>>>>> free_dpdk_buf() has been modified to make use of the introduced 
>>>>> mutex.
>>>>>
>>>>> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
>>>>> ---
>>>>>  lib/netdev-dpdk.c | 21 ++++++++++++++++++++-
>>>>>  1 file changed, 20 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>>>>> index f546507..efd7c20 100644
>>>>> --- a/lib/netdev-dpdk.c
>>>>> +++ b/lib/netdev-dpdk.c
>>>>> @@ -294,6 +294,10 @@ static struct ovs_mutex dpdk_mp_mutex
>>>>> OVS_ACQ_AFTER(dpdk_mutex)
>>>>>  static struct ovs_list dpdk_mp_free_list
>>>>> OVS_GUARDED_BY(dpdk_mp_mutex)
>>>>>      = OVS_LIST_INITIALIZER(&dpdk_mp_free_list);
>>>>>
>>>>> +/* This mutex must be used by non pmd threads when allocating or
>>>>> freeing
>>>>> + * mbufs through mempools. */
>>>>> +static struct ovs_mutex nonpmd_mp_mutex = OVS_MUTEX_INITIALIZER;
>>>>> +
>>>>>  /* Wrapper for a mempool released but not yet freed. */
>>>>>  struct dpdk_mp {
>>>>>       struct rte_mempool *mp;
>>>>> @@ -461,6 +465,8 @@ struct netdev_rxq_dpdk {
>>>>>      dpdk_port_t port_id;
>>>>>  };
>>>>>
>>>>> +static bool dpdk_thread_is_pmd(void);
>>>>> +
>>>>>  static void netdev_dpdk_destruct(struct netdev *netdev);
>>>>>  static void netdev_dpdk_vhost_destruct(struct netdev *netdev);
>>>>>
>>>>> @@ -494,6 +500,12 @@ dpdk_buf_size(int mtu)
>>>>>                       
>>>>> NETDEV_DPDK_MBUF_ALIGN);
>>>>>  }
>>>>>
>>>>> +static bool
>>>>> +dpdk_thread_is_pmd(void)
>>>>> +{
>>>>> +     return rte_lcore_id() != NON_PMD_CORE_ID;
>>>>
>>>> Will this continue to work with newer DPDK versions? I've not 
>>>> looked at
>>>> it in detail, but I did notice that the vhost threads in the newer 
>>>> DPDK
>>>> now get created with rte_ctrl_thread_create() and does some lcore
>>>> mangling.
>>>>
>>>
>>> I had a look.  This seems to be a wrapper for creating management
>>> threads only, to standardize pthread creation around that in DPDK. 
>>> Usual
>>> EAL thread creation and assigning of LCORE_ID_ANY to non-EAL threads
>>> seems to still be the same.
>>
>> Thanks for checking.
>>>
>>>>> +}
>>>>> +
>>>>>  /* Allocates an area of 'sz' bytes from DPDK.  The memory is 
>>>>> zero'ed.
>>>>>   *
>>>>>   * Unlike xmalloc(), this function can return NULL on failure. 
>>>>> */
>>>>> @@ -506,9 +518,16 @@ dpdk_rte_mzalloc(size_t sz)
>>>>>  void
>>>>>  free_dpdk_buf(struct dp_packet *p)
>>>>>  {
>>>>> -    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
>>>>> +    if (!dpdk_thread_is_pmd()) {
>>>>> +        ovs_mutex_lock(&nonpmd_mp_mutex);
>>>>> +    }
>>>>>
>>>>> +    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
>>>>
>>>> Should we not use a container_of macro here?
>>>>
>>>
>>> I'll use it for the next iteration.
>>>
>>> Tiago.
>>
>>
>>
Ilya Maximets June 26, 2018, 12:53 p.m. | #6
On 26.06.2018 15:32, Eelco Chaudron wrote:
> 
> 
> On 26 Jun 2018, at 12:02, Ilya Maximets wrote:
> 
>> On 26.06.2018 12:19, Eelco Chaudron wrote:
>>>
>>>
>>> On 22 Jun 2018, at 21:03, Lam, Tiago wrote:
>>>
>>>> On 18/06/2018 12:28, Eelco Chaudron wrote:
>>>>>
>>>>>
>>>>> On 11 Jun 2018, at 18:21, Tiago Lam wrote:
>>>>>
>>>>>> A new mutex, 'nonpmd_mp_mutex', has been introduced to serialise
>>>>>> allocation and free operations by non-pmd threads on a given mempool.
>>>>>>
>>>>>
>>>>> Can you explain why we need the mutex here? Can't see any reason why
>>>>> rte_pktmbuf_free() needs to be protected for non-pmd threads?
>>>>
>>>> My understanding is that each PMD has a thread-local cache of its own,
>>>> which it can access without locking. However, all non-PMD threads have
>>>> direct access to the mempool, and thus need to lock in order to access
>>>> it. Otherwise memory corruption could ensue.
>>>>
>>>> For OvS this means all these functions which can be called outside of a
>>>> PMD context, or without holding the `non_pmd_mutex` mutex, in struct
>>>> dp_netdev, need to lock before issue operations that modify a mempool.
>>>>
>>>> Hopefully I've put it succinctly enough without botching up the concept
>>>> too much.
>>>
>>> I do not think this is a problem, as DPDK uses rte_mempool_default_cache() to figure out which cache to use.
>>> As the non PMD threads have a higher than RTE_MAX_LCORE id the cache will not be used.
>>
>>
>> It's not the problem. The main reason is that implementation of ring
>> buffers on which the mempools based is not preemptible:
>>
>>   lib/librte_mempool/rte_mempool.h:
>>   * Note: the mempool implementation is not preemptible. An lcore must not be
>>   * interrupted by another task that uses the same mempool (because it uses a
>>   * ring which is not preemptible).
>>
>> As soon as non-PMD threads are not pinned, they could be scheduled on the same
>> core and preempted while executing mempool operations.
> 
> Thanks for the additional clarification! Tiago, maybe its good to add this info some where in the code as a comment?

Some more detailed explanation also exists in DPDK prog guide:
https://doc.dpdk.org/guides/prog_guide/env_abstraction_layer.html#known-issues

> 
>>>
>>>>>
>>>>>> free_dpdk_buf() has been modified to make use of the introduced mutex.
>>>>>>
>>>>>> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
>>>>>> ---
>>>>>>  lib/netdev-dpdk.c | 21 ++++++++++++++++++++-
>>>>>>  1 file changed, 20 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>>>>>> index f546507..efd7c20 100644
>>>>>> --- a/lib/netdev-dpdk.c
>>>>>> +++ b/lib/netdev-dpdk.c
>>>>>> @@ -294,6 +294,10 @@ static struct ovs_mutex dpdk_mp_mutex
>>>>>> OVS_ACQ_AFTER(dpdk_mutex)
>>>>>>  static struct ovs_list dpdk_mp_free_list
>>>>>> OVS_GUARDED_BY(dpdk_mp_mutex)
>>>>>>      = OVS_LIST_INITIALIZER(&dpdk_mp_free_list);
>>>>>>
>>>>>> +/* This mutex must be used by non pmd threads when allocating or
>>>>>> freeing
>>>>>> + * mbufs through mempools. */
>>>>>> +static struct ovs_mutex nonpmd_mp_mutex = OVS_MUTEX_INITIALIZER;
>>>>>> +
>>>>>>  /* Wrapper for a mempool released but not yet freed. */
>>>>>>  struct dpdk_mp {
>>>>>>       struct rte_mempool *mp;
>>>>>> @@ -461,6 +465,8 @@ struct netdev_rxq_dpdk {
>>>>>>      dpdk_port_t port_id;
>>>>>>  };
>>>>>>
>>>>>> +static bool dpdk_thread_is_pmd(void);
>>>>>> +
>>>>>>  static void netdev_dpdk_destruct(struct netdev *netdev);
>>>>>>  static void netdev_dpdk_vhost_destruct(struct netdev *netdev);
>>>>>>
>>>>>> @@ -494,6 +500,12 @@ dpdk_buf_size(int mtu)
>>>>>>                       NETDEV_DPDK_MBUF_ALIGN);
>>>>>>  }
>>>>>>
>>>>>> +static bool
>>>>>> +dpdk_thread_is_pmd(void)
>>>>>> +{
>>>>>> +     return rte_lcore_id() != NON_PMD_CORE_ID;
>>>>>
>>>>> Will this continue to work with newer DPDK versions? I've not looked at
>>>>> it in detail, but I did notice that the vhost threads in the newer DPDK
>>>>> now get created with rte_ctrl_thread_create() and does some lcore
>>>>> mangling.
>>>>>
>>>>
>>>> I had a look.  This seems to be a wrapper for creating management
>>>> threads only, to standardize pthread creation around that in DPDK. Usual
>>>> EAL thread creation and assigning of LCORE_ID_ANY to non-EAL threads
>>>> seems to still be the same.
>>>
>>> Thanks for checking.
>>>>
>>>>>> +}
>>>>>> +
>>>>>>  /* Allocates an area of 'sz' bytes from DPDK.  The memory is zero'ed.
>>>>>>   *
>>>>>>   * Unlike xmalloc(), this function can return NULL on failure. */
>>>>>> @@ -506,9 +518,16 @@ dpdk_rte_mzalloc(size_t sz)
>>>>>>  void
>>>>>>  free_dpdk_buf(struct dp_packet *p)
>>>>>>  {
>>>>>> -    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
>>>>>> +    if (!dpdk_thread_is_pmd()) {
>>>>>> +        ovs_mutex_lock(&nonpmd_mp_mutex);
>>>>>> +    }
>>>>>>
>>>>>> +    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
>>>>>
>>>>> Should we not use a container_of macro here?
>>>>>
>>>>
>>>> I'll use it for the next iteration.
>>>>
>>>> Tiago.
>>>
>>>
>>>
> 
> 
>

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index f546507..efd7c20 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -294,6 +294,10 @@  static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex)
 static struct ovs_list dpdk_mp_free_list OVS_GUARDED_BY(dpdk_mp_mutex)
     = OVS_LIST_INITIALIZER(&dpdk_mp_free_list);
 
+/* This mutex must be used by non pmd threads when allocating or freeing
+ * mbufs through mempools. */
+static struct ovs_mutex nonpmd_mp_mutex = OVS_MUTEX_INITIALIZER;
+
 /* Wrapper for a mempool released but not yet freed. */
 struct dpdk_mp {
      struct rte_mempool *mp;
@@ -461,6 +465,8 @@  struct netdev_rxq_dpdk {
     dpdk_port_t port_id;
 };
 
+static bool dpdk_thread_is_pmd(void);
+
 static void netdev_dpdk_destruct(struct netdev *netdev);
 static void netdev_dpdk_vhost_destruct(struct netdev *netdev);
 
@@ -494,6 +500,12 @@  dpdk_buf_size(int mtu)
                      NETDEV_DPDK_MBUF_ALIGN);
 }
 
+static bool
+dpdk_thread_is_pmd(void)
+{
+     return rte_lcore_id() != NON_PMD_CORE_ID;
+}
+
 /* Allocates an area of 'sz' bytes from DPDK.  The memory is zero'ed.
  *
  * Unlike xmalloc(), this function can return NULL on failure. */
@@ -506,9 +518,16 @@  dpdk_rte_mzalloc(size_t sz)
 void
 free_dpdk_buf(struct dp_packet *p)
 {
-    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
+    if (!dpdk_thread_is_pmd()) {
+        ovs_mutex_lock(&nonpmd_mp_mutex);
+    }
 
+    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
     rte_pktmbuf_free(pkt);
+
+    if (!dpdk_thread_is_pmd()) {
+        ovs_mutex_unlock(&nonpmd_mp_mutex);
+    }
 }
 
 static void