
[ovs-dev,RFC,2/5] dpif-netdev: Trigger parallel pmd reloads.

Message ID 1556626682-28858-3-git-send-email-david.marchand@redhat.com
State: Changes Requested
Series: Quicker pmd threads reloads

Commit Message

David Marchand April 30, 2019, 12:17 p.m. UTC
pmd reloads are currently serialised: each step that calls
reload_affected_pmds waits for the pmds one at a time.
Any pmd that is busy processing packets, waiting on a mutex, etc. will
make the other pmd threads wait for a delay that becomes
non-deterministic as the syscalls add up.

Switch to a little busy loop on the control thread using an atomic
count.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/dpif-netdev.c | 43 ++++++++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 13 deletions(-)
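
For illustration, a minimal standalone sketch of the synchronization
pattern the patch moves to, written with plain C11 atomics instead of
the OVS atomic_count/atomic_bool wrappers (the function names
ctrl_reload_pmds() and pmd_ack_reload() are invented for the sketch;
the default seq_cst ordering used here also side-steps the memory
ordering question raised in the comments below):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_uint reloading_pmds;      /* pmds still reloading */

    /* Control thread: flag every affected pmd, then spin on a single
     * counter until all of them have acknowledged, instead of waiting
     * for each pmd in turn on a condition variable. */
    void ctrl_reload_pmds(atomic_bool *reload_flags[], unsigned int n)
    {
        atomic_store(&reloading_pmds, n);
        for (unsigned int i = 0; i < n; i++) {
            atomic_store(reload_flags[i], true);  /* reloads run in parallel */
        }
        while (atomic_load(&reloading_pmds) != 0) {
            /* Busy loop: no syscalls, so the wait only lasts as long as
             * the slowest pmd. */
        }
    }

    /* Each pmd thread, once its own reload is finished. */
    void pmd_ack_reload(atomic_bool *reload_flag)
    {
        atomic_store(reload_flag, false);
        atomic_fetch_sub(&reloading_pmds, 1);
    }

The control thread spins on a single counter, so a pmd that is slow to
notice its flag no longer delays the reload request to the other pmds.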

Comments

Eelco Chaudron May 7, 2019, 1:46 p.m. UTC | #1
On 30 Apr 2019, at 14:17, David Marchand wrote:

> pmd reloads are currently serialised in each steps calling
> reload_affected_pmds.
> Any pmd processing packets, waiting on a mutex etc... will make other
> pmd threads wait for a delay that can be undeterministic when syscalls
> adds up.
>
> Switch to a little busy loop on the control thread using an atomic
> count.
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>  lib/dpif-netdev.c | 43 ++++++++++++++++++++++++++++++-------------
>  1 file changed, 30 insertions(+), 13 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 30774ed..b2b21fd 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -335,6 +335,9 @@ struct dp_netdev {
>      /* The time that a packet can wait in output batch for sending. 
> */
>      atomic_uint32_t tx_flush_interval;
>
> +    /* Count of pmds currently reloading */
> +    atomic_count reloading_pmds;
> +
>      /* Meters. */
>      struct ovs_mutex meter_locks[N_METER_LOCKS];
>      struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> @@ -647,9 +650,6 @@ struct dp_netdev_pmd_thread {
>      struct ovs_refcount ref_cnt;    /* Every reference must be 
> refcount'ed. */
>      struct cmap_node node;          /* In 'dp->poll_threads'. */
>
> -    pthread_cond_t cond;            /* For synchronizing pmd thread 
> reload. */
> -    struct ovs_mutex cond_mutex;    /* Mutex for condition variable. 
> */
> -
>      /* Per thread exact-match cache.  Note, the instance for cpu core
>       * NON_PMD_CORE_ID can be accessed by multiple threads, and 
> thusly
>       * need to be protected by 'non_pmd_mutex'.  Every other instance
> @@ -1525,6 +1525,8 @@ create_dp_netdev(const char *name, const struct 
> dpif_class *class,
>      atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
>      atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL);
>
> +    atomic_count_init(&dp->reloading_pmds, 0);
> +
>      cmap_init(&dp->poll_threads);
>      dp->pmd_rxq_assign_cyc = true;
>
> @@ -1754,11 +1756,8 @@ dp_netdev_reload_pmd__(struct 
> dp_netdev_pmd_thread *pmd)
>          return;
>      }
>
> -    ovs_mutex_lock(&pmd->cond_mutex);
>      seq_change(pmd->reload_seq);
>      atomic_store_relaxed(&pmd->reload, true);
> -    ovs_mutex_cond_wait(&pmd->cond, &pmd->cond_mutex);
> -    ovs_mutex_unlock(&pmd->cond_mutex);
>  }
>
>  static uint32_t
> @@ -4641,9 +4640,27 @@ rxq_scheduling(struct dp_netdev *dp, bool 
> pinned) OVS_REQUIRES(dp->port_mutex)
>  }
>
>  static void
> +wait_reloading_pmds(struct dp_netdev *dp)
> +{
> +    while (atomic_count_get(&dp->reloading_pmds) != 0) {
> +    }
> +}
> +
> +static void
>  reload_affected_pmds(struct dp_netdev *dp)
>  {
>      struct dp_netdev_pmd_thread *pmd;
> +    unsigned int pmd_count = 0;
> +
> +    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
> +        if (pmd->core_id == NON_PMD_CORE_ID) {
> +            continue;
> +        }
> +        if (pmd->need_reload) {
> +            pmd_count++;
> +        }
> +    }
> +    atomic_count_set(&dp->reloading_pmds, pmd_count);
>      CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>          if (pmd->need_reload) {
> @@ -4652,6 +4669,10 @@ reload_affected_pmds(struct dp_netdev *dp)

The above atomic_count_set() is a relaxed set, and so is the
atomic_store_relaxed(&pmd->reload, true) in dp_netdev_reload_pmd__(pmd).
This could lead to the PMDs decreasing dp->reloading_pmds before it is
set. I guess that if the correct memory_order is selected for
pmd->reload, as suggested by Ilya in patch 1/5, it should work.

>              pmd->need_reload = false;
>          }
>      }
> +
> +    if (pmd_count != 0) {
> +        wait_reloading_pmds(dp);
> +    }
>  }
>
>  static void
> @@ -5813,11 +5834,9 @@ dpif_netdev_enable_upcall(struct dpif *dpif)
>  static void
>  dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd)
>  {
> -    ovs_mutex_lock(&pmd->cond_mutex);
>      atomic_store_relaxed(&pmd->reload, false);
>      pmd->last_reload_seq = seq_read(pmd->reload_seq);
> -    xpthread_cond_signal(&pmd->cond);
> -    ovs_mutex_unlock(&pmd->cond_mutex);
> +    atomic_count_dec(&pmd->dp->reloading_pmds);
>  }
>
>  /* Finds and refs the dp_netdev_pmd_thread on core 'core_id'.  
> Returns
> @@ -5902,8 +5921,6 @@ dp_netdev_configure_pmd(struct 
> dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>      pmd->reload_seq = seq_create();
>      pmd->last_reload_seq = seq_read(pmd->reload_seq);
>      atomic_init(&pmd->reload, false);
> -    xpthread_cond_init(&pmd->cond, NULL);
> -    ovs_mutex_init(&pmd->cond_mutex);
>      ovs_mutex_init(&pmd->flow_mutex);
>      ovs_mutex_init(&pmd->port_mutex);
>      cmap_init(&pmd->flow_table);
> @@ -5946,8 +5963,6 @@ dp_netdev_destroy_pmd(struct 
> dp_netdev_pmd_thread *pmd)
>      cmap_destroy(&pmd->flow_table);
>      ovs_mutex_destroy(&pmd->flow_mutex);
>      seq_destroy(pmd->reload_seq);
> -    xpthread_cond_destroy(&pmd->cond);
> -    ovs_mutex_destroy(&pmd->cond_mutex);
>      ovs_mutex_destroy(&pmd->port_mutex);
>      free(pmd);
>  }
> @@ -5967,7 +5982,9 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct 
> dp_netdev_pmd_thread *pmd)
>          ovs_mutex_unlock(&dp->non_pmd_mutex);
>      } else {
>          atomic_store_relaxed(&pmd->exit, true);
> +        atomic_count_set(&dp->reloading_pmds, 1);
>          dp_netdev_reload_pmd__(pmd);
> +        wait_reloading_pmds(dp);
>          xpthread_join(pmd->thread, NULL);
>      }
>
> -- 
> 1.8.3.1
Ilya Maximets May 7, 2019, 2 p.m. UTC | #2
On 07.05.2019 16:46, Eelco Chaudron wrote:
> 
> 
> On 30 Apr 2019, at 14:17, David Marchand wrote:
> 
>> pmd reloads are currently serialised in each steps calling
>> reload_affected_pmds.
>> Any pmd processing packets, waiting on a mutex etc... will make other
>> pmd threads wait for a delay that can be undeterministic when syscalls
>> adds up.
>>
>> Switch to a little busy loop on the control thread using an atomic
>> count.
>>
>> Signed-off-by: David Marchand <david.marchand@redhat.com>
>> ---
>>  lib/dpif-netdev.c | 43 ++++++++++++++++++++++++++++++-------------
>>  1 file changed, 30 insertions(+), 13 deletions(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 30774ed..b2b21fd 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -335,6 +335,9 @@ struct dp_netdev {
>>      /* The time that a packet can wait in output batch for sending. */
>>      atomic_uint32_t tx_flush_interval;
>>
>> +    /* Count of pmds currently reloading */
>> +    atomic_count reloading_pmds;
>> +
>>      /* Meters. */
>>      struct ovs_mutex meter_locks[N_METER_LOCKS];
>>      struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
>> @@ -647,9 +650,6 @@ struct dp_netdev_pmd_thread {
>>      struct ovs_refcount ref_cnt;    /* Every reference must be refcount'ed. */
>>      struct cmap_node node;          /* In 'dp->poll_threads'. */
>>
>> -    pthread_cond_t cond;            /* For synchronizing pmd thread reload. */
>> -    struct ovs_mutex cond_mutex;    /* Mutex for condition variable. */
>> -
>>      /* Per thread exact-match cache.  Note, the instance for cpu core
>>       * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
>>       * need to be protected by 'non_pmd_mutex'.  Every other instance
>> @@ -1525,6 +1525,8 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
>>      atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
>>      atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL);
>>
>> +    atomic_count_init(&dp->reloading_pmds, 0);
>> +
>>      cmap_init(&dp->poll_threads);
>>      dp->pmd_rxq_assign_cyc = true;
>>
>> @@ -1754,11 +1756,8 @@ dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd)
>>          return;
>>      }
>>
>> -    ovs_mutex_lock(&pmd->cond_mutex);
>>      seq_change(pmd->reload_seq);
>>      atomic_store_relaxed(&pmd->reload, true);
>> -    ovs_mutex_cond_wait(&pmd->cond, &pmd->cond_mutex);
>> -    ovs_mutex_unlock(&pmd->cond_mutex);
>>  }
>>
>>  static uint32_t
>> @@ -4641,9 +4640,27 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
>>  }
>>
>>  static void
>> +wait_reloading_pmds(struct dp_netdev *dp)
>> +{
>> +    while (atomic_count_get(&dp->reloading_pmds) != 0) {
>> +    }
>> +}
>> +
>> +static void
>>  reload_affected_pmds(struct dp_netdev *dp)
>>  {
>>      struct dp_netdev_pmd_thread *pmd;
>> +    unsigned int pmd_count = 0;
>> +
>> +    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>> +        if (pmd->core_id == NON_PMD_CORE_ID) {
>> +            continue;
>> +        }
>> +        if (pmd->need_reload) {
>> +            pmd_count++;
>> +        }
>> +    }
>> +    atomic_count_set(&dp->reloading_pmds, pmd_count);
>>      CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>>          if (pmd->need_reload) {
>> @@ -4652,6 +4669,10 @@ reload_affected_pmds(struct dp_netdev *dp)
> 
> The above atomic_count_set() is a relaxed set, so is the atomic_store_relaxed(&pmd->reload, true) in dp_netdev_reload_pmd__(pmd).
> Which could lead to the PMDs decreasing the dp->reloading_pmd before it’s set. Guess if the correct memory_order is selected for pmd->reload as suggested by Ilya in patch 1/5 it should work.

Relaxed memory ordering does not imply any synchronization, so the
counter will not be synchronized with 'reload' regardless of the memory
ordering chosen for 'reload'. To guarantee the order, atomic_count_set()
must be replaced with a non-relaxed version, along with 'reload'.
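
As a generic refresher on the distinction being made here (a sketch with
invented names, not the fix the thread settles on): a release store
paired with an acquire load is what makes writes performed before the
flag visible to the reader, while relaxed operations only guarantee
atomicity of the individual access.

    #include <stdatomic.h>
    #include <stdbool.h>

    static int payload;                     /* ordinary, non-atomic data */
    static atomic_bool flag;

    void producer(void)
    {
        payload = 42;                       /* plain write */
        atomic_store_explicit(&flag, true,
                              memory_order_release); /* publishes payload */
    }

    void consumer(void)
    {
        while (!atomic_load_explicit(&flag, memory_order_acquire)) {
            /* spin */
        }
        /* payload == 42 is guaranteed here.  With memory_order_relaxed
         * on both sides the flag itself would still be read atomically,
         * but the read of payload could observe a stale value. */
    }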
Eelco Chaudron May 8, 2019, 7:17 a.m. UTC | #3
On 7 May 2019, at 16:00, Ilya Maximets wrote:

> On 07.05.2019 16:46, Eelco Chaudron wrote:
>>
>>
>> On 30 Apr 2019, at 14:17, David Marchand wrote:
>>
>>> pmd reloads are currently serialised in each steps calling
>>> reload_affected_pmds.
>>> Any pmd processing packets, waiting on a mutex etc... will make 
>>> other
>>> pmd threads wait for a delay that can be undeterministic when 
>>> syscalls
>>> adds up.
>>>
>>> Switch to a little busy loop on the control thread using an atomic
>>> count.
>>>
>>> Signed-off-by: David Marchand <david.marchand@redhat.com>
>>> ---
>>>  lib/dpif-netdev.c | 43 ++++++++++++++++++++++++++++++-------------
>>>  1 file changed, 30 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>>> index 30774ed..b2b21fd 100644
>>> --- a/lib/dpif-netdev.c
>>> +++ b/lib/dpif-netdev.c
>>> @@ -335,6 +335,9 @@ struct dp_netdev {
>>>      /* The time that a packet can wait in output batch for 
>>> sending. */
>>>      atomic_uint32_t tx_flush_interval;
>>>
>>> +    /* Count of pmds currently reloading */
>>> +    atomic_count reloading_pmds;
>>> +
>>>      /* Meters. */
>>>      struct ovs_mutex meter_locks[N_METER_LOCKS];
>>>      struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
>>> @@ -647,9 +650,6 @@ struct dp_netdev_pmd_thread {
>>>      struct ovs_refcount ref_cnt;    /* Every reference must 
>>> be refcount'ed. */
>>>      struct cmap_node node;          /* In 
>>> 'dp->poll_threads'. */
>>>
>>> -    pthread_cond_t cond;            /* For 
>>> synchronizing pmd thread reload. */
>>> -    struct ovs_mutex cond_mutex;    /* Mutex for condition 
>>> variable. */
>>> -
>>>      /* Per thread exact-match cache.  Note, the instance for 
>>> cpu core
>>>       * NON_PMD_CORE_ID can be accessed by multiple threads, 
>>> and thusly
>>>       * need to be protected by 'non_pmd_mutex'.  Every other 
>>> instance
>>> @@ -1525,6 +1525,8 @@ create_dp_netdev(const char *name, const 
>>> struct dpif_class *class,
>>>      atomic_init(&dp->emc_insert_min, 
>>> DEFAULT_EM_FLOW_INSERT_MIN);
>>>      atomic_init(&dp->tx_flush_interval, 
>>> DEFAULT_TX_FLUSH_INTERVAL);
>>>
>>> +    atomic_count_init(&dp->reloading_pmds, 0);
>>> +
>>>      cmap_init(&dp->poll_threads);
>>>      dp->pmd_rxq_assign_cyc = true;
>>>
>>> @@ -1754,11 +1756,8 @@ dp_netdev_reload_pmd__(struct 
>>> dp_netdev_pmd_thread *pmd)
>>>          return;
>>>      }
>>>
>>> -    ovs_mutex_lock(&pmd->cond_mutex);
>>>      seq_change(pmd->reload_seq);
>>>      atomic_store_relaxed(&pmd->reload, true);
>>> -    ovs_mutex_cond_wait(&pmd->cond, &pmd->cond_mutex);
>>> -    ovs_mutex_unlock(&pmd->cond_mutex);
>>>  }
>>>
>>>  static uint32_t
>>> @@ -4641,9 +4640,27 @@ rxq_scheduling(struct dp_netdev *dp, bool 
>>> pinned) OVS_REQUIRES(dp->port_mutex)
>>>  }
>>>
>>>  static void
>>> +wait_reloading_pmds(struct dp_netdev *dp)
>>> +{
>>> +    while (atomic_count_get(&dp->reloading_pmds) != 0) {
>>> +    }
>>> +}
>>> +
>>> +static void
>>>  reload_affected_pmds(struct dp_netdev *dp)
>>>  {
>>>      struct dp_netdev_pmd_thread *pmd;
>>> +    unsigned int pmd_count = 0;
>>> +
>>> +    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>>> +        if (pmd->core_id == NON_PMD_CORE_ID) {
>>> +            continue;
>>> +        }
>>> +        if (pmd->need_reload) {
>>> +            pmd_count++;
>>> +        }
>>> +    }
>>> +    atomic_count_set(&dp->reloading_pmds, pmd_count);
>>>      CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>>>          if (pmd->need_reload) {
>>> @@ -4652,6 +4669,10 @@ reload_affected_pmds(struct dp_netdev *dp)
>>
>> The above atomic_count_set() is a relaxed set, so is the 
>> atomic_store_relaxed(&pmd->reload, true) in 
>> dp_netdev_reload_pmd__(pmd).
>> Which could lead to the PMDs decreasing the dp->reloading_pmd before 
>> it’s set. Guess if the correct memory_order is selected for 
>> pmd->reload as suggested by Ilya in patch 1/5 it should work.
>
> relaxed memory ordering does not imply any synchronization and will
> not be synchronized with 'reload' regardless of its memory ordering.
> To guarantee the order, atomic_count_set() must be replaced with
> non-relaxed version along with 'reload'.

Thanks for the additional info, as it made me wonder why… But then I
found this, https://lwn.net/Articles/586838/, which tells me that
memory_order_release is not the same as the Linux implementation with
its full read/write barrier :(
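
For reference, the closest plain C11 construct to a full read/write
barrier such as the kernel's smp_mb() is a sequentially consistent
fence; a release store on its own does not order earlier stores against
later loads.  A tiny illustrative sketch (function name invented):

    #include <stdatomic.h>

    void store_then_load(atomic_int *a, atomic_int *b)
    {
        atomic_store_explicit(a, 1, memory_order_relaxed);
        /* Full barrier: keeps the store above ordered before the load
         * below, which a release store by itself would not guarantee. */
        atomic_thread_fence(memory_order_seq_cst);
        int seen = atomic_load_explicit(b, memory_order_relaxed);
        (void) seen;
    }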

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 30774ed..b2b21fd 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -335,6 +335,9 @@  struct dp_netdev {
     /* The time that a packet can wait in output batch for sending. */
     atomic_uint32_t tx_flush_interval;
 
+    /* Count of pmds currently reloading */
+    atomic_count reloading_pmds;
+
     /* Meters. */
     struct ovs_mutex meter_locks[N_METER_LOCKS];
     struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
@@ -647,9 +650,6 @@  struct dp_netdev_pmd_thread {
     struct ovs_refcount ref_cnt;    /* Every reference must be refcount'ed. */
     struct cmap_node node;          /* In 'dp->poll_threads'. */
 
-    pthread_cond_t cond;            /* For synchronizing pmd thread reload. */
-    struct ovs_mutex cond_mutex;    /* Mutex for condition variable. */
-
     /* Per thread exact-match cache.  Note, the instance for cpu core
      * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
      * need to be protected by 'non_pmd_mutex'.  Every other instance
@@ -1525,6 +1525,8 @@  create_dp_netdev(const char *name, const struct dpif_class *class,
     atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
     atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL);
 
+    atomic_count_init(&dp->reloading_pmds, 0);
+
     cmap_init(&dp->poll_threads);
     dp->pmd_rxq_assign_cyc = true;
 
@@ -1754,11 +1756,8 @@  dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd)
         return;
     }
 
-    ovs_mutex_lock(&pmd->cond_mutex);
     seq_change(pmd->reload_seq);
     atomic_store_relaxed(&pmd->reload, true);
-    ovs_mutex_cond_wait(&pmd->cond, &pmd->cond_mutex);
-    ovs_mutex_unlock(&pmd->cond_mutex);
 }
 
 static uint32_t
@@ -4641,9 +4640,27 @@  rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
 }
 
 static void
+wait_reloading_pmds(struct dp_netdev *dp)
+{
+    while (atomic_count_get(&dp->reloading_pmds) != 0) {
+    }
+}
+
+static void
 reload_affected_pmds(struct dp_netdev *dp)
 {
     struct dp_netdev_pmd_thread *pmd;
+    unsigned int pmd_count = 0;
+
+    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+        if (pmd->core_id == NON_PMD_CORE_ID) {
+            continue;
+        }
+        if (pmd->need_reload) {
+            pmd_count++;
+        }
+    }
+    atomic_count_set(&dp->reloading_pmds, pmd_count);
 
     CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
         if (pmd->need_reload) {
@@ -4652,6 +4669,10 @@  reload_affected_pmds(struct dp_netdev *dp)
             pmd->need_reload = false;
         }
     }
+
+    if (pmd_count != 0) {
+        wait_reloading_pmds(dp);
+    }
 }
 
 static void
@@ -5813,11 +5834,9 @@  dpif_netdev_enable_upcall(struct dpif *dpif)
 static void
 dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd)
 {
-    ovs_mutex_lock(&pmd->cond_mutex);
     atomic_store_relaxed(&pmd->reload, false);
     pmd->last_reload_seq = seq_read(pmd->reload_seq);
-    xpthread_cond_signal(&pmd->cond);
-    ovs_mutex_unlock(&pmd->cond_mutex);
+    atomic_count_dec(&pmd->dp->reloading_pmds);
 }
 
 /* Finds and refs the dp_netdev_pmd_thread on core 'core_id'.  Returns
@@ -5902,8 +5921,6 @@  dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     pmd->reload_seq = seq_create();
     pmd->last_reload_seq = seq_read(pmd->reload_seq);
     atomic_init(&pmd->reload, false);
-    xpthread_cond_init(&pmd->cond, NULL);
-    ovs_mutex_init(&pmd->cond_mutex);
     ovs_mutex_init(&pmd->flow_mutex);
     ovs_mutex_init(&pmd->port_mutex);
     cmap_init(&pmd->flow_table);
@@ -5946,8 +5963,6 @@  dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
     cmap_destroy(&pmd->flow_table);
     ovs_mutex_destroy(&pmd->flow_mutex);
     seq_destroy(pmd->reload_seq);
-    xpthread_cond_destroy(&pmd->cond);
-    ovs_mutex_destroy(&pmd->cond_mutex);
     ovs_mutex_destroy(&pmd->port_mutex);
     free(pmd);
 }
@@ -5967,7 +5982,9 @@  dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd)
         ovs_mutex_unlock(&dp->non_pmd_mutex);
     } else {
         atomic_store_relaxed(&pmd->exit, true);
+        atomic_count_set(&dp->reloading_pmds, 1);
         dp_netdev_reload_pmd__(pmd);
+        wait_reloading_pmds(dp);
         xpthread_join(pmd->thread, NULL);
     }