Message ID | 1558621432-13363-3-git-send-email-david.marchand@redhat.com |
---|---|
State | Changes Requested |
Headers | show |
Series | Quicker pmd threads reloads | expand |
On 5/23/2019 3:23 PM, David Marchand wrote: > pmd reloads are currently serialised in each steps calling > reload_affected_pmds. > Any pmd processing packets, waiting on a mutex etc... will make other > pmd threads wait for a delay that can be undeterministic when syscalls > adds up. > > Switch to a little busy loop on the control thread using an atomic > count. > > The memory order on this atomic is rel-acq to have an explicit > synchronisation between the pmd threads and the control thread. > Hi David, as in patch 1, LGTM. I tested a number of scenarios without issue and did not see any breakage. You can add my ACK to this. Ian > Signed-off-by: David Marchand <david.marchand@redhat.com> > Acked-by: Eelco Chaudron <echaudro@redhat.com> > --- > lib/dpif-netdev.c | 50 +++++++++++++++++++++++++++++++++++++------------- > 1 file changed, 37 insertions(+), 13 deletions(-) > > --- > Changelog since RFC v1: > - added memory ordering on 'reloading_pmds' atomic to serve as a > synchronisation point between pmd threads and control thread > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 19d7f7d..23cf6a6 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -334,6 +334,9 @@ struct dp_netdev { > /* The time that a packet can wait in output batch for sending. */ > atomic_uint32_t tx_flush_interval; > > + /* Count of pmds currently reloading */ > + atomic_uint32_t reloading_pmds; > + > /* Meters. */ > struct ovs_mutex meter_locks[N_METER_LOCKS]; > struct dp_meter *meters[MAX_METERS]; /* Meter bands. */ > @@ -646,9 +649,6 @@ struct dp_netdev_pmd_thread { > struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */ > struct cmap_node node; /* In 'dp->poll_threads'. */ > > - pthread_cond_t cond; /* For synchronizing pmd thread reload. */ > - struct ovs_mutex cond_mutex; /* Mutex for condition variable. */ > - > /* Per thread exact-match cache. 
Note, the instance for cpu core > * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly > * need to be protected by 'non_pmd_mutex'. Every other instance > @@ -1524,6 +1524,8 @@ create_dp_netdev(const char *name, const struct dpif_class *class, > atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN); > atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL); > > + atomic_init(&dp->reloading_pmds, 0); > + > cmap_init(&dp->poll_threads); > dp->pmd_rxq_assign_cyc = true; > > @@ -1753,11 +1755,8 @@ dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd) > return; > } > > - ovs_mutex_lock(&pmd->cond_mutex); > seq_change(pmd->reload_seq); > atomic_store_explicit(&pmd->reload, true, memory_order_release); > - ovs_mutex_cond_wait(&pmd->cond, &pmd->cond_mutex); > - ovs_mutex_unlock(&pmd->cond_mutex); > } > > static uint32_t > @@ -4640,9 +4639,31 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex) > } > > static void > +wait_reloading_pmds(struct dp_netdev *dp) > +{ > + uint32_t reloading; > + > + do { > + atomic_read_explicit(&dp->reloading_pmds, &reloading, > + memory_order_acquire); > + } while (reloading != 0); > +} > + > +static void > reload_affected_pmds(struct dp_netdev *dp) > { > struct dp_netdev_pmd_thread *pmd; > + unsigned int pmd_count = 0; > + > + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { > + if (pmd->core_id == NON_PMD_CORE_ID) { > + continue; > + } > + if (pmd->need_reload) { > + pmd_count++; > + } > + } > + atomic_store_relaxed(&dp->reloading_pmds, pmd_count); > > CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { > if (pmd->need_reload) { > @@ -4651,6 +4672,10 @@ reload_affected_pmds(struct dp_netdev *dp) > pmd->need_reload = false; > } > } > + > + if (pmd_count != 0) { > + wait_reloading_pmds(dp); > + } > } > > static void > @@ -5812,11 +5837,12 @@ dpif_netdev_enable_upcall(struct dpif *dpif) > static void > dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd) > { > - 
ovs_mutex_lock(&pmd->cond_mutex); > + uint32_t old; > + > atomic_store_relaxed(&pmd->reload, false); > pmd->last_reload_seq = seq_read(pmd->reload_seq); > - xpthread_cond_signal(&pmd->cond); > - ovs_mutex_unlock(&pmd->cond_mutex); > + atomic_sub_explicit(&pmd->dp->reloading_pmds, 1, &old, > + memory_order_release); > } > > /* Finds and refs the dp_netdev_pmd_thread on core 'core_id'. Returns > @@ -5901,8 +5927,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, > pmd->reload_seq = seq_create(); > pmd->last_reload_seq = seq_read(pmd->reload_seq); > atomic_init(&pmd->reload, false); > - xpthread_cond_init(&pmd->cond, NULL); > - ovs_mutex_init(&pmd->cond_mutex); > ovs_mutex_init(&pmd->flow_mutex); > ovs_mutex_init(&pmd->port_mutex); > cmap_init(&pmd->flow_table); > @@ -5945,8 +5969,6 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd) > cmap_destroy(&pmd->flow_table); > ovs_mutex_destroy(&pmd->flow_mutex); > seq_destroy(pmd->reload_seq); > - xpthread_cond_destroy(&pmd->cond); > - ovs_mutex_destroy(&pmd->cond_mutex); > ovs_mutex_destroy(&pmd->port_mutex); > free(pmd); > } > @@ -5966,7 +5988,9 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd) > ovs_mutex_unlock(&dp->non_pmd_mutex); > } else { > atomic_store_relaxed(&pmd->exit, true); > + atomic_store_relaxed(&dp->reloading_pmds, 1); > dp_netdev_reload_pmd__(pmd); > + wait_reloading_pmds(dp); > xpthread_join(pmd->thread, NULL); > } > >
On 23.05.2019 17:23, David Marchand wrote: > pmd reloads are currently serialised in each steps calling > reload_affected_pmds. > Any pmd processing packets, waiting on a mutex etc... will make other > pmd threads wait for a delay that can be undeterministic when syscalls > adds up. > > Switch to a little busy loop on the control thread using an atomic > count. > > The memory order on this atomic is rel-acq to have an explicit > synchronisation between the pmd threads and the control thread. > > Signed-off-by: David Marchand <david.marchand@redhat.com> > Acked-by: Eelco Chaudron <echaudro@redhat.com> > --- > lib/dpif-netdev.c | 50 +++++++++++++++++++++++++++++++++++++------------- > 1 file changed, 37 insertions(+), 13 deletions(-) > > --- > Changelog since RFC v1: > - added memory ordering on 'reloading_pmds' atomic to serve as a > synchronisation point between pmd threads and control thread > Looking at the whole patch set, we have way too many synchronization knobs for PMD reloading process. It seems that we could easily remove the 'reloading_pmds' one by checking that PMD thread changed 'reload' back to 'false'. This way 'reload' will be the only synchronization point that will handle all the memory ordering. This also reduces the number of involved variables and simplifies the understanding of the code. In practice, could be even a bit faster than current version, because there will be no race for a single variable. So, I'm suggesting following incremental (diff made on top of the whole set, so it could not apply on this patch directly): diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 34ac09322..4b8756448 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -335,9 +335,6 @@ struct dp_netdev { /* The time that a packet can wait in output batch for sending. */ atomic_uint32_t tx_flush_interval; - /* Count of pmds currently reloading */ - atomic_uint32_t reloading_pmds; - /* Meters. 
*/ struct ovs_mutex meter_locks[N_METER_LOCKS]; struct dp_meter *meters[MAX_METERS]; /* Meter bands. */ @@ -1527,8 +1563,6 @@ create_dp_netdev(const char *name, const struct dpif_class *class, atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN); atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL); - atomic_init(&dp->reloading_pmds, 0); - cmap_init(&dp->poll_threads); dp->pmd_rxq_assign_cyc = true; @@ -4644,43 +4678,33 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex) } static void -wait_reloading_pmds(struct dp_netdev *dp) +wait_reloading_pmd(struct dp_netdev_pmd_thread *pmd) { - uint32_t reloading; + bool reload; do { - atomic_read_explicit(&dp->reloading_pmds, &reloading, - memory_order_acquire); - } while (reloading != 0); + atomic_read_explicit(&pmd->reload, &reload, memory_order_acquire); + } while (reload); } static void reload_affected_pmds(struct dp_netdev *dp) { struct dp_netdev_pmd_thread *pmd; - unsigned int pmd_count = 0; CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { - if (pmd->core_id == NON_PMD_CORE_ID) { - continue; - } if (pmd->need_reload) { - pmd_count++; + flow_mark_flush(pmd); + dp_netdev_reload_pmd__(pmd); } } - atomic_store_relaxed(&dp->reloading_pmds, pmd_count); CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { if (pmd->need_reload) { - flow_mark_flush(pmd); - dp_netdev_reload_pmd__(pmd); + wait_reloading_pmd(pmd); pmd->need_reload = false; } } - - if (pmd_count != 0) { - wait_reloading_pmds(dp); - } } static void @@ -5885,14 +5907,10 @@ dpif_netdev_enable_upcall(struct dpif *dpif) static void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd) { - uint32_t old; - atomic_store_relaxed(&pmd->wait_for_reload, false); atomic_store_relaxed(&pmd->reload_tx_qid, false); - atomic_store_relaxed(&pmd->reload, false); pmd->last_reload_seq = seq_read(pmd->reload_seq); - atomic_sub_explicit(&pmd->dp->reloading_pmds, 1, &old, - memory_order_release); + atomic_store_explicit(&pmd->reload, false, 
memory_order_release); } /* Finds and refs the dp_netdev_pmd_thread on core 'core_id'. Returns @@ -6038,9 +6058,8 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd) ovs_mutex_unlock(&dp->non_pmd_mutex); } else { atomic_store_relaxed(&pmd->exit, true); - atomic_store_relaxed(&dp->reloading_pmds, 1); dp_netdev_reload_pmd__(pmd); - wait_reloading_pmds(dp); + wait_reloading_pmd(pmd); xpthread_join(pmd->thread, NULL); } --- What do you think? Best regards, Ilya Maximets.
On Tue, Jun 25, 2019 at 3:40 PM Ilya Maximets <i.maximets@samsung.com> wrote: > On 23.05.2019 17:23, David Marchand wrote: > > pmd reloads are currently serialised in each steps calling > > reload_affected_pmds. > > Any pmd processing packets, waiting on a mutex etc... will make other > > pmd threads wait for a delay that can be undeterministic when syscalls > > adds up. > > > > Switch to a little busy loop on the control thread using an atomic > > count. > > > > The memory order on this atomic is rel-acq to have an explicit > > synchronisation between the pmd threads and the control thread. > > > > Signed-off-by: David Marchand <david.marchand@redhat.com> > > Acked-by: Eelco Chaudron <echaudro@redhat.com> > > --- > > lib/dpif-netdev.c | 50 > +++++++++++++++++++++++++++++++++++++------------- > > 1 file changed, 37 insertions(+), 13 deletions(-) > > > > --- > > Changelog since RFC v1: > > - added memory ordering on 'reloading_pmds' atomic to serve as a > > synchronisation point between pmd threads and control thread > > > > Looking at the whole patch set, we have way to many synchronization > knobs for PMD reloading process. It seems that we could easily remove > the 'reloading_pmds' one by checking that PMD thread changed 'reload' > back to 'false'. > This way 'reload' will be the only synchronization point that will > handle all the memory ordering. This also reduces the number of > involved variables and simplifies the understanding of the code. > In practice, could be even a bit faster than current version, because > there will be no race for a single variable. > > So, I'm suggesting following incremental (diff made on top of the > whole set, so it could not apply on this patch directly): > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 34ac09322..4b8756448 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -335,9 +335,6 @@ struct dp_netdev { > /* The time that a packet can wait in output batch for sending. 
*/ > atomic_uint32_t tx_flush_interval; > > - /* Count of pmds currently reloading */ > - atomic_uint32_t reloading_pmds; > - > /* Meters. */ > struct ovs_mutex meter_locks[N_METER_LOCKS]; > struct dp_meter *meters[MAX_METERS]; /* Meter bands. */ > @@ -1527,8 +1563,6 @@ create_dp_netdev(const char *name, const struct > dpif_class *class, > atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN); > atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL); > > - atomic_init(&dp->reloading_pmds, 0); > - > cmap_init(&dp->poll_threads); > dp->pmd_rxq_assign_cyc = true; > > @@ -4644,43 +4678,33 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) > OVS_REQUIRES(dp->port_mutex) > } > > static void > -wait_reloading_pmds(struct dp_netdev *dp) > +wait_reloading_pmd(struct dp_netdev_pmd_thread *pmd) > { > - uint32_t reloading; > + bool reload; > > do { > - atomic_read_explicit(&dp->reloading_pmds, &reloading, > - memory_order_acquire); > - } while (reloading != 0); > + atomic_read_explicit(&pmd->reload, &reload, memory_order_acquire); > + } while (reload); > } > > static void > reload_affected_pmds(struct dp_netdev *dp) > { > struct dp_netdev_pmd_thread *pmd; > - unsigned int pmd_count = 0; > > CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { > - if (pmd->core_id == NON_PMD_CORE_ID) { > - continue; > - } > if (pmd->need_reload) { > - pmd_count++; > + flow_mark_flush(pmd); > + dp_netdev_reload_pmd__(pmd); > } > } > - atomic_store_relaxed(&dp->reloading_pmds, pmd_count); > > CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { > if (pmd->need_reload) { > - flow_mark_flush(pmd); > - dp_netdev_reload_pmd__(pmd); > + wait_reloading_pmd(pmd); > pmd->need_reload = false; > } > } > - > - if (pmd_count != 0) { > - wait_reloading_pmds(dp); > - } > } > > static void > @@ -5885,14 +5907,10 @@ dpif_netdev_enable_upcall(struct dpif *dpif) > static void > dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd) > { > - uint32_t old; > - > atomic_store_relaxed(&pmd->wait_for_reload, 
false); > atomic_store_relaxed(&pmd->reload_tx_qid, false); > - atomic_store_relaxed(&pmd->reload, false); > pmd->last_reload_seq = seq_read(pmd->reload_seq); > - atomic_sub_explicit(&pmd->dp->reloading_pmds, 1, &old, > - memory_order_release); > + atomic_store_explicit(&pmd->reload, false, memory_order_release); > } > > /* Finds and refs the dp_netdev_pmd_thread on core 'core_id'. Returns > @@ -6038,9 +6058,8 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct > dp_netdev_pmd_thread *pmd) > ovs_mutex_unlock(&dp->non_pmd_mutex); > } else { > atomic_store_relaxed(&pmd->exit, true); > - atomic_store_relaxed(&dp->reloading_pmds, 1); > dp_netdev_reload_pmd__(pmd); > - wait_reloading_pmds(dp); > + wait_reloading_pmd(pmd); > xpthread_join(pmd->thread, NULL); > } > > --- > > What do you think? > Yes, this looks simpler, so better. I will rework the series with this suggestion.
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 19d7f7d..23cf6a6 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -334,6 +334,9 @@ struct dp_netdev { /* The time that a packet can wait in output batch for sending. */ atomic_uint32_t tx_flush_interval; + /* Count of pmds currently reloading */ + atomic_uint32_t reloading_pmds; + /* Meters. */ struct ovs_mutex meter_locks[N_METER_LOCKS]; struct dp_meter *meters[MAX_METERS]; /* Meter bands. */ @@ -646,9 +649,6 @@ struct dp_netdev_pmd_thread { struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */ struct cmap_node node; /* In 'dp->poll_threads'. */ - pthread_cond_t cond; /* For synchronizing pmd thread reload. */ - struct ovs_mutex cond_mutex; /* Mutex for condition variable. */ - /* Per thread exact-match cache. Note, the instance for cpu core * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly * need to be protected by 'non_pmd_mutex'. Every other instance @@ -1524,6 +1524,8 @@ create_dp_netdev(const char *name, const struct dpif_class *class, atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN); atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL); + atomic_init(&dp->reloading_pmds, 0); + cmap_init(&dp->poll_threads); dp->pmd_rxq_assign_cyc = true; @@ -1753,11 +1755,8 @@ dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd) return; } - ovs_mutex_lock(&pmd->cond_mutex); seq_change(pmd->reload_seq); atomic_store_explicit(&pmd->reload, true, memory_order_release); - ovs_mutex_cond_wait(&pmd->cond, &pmd->cond_mutex); - ovs_mutex_unlock(&pmd->cond_mutex); } static uint32_t @@ -4640,9 +4639,31 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex) } static void +wait_reloading_pmds(struct dp_netdev *dp) +{ + uint32_t reloading; + + do { + atomic_read_explicit(&dp->reloading_pmds, &reloading, + memory_order_acquire); + } while (reloading != 0); +} + +static void reload_affected_pmds(struct dp_netdev *dp) { struct 
dp_netdev_pmd_thread *pmd; + unsigned int pmd_count = 0; + + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { + if (pmd->core_id == NON_PMD_CORE_ID) { + continue; + } + if (pmd->need_reload) { + pmd_count++; + } + } + atomic_store_relaxed(&dp->reloading_pmds, pmd_count); CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { if (pmd->need_reload) { @@ -4651,6 +4672,10 @@ reload_affected_pmds(struct dp_netdev *dp) pmd->need_reload = false; } } + + if (pmd_count != 0) { + wait_reloading_pmds(dp); + } } static void @@ -5812,11 +5837,12 @@ dpif_netdev_enable_upcall(struct dpif *dpif) static void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd) { - ovs_mutex_lock(&pmd->cond_mutex); + uint32_t old; + atomic_store_relaxed(&pmd->reload, false); pmd->last_reload_seq = seq_read(pmd->reload_seq); - xpthread_cond_signal(&pmd->cond); - ovs_mutex_unlock(&pmd->cond_mutex); + atomic_sub_explicit(&pmd->dp->reloading_pmds, 1, &old, + memory_order_release); } /* Finds and refs the dp_netdev_pmd_thread on core 'core_id'. 
Returns @@ -5901,8 +5927,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, pmd->reload_seq = seq_create(); pmd->last_reload_seq = seq_read(pmd->reload_seq); atomic_init(&pmd->reload, false); - xpthread_cond_init(&pmd->cond, NULL); - ovs_mutex_init(&pmd->cond_mutex); ovs_mutex_init(&pmd->flow_mutex); ovs_mutex_init(&pmd->port_mutex); cmap_init(&pmd->flow_table); @@ -5945,8 +5969,6 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd) cmap_destroy(&pmd->flow_table); ovs_mutex_destroy(&pmd->flow_mutex); seq_destroy(pmd->reload_seq); - xpthread_cond_destroy(&pmd->cond); - ovs_mutex_destroy(&pmd->cond_mutex); ovs_mutex_destroy(&pmd->port_mutex); free(pmd); } @@ -5966,7 +5988,9 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd) ovs_mutex_unlock(&dp->non_pmd_mutex); } else { atomic_store_relaxed(&pmd->exit, true); + atomic_store_relaxed(&dp->reloading_pmds, 1); dp_netdev_reload_pmd__(pmd); + wait_reloading_pmds(dp); xpthread_join(pmd->thread, NULL); }