Message ID | 1556626682-28858-3-git-send-email-david.marchand@redhat.com
---|---
State | Changes Requested
Series | Quicker pmd threads reloads
On 30 Apr 2019, at 14:17, David Marchand wrote:

> pmd reloads are currently serialised in each step that calls
> reload_affected_pmds.
> Any pmd processing packets, waiting on a mutex, etc. will make the
> other pmd threads wait for a delay that can be non-deterministic when
> syscalls add up.
>
> Switch to a little busy loop on the control thread using an atomic
> count.
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>  lib/dpif-netdev.c | 43 ++++++++++++++++++++++++++++++-------------
>  1 file changed, 30 insertions(+), 13 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 30774ed..b2b21fd 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c

[...]

> @@ -4641,9 +4640,27 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
>  }
>
>  static void
> +wait_reloading_pmds(struct dp_netdev *dp)
> +{
> +    while (atomic_count_get(&dp->reloading_pmds) != 0) {
> +    }
> +}
> +
> +static void
>  reload_affected_pmds(struct dp_netdev *dp)
>  {
>      struct dp_netdev_pmd_thread *pmd;
> +    unsigned int pmd_count = 0;
> +
> +    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
> +        if (pmd->core_id == NON_PMD_CORE_ID) {
> +            continue;
> +        }
> +        if (pmd->need_reload) {
> +            pmd_count++;
> +        }
> +    }
> +    atomic_count_set(&dp->reloading_pmds, pmd_count);
>
>      CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>          if (pmd->need_reload) {
> @@ -4652,6 +4669,10 @@ reload_affected_pmds(struct dp_netdev *dp)

The above atomic_count_set() is a relaxed set, and so is the
atomic_store_relaxed(&pmd->reload, true) in dp_netdev_reload_pmd__(pmd).
This could lead to the PMDs decrementing dp->reloading_pmds before it
is set. I guess that if the correct memory_order is selected for
pmd->reload, as suggested by Ilya in patch 1/5, it should work.

[...]
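To make the failure mode concrete, here is a minimal sketch of the bad
interleaving using plain C11 atomics rather than the OVS wrappers; the
names are illustrative only:

#include <stdatomic.h>
#include <stdbool.h>

atomic_uint reloading_pmds;   /* stands in for dp->reloading_pmds, init 0 */
atomic_bool reload;           /* stands in for one pmd->reload flag */

/* Control thread: with two relaxed stores there is no happens-before
 * edge between them, so a PMD may observe 'reload == true' while the
 * store of 1 has not yet entered the counter's modification order. */
void control_thread(void)
{
    atomic_store_explicit(&reloading_pmds, 1, memory_order_relaxed);
    atomic_store_explicit(&reload, true, memory_order_relaxed);
    while (atomic_load_explicit(&reloading_pmds, memory_order_relaxed)) {
        ;   /* can spin forever in the interleaving described below */
    }
}

/* PMD thread: if its decrement lands *before* the control thread's
 * store of 1, it reads the initial 0 and wraps to UINT_MAX; the later
 * store of 1 then overwrites that, and the counter never reaches 0. */
void pmd_thread(void)
{
    while (!atomic_load_explicit(&reload, memory_order_relaxed)) {
        ;
    }
    /* ... perform the reload ... */
    atomic_fetch_sub_explicit(&reloading_pmds, 1, memory_order_relaxed);
}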
On 07.05.2019 16:46, Eelco Chaudron wrote:
> On 30 Apr 2019, at 14:17, David Marchand wrote:
>
>> pmd reloads are currently serialised in each step that calls
>> reload_affected_pmds.
>> Any pmd processing packets, waiting on a mutex, etc. will make the
>> other pmd threads wait for a delay that can be non-deterministic when
>> syscalls add up.
>>
>> Switch to a little busy loop on the control thread using an atomic
>> count.
[...]
>> +    atomic_count_set(&dp->reloading_pmds, pmd_count);
>>
>>      CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>>          if (pmd->need_reload) {
>> @@ -4652,6 +4669,10 @@ reload_affected_pmds(struct dp_netdev *dp)
>
> The above atomic_count_set() is a relaxed set, and so is the
> atomic_store_relaxed(&pmd->reload, true) in dp_netdev_reload_pmd__(pmd).
> This could lead to the PMDs decrementing dp->reloading_pmds before it
> is set. I guess that if the correct memory_order is selected for
> pmd->reload, as suggested by Ilya in patch 1/5, it should work.

Relaxed memory ordering does not imply any synchronization, so
'reloading_pmds' will not be synchronized with 'reload' regardless of
the memory ordering of 'reload'. To guarantee the order,
atomic_count_set() must be replaced with a non-relaxed version, along
with 'reload'.
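In C11 terms, the ordering Ilya asks for can be established by pairing
release stores with acquire loads, so that the counter write is
published before any PMD can observe the 'reload' flag. A minimal
sketch with illustrative names (not the OVS atomic wrappers):

#include <stdatomic.h>
#include <stdbool.h>

atomic_uint reloading_pmds;
atomic_bool reload;

void control_thread(unsigned int pmd_count)
{
    atomic_store_explicit(&reloading_pmds, pmd_count, memory_order_release);
    /* Release store: everything sequenced before it, including the
     * counter store above, becomes visible to a thread whose acquire
     * load of 'reload' observes true. */
    atomic_store_explicit(&reload, true, memory_order_release);

    /* Busy-wait until every PMD has checked in. */
    while (atomic_load_explicit(&reloading_pmds, memory_order_acquire) != 0) {
        ;
    }
}

void pmd_thread(void)
{
    while (!atomic_load_explicit(&reload, memory_order_acquire)) {
        ;   /* poll between packet bursts */
    }
    /* ... perform the reload ... */
    /* Release decrement: pairs with the control thread's acquire load
     * above, publishing the PMD's reload work before the counter can
     * be observed as zero. */
    atomic_fetch_sub_explicit(&reloading_pmds, 1, memory_order_release);
}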
On 7 May 2019, at 16:00, Ilya Maximets wrote:

> On 07.05.2019 16:46, Eelco Chaudron wrote:
>> On 30 Apr 2019, at 14:17, David Marchand wrote:
>>
>>> pmd reloads are currently serialised in each step that calls
>>> reload_affected_pmds.
[...]
>> The above atomic_count_set() is a relaxed set, and so is the
>> atomic_store_relaxed(&pmd->reload, true) in dp_netdev_reload_pmd__(pmd).
>> This could lead to the PMDs decrementing dp->reloading_pmds before it
>> is set. I guess that if the correct memory_order is selected for
>> pmd->reload, as suggested by Ilya in patch 1/5, it should work.
>
> Relaxed memory ordering does not imply any synchronization, so
> 'reloading_pmds' will not be synchronized with 'reload' regardless of
> the memory ordering of 'reload'. To guarantee the order,
> atomic_count_set() must be replaced with a non-relaxed version, along
> with 'reload'.

Thanks for the additional info, as it made me wonder why… But then I
found this, https://lwn.net/Articles/586838/, which tells me that
memory_order_release is not the same as the Linux implementation with a
full read/write barrier :(
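For context on the difference the LWN article draws: a release store is
only a one-way fence, which the classic store-buffering litmus test
makes visible. The sketch below is illustrative only and not part of
the patch:

#include <stdatomic.h>

atomic_int x, y;

/* With release/acquire, the later load may be reordered before the
 * earlier store in each thread, so r1 == 0 && r2 == 0 is a permitted
 * outcome.  A full barrier between the store and the load -- Linux's
 * smp_mb(), or atomic_thread_fence(memory_order_seq_cst) in C11 --
 * forbids that outcome. */
void thread_a(int *r1)
{
    atomic_store_explicit(&x, 1, memory_order_release);
    *r1 = atomic_load_explicit(&y, memory_order_acquire);
}

void thread_b(int *r2)
{
    atomic_store_explicit(&y, 1, memory_order_release);
    *r2 = atomic_load_explicit(&x, memory_order_acquire);
}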
pmd reloads are currently serialised in each step that calls
reload_affected_pmds.
Any pmd processing packets, waiting on a mutex, etc. will make the other
pmd threads wait for a delay that can be non-deterministic when syscalls
add up.

Switch to a little busy loop on the control thread using an atomic
count.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/dpif-netdev.c | 43 ++++++++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 30774ed..b2b21fd 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -335,6 +335,9 @@ struct dp_netdev {
     /* The time that a packet can wait in output batch for sending. */
     atomic_uint32_t tx_flush_interval;
 
+    /* Count of pmds currently reloading */
+    atomic_count reloading_pmds;
+
     /* Meters. */
     struct ovs_mutex meter_locks[N_METER_LOCKS];
     struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
@@ -647,9 +650,6 @@ struct dp_netdev_pmd_thread {
     struct ovs_refcount ref_cnt;    /* Every reference must be refcount'ed. */
     struct cmap_node node;          /* In 'dp->poll_threads'. */
 
-    pthread_cond_t cond;            /* For synchronizing pmd thread reload. */
-    struct ovs_mutex cond_mutex;    /* Mutex for condition variable. */
-
     /* Per thread exact-match cache.  Note, the instance for cpu core
      * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
      * need to be protected by 'non_pmd_mutex'.  Every other instance
@@ -1525,6 +1525,8 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
     atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
     atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL);
 
+    atomic_count_init(&dp->reloading_pmds, 0);
+
     cmap_init(&dp->poll_threads);
     dp->pmd_rxq_assign_cyc = true;
 
@@ -1754,11 +1756,8 @@ dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd)
         return;
     }
 
-    ovs_mutex_lock(&pmd->cond_mutex);
     seq_change(pmd->reload_seq);
     atomic_store_relaxed(&pmd->reload, true);
-    ovs_mutex_cond_wait(&pmd->cond, &pmd->cond_mutex);
-    ovs_mutex_unlock(&pmd->cond_mutex);
 }
 
 static uint32_t
@@ -4641,9 +4640,27 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
 }
 
 static void
+wait_reloading_pmds(struct dp_netdev *dp)
+{
+    while (atomic_count_get(&dp->reloading_pmds) != 0) {
+    }
+}
+
+static void
 reload_affected_pmds(struct dp_netdev *dp)
 {
     struct dp_netdev_pmd_thread *pmd;
+    unsigned int pmd_count = 0;
+
+    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+        if (pmd->core_id == NON_PMD_CORE_ID) {
+            continue;
+        }
+        if (pmd->need_reload) {
+            pmd_count++;
+        }
+    }
+    atomic_count_set(&dp->reloading_pmds, pmd_count);
 
     CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
         if (pmd->need_reload) {
@@ -4652,6 +4669,10 @@ reload_affected_pmds(struct dp_netdev *dp)
             pmd->need_reload = false;
         }
     }
+
+    if (pmd_count != 0) {
+        wait_reloading_pmds(dp);
+    }
 }
 
 static void
@@ -5813,11 +5834,9 @@ dpif_netdev_enable_upcall(struct dpif *dpif)
 static void
 dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd)
 {
-    ovs_mutex_lock(&pmd->cond_mutex);
     atomic_store_relaxed(&pmd->reload, false);
     pmd->last_reload_seq = seq_read(pmd->reload_seq);
-    xpthread_cond_signal(&pmd->cond);
-    ovs_mutex_unlock(&pmd->cond_mutex);
+    atomic_count_dec(&pmd->dp->reloading_pmds);
 }
 
 /* Finds and refs the dp_netdev_pmd_thread on core 'core_id'.  Returns
@@ -5902,8 +5921,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     pmd->reload_seq = seq_create();
     pmd->last_reload_seq = seq_read(pmd->reload_seq);
     atomic_init(&pmd->reload, false);
-    xpthread_cond_init(&pmd->cond, NULL);
-    ovs_mutex_init(&pmd->cond_mutex);
     ovs_mutex_init(&pmd->flow_mutex);
     ovs_mutex_init(&pmd->port_mutex);
     cmap_init(&pmd->flow_table);
@@ -5946,8 +5963,6 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
     cmap_destroy(&pmd->flow_table);
     ovs_mutex_destroy(&pmd->flow_mutex);
     seq_destroy(pmd->reload_seq);
-    xpthread_cond_destroy(&pmd->cond);
-    ovs_mutex_destroy(&pmd->cond_mutex);
     ovs_mutex_destroy(&pmd->port_mutex);
     free(pmd);
 }
@@ -5967,7 +5982,9 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd)
         ovs_mutex_unlock(&dp->non_pmd_mutex);
     } else {
         atomic_store_relaxed(&pmd->exit, true);
+        atomic_count_set(&dp->reloading_pmds, 1);
         dp_netdev_reload_pmd__(pmd);
+        wait_reloading_pmds(dp);
         xpthread_join(pmd->thread, NULL);
     }
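Reduced to a self-contained sketch, the control-thread pattern the
patch moves to looks roughly like the following. The names and pthread
scaffolding are illustrative; the real code uses OVS's atomic_count
wrappers and per-pmd 'reload' flags:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define N_WORKERS 4

static atomic_uint reloading;          /* like dp->reloading_pmds */
static atomic_bool reload[N_WORKERS];  /* like each pmd->reload */

static void *worker(void *arg)
{
    int id = *(int *) arg;

    /* Poll for a reload request, as a pmd does between bursts. */
    while (!atomic_load_explicit(&reload[id], memory_order_acquire)) {
        ;
    }
    /* ... reconfigure ports/queues here ... */
    atomic_store_explicit(&reload[id], false, memory_order_relaxed);
    /* Check in: the last worker drops the count to zero. */
    atomic_fetch_sub_explicit(&reloading, 1, memory_order_release);
    return NULL;
}

int main(void)
{
    pthread_t threads[N_WORKERS];
    int ids[N_WORKERS];

    for (int i = 0; i < N_WORKERS; i++) {
        ids[i] = i;
        pthread_create(&threads[i], NULL, worker, &ids[i]);
    }

    /* Control thread: arm the counter first, then request all reloads,
     * then busy-wait once for all workers instead of once per worker. */
    atomic_store_explicit(&reloading, N_WORKERS, memory_order_release);
    for (int i = 0; i < N_WORKERS; i++) {
        atomic_store_explicit(&reload[i], true, memory_order_release);
    }
    while (atomic_load_explicit(&reloading, memory_order_acquire) != 0) {
        ;
    }
    printf("all workers reloaded\n");

    for (int i = 0; i < N_WORKERS; i++) {
        pthread_join(threads[i], NULL);
    }
    return 0;
}

The point of the change is visible in the control loop: the old code
paid one cond_wait round-trip per pmd in sequence, while here the
reload requests are issued to all workers up front and the control
thread waits only once, for the slowest worker.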