Message ID | 5d7e9b4a-274a-1fb1-92dc-f1220c230eca@samsung.com |
---|---|
State | Not Applicable |
Headers | show |
Thanks for the review and for the incremental! I applied everything to the various commits, except one thing, see below I accumulated enough comments to send a v2, but I wanted to have a look first at a couple of patches that may conflict with this (mostly vhost pmd). Daniele On 25/11/2016 06:19, "Ilya Maximets" <i.maximets@samsung.com> wrote: >Sorry, CC to list. > >On 25.11.2016 17:15, Ilya Maximets wrote: >Hi, Daniele. > >I've prepared some incremental changes for this path-set (only very >simple tests performed). Mostly for the largest patch. >I hope, you will find some of this changes useful. >I'll continue to test and review this path-set. > >--- > lib/dpif-netdev.c | 65 ++++++++++++++++++++++++------------------------------- > lib/ovs-numa.c | 23 ++++++-------------- > lib/ovs-numa.h | 1 + > 3 files changed, 36 insertions(+), 53 deletions(-) > >diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >index 8fad4f3..31c5b5a 100644 >--- a/lib/dpif-netdev.c >+++ b/lib/dpif-netdev.c >@@ -3067,11 +3067,11 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex) > if (!pmd) { > VLOG_WARN("There is no PMD thread on core %d. Queue " > "%d on port \'%s\' will not be polled.", >- port->rxqs[qid].core_id, qid, >- netdev_get_name(port->netdev)); >+ q->core_id, qid, netdev_get_name(port->netdev)); > } else { > q->pmd = pmd; > pmd->isolated = true; >+ dp_netdev_pmd_unref(pmd); > } > } else if (!pinned && q->core_id == RXQ_CORE_UNPINNED) { > if (!numa) { >@@ -3107,31 +3107,31 @@ reconfigure_pmd_threads(struct dp_netdev *dp) > pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS); > } > >- /* Check for unwanted pmd threads */ >- CMAP_FOR_EACH(pmd, node, &dp->poll_threads) { >- if (pmd->core_id != NON_PMD_CORE_ID && >- !ovs_numa_dump_contains_core(pmd_cores, >- pmd->numa_id, >- pmd->core_id)) { >- changed = true; >- } >- } >- >- /* Check for required new pmd threads */ >- FOR_EACH_CORE_ON_DUMP(core, pmd_cores) { >- pmd = dp_netdev_get_pmd(dp, core->core_id); >- if (pmd) { >- dp_netdev_pmd_unref(pmd); >- continue; >- } >+ /* Check for changed configuration */ >+ if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp->poll_threads) - 1) { > changed = true; >+ } else { >+ CMAP_FOR_EACH(pmd, node, &dp->poll_threads) { >+ if (pmd->core_id != NON_PMD_CORE_ID >+ && !ovs_numa_dump_contains_core(pmd_cores, >+ pmd->numa_id, >+ pmd->core_id)) { >+ changed = true; >+ break; >+ } >+ } Less lines of code with the same effect, looks good to me > } > > /* Destroy the old and recreate the new pmd threads. We don't perform an > * incremental update because we would have to adjust 'static_tx_qid'. */ > if (changed) { >- struct ovs_numa_dump *all_numas; >- struct ovs_numa_info *numa; >+ uint32_t *n_threads_on_numa; >+ int i, n_numas = ovs_numa_get_n_numas(); >+ >+ ovs_assert(n_numas != OVS_NUMA_UNSPEC); >+ >+ /* This is not expensive because n_numas <= MAX_NUMA_NODES */ >+ n_threads_on_numa = xzalloc(n_numas * sizeof *n_threads_on_numa); > > /* Do not destroy the non pmd thread. */ > dp_netdev_destroy_all_pmds(dp, false); >@@ -3141,26 +3141,17 @@ reconfigure_pmd_threads(struct dp_netdev *dp) > dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id); > > pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd); >+ n_threads_on_numa[core->numa_id]++; > } > > /* Log the number of pmd threads per numa node. */ >- all_numas = ovs_numa_dump_n_cores_per_numa(1); >- >- FOR_EACH_CORE_ON_DUMP(numa, all_numas) { >- int n = 0; >- >- FOR_EACH_CORE_ON_DUMP(core, pmd_cores) { >- if (core->numa_id == numa->numa_id) { >- n++; >- } >- } >- >- if (n) { >+ for (i = 0; i < n_numas; i++) { >+ if (n_threads_on_numa[i]) { > VLOG_INFO("Created %d pmd threads on numa node %d", >- n, numa->numa_id); >+ n_threads_on_numa[i], i); > } > } >- ovs_numa_dump_destroy(all_numas); >+ free(n_threads_on_numa); It's probably not a big deal, but this code doesn't properly handle non-consecutive numa nodes. I decided to add helpers in ovs-numa to simplify this, like you suggested before. > } > > ovs_numa_dump_destroy(pmd_cores); >@@ -3321,7 +3312,7 @@ reconfigure_datapath(struct dp_netdev *dp) > > if (q->pmd) { > ovs_mutex_lock(&q->pmd->port_mutex); >- dp_netdev_add_rxq_to_pmd(q->pmd, &port->rxqs[qid]); >+ dp_netdev_add_rxq_to_pmd(q->pmd, q); > ovs_mutex_unlock(&q->pmd->port_mutex); > } > } >@@ -3728,7 +3719,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd) > /* All flows (including their dpcls_rules) have been deleted already */ > CMAP_FOR_EACH (cls, node, &pmd->classifiers) { > dpcls_destroy(cls); >- free(cls); >+ ovsrcu_postpone(free, cls); > } > cmap_destroy(&pmd->classifiers); > cmap_destroy(&pmd->flow_table); >diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c >index 6599542..b7c305f 100644 >--- a/lib/ovs-numa.c >+++ b/lib/ovs-numa.c >@@ -406,21 +406,6 @@ ovs_numa_dump_cores_on_numa(int numa_id) > return dump; > } > >-static struct cpu_core * >-get_cpu_core(int core_id) >-{ >- struct cpu_core *core; >- >- HMAP_FOR_EACH_WITH_HASH(core, hmap_node, hash_int(core_id, 0), >- &all_cpu_cores) { >- if (core->core_id == core_id) { >- return core; >- } >- } >- >- return NULL; >-} >- > struct ovs_numa_dump * > ovs_numa_dump_cores_with_cmask(const char *cmask) > { >@@ -444,7 +429,7 @@ ovs_numa_dump_cores_with_cmask(const char *cmask) > > for (j = 0; j < 4; j++) { > if ((bin >> j) & 0x1) { >- struct cpu_core *core = get_cpu_core(core_id); >+ struct cpu_core *core = get_core_by_core_id(core_id); Thanks for spotting this, I would have carried the duplicated version forever > > if (core) { > struct ovs_numa_info *info = xmalloc(sizeof *info); >@@ -508,6 +493,12 @@ ovs_numa_dump_contains_core(const struct ovs_numa_dump *dump, > return false; > } > >+size_t >+ovs_numa_dump_count(struct ovs_numa_dump *dump) >+{ >+ return hmap_count(&dump->dump); >+} >+ > void > ovs_numa_dump_destroy(struct ovs_numa_dump *dump) > { >diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h >index 89b6a30..9c6f08f 100644 >--- a/lib/ovs-numa.h >+++ b/lib/ovs-numa.h >@@ -49,6 +49,7 @@ int ovs_numa_get_n_cores_on_numa(int numa_id); > struct ovs_numa_dump *ovs_numa_dump_cores_on_numa(int numa_id); > struct ovs_numa_dump *ovs_numa_dump_cores_with_cmask(const char *cmask); > struct ovs_numa_dump *ovs_numa_dump_n_cores_per_numa(int n); >+size_t ovs_numa_dump_count(struct ovs_numa_dump *); > bool ovs_numa_dump_contains_core(const struct ovs_numa_dump *, > int numa_id, unsigned core_id); > void ovs_numa_dump_destroy(struct ovs_numa_dump *); >-- > > >Best regards, Ilya Maximets. > >> On 16.11.2016 03:45, diproiettod at vmware.com (Daniele Di Proietto) wrote: >>> The first two commits of the series are trivial bugfixes for dpif-netdev. >>> >>> Then the series fixes a long standing bug that caused a crash when the >>> admin link state of a port is changed while traffic is flowing. >>> >>> The next part makes use of reconfiguration for port add: this makes >>> the operation twice as fast and reduce some code duplication. This part >>> conflicts with the port naming change, so I'm willing to postpone it, unless >>> we find it to be useful for the port naming change. >>> >>> The rest of the series refactors a lot of code if dpif-netdev: >>> >>> * We no longer start pmd threads on demand for each numa node. This made >>> the code very complicated and introduced a lot of bugs. >>> * The pmd threads state is now internal to dpif-netdev and it's not stored in >>> ovs-numa. >>> * There's now a single function that handles pmd threads/ports changes: this >>> reduces code duplication and makes port reconfiguration faster, as we don't >>> have to bring down the whole datapath. >>> >>> Daniele Di Proietto (17): >>> dpif-netdev: Fix memory leak. >>> dpif-netdev: Take non_pmd_mutex to access tx cached ports. >>> dpif-netdev: Don't try to output on a device without txqs. >>> netdev-dpdk: Don't call rte_dev_stop() in update_flags(). >>> netdev-dpdk: Start the device only once on port-add. >>> netdev-dpdk: Refactor construct and destruct. >>> dpif-netdev: Use a boolean instead of pmd->port_seq. >>> dpif-netdev: Block pmd threads if there are no ports. >>> dpif-netdev: Create pmd threads for every numa node. >>> dpif-netdev: Make 'static_tx_qid' const. >>> dpctl: Avoid making assumptions on pmd threads. >>> ovs-numa: New ovs_numa_dump_contains_core() function. >>> ovs-numa: Add new dump types. >>> dpif-netdev: core_id shouldn't be negative. >>> dpif-netdev: Use hmap for poll_list in pmd threads. >>> dpif-netdev: Centralized threads and queues handling code. >>> ovs-numa: Remove unused functions. >>> >>> lib/dpctl.c | 107 +--- >>> lib/dpif-netdev.c | 1429 ++++++++++++++++++++++++++++------------------------- >>> lib/dpif.c | 6 +- >>> lib/dpif.h | 12 +- >>> lib/netdev-dpdk.c | 173 +++---- >>> lib/netdev.c | 21 +- >>> lib/netdev.h | 1 + >>> lib/ovs-numa.c | 227 +++------ >>> lib/ovs-numa.h | 21 +- >>> tests/pmd.at | 49 +- >>> vswitchd/bridge.c | 2 + >>> 11 files changed, 1030 insertions(+), 1018 deletions(-) >>>
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 8fad4f3..31c5b5a 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -3067,11 +3067,11 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex) if (!pmd) { VLOG_WARN("There is no PMD thread on core %d. Queue " "%d on port \'%s\' will not be polled.", - port->rxqs[qid].core_id, qid, - netdev_get_name(port->netdev)); + q->core_id, qid, netdev_get_name(port->netdev)); } else { q->pmd = pmd; pmd->isolated = true; + dp_netdev_pmd_unref(pmd); } } else if (!pinned && q->core_id == RXQ_CORE_UNPINNED) { if (!numa) { @@ -3107,31 +3107,31 @@ reconfigure_pmd_threads(struct dp_netdev *dp) pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS); } - /* Check for unwanted pmd threads */ - CMAP_FOR_EACH(pmd, node, &dp->poll_threads) { - if (pmd->core_id != NON_PMD_CORE_ID && - !ovs_numa_dump_contains_core(pmd_cores, - pmd->numa_id, - pmd->core_id)) { - changed = true; - } - } - - /* Check for required new pmd threads */ - FOR_EACH_CORE_ON_DUMP(core, pmd_cores) { - pmd = dp_netdev_get_pmd(dp, core->core_id); - if (pmd) { - dp_netdev_pmd_unref(pmd); - continue; - } + /* Check for changed configuration */ + if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp->poll_threads) - 1) { changed = true; + } else { + CMAP_FOR_EACH(pmd, node, &dp->poll_threads) { + if (pmd->core_id != NON_PMD_CORE_ID + && !ovs_numa_dump_contains_core(pmd_cores, + pmd->numa_id, + pmd->core_id)) { + changed = true; + break; + } + } } /* Destroy the old and recreate the new pmd threads. We don't perform an * incremental update because we would have to adjust 'static_tx_qid'. */ if (changed) { - struct ovs_numa_dump *all_numas; - struct ovs_numa_info *numa; + uint32_t *n_threads_on_numa; + int i, n_numas = ovs_numa_get_n_numas(); + + ovs_assert(n_numas != OVS_NUMA_UNSPEC); + + /* This is not expensive because n_numas <= MAX_NUMA_NODES */ + n_threads_on_numa = xzalloc(n_numas * sizeof *n_threads_on_numa); /* Do not destroy the non pmd thread. */ dp_netdev_destroy_all_pmds(dp, false); @@ -3141,26 +3141,17 @@ reconfigure_pmd_threads(struct dp_netdev *dp) dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id); pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd); + n_threads_on_numa[core->numa_id]++; } /* Log the number of pmd threads per numa node. */ - all_numas = ovs_numa_dump_n_cores_per_numa(1); - - FOR_EACH_CORE_ON_DUMP(numa, all_numas) { - int n = 0; - - FOR_EACH_CORE_ON_DUMP(core, pmd_cores) { - if (core->numa_id == numa->numa_id) { - n++; - } - } - - if (n) { + for (i = 0; i < n_numas; i++) { + if (n_threads_on_numa[i]) { VLOG_INFO("Created %d pmd threads on numa node %d", - n, numa->numa_id); + n_threads_on_numa[i], i); } } - ovs_numa_dump_destroy(all_numas); + free(n_threads_on_numa); } ovs_numa_dump_destroy(pmd_cores); @@ -3321,7 +3312,7 @@ reconfigure_datapath(struct dp_netdev *dp) if (q->pmd) { ovs_mutex_lock(&q->pmd->port_mutex); - dp_netdev_add_rxq_to_pmd(q->pmd, &port->rxqs[qid]); + dp_netdev_add_rxq_to_pmd(q->pmd, q); ovs_mutex_unlock(&q->pmd->port_mutex); } } @@ -3728,7 +3719,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd) /* All flows (including their dpcls_rules) have been deleted already */ CMAP_FOR_EACH (cls, node, &pmd->classifiers) { dpcls_destroy(cls); - free(cls); + ovsrcu_postpone(free, cls); } cmap_destroy(&pmd->classifiers); cmap_destroy(&pmd->flow_table); diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index 6599542..b7c305f 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -406,21 +406,6 @@ ovs_numa_dump_cores_on_numa(int numa_id) return dump; } -static struct cpu_core * -get_cpu_core(int core_id) -{ - struct cpu_core *core; - - HMAP_FOR_EACH_WITH_HASH(core, hmap_node, hash_int(core_id, 0), - &all_cpu_cores) { - if (core->core_id == core_id) { - return core; - } - } - - return NULL; -} - struct ovs_numa_dump * ovs_numa_dump_cores_with_cmask(const char *cmask) { @@ -444,7 +429,7 @@ ovs_numa_dump_cores_with_cmask(const char *cmask) for (j = 0; j < 4; j++) { if ((bin >> j) & 0x1) { - struct cpu_core *core = get_cpu_core(core_id); + struct cpu_core *core = get_core_by_core_id(core_id); if (core) { struct ovs_numa_info *info = xmalloc(sizeof *info); @@ -508,6 +493,12 @@ ovs_numa_dump_contains_core(const struct ovs_numa_dump *dump, return false; } +size_t +ovs_numa_dump_count(struct ovs_numa_dump *dump) +{ + return hmap_count(&dump->dump); +} + void ovs_numa_dump_destroy(struct ovs_numa_dump *dump) { diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h index 89b6a30..9c6f08f 100644 --- a/lib/ovs-numa.h +++ b/lib/ovs-numa.h @@ -49,6 +49,7 @@ int ovs_numa_get_n_cores_on_numa(int numa_id); struct ovs_numa_dump *ovs_numa_dump_cores_on_numa(int numa_id); struct ovs_numa_dump *ovs_numa_dump_cores_with_cmask(const char *cmask); struct ovs_numa_dump *ovs_numa_dump_n_cores_per_numa(int n); +size_t ovs_numa_dump_count(struct ovs_numa_dump *); bool ovs_numa_dump_contains_core(const struct ovs_numa_dump *, int numa_id, unsigned core_id); void ovs_numa_dump_destroy(struct ovs_numa_dump *);