
[ovs-dev] dpif-netdev: Honor rxq affinity during pmd threads creation.

Message ID 1479209799-16893-1-git-send-email-i.maximets@samsung.com
State Accepted
Delegated to: Daniele Di Proietto
Headers show

Commit Message

Ilya Maximets Nov. 15, 2016, 11:36 a.m. UTC
Currently, if the user sets 'pmd-rxq-affinity' to cores on a
different numa node, those queues may not be polled, because pmd
threads will not be created there even if these cores are in
'pmd-cpu-mask'.  Fix that by creating threads on all numa nodes that
rx queues are assigned to.  OVS will still warn about such an
inefficient mapping.
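
An illustrative reproduction (bridge, port, and mask values are
examples only, not from the original report); assume cores 0-3 are on
numa node 0 and core 4 is on node 1, with the pmd port itself on
node 0:

    $ ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=1e
    $ ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk \
          options:n_rxq=1 other_config:pmd-rxq-affinity="0:4"

Without this fix no pmd thread is created on numa node 1, so queue 0
of 'dpdk0' is never polled.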

Fixes: 3eb67853c481 ("dpif-netdev: Introduce pmd-rxq-affinity.")
Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
---
 lib/dpif-netdev.c | 54 ++++++++++++++++++++++++++++++++++++++++++++---------
 tests/pmd.at      | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+), 9 deletions(-)
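
With the fix applied, pinning a queue to a core on another numa node
spawns a pmd thread on that node, which can be verified with
dpif-netdev/pmd-rxq-show (names and output shape are illustrative,
continuing the example above):

    $ ovs-appctl dpif-netdev/pmd-rxq-show
    pmd thread numa_id 1 core_id 4:
            port: dpdk0     queue-id: 0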

Comments

Daniele Di Proietto Nov. 16, 2016, 12:50 a.m. UTC | #1
2016-11-15 3:36 GMT-08:00 Ilya Maximets <i.maximets@samsung.com>:

> Currently, if the user sets 'pmd-rxq-affinity' to cores on a
> different numa node, those queues may not be polled, because pmd
> threads will not be created there even if these cores are in
> 'pmd-cpu-mask'.  Fix that by creating threads on all numa nodes that
> rx queues are assigned to.  OVS will still warn about such an
> inefficient mapping.
>
> Fixes: 3eb67853c481 ("dpif-netdev: Introduce pmd-rxq-affinity.")
> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
>

I think the warning is not necessary, so I removed it for the moment.

I pushed the fix to master and branch-2.6, thanks.

I'm not comfortable with the complexity that we had to introduce to
properly support this, so I decided to send a series that, among other
things, starts all the pmd threads on all the numa nodes simultaneously:

https://mail.openvswitch.org/pipermail/ovs-dev/2016-November/325141.html

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f69f86d..63a3ce6 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1498,18 +1498,30 @@  get_n_pmd_threads_on_numa(struct dp_netdev *dp, int numa_id)
     return n_pmds;
 }
 
-/* Returns 'true' if there is a port with pmd netdev and the netdev
- * is on numa node 'numa_id'. */
+/* Returns 'true' if there is a port with pmd netdev and the netdev is on numa
+ * node 'numa_id', or one of its rx queues is pinned to a core on that node. */
 static bool
-has_pmd_port_for_numa(struct dp_netdev *dp, int numa_id)
+has_pmd_rxq_for_numa(struct dp_netdev *dp, int numa_id)
     OVS_REQUIRES(dp->port_mutex)
 {
     struct dp_netdev_port *port;
 
     HMAP_FOR_EACH (port, node, &dp->ports) {
-        if (netdev_is_pmd(port->netdev)
-            && netdev_get_numa_id(port->netdev) == numa_id) {
-            return true;
+        if (netdev_is_pmd(port->netdev)) {
+            int i;
+
+            if (netdev_get_numa_id(port->netdev) == numa_id) {
+                return true;
+            }
+
+            for (i = 0; i < port->n_rxq; i++) {
+                unsigned core_id = port->rxqs[i].core_id;
+
+                if (core_id != -1
+                    && ovs_numa_get_numa_id(core_id) == numa_id) {
+                    return true;
+                }
+            }
         }
     }
 
@@ -1533,7 +1545,7 @@  do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
         ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
         /* If there is no netdev on the numa node, deletes the pmd threads
          * for that numa. */
-        if (!has_pmd_port_for_numa(dp, numa_id)) {
+        if (!has_pmd_rxq_for_numa(dp, numa_id)) {
             dp_netdev_del_pmds_on_numa(dp, numa_id);
         }
     }
@@ -3738,9 +3750,33 @@  dp_netdev_reset_pmd_threads(struct dp_netdev *dp)
 
     HMAP_FOR_EACH (port, node, &dp->ports) {
         if (netdev_is_pmd(port->netdev)) {
-            int numa_id = netdev_get_numa_id(port->netdev);
+            struct hmapx numas = HMAPX_INITIALIZER(&numas);
+            struct hmapx_node *numa_node;
+            uintptr_t numa_id;
+            int i;
+
+            numa_id = netdev_get_numa_id(port->netdev);
+            hmapx_add(&numas, (void *) numa_id);
+            for (i = 0; i < port->n_rxq; i++) {
+                unsigned core_id = port->rxqs[i].core_id;
 
-            dp_netdev_set_pmds_on_numa(dp, numa_id);
+                if (core_id != -1) {
+                    numa_id = ovs_numa_get_numa_id(core_id);
+                    hmapx_add(&numas, (void *) numa_id);
+                }
+            }
+
+            HMAPX_FOR_EACH (numa_node, &numas) {
+                dp_netdev_set_pmds_on_numa(dp, (uintptr_t) numa_node->data);
+            }
+
+            if (hmapx_count(&numas) > 1) {
+                VLOG_WARN("Some Rx queues of port '%s' are assigned to "
+                          "cores on a different numa node. This may "
+                          "cause performance issues.",
+                          netdev_get_name(port->netdev));
+            }
+            hmapx_destroy(&numas);
         }
         /* Distribute only pinned rx queues first to mark threads as isolated */
         dp_netdev_add_port_rx_to_pmds(dp, port, &to_reload, true);
diff --git a/tests/pmd.at b/tests/pmd.at
index 7b63b67..259f0ae 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -517,6 +517,62 @@  p1 3 0 2
 OVS_VSWITCHD_STOP(["/dpif_netdev|WARN|There is no PMD thread on core/d"])
 AT_CLEANUP
 
+AT_SETUP([PMD - rxq affinity - NUMA])
+OVS_VSWITCHD_START(
+  [], [], [], [--dummy-numa 0,0,0,1,1])
+AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg])
+
+AT_CHECK([ovs-ofctl add-flow br0 actions=controller])
+
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=7e])
+
+AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type=dummy-pmd ofport_request=1 options:n_rxq=2 options:numa_id=0 other_config:pmd-rxq-affinity="0:1,1:2"])
+
+dnl The rxqs should be on the requested cores.
+AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | parse_pmd_rxq_show], [0], [dnl
+p1 0 0 1
+p1 1 0 2
+])
+
+AT_CHECK([ovs-vsctl set Interface p1 other_config:pmd-rxq-affinity="0:3,1:4"])
+
+dnl We moved the queues to a different NUMA node.  Expect threads on
+dnl NUMA node 1 to be created.
+AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | parse_pmd_rxq_show], [0], [dnl
+p1 0 1 3
+p1 1 1 4
+])
+
+AT_CHECK([ovs-vsctl set Interface p1 other_config:pmd-rxq-affinity="0:3,1:1"])
+
+dnl Queues split between NUMA nodes.
+AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | parse_pmd_rxq_show], [0], [dnl
+p1 0 1 3
+p1 1 0 1
+])
+
+AT_CHECK([ovs-vsctl remove Interface p1 other_config pmd-rxq-affinity])
+
+dnl We removed the rxq-affinity request.  dpif-netdev should assign queues
+dnl in a round-robin fashion.  We just make sure that every rxq is being
+dnl polled again.
+AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | parse_pmd_rxq_show | cut -f 1,2 -d ' ' | sort], [0], [dnl
+p1 0
+p1 1
+])
+
+AT_CHECK([ovs-vsctl set Interface p1 other_config:pmd-rxq-affinity='0:3'])
+
+dnl We explicitly request NUMA node 1 (core 3) for queue 0.
+dnl Queue 1 should be polled by a thread from NUMA node 0.
+AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | parse_pmd_rxq_show | cut -f 1,2,3 -d ' '], [0], [dnl
+p1 0 1
+p1 1 0
+])
+
+OVS_VSWITCHD_STOP(["/dpif_netdev|WARN|Some Rx queues of port 'p1' are assigned to cores on a different numa node. This may cause performance issues./d"])
+AT_CLEANUP
+
 AT_SETUP([PMD - monitor threads])
 OVS_VSWITCHD_START(
   [], [], [], [--dummy-numa 0,0])