@@ -460,6 +460,16 @@ affinitized accordingly.
pmd thread on a NUMA node is only created if there is at least one DPDK
interface from that NUMA node added to OVS.
+ .. note::
+ On NUMA systems PCI devices are also local to a NUMA node. Rx queues for
+ a PCI device will be assigned to a pmd on its local NUMA node if pmd-cpu-mask
+ has created a pmd thread on that NUMA node. If not, the queue will be
+ assigned to a pmd on a remote NUMA node. This will result in reduced
+ maximum throughput on that device. If such a queue assignment
+ is made, a warning message will be logged: "There's no available (non-
+ isolated) pmd thread on numa node N. Queue Q on port P will be assigned to
+ the pmd on core C (numa node N'). Expect reduced performance."
+
- QEMU vCPU thread Affinity
A VM performing simple packet forwarding or running complex packet pipelines
@@ -3149,10 +3149,13 @@ rr_numa_list_lookup(struct rr_numa_list *rr, int numa_id)
}
static void
-rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
+rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr,
+ int *all_numa_ids, unsigned all_numa_ids_sz,
+ int *num_ids_written)
{
struct dp_netdev_pmd_thread *pmd;
struct rr_numa *numa;
+ unsigned idx = 0;
hmap_init(&rr->numas);
@@ -3170,7 +3173,11 @@ rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
numa->n_pmds++;
numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa->pmds);
numa->pmds[numa->n_pmds - 1] = pmd;
+
+ all_numa_ids[idx % all_numa_ids_sz] = pmd->numa_id;
+ idx++;
}
+ *num_ids_written = idx;
}
static struct dp_netdev_pmd_thread *
@@ -3202,8 +3209,15 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
{
struct dp_netdev_port *port;
struct rr_numa_list rr;
+ int all_numa_ids [64];
+ int all_numa_ids_sz = sizeof all_numa_ids / sizeof all_numa_ids[0];
+ unsigned all_numa_ids_idx = 0;
+ int all_numa_ids_max_idx = 0;
+ int num_numa_ids = 0;
- rr_numa_list_populate(dp, &rr);
+ rr_numa_list_populate(dp, &rr, all_numa_ids, all_numa_ids_sz,
+ &num_numa_ids);
+ all_numa_ids_max_idx = MIN(num_numa_ids - 1, all_numa_ids_sz - 1);
HMAP_FOR_EACH (port, node, &dp->ports) {
struct rr_numa *numa;
@@ -3234,10 +3248,29 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
}
} else if (!pinned && q->core_id == OVS_CORE_UNSPEC) {
if (!numa) {
- VLOG_WARN("There's no available (non isolated) pmd thread "
+ if (all_numa_ids_max_idx < 0) {
+ VLOG_ERR("There is no available (non-isolated) pmd "
+ "thread for port \'%s\' queue %d. This port "
+ "will not be polled. Is pmd-cpu-mask set to "
+ "zero? Or are all PMDs isolated to other "
+ "queues?", netdev_get_name(port->netdev),
+ qid);
+ continue;
+ }
+ int alt_numa_id = all_numa_ids[all_numa_ids_idx];
+ struct rr_numa *alt_numa;
+ alt_numa = rr_numa_list_lookup(&rr, alt_numa_id);
+ q->pmd = rr_numa_get_pmd(alt_numa);
+ VLOG_WARN("There's no available (non-isolated) pmd thread "
"on numa node %d. Queue %d on port \'%s\' will "
- "not be polled.",
- numa_id, qid, netdev_get_name(port->netdev));
+ "be assigned to the pmd on core %d "
+ "(numa node %d). Expect reduced performance.",
+ numa_id, qid, netdev_get_name(port->netdev),
+ q->pmd->core_id, q->pmd->numa_id);
+ all_numa_ids_idx++;
+ if (all_numa_ids_idx > all_numa_ids_max_idx) {
+ all_numa_ids_idx = 0;
+ }
} else {
q->pmd = rr_numa_get_pmd(numa);
}