@@ -101,6 +101,10 @@ observed with pmd stats::
For certain traffic profiles with many parallel flows, it's recommended to set
``N`` to '0' to achieve higher forwarding performance.
+It is also possible to enable/disable EMC on a per-port basis using::
+
+ $ ovs-vsctl set interface <iface> other_config:emc-enable={true,false}
+
For more information on the EMC refer to :doc:`/intro/install/dpdk` .
@@ -9,9 +9,11 @@ Post-v2.10.0
- ovn:
* New support for IPSEC encrypted tunnels between hypervisors.
* ovn-ctl: allow passing user:group ids to the OVN daemons.
- - DPDK:
+ - Userspace datapath:
* Add option for simple round-robin based Rxq to PMD assignment.
It can be set with pmd-rxq-assign.
+     * Added a new per-port configurable option to manage EMC:
+ 'other_config:emc-enable'.
- Add 'symmetric_l3' hash function.
- OVS now honors 'updelay' and 'downdelay' for bonds with LACP configured.
- ovs-vswitchd:
@@ -460,6 +460,7 @@ struct dp_netdev_port {
unsigned n_rxq; /* Number of elements in 'rxqs' */
unsigned *txq_used; /* Number of threads that use each tx queue. */
struct ovs_mutex txq_used_mutex;
+ bool emc_enabled; /* If true EMC will be used. */
char *type; /* Port type as requested by user. */
char *rxq_affinity_list; /* Requested affinity of rx queues. */
};
@@ -574,6 +575,7 @@ static void dp_netdev_actions_free(struct dp_netdev_actions *);
struct polled_queue {
struct dp_netdev_rxq *rxq;
odp_port_t port_no;
+ bool emc_enabled;
};
/* Contained by struct dp_netdev_pmd_thread's 'poll_list' member. */
@@ -603,6 +605,8 @@ struct dp_netdev_pmd_thread_ctx {
long long now;
/* RX queue from which last packet was received. */
struct dp_netdev_rxq *last_rxq;
+ /* EMC insertion probability context for the current processing cycle. */
+ uint32_t emc_insert_min;
};
/* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
@@ -1779,6 +1783,7 @@ port_create(const char *devname, const char *type,
port->netdev = netdev;
port->type = xstrdup(type);
port->sf = sf;
+ port->emc_enabled = true;
port->need_reconfigure = true;
ovs_mutex_init(&port->txq_used_mutex);
@@ -2811,9 +2816,7 @@ emc_probabilistic_insert(struct dp_netdev_pmd_thread *pmd,
* default the value is UINT32_MAX / 100 which yields an insertion
* probability of 1/100 ie. 1% */
- uint32_t min;
-
- atomic_read_relaxed(&pmd->dp->emc_insert_min, &min);
+ uint32_t min = pmd->ctx.emc_insert_min;
if (min && random_uint32() <= min) {
emc_insert(&(pmd->flow_cache).emc_cache, key, flow);
@@ -3679,7 +3682,8 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
ovs_mutex_lock(&dp->non_pmd_mutex);
}
- /* Update current time in PMD context. */
+ /* Update current time in PMD context. We don't care about EMC insertion
+ * probability, because we are on a slow path. */
pmd_thread_ctx_time_update(pmd);
/* The action processing expects the RSS hash to be valid, because
@@ -3775,7 +3779,7 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
if (insert_min != cur_min) {
atomic_store_relaxed(&dp->emc_insert_min, insert_min);
if (insert_min == 0) {
- VLOG_INFO("EMC has been disabled");
+ VLOG_INFO("EMC insertion probability changed to zero");
} else {
VLOG_INFO("EMC insertion probability changed to 1/%llu (~%.2f%%)",
insert_prob, (100 / (float)insert_prob));
@@ -3881,6 +3885,27 @@ exit:
return error;
}
+/* Returns 'true' if one of the 'port's RX queues exists in 'poll_list'
+ * of the given PMD thread. */
+static bool
+dpif_netdev_pmd_polls_port(struct dp_netdev_pmd_thread *pmd,
+ struct dp_netdev_port *port)
+ OVS_EXCLUDED(pmd->port_mutex)
+{
+ struct rxq_poll *poll;
+ bool found = false;
+
+ ovs_mutex_lock(&pmd->port_mutex);
+ HMAP_FOR_EACH (poll, node, &pmd->poll_list) {
+ if (port == poll->rxq->port) {
+ found = true;
+ break;
+ }
+ }
+ ovs_mutex_unlock(&pmd->port_mutex);
+ return found;
+}
+
/* Changes the affinity of port's rx queues. The changes are actually applied
* in dpif_netdev_run(). */
static int
@@ -3891,10 +3916,33 @@ dpif_netdev_port_set_config(struct dpif *dpif, odp_port_t port_no,
struct dp_netdev_port *port;
int error = 0;
const char *affinity_list = smap_get(cfg, "pmd-rxq-affinity");
+ bool emc_enabled = smap_get_bool(cfg, "emc-enable", true);
ovs_mutex_lock(&dp->port_mutex);
error = get_port_by_number(dp, port_no, &port);
- if (error || !netdev_is_pmd(port->netdev)
+ if (error) {
+ goto unlock;
+ }
+
+ if (emc_enabled != port->emc_enabled) {
+ struct dp_netdev_pmd_thread *pmd;
+
+ port->emc_enabled = emc_enabled;
+        /* Mark for reload all the threads that poll this port and request
+         * reconfiguration for the actual reloading of threads. */
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ if (dpif_netdev_pmd_polls_port(pmd, port)) {
+ pmd->need_reload = true;
+ }
+ }
+ dp_netdev_request_reconfigure(dp);
+
+ VLOG_INFO("%s: EMC has been %s", netdev_get_name(port->netdev),
+ (emc_enabled) ? "enabled" : "disabled");
+ }
+
+ /* Checking for RXq affinity changes. */
+ if (!netdev_is_pmd(port->netdev)
|| nullable_string_is_equal(affinity_list, port->rxq_affinity_list)) {
goto unlock;
}
@@ -4798,6 +4846,13 @@ dpif_netdev_run(struct dpif *dpif)
if (!netdev_is_pmd(port->netdev)) {
int i;
+ if (port->emc_enabled) {
+ atomic_read_relaxed(&dp->emc_insert_min,
+ &non_pmd->ctx.emc_insert_min);
+ } else {
+ non_pmd->ctx.emc_insert_min = 0;
+ }
+
for (i = 0; i < port->n_rxq; i++) {
if (dp_netdev_process_rxq_port(non_pmd,
&port->rxqs[i],
@@ -4945,6 +5000,7 @@ pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
HMAP_FOR_EACH (poll, node, &pmd->poll_list) {
poll_list[i].rxq = poll->rxq;
poll_list[i].port_no = poll->rxq->port->port_no;
+ poll_list[i].emc_enabled = poll->rxq->port->emc_enabled;
i++;
}
@@ -5007,6 +5063,14 @@ reload:
pmd_perf_start_iteration(s);
for (i = 0; i < poll_cnt; i++) {
+
+ if (poll_list[i].emc_enabled) {
+ atomic_read_relaxed(&pmd->dp->emc_insert_min,
+ &pmd->ctx.emc_insert_min);
+ } else {
+ pmd->ctx.emc_insert_min = 0;
+ }
+
process_packets =
dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
poll_list[i].port_no);
@@ -5948,7 +6012,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
struct dfc_cache *cache = &pmd->flow_cache;
struct dp_packet *packet;
const size_t cnt = dp_packet_batch_size(packets_);
- uint32_t cur_min;
+ uint32_t cur_min = pmd->ctx.emc_insert_min;
int i;
uint16_t tcp_flags;
bool smc_enable_db;
@@ -5956,7 +6020,6 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
bool batch_enable = true;
atomic_read_relaxed(&pmd->dp->smc_enable_db, &smc_enable_db);
- atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
pmd_perf_update_counter(&pmd->perf_stats,
md_is_valid ? PMD_STAT_RECIRC : PMD_STAT_RECV,
cnt);
@@ -3060,6 +3060,25 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
</column>
</group>
+ <group title="EMC (Exact Match Cache) Configuration">
+ <p>
+      These settings control the behaviour of EMC lookups/insertions for
+      packets received from the interface.
+ </p>
+
+ <column name="other_config" key="emc-enable" type='{"type": "boolean"}'>
+ <p>
+ Specifies if Exact Match Cache (EMC) should be used while processing
+ packets received from this interface.
+ If true, <ref table="Open_vSwitch" column="other_config"
+ key="emc-insert-inv-prob"/> will have effect on this interface.
+ </p>
+ <p>
+ Defaults to true.
+ </p>
+ </column>
+ </group>
+
<group title="MTU">
<p>
The MTU (maximum transmission unit) is the largest amount of data
Conditional EMC insert helps a lot in scenarios with high numbers of parallel flows, but in the current implementation this option affects all the threads and ports at once. There are scenarios where we have different numbers of flows on different ports. For example, if one of the VMs encapsulates traffic using additional headers, it will receive a large number of flows, but only a few flows will come out of this VM. In this scenario it's much faster to use EMC instead of the classifier for traffic from the VM, but it's better to disable EMC for the traffic which flows to the VM. To handle the above issue, this patch introduces an 'emc-enable' option to enable/disable EMC on a per-port basis. Ex.: ovs-vsctl set interface dpdk0 other_config:emc-enable=false The EMC probability is kept as is and it works for all the ports with 'emc-enable=true'. Signed-off-by: Ilya Maximets <i.maximets@samsung.com> --- It's been a while since the first version. It's available here: https://patchwork.ozlabs.org/patch/800277/ Version 2: * The main concern was about backward compatibility. Also, there is no real profit having the per-port probability value. So, per-port probability switched to per-port 'emc-enable' configurable. Global probability kept back and can be used without any changes. Documentation/topics/dpdk/bridge.rst | 4 ++ NEWS | 4 +- lib/dpif-netdev.c | 79 +++++++++++++++++++++++++--- vswitchd/vswitch.xml | 19 +++++++ 4 files changed, 97 insertions(+), 9 deletions(-)