diff mbox series

[ovs-dev,v4,2/2] dpif-netdev: dfc_process performance optimization by prefetching EMC entry.

Message ID 1559555477-36363-3-git-send-email-Yanqin.Wei@arm.com
State Deferred
Headers show
Series dfc_process optimization by prefetching EMC entry | expand

Commit Message

Yanqin Wei June 3, 2019, 9:51 a.m. UTC
It is observed that the throughput with a medium number of flows (9-8191) is
worse than with a low number of flows (1-8) in the EMC NIC2NIC test.
This is because CPU cache misses increase during EMC lookup: each flow needs
to load at least one EMC entry (several cache lines) into the CPU L1 cache and
compare it with the packet miniflow.
This patch improves this by prefetching the EMC entry in advance. The hash
value can be obtained from the DPDK RSS hash, so this step can be moved ahead
of miniflow_extract() and the EMC entry prefetched there. In tests on several
kinds of CPU with a 32K L1 cache (x86-64 and arm64), prefetching starts to
improve performance from 8~10 flows onwards. In order to benefit most modern
CPUs, the minimum threshold is set to 20. The maximum threshold is set to
EM_FLOW_HASH_ENTRIES-1 because entry prefetching has a negative effect with a
huge number of flows. So this patch prefetches one EMC cache line only when
the EMC counter is in the range 20-8191, which should ensure no side effects
in all cases.
Performance tests were run on several arm and x86 platforms. The medium
number of flows case achieved around 2-3% improvement in the RFC2544 test on
x86 and arm. A high number of flows (>8191) also benefits during EMC
insertion, so it achieves around 2% improvement in the 100k flows RFC2544
test. A low number of flows sees almost no performance impact.

Signed-off-by: Yanqin Wei <Yanqin.Wei@arm.com>
Reviewed-by: Gavin Hu <Gavin.Hu@arm.com>
---
 lib/dpif-netdev.c | 66 ++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 23 deletions(-)
 mode change 100644 => 100755 lib/dpif-netdev.c
diff mbox series

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
old mode 100644
new mode 100755
index c74cc02..dc2ad64
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -189,6 +189,9 @@  struct netdev_flow_key {
 #define DEFAULT_EM_FLOW_INSERT_MIN (UINT32_MAX /                     \
                                     DEFAULT_EM_FLOW_INSERT_INV_PROB)
 
+/* Minimum emc_cache.counter value at which EMC prefetching is enabled. */
+#define EMC_PREFETCH_MIN_THRESHOLD 10
+
 struct emc_entry {
     struct dp_netdev_flow *flow;
     struct netdev_flow_key key;   /* key.hash used for emc hash value. */
@@ -215,6 +218,11 @@  struct dfc_cache {
     struct smc_cache smc_cache;
 };
 
+/* Prefetch if count in [EMC_PREFETCH_MIN_THRESHOLD, EM_FLOW_HASH_ENTRIES). */
+#define EMC_PREFETCH_IN_RANGE(DFC_CACHE)                        \
+    ((DFC_CACHE)->emc_cache.counter >= EMC_PREFETCH_MIN_THRESHOLD \
+    && (DFC_CACHE)->emc_cache.counter < EM_FLOW_HASH_ENTRIES)
+
 /* Iterate in the exact match cache through every entry that might contain a
  * miniflow with hash 'HASH'. */
 #define EMC_FOR_EACH_POS_WITH_HASH(EMC, CURRENT_ENTRY, HASH)                 \
@@ -6172,41 +6180,41 @@  dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
 }
 
 static inline uint32_t
-dpif_netdev_packet_get_rss_hash_orig_pkt(struct dp_packet *packet,
-                                const struct miniflow *mf)
+dpif_netdev_packet_get_5tuple_hash(struct dp_packet *packet,
+                                   const struct miniflow *mf,
+                                   bool account_recirc_id)
 {
-    uint32_t hash;
+    uint32_t hash, recirc_depth;
 
-    if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
-        hash = dp_packet_get_rss_hash(packet);
-    } else {
-        hash = miniflow_hash_5tuple(mf, 0);
-        dp_packet_set_rss_hash(packet, hash);
+    hash = miniflow_hash_5tuple(mf, 0);
+
+    if (account_recirc_id) {
+        /* The RSS hash must account for the recirculation depth to avoid
+         * collisions in the exact match cache */
+        recirc_depth = *recirc_depth_get_unsafe();
+        hash = hash_finish(hash, recirc_depth);
     }
 
+    dp_packet_set_rss_hash(packet, hash);
     return hash;
 }
 
 static inline uint32_t
 dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
-                                const struct miniflow *mf)
+                                bool account_recirc_id)
 {
     uint32_t hash, recirc_depth;
 
-    if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
-        hash = dp_packet_get_rss_hash(packet);
-    } else {
-        hash = miniflow_hash_5tuple(mf, 0);
-        dp_packet_set_rss_hash(packet, hash);
-    }
+    hash = dp_packet_get_rss_hash(packet);
 
-    /* The RSS hash must account for the recirculation depth to avoid
-     * collisions in the exact match cache */
-    recirc_depth = *recirc_depth_get_unsafe();
-    if (OVS_UNLIKELY(recirc_depth)) {
+    if (account_recirc_id) {
+        /* The RSS hash must account for the recirculation depth to avoid
+         * collisions in the exact match cache */
+        recirc_depth = *recirc_depth_get_unsafe();
         hash = hash_finish(hash, recirc_depth);
         dp_packet_set_rss_hash(packet, hash);
     }
+
     return hash;
 }
 
@@ -6396,6 +6404,8 @@  dfc_processing(struct dp_netdev_pmd_thread *pmd,
     bool smc_enable_db;
     size_t map_cnt = 0;
     bool batch_enable = true;
+    bool rss_valid;
+    bool prefetch_emc = cur_min && EMC_PREFETCH_IN_RANGE(cache);
 
     atomic_read_relaxed(&pmd->dp->smc_enable_db, &smc_enable_db);
     pmd_perf_update_counter(&pmd->perf_stats,
@@ -6442,12 +6452,22 @@  dfc_processing(struct dp_netdev_pmd_thread *pmd,
             }
         }
 
+        rss_valid = dp_packet_rss_valid(packet);
+        if (rss_valid) {
+            key->hash = dpif_netdev_packet_get_rss_hash(packet, md_is_valid);
+            if (prefetch_emc) {
+                OVS_PREFETCH(&cache->emc_cache.entries[key->hash
+                                                      & EM_FLOW_HASH_MASK]);
+            }
+        }
+
         miniflow_extract(packet, &key->mf);
         key->len = 0; /* Not computed yet. */
-        key->hash =
-                (md_is_valid == false)
-                ? dpif_netdev_packet_get_rss_hash_orig_pkt(packet, &key->mf)
-                : dpif_netdev_packet_get_rss_hash(packet, &key->mf);
+
+        if (!rss_valid) {
+            key->hash = dpif_netdev_packet_get_5tuple_hash(packet, &key->mf,
+                                                           md_is_valid);
+        }
 
         /* If EMC is disabled skip emc_lookup */
         flow = (cur_min != 0) ? emc_lookup(&cache->emc_cache, key) : NULL;