[ovs-dev,v2,2/5] dpif-netdev: Add AVX2 implementation for CD lookup.

Message ID 1509493177-28988-3-git-send-email-yipeng1.wang@intel.com
State New
Headers show
Series
  • dpif-netdev: Cuckoo-Distributor implementation
Related show

Commit Message

Wang, Yipeng1 Oct. 31, 2017, 11:39 p.m.
This patch adds the AVX2 implementation during CD lookup. 16 entries of a
bucket will be compared together with the lookup key. This patch depends
on the first patch.

CC: Darrell Ball <dball at vmware.com>
CC: Jan Scheurich <jan.scheurich at ericsson.com>
Signed-off-by: Yipeng Wang <yipeng1.wang at intel.com>
Signed-off-by: Antonio Fischetti <antonio.fischetti at intel.com>
Co-authored-by: Antonio Fischetti <antonio.fischetti at intel.com>
---
evaluation:
We setup the testing enviornment same to the previous patch. The AVX2
CD implementation's results are shown below.

AVX2 data:
1M flows:
no.subtable: 10          20          30
cd-ovs       3895961     3170530     2968555
orig-ovs     2683455     1646227     1240501
speedup      1.45x       1.92x       2.39x
---
 lib/dpif-netdev.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ea1d625..78219ba 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -30,6 +30,9 @@ 
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <unistd.h>
+#if defined(__AVX2__)
+#include <immintrin.h>
+#endif
 
 #ifdef DPDK_NETDEV
 #include <rte_cycles.h>
@@ -2378,7 +2381,37 @@  cd_lookup_bulk_pipe(struct dpcls *cls, const struct netdev_flow_key keys[],
 
         OVS_PREFETCH(prim_bkt1);
         OVS_PREFETCH(sec_bkt1);
+#ifdef __AVX2__
+        prim_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+                _mm256_load_si256((__m256i const *)prim_bkt0->sig),
+                _mm256_set1_epi16(temp_sig0)));
+
+
+        sec_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+                _mm256_load_si256((__m256i const *)sec_bkt0->sig),
+                _mm256_set1_epi16(temp_sig0)));
 
+        if (prim_hitmask) {
+            loc = raw_ctz(prim_hitmask) >> 1;
+            data[i-1] =
+                     prim_bkt0->table_index[loc];
+            if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+                hits |= 1 << (i - 1);
+                prim_bkt0 = prim_bkt1;
+                sec_bkt0 = sec_bkt1;
+                temp_sig0 = temp_sig1;
+                continue;
+            }
+        }
+
+        if (sec_hitmask) {
+            loc = raw_ctz(sec_hitmask) >> 1;
+            data[i-1] = sec_bkt0->table_index[loc];
+            if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+               hits |= 1 << (i - 1);
+            }
+        }
+#else
         unsigned int j;
         prim_hitmask = 0;
         sec_hitmask = 0;
@@ -2407,12 +2440,42 @@  cd_lookup_bulk_pipe(struct dpcls *cls, const struct netdev_flow_key keys[],
                 hits |= 1 << (i - 1);
             }
         }
-
+#endif
         prim_bkt0 = prim_bkt1;
         sec_bkt0 = sec_bkt1;
         temp_sig0 = temp_sig1;
     }
 
+#ifdef __AVX2__
+    prim_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+                _mm256_load_si256((__m256i const *)prim_bkt0->sig),
+                _mm256_set1_epi16(temp_sig0)));
+
+
+    sec_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+                _mm256_load_si256((__m256i const *)sec_bkt0->sig),
+                _mm256_set1_epi16(temp_sig0)));
+
+    if (prim_hitmask) {
+        loc = raw_ctz(prim_hitmask) >> 1;
+        data[i-1] = prim_bkt0->table_index[loc];
+        if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+            hits |= 1 << (i - 1);
+            if (hit_mask != NULL) {
+                *hit_mask = hits;
+            }
+            return;
+        }
+     }
+
+    if (sec_hitmask) {
+        loc = raw_ctz(sec_hitmask) >> 1;
+        data[i-1] = sec_bkt0->table_index[loc];
+        if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+           hits |= 1 << (i - 1);
+        }
+    }
+#else
     unsigned int j;
     prim_hitmask = 0;
     sec_hitmask = 0;
@@ -2442,9 +2505,11 @@  cd_lookup_bulk_pipe(struct dpcls *cls, const struct netdev_flow_key keys[],
         }
     }
 
+#endif
     if (hit_mask != NULL) {
         *hit_mask = hits;
     }
+
 }
 
 static int