[ovs-dev,3/3] use flow_table as indirect table

Message ID 1520364208-54362-4-git-send-email-yipeng1.wang@intel.com
State Superseded
Delegated to: Ian Stokes
Series dpif-netdev: Combine CD and DFC patch for datapath refactor

Commit Message

Wang, Yipeng1 March 6, 2018, 7:23 p.m. UTC
From: Yipeng Wang <yipeng1.wang@intel.com>

Storing pointers in the distributor is not memory efficient.
In this commit, we instead store a 2-byte index into the
flow_table. If the flow table is larger than 2^16, the rules
stored at the higher indexes will not be cached in the
distributor. We assume the rule count is usually not that large.
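
Concretely, the 16-bit index encodes a (bucket, slot) position in
the underlying cmap; cmap_find_by_index() below decodes it as:

    uint32_t b = index / CMAP_K;    /* bucket index */
    uint32_t e = index % CMAP_K;    /* slot within that bucket */

UINT16_MAX is reserved as the "not cached" sentinel, so only the
first 2^16 - 1 slots are addressable from the distributor.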

In cmap, we add two APIs: one to find a flow by its index and
one to find the index of a flow by its hash. Since the flow
table is a cuckoo hash, keys may be moved around and the table
may be rehashed. In such cases, the distributor will see misses
and refresh itself. However, this should not happen frequently,
and the distributor, being a cache, should not cause functional
errors.
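
To illustrate (a condensed sketch of the datapath changes below,
not additional patch code), the two APIs pair up like this:

    /* Insert path: map the flow's hash to a 16-bit index.  A result
     * of UINT16_MAX is not representable and dfc_insert() skips it. */
    uint16_t index = cmap_find_index(&pmd->flow_table,
                                     dp_netdev_flow_hash(&flow->ufid));
    dfc_insert(pmd, key, index);

    /* Lookup path: map the cached index back to a candidate node and
     * re-validate it; a stale index only costs a miss. */
    struct cmap_node *flow_node = cmap_find_by_index(&pmd->flow_table,
                                                     index);
    CMAP_NODE_FOR_EACH (flow, node, flow_node) {
        if (dpcls_rule_matches_key(&flow->cr, key)) {
            /* Hit: 'flow' is the cached flow. */
        }
    }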

Compared to commit 1, this commit reduces the cache/memory
requirement by half. DFC sweeping is also removed to simplify
the code, since the DFC no longer holds pointers to flows.

Signed-off-by: Yipeng Wang <yipeng1.wang@intel.com>
---
 lib/cmap.c        |  62 ++++++++++++++++++++++++++
 lib/cmap.h        |   5 +++
 lib/dpif-netdev.c | 127 ++++++++++++++++++++----------------------------------
 3 files changed, 113 insertions(+), 81 deletions(-)

Comments

Ben Pfaff March 7, 2018, 8:49 p.m. UTC | #1
On Tue, Mar 06, 2018 at 11:23:28AM -0800, yipeng wang wrote:
> From: Yipeng Wang <yipeng1.wang@intel.com>
> 
> Storing pointers in the distributor is not memory efficient.
> In this commit, we instead store a 2-byte index into the
> flow_table. If the flow table is larger than 2^16, the rules
> stored at the higher indexes will not be cached in the
> distributor. We assume the rule count is usually not that large.
> 
> In cmap, we add two APIs: one to find a flow by its index and
> one to find the index of a flow by its hash. Since the flow
> table is a cuckoo hash, keys may be moved around and the table
> may be rehashed. In such cases, the distributor will see misses
> and refresh itself. However, this should not happen frequently,
> and the distributor, being a cache, should not cause functional
> errors.
> 
> Compared to commit 1, this commit reduces the cache/memory
> requirement by half. DFC sweeping is also removed to simplify
> the code, since the DFC no longer holds pointers to flows.
> 
> Signed-off-by: Yipeng Wang <yipeng1.wang@intel.com>

Thanks for working to make OVS better!

I'm taking a look at this patch, in particular, because it affects the
cmap common data structure.  I don't plan to review the whole series or
even the non-cmap parts of this one.

In cmap.c, it's not really obvious to me what cmap_find_by_index() or
cmap_find_index() does.  I suggest adding function-level comments
explaining the purpose, semantics, and return value of each function.

In cmap.h, the style of the new prototypes doesn't match the style
used for any of the other prototypes in the file, or the common OVS
style.  Please adjust them to match.

Thanks,

Ben.
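
(For reference, the neighboring declarations in the cmap.h hunk below
are single-line, e.g. cmap_find() and cmap_find_protected(); restyling
the new prototypes to match would presumably look like:

    struct cmap_node *cmap_find_by_index(const struct cmap *, uint16_t index);
    uint16_t cmap_find_index(const struct cmap *, uint32_t hash);

though that is an editorial sketch, not part of the posted patch.)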

Patch

diff --git a/lib/cmap.c b/lib/cmap.c
index 07719a8..248f26b 100644
--- a/lib/cmap.c
+++ b/lib/cmap.c
@@ -373,6 +373,68 @@  cmap_find(const struct cmap *cmap, uint32_t hash)
                        hash);
 }
 
+struct cmap_node *
+cmap_find_by_index(const struct cmap *cmap, uint16_t index)
+{
+    const struct cmap_impl *impl = cmap_get_impl(cmap);
+
+    uint32_t b = index / CMAP_K;
+    uint32_t e = index % CMAP_K;
+
+    if (b > impl->mask) {
+        return NULL;
+    }
+
+    const struct cmap_bucket *bucket = &impl->buckets[b];
+
+    return cmap_node_next(&bucket->nodes[e]);
+}
+
+uint16_t
+cmap_find_index(const struct cmap *cmap, uint32_t hash)
+{
+    const struct cmap_impl *impl = cmap_get_impl(cmap);
+    uint32_t h1 = rehash(impl, hash);
+    uint32_t h2 = other_hash(h1);
+
+    uint32_t b_index1 = h1 & impl->mask;
+    uint32_t b_index2 = h2 & impl->mask;
+
+    uint32_t c1, c2;
+    uint32_t index = UINT32_MAX;
+
+    const struct cmap_bucket *b1 = &impl->buckets[b_index1];
+    const struct cmap_bucket *b2 = &impl->buckets[b_index2];
+
+    do {
+        do {
+            c1 = read_even_counter(b1);
+            for (int i = 0; i < CMAP_K; i++) {
+                if (b1->hashes[i] == hash) {
+                    index = b_index1 * CMAP_K + i;
+                }
+            }
+        } while (OVS_UNLIKELY(counter_changed(b1, c1)));
+        if (index != UINT32_MAX) {
+            break;
+        }
+        do {
+            c2 = read_even_counter(b2);
+            for (int i = 0; i < CMAP_K; i++) {
+                if (b2->hashes[i] == hash) {
+                    index = b_index2 * CMAP_K + i;
+                }
+            }
+        } while (OVS_UNLIKELY(counter_changed(b2, c2)));
+
+        if (index != UINT32_MAX) {
+            break;
+        }
+    } while (OVS_UNLIKELY(counter_changed(b1, c1)));
+
+    return (index >= UINT16_MAX) ? UINT16_MAX : (uint16_t)index;
+}
+
 /* Looks up multiple 'hashes', when the corresponding bit in 'map' is 1,
  * and sets the corresponding pointer in 'nodes', if the hash value was
  * found from the 'cmap'.  In other cases the 'nodes' values are not changed,
diff --git a/lib/cmap.h b/lib/cmap.h
index 8bfb6c0..0266aea 100644
--- a/lib/cmap.h
+++ b/lib/cmap.h
@@ -145,6 +145,11 @@  size_t cmap_replace(struct cmap *, struct cmap_node *old_node,
 const struct cmap_node *cmap_find(const struct cmap *, uint32_t hash);
 struct cmap_node *cmap_find_protected(const struct cmap *, uint32_t hash);
 
+struct cmap_node *
+cmap_find_by_index(const struct cmap *cmap, uint16_t index);
+uint16_t
+cmap_find_index(const struct cmap *cmap, uint32_t hash);
+
 /* Looks up multiple 'hashes', when the corresponding bit in 'map' is 1,
  * and sets the corresponding pointer in 'nodes', if the hash value was
  * found from the 'cmap'.  In other cases the 'nodes' values are not changed,
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 08fee86..d9dd20c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -175,13 +175,12 @@  struct emc_cache {
 
 struct dfc_bucket {
     uint16_t sig[DFC_ENTRY_PER_BUCKET];
-    struct dp_netdev_flow *flow[DFC_ENTRY_PER_BUCKET];
+    uint16_t flow_idx[DFC_ENTRY_PER_BUCKET];
 };
 
 struct dfc_cache {
     struct emc_cache emc_cache;
     struct dfc_bucket buckets[DFC_BUCKET_CNT];
-    int sweep_idx;
 };
 
 
@@ -722,7 +721,6 @@  dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
 static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
                                       struct tx_port *tx);
 
-static inline bool dfc_entry_alive(struct dp_netdev_flow *flow);
 static void emc_clear_entry(struct emc_entry *ce);
 static void dfc_clear_entry(struct dfc_bucket *b, int idx);
 
@@ -752,10 +750,9 @@  dfc_cache_init(struct dfc_cache *flow_cache)
     emc_cache_init(&flow_cache->emc_cache);
     for (i = 0; i < DFC_BUCKET_CNT; i++) {
         for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
-            flow_cache->buckets[i].flow[j] = NULL;
+            flow_cache->buckets[i].flow_idx[j] = UINT16_MAX;
         }
     }
-    flow_cache->sweep_idx = 0;
 }
 
 static void
@@ -2209,84 +2206,72 @@  emc_lookup(struct emc_cache *emc, const struct netdev_flow_key *key)
     return NULL;
 }
 
-static inline struct dp_netdev_flow *
-dfc_entry_get(struct dfc_cache *cache, const uint32_t hash)
+static inline struct cmap_node *
+dfc_entry_get(struct dp_netdev_pmd_thread *pmd, struct dfc_cache *cache, const uint32_t hash)
 {
     struct dfc_bucket *bucket = &cache->buckets[hash & DFC_MASK];
     uint16_t sig = hash >> 16;
+    uint16_t index = UINT16_MAX;
+
     for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
-        if(bucket->sig[i] == sig) {
-            return bucket->flow[i];
+        if (bucket->sig[i] == sig) {
+            index = bucket->flow_idx[i];
+            break;
         }
     }
+    if (index != UINT16_MAX) {
+        return cmap_find_by_index(&pmd->flow_table, index);
+    }
     return NULL;
 }
 
-static inline bool
-dfc_entry_alive(struct dp_netdev_flow *flow)
-{
-    return flow && !flow->dead;
-}
 
 static void
 dfc_clear_entry(struct dfc_bucket *b, int idx)
 {
-    if (b->flow[idx]) {
-        dp_netdev_flow_unref(b->flow[idx]);
-        b->flow[idx] = NULL;
-    }
-}
-
-static inline void
-dfc_change_entry(struct dfc_bucket *b, int idx, struct dp_netdev_flow *flow)
-{
-    if (b->flow[idx] != flow) {
-        if (b->flow[idx]) {
-            dp_netdev_flow_unref(b->flow[idx]);
-        }
-
-        if (dp_netdev_flow_ref(flow)) {
-            b->flow[idx] = flow;
-        } else {
-            b->flow[idx] = NULL;
-        }
-    }
+    b->flow_idx[idx] = UINT16_MAX;
 }
 
 static inline void
 dfc_insert(struct dp_netdev_pmd_thread *pmd,
            const struct netdev_flow_key *key,
-           struct dp_netdev_flow *flow)
+           uint16_t index)
 {
     struct dfc_cache *cache = &pmd->flow_cache;
-
     struct dfc_bucket *bucket = &cache->buckets[key->hash & DFC_MASK];
+
+    if (index == UINT16_MAX) {
+        return;
+    }
+
     uint16_t sig = key->hash >> 16;
     for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
         if(bucket->sig[i] == sig) {
-            dfc_change_entry(bucket, i, flow);
+            bucket->flow_idx[i] = index;
             return;
         }
     }
     for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
-        if(bucket->flow[i] == NULL) {
+        if (bucket->flow_idx[i] == UINT16_MAX) {
             bucket->sig[i] = sig;
-            dfc_change_entry(bucket, i, flow);
+            bucket->flow_idx[i] = index;
             return;
         }
     }
     int idx = random_uint32() & (DFC_ENTRY_PER_BUCKET - 1);
     bucket->sig[idx] = sig;
-    dfc_change_entry(bucket, idx, flow);
+    bucket->flow_idx[idx] = index;
 }
 
 static inline struct dp_netdev_flow *
-dfc_lookup(struct dp_netdev_pmd_thread *pmd, struct netdev_flow_key *key,
-           bool *exact_match)
+dfc_lookup(struct dp_netdev_pmd_thread *pmd,
+           struct netdev_flow_key *key,
+           bool *exact_match)
 {
     struct dp_netdev_flow *flow;
     uint32_t cur_min;
     struct dfc_cache *cache = &pmd->flow_cache;
+    struct cmap_node *flow_node;
 
     /* Try an EMC lookup first. */
     atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
@@ -2303,23 +2288,18 @@  dfc_lookup(struct dp_netdev_pmd_thread *pmd, struct netdev_flow_key *key,
     }
 
     /* EMC lookup not successful: try DFC lookup. */
-    flow = dfc_entry_get(cache, key->hash);
-
-    if (flow != NULL && dfc_entry_alive(flow) &&
-        dpcls_rule_matches_key(&flow->cr, key)) {
-
-        /* Found a match in DFC. Insert into EMC for subsequent lookups.
-         * We use probabilistic insertion here so that mainly elephant
-         * flows enter EMC. */
-        key->len = netdev_flow_key_size(miniflow_n_values(&key->mf));
-        emc_probabilistic_insert(pmd, key, flow);
-        *exact_match = false;
-        return flow;
-    } else {
+    flow_node = dfc_entry_get(pmd, cache, key->hash);
 
-        /* No match. Need to go to DPCLS lookup. */
-        return NULL;
+    CMAP_NODE_FOR_EACH (flow, node, flow_node) {
+        if (OVS_LIKELY(dpcls_rule_matches_key(&flow->cr, key))) {
+            key->len = netdev_flow_key_size(miniflow_n_values(&key->mf));
+            emc_probabilistic_insert(pmd, key, flow);
+            *exact_match = false;
+            return flow;
+        }
     }
+
+    return NULL;
 }
 
 /* Check and clear dead flow references slowly (one entry at each
@@ -2335,26 +2315,6 @@  emc_slow_sweep(struct emc_cache *emc)
     emc->sweep_idx = (emc->sweep_idx + 1) & EMC_MASK;
 }
 
-static void
-dfc_slow_sweep(struct dfc_cache *cache)
-{
-    /* Sweep the EMC so that both finish in the same time. */
-    if ((cache->sweep_idx & (DFC_BUCKET_CNT / EMC_ENTRIES - 1)) == 0) {
-        emc_slow_sweep(&cache->emc_cache);
-    }
-
-    if((cache->sweep_idx & (DFC_ENTRY_PER_BUCKET - 1)) == 0) {
-        uint32_t bkt = cache->sweep_idx / DFC_ENTRY_PER_BUCKET;
-        struct dfc_bucket *bucket = &cache->buckets[bkt];
-        for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++){
-            if (!dfc_entry_alive(bucket->flow[i])) {
-                dfc_clear_entry(bucket, i);
-            }
-        }
-    }
-    cache->sweep_idx = (cache->sweep_idx + 1) & (DFC_ENTRIES - 1);
-}
-
 static struct dp_netdev_flow *
 dp_netdev_pmd_lookup_flow(struct dp_netdev_pmd_thread *pmd,
                           const struct netdev_flow_key *key,
@@ -4325,8 +4285,9 @@  reload:
 
             coverage_try_clear();
             dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
+
             if (!ovsrcu_try_quiesce()) {
-                dfc_slow_sweep(&pmd->flow_cache);
+                emc_slow_sweep(&pmd->flow_cache.emc_cache);
             }
 
             atomic_read_relaxed(&pmd->reload, &reload);
@@ -5184,6 +5145,7 @@  dfc_processing(struct dp_netdev_pmd_thread *pmd,
         }
         miniflow_extract(packet, &key->mf);
         key->len = 0; /* Not computed yet. */
+
         if (!md_is_valid) {
             key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet,
                     &key->mf);
@@ -5279,7 +5241,9 @@  handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
                                              add_actions->size);
         }
         ovs_mutex_unlock(&pmd->flow_mutex);
-        dfc_insert(pmd, key, netdev_flow);
+        uint16_t index = cmap_find_index(&pmd->flow_table,
+                                    dp_netdev_flow_hash(&netdev_flow->ufid));
+        dfc_insert(pmd, key, index);
     }
     return error;
 }
@@ -5374,8 +5338,9 @@  fast_path_processing(struct dp_netdev_pmd_thread *pmd,
         }
 
         flow = dp_netdev_flow_cast(rules[i]);
-
-        dfc_insert(pmd, &keys[i], flow);
+        uint16_t index = cmap_find_index(&pmd->flow_table,
+                                         dp_netdev_flow_hash(&flow->ufid));
+        dfc_insert(pmd, &keys[i], index);
         dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
     }