@@ -176,6 +176,66 @@ struct emc_cache {
i__ < EM_FLOW_HASH_SEGS; \
i__++, srch_hash__ >>= EM_FLOW_HASH_SHIFT)
+
+/* Cuckoo distributor (CD) is a double-hash function hash table, that helps
+ * redirect packets to their corresponding subtables to avoid the sequential
+ * search of megaflow subtables. This is another layer of cache to cache flows
+ * and their corresponding subtable indexes. Different from a hash table, CD
+ * can have certain false positive rate (since the full key is not stored for
+ * space efficiency). Our CD design was partially inspired by earlier concepts
+ * proposed in "simTable"[1] and "Cuckoo Filter"[2], and DPDK's cuckoo hash
+ * implementation.
+ *
+ * For the current implementation, the design does not allow displacing items
+ * when a bucket is full, which is different from the behavior of a cuckoo hash
+ * table. The advantage is that we do not need to store two signatures so that
+ * the struct is more compact. We use 16 entries per bucket for the
+ * convenience of vector lookup.
+ *
+ * Each classifier has its own cuckoo distributor.
+ *
+ * [1] H. Lee and B. Lee, Approaches for improving tuple space search-based
+ * table lookup, ICTC '15
+ * [2] B. Fan, D. G. Andersen, M. Kaminsky, and M. D. Mitzenmacher,
+ * Cuckoo Filter: Practically Better Than Bloom, CoNEXT '14
+ */
+
+#define CD_NUM_BUCKETS (1 << 16)
+#define CD_BUCKET_MASK (CD_NUM_BUCKETS - 1)
+/* Number of entries per bucket. */
+#define CD_ENTRIES 16
+#define CD_ENTRY_MASK (CD_ENTRIES - 1)
+
+/* These two seeds are used for hashing out two bucket locations. */
+#define CD_PRIM_SEED 10
+#define CD_SEC_SEED 20
+
+/* This bit is used to choose which bucket to replace CD's entry
+ * in cd_insert. */
+#define CD_BKT_BIT (1 << CD_ENTRIES)
+
+/* Two byte signature and same length of mask. */
+typedef uint16_t cd_sig_t;
+#define CD_SIG_MASK 0xffff
+
+/* Length of the subtable pointer array the cuckoo distributor uses to index
+ * subtables.  A CD entry stores a 2-byte index, so up to 2^16 - 1 entries
+ * could be addressed; currently the array is limited to 256 slots, with
+ * index 0 reserved to mean "empty".
+ */
+#define SUB_PTR_LEN 256
+
+/* A bucket of the cuckoo distributor: CD_ENTRIES parallel pairs of a 16-bit
+ * flow signature and a subtable index.  A table_index of 0 marks an empty
+ * entry (sub_ptrs[0] is reserved), so an all-zero bucket is empty. */
+struct cd_bucket {
+    cd_sig_t sig[CD_ENTRIES]; /* 2-byte long signature. */
+    uint16_t table_index[CD_ENTRIES]; /* index to subtable pointer array. */
+} __attribute__ ((packed));
+
+
+/* The cuckoo distributor: a flat, cache-line-aligned array of buckets. */
+struct cd_cache {
+    struct cd_bucket buckets[CD_NUM_BUCKETS]; /* buckets array. */
+} __attribute__ ((aligned (64)));
+
+
/* Simple non-wildcarding single-priority classifier. */
/* Time in ms between successive optimizations of the dpcls subtable vector */
@@ -194,6 +254,8 @@ struct dpcls {
odp_port_t in_port;
struct cmap subtables_map;
struct pvector subtables;
+ struct cd_cache *cdtable; /* The cuckoo distributor. */
+ struct dpcls_subtable *sub_ptrs[SUB_PTR_LEN]; /* Subtable pointer array. */
};
/* A rule to be inserted to the classifier. */
@@ -214,6 +276,9 @@ static bool dpcls_lookup(struct dpcls *cls,
const struct netdev_flow_key keys[],
struct dpcls_rule **rules, size_t cnt,
int *num_lookups_p);
+static inline struct dpcls_subtable *
+dpcls_find_subtable(struct dpcls *cls, const struct netdev_flow_key *mask);
+
/* Set of supported meter flags */
#define DP_SUPPORTED_METER_FLAGS_MASK \
@@ -764,6 +829,27 @@ emc_cache_slow_sweep(struct emc_cache *flow_cache)
flow_cache->sweep_idx = (flow_cache->sweep_idx + 1) & EM_FLOW_HASH_MASK;
}
+/* Initializes the cuckoo distributor structure to the empty state.
+ * A table_index of 0 marks an entry as empty (sub_ptrs[0] is the "no
+ * subtable" sentinel), so an all-zero table is fully empty. */
+static void
+cd_init(struct cd_cache *cd)
+{
+    /* Every field is an integer, so a byte-wise zero fill produces exactly
+     * the same state as zeroing each sig[] and table_index[] slot in a
+     * loop, and lets the libc use an optimized fill. */
+    memset(cd, 0, sizeof *cd);
+}
+
+/* Frees the cuckoo distributor allocated in dpcls_init() via
+ * posix_memalign().  Safe to call with NULL (free(NULL) is a no-op). */
+static void
+cd_delete(struct cd_cache *cd)
+{
+    free(cd);
+}
+
/* Returns true if 'dpif' is a netdev or dummy dpif, false otherwise. */
bool
dpif_is_netdev(const struct dpif *dpif)
@@ -2188,6 +2274,240 @@ emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key)
return NULL;
}
+
+/* Inserts the mapping "16-bit signature of 'key' -> 'index'" into the
+ * cuckoo distributor.  'index' is a nonzero slot of cls->sub_ptrs (0 is the
+ * "empty" sentinel).  If the signature already exists in either candidate
+ * bucket, its index is updated in place; otherwise the first empty slot of
+ * the primary, then the secondary, bucket is used; if both buckets are full
+ * a random entry is overwritten (this CD does not displace entries the way
+ * a true cuckoo hash would). */
+static void
+cd_insert(struct cd_cache *cd,
+          const struct netdev_flow_key *key, int index)
+{
+    int i;
+    cd_sig_t tmp_sig = (key->hash & CD_SIG_MASK);
+
+    /*
+     * First element of sub_ptrs (index == 0) means an empty element.
+     * Here we should have a valid entry for CD insertion so it is not 0.
+     */
+    ovs_assert(index != 0);
+    /*
+     * Using 2 different seeds to get 2 independent candidate bucket
+     * locations for the same hash.
+     */
+    uint32_t prim_bucket = hash_int(key->hash, CD_PRIM_SEED) & CD_BUCKET_MASK;
+    uint32_t sec_bucket = hash_int(key->hash, CD_SEC_SEED) & CD_BUCKET_MASK;
+
+    /* Check if the signature is already in either of the two buckets. */
+    for (i = 0; i < CD_ENTRIES; i++) {
+        if (cd->buckets[prim_bucket].sig[i] == tmp_sig) {
+            cd->buckets[prim_bucket].table_index[i] = index;
+            return;
+        }
+        if (cd->buckets[sec_bucket].sig[i] == tmp_sig) {
+            cd->buckets[sec_bucket].table_index[i] = index;
+            return;
+        }
+    }
+
+    /* Check prim_bucket for an empty slot (table_index == 0). */
+    for (i = 0; i < CD_ENTRIES; i++) {
+        if (cd->buckets[prim_bucket].table_index[i] == 0) {
+            cd->buckets[prim_bucket].sig[i] = tmp_sig;
+            cd->buckets[prim_bucket].table_index[i] = index;
+            return;
+        }
+    }
+
+    /* Check sec_bucket for an empty slot.  (The loop above always exits
+     * with i == CD_ENTRIES when it falls through, so no extra test of 'i'
+     * is needed here.) */
+    for (i = 0; i < CD_ENTRIES; i++) {
+        if (cd->buckets[sec_bucket].table_index[i] == 0) {
+            cd->buckets[sec_bucket].sig[i] = tmp_sig;
+            cd->buckets[sec_bucket].table_index[i] = index;
+            return;
+        }
+    }
+
+    /* Both buckets are full: evict a random entry of a random bucket.
+     * Bit CD_ENTRIES of the random word picks the bucket; the low bits
+     * pick the victim entry. */
+    uint32_t random = random_uint32();
+    uint32_t bucket_choose = prim_bucket;
+    uint32_t evict_idx = random & CD_ENTRY_MASK;
+
+    if (random & CD_BKT_BIT) {
+        bucket_choose = sec_bucket;
+    }
+
+    cd->buckets[bucket_choose].sig[evict_idx] = tmp_sig;
+    cd->buckets[bucket_choose].table_index[evict_idx] = index;
+}
+
+/* 2-stage pipelined CD lookup: while the bucket pair of key k is probed,
+ * the buckets of key k+1 are prefetched.  On a hit, data[k] receives the
+ * sub_ptrs index cached for keys[k] and bit k of *hit_mask is set.  Only a
+ * 16-bit signature is compared, so false positives are possible and the
+ * caller must still verify the rule match.  'num_keys' must not exceed 32:
+ * hits are reported through a 32-bit mask and at most MAP_BITS slots of
+ * data[] are written by the caller's contract. */
+static void
+cd_lookup_bulk_pipe(struct dpcls *cls, const struct netdev_flow_key keys[],
+                    int32_t num_keys, uint32_t *hit_mask, int data[])
+{
+    int i;
+    uint32_t prim_hitmask = 0;
+    uint32_t sec_hitmask = 0;
+    uint64_t hits = 0;
+    uint32_t loc = 0;
+    struct cd_cache *cd = cls->cdtable;
+
+    if (num_keys == 0) {
+        *hit_mask = 0;
+        return;
+    }
+    cd_sig_t temp_sig0 = (keys[0].hash) & CD_SIG_MASK;
+
+    struct cd_bucket *prim_bkt0 =
+            &cd->buckets[hash_int(keys[0].hash, CD_PRIM_SEED)
+                & CD_BUCKET_MASK];
+    struct cd_bucket *sec_bkt0 =
+            &cd->buckets[hash_int(keys[0].hash, CD_SEC_SEED)
+                & CD_BUCKET_MASK];
+    OVS_PREFETCH(prim_bkt0);
+    OVS_PREFETCH(sec_bkt0);
+
+    for (i = 1; i < num_keys; i++) {
+        /* Stage 1: compute and prefetch the buckets of the next key. */
+        cd_sig_t temp_sig1 = (keys[i].hash) & CD_SIG_MASK;
+
+        struct cd_bucket *prim_bkt1 =
+                &cd->buckets[hash_int(keys[i].hash, CD_PRIM_SEED)
+                    & CD_BUCKET_MASK];
+        struct cd_bucket *sec_bkt1 =
+                &cd->buckets[hash_int(keys[i].hash, CD_SEC_SEED)
+                    & CD_BUCKET_MASK];
+
+        OVS_PREFETCH(prim_bkt1);
+        OVS_PREFETCH(sec_bkt1);
+
+        /* Stage 2: probe the already-prefetched buckets of key i-1.  Only
+         * the first matching entry of each bucket is considered. */
+        unsigned int j;
+        prim_hitmask = 0;
+        sec_hitmask = 0;
+
+        for (j = 0; j < CD_ENTRIES; j++) {
+            prim_hitmask |= ((temp_sig0 == prim_bkt0->sig[j]) << j);
+            sec_hitmask |= ((temp_sig0 == sec_bkt0->sig[j]) << j);
+        }
+
+        if (prim_hitmask) {
+            loc = raw_ctz(prim_hitmask);
+            data[i-1] = prim_bkt0->table_index[loc];
+            /* A hit is valid only if it points at a live subtable. */
+            if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+                /* UINT64_C avoids the undefined '1 << 31' int shift and
+                 * matches the 64-bit accumulator. */
+                hits |= UINT64_C(1) << (i - 1);
+                prim_bkt0 = prim_bkt1;
+                sec_bkt0 = sec_bkt1;
+                temp_sig0 = temp_sig1;
+                continue;
+            }
+        }
+
+        if (sec_hitmask) {
+            loc = raw_ctz(sec_hitmask);
+            data[i-1] = sec_bkt0->table_index[loc];
+            if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+                hits |= UINT64_C(1) << (i - 1);
+            }
+        }
+
+        prim_bkt0 = prim_bkt1;
+        sec_bkt0 = sec_bkt1;
+        temp_sig0 = temp_sig1;
+    }
+
+    /* Drain stage: probe the buckets of the final key. */
+    unsigned int j;
+    prim_hitmask = 0;
+    sec_hitmask = 0;
+
+    for (j = 0; j < CD_ENTRIES; j++) {
+        prim_hitmask |= ((temp_sig0 == prim_bkt0->sig[j]) << j);
+        sec_hitmask |= ((temp_sig0 == sec_bkt0->sig[j]) << j);
+    }
+
+    if (prim_hitmask) {
+        loc = raw_ctz(prim_hitmask);
+        data[i-1] = prim_bkt0->table_index[loc];
+        if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+            hits |= UINT64_C(1) << (i - 1);
+            if (hit_mask != NULL) {
+                *hit_mask = hits;
+            }
+            return;
+        }
+    }
+
+    if (sec_hitmask) {
+        loc = raw_ctz(sec_hitmask);
+        data[i-1] = sec_bkt0->table_index[loc];
+        if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+            hits |= UINT64_C(1) << (i - 1);
+        }
+    }
+
+    if (hit_mask != NULL) {
+        *hit_mask = hits;
+    }
+}
+
+/* Registers 'subtable' in cls->sub_ptrs and returns the slot it was given
+ * (1..SUB_PTR_LEN-1).  Returns -1 if 'subtable' is already registered, or
+ * 0 when every slot is taken (slot 0 is reserved as the "empty" marker). */
+static int
+insert_subtable_ptr(struct dpcls *cls, struct dpcls_subtable *subtable)
+{
+    int slot;
+
+    ovs_assert(subtable != NULL);
+
+    /* A subtable must never be registered twice. */
+    for (slot = 1; slot < SUB_PTR_LEN; slot++) {
+        if (cls->sub_ptrs[slot] == subtable) {
+            VLOG_ERR("already have the subtable in sub_ptrs");
+            return -1;
+        }
+    }
+
+    /* Claim the first free slot. */
+    for (slot = 1; slot < SUB_PTR_LEN; slot++) {
+        if (!cls->sub_ptrs[slot]) {
+            cls->sub_ptrs[slot] = subtable;
+            return slot;
+        }
+    }
+
+    /* More subtables exist than the SUB_PTR_LEN - 1 (255) usable slots. */
+    VLOG_INFO("create subtable in sub_ptrs failed, overflow");
+    return 0;
+}
+
+/* Unregisters 'subtable' from cls->sub_ptrs and returns the slot it
+ * occupied.  Returns 0 if it was never registered, which can happen when
+ * more subtables existed than sub_ptrs has slots. */
+static int
+remove_subtable_ptr(struct dpcls *cls, struct dpcls_subtable *subtable)
+{
+    int slot;
+
+    ovs_assert(subtable != NULL);
+    for (slot = 1; slot < SUB_PTR_LEN; slot++) {
+        if (cls->sub_ptrs[slot] == subtable) {
+            /* Mark the slot as empty again. */
+            cls->sub_ptrs[slot] = NULL;
+            return slot;
+        }
+    }
+
+    VLOG_INFO("cannot find the table ptr in sub_ptrs to remove");
+    return 0;
+}
+
+/* Returns the sub_ptrs slot currently holding 'subtable', or 0 (the
+ * reserved "empty" index) if it is not registered. */
+static int
+find_index_in_sub_ptrs(struct dpcls *cls,
+                       struct dpcls_subtable *subtable)
+{
+    int slot;
+
+    ovs_assert(subtable != NULL);
+    for (slot = 1; slot < SUB_PTR_LEN; slot++) {
+        if (cls->sub_ptrs[slot] == subtable) {
+            return slot;
+        }
+    }
+    return 0;
+}
+
static struct dp_netdev_flow *
dp_netdev_pmd_lookup_flow(struct dp_netdev_pmd_thread *pmd,
const struct netdev_flow_key *key,
@@ -2426,6 +2746,7 @@ out:
static struct dp_netdev_flow *
dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
+ const struct netdev_flow_key *key,
struct match *match, const ovs_u128 *ufid,
const struct nlattr *actions, size_t actions_len)
OVS_REQUIRES(pmd->flow_mutex)
@@ -2473,6 +2794,15 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow->node),
dp_netdev_flow_hash(&flow->ufid));
+ /* Insert to CD here. */
+ if (OVS_LIKELY(key != NULL)) {
+ struct dpcls_subtable *subtable = dpcls_find_subtable(cls, &mask);
+ int index = find_index_in_sub_ptrs(cls, subtable);
+
+ if (index != 0) {
+ cd_insert(cls->cdtable, key, index);
+ }
+ }
if (OVS_UNLIKELY(!VLOG_DROP_DBG((&upcall_rl)))) {
struct ds ds = DS_EMPTY_INITIALIZER;
@@ -2541,7 +2871,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
if (!netdev_flow) {
if (put->flags & DPIF_FP_CREATE) {
if (cmap_count(&pmd->flow_table) < MAX_FLOWS) {
- dp_netdev_flow_add(pmd, match, ufid, put->actions,
+ dp_netdev_flow_add(pmd, NULL, match, ufid, put->actions,
put->actions_len);
error = 0;
} else {
@@ -5019,7 +5349,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
ovs_mutex_lock(&pmd->flow_mutex);
netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL);
if (OVS_LIKELY(!netdev_flow)) {
- netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid,
+ netdev_flow = dp_netdev_flow_add(pmd, key, &match, &ufid,
add_actions->data,
add_actions->size);
}
@@ -5893,14 +6223,27 @@ struct dpcls_subtable {
static void
dpcls_init(struct dpcls *cls)
{
+    int i;
+
    cmap_init(&cls->subtables_map);
    pvector_init(&cls->subtables);
+    /* The CD table is large, so it is heap-allocated and 64-byte aligned
+     * to keep each bucket within one cache line. */
+    int ret = posix_memalign((void **)&cls->cdtable, 64,
+                             sizeof(struct cd_cache));
+    if (ret != 0) {
+        VLOG_ERR("Create cuckoo distributor failed");
+        /* posix_memalign() leaves cls->cdtable undefined on failure, so
+         * dereferencing it below would be undefined behavior; treat the
+         * allocation failure as fatal like other OVS allocators do. */
+        out_of_memory();
+    }
+    cd_init(cls->cdtable);
+    for (i = 0; i < SUB_PTR_LEN; i++) {
+        cls->sub_ptrs[i] = 0;
+    }
+    /* NOTE(review): this reseeds the process-wide RNG every time a
+     * classifier is created — confirm it is intended and not a debugging
+     * leftover, since it makes random_uint32() sequences repeat. */
+    random_set_seed(100);
}
static void
dpcls_destroy_subtable(struct dpcls *cls, struct dpcls_subtable *subtable)
{
VLOG_DBG("Destroying subtable %p for in_port %d", subtable, cls->in_port);
+ remove_subtable_ptr(cls, subtable);
pvector_remove(&cls->subtables, subtable);
cmap_remove(&cls->subtables_map, &subtable->cmap_node,
subtable->mask.hash);
@@ -5923,6 +6266,7 @@ dpcls_destroy(struct dpcls *cls)
}
cmap_destroy(&cls->subtables_map);
pvector_destroy(&cls->subtables);
+ cd_delete(cls->cdtable);
}
}
@@ -5943,6 +6287,9 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
VLOG_DBG("Creating %"PRIuSIZE". subtable %p for in_port %d",
cmap_count(&cls->subtables_map), subtable, cls->in_port);
pvector_publish(&cls->subtables);
+ int ret = insert_subtable_ptr(cls, subtable);
+ /* The subtable should not be in subtable-table yet */
+ ovs_assert(ret >= 0);
return subtable;
}
@@ -6095,6 +6442,59 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[],
int lookups_match = 0, subtable_pos = 1;
+ /*
+ * The cuckoo distributor lookup pass begin first before go to megaflow
+ * cache. CD hit will return a subtable index to the subtable to lookup.
+ */
+
+ int i;
+ int data[MAP_BITS];
+
+ cd_lookup_bulk_pipe(cls, keys, cnt, &found_map, data);
+
+ ULLONG_FOR_EACH_1(i, found_map) {
+ hashes[i] = netdev_flow_key_hash_in_mask(&keys[i],
+ &(cls->sub_ptrs[data[i]])->mask);
+ nodes[i] = cmap_find(&((cls->sub_ptrs[data[i]])->rules),
+ hashes[i]);
+ if (nodes[i] != NULL) {
+ struct dpcls_rule *rule;
+ CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) {
+ if (OVS_LIKELY(dpcls_rule_matches_key(rule, &keys[i]))) {
+ rules[i] = rule;
+ lookups_match += 1;
+ goto scnext;
+ }
+ }
+ ULLONG_SET0(found_map, i); /* Did not match. */
+ /*
+ * Here means key same in subtable but not same rule
+ * since CD find the correct subtable,
+ * we dont need to insert to CD.
+ */
+
+ scnext:
+ ;
+ } else if (nodes[i] == NULL) {
+ ULLONG_SET0(found_map, i);
+ /* Here means in CD but not in the target subtable.
+ * meaning that it matches to a same (but wrong) key in CD.
+ * we should insert it into CD later when we know
+ * which subtable it hits.
+ */
+ }
+ }
+
+ keys_map &= ~found_map;
+
+ if (!keys_map) {
+ if (num_lookups_p) {
+ *num_lookups_p = lookups_match;
+ }
+
+ return true; /* All found in CD. */
+ }
+
/* The Datapath classifier - aka dpcls - is composed of subtables.
* Subtables are dynamically created as needed when new rules are inserted.
* Each subtable collects rules with matches on a specific subset of packet
@@ -6135,8 +6535,21 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[],
/* None of the found rules was a match. Reset the i-th bit to
* keep searching this key in the next subtable. */
ULLONG_SET0(found_map, i); /* Did not match. */
+ continue;
next:
; /* Keep Sparse happy. */
+ /*
+ * If we find things here, it means it misses in CD.
+ * We should insert into CD.
+ */
+ int index = find_index_in_sub_ptrs(cls, subtable);
+ /*
+ * If 0, means not in sub_ptrs, then no need to insert
+ * into CD.
+ */
+ if (index != 0) {
+ cd_insert(cls->cdtable, &keys[i], index);
+ }
}
keys_map &= ~found_map; /* Clear the found rules. */
if (!keys_map) {
@@ -6150,5 +6563,9 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[],
if (num_lookups_p) {
*num_lookups_p = lookups_match;
}
+ /*
+ * Things that miss in both tables should also be inserted into CD.
+ * the upcall function should be able to handle it.
+ */
return false; /* Some misses. */
}