Message ID | 1476455835-77641-2-git-send-email-bhanuprakash.bodireddy@intel.com |
---|---|
State | Superseded |
Delegated to: | Daniele Di Proietto |
Headers | show |
2016-10-14 7:37 GMT-07:00 Bhanuprakash Bodireddy < bhanuprakash.bodireddy@intel.com>: > This patch increases the number of packets processed in a batch during a > lookup from 16 to 32. Processing batches of 32 packets improves > performance and also one of the internal loops can be avoided here. > > Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com> > Co-authored-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com> > Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com> > I guess Co-authored-by should have Antonio? Also, the (already existing code) is not trivial, I'd like to take another look at it Thanks, Daniele > --- > lib/dpif-netdev.c | 110 ++++++++++++++++++++++-------- > ------------------------ > 1 file changed, 45 insertions(+), 65 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index eb9f764..0a4f338 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -4985,23 +4985,21 @@ dpcls_lookup(struct dpcls *cls, const struct > netdev_flow_key keys[], > int *num_lookups_p) > { > /* The received 'cnt' miniflows are the search-keys that will be > processed > - * in batches of 16 elements. N_MAPS will contain the number of these > - * 16-elements batches. i.e. for 'cnt' = 32, N_MAPS will be 2. The > batch > - * size 16 was experimentally found faster than 8 or 32. */ > - typedef uint16_t map_type; > + * to find a matching entry into the available subtables. > + * The number of bits in map_type is equal to NETDEV_MAX_BURST. */ > + typedef uint32_t map_type; > #define MAP_BITS (sizeof(map_type) * CHAR_BIT) > + BUILD_ASSERT_DECL(MAP_BITS >= NETDEV_MAX_BURST); > > -#if !defined(__CHECKER__) && !defined(_WIN32) > - const int N_MAPS = DIV_ROUND_UP(cnt, MAP_BITS); > -#else > - enum { N_MAPS = DIV_ROUND_UP(NETDEV_MAX_BURST, MAP_BITS) }; > -#endif > - map_type maps[N_MAPS]; > struct dpcls_subtable *subtable; > > - memset(maps, 0xff, sizeof maps); > - if (cnt % MAP_BITS) { > - maps[N_MAPS - 1] >>= MAP_BITS - cnt % MAP_BITS; /* Clear extra > bits. */ > + map_type keys_map = TYPE_MAXIMUM(map_type); > + map_type found_map; > + uint32_t hashes[MAP_BITS]; > + const struct cmap_node *nodes[MAP_BITS]; > + > + if (cnt != NETDEV_MAX_BURST) { > + keys_map >>= NETDEV_MAX_BURST - cnt; /* Clear extra bits. */ > } > memset(rules, 0, cnt * sizeof *rules); > > @@ -5015,61 +5013,43 @@ dpcls_lookup(struct dpcls *cls, const struct > netdev_flow_key keys[], > * search-key, the search for that key can stop because the rules are > * non-overlapping. */ > PVECTOR_FOR_EACH (subtable, &cls->subtables) { > - const struct netdev_flow_key *mkeys = keys; > - struct dpcls_rule **mrules = rules; > - map_type remains = 0; > - int m; > - > - BUILD_ASSERT_DECL(sizeof remains == sizeof *maps); > - > - /* Loops on each batch of 16 search-keys. */ > - for (m = 0; m < N_MAPS; m++, mkeys += MAP_BITS, mrules += > MAP_BITS) { > - uint32_t hashes[MAP_BITS]; > - const struct cmap_node *nodes[MAP_BITS]; > - unsigned long map = maps[m]; > - int i; > - > - if (!map) { > - continue; /* Skip empty maps. */ > - } > - > - /* Compute hashes for the remaining keys. Each search-key is > - * masked with the subtable's mask to avoid hashing the > wildcarded > - * bits. */ > - ULLONG_FOR_EACH_1(i, map) { > - hashes[i] = netdev_flow_key_hash_in_mask(&mkeys[i], > - &subtable->mask); > - } > - /* Lookup. */ > - map = cmap_find_batch(&subtable->rules, map, hashes, nodes); > - /* Check results. When the i-th bit of map is set, it means > that a > - * set of nodes with a matching hash value was found for the > i-th > - * search-key. Due to possible hash collisions we need to > check > - * which of the found rules, if any, really matches our masked > - * search-key. */ > - ULLONG_FOR_EACH_1(i, map) { > - struct dpcls_rule *rule; > - > - CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { > - if (OVS_LIKELY(dpcls_rule_matches_key(rule, > &mkeys[i]))) { > - mrules[i] = rule; > - /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap > - * within one second optimization interval */ > - subtable->hit_cnt++; > - lookups_match += subtable_pos; > - goto next; > - } > + int i; > + > + /* Compute hashes for the remaining keys. Each search-key is > + * masked with the subtable's mask to avoid hashing the wildcarded > + * bits. */ > + ULLONG_FOR_EACH_1(i, keys_map) { > + hashes[i] = netdev_flow_key_hash_in_mask(&keys[i], > + &subtable->mask); > + } > + /* Lookup. */ > + found_map = cmap_find_batch(&subtable->rules, keys_map, hashes, > nodes); > + /* Check results. When the i-th bit of found_map is set, it means > + * that a set of nodes with a matching hash value was found for > the > + * i-th search-key. Due to possible hash collisions we need to > check > + * which of the found rules, if any, really matches our masked > + * search-key. */ > + ULLONG_FOR_EACH_1(i, found_map) { > + struct dpcls_rule *rule; > + > + CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { > + if (OVS_LIKELY(dpcls_rule_matches_key(rule, &keys[i]))) { > + rules[i] = rule; > + /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap > + * within one second optimization interval. */ > + subtable->hit_cnt++; > + lookups_match += subtable_pos; > + goto next; > } > - /* None of the found rules was a match. Reset the i-th > bit to > - * keep searching in the next subtable. */ > - ULLONG_SET0(map, i); /* Did not match. */ > - next: > - ; /* Keep Sparse happy. */ > } > - maps[m] &= ~map; /* Clear the found rules. */ > - remains |= maps[m]; > + /* None of the found rules was a match. Reset the i-th bit to > + * keep searching this key in the next subtable. */ > + ULLONG_SET0(found_map, i); /* Did not match. */ > + next: > + ; /* Keep Sparse happy. */ > } > - if (!remains) { > + keys_map &= ~found_map; /* Clear the found rules. */ > + if (!keys_map) { > if (num_lookups_p) { > *num_lookups_p = lookups_match; > } > -- > 2.4.11 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev >
>-----Original Message----- >From: Daniele Di Proietto [mailto:diproiettod@ovn.org] >Sent: Tuesday, October 18, 2016 4:04 AM >To: Bodireddy, Bhanuprakash <bhanuprakash.bodireddy@intel.com> >Cc: dev@openvswitch.org >Subject: Re: [ovs-dev] [PATCH v3 01/12] dpcls: Use 32 packet batches for >lookups. > > > >2016-10-14 7:37 GMT-07:00 Bhanuprakash Bodireddy ><bhanuprakash.bodireddy@intel.com>: >This patch increases the number of packets processed in a batch during a >lookup from 16 to 32. Processing batches of 32 packets improves >performance and also one of the internal loops can be avoided here. > >Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com> >Co-authored-by: Bhanuprakash Bodireddy ><bhanuprakash.bodireddy@intel.com> >Signed-off-by: Bhanuprakash Bodireddy ><bhanuprakash.bodireddy@intel.com> > >I guess Co-authored-by should have Antonio? My mistake, I am sending out the remaining 4 patches separately with appropriate tags. >Also, the (already existing code) is not trivial, I'd like to take another look at it Sure, let us know if you have more comments. Regards, Bhanuprakash. >Thanks, >Daniele > >--- > lib/dpif-netdev.c | 110 ++++++++++++++++++++++------------------------------- >- > 1 file changed, 45 insertions(+), 65 deletions(-) > >diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >index eb9f764..0a4f338 100644 >--- a/lib/dpif-netdev.c >+++ b/lib/dpif-netdev.c >@@ -4985,23 +4985,21 @@ dpcls_lookup(struct dpcls *cls, const struct >netdev_flow_key keys[], > int *num_lookups_p) > { > /* The received 'cnt' miniflows are the search-keys that will be processed >- * in batches of 16 elements. N_MAPS will contain the number of these >- * 16-elements batches. i.e. for 'cnt' = 32, N_MAPS will be 2. The batch >- * size 16 was experimentally found faster than 8 or 32. */ >- typedef uint16_t map_type; >+ * to find a matching entry into the available subtables. >+ * The number of bits in map_type is equal to NETDEV_MAX_BURST. */ >+ typedef uint32_t map_type; > #define MAP_BITS (sizeof(map_type) * CHAR_BIT) >+ BUILD_ASSERT_DECL(MAP_BITS >= NETDEV_MAX_BURST); > >-#if !defined(__CHECKER__) && !defined(_WIN32) >- const int N_MAPS = DIV_ROUND_UP(cnt, MAP_BITS); >-#else >- enum { N_MAPS = DIV_ROUND_UP(NETDEV_MAX_BURST, MAP_BITS) }; >-#endif >- map_type maps[N_MAPS]; > struct dpcls_subtable *subtable; > >- memset(maps, 0xff, sizeof maps); >- if (cnt % MAP_BITS) { >- maps[N_MAPS - 1] >>= MAP_BITS - cnt % MAP_BITS; /* Clear extra bits. >*/ >+ map_type keys_map = TYPE_MAXIMUM(map_type); >+ map_type found_map; >+ uint32_t hashes[MAP_BITS]; >+ const struct cmap_node *nodes[MAP_BITS]; >+ >+ if (cnt != NETDEV_MAX_BURST) { >+ keys_map >>= NETDEV_MAX_BURST - cnt; /* Clear extra bits. */ > } > memset(rules, 0, cnt * sizeof *rules); > >@@ -5015,61 +5013,43 @@ dpcls_lookup(struct dpcls *cls, const struct >netdev_flow_key keys[], > * search-key, the search for that key can stop because the rules are > * non-overlapping. */ > PVECTOR_FOR_EACH (subtable, &cls->subtables) { >- const struct netdev_flow_key *mkeys = keys; >- struct dpcls_rule **mrules = rules; >- map_type remains = 0; >- int m; >- >- BUILD_ASSERT_DECL(sizeof remains == sizeof *maps); >- >- /* Loops on each batch of 16 search-keys. */ >- for (m = 0; m < N_MAPS; m++, mkeys += MAP_BITS, mrules += >MAP_BITS) { >- uint32_t hashes[MAP_BITS]; >- const struct cmap_node *nodes[MAP_BITS]; >- unsigned long map = maps[m]; >- int i; >- >- if (!map) { >- continue; /* Skip empty maps. */ >- } >- >- /* Compute hashes for the remaining keys. Each search-key is >- * masked with the subtable's mask to avoid hashing the wildcarded >- * bits. */ >- ULLONG_FOR_EACH_1(i, map) { >- hashes[i] = netdev_flow_key_hash_in_mask(&mkeys[i], >- &subtable->mask); >- } >- /* Lookup. */ >- map = cmap_find_batch(&subtable->rules, map, hashes, nodes); >- /* Check results. When the i-th bit of map is set, it means that a >- * set of nodes with a matching hash value was found for the i-th >- * search-key. Due to possible hash collisions we need to check >- * which of the found rules, if any, really matches our masked >- * search-key. */ >- ULLONG_FOR_EACH_1(i, map) { >- struct dpcls_rule *rule; >- >- CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { >- if (OVS_LIKELY(dpcls_rule_matches_key(rule, &mkeys[i]))) { >- mrules[i] = rule; >- /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap >- * within one second optimization interval */ >- subtable->hit_cnt++; >- lookups_match += subtable_pos; >- goto next; >- } >+ int i; >+ >+ /* Compute hashes for the remaining keys. Each search-key is >+ * masked with the subtable's mask to avoid hashing the wildcarded >+ * bits. */ >+ ULLONG_FOR_EACH_1(i, keys_map) { >+ hashes[i] = netdev_flow_key_hash_in_mask(&keys[i], >+ &subtable->mask); >+ } >+ /* Lookup. */ >+ found_map = cmap_find_batch(&subtable->rules, keys_map, hashes, >nodes); >+ /* Check results. When the i-th bit of found_map is set, it means >+ * that a set of nodes with a matching hash value was found for the >+ * i-th search-key. Due to possible hash collisions we need to check >+ * which of the found rules, if any, really matches our masked >+ * search-key. */ >+ ULLONG_FOR_EACH_1(i, found_map) { >+ struct dpcls_rule *rule; >+ >+ CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { >+ if (OVS_LIKELY(dpcls_rule_matches_key(rule, &keys[i]))) { >+ rules[i] = rule; >+ /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap >+ * within one second optimization interval. */ >+ subtable->hit_cnt++; >+ lookups_match += subtable_pos; >+ goto next; > } >- /* None of the found rules was a match. Reset the i-th bit to >- * keep searching in the next subtable. */ >- ULLONG_SET0(map, i); /* Did not match. */ >- next: >- ; /* Keep Sparse happy. */ > } >- maps[m] &= ~map; /* Clear the found rules. */ >- remains |= maps[m]; >+ /* None of the found rules was a match. Reset the i-th bit to >+ * keep searching this key in the next subtable. */ >+ ULLONG_SET0(found_map, i); /* Did not match. */ >+ next: >+ ; /* Keep Sparse happy. */ > } >- if (!remains) { >+ keys_map &= ~found_map; /* Clear the found rules. */ >+ if (!keys_map) { > if (num_lookups_p) { > *num_lookups_p = lookups_match; > } >-- >2.4.11 > >_______________________________________________ >dev mailing list >dev@openvswitch.org >http://openvswitch.org/mailman/listinfo/dev
See one comment below, Jarno > On Oct 14, 2016, at 7:37 AM, Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com> wrote: > > This patch increases the number of packets processed in a batch during a > lookup from 16 to 32. Processing batches of 32 packets improves > performance and also one of the internal loops can be avoided here. > > Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com> > Co-authored-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com> > Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com> > --- > lib/dpif-netdev.c | 110 ++++++++++++++++++++++-------------------------------- > 1 file changed, 45 insertions(+), 65 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index eb9f764..0a4f338 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -4985,23 +4985,21 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[], > int *num_lookups_p) > { > /* The received 'cnt' miniflows are the search-keys that will be processed > - * in batches of 16 elements. N_MAPS will contain the number of these > - * 16-elements batches. i.e. for 'cnt' = 32, N_MAPS will be 2. The batch > - * size 16 was experimentally found faster than 8 or 32. */ > - typedef uint16_t map_type; > + * to find a matching entry into the available subtables. > + * The number of bits in map_type is equal to NETDEV_MAX_BURST. */ > + typedef uint32_t map_type; > #define MAP_BITS (sizeof(map_type) * CHAR_BIT) > + BUILD_ASSERT_DECL(MAP_BITS >= NETDEV_MAX_BURST); > > -#if !defined(__CHECKER__) && !defined(_WIN32) > - const int N_MAPS = DIV_ROUND_UP(cnt, MAP_BITS); > -#else > - enum { N_MAPS = DIV_ROUND_UP(NETDEV_MAX_BURST, MAP_BITS) }; > -#endif > - map_type maps[N_MAPS]; > struct dpcls_subtable *subtable; > > - memset(maps, 0xff, sizeof maps); > - if (cnt % MAP_BITS) { > - maps[N_MAPS - 1] >>= MAP_BITS - cnt % MAP_BITS; /* Clear extra bits. */ > + map_type keys_map = TYPE_MAXIMUM(map_type); > + map_type found_map; > + uint32_t hashes[MAP_BITS]; > + const struct cmap_node *nodes[MAP_BITS]; > + > + if (cnt != NETDEV_MAX_BURST) { > + keys_map >>= NETDEV_MAX_BURST - cnt; /* Clear extra bits. */ ‘keys_maps’ has MAP_BITS bits set, and after this is should only have the ‘cnt’ LSBs set. The assert above allows NETDEV_MAX_BURST to be less than MAP_BITS, hence MAP_BITS must be used here instead. Otherwise looks good to me, so with this change: Acked-by: Jarno Rajahalme <jarno@ovn.org> > } > memset(rules, 0, cnt * sizeof *rules); > > @@ -5015,61 +5013,43 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[], > * search-key, the search for that key can stop because the rules are > * non-overlapping. */ > PVECTOR_FOR_EACH (subtable, &cls->subtables) { > - const struct netdev_flow_key *mkeys = keys; > - struct dpcls_rule **mrules = rules; > - map_type remains = 0; > - int m; > - > - BUILD_ASSERT_DECL(sizeof remains == sizeof *maps); > - > - /* Loops on each batch of 16 search-keys. */ > - for (m = 0; m < N_MAPS; m++, mkeys += MAP_BITS, mrules += MAP_BITS) { > - uint32_t hashes[MAP_BITS]; > - const struct cmap_node *nodes[MAP_BITS]; > - unsigned long map = maps[m]; > - int i; > - > - if (!map) { > - continue; /* Skip empty maps. */ > - } > - > - /* Compute hashes for the remaining keys. Each search-key is > - * masked with the subtable's mask to avoid hashing the wildcarded > - * bits. */ > - ULLONG_FOR_EACH_1(i, map) { > - hashes[i] = netdev_flow_key_hash_in_mask(&mkeys[i], > - &subtable->mask); > - } > - /* Lookup. */ > - map = cmap_find_batch(&subtable->rules, map, hashes, nodes); > - /* Check results. When the i-th bit of map is set, it means that a > - * set of nodes with a matching hash value was found for the i-th > - * search-key. Due to possible hash collisions we need to check > - * which of the found rules, if any, really matches our masked > - * search-key. */ > - ULLONG_FOR_EACH_1(i, map) { > - struct dpcls_rule *rule; > - > - CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { > - if (OVS_LIKELY(dpcls_rule_matches_key(rule, &mkeys[i]))) { > - mrules[i] = rule; > - /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap > - * within one second optimization interval */ > - subtable->hit_cnt++; > - lookups_match += subtable_pos; > - goto next; > - } > + int i; > + > + /* Compute hashes for the remaining keys. Each search-key is > + * masked with the subtable's mask to avoid hashing the wildcarded > + * bits. */ > + ULLONG_FOR_EACH_1(i, keys_map) { > + hashes[i] = netdev_flow_key_hash_in_mask(&keys[i], > + &subtable->mask); > + } > + /* Lookup. */ > + found_map = cmap_find_batch(&subtable->rules, keys_map, hashes, nodes); > + /* Check results. When the i-th bit of found_map is set, it means > + * that a set of nodes with a matching hash value was found for the > + * i-th search-key. Due to possible hash collisions we need to check > + * which of the found rules, if any, really matches our masked > + * search-key. */ > + ULLONG_FOR_EACH_1(i, found_map) { > + struct dpcls_rule *rule; > + > + CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { > + if (OVS_LIKELY(dpcls_rule_matches_key(rule, &keys[i]))) { > + rules[i] = rule; > + /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap > + * within one second optimization interval. */ > + subtable->hit_cnt++; > + lookups_match += subtable_pos; > + goto next; > } > - /* None of the found rules was a match. Reset the i-th bit to > - * keep searching in the next subtable. */ > - ULLONG_SET0(map, i); /* Did not match. */ > - next: > - ; /* Keep Sparse happy. */ > } > - maps[m] &= ~map; /* Clear the found rules. */ > - remains |= maps[m]; > + /* None of the found rules was a match. Reset the i-th bit to > + * keep searching this key in the next subtable. */ > + ULLONG_SET0(found_map, i); /* Did not match. */ > + next: > + ; /* Keep Sparse happy. */ > } > - if (!remains) { > + keys_map &= ~found_map; /* Clear the found rules. */ > + if (!keys_map) { > if (num_lookups_p) { > *num_lookups_p = lookups_match; > } > -- > 2.4.11 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev
Thanks Jarno for spotting this, will do the suggested change. Antonio > -----Original Message----- > From: dev [mailto:dev-bounces@openvswitch.org] On Behalf Of Jarno > Rajahalme > Sent: Tuesday, October 18, 2016 6:26 PM > To: Bodireddy, Bhanuprakash <bhanuprakash.bodireddy@intel.com> > Cc: dev@openvswitch.org > Subject: Re: [ovs-dev] [PATCH v3 01/12] dpcls: Use 32 packet batches for > lookups. > > See one comment below, > > Jarno > > > On Oct 14, 2016, at 7:37 AM, Bhanuprakash Bodireddy > <bhanuprakash.bodireddy@intel.com> wrote: > > > > This patch increases the number of packets processed in a batch during a > > lookup from 16 to 32. Processing batches of 32 packets improves > > performance and also one of the internal loops can be avoided here. > > > > Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com> > > Co-authored-by: Bhanuprakash Bodireddy > <bhanuprakash.bodireddy@intel.com> > > Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com> > > --- > > lib/dpif-netdev.c | 110 ++++++++++++++++++++++-------------------------- > ------ > > 1 file changed, 45 insertions(+), 65 deletions(-) > > > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > > index eb9f764..0a4f338 100644 > > --- a/lib/dpif-netdev.c > > +++ b/lib/dpif-netdev.c > > @@ -4985,23 +4985,21 @@ dpcls_lookup(struct dpcls *cls, const struct > netdev_flow_key keys[], > > int *num_lookups_p) > > { > > /* The received 'cnt' miniflows are the search-keys that will be > processed > > - * in batches of 16 elements. N_MAPS will contain the number of > these > > - * 16-elements batches. i.e. for 'cnt' = 32, N_MAPS will be 2. > The batch > > - * size 16 was experimentally found faster than 8 or 32. */ > > - typedef uint16_t map_type; > > + * to find a matching entry into the available subtables. > > + * The number of bits in map_type is equal to NETDEV_MAX_BURST. */ > > + typedef uint32_t map_type; > > #define MAP_BITS (sizeof(map_type) * CHAR_BIT) > > + BUILD_ASSERT_DECL(MAP_BITS >= NETDEV_MAX_BURST); > > > > -#if !defined(__CHECKER__) && !defined(_WIN32) > > - const int N_MAPS = DIV_ROUND_UP(cnt, MAP_BITS); > > -#else > > - enum { N_MAPS = DIV_ROUND_UP(NETDEV_MAX_BURST, MAP_BITS) }; > > -#endif > > - map_type maps[N_MAPS]; > > struct dpcls_subtable *subtable; > > > > - memset(maps, 0xff, sizeof maps); > > - if (cnt % MAP_BITS) { > > - maps[N_MAPS - 1] >>= MAP_BITS - cnt % MAP_BITS; /* Clear extra > bits. */ > > + map_type keys_map = TYPE_MAXIMUM(map_type); > > + map_type found_map; > > + uint32_t hashes[MAP_BITS]; > > + const struct cmap_node *nodes[MAP_BITS]; > > + > > + if (cnt != NETDEV_MAX_BURST) { > > + keys_map >>= NETDEV_MAX_BURST - cnt; /* Clear extra bits. */ > > ‘keys_maps’ has MAP_BITS bits set, and after this is should only have the > ‘cnt’ LSBs set. The assert above allows NETDEV_MAX_BURST to be less than > MAP_BITS, hence MAP_BITS must be used here instead. > > Otherwise looks good to me, so with this change: > > Acked-by: Jarno Rajahalme <jarno@ovn.org> > > > > } > > memset(rules, 0, cnt * sizeof *rules); > > > > @@ -5015,61 +5013,43 @@ dpcls_lookup(struct dpcls *cls, const struct > netdev_flow_key keys[], > > * search-key, the search for that key can stop because the rules > are > > * non-overlapping. */ > > PVECTOR_FOR_EACH (subtable, &cls->subtables) { > > - const struct netdev_flow_key *mkeys = keys; > > - struct dpcls_rule **mrules = rules; > > - map_type remains = 0; > > - int m; > > - > > - BUILD_ASSERT_DECL(sizeof remains == sizeof *maps); > > - > > - /* Loops on each batch of 16 search-keys. */ > > - for (m = 0; m < N_MAPS; m++, mkeys += MAP_BITS, mrules += > MAP_BITS) { > > - uint32_t hashes[MAP_BITS]; > > - const struct cmap_node *nodes[MAP_BITS]; > > - unsigned long map = maps[m]; > > - int i; > > - > > - if (!map) { > > - continue; /* Skip empty maps. */ > > - } > > - > > - /* Compute hashes for the remaining keys. Each search-key > is > > - * masked with the subtable's mask to avoid hashing the > wildcarded > > - * bits. */ > > - ULLONG_FOR_EACH_1(i, map) { > > - hashes[i] = netdev_flow_key_hash_in_mask(&mkeys[i], > > - &subtable- > >mask); > > - } > > - /* Lookup. */ > > - map = cmap_find_batch(&subtable->rules, map, hashes, > nodes); > > - /* Check results. When the i-th bit of map is set, it > means that a > > - * set of nodes with a matching hash value was found for > the i-th > > - * search-key. Due to possible hash collisions we need to > check > > - * which of the found rules, if any, really matches our > masked > > - * search-key. */ > > - ULLONG_FOR_EACH_1(i, map) { > > - struct dpcls_rule *rule; > > - > > - CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { > > - if (OVS_LIKELY(dpcls_rule_matches_key(rule, > &mkeys[i]))) { > > - mrules[i] = rule; > > - /* Even at 20 Mpps the 32-bit hit_cnt cannot > wrap > > - * within one second optimization interval */ > > - subtable->hit_cnt++; > > - lookups_match += subtable_pos; > > - goto next; > > - } > > + int i; > > + > > + /* Compute hashes for the remaining keys. Each search-key is > > + * masked with the subtable's mask to avoid hashing the > wildcarded > > + * bits. */ > > + ULLONG_FOR_EACH_1(i, keys_map) { > > + hashes[i] = netdev_flow_key_hash_in_mask(&keys[i], > > + &subtable->mask); > > + } > > + /* Lookup. */ > > + found_map = cmap_find_batch(&subtable->rules, keys_map, hashes, > nodes); > > + /* Check results. When the i-th bit of found_map is set, it > means > > + * that a set of nodes with a matching hash value was found for > the > > + * i-th search-key. Due to possible hash collisions we need to > check > > + * which of the found rules, if any, really matches our masked > > + * search-key. */ > > + ULLONG_FOR_EACH_1(i, found_map) { > > + struct dpcls_rule *rule; > > + > > + CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { > > + if (OVS_LIKELY(dpcls_rule_matches_key(rule, &keys[i]))) > { > > + rules[i] = rule; > > + /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap > > + * within one second optimization interval. */ > > + subtable->hit_cnt++; > > + lookups_match += subtable_pos; > > + goto next; > > } > > - /* None of the found rules was a match. Reset the i-th > bit to > > - * keep searching in the next subtable. */ > > - ULLONG_SET0(map, i); /* Did not match. */ > > - next: > > - ; /* Keep Sparse happy. */ > > } > > - maps[m] &= ~map; /* Clear the found rules. */ > > - remains |= maps[m]; > > + /* None of the found rules was a match. Reset the i-th bit > to > > + * keep searching this key in the next subtable. */ > > + ULLONG_SET0(found_map, i); /* Did not match. */ > > + next: > > + ; /* Keep Sparse happy. */ > > } > > - if (!remains) { > > + keys_map &= ~found_map; /* Clear the found rules. > */ > > + if (!keys_map) { > > if (num_lookups_p) { > > *num_lookups_p = lookups_match; > > } > > -- > > 2.4.11 > > > > _______________________________________________ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index eb9f764..0a4f338 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -4985,23 +4985,21 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[], int *num_lookups_p) { /* The received 'cnt' miniflows are the search-keys that will be processed - * in batches of 16 elements. N_MAPS will contain the number of these - * 16-elements batches. i.e. for 'cnt' = 32, N_MAPS will be 2. The batch - * size 16 was experimentally found faster than 8 or 32. */ - typedef uint16_t map_type; + * to find a matching entry into the available subtables. + * The number of bits in map_type is equal to NETDEV_MAX_BURST. */ + typedef uint32_t map_type; #define MAP_BITS (sizeof(map_type) * CHAR_BIT) + BUILD_ASSERT_DECL(MAP_BITS >= NETDEV_MAX_BURST); -#if !defined(__CHECKER__) && !defined(_WIN32) - const int N_MAPS = DIV_ROUND_UP(cnt, MAP_BITS); -#else - enum { N_MAPS = DIV_ROUND_UP(NETDEV_MAX_BURST, MAP_BITS) }; -#endif - map_type maps[N_MAPS]; struct dpcls_subtable *subtable; - memset(maps, 0xff, sizeof maps); - if (cnt % MAP_BITS) { - maps[N_MAPS - 1] >>= MAP_BITS - cnt % MAP_BITS; /* Clear extra bits. */ + map_type keys_map = TYPE_MAXIMUM(map_type); + map_type found_map; + uint32_t hashes[MAP_BITS]; + const struct cmap_node *nodes[MAP_BITS]; + + if (cnt != NETDEV_MAX_BURST) { + keys_map >>= NETDEV_MAX_BURST - cnt; /* Clear extra bits. */ } memset(rules, 0, cnt * sizeof *rules); @@ -5015,61 +5013,43 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[], * search-key, the search for that key can stop because the rules are * non-overlapping. */ PVECTOR_FOR_EACH (subtable, &cls->subtables) { - const struct netdev_flow_key *mkeys = keys; - struct dpcls_rule **mrules = rules; - map_type remains = 0; - int m; - - BUILD_ASSERT_DECL(sizeof remains == sizeof *maps); - - /* Loops on each batch of 16 search-keys. */ - for (m = 0; m < N_MAPS; m++, mkeys += MAP_BITS, mrules += MAP_BITS) { - uint32_t hashes[MAP_BITS]; - const struct cmap_node *nodes[MAP_BITS]; - unsigned long map = maps[m]; - int i; - - if (!map) { - continue; /* Skip empty maps. */ - } - - /* Compute hashes for the remaining keys. Each search-key is - * masked with the subtable's mask to avoid hashing the wildcarded - * bits. */ - ULLONG_FOR_EACH_1(i, map) { - hashes[i] = netdev_flow_key_hash_in_mask(&mkeys[i], - &subtable->mask); - } - /* Lookup. */ - map = cmap_find_batch(&subtable->rules, map, hashes, nodes); - /* Check results. When the i-th bit of map is set, it means that a - * set of nodes with a matching hash value was found for the i-th - * search-key. Due to possible hash collisions we need to check - * which of the found rules, if any, really matches our masked - * search-key. */ - ULLONG_FOR_EACH_1(i, map) { - struct dpcls_rule *rule; - - CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { - if (OVS_LIKELY(dpcls_rule_matches_key(rule, &mkeys[i]))) { - mrules[i] = rule; - /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap - * within one second optimization interval */ - subtable->hit_cnt++; - lookups_match += subtable_pos; - goto next; - } + int i; + + /* Compute hashes for the remaining keys. Each search-key is + * masked with the subtable's mask to avoid hashing the wildcarded + * bits. */ + ULLONG_FOR_EACH_1(i, keys_map) { + hashes[i] = netdev_flow_key_hash_in_mask(&keys[i], + &subtable->mask); + } + /* Lookup. */ + found_map = cmap_find_batch(&subtable->rules, keys_map, hashes, nodes); + /* Check results. When the i-th bit of found_map is set, it means + * that a set of nodes with a matching hash value was found for the + * i-th search-key. Due to possible hash collisions we need to check + * which of the found rules, if any, really matches our masked + * search-key. */ + ULLONG_FOR_EACH_1(i, found_map) { + struct dpcls_rule *rule; + + CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { + if (OVS_LIKELY(dpcls_rule_matches_key(rule, &keys[i]))) { + rules[i] = rule; + /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap + * within one second optimization interval. */ + subtable->hit_cnt++; + lookups_match += subtable_pos; + goto next; } - /* None of the found rules was a match. Reset the i-th bit to - * keep searching in the next subtable. */ - ULLONG_SET0(map, i); /* Did not match. */ - next: - ; /* Keep Sparse happy. */ } - maps[m] &= ~map; /* Clear the found rules. */ - remains |= maps[m]; + /* None of the found rules was a match. Reset the i-th bit to + * keep searching this key in the next subtable. */ + ULLONG_SET0(found_map, i); /* Did not match. */ + next: + ; /* Keep Sparse happy. */ } - if (!remains) { + keys_map &= ~found_map; /* Clear the found rules. */ + if (!keys_map) { if (num_lookups_p) { *num_lookups_p = lookups_match; }