diff mbox series

[ovs-dev,v10,1/5] dpif-netdev: implement function pointers/subtable

Message ID 20190709123440.45519-2-harry.van.haaren@intel.com
State Superseded
Delegated to: Ian Stokes
Headers show
Series dpcls func ptrs & optimizations | expand

Commit Message

Van Haaren, Harry July 9, 2019, 12:34 p.m. UTC
This allows plugging-in of different subtable hash-lookup-verify
routines, and allows special casing of those functions based on
known context (eg: # of bits set) of the specific subtable.

Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
Tested-by: Malvika Gupta <malvika.gupta@arm.com>

---

v10:
- Fix capitalization of comments, and punctuation. (Ian)
- Variable declarations up top before use (Ian)
- Fix alignment of function parameters, had to newline after typedef (Ian)
- Some mailing-list questions relpied to on-list (Ian)

v9:
- Use count_1bits in favour of __builtin_popcount (Ilya)

v6:
- Implement subtable effort per packet "lookups_match" counter  (Ilya)
- Remove double newline (Eelco)
- Remove double * before comments (Eelco)
- Reword comments in dpcls_lookup() for clarity (Harry)
---
 lib/dpif-netdev.c | 138 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 96 insertions(+), 42 deletions(-)

Comments

Stokes, Ian July 10, 2019, 3:30 p.m. UTC | #1
On 7/9/2019 1:34 PM, Harry van Haaren wrote:
> This allows plugging-in of different subtable hash-lookup-verify
> routines, and allows special casing of those functions based on
> known context (eg: # of bits set) of the specific subtable.
> 
> Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
> Tested-by: Malvika Gupta <malvika.gupta@arm.com>
> 

Thanks for this harry, few minor comments below.

> ---
> 
> v10:
> - Fix capitalization of comments, and punctuation. (Ian)
> - Variable declarations up top before use (Ian)
> - Fix alignment of function parameters, had to newline after typedef (Ian)
> - Some mailing-list questions relpied to on-list (Ian)
> 
> v9:
> - Use count_1bits in favour of __builtin_popcount (Ilya)
> 
> v6:
> - Implement subtable effort per packet "lookups_match" counter  (Ilya)
> - Remove double newline (Eelco)
> - Remove double * before comments (Eelco)
> - Reword comments in dpcls_lookup() for clarity (Harry)
> ---
>   lib/dpif-netdev.c | 138 ++++++++++++++++++++++++++++++++--------------
>   1 file changed, 96 insertions(+), 42 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index f4b59e41b..f02bcd552 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -7603,6 +7603,28 @@ dpif_dummy_register(enum dummy_level level)
>   
>   /* Datapath Classifier. */
>   
> +/* Forward declaration for lookup_func typedef. */
> +struct dpcls_subtable;
> +
> +/* Lookup function for a subtable in the dpcls. This function is called
> + * by each subtable with an array of packets, and a bitmask of packets to
> + * perform the lookup on. Using a function pointer gives flexibility to
> + * optimize the lookup function based on subtable properties and the
> + * CPU instruction set available at runtime.
> + */
> +typedef
> +uint32_t (*dpcls_subtable_lookup_func)(struct dpcls_subtable *subtable,
> +                                       uint32_t keys_map,
> +                                       const struct netdev_flow_key *keys[],
> +                                       struct dpcls_rule **rules);
> +
> +/* Prototype for generic lookup func, using same code path as before. */
> +uint32_t
> +dpcls_subtable_lookup_generic(struct dpcls_subtable *subtable,
> +                              uint32_t keys_map,
> +                              const struct netdev_flow_key *keys[],
> +                              struct dpcls_rule **rules);
> +
>   /* A set of rules that all have the same fields wildcarded. */
>   struct dpcls_subtable {
>       /* The fields are only used by writers. */
> @@ -7612,6 +7634,13 @@ struct dpcls_subtable {
>       struct cmap rules;           /* Contains "struct dpcls_rule"s. */
>       uint32_t hit_cnt;            /* Number of match hits in subtable in current
>                                       optimization interval. */
> +
> +    /* The lookup function to use for this subtable. If there is a known
> +     * property of the subtable (eg: only 3 bits of miniflow metadata is
> +     * used for the lookup) then this can point at an optimized version of
> +     * the lookup function for this particular subtable. */
> +    dpcls_subtable_lookup_func lookup_func;
> +
>       struct netdev_flow_key mask; /* Wildcards for fields (const). */
>       /* 'mask' must be the last field, additional space is allocated here. */
>   };
> @@ -7671,6 +7700,10 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
>       cmap_init(&subtable->rules);
>       subtable->hit_cnt = 0;
>       netdev_flow_key_clone(&subtable->mask, mask);
> +
> +    /* Decide which hash/lookup/verify function to use. */
> +    subtable->lookup_func = dpcls_subtable_lookup_generic;
> +
>       cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash);
>       /* Add the new subtable at the end of the pvector (with no hits yet) */
>       pvector_insert(&cls->subtables, subtable, 0);
> @@ -7831,6 +7864,55 @@ dpcls_rule_matches_key(const struct dpcls_rule *rule,
>       return true;
>   }
>   
> +uint32_t
> +dpcls_subtable_lookup_generic(struct dpcls_subtable *subtable,
> +                              uint32_t keys_map,
> +                              const struct netdev_flow_key *keys[],
> +                              struct dpcls_rule **rules)
> +{
> +        int i;
> +        uint32_t found_map;

I was thinking should this be initialized to 0, but  I don't think there 
is a need. the call to cmap_find_batch() below will set it to 0 in the 
case of no math, otherwise set the i-th bit for a match so I thinks it's ok.

> +
> +        /* Compute hashes for the remaining keys.  Each search-key is
> +         * masked with the subtable's mask to avoid hashing the wildcarded
> +         * bits. */
> +        uint32_t hashes[NETDEV_MAX_BURST];
> +        ULLONG_FOR_EACH_1(i, keys_map) {
> +            hashes[i] = netdev_flow_key_hash_in_mask(keys[i],
> +                                                     &subtable->mask);
> +        }
> +
> +        /* Lookup. */
> +        const struct cmap_node *nodes[NETDEV_MAX_BURST];
> +        found_map = cmap_find_batch(&subtable->rules, keys_map, hashes, nodes);
> +
> +        /* Check results.  When the i-th bit of found_map is set, it means
> +         * that a set of nodes with a matching hash value was found for the
> +         * i-th search-key.  Due to possible hash collisions we need to check
> +         * which of the found rules, if any, really matches our masked
> +         * search-key. */
> +        ULLONG_FOR_EACH_1(i, found_map) {
> +            struct dpcls_rule *rule;
> +
> +            CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) {
> +                if (OVS_LIKELY(dpcls_rule_matches_key(rule, keys[i]))) {
> +                    rules[i] = rule;
> +                    /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap
> +                     * within one second optimization interval. */
> +                    subtable->hit_cnt++;
> +                    goto next;
> +                }
> +            }
> +            /* None of the found rules was a match.  Reset the i-th bit to
> +             * keep searching this key in the next subtable. */
> +            ULLONG_SET0(found_map, i);  /* Did not match. */
> +        next:
> +            ;                     /* Keep Sparse happy. */
> +        }
> +
> +        return found_map;
> +}
> +
>   /* For each miniflow in 'keys' performs a classifier lookup writing the result
>    * into the corresponding slot in 'rules'.  If a particular entry in 'keys' is
>    * NULL it is skipped.
> @@ -7849,16 +7931,12 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
>       /* The received 'cnt' miniflows are the search-keys that will be processed
>        * to find a matching entry into the available subtables.
>        * The number of bits in map_type is equal to NETDEV_MAX_BURST. */
> -    typedef uint32_t map_type;
> -#define MAP_BITS (sizeof(map_type) * CHAR_BIT)
> +#define MAP_BITS (sizeof(uint32_t) * CHAR_BIT)
>       BUILD_ASSERT_DECL(MAP_BITS >= NETDEV_MAX_BURST);
>   
>       struct dpcls_subtable *subtable;
>   
> -    map_type keys_map = TYPE_MAXIMUM(map_type); /* Set all bits. */
> -    map_type found_map;
> -    uint32_t hashes[MAP_BITS];
> -    const struct cmap_node *nodes[MAP_BITS];
> +    uint32_t keys_map = TYPE_MAXIMUM(uint32_t); /* Set all bits. */
>   
>       if (cnt != MAP_BITS) {
>           keys_map >>= MAP_BITS - cnt; /* Clear extra bits. */
> @@ -7866,6 +7944,7 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
>       memset(rules, 0, cnt * sizeof *rules);
>   
>       int lookups_match = 0, subtable_pos = 1;
> +    uint32_t found_map;

Same as above, should be OK as it wont be used until after the call to 
the generic look up function, which will set it to 0 in the event of no 
matches.

>   
>       /* The Datapath classifier - aka dpcls - is composed of subtables.
>        * Subtables are dynamically created as needed when new rules are inserted.
> @@ -7875,52 +7954,27 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
>        * search-key, the search for that key can stop because the rules are
>        * non-overlapping. */
>       PVECTOR_FOR_EACH (subtable, &cls->subtables) {
> -        int i;
> +        /* Call the subtable specific lookup function. */
> +        found_map = subtable->lookup_func(subtable, keys_map, keys, rules);
>   
> -        /* Compute hashes for the remaining keys.  Each search-key is
> -         * masked with the subtable's mask to avoid hashing the wildcarded
> -         * bits. */
> -        ULLONG_FOR_EACH_1(i, keys_map) {
> -            hashes[i] = netdev_flow_key_hash_in_mask(keys[i],
> -                                                     &subtable->mask);
> -        }
> -        /* Lookup. */
> -        found_map = cmap_find_batch(&subtable->rules, keys_map, hashes, nodes);
> -        /* Check results.  When the i-th bit of found_map is set, it means
> -         * that a set of nodes with a matching hash value was found for the
> -         * i-th search-key.  Due to possible hash collisions we need to check
> -         * which of the found rules, if any, really matches our masked
> -         * search-key. */
> -        ULLONG_FOR_EACH_1(i, found_map) {
> -            struct dpcls_rule *rule;
> +        /* Count the number of subtables searched for this packet match. This
> +         * estimates the "spread" of subtables looked at per matched packet. */
> +        uint32_t pkts_matched = count_1bits(found_map);
> +        lookups_match += pkts_matched * subtable_pos;
>   
> -            CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) {
> -                if (OVS_LIKELY(dpcls_rule_matches_key(rule, keys[i]))) {
> -                    rules[i] = rule;
> -                    /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap
> -                     * within one second optimization interval. */
> -                    subtable->hit_cnt++;
> -                    lookups_match += subtable_pos;
> -                    goto next;
> -                }
> -            }
> -            /* None of the found rules was a match.  Reset the i-th bit to
> -             * keep searching this key in the next subtable. */
> -            ULLONG_SET0(found_map, i);  /* Did not match. */
> -        next:
> -            ;                     /* Keep Sparse happy. */
> -        }
> -        keys_map &= ~found_map;             /* Clear the found rules. */
> +        /* Clear the found rules, and return early if all packets are found. */
> +        keys_map &= ~found_map;
>           if (!keys_map) {
>               if (num_lookups_p) {
>                   *num_lookups_p = lookups_match;
>               }
> -            return true;              /* All found. */
> +            return true;
>           }
>           subtable_pos++;
>       }
> +
>       if (num_lookups_p) {
>           *num_lookups_p = lookups_match;
>       }
> -    return false;                     /* Some misses. */
> +    return false;
>   }
> 

I've thought about whether an item should be added to the NEWS doc 
(possibly not in this patch but perhaps later in the series), it's hard 
to say as the changes are under the hood and not configurable by a user. 
Thoughts?

Regards
Ian
Van Haaren, Harry July 10, 2019, 3:40 p.m. UTC | #2
> -----Original Message-----
> From: Stokes, Ian
> Sent: Wednesday, July 10, 2019 4:30 PM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>; dev@openvswitch.org
> Cc: i.maximets@samsung.com; malvika.gupta@arm.com
> Subject: Re: [PATCH v10 1/5] dpif-netdev: implement function pointers/subtable
> 
> On 7/9/2019 1:34 PM, Harry van Haaren wrote:
> > This allows plugging-in of different subtable hash-lookup-verify
> > routines, and allows special casing of those functions based on
> > known context (eg: # of bits set) of the specific subtable.
> >
> > Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
> > Tested-by: Malvika Gupta <malvika.gupta@arm.com>
> >
> 
> Thanks for this harry, few minor comments below.

Hey, cool I'll snip away irrelevant code sections for readability.


<snip>

> > +uint32_t
> > +dpcls_subtable_lookup_generic(struct dpcls_subtable *subtable,
> > +                              uint32_t keys_map,
> > +                              const struct netdev_flow_key *keys[],
> > +                              struct dpcls_rule **rules)
> > +{
> > +        int i;
> > +        uint32_t found_map;
> 
> I was thinking should this be initialized to 0, but  I don't think there
> is a need. the call to cmap_find_batch() below will set it to 0 in the
> case of no math, otherwise set the i-th bit for a match so I thinks it's ok.

Correct - the value is written before being used - always.
Initializing it to zero is pointless, and only waste and instruction.


<snip>
> > @@ -7849,16 +7931,12 @@ dpcls_lookup(struct dpcls *cls, const struct
> >       if (cnt != MAP_BITS) {
> >           keys_map >>= MAP_BITS - cnt; /* Clear extra bits. */
> > @@ -7866,6 +7944,7 @@ dpcls_lookup(struct dpcls *cls, const struct
> netdev_flow_key *keys[],
> >       memset(rules, 0, cnt * sizeof *rules);
> >
> >       int lookups_match = 0, subtable_pos = 1;
> > +    uint32_t found_map;
> 
> Same as above, should be OK as it wont be used until after the call to
> the generic look up function, which will set it to 0 in the event of no
> matches.

Correct - written before use - no use in initializing to any value.


> > -            /* None of the found rules was a match.  Reset the i-th bit to
> > -             * keep searching this key in the next subtable. */
> > -            ULLONG_SET0(found_map, i);  /* Did not match. */
> > -        next:
> > -            ;                     /* Keep Sparse happy. */
> > -        }
> > -        keys_map &= ~found_map;             /* Clear the found rules. */
> > +        /* Clear the found rules, and return early if all packets are
> found. */
> > +        keys_map &= ~found_map;
> >           if (!keys_map) {
> >               if (num_lookups_p) {
> >                   *num_lookups_p = lookups_match;
> >               }
> > -            return true;              /* All found. */
> > +            return true;
> >           }
> >           subtable_pos++;
> >       }
> > +
> >       if (num_lookups_p) {
> >           *num_lookups_p = lookups_match;
> >       }
> > -    return false;                     /* Some misses. */
> > +    return false;
> >   }
> >
> 
> I've thought about whether an item should be added to the NEWS doc
> (possibly not in this patch but perhaps later in the series), it's hard
> to say as the changes are under the hood and not configurable by a user.
> Thoughts?

Correct the user shouldn't identify the actual changes, except for small
gains in performance if the specialized subtables are in use.

I think it would be good to add a NEWS item, as people profiling OVS's functions
will see different function names, and having called out the DPCLS Function Pointer
changes, and why they are there would be beneficial.

I will draft a separate patch for NEWS item - so we can keep it parallel to this patch set.
Shout if that is not a good approach :)

> Regards
> Ian

Thanks for review!
Stokes, Ian July 10, 2019, 3:54 p.m. UTC | #3
On 7/10/2019 4:40 PM, Van Haaren, Harry wrote:
>> -----Original Message-----
>> From: Stokes, Ian
>> Sent: Wednesday, July 10, 2019 4:30 PM
>> To: Van Haaren, Harry <harry.van.haaren@intel.com>; dev@openvswitch.org
>> Cc: i.maximets@samsung.com; malvika.gupta@arm.com
>> Subject: Re: [PATCH v10 1/5] dpif-netdev: implement function pointers/subtable
>>
>> On 7/9/2019 1:34 PM, Harry van Haaren wrote:
>>> This allows plugging-in of different subtable hash-lookup-verify
>>> routines, and allows special casing of those functions based on
>>> known context (eg: # of bits set) of the specific subtable.
>>>
>>> Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
>>> Tested-by: Malvika Gupta <malvika.gupta@arm.com>
>>>
>>
>> Thanks for this harry, few minor comments below.
> 
> Hey, cool I'll snip away irrelevant code sections for readability.
> 
> 
> <snip>

<snip>

>>
>> I've thought about whether an item should be added to the NEWS doc
>> (possibly not in this patch but perhaps later in the series), it's hard
>> to say as the changes are under the hood and not configurable by a user.
>> Thoughts?
> 
> Correct the user shouldn't identify the actual changes, except for small
> gains in performance if the specialized subtables are in use.
> 
> I think it would be good to add a NEWS item, as people profiling OVS's functions
> will see different function names, and having called out the DPCLS Function Pointer
> changes, and why they are there would be beneficial.
> 
> I will draft a separate patch for NEWS item - so we can keep it parallel to this patch set.
> Shout if that is not a good approach :)

I would think it could be added as part of patch 5 of the series as 
that's when the performance increase is introduced? Anyway it's minor 
enough that if there is not another revision of the series it can be 
done upon commit.

Regards
Ian
diff mbox series

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f4b59e41b..f02bcd552 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7603,6 +7603,28 @@  dpif_dummy_register(enum dummy_level level)
 
 /* Datapath Classifier. */
 
+/* Forward declaration for lookup_func typedef. */
+struct dpcls_subtable;
+
+/* Lookup function for a subtable in the dpcls. This function is called
+ * by each subtable with an array of packets, and a bitmask of packets to
+ * perform the lookup on. Using a function pointer gives flexibility to
+ * optimize the lookup function based on subtable properties and the
+ * CPU instruction set available at runtime.
+ */
+typedef
+uint32_t (*dpcls_subtable_lookup_func)(struct dpcls_subtable *subtable,
+                                       uint32_t keys_map,
+                                       const struct netdev_flow_key *keys[],
+                                       struct dpcls_rule **rules);
+
+/* Prototype for generic lookup func, using same code path as before. */
+uint32_t
+dpcls_subtable_lookup_generic(struct dpcls_subtable *subtable,
+                              uint32_t keys_map,
+                              const struct netdev_flow_key *keys[],
+                              struct dpcls_rule **rules);
+
 /* A set of rules that all have the same fields wildcarded. */
 struct dpcls_subtable {
     /* The fields are only used by writers. */
@@ -7612,6 +7634,13 @@  struct dpcls_subtable {
     struct cmap rules;           /* Contains "struct dpcls_rule"s. */
     uint32_t hit_cnt;            /* Number of match hits in subtable in current
                                     optimization interval. */
+
+    /* The lookup function to use for this subtable. If there is a known
+     * property of the subtable (eg: only 3 bits of miniflow metadata is
+     * used for the lookup) then this can point at an optimized version of
+     * the lookup function for this particular subtable. */
+    dpcls_subtable_lookup_func lookup_func;
+
     struct netdev_flow_key mask; /* Wildcards for fields (const). */
     /* 'mask' must be the last field, additional space is allocated here. */
 };
@@ -7671,6 +7700,10 @@  dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
     cmap_init(&subtable->rules);
     subtable->hit_cnt = 0;
     netdev_flow_key_clone(&subtable->mask, mask);
+
+    /* Decide which hash/lookup/verify function to use. */
+    subtable->lookup_func = dpcls_subtable_lookup_generic;
+
     cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash);
     /* Add the new subtable at the end of the pvector (with no hits yet) */
     pvector_insert(&cls->subtables, subtable, 0);
@@ -7831,6 +7864,55 @@  dpcls_rule_matches_key(const struct dpcls_rule *rule,
     return true;
 }
 
+uint32_t
+dpcls_subtable_lookup_generic(struct dpcls_subtable *subtable,
+                              uint32_t keys_map,
+                              const struct netdev_flow_key *keys[],
+                              struct dpcls_rule **rules)
+{
+        int i;
+        uint32_t found_map;
+
+        /* Compute hashes for the remaining keys.  Each search-key is
+         * masked with the subtable's mask to avoid hashing the wildcarded
+         * bits. */
+        uint32_t hashes[NETDEV_MAX_BURST];
+        ULLONG_FOR_EACH_1(i, keys_map) {
+            hashes[i] = netdev_flow_key_hash_in_mask(keys[i],
+                                                     &subtable->mask);
+        }
+
+        /* Lookup. */
+        const struct cmap_node *nodes[NETDEV_MAX_BURST];
+        found_map = cmap_find_batch(&subtable->rules, keys_map, hashes, nodes);
+
+        /* Check results.  When the i-th bit of found_map is set, it means
+         * that a set of nodes with a matching hash value was found for the
+         * i-th search-key.  Due to possible hash collisions we need to check
+         * which of the found rules, if any, really matches our masked
+         * search-key. */
+        ULLONG_FOR_EACH_1(i, found_map) {
+            struct dpcls_rule *rule;
+
+            CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) {
+                if (OVS_LIKELY(dpcls_rule_matches_key(rule, keys[i]))) {
+                    rules[i] = rule;
+                    /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap
+                     * within one second optimization interval. */
+                    subtable->hit_cnt++;
+                    goto next;
+                }
+            }
+            /* None of the found rules was a match.  Reset the i-th bit to
+             * keep searching this key in the next subtable. */
+            ULLONG_SET0(found_map, i);  /* Did not match. */
+        next:
+            ;                     /* Keep Sparse happy. */
+        }
+
+        return found_map;
+}
+
 /* For each miniflow in 'keys' performs a classifier lookup writing the result
  * into the corresponding slot in 'rules'.  If a particular entry in 'keys' is
  * NULL it is skipped.
@@ -7849,16 +7931,12 @@  dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
     /* The received 'cnt' miniflows are the search-keys that will be processed
      * to find a matching entry into the available subtables.
      * The number of bits in map_type is equal to NETDEV_MAX_BURST. */
-    typedef uint32_t map_type;
-#define MAP_BITS (sizeof(map_type) * CHAR_BIT)
+#define MAP_BITS (sizeof(uint32_t) * CHAR_BIT)
     BUILD_ASSERT_DECL(MAP_BITS >= NETDEV_MAX_BURST);
 
     struct dpcls_subtable *subtable;
 
-    map_type keys_map = TYPE_MAXIMUM(map_type); /* Set all bits. */
-    map_type found_map;
-    uint32_t hashes[MAP_BITS];
-    const struct cmap_node *nodes[MAP_BITS];
+    uint32_t keys_map = TYPE_MAXIMUM(uint32_t); /* Set all bits. */
 
     if (cnt != MAP_BITS) {
         keys_map >>= MAP_BITS - cnt; /* Clear extra bits. */
@@ -7866,6 +7944,7 @@  dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
     memset(rules, 0, cnt * sizeof *rules);
 
     int lookups_match = 0, subtable_pos = 1;
+    uint32_t found_map;
 
     /* The Datapath classifier - aka dpcls - is composed of subtables.
      * Subtables are dynamically created as needed when new rules are inserted.
@@ -7875,52 +7954,27 @@  dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
      * search-key, the search for that key can stop because the rules are
      * non-overlapping. */
     PVECTOR_FOR_EACH (subtable, &cls->subtables) {
-        int i;
+        /* Call the subtable specific lookup function. */
+        found_map = subtable->lookup_func(subtable, keys_map, keys, rules);
 
-        /* Compute hashes for the remaining keys.  Each search-key is
-         * masked with the subtable's mask to avoid hashing the wildcarded
-         * bits. */
-        ULLONG_FOR_EACH_1(i, keys_map) {
-            hashes[i] = netdev_flow_key_hash_in_mask(keys[i],
-                                                     &subtable->mask);
-        }
-        /* Lookup. */
-        found_map = cmap_find_batch(&subtable->rules, keys_map, hashes, nodes);
-        /* Check results.  When the i-th bit of found_map is set, it means
-         * that a set of nodes with a matching hash value was found for the
-         * i-th search-key.  Due to possible hash collisions we need to check
-         * which of the found rules, if any, really matches our masked
-         * search-key. */
-        ULLONG_FOR_EACH_1(i, found_map) {
-            struct dpcls_rule *rule;
+        /* Count the number of subtables searched for this packet match. This
+         * estimates the "spread" of subtables looked at per matched packet. */
+        uint32_t pkts_matched = count_1bits(found_map);
+        lookups_match += pkts_matched * subtable_pos;
 
-            CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) {
-                if (OVS_LIKELY(dpcls_rule_matches_key(rule, keys[i]))) {
-                    rules[i] = rule;
-                    /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap
-                     * within one second optimization interval. */
-                    subtable->hit_cnt++;
-                    lookups_match += subtable_pos;
-                    goto next;
-                }
-            }
-            /* None of the found rules was a match.  Reset the i-th bit to
-             * keep searching this key in the next subtable. */
-            ULLONG_SET0(found_map, i);  /* Did not match. */
-        next:
-            ;                     /* Keep Sparse happy. */
-        }
-        keys_map &= ~found_map;             /* Clear the found rules. */
+        /* Clear the found rules, and return early if all packets are found. */
+        keys_map &= ~found_map;
         if (!keys_map) {
             if (num_lookups_p) {
                 *num_lookups_p = lookups_match;
             }
-            return true;              /* All found. */
+            return true;
         }
         subtable_pos++;
     }
+
     if (num_lookups_p) {
         *num_lookups_p = lookups_match;
     }
-    return false;                     /* Some misses. */
+    return false;
 }