@@ -34,6 +34,10 @@ Post-v2.11.0
* 'ovs-appctl exit' now implies cleanup of non-internal ports in userspace
datapath regardless of '--cleanup' option. Use '--cleanup' to remove
internal ports too.
+ * Datapath classifier code refactored to enable function pointers to select
+ the lookup implementation at runtime. This enables specialization of
+ specific subtables based on the miniflow attributes, enhancing the
+ performance of the subtable search.
- OVSDB:
* OVSDB clients can now resynchronize with clustered servers much more
quickly after a brief disconnection, saving bandwidth and CPU time.
@@ -233,7 +233,58 @@ dpcls_subtable_lookup_generic(struct dpcls_subtable *subtable,
const struct netdev_flow_key *keys[],
struct dpcls_rule **rules)
{
+    /* Here the runtime subtable->mf_bits counts are used, which forces the
+     * compiler to iterate normal for() loops. Due to this limitation in the
+     * compiler's available optimizations, this function has lower performance
+     * than the below specialized functions.
+     */
return lookup_generic_impl(subtable, blocks_scratch, keys_map, keys, rules,
subtable->mf_bits_set_unit0,
subtable->mf_bits_set_unit1);
}
+
+/* Expand out specialized functions with U0 and U1 bit attributes. */
+#define DECLARE_OPTIMIZED_LOOKUP_FUNCTION(U0, U1) \
+ static uint32_t \
+ dpcls_subtable_lookup_mf_u0w##U0##_u1w##U1( \
+ struct dpcls_subtable *subtable, \
+ uint64_t *blocks_scratch, \
+ uint32_t keys_map, \
+ const struct netdev_flow_key *keys[],\
+ struct dpcls_rule **rules) \
+ { \
+ return lookup_generic_impl(subtable, blocks_scratch, keys_map, \
+ keys, rules, U0, U1); \
+ } \
+
+DECLARE_OPTIMIZED_LOOKUP_FUNCTION(5, 1)
+DECLARE_OPTIMIZED_LOOKUP_FUNCTION(4, 1)
+DECLARE_OPTIMIZED_LOOKUP_FUNCTION(4, 0)
+
+/* Check if a specialized function is valid for the required subtable. */
+#define CHECK_LOOKUP_FUNCTION(U0,U1) \
+ if (!f && u0_bits == U0 && u1_bits == U1) { \
+ f = dpcls_subtable_lookup_mf_u0w##U0##_u1w##U1; \
+ }
+
+/* Probe function to look up an available specialized function.
+ * If this implementation can handle the requested miniflow fingerprint, this
+ * function returns the most optimal implementation for that fingerprint.
+ * @retval Non-NULL A valid function to handle the miniflow bit pattern
+ * @retval NULL The requested miniflow is not supported by this implementation.
+ */
+dpcls_subtable_lookup_func
+dpcls_subtable_generic_probe(uint32_t u0_bits, uint32_t u1_bits)
+{
+ dpcls_subtable_lookup_func f = NULL;
+
+ CHECK_LOOKUP_FUNCTION(5, 1);
+ CHECK_LOOKUP_FUNCTION(4, 1);
+ CHECK_LOOKUP_FUNCTION(4, 0);
+
+ if (f) {
+ VLOG_DBG("Subtable using Generic Optimized for u0 %d, u1 %d\n",
+ u0_bits, u1_bits);
+ }
+ return f;
+}
@@ -69,6 +69,14 @@ dpcls_subtable_lookup_generic(struct dpcls_subtable *subtable,
const struct netdev_flow_key *keys[],
struct dpcls_rule **rules);
+/* Probe function to select a specialized version of the generic lookup
+ * implementation. This provides performance benefit due to compile-time
+ * optimizations such as loop-unrolling. These are enabled by the compile-time
+ * constants in the specific function implementations.
+ */
+dpcls_subtable_lookup_func
+dpcls_subtable_generic_probe(uint32_t u0_bit_count, uint32_t u1_bit_count);
+
/* A set of rules that all have the same fields wildcarded. */
struct dpcls_subtable {
/* The fields are only used by writers. */
@@ -7735,8 +7735,13 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
cls->blocks_scratch_size = blocks_required_per_pkt;
}
- /* Assign the generic lookup - this works with any miniflow fingerprint. */
- subtable->lookup_func = dpcls_subtable_lookup_generic;
+ /* Probe for a specialized generic lookup function. */
+ subtable->lookup_func = dpcls_subtable_generic_probe(unit0, unit1);
+
+ /* If not set, assign generic lookup. Generic works for any miniflow. */
+ if (!subtable->lookup_func) {
+ subtable->lookup_func = dpcls_subtable_lookup_generic;
+ }
cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash);
/* Add the new subtable at the end of the pvector (with no hits yet) */