diff mbox series

[ovs-dev] expr: Remove supersets from OR expressions.

Message ID 20230317192509.1513631-1-i.maximets@ovn.org
State Superseded
Headers show
Series [ovs-dev] expr: Remove supersets from OR expressions. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Ilya Maximets March 17, 2023, 7:25 p.m. UTC
While crushing OR expressions, OVN removes exact replicas of sub
expressions.  However, there could be many CMP expressions that are
supersets of each other.  These are most likely to be created as a
result of cross-product while expanding brackets in the AND expression
in crush_and_numeric(), i.e. while converting
"x && (a0 || a1) && (b0 || b1)" into "xa0b0 || xa0b1 || xa1b0 || xa1b1".

Replacing the removal of exact duplicates with scan and removal of
supersets of other existing sub-expressions to reduce the amount of
generated flows.  This operation is less efficient in comparison,
but should save time later, since less flows will be generated.

Example:

  "ip4.src == 172.168.0.0/16 && ip4.src!={172.168.13.0/24, 172.168.15.0/24}"

  Processing of this expression yields 42 flows:

  $ ./tests/ovstest test-ovn expr-to-flows <<< "$expr"

  ip,nw_src=172.168.0.0/255.255.1.0
  ip,nw_src=172.168.0.0/255.255.10.0
  ip,nw_src=172.168.0.0/255.255.12.0
  ip,nw_src=172.168.0.0/255.255.3.0
  ip,nw_src=172.168.0.0/255.255.4.0
  ip,nw_src=172.168.0.0/255.255.5.0
  ip,nw_src=172.168.0.0/255.255.6.0
  ip,nw_src=172.168.0.0/255.255.8.0
  ip,nw_src=172.168.0.0/255.255.9.0
  ip,nw_src=172.168.128.0/17
  <... 32 more flows ...>

  We can see that many flows above do overlap, e.g. 255.255.3.0
  mask is a superset of 255.255.1.0.  Everything that matches
  255.255.3.0, will match 255.255.1.0 as well (the value is the same).

  By removing all the unnecessary supersets, the set of flows can
  be reduced from 42 down to 7:

  ip,nw_src=172.168.0.0/255.255.1.0
  ip,nw_src=172.168.0.0/255.255.4.0
  ip,nw_src=172.168.0.0/255.255.8.0
  ip,nw_src=172.168.128.0/17
  ip,nw_src=172.168.16.0/255.255.16.0
  ip,nw_src=172.168.32.0/255.255.32.0
  ip,nw_src=172.168.64.0/255.255.64.0

This change should be particularly useful for expressions with
inequality checks, like the one above.  Such expressions are
frequent among ACL rules.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2177197
Reported-by: Nadia Pinaeva <npinaeva@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
---
 lib/expr.c | 128 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 73 insertions(+), 55 deletions(-)

Comments

Ilya Maximets March 17, 2023, 7:46 p.m. UTC | #1
On 3/17/23 20:25, Ilya Maximets wrote:
> While crushing OR expressions, OVN removes exact replicas of sub
> expressions.  However, there could be many CMP expressions that are
> supersets of each other.  These are most likely to be created as a
> result of cross-product while expanding brackets in the AND expression
> in crush_and_numeric(), i.e. while converting
> "x && (a0 || a1) && (b0 || b1)" into "xa0b0 || xa0b1 || xa1b0 || xa1b1".
> 
> Replacing the removal of exact duplicates with scan and removal of
> supersets of other existing sub-expressions to reduce the amount of
> generated flows.  This operation is less efficient in comparison,
> but should save time later, since less flows will be generated.
> 
> Example:
> 
>   "ip4.src == 172.168.0.0/16 && ip4.src!={172.168.13.0/24, 172.168.15.0/24}"
> 
>   Processing of this expression yields 42 flows:
> 
>   $ ./tests/ovstest test-ovn expr-to-flows <<< "$expr"
> 
>   ip,nw_src=172.168.0.0/255.255.1.0
>   ip,nw_src=172.168.0.0/255.255.10.0
>   ip,nw_src=172.168.0.0/255.255.12.0
>   ip,nw_src=172.168.0.0/255.255.3.0
>   ip,nw_src=172.168.0.0/255.255.4.0
>   ip,nw_src=172.168.0.0/255.255.5.0
>   ip,nw_src=172.168.0.0/255.255.6.0
>   ip,nw_src=172.168.0.0/255.255.8.0
>   ip,nw_src=172.168.0.0/255.255.9.0
>   ip,nw_src=172.168.128.0/17
>   <... 32 more flows ...>
> 
>   We can see that many flows above do overlap, e.g. 255.255.3.0
>   mask is a superset of 255.255.1.0.  Everything that matches
>   255.255.3.0, will match 255.255.1.0 as well (the value is the same).
> 
>   By removing all the unnecessary supersets, the set of flows can
>   be reduced from 42 down to 7:
> 
>   ip,nw_src=172.168.0.0/255.255.1.0
>   ip,nw_src=172.168.0.0/255.255.4.0
>   ip,nw_src=172.168.0.0/255.255.8.0
>   ip,nw_src=172.168.128.0/17
>   ip,nw_src=172.168.16.0/255.255.16.0
>   ip,nw_src=172.168.32.0/255.255.32.0
>   ip,nw_src=172.168.64.0/255.255.64.0
> 
> This change should be particularly useful for expressions with
> inequality checks, like the one above.  Such expressions are
> frequent among ACL rules.
> 
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2177197
> Reported-by: Nadia Pinaeva <npinaeva@redhat.com>
> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
> ---
>  lib/expr.c | 128 ++++++++++++++++++++++++++++++-----------------------
>  1 file changed, 73 insertions(+), 55 deletions(-)

One more example:

Before:

$ ./tests/ovstest test-ovn expr-to-flows <<< "ip4.src != {172.168.13.0/24, 172.168.14.0/24, 172.168.15.0/24}" | wc -l
2894

After:

$ ./tests/ovstest test-ovn expr-to-flows <<< "ip4.src != {172.168.13.0/24, 172.168.14.0/24, 172.168.15.0/24}" | wc -l
23

$ ./tests/ovstest test-ovn expr-to-flows <<< "ip4.src != {172.168.13.0/24, 172.168.14.0/24, 172.168.15.0/24}" | sort
ip,nw_src=0.0.0.0/0.0.3.0
ip,nw_src=0.0.0.0/0.0.4.0
ip,nw_src=0.0.0.0/0.0.8.0
ip,nw_src=0.0.0.0/0.128.0.0
ip,nw_src=0.0.0.0/0.32.0.0
ip,nw_src=0.0.0.0/0.8.0.0
ip,nw_src=0.0.0.0/1
ip,nw_src=0.0.0.0/32.0.0.0
ip,nw_src=0.0.0.0/4.0.0.0
ip,nw_src=0.0.0.0/8.0.0.0
ip,nw_src=0.0.128.0/0.0.128.0
ip,nw_src=0.0.16.0/0.0.16.0
ip,nw_src=0.0.32.0/0.0.32.0
ip,nw_src=0.0.64.0/0.0.64.0
ip,nw_src=0.1.0.0/0.1.0.0
ip,nw_src=0.16.0.0/0.16.0.0
ip,nw_src=0.2.0.0/0.2.0.0
ip,nw_src=0.4.0.0/0.4.0.0
ip,nw_src=0.64.0.0/0.64.0.0
ip,nw_src=1.0.0.0/1.0.0.0
ip,nw_src=16.0.0.0/16.0.0.0
ip,nw_src=2.0.0.0/2.0.0.0
ip,nw_src=64.0.0.0/64.0.0.0

Best regards, Ilya Maximets.
diff mbox series

Patch

diff --git a/lib/expr.c b/lib/expr.c
index 0a6f7e574..4aa8873b6 100644
--- a/lib/expr.c
+++ b/lib/expr.c
@@ -16,20 +16,21 @@ 
 
 #include <config.h>
 #include "byte-order.h"
-#include "openvswitch/json.h"
+#include "hmapx.h"
 #include "nx-match.h"
 #include "openvswitch/dynamic-string.h"
+#include "openvswitch/json.h"
 #include "openvswitch/match.h"
 #include "openvswitch/ofp-actions.h"
-#include "openvswitch/vlog.h"
 #include "openvswitch/shash.h"
+#include "openvswitch/vlog.h"
+#include "ovn-util.h"
 #include "ovn/expr.h"
 #include "ovn/lex.h"
 #include "ovn/logical-fields.h"
 #include "simap.h"
 #include "sset.h"
 #include "util.h"
-#include "ovn-util.h"
 
 VLOG_DEFINE_THIS_MODULE(expr);
 
@@ -2520,6 +2521,74 @@  crush_and_string(struct expr *expr, const struct expr_symbol *symbol)
     return expr_fix(expr);
 }
 
+/* This function expects an OR expression with already crushed sub
+ * expressions, so they are plain comparisons.  Result is the same
+ * expression, but with unnecessary sub-expressions removed. */
+static struct expr *
+crush_or_supersets(struct expr *expr, const struct expr_symbol *symbol)
+{
+    struct hmapx to_delete = HMAPX_INITIALIZER(&to_delete);
+
+    ovs_assert(expr->type == EXPR_T_OR);
+    if (!symbol->width) {
+        return expr;
+    }
+
+    struct expr *a;
+    LIST_FOR_EACH (a, node, &expr->andor) {
+        ovs_assert(a->type == EXPR_T_CMP);
+
+        if (hmapx_contains(&to_delete, a)) {
+            continue;
+        }
+
+        struct expr *b;
+        LIST_FOR_EACH (b, node, &expr->andor) {
+            union mf_subvalue tmp_value, tmp_mask;
+
+            if (a == b || hmapx_contains(&to_delete, b)) {
+                continue;
+            }
+
+            /* Conflicting sub-expressions cannot superseed each other. */
+            if (mf_subvalue_intersect(&a->cmp.value, &a->cmp.mask,
+                                      &b->cmp.value, &b->cmp.mask,
+                                      &tmp_value, &tmp_mask)) {
+                const size_t sz = sizeof a->cmp.mask * CHAR_BIT;
+                const unsigned long *a_mask, *b_mask;
+
+                a_mask = (unsigned long *) a->cmp.mask.be64;
+                b_mask = (unsigned long *) b->cmp.mask.be64;
+
+                /* Check if 'a' is a superset of 'b' or the other way around.
+                 * Keep the smaller mask. */
+                if (bitmap_is_superset(a_mask, b_mask, sz)) {
+                    hmapx_add(&to_delete, a);
+                    break;
+                } else if (bitmap_is_superset(b_mask, a_mask, sz)) {
+                    hmapx_add(&to_delete, b);
+                }
+            }
+        }
+    }
+
+    if (hmapx_count(&to_delete)) {
+        /* Members modified, so untrack address set. */
+        free(expr->as_name);
+        expr->as_name = NULL;
+    }
+
+    struct hmapx_node *node;
+    HMAPX_FOR_EACH (node, &to_delete) {
+        a = (struct expr *) node->data;
+        ovs_list_remove(&a->node);
+        expr_destroy(a);
+    }
+    hmapx_destroy(&to_delete);
+
+    return expr;
+}
+
 /* Implementation of crush_cmps() for expr->type == EXPR_T_AND and a
  * numeric-typed 'symbol'. */
 static struct expr *
@@ -2679,31 +2748,6 @@  crush_and_numeric(struct expr *expr, const struct expr_symbol *symbol)
     }
 }
 
-static int
-compare_cmps_3way(const struct expr *a, const struct expr *b)
-{
-    ovs_assert(a->cmp.symbol == b->cmp.symbol);
-    if (!a->cmp.symbol->width) {
-        return strcmp(a->cmp.string, b->cmp.string);
-    } else {
-        int d = memcmp(&a->cmp.value, &b->cmp.value, sizeof a->cmp.value);
-        if (!d) {
-            d = memcmp(&a->cmp.mask, &b->cmp.mask, sizeof a->cmp.mask);
-        }
-        return d;
-    }
-}
-
-static int
-compare_cmps_cb(const void *a_, const void *b_)
-{
-    const struct expr *const *ap = a_;
-    const struct expr *const *bp = b_;
-    const struct expr *a = *ap;
-    const struct expr *b = *bp;
-    return compare_cmps_3way(a, b);
-}
-
 /* Implementation of crush_cmps() for expr->type == EXPR_T_OR. */
 static struct expr *
 crush_or(struct expr *expr, const struct expr_symbol *symbol)
@@ -2723,34 +2767,8 @@  crush_or(struct expr *expr, const struct expr_symbol *symbol)
         return expr;
     }
 
-    /* Sort subexpressions by value and mask, to bring together duplicates. */
-    size_t n = ovs_list_size(&expr->andor);
-    struct expr **subs = xmalloc(n * sizeof *subs);
-
-    size_t i = 0;
-    LIST_FOR_EACH (sub, node, &expr->andor) {
-        subs[i++] = sub;
-    }
-    ovs_assert(i == n);
-
-    qsort(subs, n, sizeof *subs, compare_cmps_cb);
+    expr = crush_or_supersets(expr, symbol);
 
-    /* Eliminate duplicates. */
-    ovs_list_init(&expr->andor);
-    ovs_list_push_back(&expr->andor, &subs[0]->node);
-    for (i = 1; i < n; i++) {
-        struct expr *a = expr_from_node(ovs_list_back(&expr->andor));
-        struct expr *b = subs[i];
-        if (compare_cmps_3way(a, b)) {
-            ovs_list_push_back(&expr->andor, &b->node);
-        } else {
-            expr_destroy(b);
-            /* Member modified, so untrack address set. */
-            free(expr->as_name);
-            expr->as_name = NULL;
-        }
-    }
-    free(subs);
     return expr_fix(expr);
 }