diff mbox series

[ovs-dev,10/11] ovn-controller: Handle addresses deletion in address set incrementally.

Message ID 20220209063726.1134827-11-hzhou@ovn.org
State Superseded
Headers show
Series ovn-controller: Fine-grained address set incremental processing. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success

Commit Message

Han Zhou Feb. 9, 2022, 6:37 a.m. UTC
The cost of reprocessing a lflow referencing a big address set can be very
high. Now a single address change in an address set would cause the
related logical flows being reprocessed. When the change rate of an
address set is high, ovn-controller would be busy reprocessing lflows.

This patch handles addresses deletion incrementally. It deletes the
related flows for the deleted addresses only, without deleting
and recreating unrelated flows unnecessarily.

Scale test shows obvious performance gains because the time complexity
changed from O(n) to O(1). The bigger the size of address set, the more
CPU savings. With the AS size of 10k, the test shows ~40x speed up.

Test setup:
CPU: Intel(R) Core(TM) i9-7920X CPU @ 2.90GHz.
5 ACL all referencing an address set of 10,000 IPs.

Measure the time spent by ovn-controller for handling one IP deletion
from the address set.

Before: ~400ms
After: 11-12ms

There is memory cost increase, due to the index built to track each
individual IP. The total memory cost for the OF flows in ovn-controller
increased ~20% in the 10k AS size test.

Before:
ofctrl_desired_flow_usage-KB:22248
ofctrl_installed_flow_usage-KB:14850
ofctrl_sb_flow_ref_usage-KB:7208

After:
ofctrl_desired_flow_usage-KB:22248
ofctrl_installed_flow_usage-KB:14850
ofctrl_sb_flow_ref_usage-KB:15551

Signed-off-by: Han Zhou <hzhou@ovn.org>
---
 controller/lflow.c          | 149 ++++++++++++++++++++++++++++++++++++
 controller/lflow.h          |  10 +++
 controller/ofctrl.c         |  71 +++++++++++++++++
 controller/ofctrl.h         |   5 ++
 controller/ovn-controller.c |  25 ++++--
 tests/ovn-controller.at     |  20 +++--
 6 files changed, 265 insertions(+), 15 deletions(-)
diff mbox series

Patch

diff --git a/controller/lflow.c b/controller/lflow.c
index 66c31db9e..055afba64 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -485,6 +485,155 @@  lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in,
     return ret;
 }
 
+static bool
+as_info_from_expr_const(const char *as_name, const union expr_constant *c,
+                        struct addrset_info *as_info)
+{
+    as_info->name = as_name;
+    as_info->ip = c->value.ipv6;
+    if (c->masked) {
+        as_info->mask = c->mask.ipv6;
+    } else {
+        /* Generate mask so that it is the same as what's added for
+         * expr->cmp.mask. See make_cmp__() in expr.c. */
+        union mf_subvalue mask;
+        memset(&mask, 0, sizeof mask);
+        if (c->format == LEX_F_IPV4) {
+            mask.ipv4 = be32_prefix_mask(32);
+        } else if (c->format == LEX_F_IPV6) {
+            mask.ipv6 = ipv6_create_mask(128);
+        } else if (c->format == LEX_F_ETHERNET) {
+            mask.mac = eth_addr_exact;
+        } else {
+            /* Not an address */
+            return false;
+        }
+        as_info->mask = mask.ipv6;
+    }
+    return true;
+}
+
+static bool
+as_update_can_be_handled(const char *as_name, struct addr_set_diff *as_diff,
+                         struct lflow_ctx_in *l_ctx_in)
+{
+    struct expr_constant_set *as = shash_find_data(l_ctx_in->addr_sets,
+                                                   as_name);
+    ovs_assert(as);
+    size_t n_added = as_diff->added ? as_diff->added->n_values : 0;
+    size_t n_deleted = as_diff->deleted ? as_diff->deleted->n_values : 0;
+    size_t old_as_size = as->n_values + n_deleted - n_added;
+
+    /* If the change may impact n_conj, i.e. the template of the flows would
+     * change, we must reprocess the lflow. */
+    if (old_as_size <= 1 || as->n_values <= 1) {
+        return false;
+    }
+
+    /* If the size of the diff is too big, reprocessing may be more
+     * efficient than incrementally processing the diffs.  */
+    if ((n_added + n_deleted) >= as->n_values) {
+        return false;
+    }
+
+    return true;
+}
+
+/* Handles address set update incrementally - processes only the diff
+ * (added/deleted) addresses in the address set. If it cannot handle the update
+ * incrementally, returns false, so that the caller will trigger reprocessing
+ * for the lflow.
+ *
+ * The reasons that the function returns false are:
+ *
+ * - The size of the address set changed to/from 0 or 1, which means the
+ *   'template' of the lflow translation is changed. In this case reprocessing
+ *   doesn't impact performance because the size of the address set is already
+ *   very small.
+ *
+ * - The size of the change is equal or bigger than the new size. In this case
+ *   it doesn't make sense to incrementally processing the changes because
+ *   reprocessing can be faster.
+ *
+ * - When the address set information couldn't be properly tracked during lflow
+ *   parsing. The typical cases are:
+ *
+ *      - The relational operator to the address set is not '=='. In this case
+ *        there is no 1-1 mapping between the addresses and the flows
+ *        generated.
+ *
+ *      - The sub expression of the address set is combined with other sub-
+ *        expressions/constants, usually because of disjunctions between
+ *        sub-expressions/constants, e.g.:
+ *
+ *          ip.src == $as1 || ip.dst == $as2
+ *          ip.src == {$as1, $as2}
+ *          ip.src == {$as1, ip1}
+ *
+ *        All these could have been split into separate lflows.
+ *
+ *      - Conjunctions overlapping between lflows, which can be caused by
+ *        overlapping address sets or same address set used by multiple lflows
+ *        that could have been combined. e.g.:
+ *
+ *          lflow1: ip.src == $as1 && tcp.dst == {p1, p2}
+ *          lflow2: ip.src == $as1 && tcp.dst == {p3, p4}
+ *
+ *        It could have been combined as:
+ *
+ *          ip.src == $as1 && tcp.dst == {p1, p2, p3, p4}
+ */
+bool
+lflow_handle_addr_set_update(const char *as_name,
+                             struct addr_set_diff *as_diff,
+                             struct lflow_ctx_in *l_ctx_in OVS_UNUSED,
+                             struct lflow_ctx_out *l_ctx_out,
+                             bool *changed)
+{
+    if (as_diff->added) {
+        return false;
+    }
+    ovs_assert(as_diff->deleted);
+
+    if (!as_update_can_be_handled(as_name, as_diff, l_ctx_in)) {
+        return false;
+    }
+
+    struct ref_lflow_node *rlfn =
+        ref_lflow_lookup(&l_ctx_out->lfrr->ref_lflow_table, REF_TYPE_ADDRSET,
+                         as_name);
+    if (!rlfn) {
+        *changed = false;
+        return true;
+    }
+
+    *changed = false;
+
+    struct lflow_ref_list_node *lrln;
+    HMAP_FOR_EACH (lrln, hmap_node, &rlfn->lflow_uuids) {
+        if (lflows_processed_find(l_ctx_out->lflows_processed,
+                                  &lrln->lflow_uuid)) {
+            VLOG_DBG("lflow "UUID_FMT"has been processed, skip.",
+                     UUID_ARGS(&lrln->lflow_uuid));
+            continue;
+        }
+        for (size_t i = 0; i < as_diff->deleted->n_values; i++) {
+            union expr_constant *c = &as_diff->deleted->values[i];
+            struct addrset_info as_info;
+            if (!as_info_from_expr_const(as_name, c, &as_info)) {
+                continue;
+            }
+            if (!ofctrl_remove_flows_for_as_ip(l_ctx_out->flow_table,
+                                               &lrln->lflow_uuid, &as_info,
+                                               lrln->ref_count)) {
+                return false;
+            }
+            *changed = true;
+        }
+    }
+    return true;
+}
+
 bool
 lflow_handle_changed_ref(enum ref_type ref_type, const char *ref_name,
                          struct lflow_ctx_in *l_ctx_in,
diff --git a/controller/lflow.h b/controller/lflow.h
index b131b6cb6..d61733bc2 100644
--- a/controller/lflow.h
+++ b/controller/lflow.h
@@ -181,6 +181,16 @@  bool lflow_handle_changed_flows(struct lflow_ctx_in *, struct lflow_ctx_out *);
 bool lflow_handle_changed_ref(enum ref_type, const char *ref_name,
                               struct lflow_ctx_in *, struct lflow_ctx_out *,
                               bool *changed);
+
+struct addr_set_diff {
+    struct expr_constant_set *added;
+    struct expr_constant_set *deleted;
+};
+bool lflow_handle_addr_set_update(const char *as_name, struct addr_set_diff *,
+                                  struct lflow_ctx_in *,
+                                  struct lflow_ctx_out *,
+                                  bool *changed);
+
 void lflow_handle_changed_neighbors(
     struct ovsdb_idl_index *sbrec_port_binding_by_name,
     const struct sbrec_mac_binding_table *,
diff --git a/controller/ofctrl.c b/controller/ofctrl.c
index 7671a3b7a..8eb2949f7 100644
--- a/controller/ofctrl.c
+++ b/controller/ofctrl.c
@@ -1342,6 +1342,77 @@  ofctrl_flood_remove_flows(struct ovn_desired_flow_table *flow_table,
     }
 }
 
+/* Remove desired flows related to the specified 'addrset_info' for the
+ * 'lflow_uuid'. Returns true if it can be processed completely, otherwise
+ * returns false, which would trigger a reprocessing of the lflow of
+ * 'lflow_uuid'. The expected_count is checked against the actual flows
+ * deleted, and if it doesn't match, return false, too. */
+bool
+ofctrl_remove_flows_for_as_ip(struct ovn_desired_flow_table *flow_table,
+                              const struct uuid *lflow_uuid,
+                              const struct addrset_info *as_info,
+                              size_t expected_count)
+{
+    struct sb_to_flow *stf = sb_to_flow_find(&flow_table->uuid_flow_table,
+                                             lflow_uuid);
+    if (!stf) {
+        /* No such flow, nothing needs to be done. */
+        return true;
+    }
+
+    struct sb_addrset_ref *sar;
+    bool found = false;
+    LIST_FOR_EACH (sar, list_node, &stf->addrsets) {
+        if (!strcmp(sar->name, as_info->name)) {
+            found = true;
+            break;
+        }
+    }
+    if (!found) {
+        /* No address set tracking infomation found, can't perform the
+         * deletion. */
+        return false;
+    }
+
+    struct ip_to_flow_node *itfn = ip_to_flow_find(&sar->ip_to_flow_map,
+                                                   &as_info->ip,
+                                                   &as_info->mask);
+    if (!itfn) {
+        /* This ip wasn't tracked, probably because it maps to a flow that has
+         * compound conjunction actions for the same ip from multiple address
+         * sets. */
+        return false;
+    }
+    struct sb_flow_ref *sfr, *next;
+    size_t count = 0;
+    LIST_FOR_EACH_SAFE (sfr, next, as_ip_flow_list, &itfn->flows) {
+        /* If the desired flow is referenced by multiple sb lflows, it
+         * shouldn't have been indexed by address set. */
+        ovs_assert(ovs_list_is_short(&sfr->sb_list));
+
+        ovs_list_remove(&sfr->sb_list);
+        ovs_list_remove(&sfr->flow_list);
+        ovs_list_remove(&sfr->as_ip_flow_list);
+
+        struct desired_flow *f = sfr->flow;
+        mem_stats.sb_flow_ref_usage -= sb_flow_ref_size(sfr);
+        free(sfr);
+
+        ovs_assert(ovs_list_is_empty(&f->list_node));
+        ovs_assert(ovs_list_is_empty(&f->references));
+        ovn_flow_log(&f->flow, "remove_flows_for_as_ip");
+        hmap_remove(&flow_table->match_flow_table,
+                    &f->match_hmap_node);
+        track_or_destroy_for_flow_del(flow_table, f);
+        count++;
+    }
+
+    hmap_remove(&sar->ip_to_flow_map, &itfn->hmap_node);
+    mem_stats.sb_flow_ref_usage -= sizeof *itfn;
+    free(itfn);
+    return (count == expected_count);
+}
+
 /* Remove ovn_flows for the given "sb_to_flow" node in the uuid_flow_table.
  * Optionally log the message for each flow that is acturally removed, if
  * log_msg is not NULL. */
diff --git a/controller/ofctrl.h b/controller/ofctrl.h
index 4ec328c24..ad8f4be65 100644
--- a/controller/ofctrl.h
+++ b/controller/ofctrl.h
@@ -124,6 +124,11 @@  void ofctrl_flood_remove_flows(struct ovn_desired_flow_table *,
                                struct hmap *flood_remove_nodes);
 void ofctrl_flood_remove_add_node(struct hmap *flood_remove_nodes,
                                   const struct uuid *sb_uuid);
+bool ofctrl_remove_flows_for_as_ip(struct ovn_desired_flow_table *,
+                                   const struct uuid *lflow_uuid,
+                                   const struct addrset_info *,
+                                   size_t expected_count);
+
 void ovn_desired_flow_table_init(struct ovn_desired_flow_table *);
 void ovn_desired_flow_table_clear(struct ovn_desired_flow_table *);
 void ovn_desired_flow_table_destroy(struct ovn_desired_flow_table *);
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 24e09f78f..5119a3277 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -1410,11 +1410,6 @@  en_addr_sets_init(struct engine_node *node OVS_UNUSED,
     return as;
 }
 
-struct addr_set_diff {
-    struct expr_constant_set *added;
-    struct expr_constant_set *deleted;
-};
-
 static void
 en_addr_sets_clear_tracked_data(void *data)
 {
@@ -1486,6 +1481,14 @@  addr_sets_update(const struct sbrec_address_set_table *address_set_table,
                 expr_constant_set_integers_diff(cs_old, cs_new,
                                                 &as_diff->added,
                                                 &as_diff->deleted);
+                if (!as_diff->added && !as_diff->deleted) {
+                    /* The address set may have been updated, but the change
+                     * doesn't has any impact to the generated constant-set.
+                     * For example, ff::01 is changed to ff::00:01. */
+                    free(as_diff);
+                    expr_constant_set_destroy(cs_new);
+                    continue;
+                }
                 shash_add(updated, as->name, as_diff);
                 expr_const_sets_add(addr_sets, as->name, cs_new);
             }
@@ -2543,9 +2546,15 @@  lflow_output_addr_sets_handler(struct engine_node *node, void *data)
     }
     struct shash_node *shash_node;
     SHASH_FOR_EACH (shash_node, &as_data->updated) {
-        if (!lflow_handle_changed_ref(REF_TYPE_ADDRSET, shash_node->name,
-                                      &l_ctx_in, &l_ctx_out, &changed)) {
-            return false;
+        struct addr_set_diff *as_diff = shash_node->data;
+        if (!lflow_handle_addr_set_update(shash_node->name, as_diff, &l_ctx_in,
+                                          &l_ctx_out, &changed)) {
+            VLOG_DBG("Can't incrementally handle the change of address set %s."
+                     " Reprocess related lflows.", shash_node->name);
+            if (!lflow_handle_changed_ref(REF_TYPE_ADDRSET, shash_node->name,
+                                          &l_ctx_in, &l_ctx_out, &changed)) {
+                return false;
+            }
         }
         if (changed) {
             engine_set_node_state(node, EN_UPDATED);
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index b16162ef4..b5f19b442 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -928,7 +928,9 @@  priority=1100,ip,reg15=0x$port_key,metadata=0x$dp_key,nw_src=10.0.0.10 actions=d
 done
 
 reprocess_count_new=$(read_counter consider_logical_flow)
-AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
+# when the old/new AS size is smaller than 2, fallback to reprocessing, so
+# there are still 2 reprocessing when the AS size is below 2.
+AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
 ])
 
 # Add IPs to as1 for 10 times, 2 IPs each time.
@@ -1117,7 +1119,7 @@  priority=1100,tcp,reg15=0x$port_key,metadata=0x$dp_key,nw_src=10.0.0.10,tp_dst=3
 done
 
 reprocess_count_new=$(read_counter consider_logical_flow)
-AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
+AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
 ])
 
 # Add IPs to as1 for 10 times, 2 IPs each time.
@@ -1312,7 +1314,7 @@  priority=1100,ip,reg15=0x$port_key,metadata=0x$dp_key,nw_src=10.0.0.10,nw_dst=10
 done
 
 reprocess_count_new=$(read_counter consider_logical_flow)
-AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
+AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
 ])
 
 # Add 1 IP back to both ASes
@@ -1368,7 +1370,7 @@  priority=1100,ip,reg15=0x$port_key,metadata=0x$dp_key,nw_src=10.0.0.10,nw_dst=10
 done
 
 reprocess_count_new=$(read_counter consider_logical_flow)
-AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
+AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
 ])
 
 OVN_CLEANUP([hv1])
@@ -1462,6 +1464,8 @@  for i in $(seq 10); do
 done
 
 reprocess_count_new=$(read_counter consider_logical_flow)
+# In this case the two sub expr for as1 and as2 are merged, so we lose track of
+# address set information - can't handle deletion incrementally.
 AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
 ])
 
@@ -1562,6 +1566,8 @@  for i in $(seq 10); do
 done
 
 reprocess_count_new=$(read_counter consider_logical_flow)
+# In this case the as1 and as2 are merged to a single OR expr, so we lose track of
+# address set information - can't handle deletion incrementally.
 AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
 ])
 
@@ -1657,7 +1663,7 @@  priority=1100,ip,reg15=0x$port_key,metadata=0x$dp_key,nw_src=10.0.0.10,nw_dst=10
 done
 
 reprocess_count_new=$(read_counter consider_logical_flow)
-AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [10
+AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
 ])
 
 # Add IPs to as1 for 10 times, 2 IPs each time.
@@ -1945,7 +1951,7 @@  priority=1100,reg15=0x$port_key,metadata=0x$dp_key,dl_src=aa:aa:aa:aa:aa:05 acti
 done
 
 reprocess_count_new=$(read_counter consider_logical_flow)
-AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [5
+AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
 ])
 
 OVN_CLEANUP([hv1])
@@ -2025,7 +2031,7 @@  priority=1100,ipv6,reg15=0x$port_key,metadata=0x$dp_key,ipv6_src=ff::5 actions=d
 done
 
 reprocess_count_new=$(read_counter consider_logical_flow)
-AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [5
+AT_CHECK([echo $(($reprocess_count_new - $reprocess_count_old))], [0], [2
 ])
 
 OVN_CLEANUP([hv1])