[ovs-dev,v3] ovn: Avoid nb_cfg update notification flooding
diff mbox series

Message ID 1523993050-115039-1-git-send-email-hzhou8@ebay.com
State New
Headers show
Series
  • [ovs-dev,v3] ovn: Avoid nb_cfg update notification flooding
Related show

Commit Message

Han Zhou April 17, 2018, 7:24 p.m. UTC
nb_cfg as a mechanism to "ping" OVN control plane is very useful
in many ways. However, the current implementation will trigger
update notifications flooding in the whole control plane. Each
HV updates to SB the nb_cfg number and all these updates are
notified to all the other HVs, which is O(n^2). Although updates
are batched in fewers notifications than n^2, it still generates
significant load on SB DB and also on ovn-controllers.

To solve this problem and make the mechanism more useful in large
scale producation deployment, this patch separates the per HV
*private* data (write only by the owning chassis and not
interesting to any other HVs) from the Chassis table to a separate
table, so that each HV can conditionally monitor and get updates
only for its own record.

Test result shows great improvement:
In a test environment with 1K sandbox HVs, and 10K ports created
on 40 lswitches and 8 lrouters, do the sync test when the system
is idle, with command:

    time ovn-nbctl --wait=hv sync

Original result:
real    4m52.926s
user    0m0.328s
sys     0m0.004s

With this patch:
real    0m11.405s
user    0m0.316s
sys     0m0.016s

Signed-off-by: Han Zhou <hzhou8@ebay.com>
Acked-By: Venkata Anil <vkommadi@redhat.com>
---

Notes:
    v2->v3: updates to make the new table more reasonable

 ovn/controller/chassis.c        | 36 +++++++++++++++++++++++++++++++++++-
 ovn/controller/chassis.h        |  9 ++++++---
 ovn/controller/ovn-controller.c | 22 +++++++++++++++++-----
 ovn/northd/ovn-northd.c         | 22 +++++++++++++++++++---
 ovn/ovn-sb.ovsschema            | 10 ++++++++--
 ovn/ovn-sb.xml                  | 31 +++++++++++++++++++++++++++----
 6 files changed, 112 insertions(+), 18 deletions(-)

Patch
diff mbox series

diff --git a/ovn/controller/chassis.c b/ovn/controller/chassis.c
index 6b5286a..8f81194 100644
--- a/ovn/controller/chassis.c
+++ b/ovn/controller/chassis.c
@@ -72,12 +72,28 @@  get_cms_options(const struct smap *ext_ids)
     return smap_get_def(ext_ids, "ovn-cms-options", "");
 }
 
+static const struct sbrec_chassis_private*
+find_chassis_private(struct ovsdb_idl *ovnsb_idl, const char *chassis_id)
+{
+    const struct sbrec_chassis_private *chassis_private_rec;
+
+    SBREC_CHASSIS_PRIVATE_FOR_EACH (chassis_private_rec, ovnsb_idl) {
+        if (!strcmp(chassis_private_rec->chassis_name, chassis_id)) {
+            break;
+        }
+    }
+
+    return chassis_private_rec;
+}
+
 /* Returns this chassis's Chassis record, if it is available and is currently
  * amenable to a transaction. */
 const struct sbrec_chassis *
 chassis_run(struct controller_ctx *ctx, const char *chassis_id,
-            const struct ovsrec_bridge *br_int)
+            const struct ovsrec_bridge *br_int,
+            const struct sbrec_chassis_private **chassis_private)
 {
+    *chassis_private = NULL;
     if (!ctx->ovnsb_idl_txn) {
         return NULL;
     }
@@ -135,8 +151,18 @@  chassis_run(struct controller_ctx *ctx, const char *chassis_id,
     ds_chomp(&iface_types, ',');
     const char *iface_types_str = ds_cstr(&iface_types);
 
+    const struct sbrec_chassis_private *chassis_private_rec
+        = find_chassis_private(ctx->ovnsb_idl, chassis_id);
+    if (!chassis_private_rec) {
+        chassis_private_rec = sbrec_chassis_private_insert(ctx->ovnsb_idl_txn);
+        sbrec_chassis_private_set_chassis_name(chassis_private_rec,
+                                               chassis_id);
+    }
+    *chassis_private = chassis_private_rec;
+
     const struct sbrec_chassis *chassis_rec
         = get_chassis(ctx->ovnsb_idl, chassis_id);
+
     const char *encap_csum = smap_get_def(&cfg->external_ids,
                                           "ovn-encap-csum", "true");
     if (chassis_rec) {
@@ -256,6 +282,14 @@  chassis_cleanup(struct controller_ctx *ctx,
                                   "ovn-controller: unregistering chassis '%s'",
                                   chassis_rec->name);
         sbrec_chassis_delete(chassis_rec);
+        const struct sbrec_chassis_private *chassis_private_rec
+            = find_chassis_private(ctx->ovnsb_idl, chassis_rec->name);
+        if (chassis_private_rec) {
+            sbrec_chassis_private_delete(chassis_private_rec);
+        } else {
+            VLOG_WARN("Chassis_Private record didn't exist for chassis '%s'",
+                      chassis_rec->name);
+        }
     }
     return false;
 }
diff --git a/ovn/controller/chassis.h b/ovn/controller/chassis.h
index 2cc47fc..e4b29a8 100644
--- a/ovn/controller/chassis.h
+++ b/ovn/controller/chassis.h
@@ -17,6 +17,7 @@ 
 #define OVN_CHASSIS_H 1
 
 #include <stdbool.h>
+#include "ovn/lib/ovn-sb-idl.h"
 
 struct controller_ctx;
 struct ovsdb_idl;
@@ -24,9 +25,11 @@  struct ovsrec_bridge;
 struct sbrec_chassis;
 
 void chassis_register_ovs_idl(struct ovsdb_idl *);
-const struct sbrec_chassis *chassis_run(struct controller_ctx *,
-                                        const char *chassis_id,
-                                        const struct ovsrec_bridge *br_int);
+const struct sbrec_chassis *chassis_run(
+            struct controller_ctx *,
+            const char *chassis_id,
+            const struct ovsrec_bridge *br_int,
+            const struct sbrec_chassis_private **chassis_private);
 bool chassis_cleanup(struct controller_ctx *, const struct sbrec_chassis *);
 
 #endif /* ovn/chassis.h */
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 27a092d..1f178eb 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -155,6 +155,7 @@  update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
     struct ovsdb_idl_condition mb = OVSDB_IDL_CONDITION_INIT(&mb);
     struct ovsdb_idl_condition mg = OVSDB_IDL_CONDITION_INIT(&mg);
     struct ovsdb_idl_condition dns = OVSDB_IDL_CONDITION_INIT(&dns);
+    struct ovsdb_idl_condition chprv = OVSDB_IDL_CONDITION_INIT(&chprv);
     sbrec_port_binding_add_clause_type(&pb, OVSDB_F_EQ, "patch");
     /* XXX: We can optimize this, if we find a way to only monitor
      * ports that have a Gateway_Chassis that point's to our own
@@ -177,6 +178,12 @@  update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
         sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES, &l2);
         const struct smap l3 = SMAP_CONST1(&l3, "l3gateway-chassis", id);
         sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES, &l3);
+
+        /* Monitors Chassis_Private record for current chassis only */
+        sbrec_chassis_private_add_clause_chassis_name(&chprv, OVSDB_F_EQ,
+                                                     chassis->name);
+    } else {
+        ovsdb_idl_condition_add_clause_true(&chprv);
     }
     if (local_ifaces) {
         const char *name;
@@ -203,11 +210,13 @@  update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
     sbrec_mac_binding_set_condition(ovnsb_idl, &mb);
     sbrec_multicast_group_set_condition(ovnsb_idl, &mg);
     sbrec_dns_set_condition(ovnsb_idl, &dns);
+    sbrec_chassis_private_set_condition(ovnsb_idl, &chprv);
     ovsdb_idl_condition_destroy(&pb);
     ovsdb_idl_condition_destroy(&lf);
     ovsdb_idl_condition_destroy(&mb);
     ovsdb_idl_condition_destroy(&mg);
     ovsdb_idl_condition_destroy(&dns);
+    ovsdb_idl_condition_destroy(&chprv);
 }
 
 static const struct ovsrec_bridge *
@@ -640,7 +649,8 @@  main(int argc, char *argv[])
     create_ovnsb_indexes(ovnsb_idl_loop.idl);
     lport_init(ovnsb_idl_loop.idl);
 
-    ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
+    ovsdb_idl_omit_alert(ovnsb_idl_loop.idl,
+                         &sbrec_chassis_private_col_nb_cfg);
     update_sb_monitors(ovnsb_idl_loop.idl, NULL, NULL, NULL);
     ovsdb_idl_get_initial_snapshot(ovnsb_idl_loop.idl);
 
@@ -705,8 +715,9 @@  main(int argc, char *argv[])
         chassis_index_init(&chassis_index, ctx.ovnsb_idl);
 
         const struct sbrec_chassis *chassis = NULL;
+        const struct sbrec_chassis_private *chassis_private = NULL;
         if (chassis_id) {
-            chassis = chassis_run(&ctx, chassis_id, br_int);
+            chassis = chassis_run(&ctx, chassis_id, br_int, &chassis_private);
             encaps_run(&ctx, br_int, chassis_id);
             bfd_calculate_active_tunnels(br_int, &active_tunnels);
             binding_run(&ctx, br_int, chassis,
@@ -758,10 +769,11 @@  main(int argc, char *argv[])
 
                     hmap_destroy(&flow_table);
                 }
-                if (ctx.ovnsb_idl_txn) {
+                if (ctx.ovnsb_idl_txn && chassis_private) {
                     int64_t cur_cfg = ofctrl_get_cur_cfg();
-                    if (cur_cfg && cur_cfg != chassis->nb_cfg) {
-                        sbrec_chassis_set_nb_cfg(chassis, cur_cfg);
+                    if (cur_cfg && cur_cfg != chassis_private->nb_cfg) {
+                        sbrec_chassis_private_set_nb_cfg(chassis_private,
+                                                         cur_cfg);
                     }
                 }
             }
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index b55f5f8..60ecbc0 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -6636,6 +6636,11 @@  static const char *rbac_chassis_auth[] =
 static const char *rbac_chassis_update[] =
     {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches"};
 
+static const char *rbac_chassis_private_auth[] =
+    {"chassis_name"};
+static const char *rbac_chassis_private_update[] =
+    {"nb_cfg"};
+
 static const char *rbac_encap_auth[] =
     {"chassis_name"};
 static const char *rbac_encap_update[] =
@@ -6669,6 +6674,14 @@  static struct rbac_perm_cfg {
         .n_update = ARRAY_SIZE(rbac_chassis_update),
         .row = NULL
     },{
+        .table = "Chassis_Private",
+        .auth = rbac_chassis_private_auth,
+        .n_auth = ARRAY_SIZE(rbac_chassis_private_auth),
+        .insdel = true,
+        .update = rbac_chassis_private_update,
+        .n_update = ARRAY_SIZE(rbac_chassis_private_update),
+        .row = NULL
+    },{
         .table = "Encap",
         .auth = rbac_encap_auth,
         .n_auth = ARRAY_SIZE(rbac_encap_auth),
@@ -6829,9 +6842,9 @@  update_northbound_cfg(struct northd_context *ctx,
     /* Update northbound hv_cfg if appropriate. */
     if (nbg) {
         /* Find minimum nb_cfg among all chassis. */
-        const struct sbrec_chassis *chassis;
+        const struct sbrec_chassis_private *chassis;
         int64_t hv_cfg = nbg->nb_cfg;
-        SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
+        SBREC_CHASSIS_PRIVATE_FOR_EACH (chassis, ctx->ovnsb_idl) {
             if (chassis->nb_cfg < hv_cfg) {
                 hv_cfg = chassis->nb_cfg;
             }
@@ -7067,9 +7080,12 @@  main(int argc, char *argv[])
     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_permission_col_update);
 
     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
-    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
     ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);
 
+    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis_private);
+    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
+                         &sbrec_chassis_private_col_nb_cfg);
+
     /* Ensure that only a single ovn-northd is active in the deployment by
      * acquiring a lock called "ovn_northd" on the southbound database
      * and then only performing DB transactions if the lock is held. */
diff --git a/ovn/ovn-sb.ovsschema b/ovn/ovn-sb.ovsschema
index 9e271d4..1c63c10 100644
--- a/ovn/ovn-sb.ovsschema
+++ b/ovn/ovn-sb.ovsschema
@@ -1,7 +1,7 @@ 
 {
     "name": "OVN_Southbound",
-    "version": "1.15.0",
-    "cksum": "1839738004 13639",
+    "version": "1.16.0",
+    "cksum": "3896799452 13874",
     "tables": {
         "SB_Global": {
             "columns": {
@@ -36,6 +36,12 @@ 
                              "min": 0, "max": "unlimited"}}},
             "isRoot": true,
             "indexes": [["name"]]},
+        "Chassis_Private": {
+            "columns": {
+                "chassis_name": {"type": "string"},
+                "nb_cfg": {"type": {"key": "integer"}}},
+            "isRoot": true,
+            "indexes": [["chassis_name"]]},
         "Encap": {
             "columns": {
                 "type": {"type": {"key": {
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index 5d23774..9c71ddf 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -212,10 +212,8 @@ 
     </column>
 
     <column name="nb_cfg">
-      Sequence number for the configuration.  When <code>ovn-controller</code>
-      updates the configuration of a chassis from the contents of the
-      southbound database, it copies <ref table="SB_Global" column="nb_cfg"/>
-      from the <ref table="SB_Global"/> table into this column.
+      Deprecated. This column is replaced by the <ref table="Chassis_Private"
+      column="nb_cfg"/> column of the <ref table="Chassis_Private"/> table.
     </column>
 
     <column name="external_ids" key="ovn-bridge-mappings">
@@ -291,6 +289,31 @@ 
     </group>
   </table>
 
+  <table name="Chassis_Private" title="Chassis Private">
+    <p>
+      Each row in this table maintains per chassis private data that are
+      accessed only by the owning chassis (write only) and ovn-northd, not by
+      any other chassis.  These data are stored in this separate table instead
+      of the <ref table="Chassis"/> table for performance considerations:
+      the rows in this table can be conditionally monitored by chassises so
+      that each chassis only get update notifications for its own row, to avoid
+      unnecessary chassis private data update flooding in a large scale
+      deployment.  (Future: this separation can be avoided if ovsdb conditional
+      monitoring is supported on a set of columns)
+    </p>
+
+    <column name="chassis_name">
+      The name of the chassis that owns these chassis-private data.
+    </column>
+
+    <column name="nb_cfg">
+      Sequence number for the configuration.  When <code>ovn-controller</code>
+      updates the configuration of a chassis from the contents of the
+      southbound database, it copies <ref table="SB_Global" column="nb_cfg"/>
+      from the <ref table="SB_Global"/> table into this column.
+    </column>
+  </table>
+
   <table name="Encap" title="Encapsulation Types">
     <p>
       The <ref column="encaps" table="Chassis"/> column in the <ref