diff mbox series

[ovs-dev,branch-20.03] chassis: Do not try to guess system-id changes.

Message ID 1607949185-1273-1-git-send-email-dceara@redhat.com
State Accepted
Headers show
Series [ovs-dev,branch-20.03] chassis: Do not try to guess system-id changes. | expand

Commit Message

Dumitru Ceara Dec. 14, 2020, 12:33 p.m. UTC
When the OVS system-id changes ovn-controller needs external (CMS) help
in order to update its own Chassis/Chassis_private records, i.e., the
CMS has to ensure that either ovn-controller is stopped (so that
ovn-controller cleans up its old Chassis/Chassis_private records) or
that after the system-id is changed, the stale Chassis/Chassis_private
records are destroyed externally.

This patch reverts the previous efforts to have ovn-controller reuse
stale Chassis records and documents how the system-id change operation
needs to be executed.  The main problem with reusing stale records is
that there's no safe way to make it work when RBAC is enabled.

Suggestedy-by: Han Zhou <hzhou@ovn.org>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374608.html
Fixes: f26c4a530bca ("ovn-controller: Fix chassis ovn-sbdb record init")
Fixes: 4465f553ee70 ("ovn-controller: Update stale chassis entry at init")
Fixes: 94a32fca2d2b ("chassis: Fix the way encaps are updated for a chassis record.")
Fixes: dce1af31b550 ("chassis: Fix chassis_private record updates when the system-id changes.")
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Mark Michelson <mmichels@redhat.com>
(cherry picked from commit fc359bfe934290baeeeed1bc78a3f2a919421fa3)
---
 controller/chassis.c            | 119 +++-------------------------------------
 controller/chassis.h            |   2 -
 controller/ovn-controller.8.xml |   7 ++-
 controller/ovn-controller.c     |  22 ++++----
 tests/ovn-controller.at         |  22 ++++++--
 5 files changed, 45 insertions(+), 127 deletions(-)

Comments

0-day Robot Dec. 14, 2020, 1 p.m. UTC | #1
Bleep bloop.  Greetings Dumitru Ceara, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


build:
gcc -std=gnu99 -DHAVE_CONFIG_H -I.   -I ./include  -I ./include -I ./ovn -I ./include -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/include -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/include -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/lib -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/lib -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR -I ./lib -I ./lib    -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror  -g -O2 -MT controller/pinctrl.o -MD -MP -MF $depbase.Tpo -c -o controller/pinctrl.o controller/pinctrl.c &&\
mv -f $depbase.Tpo $depbase.Po
depbase=`echo controller/patch.o | sed 's|[^/]*$|.deps/&|;s|\.o$||'`;\
gcc -std=gnu99 -DHAVE_CONFIG_H -I.   -I ./include  -I ./include -I ./ovn -I ./include -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/include -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/include -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/lib -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/lib -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR -I ./lib -I ./lib    -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror  -g -O2 -MT controller/patch.o -MD -MP -MF $depbase.Tpo -c -o controller/patch.o controller/patch.c &&\
mv -f $depbase.Tpo $depbase.Po
depbase=`echo controller/ovn-controller.o | sed 's|[^/]*$|.deps/&|;s|\.o$||'`;\
gcc -std=gnu99 -DHAVE_CONFIG_H -I.   -I ./include  -I ./include -I ./ovn -I ./include -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/include -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/include -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/lib -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/lib -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR -I ./lib -I ./lib    -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror  -g -O2 -MT controller/ovn-controller.o -MD -MP -MF $depbase.Tpo -c -o controller/ovn-controller.o controller/ovn-controller.c &&\
mv -f $depbase.Tpo $depbase.Po
depbase=`echo controller/physical.o | sed 's|[^/]*$|.deps/&|;s|\.o$||'`;\
gcc -std=gnu99 -DHAVE_CONFIG_H -I.   -I ./include  -I ./include -I ./ovn -I ./include -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/include -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/include -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/lib -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR/lib -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR -I /var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace/OVSDIR -I ./lib -I ./lib    -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror  -g -O2 -MT controller/physical.o -MD -MP -MF $depbase.Tpo -c -o controller/physical.o controller/physical.c &&\
mv -f $depbase.Tpo $depbase.Po
controller/physical.c: In function ‘put_remote_port_redirect_overlay’:
controller/physical.c:379:23: error: ‘struct ofpact_bundle’ has no member named ‘n_slaves’
             if (bundle->n_slaves >= BUNDLE_MAX_SLAVES) {
                       ^
controller/physical.c:379:37: error: ‘BUNDLE_MAX_SLAVES’ undeclared (first use in this function)
             if (bundle->n_slaves >= BUNDLE_MAX_SLAVES) {
                                     ^
controller/physical.c:379:37: note: each undeclared identifier is reported only once for each function it appears in
controller/physical.c:387:19: error: ‘struct ofpact_bundle’ has no member named ‘n_slaves’
             bundle->n_slaves++;
                   ^
make[1]: *** [controller/physical.o] Error 1
make[1]: Leaving directory `/var/lib/jenkins/jobs/0day_robot_upstream_build_ovn_from_pw/workspace'
make: *** [all] Error 2


Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
diff mbox series

Patch

diff --git a/controller/chassis.c b/controller/chassis.c
index 522893e..8358ca3 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -37,44 +37,6 @@  VLOG_DEFINE_THIS_MODULE(chassis);
 #endif /* HOST_NAME_MAX */
 
 /*
- * Structure to hold chassis specific state (currently just chassis-id)
- * to avoid database lookups when changes happen while the controller is
- * running.
- */
-struct chassis_info {
-    /* Last ID we initialized the Chassis SB record with. */
-    struct ds id;
-
-    /* True if Chassis SB record is initialized, false otherwise. */
-    uint32_t id_inited : 1;
-};
-
-static struct chassis_info chassis_state = {
-    .id = DS_EMPTY_INITIALIZER,
-    .id_inited = false,
-};
-
-static void
-chassis_info_set_id(struct chassis_info *info, const char *id)
-{
-    ds_clear(&info->id);
-    ds_put_cstr(&info->id, id);
-    info->id_inited = true;
-}
-
-static bool
-chassis_info_id_inited(const struct chassis_info *info)
-{
-    return info->id_inited;
-}
-
-static const char *
-chassis_info_id(const struct chassis_info *info)
-{
-    return ds_cstr_ro(&info->id);
-}
-
-/*
  * Structure for storing the chassis config parsed from the ovs table.
  */
 struct ovs_chassis_cfg {
@@ -381,9 +343,6 @@  chassis_tunnels_changed(const struct sset *encap_type_set,
     size_t encap_type_count = 0;
 
     for (int i = 0; i < chassis_rec->n_encaps; i++) {
-        if (strcmp(chassis_rec->name, chassis_rec->encaps[i]->chassis_name)) {
-            return true;
-        }
 
         if (!sset_contains(encap_type_set, chassis_rec->encaps[i]->type)) {
             return true;
@@ -455,69 +414,24 @@  chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn,
     return encaps;
 }
 
-/*
- * Returns a pointer to a chassis record from 'chassis_table' that
- * matches at least one tunnel config.
- */
-static const struct sbrec_chassis *
-chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
-                         const struct ovs_chassis_cfg *ovs_cfg,
-                         const char *chassis_id)
-{
-    const struct sbrec_chassis *chassis_rec;
-
-    SBREC_CHASSIS_TABLE_FOR_EACH (chassis_rec, chassis_table) {
-        for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
-            if (sset_contains(&ovs_cfg->encap_type_set,
-                              chassis_rec->encaps[i]->type) &&
-                    sset_contains(&ovs_cfg->encap_ip_set,
-                                  chassis_rec->encaps[i]->ip)) {
-                return chassis_rec;
-            }
-            if (strcmp(chassis_rec->name, chassis_id) == 0) {
-                return chassis_rec;
-            }
-        }
-    }
-
-    return NULL;
-}
-
 /* If this is a chassis config update after we initialized the record once
  * then we should always be able to find it with the ID we saved in
  * chassis_state.
- * Otherwise (i.e., first time we create the record) then we check if there's
- * a stale record from a previous controller run that didn't end gracefully
- * and reuse it. If not then we create a new record.
+ * Otherwise (i.e., first time we create the record or if the system-id
+ * changed) we create a new record.
  */
 static const struct sbrec_chassis *
 chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn,
                    struct ovsdb_idl_index *sbrec_chassis_by_name,
-                   const struct sbrec_chassis_table *chassis_table,
-                   const struct ovs_chassis_cfg *ovs_cfg,
                    const char *chassis_id)
 {
-    const struct sbrec_chassis *chassis_rec;
-
-    if (chassis_info_id_inited(&chassis_state)) {
-        chassis_rec = chassis_lookup_by_name(sbrec_chassis_by_name,
-                                             chassis_info_id(&chassis_state));
-        if (!chassis_rec) {
-            VLOG_DBG("Could not find Chassis, will create it"
-                     ": stored (%s) ovs (%s)",
-                     chassis_info_id(&chassis_state), chassis_id);
-            if (ovnsb_idl_txn) {
-                /* Recreate the chassis record.  */
-                chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
-            }
-        }
-    } else {
-        chassis_rec =
-            chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id);
+    const struct sbrec_chassis *chassis_rec =
+        chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
 
-        if (!chassis_rec && ovnsb_idl_txn) {
-            chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
-        }
+    if (!chassis_rec && ovnsb_idl_txn) {
+        /* Create the chassis record. */
+        VLOG_DBG("Could not find Chassis, will create it: %s", chassis_id);
+        chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
     }
     return chassis_rec;
 }
@@ -586,7 +500,6 @@  const struct sbrec_chassis *
 chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
             struct ovsdb_idl_index *sbrec_chassis_by_name,
             const struct ovsrec_open_vswitch_table *ovs_table,
-            const struct sbrec_chassis_table *chassis_table,
             const char *chassis_id,
             const struct ovsrec_bridge *br_int,
             const struct sset *transport_zones)
@@ -601,7 +514,7 @@  chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
 
     const struct sbrec_chassis *chassis_rec =
         chassis_get_record(ovnsb_idl_txn, sbrec_chassis_by_name,
-                           chassis_table, &ovs_cfg, chassis_id);
+                           chassis_id);
 
     /* If we found (or created) a record, update it with the correct config
      * and store the current chassis_id for fast lookup in case it gets
@@ -610,7 +523,6 @@  chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
     if (chassis_rec && ovnsb_idl_txn) {
         chassis_update(chassis_rec, ovnsb_idl_txn, &ovs_cfg, chassis_id,
                        transport_zones);
-        chassis_info_set_id(&chassis_state, chassis_id);
         ovsdb_idl_txn_add_comment(ovnsb_idl_txn,
                                   "ovn-controller: registering chassis '%s'",
                                   chassis_id);
@@ -682,16 +594,3 @@  chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
     }
     return false;
 }
-
-/*
- * Returns the last initialized chassis-id.
- */
-const char *
-chassis_get_id(void)
-{
-    if (chassis_info_id_inited(&chassis_state)) {
-        return chassis_info_id(&chassis_state);
-    }
-
-    return NULL;
-}
diff --git a/controller/chassis.h b/controller/chassis.h
index 178d295..de3971e 100644
--- a/controller/chassis.h
+++ b/controller/chassis.h
@@ -34,7 +34,6 @@  const struct sbrec_chassis *chassis_run(
     struct ovsdb_idl_txn *ovnsb_idl_txn,
     struct ovsdb_idl_index *sbrec_chassis_by_name,
     const struct ovsrec_open_vswitch_table *,
-    const struct sbrec_chassis_table *,
     const char *chassis_id, const struct ovsrec_bridge *br_int,
     const struct sset *transport_zones);
 bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
@@ -42,7 +41,6 @@  bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
 bool chassis_get_mac(const struct sbrec_chassis *chassis,
                      const char *bridge_mapping,
                      struct eth_addr *chassis_mac);
-const char *chassis_get_id(void);
 const char * get_chassis_mac_mappings(const struct smap *ext_ids);
 
 
diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
index 76bbbdc..e10056a 100644
--- a/controller/ovn-controller.8.xml
+++ b/controller/ovn-controller.8.xml
@@ -68,7 +68,12 @@ 
     </p>
     <dl>
       <dt><code>external_ids:system-id</code></dt>
-      <dd>The chassis name to use in the Chassis table.</dd>
+      <dd>The chassis name to use in the Chassis table.  Changing the
+      <code>system-id</code> while <code>ovn-controller</code> is running is
+      not directly supported.  Users have two options: either first
+      gracefully stop <code>ovn-controller</code> or manually delete the
+      stale <code>Chassis</code> record after changing the
+      <code>system-id</code>.</dd>
 
       <dt><code>external_ids:hostname</code></dt>
       <dd>The hostname to use in the Chassis table.</dd>
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index ee3ae85..51bad09 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -1118,7 +1118,7 @@  runtime_data_sb_port_binding_handler(struct engine_node *node, void *data)
     struct ovsrec_bridge_table *bridge_table =
         (struct ovsrec_bridge_table *)EN_OVSDB_GET(
             engine_get_input("OVS_bridge", node));
-    const char *chassis_id = chassis_get_id();
+    const char *chassis_id = get_ovs_chassis_id(ovs_table);
     const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
 
     ovs_assert(br_int && chassis_id);
@@ -1273,7 +1273,7 @@  static void init_physical_ctx(struct engine_node *node,
         (struct ovsrec_bridge_table *)EN_OVSDB_GET(
             engine_get_input("OVS_bridge", node));
     const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
-    const char *chassis_id = chassis_get_id();
+    const char *chassis_id = get_ovs_chassis_id(ovs_table);
     const struct sbrec_chassis *chassis = NULL;
     struct ovsdb_idl_index *sbrec_chassis_by_name =
         engine_ovsdb_node_get_index(
@@ -1338,7 +1338,11 @@  static void init_lflow_ctx(struct engine_node *node,
         (struct sbrec_multicast_group_table *)EN_OVSDB_GET(
             engine_get_input("SB_multicast_group", node));
 
-    const char *chassis_id = chassis_get_id();
+    struct ovsrec_open_vswitch_table *ovs_table =
+        (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
+            engine_get_input("OVS_open_vswitch", node));
+
+    const char *chassis_id = get_ovs_chassis_id(ovs_table);
     const struct sbrec_chassis *chassis = NULL;
     struct ovsdb_idl_index *sbrec_chassis_by_name =
         engine_ovsdb_node_get_index(
@@ -1417,7 +1421,7 @@  en_flow_output_run(struct engine_node *node, void *data)
         (struct ovsrec_bridge_table *)EN_OVSDB_GET(
             engine_get_input("OVS_bridge", node));
     const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
-    const char *chassis_id = chassis_get_id();
+    const char *chassis_id = get_ovs_chassis_id(ovs_table);
 
     struct ovsdb_idl_index *sbrec_chassis_by_name =
         engine_ovsdb_node_get_index(
@@ -1617,7 +1621,7 @@  _flow_output_resource_ref_handler(struct engine_node *node, void *data,
         (struct ovsrec_bridge_table *)EN_OVSDB_GET(
             engine_get_input("OVS_bridge", node));
     const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
-    const char *chassis_id = chassis_get_id();
+    const char *chassis_id = get_ovs_chassis_id(ovs_table);
 
     struct ovsdb_idl_index *sbrec_chassis_by_name =
         engine_ovsdb_node_get_index(
@@ -1991,16 +1995,14 @@  main(int argc, char *argv[])
                 ovsrec_bridge_table_get(ovs_idl_loop.idl);
             const struct ovsrec_open_vswitch_table *ovs_table =
                 ovsrec_open_vswitch_table_get(ovs_idl_loop.idl);
-            const struct sbrec_chassis_table *chassis_table =
-                sbrec_chassis_table_get(ovnsb_idl_loop.idl);
             const struct ovsrec_bridge *br_int =
                 process_br_int(ovs_idl_txn, bridge_table, ovs_table);
             const char *chassis_id = get_ovs_chassis_id(ovs_table);
             const struct sbrec_chassis *chassis = NULL;
             if (chassis_id) {
                 chassis = chassis_run(ovnsb_idl_txn, sbrec_chassis_by_name,
-                                      ovs_table, chassis_table, chassis_id,
-                                      br_int, &transport_zones);
+                                      ovs_table, chassis_id, br_int,
+                                      &transport_zones);
             }
 
             if (br_int) {
@@ -2225,7 +2227,7 @@  main(int argc, char *argv[])
 
             const struct ovsrec_bridge *br_int = get_br_int(bridge_table,
                                                             ovs_table);
-            const char *chassis_id = chassis_get_id();
+            const char *chassis_id = get_ovs_chassis_id(ovs_table);
             const struct sbrec_chassis *chassis
                 = (chassis_id
                    ? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id)
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index 63b2581..57d2460 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -187,13 +187,27 @@  OVS_WAIT_UNTIL([
     test "${expected_iface_types}" = "${chassis_iface_types}"
 ])
 
-# Change the value of external_ids:system-id and make sure it's mirrored
-# in the Chassis record in the OVN_Southbound database.
+# Change the value of external_ids:system-id.
+# This requires operator intervention and removal of the stale chassis record.
+# Until that happens ovn-controller fails to create the records due to
+# constraint violation on the Encap table.
 sysid=${sysid}-foo
 ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
+
+OVS_WAIT_UNTIL([
+    grep -q 'Transaction causes multiple rows in \\"Encap\\" table to have identical values (geneve and \\"192.168.0.1\\") for index on columns \\"type\\" and \\"ip\\".' hv/ovn-controller.log
+])
+
+# Destroy the stale entry manually and ovn-controller should now be able
+# to create new ones.
+ovn-sbctl destroy chassis .
+OVS_WAIT_UNTIL([
+    test $(ovn-sbctl --columns _uuid find chassis name=${sysid} | wc -l) -eq 1
+])
+
+# Only one Chassis record should exist.
 OVS_WAIT_UNTIL([
-    chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
-    test "${sysid}" = "${chassis_id}"
+    test $(ovn-sbctl --columns _uuid list chassis | wc -l) -eq 1
 ])
 
 # Gracefully terminate daemons