diff mbox series

[ovs-dev] chassis: Do not try to guess system-id changes.

Message ID 1607455279-21771-1-git-send-email-dceara@redhat.com
State Accepted
Headers show
Series [ovs-dev] chassis: Do not try to guess system-id changes. | expand

Commit Message

Dumitru Ceara Dec. 8, 2020, 7:21 p.m. UTC
When the OVS system-id changes ovn-controller needs external (CMS) help
in order to update its own Chassis/Chassis_private records, i.e., the
CMS has to ensure that either ovn-controller is stopped (so that
ovn-controller cleans up its old Chassis/Chassis_private records) or
that after the system-id is changed, the stale Chassis/Chassis_private
records are destroyed externally.

This patch reverts the previous efforts to have ovn-controller reuse
stale Chassis records and documents how the system-id change operation
needs to be executed.  The main problem with reusing stale records is
that there's no safe way to make it work when RBAC is enabled.

Suggestedy-by: Han Zhou <hzhou@ovn.org>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374608.html
Fixes: f26c4a530bca ("ovn-controller: Fix chassis ovn-sbdb record init")
Fixes: 4465f553ee70 ("ovn-controller: Update stale chassis entry at init")
Fixes: 94a32fca2d2b ("chassis: Fix the way encaps are updated for a chassis record.")
Fixes: dce1af31b550 ("chassis: Fix chassis_private record updates when the system-id changes.")
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
---
 controller/chassis.c            | 177 ++++------------------------------------
 controller/chassis.h            |   3 -
 controller/ovn-controller.8.xml |   7 +-
 controller/ovn-controller.c     |  21 +++--
 northd/ovn-northd.c             |   6 +-
 tests/ovn-controller.at         |  44 +++-------
 6 files changed, 47 insertions(+), 211 deletions(-)

Comments

Mark Michelson Dec. 9, 2020, 9:57 p.m. UTC | #1
Acked-by: Mark Michelson <mmichels@redhat.com>

On 12/8/20 2:21 PM, Dumitru Ceara wrote:
> When the OVS system-id changes ovn-controller needs external (CMS) help
> in order to update its own Chassis/Chassis_private records, i.e., the
> CMS has to ensure that either ovn-controller is stopped (so that
> ovn-controller cleans up its old Chassis/Chassis_private records) or
> that after the system-id is changed, the stale Chassis/Chassis_private
> records are destroyed externally.
> 
> This patch reverts the previous efforts to have ovn-controller reuse
> stale Chassis records and documents how the system-id change operation
> needs to be executed.  The main problem with reusing stale records is
> that there's no safe way to make it work when RBAC is enabled.
> 
> Suggestedy-by: Han Zhou <hzhou@ovn.org>
> Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374608.html
> Fixes: f26c4a530bca ("ovn-controller: Fix chassis ovn-sbdb record init")
> Fixes: 4465f553ee70 ("ovn-controller: Update stale chassis entry at init")
> Fixes: 94a32fca2d2b ("chassis: Fix the way encaps are updated for a chassis record.")
> Fixes: dce1af31b550 ("chassis: Fix chassis_private record updates when the system-id changes.")
> Signed-off-by: Dumitru Ceara <dceara@redhat.com>
> ---
>   controller/chassis.c            | 177 ++++------------------------------------
>   controller/chassis.h            |   3 -
>   controller/ovn-controller.8.xml |   7 +-
>   controller/ovn-controller.c     |  21 +++--
>   northd/ovn-northd.c             |   6 +-
>   tests/ovn-controller.at         |  44 +++-------
>   6 files changed, 47 insertions(+), 211 deletions(-)
> 
> diff --git a/controller/chassis.c b/controller/chassis.c
> index 7748fb9..b4d4b0e 100644
> --- a/controller/chassis.c
> +++ b/controller/chassis.c
> @@ -37,44 +37,6 @@ VLOG_DEFINE_THIS_MODULE(chassis);
>   #endif /* HOST_NAME_MAX */
>   
>   /*
> - * Structure to hold chassis specific state (currently just chassis-id)
> - * to avoid database lookups when changes happen while the controller is
> - * running.
> - */
> -struct chassis_info {
> -    /* Last ID we initialized the Chassis SB record with. */
> -    struct ds id;
> -
> -    /* True if Chassis SB record is initialized, false otherwise. */
> -    uint32_t id_inited : 1;
> -};
> -
> -static struct chassis_info chassis_state = {
> -    .id = DS_EMPTY_INITIALIZER,
> -    .id_inited = false,
> -};
> -
> -static void
> -chassis_info_set_id(struct chassis_info *info, const char *id)
> -{
> -    ds_clear(&info->id);
> -    ds_put_cstr(&info->id, id);
> -    info->id_inited = true;
> -}
> -
> -static bool
> -chassis_info_id_inited(const struct chassis_info *info)
> -{
> -    return info->id_inited;
> -}
> -
> -static const char *
> -chassis_info_id(const struct chassis_info *info)
> -{
> -    return ds_cstr_ro(&info->id);
> -}
> -
> -/*
>    * Structure for storing the chassis config parsed from the ovs table.
>    */
>   struct ovs_chassis_cfg {
> @@ -420,6 +382,9 @@ chassis_tunnels_changed(const struct sset *encap_type_set,
>       bool  changed = false;
>   
>       for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
> +        if (strcmp(chassis_rec->name, chassis_rec->encaps[i]->chassis_name)) {
> +            return true;
> +        }
>   
>           if (!sset_contains(encap_type_set, chassis_rec->encaps[i]->type)) {
>               changed = true;
> @@ -500,54 +465,11 @@ chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn,
>       return encaps;
>   }
>   
> -/*
> - * Updates encaps for a given chassis. This can happen when the chassis
> - * name has changed. Also, the only thing we support updating is the
> - * chassis_name. For other changes the encaps will be recreated.
> - */
> -static void
> -chassis_update_encaps(const struct sbrec_chassis *chassis)
> -{
> -    for (size_t i = 0; i < chassis->n_encaps; i++) {
> -        sbrec_encap_set_chassis_name(chassis->encaps[i], chassis->name);
> -    }
> -}
> -
> -/*
> - * Returns a pointer to a chassis record from 'chassis_table' that
> - * matches at least one tunnel config.
> - */
> -static const struct sbrec_chassis *
> -chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
> -                         const struct ovs_chassis_cfg *ovs_cfg,
> -                         const char *chassis_id)
> -{
> -    const struct sbrec_chassis *chassis_rec;
> -
> -    SBREC_CHASSIS_TABLE_FOR_EACH (chassis_rec, chassis_table) {
> -        for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
> -            if (sset_contains(&ovs_cfg->encap_type_set,
> -                              chassis_rec->encaps[i]->type) &&
> -                    sset_contains(&ovs_cfg->encap_ip_set,
> -                                  chassis_rec->encaps[i]->ip)) {
> -                return chassis_rec;
> -            }
> -            if (strcmp(chassis_rec->name, chassis_id) == 0) {
> -                return chassis_rec;
> -            }
> -        }
> -    }
> -
> -    return NULL;
> -}
> -
>   /* If this is a chassis config update after we initialized the record once
>    * then we should always be able to find it with the ID we saved in
>    * chassis_state.
>    * Otherwise (i.e., first time we create the record or if the system-id
> - * changed) then we check if there's a stale record from a previous
> - * controller run that didn't end gracefully and reuse it. If not then we
> - * create a new record.
> + * changed) we create a new record.
>    *
>    * Sets '*chassis_rec' to point to the local chassis record.
>    * Returns true if this record was already in the database, false if it was
> @@ -556,33 +478,16 @@ chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
>   static bool
>   chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                      struct ovsdb_idl_index *sbrec_chassis_by_name,
> -                   const struct sbrec_chassis_table *chassis_table,
> -                   const struct ovs_chassis_cfg *ovs_cfg,
>                      const char *chassis_id,
>                      const struct sbrec_chassis **chassis_rec)
>   {
> -    const struct sbrec_chassis *chassis = NULL;
> -
> -    if (chassis_info_id_inited(&chassis_state)) {
> -        chassis = chassis_lookup_by_name(sbrec_chassis_by_name,
> -                                         chassis_info_id(&chassis_state));
> -        if (!chassis) {
> -            VLOG_DBG("Could not find Chassis, will check if the id changed: "
> -                     "stored (%s) ovs (%s)",
> -                     chassis_info_id(&chassis_state), chassis_id);
> -        }
> -    }
> -
> -    if (!chassis) {
> -        chassis = chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id);
> -    }
> +    const struct sbrec_chassis *chassis =
> +        chassis = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
>   
> -    if (!chassis) {
> -        /* Recreate the chassis record. */
> +    if (!chassis && ovnsb_idl_txn) {
> +        /* Create the chassis record. */
>           VLOG_DBG("Could not find Chassis, will create it: %s", chassis_id);
> -        if (ovnsb_idl_txn) {
> -            *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
> -        }
> +        *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
>           return false;
>       }
>   
> @@ -650,7 +555,6 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
>                                   &ovs_cfg->encap_ip_set, ovs_cfg->encap_csum,
>                                   chassis_rec);
>       if (!tunnels_changed) {
> -        chassis_update_encaps(chassis_rec);
>           return updated;
>       }
>   
> @@ -666,33 +570,11 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
>       return true;
>   }
>   
> -/*
> - * Returns a pointer to a chassis_private record from 'chassis_pvt_table' that
> - * matches the chassis record.
> - */
> -static const struct sbrec_chassis_private *
> -chassis_private_get_stale_record(
> -    const struct sbrec_chassis_private_table *chassis_pvt_table,
> -    const struct sbrec_chassis *chassis)
> -{
> -    const struct sbrec_chassis_private *chassis_pvt_rec;
> -
> -    SBREC_CHASSIS_PRIVATE_TABLE_FOR_EACH (chassis_pvt_rec, chassis_pvt_table) {
> -        if (chassis_pvt_rec->chassis == chassis) {
> -            return chassis_pvt_rec;
> -        }
> -    }
> -
> -    return NULL;
> -}
> -
>   /* If this is a chassis_private config update after we initialized the record
>    * once then we should always be able to find it with the ID we saved in
>    * chassis_state.
>    * Otherwise (i.e., first time we created the chassis record or if the
> - * system-id changed) then we check if there's a stale record from a previous
> - * controller run that didn't end gracefully and reuse it. If not then we
> - * create a new record.
> + * system-id changed) we create a new record.
>    *
>    * Returns the local chassis record.
>    */
> @@ -700,21 +582,11 @@ static const struct sbrec_chassis_private *
>   chassis_private_get_record(
>       struct ovsdb_idl_txn *ovnsb_idl_txn,
>       struct ovsdb_idl_index *sbrec_chassis_pvt_by_name,
> -    const struct sbrec_chassis_private_table *chassis_pvt_table,
> -    const struct sbrec_chassis *chassis)
> +    const char *chassis_id)
>   {
> -    const struct sbrec_chassis_private *chassis_p = NULL;
> -
> -    if (chassis_info_id_inited(&chassis_state)) {
> -        chassis_p =
> +    const struct sbrec_chassis_private *chassis_p =
>               chassis_private_lookup_by_name(sbrec_chassis_pvt_by_name,
> -                                           chassis_info_id(&chassis_state));
> -    }
> -
> -    if (!chassis_p) {
> -        chassis_p = chassis_private_get_stale_record(chassis_pvt_table,
> -                                                     chassis);
> -    }
> +                                           chassis_id);
>   
>       if (!chassis_p && ovnsb_idl_txn) {
>           return sbrec_chassis_private_insert(ovnsb_idl_txn);
> @@ -743,8 +615,6 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>               struct ovsdb_idl_index *sbrec_chassis_by_name,
>               struct ovsdb_idl_index *sbrec_chassis_private_by_name,
>               const struct ovsrec_open_vswitch_table *ovs_table,
> -            const struct sbrec_chassis_table *chassis_table,
> -            const struct sbrec_chassis_private_table *chassis_pvt_table,
>               const char *chassis_id,
>               const struct ovsrec_bridge *br_int,
>               const struct sset *transport_zones,
> @@ -762,8 +632,7 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>   
>       const struct sbrec_chassis *chassis_rec = NULL;
>       bool existed = chassis_get_record(ovnsb_idl_txn, sbrec_chassis_by_name,
> -                                      chassis_table, &ovs_cfg, chassis_id,
> -                                      &chassis_rec);
> +                                      chassis_id, &chassis_rec);
>   
>       /* If we found (or created) a record, update it with the correct config
>        * and store the current chassis_id for fast lookup in case it gets
> @@ -783,13 +652,10 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>           *chassis_private =
>               chassis_private_get_record(ovnsb_idl_txn,
>                                          sbrec_chassis_private_by_name,
> -                                       chassis_pvt_table, chassis_rec);
> -
> +                                       chassis_id);
>           if (*chassis_private) {
>               chassis_private_update(*chassis_private, chassis_rec, chassis_id);
>           }
> -
> -        chassis_info_set_id(&chassis_state, chassis_id);
>       }
>   
>       ovs_chassis_cfg_destroy(&ovs_cfg);
> @@ -865,16 +731,3 @@ chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
>       }
>       return false;
>   }
> -
> -/*
> - * Returns the last initialized chassis-id.
> - */
> -const char *
> -chassis_get_id(void)
> -{
> -    if (chassis_info_id_inited(&chassis_state)) {
> -        return chassis_info_id(&chassis_state);
> -    }
> -
> -    return NULL;
> -}
> diff --git a/controller/chassis.h b/controller/chassis.h
> index 220f726..18b45a1 100644
> --- a/controller/chassis.h
> +++ b/controller/chassis.h
> @@ -37,8 +37,6 @@ const struct sbrec_chassis *chassis_run(
>       struct ovsdb_idl_index *sbrec_chassis_by_name,
>       struct ovsdb_idl_index *sbrec_chassis_private_by_name,
>       const struct ovsrec_open_vswitch_table *,
> -    const struct sbrec_chassis_table *,
> -    const struct sbrec_chassis_private_table *,
>       const char *chassis_id, const struct ovsrec_bridge *br_int,
>       const struct sset *transport_zones,
>       const struct sbrec_chassis_private **chassis_private);
> @@ -48,7 +46,6 @@ bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
>   bool chassis_get_mac(const struct sbrec_chassis *chassis,
>                        const char *bridge_mapping,
>                        struct eth_addr *chassis_mac);
> -const char *chassis_get_id(void);
>   const char * get_chassis_mac_mappings(const struct smap *ext_ids);
>   
>   
> diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
> index 6f3d388..29833c7 100644
> --- a/controller/ovn-controller.8.xml
> +++ b/controller/ovn-controller.8.xml
> @@ -68,7 +68,12 @@
>       </p>
>       <dl>
>         <dt><code>external_ids:system-id</code></dt>
> -      <dd>The chassis name to use in the Chassis table.</dd>
> +      <dd>The chassis name to use in the Chassis table.  Changing the
> +      <code>system-id</code> while <code>ovn-controller</code> is running is
> +      not directly supported.  Users have two options: either first
> +      gracefully stop <code>ovn-controller</code> or manually delete the
> +      stale <code>Chassis</code> and <code>Chassis_Private</code> records
> +      after changing the <code>system-id</code>.</dd>
>   
>         <dt><code>external_ids:hostname</code></dt>
>         <dd>The hostname to use in the Chassis table.</dd>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 90a0d9c..b5f0c13 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1777,7 +1777,7 @@ static void init_physical_ctx(struct engine_node *node,
>           (struct ovsrec_bridge_table *)EN_OVSDB_GET(
>               engine_get_input("OVS_bridge", node));
>       const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> -    const char *chassis_id = chassis_get_id();
> +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
>       const struct sbrec_chassis *chassis = NULL;
>       struct ovsdb_idl_index *sbrec_chassis_by_name =
>           engine_ovsdb_node_get_index(
> @@ -1867,7 +1867,11 @@ static void init_lflow_ctx(struct engine_node *node,
>           (struct sbrec_load_balancer_table *)EN_OVSDB_GET(
>               engine_get_input("SB_load_balancer", node));
>   
> -    const char *chassis_id = chassis_get_id();
> +    struct ovsrec_open_vswitch_table *ovs_table =
> +        (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
> +            engine_get_input("OVS_open_vswitch", node));
> +
> +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
>       const struct sbrec_chassis *chassis = NULL;
>       struct ovsdb_idl_index *sbrec_chassis_by_name =
>           engine_ovsdb_node_get_index(
> @@ -1961,7 +1965,7 @@ en_flow_output_run(struct engine_node *node, void *data)
>           (struct ovsrec_bridge_table *)EN_OVSDB_GET(
>               engine_get_input("OVS_bridge", node));
>       const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> -    const char *chassis_id = chassis_get_id();
> +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
>   
>       struct ovsdb_idl_index *sbrec_chassis_by_name =
>           engine_ovsdb_node_get_index(
> @@ -2145,7 +2149,7 @@ _flow_output_resource_ref_handler(struct engine_node *node, void *data,
>           (struct ovsrec_bridge_table *)EN_OVSDB_GET(
>               engine_get_input("OVS_bridge", node));
>       const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> -    const char *chassis_id = chassis_get_id();
> +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
>   
>       struct ovsdb_idl_index *sbrec_chassis_by_name =
>           engine_ovsdb_node_get_index(
> @@ -2770,18 +2774,13 @@ main(int argc, char *argv[])
>                   get_transport_zones(ovsrec_open_vswitch_table_get(
>                                       ovs_idl_loop.idl)), ",");
>   
> -            const struct sbrec_chassis_table *chassis_table =
> -                sbrec_chassis_table_get(ovnsb_idl_loop.idl);
> -            const struct sbrec_chassis_private_table *chassis_pvt_table =
> -                sbrec_chassis_private_table_get(ovnsb_idl_loop.idl);
>               const char *chassis_id = get_ovs_chassis_id(ovs_table);
>               const struct sbrec_chassis *chassis = NULL;
>               const struct sbrec_chassis_private *chassis_private = NULL;
>               if (chassis_id) {
>                   chassis = chassis_run(ovnsb_idl_txn, sbrec_chassis_by_name,
>                                         sbrec_chassis_private_by_name,
> -                                      ovs_table, chassis_table,
> -                                      chassis_pvt_table, chassis_id,
> +                                      ovs_table, chassis_id,
>                                         br_int, &transport_zones,
>                                         &chassis_private);
>               }
> @@ -3020,7 +3019,7 @@ loop_done:
>   
>               const struct ovsrec_bridge *br_int = get_br_int(bridge_table,
>                                                               ovs_table);
> -            const char *chassis_id = chassis_get_id();
> +            const char *chassis_id = get_ovs_chassis_id(ovs_table);
>               const struct sbrec_chassis *chassis
>                   = (chassis_id
>                      ? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id)
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 9572423..8dc41ab 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -12637,17 +12637,17 @@ static const char *rbac_chassis_auth[] =
>       {"name"};
>   static const char *rbac_chassis_update[] =
>       {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches",
> -     "other_config", "name"};
> +     "other_config"};
>   
>   static const char *rbac_chassis_private_auth[] =
>       {"name"};
>   static const char *rbac_chassis_private_update[] =
> -    {"nb_cfg", "nb_cfg_timestamp", "chassis", "name"};
> +    {"nb_cfg", "nb_cfg_timestamp", "chassis"};
>   
>   static const char *rbac_encap_auth[] =
>       {"chassis_name"};
>   static const char *rbac_encap_update[] =
> -    {"type", "options", "ip", "chassis_name"};
> +    {"type", "options", "ip"};
>   
>   static const char *rbac_port_binding_auth[] =
>       {""};
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index a1f35fb..1b46799 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -187,45 +187,27 @@ OVS_WAIT_UNTIL([
>       test "${expected_iface_types}" = "${chassis_iface_types}"
>   ])
>   
> -# Change the value of external_ids:system-id and make sure it's mirrored
> -# in the Chassis record in the OVN_Southbound database.
> +# Change the value of external_ids:system-id.
> +# This requires operator intervention and removal of the stale chassis and
> +# chassis_private records.  Until that happens ovn-controller fails to
> +# create the records due to constraint violation on the Encap table.
>   sysid=${sysid}-foo
>   ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
> +
>   OVS_WAIT_UNTIL([
> -    chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
> -    test "${sysid}" = "${chassis_id}"
> -])
> -OVS_WAIT_UNTIL([
> -    chassis_id=$(ovn-sbctl get Chassis_Private "${sysid}" name)
> -    test "${sysid}" = "${chassis_id}"
> +    grep -q 'Transaction causes multiple rows in \\"Encap\\" table to have identical values (geneve and \\"192.168.0.1\\") for index on columns \\"type\\" and \\"ip\\".' hv/ovn-controller.log
>   ])
>   
> -# Only one Chassis_Private record should exist.
> -wait_row_count Chassis_Private 1
> +# Destroy the stale entries manually and ovn-controller should now be able
> +# to create new ones.
> +check ovn-sbctl destroy chassis_private . -- destroy chassis .
>   
> -# Simulate system-id changing while ovn-controller is disconnected from the
> -# SB.
> -valid_remote=$(ovs-vsctl get Open_vSwitch . external_ids:ovn-remote)
> -invalid_remote=tcp:0.0.0.0:4242
> -ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${invalid_remote}
> -expected_state="not connected"
> -OVS_WAIT_UNTIL([
> -    test "${expected_state}" = "$(ovn-appctl -t ovn-controller connection-status)"
> -])
> -sysid=${sysid}-bar
> -ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
> -ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${valid_remote}
> -OVS_WAIT_UNTIL([
> -    chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
> -    test "${sysid}" = "${chassis_id}"
> -])
> -OVS_WAIT_UNTIL([
> -    chassis_id=$(ovn-sbctl get Chassis_Private "${sysid}" name)
> -    test "${sysid}" = "${chassis_id}"
> -])
> +wait_row_count Chassis_Private 1 name=${sysid}
> +wait_row_count Chassis 1 name=${sysid}
>   
> -# Only one Chassis_Private record should exist.
> +# Only one Chassis_Private/Chassis record should exist.
>   wait_row_count Chassis_Private 1
> +wait_row_count Chassis 1
>   
>   # Gracefully terminate daemons
>   OVN_CLEANUP_SBOX([hv])
>
Numan Siddique Dec. 11, 2020, 11:53 a.m. UTC | #2
On Thu, Dec 10, 2020 at 3:28 AM Mark Michelson <mmichels@redhat.com> wrote:
>
> Acked-by: Mark Michelson <mmichels@redhat.com>

Thanks Dumitru and Mark.

I applied this patch to the main branch and backported to branch-20.12.

Can you please submit backport patches for the remaining branches.
It doesn't apply cleanly.

Thanks
Numan

>
> On 12/8/20 2:21 PM, Dumitru Ceara wrote:
> > When the OVS system-id changes ovn-controller needs external (CMS) help
> > in order to update its own Chassis/Chassis_private records, i.e., the
> > CMS has to ensure that either ovn-controller is stopped (so that
> > ovn-controller cleans up its old Chassis/Chassis_private records) or
> > that after the system-id is changed, the stale Chassis/Chassis_private
> > records are destroyed externally.
> >
> > This patch reverts the previous efforts to have ovn-controller reuse
> > stale Chassis records and documents how the system-id change operation
> > needs to be executed.  The main problem with reusing stale records is
> > that there's no safe way to make it work when RBAC is enabled.
> >
> > Suggestedy-by: Han Zhou <hzhou@ovn.org>
> > Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374608.html
> > Fixes: f26c4a530bca ("ovn-controller: Fix chassis ovn-sbdb record init")
> > Fixes: 4465f553ee70 ("ovn-controller: Update stale chassis entry at init")
> > Fixes: 94a32fca2d2b ("chassis: Fix the way encaps are updated for a chassis record.")
> > Fixes: dce1af31b550 ("chassis: Fix chassis_private record updates when the system-id changes.")
> > Signed-off-by: Dumitru Ceara <dceara@redhat.com>
> > ---
> >   controller/chassis.c            | 177 ++++------------------------------------
> >   controller/chassis.h            |   3 -
> >   controller/ovn-controller.8.xml |   7 +-
> >   controller/ovn-controller.c     |  21 +++--
> >   northd/ovn-northd.c             |   6 +-
> >   tests/ovn-controller.at         |  44 +++-------
> >   6 files changed, 47 insertions(+), 211 deletions(-)
> >
> > diff --git a/controller/chassis.c b/controller/chassis.c
> > index 7748fb9..b4d4b0e 100644
> > --- a/controller/chassis.c
> > +++ b/controller/chassis.c
> > @@ -37,44 +37,6 @@ VLOG_DEFINE_THIS_MODULE(chassis);
> >   #endif /* HOST_NAME_MAX */
> >
> >   /*
> > - * Structure to hold chassis specific state (currently just chassis-id)
> > - * to avoid database lookups when changes happen while the controller is
> > - * running.
> > - */
> > -struct chassis_info {
> > -    /* Last ID we initialized the Chassis SB record with. */
> > -    struct ds id;
> > -
> > -    /* True if Chassis SB record is initialized, false otherwise. */
> > -    uint32_t id_inited : 1;
> > -};
> > -
> > -static struct chassis_info chassis_state = {
> > -    .id = DS_EMPTY_INITIALIZER,
> > -    .id_inited = false,
> > -};
> > -
> > -static void
> > -chassis_info_set_id(struct chassis_info *info, const char *id)
> > -{
> > -    ds_clear(&info->id);
> > -    ds_put_cstr(&info->id, id);
> > -    info->id_inited = true;
> > -}
> > -
> > -static bool
> > -chassis_info_id_inited(const struct chassis_info *info)
> > -{
> > -    return info->id_inited;
> > -}
> > -
> > -static const char *
> > -chassis_info_id(const struct chassis_info *info)
> > -{
> > -    return ds_cstr_ro(&info->id);
> > -}
> > -
> > -/*
> >    * Structure for storing the chassis config parsed from the ovs table.
> >    */
> >   struct ovs_chassis_cfg {
> > @@ -420,6 +382,9 @@ chassis_tunnels_changed(const struct sset *encap_type_set,
> >       bool  changed = false;
> >
> >       for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
> > +        if (strcmp(chassis_rec->name, chassis_rec->encaps[i]->chassis_name)) {
> > +            return true;
> > +        }
> >
> >           if (!sset_contains(encap_type_set, chassis_rec->encaps[i]->type)) {
> >               changed = true;
> > @@ -500,54 +465,11 @@ chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >       return encaps;
> >   }
> >
> > -/*
> > - * Updates encaps for a given chassis. This can happen when the chassis
> > - * name has changed. Also, the only thing we support updating is the
> > - * chassis_name. For other changes the encaps will be recreated.
> > - */
> > -static void
> > -chassis_update_encaps(const struct sbrec_chassis *chassis)
> > -{
> > -    for (size_t i = 0; i < chassis->n_encaps; i++) {
> > -        sbrec_encap_set_chassis_name(chassis->encaps[i], chassis->name);
> > -    }
> > -}
> > -
> > -/*
> > - * Returns a pointer to a chassis record from 'chassis_table' that
> > - * matches at least one tunnel config.
> > - */
> > -static const struct sbrec_chassis *
> > -chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
> > -                         const struct ovs_chassis_cfg *ovs_cfg,
> > -                         const char *chassis_id)
> > -{
> > -    const struct sbrec_chassis *chassis_rec;
> > -
> > -    SBREC_CHASSIS_TABLE_FOR_EACH (chassis_rec, chassis_table) {
> > -        for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
> > -            if (sset_contains(&ovs_cfg->encap_type_set,
> > -                              chassis_rec->encaps[i]->type) &&
> > -                    sset_contains(&ovs_cfg->encap_ip_set,
> > -                                  chassis_rec->encaps[i]->ip)) {
> > -                return chassis_rec;
> > -            }
> > -            if (strcmp(chassis_rec->name, chassis_id) == 0) {
> > -                return chassis_rec;
> > -            }
> > -        }
> > -    }
> > -
> > -    return NULL;
> > -}
> > -
> >   /* If this is a chassis config update after we initialized the record once
> >    * then we should always be able to find it with the ID we saved in
> >    * chassis_state.
> >    * Otherwise (i.e., first time we create the record or if the system-id
> > - * changed) then we check if there's a stale record from a previous
> > - * controller run that didn't end gracefully and reuse it. If not then we
> > - * create a new record.
> > + * changed) we create a new record.
> >    *
> >    * Sets '*chassis_rec' to point to the local chassis record.
> >    * Returns true if this record was already in the database, false if it was
> > @@ -556,33 +478,16 @@ chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
> >   static bool
> >   chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >                      struct ovsdb_idl_index *sbrec_chassis_by_name,
> > -                   const struct sbrec_chassis_table *chassis_table,
> > -                   const struct ovs_chassis_cfg *ovs_cfg,
> >                      const char *chassis_id,
> >                      const struct sbrec_chassis **chassis_rec)
> >   {
> > -    const struct sbrec_chassis *chassis = NULL;
> > -
> > -    if (chassis_info_id_inited(&chassis_state)) {
> > -        chassis = chassis_lookup_by_name(sbrec_chassis_by_name,
> > -                                         chassis_info_id(&chassis_state));
> > -        if (!chassis) {
> > -            VLOG_DBG("Could not find Chassis, will check if the id changed: "
> > -                     "stored (%s) ovs (%s)",
> > -                     chassis_info_id(&chassis_state), chassis_id);
> > -        }
> > -    }
> > -
> > -    if (!chassis) {
> > -        chassis = chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id);
> > -    }
> > +    const struct sbrec_chassis *chassis =
> > +        chassis = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
> >
> > -    if (!chassis) {
> > -        /* Recreate the chassis record. */
> > +    if (!chassis && ovnsb_idl_txn) {
> > +        /* Create the chassis record. */
> >           VLOG_DBG("Could not find Chassis, will create it: %s", chassis_id);
> > -        if (ovnsb_idl_txn) {
> > -            *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
> > -        }
> > +        *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
> >           return false;
> >       }
> >
> > @@ -650,7 +555,6 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
> >                                   &ovs_cfg->encap_ip_set, ovs_cfg->encap_csum,
> >                                   chassis_rec);
> >       if (!tunnels_changed) {
> > -        chassis_update_encaps(chassis_rec);
> >           return updated;
> >       }
> >
> > @@ -666,33 +570,11 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
> >       return true;
> >   }
> >
> > -/*
> > - * Returns a pointer to a chassis_private record from 'chassis_pvt_table' that
> > - * matches the chassis record.
> > - */
> > -static const struct sbrec_chassis_private *
> > -chassis_private_get_stale_record(
> > -    const struct sbrec_chassis_private_table *chassis_pvt_table,
> > -    const struct sbrec_chassis *chassis)
> > -{
> > -    const struct sbrec_chassis_private *chassis_pvt_rec;
> > -
> > -    SBREC_CHASSIS_PRIVATE_TABLE_FOR_EACH (chassis_pvt_rec, chassis_pvt_table) {
> > -        if (chassis_pvt_rec->chassis == chassis) {
> > -            return chassis_pvt_rec;
> > -        }
> > -    }
> > -
> > -    return NULL;
> > -}
> > -
> >   /* If this is a chassis_private config update after we initialized the record
> >    * once then we should always be able to find it with the ID we saved in
> >    * chassis_state.
> >    * Otherwise (i.e., first time we created the chassis record or if the
> > - * system-id changed) then we check if there's a stale record from a previous
> > - * controller run that didn't end gracefully and reuse it. If not then we
> > - * create a new record.
> > + * system-id changed) we create a new record.
> >    *
> >    * Returns the local chassis record.
> >    */
> > @@ -700,21 +582,11 @@ static const struct sbrec_chassis_private *
> >   chassis_private_get_record(
> >       struct ovsdb_idl_txn *ovnsb_idl_txn,
> >       struct ovsdb_idl_index *sbrec_chassis_pvt_by_name,
> > -    const struct sbrec_chassis_private_table *chassis_pvt_table,
> > -    const struct sbrec_chassis *chassis)
> > +    const char *chassis_id)
> >   {
> > -    const struct sbrec_chassis_private *chassis_p = NULL;
> > -
> > -    if (chassis_info_id_inited(&chassis_state)) {
> > -        chassis_p =
> > +    const struct sbrec_chassis_private *chassis_p =
> >               chassis_private_lookup_by_name(sbrec_chassis_pvt_by_name,
> > -                                           chassis_info_id(&chassis_state));
> > -    }
> > -
> > -    if (!chassis_p) {
> > -        chassis_p = chassis_private_get_stale_record(chassis_pvt_table,
> > -                                                     chassis);
> > -    }
> > +                                           chassis_id);
> >
> >       if (!chassis_p && ovnsb_idl_txn) {
> >           return sbrec_chassis_private_insert(ovnsb_idl_txn);
> > @@ -743,8 +615,6 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >               struct ovsdb_idl_index *sbrec_chassis_by_name,
> >               struct ovsdb_idl_index *sbrec_chassis_private_by_name,
> >               const struct ovsrec_open_vswitch_table *ovs_table,
> > -            const struct sbrec_chassis_table *chassis_table,
> > -            const struct sbrec_chassis_private_table *chassis_pvt_table,
> >               const char *chassis_id,
> >               const struct ovsrec_bridge *br_int,
> >               const struct sset *transport_zones,
> > @@ -762,8 +632,7 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >
> >       const struct sbrec_chassis *chassis_rec = NULL;
> >       bool existed = chassis_get_record(ovnsb_idl_txn, sbrec_chassis_by_name,
> > -                                      chassis_table, &ovs_cfg, chassis_id,
> > -                                      &chassis_rec);
> > +                                      chassis_id, &chassis_rec);
> >
> >       /* If we found (or created) a record, update it with the correct config
> >        * and store the current chassis_id for fast lookup in case it gets
> > @@ -783,13 +652,10 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >           *chassis_private =
> >               chassis_private_get_record(ovnsb_idl_txn,
> >                                          sbrec_chassis_private_by_name,
> > -                                       chassis_pvt_table, chassis_rec);
> > -
> > +                                       chassis_id);
> >           if (*chassis_private) {
> >               chassis_private_update(*chassis_private, chassis_rec, chassis_id);
> >           }
> > -
> > -        chassis_info_set_id(&chassis_state, chassis_id);
> >       }
> >
> >       ovs_chassis_cfg_destroy(&ovs_cfg);
> > @@ -865,16 +731,3 @@ chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >       }
> >       return false;
> >   }
> > -
> > -/*
> > - * Returns the last initialized chassis-id.
> > - */
> > -const char *
> > -chassis_get_id(void)
> > -{
> > -    if (chassis_info_id_inited(&chassis_state)) {
> > -        return chassis_info_id(&chassis_state);
> > -    }
> > -
> > -    return NULL;
> > -}
> > diff --git a/controller/chassis.h b/controller/chassis.h
> > index 220f726..18b45a1 100644
> > --- a/controller/chassis.h
> > +++ b/controller/chassis.h
> > @@ -37,8 +37,6 @@ const struct sbrec_chassis *chassis_run(
> >       struct ovsdb_idl_index *sbrec_chassis_by_name,
> >       struct ovsdb_idl_index *sbrec_chassis_private_by_name,
> >       const struct ovsrec_open_vswitch_table *,
> > -    const struct sbrec_chassis_table *,
> > -    const struct sbrec_chassis_private_table *,
> >       const char *chassis_id, const struct ovsrec_bridge *br_int,
> >       const struct sset *transport_zones,
> >       const struct sbrec_chassis_private **chassis_private);
> > @@ -48,7 +46,6 @@ bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >   bool chassis_get_mac(const struct sbrec_chassis *chassis,
> >                        const char *bridge_mapping,
> >                        struct eth_addr *chassis_mac);
> > -const char *chassis_get_id(void);
> >   const char * get_chassis_mac_mappings(const struct smap *ext_ids);
> >
> >
> > diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
> > index 6f3d388..29833c7 100644
> > --- a/controller/ovn-controller.8.xml
> > +++ b/controller/ovn-controller.8.xml
> > @@ -68,7 +68,12 @@
> >       </p>
> >       <dl>
> >         <dt><code>external_ids:system-id</code></dt>
> > -      <dd>The chassis name to use in the Chassis table.</dd>
> > +      <dd>The chassis name to use in the Chassis table.  Changing the
> > +      <code>system-id</code> while <code>ovn-controller</code> is running is
> > +      not directly supported.  Users have two options: either first
> > +      gracefully stop <code>ovn-controller</code> or manually delete the
> > +      stale <code>Chassis</code> and <code>Chassis_Private</code> records
> > +      after changing the <code>system-id</code>.</dd>
> >
> >         <dt><code>external_ids:hostname</code></dt>
> >         <dd>The hostname to use in the Chassis table.</dd>
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 90a0d9c..b5f0c13 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -1777,7 +1777,7 @@ static void init_physical_ctx(struct engine_node *node,
> >           (struct ovsrec_bridge_table *)EN_OVSDB_GET(
> >               engine_get_input("OVS_bridge", node));
> >       const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> > -    const char *chassis_id = chassis_get_id();
> > +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
> >       const struct sbrec_chassis *chassis = NULL;
> >       struct ovsdb_idl_index *sbrec_chassis_by_name =
> >           engine_ovsdb_node_get_index(
> > @@ -1867,7 +1867,11 @@ static void init_lflow_ctx(struct engine_node *node,
> >           (struct sbrec_load_balancer_table *)EN_OVSDB_GET(
> >               engine_get_input("SB_load_balancer", node));
> >
> > -    const char *chassis_id = chassis_get_id();
> > +    struct ovsrec_open_vswitch_table *ovs_table =
> > +        (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
> > +            engine_get_input("OVS_open_vswitch", node));
> > +
> > +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
> >       const struct sbrec_chassis *chassis = NULL;
> >       struct ovsdb_idl_index *sbrec_chassis_by_name =
> >           engine_ovsdb_node_get_index(
> > @@ -1961,7 +1965,7 @@ en_flow_output_run(struct engine_node *node, void *data)
> >           (struct ovsrec_bridge_table *)EN_OVSDB_GET(
> >               engine_get_input("OVS_bridge", node));
> >       const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> > -    const char *chassis_id = chassis_get_id();
> > +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
> >
> >       struct ovsdb_idl_index *sbrec_chassis_by_name =
> >           engine_ovsdb_node_get_index(
> > @@ -2145,7 +2149,7 @@ _flow_output_resource_ref_handler(struct engine_node *node, void *data,
> >           (struct ovsrec_bridge_table *)EN_OVSDB_GET(
> >               engine_get_input("OVS_bridge", node));
> >       const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> > -    const char *chassis_id = chassis_get_id();
> > +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
> >
> >       struct ovsdb_idl_index *sbrec_chassis_by_name =
> >           engine_ovsdb_node_get_index(
> > @@ -2770,18 +2774,13 @@ main(int argc, char *argv[])
> >                   get_transport_zones(ovsrec_open_vswitch_table_get(
> >                                       ovs_idl_loop.idl)), ",");
> >
> > -            const struct sbrec_chassis_table *chassis_table =
> > -                sbrec_chassis_table_get(ovnsb_idl_loop.idl);
> > -            const struct sbrec_chassis_private_table *chassis_pvt_table =
> > -                sbrec_chassis_private_table_get(ovnsb_idl_loop.idl);
> >               const char *chassis_id = get_ovs_chassis_id(ovs_table);
> >               const struct sbrec_chassis *chassis = NULL;
> >               const struct sbrec_chassis_private *chassis_private = NULL;
> >               if (chassis_id) {
> >                   chassis = chassis_run(ovnsb_idl_txn, sbrec_chassis_by_name,
> >                                         sbrec_chassis_private_by_name,
> > -                                      ovs_table, chassis_table,
> > -                                      chassis_pvt_table, chassis_id,
> > +                                      ovs_table, chassis_id,
> >                                         br_int, &transport_zones,
> >                                         &chassis_private);
> >               }
> > @@ -3020,7 +3019,7 @@ loop_done:
> >
> >               const struct ovsrec_bridge *br_int = get_br_int(bridge_table,
> >                                                               ovs_table);
> > -            const char *chassis_id = chassis_get_id();
> > +            const char *chassis_id = get_ovs_chassis_id(ovs_table);
> >               const struct sbrec_chassis *chassis
> >                   = (chassis_id
> >                      ? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id)
> > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> > index 9572423..8dc41ab 100644
> > --- a/northd/ovn-northd.c
> > +++ b/northd/ovn-northd.c
> > @@ -12637,17 +12637,17 @@ static const char *rbac_chassis_auth[] =
> >       {"name"};
> >   static const char *rbac_chassis_update[] =
> >       {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches",
> > -     "other_config", "name"};
> > +     "other_config"};
> >
> >   static const char *rbac_chassis_private_auth[] =
> >       {"name"};
> >   static const char *rbac_chassis_private_update[] =
> > -    {"nb_cfg", "nb_cfg_timestamp", "chassis", "name"};
> > +    {"nb_cfg", "nb_cfg_timestamp", "chassis"};
> >
> >   static const char *rbac_encap_auth[] =
> >       {"chassis_name"};
> >   static const char *rbac_encap_update[] =
> > -    {"type", "options", "ip", "chassis_name"};
> > +    {"type", "options", "ip"};
> >
> >   static const char *rbac_port_binding_auth[] =
> >       {""};
> > diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> > index a1f35fb..1b46799 100644
> > --- a/tests/ovn-controller.at
> > +++ b/tests/ovn-controller.at
> > @@ -187,45 +187,27 @@ OVS_WAIT_UNTIL([
> >       test "${expected_iface_types}" = "${chassis_iface_types}"
> >   ])
> >
> > -# Change the value of external_ids:system-id and make sure it's mirrored
> > -# in the Chassis record in the OVN_Southbound database.
> > +# Change the value of external_ids:system-id.
> > +# This requires operator intervention and removal of the stale chassis and
> > +# chassis_private records.  Until that happens ovn-controller fails to
> > +# create the records due to constraint violation on the Encap table.
> >   sysid=${sysid}-foo
> >   ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
> > +
> >   OVS_WAIT_UNTIL([
> > -    chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
> > -    test "${sysid}" = "${chassis_id}"
> > -])
> > -OVS_WAIT_UNTIL([
> > -    chassis_id=$(ovn-sbctl get Chassis_Private "${sysid}" name)
> > -    test "${sysid}" = "${chassis_id}"
> > +    grep -q 'Transaction causes multiple rows in \\"Encap\\" table to have identical values (geneve and \\"192.168.0.1\\") for index on columns \\"type\\" and \\"ip\\".' hv/ovn-controller.log
> >   ])
> >
> > -# Only one Chassis_Private record should exist.
> > -wait_row_count Chassis_Private 1
> > +# Destroy the stale entries manually and ovn-controller should now be able
> > +# to create new ones.
> > +check ovn-sbctl destroy chassis_private . -- destroy chassis .
> >
> > -# Simulate system-id changing while ovn-controller is disconnected from the
> > -# SB.
> > -valid_remote=$(ovs-vsctl get Open_vSwitch . external_ids:ovn-remote)
> > -invalid_remote=tcp:0.0.0.0:4242
> > -ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${invalid_remote}
> > -expected_state="not connected"
> > -OVS_WAIT_UNTIL([
> > -    test "${expected_state}" = "$(ovn-appctl -t ovn-controller connection-status)"
> > -])
> > -sysid=${sysid}-bar
> > -ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
> > -ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${valid_remote}
> > -OVS_WAIT_UNTIL([
> > -    chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
> > -    test "${sysid}" = "${chassis_id}"
> > -])
> > -OVS_WAIT_UNTIL([
> > -    chassis_id=$(ovn-sbctl get Chassis_Private "${sysid}" name)
> > -    test "${sysid}" = "${chassis_id}"
> > -])
> > +wait_row_count Chassis_Private 1 name=${sysid}
> > +wait_row_count Chassis 1 name=${sysid}
> >
> > -# Only one Chassis_Private record should exist.
> > +# Only one Chassis_Private/Chassis record should exist.
> >   wait_row_count Chassis_Private 1
> > +wait_row_count Chassis 1
> >
> >   # Gracefully terminate daemons
> >   OVN_CLEANUP_SBOX([hv])
> >
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Dumitru Ceara Dec. 14, 2020, 12:35 p.m. UTC | #3
On 12/11/20 12:53 PM, Numan Siddique wrote:
> On Thu, Dec 10, 2020 at 3:28 AM Mark Michelson <mmichels@redhat.com> wrote:
>>
>> Acked-by: Mark Michelson <mmichels@redhat.com>
> 
> Thanks Dumitru and Mark.
> 
> I applied this patch to the main branch and backported to branch-20.12.
> 
> Can you please submit backport patches for the remaining branches.
> It doesn't apply cleanly.
> 

Thanks Mark and Numan.

I just sent backport patches for all stable branches:

http://patchwork.ozlabs.org/project/ovn/patch/1607943476-23533-1-git-send-email-dceara@redhat.com/
http://patchwork.ozlabs.org/project/ovn/patch/1607947243-9907-1-git-send-email-dceara@redhat.com/
http://patchwork.ozlabs.org/project/ovn/patch/1607949185-1273-1-git-send-email-dceara@redhat.com/

Thanks,
Dumitru
diff mbox series

Patch

diff --git a/controller/chassis.c b/controller/chassis.c
index 7748fb9..b4d4b0e 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -37,44 +37,6 @@  VLOG_DEFINE_THIS_MODULE(chassis);
 #endif /* HOST_NAME_MAX */
 
 /*
- * Structure to hold chassis specific state (currently just chassis-id)
- * to avoid database lookups when changes happen while the controller is
- * running.
- */
-struct chassis_info {
-    /* Last ID we initialized the Chassis SB record with. */
-    struct ds id;
-
-    /* True if Chassis SB record is initialized, false otherwise. */
-    uint32_t id_inited : 1;
-};
-
-static struct chassis_info chassis_state = {
-    .id = DS_EMPTY_INITIALIZER,
-    .id_inited = false,
-};
-
-static void
-chassis_info_set_id(struct chassis_info *info, const char *id)
-{
-    ds_clear(&info->id);
-    ds_put_cstr(&info->id, id);
-    info->id_inited = true;
-}
-
-static bool
-chassis_info_id_inited(const struct chassis_info *info)
-{
-    return info->id_inited;
-}
-
-static const char *
-chassis_info_id(const struct chassis_info *info)
-{
-    return ds_cstr_ro(&info->id);
-}
-
-/*
  * Structure for storing the chassis config parsed from the ovs table.
  */
 struct ovs_chassis_cfg {
@@ -420,6 +382,9 @@  chassis_tunnels_changed(const struct sset *encap_type_set,
     bool  changed = false;
 
     for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
+        if (strcmp(chassis_rec->name, chassis_rec->encaps[i]->chassis_name)) {
+            return true;
+        }
 
         if (!sset_contains(encap_type_set, chassis_rec->encaps[i]->type)) {
             changed = true;
@@ -500,54 +465,11 @@  chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn,
     return encaps;
 }
 
-/*
- * Updates encaps for a given chassis. This can happen when the chassis
- * name has changed. Also, the only thing we support updating is the
- * chassis_name. For other changes the encaps will be recreated.
- */
-static void
-chassis_update_encaps(const struct sbrec_chassis *chassis)
-{
-    for (size_t i = 0; i < chassis->n_encaps; i++) {
-        sbrec_encap_set_chassis_name(chassis->encaps[i], chassis->name);
-    }
-}
-
-/*
- * Returns a pointer to a chassis record from 'chassis_table' that
- * matches at least one tunnel config.
- */
-static const struct sbrec_chassis *
-chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
-                         const struct ovs_chassis_cfg *ovs_cfg,
-                         const char *chassis_id)
-{
-    const struct sbrec_chassis *chassis_rec;
-
-    SBREC_CHASSIS_TABLE_FOR_EACH (chassis_rec, chassis_table) {
-        for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
-            if (sset_contains(&ovs_cfg->encap_type_set,
-                              chassis_rec->encaps[i]->type) &&
-                    sset_contains(&ovs_cfg->encap_ip_set,
-                                  chassis_rec->encaps[i]->ip)) {
-                return chassis_rec;
-            }
-            if (strcmp(chassis_rec->name, chassis_id) == 0) {
-                return chassis_rec;
-            }
-        }
-    }
-
-    return NULL;
-}
-
 /* If this is a chassis config update after we initialized the record once
  * then we should always be able to find it with the ID we saved in
  * chassis_state.
  * Otherwise (i.e., first time we create the record or if the system-id
- * changed) then we check if there's a stale record from a previous
- * controller run that didn't end gracefully and reuse it. If not then we
- * create a new record.
+ * changed) we create a new record.
  *
  * Sets '*chassis_rec' to point to the local chassis record.
  * Returns true if this record was already in the database, false if it was
@@ -556,33 +478,16 @@  chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
 static bool
 chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn,
                    struct ovsdb_idl_index *sbrec_chassis_by_name,
-                   const struct sbrec_chassis_table *chassis_table,
-                   const struct ovs_chassis_cfg *ovs_cfg,
                    const char *chassis_id,
                    const struct sbrec_chassis **chassis_rec)
 {
-    const struct sbrec_chassis *chassis = NULL;
-
-    if (chassis_info_id_inited(&chassis_state)) {
-        chassis = chassis_lookup_by_name(sbrec_chassis_by_name,
-                                         chassis_info_id(&chassis_state));
-        if (!chassis) {
-            VLOG_DBG("Could not find Chassis, will check if the id changed: "
-                     "stored (%s) ovs (%s)",
-                     chassis_info_id(&chassis_state), chassis_id);
-        }
-    }
-
-    if (!chassis) {
-        chassis = chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id);
-    }
+    const struct sbrec_chassis *chassis =
+        chassis = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
 
-    if (!chassis) {
-        /* Recreate the chassis record. */
+    if (!chassis && ovnsb_idl_txn) {
+        /* Create the chassis record. */
         VLOG_DBG("Could not find Chassis, will create it: %s", chassis_id);
-        if (ovnsb_idl_txn) {
-            *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
-        }
+        *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
         return false;
     }
 
@@ -650,7 +555,6 @@  chassis_update(const struct sbrec_chassis *chassis_rec,
                                 &ovs_cfg->encap_ip_set, ovs_cfg->encap_csum,
                                 chassis_rec);
     if (!tunnels_changed) {
-        chassis_update_encaps(chassis_rec);
         return updated;
     }
 
@@ -666,33 +570,11 @@  chassis_update(const struct sbrec_chassis *chassis_rec,
     return true;
 }
 
-/*
- * Returns a pointer to a chassis_private record from 'chassis_pvt_table' that
- * matches the chassis record.
- */
-static const struct sbrec_chassis_private *
-chassis_private_get_stale_record(
-    const struct sbrec_chassis_private_table *chassis_pvt_table,
-    const struct sbrec_chassis *chassis)
-{
-    const struct sbrec_chassis_private *chassis_pvt_rec;
-
-    SBREC_CHASSIS_PRIVATE_TABLE_FOR_EACH (chassis_pvt_rec, chassis_pvt_table) {
-        if (chassis_pvt_rec->chassis == chassis) {
-            return chassis_pvt_rec;
-        }
-    }
-
-    return NULL;
-}
-
 /* If this is a chassis_private config update after we initialized the record
  * once then we should always be able to find it with the ID we saved in
  * chassis_state.
  * Otherwise (i.e., first time we created the chassis record or if the
- * system-id changed) then we check if there's a stale record from a previous
- * controller run that didn't end gracefully and reuse it. If not then we
- * create a new record.
+ * system-id changed) we create a new record.
  *
  * Returns the local chassis record.
  */
@@ -700,21 +582,11 @@  static const struct sbrec_chassis_private *
 chassis_private_get_record(
     struct ovsdb_idl_txn *ovnsb_idl_txn,
     struct ovsdb_idl_index *sbrec_chassis_pvt_by_name,
-    const struct sbrec_chassis_private_table *chassis_pvt_table,
-    const struct sbrec_chassis *chassis)
+    const char *chassis_id)
 {
-    const struct sbrec_chassis_private *chassis_p = NULL;
-
-    if (chassis_info_id_inited(&chassis_state)) {
-        chassis_p =
+    const struct sbrec_chassis_private *chassis_p =
             chassis_private_lookup_by_name(sbrec_chassis_pvt_by_name,
-                                           chassis_info_id(&chassis_state));
-    }
-
-    if (!chassis_p) {
-        chassis_p = chassis_private_get_stale_record(chassis_pvt_table,
-                                                     chassis);
-    }
+                                           chassis_id);
 
     if (!chassis_p && ovnsb_idl_txn) {
         return sbrec_chassis_private_insert(ovnsb_idl_txn);
@@ -743,8 +615,6 @@  chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
             struct ovsdb_idl_index *sbrec_chassis_by_name,
             struct ovsdb_idl_index *sbrec_chassis_private_by_name,
             const struct ovsrec_open_vswitch_table *ovs_table,
-            const struct sbrec_chassis_table *chassis_table,
-            const struct sbrec_chassis_private_table *chassis_pvt_table,
             const char *chassis_id,
             const struct ovsrec_bridge *br_int,
             const struct sset *transport_zones,
@@ -762,8 +632,7 @@  chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
 
     const struct sbrec_chassis *chassis_rec = NULL;
     bool existed = chassis_get_record(ovnsb_idl_txn, sbrec_chassis_by_name,
-                                      chassis_table, &ovs_cfg, chassis_id,
-                                      &chassis_rec);
+                                      chassis_id, &chassis_rec);
 
     /* If we found (or created) a record, update it with the correct config
      * and store the current chassis_id for fast lookup in case it gets
@@ -783,13 +652,10 @@  chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
         *chassis_private =
             chassis_private_get_record(ovnsb_idl_txn,
                                        sbrec_chassis_private_by_name,
-                                       chassis_pvt_table, chassis_rec);
-
+                                       chassis_id);
         if (*chassis_private) {
             chassis_private_update(*chassis_private, chassis_rec, chassis_id);
         }
-
-        chassis_info_set_id(&chassis_state, chassis_id);
     }
 
     ovs_chassis_cfg_destroy(&ovs_cfg);
@@ -865,16 +731,3 @@  chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
     }
     return false;
 }
-
-/*
- * Returns the last initialized chassis-id.
- */
-const char *
-chassis_get_id(void)
-{
-    if (chassis_info_id_inited(&chassis_state)) {
-        return chassis_info_id(&chassis_state);
-    }
-
-    return NULL;
-}
diff --git a/controller/chassis.h b/controller/chassis.h
index 220f726..18b45a1 100644
--- a/controller/chassis.h
+++ b/controller/chassis.h
@@ -37,8 +37,6 @@  const struct sbrec_chassis *chassis_run(
     struct ovsdb_idl_index *sbrec_chassis_by_name,
     struct ovsdb_idl_index *sbrec_chassis_private_by_name,
     const struct ovsrec_open_vswitch_table *,
-    const struct sbrec_chassis_table *,
-    const struct sbrec_chassis_private_table *,
     const char *chassis_id, const struct ovsrec_bridge *br_int,
     const struct sset *transport_zones,
     const struct sbrec_chassis_private **chassis_private);
@@ -48,7 +46,6 @@  bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
 bool chassis_get_mac(const struct sbrec_chassis *chassis,
                      const char *bridge_mapping,
                      struct eth_addr *chassis_mac);
-const char *chassis_get_id(void);
 const char * get_chassis_mac_mappings(const struct smap *ext_ids);
 
 
diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
index 6f3d388..29833c7 100644
--- a/controller/ovn-controller.8.xml
+++ b/controller/ovn-controller.8.xml
@@ -68,7 +68,12 @@ 
     </p>
     <dl>
       <dt><code>external_ids:system-id</code></dt>
-      <dd>The chassis name to use in the Chassis table.</dd>
+      <dd>The chassis name to use in the Chassis table.  Changing the
+      <code>system-id</code> while <code>ovn-controller</code> is running is
+      not directly supported.  Users have two options: either first
+      gracefully stop <code>ovn-controller</code> or manually delete the
+      stale <code>Chassis</code> and <code>Chassis_Private</code> records
+      after changing the <code>system-id</code>.</dd>
 
       <dt><code>external_ids:hostname</code></dt>
       <dd>The hostname to use in the Chassis table.</dd>
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 90a0d9c..b5f0c13 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -1777,7 +1777,7 @@  static void init_physical_ctx(struct engine_node *node,
         (struct ovsrec_bridge_table *)EN_OVSDB_GET(
             engine_get_input("OVS_bridge", node));
     const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
-    const char *chassis_id = chassis_get_id();
+    const char *chassis_id = get_ovs_chassis_id(ovs_table);
     const struct sbrec_chassis *chassis = NULL;
     struct ovsdb_idl_index *sbrec_chassis_by_name =
         engine_ovsdb_node_get_index(
@@ -1867,7 +1867,11 @@  static void init_lflow_ctx(struct engine_node *node,
         (struct sbrec_load_balancer_table *)EN_OVSDB_GET(
             engine_get_input("SB_load_balancer", node));
 
-    const char *chassis_id = chassis_get_id();
+    struct ovsrec_open_vswitch_table *ovs_table =
+        (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
+            engine_get_input("OVS_open_vswitch", node));
+
+    const char *chassis_id = get_ovs_chassis_id(ovs_table);
     const struct sbrec_chassis *chassis = NULL;
     struct ovsdb_idl_index *sbrec_chassis_by_name =
         engine_ovsdb_node_get_index(
@@ -1961,7 +1965,7 @@  en_flow_output_run(struct engine_node *node, void *data)
         (struct ovsrec_bridge_table *)EN_OVSDB_GET(
             engine_get_input("OVS_bridge", node));
     const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
-    const char *chassis_id = chassis_get_id();
+    const char *chassis_id = get_ovs_chassis_id(ovs_table);
 
     struct ovsdb_idl_index *sbrec_chassis_by_name =
         engine_ovsdb_node_get_index(
@@ -2145,7 +2149,7 @@  _flow_output_resource_ref_handler(struct engine_node *node, void *data,
         (struct ovsrec_bridge_table *)EN_OVSDB_GET(
             engine_get_input("OVS_bridge", node));
     const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
-    const char *chassis_id = chassis_get_id();
+    const char *chassis_id = get_ovs_chassis_id(ovs_table);
 
     struct ovsdb_idl_index *sbrec_chassis_by_name =
         engine_ovsdb_node_get_index(
@@ -2770,18 +2774,13 @@  main(int argc, char *argv[])
                 get_transport_zones(ovsrec_open_vswitch_table_get(
                                     ovs_idl_loop.idl)), ",");
 
-            const struct sbrec_chassis_table *chassis_table =
-                sbrec_chassis_table_get(ovnsb_idl_loop.idl);
-            const struct sbrec_chassis_private_table *chassis_pvt_table =
-                sbrec_chassis_private_table_get(ovnsb_idl_loop.idl);
             const char *chassis_id = get_ovs_chassis_id(ovs_table);
             const struct sbrec_chassis *chassis = NULL;
             const struct sbrec_chassis_private *chassis_private = NULL;
             if (chassis_id) {
                 chassis = chassis_run(ovnsb_idl_txn, sbrec_chassis_by_name,
                                       sbrec_chassis_private_by_name,
-                                      ovs_table, chassis_table,
-                                      chassis_pvt_table, chassis_id,
+                                      ovs_table, chassis_id,
                                       br_int, &transport_zones,
                                       &chassis_private);
             }
@@ -3020,7 +3019,7 @@  loop_done:
 
             const struct ovsrec_bridge *br_int = get_br_int(bridge_table,
                                                             ovs_table);
-            const char *chassis_id = chassis_get_id();
+            const char *chassis_id = get_ovs_chassis_id(ovs_table);
             const struct sbrec_chassis *chassis
                 = (chassis_id
                    ? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id)
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 9572423..8dc41ab 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12637,17 +12637,17 @@  static const char *rbac_chassis_auth[] =
     {"name"};
 static const char *rbac_chassis_update[] =
     {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches",
-     "other_config", "name"};
+     "other_config"};
 
 static const char *rbac_chassis_private_auth[] =
     {"name"};
 static const char *rbac_chassis_private_update[] =
-    {"nb_cfg", "nb_cfg_timestamp", "chassis", "name"};
+    {"nb_cfg", "nb_cfg_timestamp", "chassis"};
 
 static const char *rbac_encap_auth[] =
     {"chassis_name"};
 static const char *rbac_encap_update[] =
-    {"type", "options", "ip", "chassis_name"};
+    {"type", "options", "ip"};
 
 static const char *rbac_port_binding_auth[] =
     {""};
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index a1f35fb..1b46799 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -187,45 +187,27 @@  OVS_WAIT_UNTIL([
     test "${expected_iface_types}" = "${chassis_iface_types}"
 ])
 
-# Change the value of external_ids:system-id and make sure it's mirrored
-# in the Chassis record in the OVN_Southbound database.
+# Change the value of external_ids:system-id.
+# This requires operator intervention and removal of the stale chassis and
+# chassis_private records.  Until that happens ovn-controller fails to
+# create the records due to constraint violation on the Encap table.
 sysid=${sysid}-foo
 ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
+
 OVS_WAIT_UNTIL([
-    chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
-    test "${sysid}" = "${chassis_id}"
-])
-OVS_WAIT_UNTIL([
-    chassis_id=$(ovn-sbctl get Chassis_Private "${sysid}" name)
-    test "${sysid}" = "${chassis_id}"
+    grep -q 'Transaction causes multiple rows in \\"Encap\\" table to have identical values (geneve and \\"192.168.0.1\\") for index on columns \\"type\\" and \\"ip\\".' hv/ovn-controller.log
 ])
 
-# Only one Chassis_Private record should exist.
-wait_row_count Chassis_Private 1
+# Destroy the stale entries manually and ovn-controller should now be able
+# to create new ones.
+check ovn-sbctl destroy chassis_private . -- destroy chassis .
 
-# Simulate system-id changing while ovn-controller is disconnected from the
-# SB.
-valid_remote=$(ovs-vsctl get Open_vSwitch . external_ids:ovn-remote)
-invalid_remote=tcp:0.0.0.0:4242
-ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${invalid_remote}
-expected_state="not connected"
-OVS_WAIT_UNTIL([
-    test "${expected_state}" = "$(ovn-appctl -t ovn-controller connection-status)"
-])
-sysid=${sysid}-bar
-ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
-ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${valid_remote}
-OVS_WAIT_UNTIL([
-    chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
-    test "${sysid}" = "${chassis_id}"
-])
-OVS_WAIT_UNTIL([
-    chassis_id=$(ovn-sbctl get Chassis_Private "${sysid}" name)
-    test "${sysid}" = "${chassis_id}"
-])
+wait_row_count Chassis_Private 1 name=${sysid}
+wait_row_count Chassis 1 name=${sysid}
 
-# Only one Chassis_Private record should exist.
+# Only one Chassis_Private/Chassis record should exist.
 wait_row_count Chassis_Private 1
+wait_row_count Chassis 1
 
 # Gracefully terminate daemons
 OVN_CLEANUP_SBOX([hv])