diff mbox series

[ovs-dev] controller: avoid recomputes triggered by SBDB Port_Binding updates.

Message ID 20220512090425.500011-1-xsimonar@redhat.com
State Superseded
Headers show
Series [ovs-dev] controller: avoid recomputes triggered by SBDB Port_Binding updates. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Xavier Simonart May 12, 2022, 9:04 a.m. UTC
When VIF ports are claimed on a chassis, SBDB Port_Binding table is updated.
If the SBDB IDL is still is read-only ("in transaction") when such a update
is required, the update is not possible and recompute is triggered through
I+P failure.

This situation can happen:
- after updating Port_Binding->chassis to SBDB for one port, in a following
  iteration, ovn-controller handles Interface:external_ids:ovn-installed
  (for the same port) while SBDB is still read-only.
- after updating Port_Binding->chassis to SBDB for one port, in a following
  iteration, ovn-controller updates Port_Binding->chassis for another port,
  while SBDB is still read-only.

This patch prevent the recompute, by having the if-status module
updating the Port_Binding chassis (if needed), when possible.
This does not delay Port_Binding chassis update compared to before this
patch: Port_Binding chassis will be updated as soon as SBDB is again
writable, as it was the case, through a recompute, before this patch.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2059253
Signed-off-by: Xavier Simonart <xsimonar@redhat.com>
---
 controller/binding.c        | 124 ++++++++++++++++++++++++++----------
 controller/binding.h        |   9 ++-
 controller/if-status.c      |  31 +++++++--
 controller/if-status.h      |   6 +-
 controller/ovn-controller.c |   6 +-
 tests/ovn.at                |  55 +++++++++++++++-
 6 files changed, 184 insertions(+), 47 deletions(-)

Comments

Dumitru Ceara May 13, 2022, 12:46 p.m. UTC | #1
Hi Xavier,

On 5/12/22 11:04, Xavier Simonart wrote:
> When VIF ports are claimed on a chassis, SBDB Port_Binding table is updated.
> If the SBDB IDL is still is read-only ("in transaction") when such a update
> is required, the update is not possible and recompute is triggered through
> I+P failure.
> 
> This situation can happen:
> - after updating Port_Binding->chassis to SBDB for one port, in a following
>   iteration, ovn-controller handles Interface:external_ids:ovn-installed
>   (for the same port) while SBDB is still read-only.
> - after updating Port_Binding->chassis to SBDB for one port, in a following
>   iteration, ovn-controller updates Port_Binding->chassis for another port,
>   while SBDB is still read-only.
> 
> This patch prevent the recompute, by having the if-status module
> updating the Port_Binding chassis (if needed), when possible.
> This does not delay Port_Binding chassis update compared to before this
> patch: Port_Binding chassis will be updated as soon as SBDB is again
> writable, as it was the case, through a recompute, before this patch.

Just to confirm, with your patch the Port_Binding chassis will be
updated when the SBDB is again writable and will *not* cause a
recompute, right?

> 
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2059253
> Signed-off-by: Xavier Simonart <xsimonar@redhat.com>
> ---
>  controller/binding.c        | 124 ++++++++++++++++++++++++++----------
>  controller/binding.h        |   9 ++-
>  controller/if-status.c      |  31 +++++++--
>  controller/if-status.h      |   6 +-
>  controller/ovn-controller.c |   6 +-
>  tests/ovn.at                |  55 +++++++++++++++-
>  6 files changed, 184 insertions(+), 47 deletions(-)
> 
> diff --git a/controller/binding.c b/controller/binding.c
> index e5ba56b25..d917d8775 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -657,11 +657,15 @@ local_binding_get_lport_ofport(const struct shash *local_bindings,
>  }
>  
>  bool
> -local_binding_is_up(struct shash *local_bindings, const char *pb_name)
> +local_binding_is_up(struct shash *local_bindings, const char *pb_name,
> +                    const struct sbrec_chassis *chassis_rec)
>  {
>      struct local_binding *lbinding =
>          local_binding_find(local_bindings, pb_name);
>      struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding);
> +    if (b_lport && b_lport->pb->chassis != chassis_rec) {
> +        return false;
> +    }
>      if (lbinding && b_lport && lbinding->iface) {
>          if (b_lport->pb->n_up && !b_lport->pb->up[0]) {
>              return false;
> @@ -673,13 +677,23 @@ local_binding_is_up(struct shash *local_bindings, const char *pb_name)
>  }
>  
>  bool
> -local_binding_is_down(struct shash *local_bindings, const char *pb_name)
> +local_binding_is_down(struct shash *local_bindings, const char *pb_name,
> +                      const struct sbrec_chassis *chassis_rec)
>  {
>      struct local_binding *lbinding =
>          local_binding_find(local_bindings, pb_name);
>  
>      struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding);
>  
> +    if (b_lport) {
> +        if (b_lport->pb->chassis == chassis_rec) {
> +            return false;
> +        } else if (b_lport->pb->chassis) {
> +            VLOG_DBG("lport %s already claimed by other chassis",
> +                     b_lport->pb->logical_port);
> +        }
> +    }
> +
>      if (!lbinding) {
>          return true;
>      }
> @@ -894,6 +908,48 @@ get_lport_type_str(enum en_lport_type lport_type)
>      OVS_NOT_REACHED();
>  }
>  
> +static void set_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb,

Nit: this should be:

static void
set_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb,

> +                          const struct sbrec_chassis *chassis_rec)
> +{
> +    if (pb->chassis != chassis_rec) {
> +        if (pb->chassis) {
> +            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> +                    pb->logical_port, pb->chassis->name,
> +                    chassis_rec->name);

Nit: indentation.

> +        } else {
> +            VLOG_INFO("Claiming lport %s for this chassis.", pb->logical_port);
> +        }
> +        for (int i = 0; i < pb->n_mac; i++) {
> +            VLOG_INFO("%s: Claiming %s", pb->logical_port, pb->mac[i]);
> +        }
> +        sbrec_port_binding_set_chassis(pb, chassis_rec);
> +    }
> +}
> +
> +static void clear_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb)

Here too, the function name should start on a new line.

> +{
> +    if (pb->chassis) {
> +        sbrec_port_binding_set_chassis(pb, NULL);
> +    }
> +}
> +
> +void
> +local_binding_set_pb(struct shash *local_bindings, const char *pb_name,
> +                    const struct sbrec_chassis *chassis_rec)

Nit: indentation.

> +{
> +    struct local_binding *lbinding =
> +        local_binding_find(local_bindings, pb_name);
> +    struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding);
> +
> +    if (b_lport) {
> +        if (chassis_rec) {
> +            set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec);
> +        } else {
> +            clear_pb_chassis_in_sbrec(b_lport->pb);
> +        }
> +    }
> +}
> +
>  /* For newly claimed ports, if 'notify_up' is 'false':
>   * - set the 'pb.up' field to true if 'pb' has no 'parent_pb'.
>   * - set the 'pb.up' field to true if 'parent_pb.up' is 'true' (e.g., for
> @@ -906,23 +962,37 @@ get_lport_type_str(enum en_lport_type lport_type)
>   *   it's explicitly set to 'false'.
>   *   This is to ensure compatibility with older versions of ovn-northd.
>   */
> -static void
> +static bool
>  claimed_lport_set_up(const struct sbrec_port_binding *pb,
>                       const struct sbrec_port_binding *parent_pb,
>                       const struct sbrec_chassis *chassis_rec,
> -                     bool notify_up, struct if_status_mgr *if_mgr)
> +                     bool notify_up, struct if_status_mgr *if_mgr,
> +                     bool sb_readonly)
>  {
> +

Extra line not needed.

>      if (!notify_up) {
>          bool up = true;
>          if (!parent_pb || (parent_pb->n_up && parent_pb->up[0])) {
> -            sbrec_port_binding_set_up(pb, &up, 1);
> +            if (!sb_readonly) {
> +                sbrec_port_binding_set_up(pb, &up, 1);
> +            } else if (pb->n_up && !pb->up[0]) {
> +                return false;
> +            }
>          }
> -        return;
> +        if (sb_readonly && (pb->chassis != chassis_rec)) {
> +            /* Handle the cases where if_status_mgr does not claim the
> +             * interface.In those cases, if we do not set pb->chassis in sb
> +             * now (as readonly), we might not do it later ...
> +             */
> +            return false;
> +        }
> +        return true;
>      }
>  
>      if (pb->chassis != chassis_rec || (pb->n_up && !pb->up[0])) {
>          if_status_mgr_claim_iface(if_mgr, pb->logical_port);
>      }
> +    return true;
>  }
>  
>  /* Returns false if lport is not claimed due to 'sb_readonly'.
> @@ -937,27 +1007,15 @@ claim_lport(const struct sbrec_port_binding *pb,
>              struct hmap *tracked_datapaths,
>              struct if_status_mgr *if_mgr)
>  {
> -    if (!sb_readonly) {
> -        claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up, if_mgr);
> -    }
>  
> +    if (!claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
> +           if_mgr, sb_readonly)) {
> +        return false;
> +    }
>      if (pb->chassis != chassis_rec) {
> -        if (sb_readonly) {
> -            return false;
> -        }
> -
> -        if (pb->chassis) {
> -            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> -                    pb->logical_port, pb->chassis->name,
> -                    chassis_rec->name);
> -        } else {
> -            VLOG_INFO("Claiming lport %s for this chassis.", pb->logical_port);
> +        if (!sb_readonly) {
> +            set_pb_chassis_in_sbrec(pb, chassis_rec);
>          }
> -        for (int i = 0; i < pb->n_mac; i++) {
> -            VLOG_INFO("%s: Claiming %s", pb->logical_port, pb->mac[i]);
> -        }
> -
> -        sbrec_port_binding_set_chassis(pb, chassis_rec);
>  
>          if (tracked_datapaths) {
>              update_lport_tracking(pb, tracked_datapaths, true);
> @@ -974,7 +1032,6 @@ claim_lport(const struct sbrec_port_binding *pb,
>          }
>          sbrec_port_binding_set_encap(pb, encap_rec);
>      }
> -

Unrelated?

>      return true;
>  }
>  
> @@ -986,7 +1043,8 @@ claim_lport(const struct sbrec_port_binding *pb,
>   * Caller should make sure that this is the case.
>   */
>  static bool
> -release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly)
> +release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly,
> +               struct if_status_mgr *if_mgr)
>  {
>      if (pb->encap) {
>          if (sb_readonly) {
> @@ -995,11 +1053,13 @@ release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly)
>          sbrec_port_binding_set_encap(pb, NULL);
>      }
>  
> +    /* if sb readonly, pb->chassis unset through if-status if present */
>      if (pb->chassis) {
> -        if (sb_readonly) {
> +        if (!sb_readonly) {
> +            sbrec_port_binding_set_chassis(pb, NULL);
> +        } else if (!if_status_mgr_iface_is_present(if_mgr, pb->logical_port)) {
>              return false;
>          }
> -        sbrec_port_binding_set_chassis(pb, NULL);
>      }
>  
>      if (pb->virtual_parent) {
> @@ -1009,7 +1069,8 @@ release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly)
>          sbrec_port_binding_set_virtual_parent(pb, NULL);
>      }
>  
> -    VLOG_INFO("Releasing lport %s from this chassis.", pb->logical_port);
> +    VLOG_INFO("Releasing lport %s from this chassis (sb_readonly=%d)",
> +              pb->logical_port, sb_readonly);
>      return true;
>  }
>  
> @@ -1017,7 +1078,7 @@ static bool
>  release_lport(const struct sbrec_port_binding *pb, bool sb_readonly,
>                struct hmap *tracked_datapaths, struct if_status_mgr *if_mgr)
>  {
> -    if (!release_lport_(pb, sb_readonly)) {
> +    if (!release_lport_(pb, sb_readonly, if_mgr)) {
>          return false;
>      }
>  
> @@ -1342,10 +1403,9 @@ consider_localport(const struct sbrec_port_binding *pb,
>      /* If the port binding is claimed, then release it as localport is claimed
>       * by any ovn-controller. */
>      if (pb->chassis == b_ctx_in->chassis_rec) {
> -        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn)) {
> +        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn, b_ctx_out->if_mgr)) {
>              return false;
>          }
> -

Unrelated?

>          remove_related_lport(pb, b_ctx_out);
>      }
>  
> diff --git a/controller/binding.h b/controller/binding.h
> index 430a8d9b1..45202a321 100644
> --- a/controller/binding.h
> +++ b/controller/binding.h
> @@ -151,14 +151,17 @@ const struct sbrec_port_binding *local_binding_get_primary_pb(
>  ofp_port_t local_binding_get_lport_ofport(const struct shash *local_bindings,
>                                            const char *pb_name);
>  
> -bool local_binding_is_up(struct shash *local_bindings, const char *pb_name);
> -bool local_binding_is_down(struct shash *local_bindings, const char *pb_name);
> +bool local_binding_is_up(struct shash *local_bindings, const char *pb_name,
> +                         const struct sbrec_chassis *);
> +bool local_binding_is_down(struct shash *local_bindings, const char *pb_name,
> +                         const struct sbrec_chassis *);
>  void local_binding_set_up(struct shash *local_bindings, const char *pb_name,
>                            const char *ts_now_str, bool sb_readonly,
>                            bool ovs_readonly);
>  void local_binding_set_down(struct shash *local_bindings, const char *pb_name,
>                              bool sb_readonly, bool ovs_readonly);
> -
> +void local_binding_set_pb(struct shash *local_bindings, const char *pb_name,
> +                         const struct sbrec_chassis *chassis_rec);
>  void binding_register_ovs_idl(struct ovsdb_idl *);
>  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
>  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> diff --git a/controller/if-status.c b/controller/if-status.c
> index dbdf8b66f..e36df22a0 100644
> --- a/controller/if-status.c
> +++ b/controller/if-status.c
> @@ -115,6 +115,7 @@ static void ovs_iface_set_state(struct if_status_mgr *, struct ovs_iface *,
>  
>  static void if_status_mgr_update_bindings(
>      struct if_status_mgr *mgr, struct local_binding_data *binding_data,
> +    const struct sbrec_chassis *chassis_rec,
>      bool sb_readonly, bool ovs_readonly);
>  
>  struct if_status_mgr *
> @@ -181,6 +182,13 @@ if_status_mgr_claim_iface(struct if_status_mgr *mgr, const char *iface_id)
>      }
>  }
>  
> +bool
> +if_status_mgr_iface_is_present(struct if_status_mgr *mgr, const char *iface_id)
> +{
> +    struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id);
> +    return (!!iface);
> +}
> +
>  void
>  if_status_mgr_release_iface(struct if_status_mgr *mgr, const char *iface_id)
>  {
> @@ -247,7 +255,8 @@ if_status_mgr_delete_iface(struct if_status_mgr *mgr, const char *iface_id)
>  
>  void
>  if_status_mgr_update(struct if_status_mgr *mgr,
> -                     struct local_binding_data *binding_data)
> +                     struct local_binding_data *binding_data,
> +                     const struct sbrec_chassis *chassis_rec)
>  {
>      if (!binding_data) {
>          return;
> @@ -262,7 +271,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
>          struct ovs_iface *iface = node->data;
>  
> -        if (local_binding_is_up(bindings, iface->id)) {
> +        if (local_binding_is_up(bindings, iface->id, chassis_rec)) {
>              ovs_iface_set_state(mgr, iface, OIF_INSTALLED);
>          }
>      }
> @@ -273,7 +282,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
>          struct ovs_iface *iface = node->data;
>  
> -        if (local_binding_is_down(bindings, iface->id)) {
> +        if (local_binding_is_down(bindings, iface->id, chassis_rec)) {
>              ovs_iface_destroy(mgr, iface);
>          }
>      }
> @@ -306,6 +315,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>  void
>  if_status_mgr_run(struct if_status_mgr *mgr,
>                    struct local_binding_data *binding_data,
> +                  const struct sbrec_chassis *chassis_rec,
>                    bool sb_readonly, bool ovs_readonly)
>  {
>      struct ofctrl_acked_seqnos *acked_seqnos =
> @@ -328,8 +338,8 @@ if_status_mgr_run(struct if_status_mgr *mgr,
>      ofctrl_acked_seqnos_destroy(acked_seqnos);
>  
>      /* Update binding states. */
> -    if_status_mgr_update_bindings(mgr, binding_data, sb_readonly,
> -                                  ovs_readonly);
> +    if_status_mgr_update_bindings(mgr, binding_data, chassis_rec,
> +                                  sb_readonly, ovs_readonly);
>  }
>  
>  static void
> @@ -390,6 +400,7 @@ ovs_iface_set_state(struct if_status_mgr *mgr, struct ovs_iface *iface,
>  static void
>  if_status_mgr_update_bindings(struct if_status_mgr *mgr,
>                                struct local_binding_data *binding_data,
> +                              const struct sbrec_chassis *chassis_rec,
>                                bool sb_readonly, bool ovs_readonly)
>  {
>      if (!binding_data) {
> @@ -404,6 +415,9 @@ if_status_mgr_update_bindings(struct if_status_mgr *mgr,
>       */
>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_INSTALL_FLOWS]) {
>          struct ovs_iface *iface = node->data;
> +        if (!sb_readonly) {
> +            local_binding_set_pb(bindings, iface->id, chassis_rec);
> +        }
>  
>          local_binding_set_down(bindings, iface->id, sb_readonly, ovs_readonly);
>      }
> @@ -415,7 +429,9 @@ if_status_mgr_update_bindings(struct if_status_mgr *mgr,
>      char *ts_now_str = xasprintf("%lld", time_wall_msec());
>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
>          struct ovs_iface *iface = node->data;
> -
> +        if (!sb_readonly) {
> +            local_binding_set_pb(bindings, iface->id, chassis_rec);
> +        }
>          local_binding_set_up(bindings, iface->id, ts_now_str,
>                               sb_readonly, ovs_readonly);
>      }
> @@ -426,6 +442,9 @@ if_status_mgr_update_bindings(struct if_status_mgr *mgr,
>       */
>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
>          struct ovs_iface *iface = node->data;
> +        if (!sb_readonly) {
> +            local_binding_set_pb(bindings, iface->id, NULL);
> +        }
>  
>          local_binding_set_down(bindings, iface->id, sb_readonly, ovs_readonly);
>      }
> diff --git a/controller/if-status.h b/controller/if-status.h
> index ff4aa760e..2c55eb18e 100644
> --- a/controller/if-status.h
> +++ b/controller/if-status.h
> @@ -31,10 +31,14 @@ void if_status_mgr_claim_iface(struct if_status_mgr *, const char *iface_id);
>  void if_status_mgr_release_iface(struct if_status_mgr *, const char *iface_id);
>  void if_status_mgr_delete_iface(struct if_status_mgr *, const char *iface_id);
>  
> -void if_status_mgr_update(struct if_status_mgr *, struct local_binding_data *);
> +void if_status_mgr_update(struct if_status_mgr *, struct local_binding_data *,
> +                          const struct sbrec_chassis *chassis);
>  void if_status_mgr_run(struct if_status_mgr *mgr, struct local_binding_data *,
> +                       const struct sbrec_chassis *chassis,
>                         bool sb_readonly, bool ovs_readonly);
>  void if_status_mgr_get_memory_usage(struct if_status_mgr *mgr,
>                                      struct simap *usage);
> +bool if_status_mgr_iface_is_present(struct if_status_mgr *mgr,
> +                                    const char *iface_id);
>  
>  # endif /* controller/if-status.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 5a6274eb2..994aebdfb 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -3912,7 +3912,7 @@ main(int argc, char *argv[])
>                          runtime_data ? &runtime_data->lbinding_data : NULL;
>                      stopwatch_start(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
>                                      time_msec());
> -                    if_status_mgr_update(if_mgr, binding_data);
> +                    if_status_mgr_update(if_mgr, binding_data, chassis);
>                      stopwatch_stop(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
>                                     time_msec());
>  
> @@ -3938,8 +3938,8 @@ main(int argc, char *argv[])
>                                     time_msec());
>                      stopwatch_start(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>                                      time_msec());
> -                    if_status_mgr_run(if_mgr, binding_data, !ovnsb_idl_txn,
> -                                      !ovs_idl_txn);
> +                    if_status_mgr_run(if_mgr, binding_data, chassis,
> +                                      !ovnsb_idl_txn, !ovs_idl_txn);
>                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>                                     time_msec());
>                  }
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 6a0a169c1..403fbc85f 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -13934,6 +13934,7 @@ ovn_attach n1 br-phys 192.168.0.11
>  ovs-vsctl -- add-port br-int hv1-vif0 -- \
>  set Interface hv1-vif0 ofport-request=1
>  
> +

Unrelated?

>  sim_add hv2
>  as hv2
>  ovs-vsctl add-br br-phys
> @@ -13983,7 +13984,10 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep actions=output:1],
>  echo "verifying that lsp0 binding moves when requested-chassis is changed"
>  
>  ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
> -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this chassis" hv1/ovn-controller.log)])
> +
> +# We might see multiple "Releasing lport ...", when sb is read only
> +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this chassis" hv1/ovn-controller.log)])
> +
>  wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
>  
>  # (6) Chassis hv2 should add flows and hv1 should not.
> @@ -14029,7 +14033,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep in_port=1], [0], [ig
>  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep actions=output:1], [0], [ignore])
>  
>  check ovn-nbctl --wait=hv lsp-set-options lsp0 requested-chassis=non-existant-chassis
> -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this chassis" hv1/ovn-controller.log)])
> +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this chassis" hv1/ovn-controller.log)])
>  check ovn-nbctl --wait=hv sync
>  wait_column '' Port_Binding chasssi logical_port=lsp0
>  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep in_port=1], [1], [])
> @@ -30595,3 +30599,50 @@ test_mac_binding_flows hv2 00\:00\:00\:00\:10\:10 1
>  OVN_CLEANUP([hv1], [hv2])
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([recomputes])
> +ovn_start
> +
> +# Add chassis
> +net_add n1
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +
> +add_switch_ports() {
> +    for port in $(seq $1 $2); do
> +        logical_switch_port=lsp${port}
> +        check ovn-nbctl lsp-add ls1 $logical_switch_port
> +        check ovn-nbctl lsp-set-addresses $logical_switch_port dynamic
> +
> +        as hv1 ovs-vsctl \
> +            -- add-port br-int vif${port} \
> +            -- set Interface vif${port} external_ids:iface-id=$logical_switch_port
> +    done

Don't we need a wait_for_ports_up here to make sure the new ports are
claimed and processed?

> +
> +    check ovn-nbctl --wait=hv sync
> +    AT_CHECK([test $(ovn-appctl -t ovn-controller coverage/read-counter lflow_run) == $lflow_run])
> +}
> +
> +check ovn-nbctl ls-add ls1
> +check ovn-nbctl set Logical_Switch ls1 other_config:subnet=10.1.0.0/16
> +check ovn-nbctl set Logical_Switch ls1 other_config:exclude_ips=10.1.255.254
> +
> +check ovn-nbctl lr-add lr1
> +check ovn-nbctl lsp-add ls1 lsp0 -- set Logical_Switch_Port lsp0 type=router options:router-port=lrp0 addresses=dynamic
> +check ovn-nbctl lrp-add lr1 lrp0 "f0:00:00:01:00:01" 10.1.255.254/16
> +check ovn-nbctl lr-nat-add lr1 snat 10.2.0.1 10.1.0.0/16
> +
> +check ovn-nbctl --wait=hv sync
> +lflow_run=$(ovn-appctl -t ovn-controller coverage/read-counter lflow_run)
> +
> +add_switch_ports 1 50
> +add_switch_ports 51 100
> +add_switch_ports 101 150
> +
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> +])

Thanks,
Dumitru
Xavier Simonart May 13, 2022, 1:41 p.m. UTC | #2
Hi Dumitru

Thanks for the review.
I'll go through your suggestions and submit a v2.

Just commented here on your question - see embedded.
Thanks
Xavier

On Fri, May 13, 2022 at 2:46 PM Dumitru Ceara <dceara@redhat.com> wrote:

> Hi Xavier,
>
> On 5/12/22 11:04, Xavier Simonart wrote:
> > When VIF ports are claimed on a chassis, SBDB Port_Binding table is
> updated.
> > If the SBDB IDL is still is read-only ("in transaction") when such a
> update
> > is required, the update is not possible and recompute is triggered
> through
> > I+P failure.
> >
> > This situation can happen:
> > - after updating Port_Binding->chassis to SBDB for one port, in a
> following
> >   iteration, ovn-controller handles Interface:external_ids:ovn-installed
> >   (for the same port) while SBDB is still read-only.
> > - after updating Port_Binding->chassis to SBDB for one port, in a
> following
> >   iteration, ovn-controller updates Port_Binding->chassis for another
> port,
> >   while SBDB is still read-only.
> >
> > This patch prevent the recompute, by having the if-status module
> > updating the Port_Binding chassis (if needed), when possible.
> > This does not delay Port_Binding chassis update compared to before this
> > patch: Port_Binding chassis will be updated as soon as SBDB is again
> > writable, as it was the case, through a recompute, before this patch.
>
> Just to confirm, with your patch the Port_Binding chassis will be
> updated when the SBDB is again writable and will *not* cause a
> recompute, right?
>
> Yes, confirmed. I can reformulate as:

This patch prevents the recompute, by having the if-status module
updating the Port_Binding chassis (if needed), when possible.
This does not delay Port_Binding chassis update compared to before this
patch.
- With the patch, Port_Binding chassis will be updated as soon as SBDB is
again
writable, without recompute.
- Without the patch, Port_Binding chassis will be updated as soon as SBDB
is again
writable, through a recompute.

As a slide note, the patch only prevents the recomputes when up is set
through notification (notify_up=true).
I think we can later fix the recomputes in the (notify_up=false) case if
needed, but I thought it was better to see if
1) the approach used by the patch is accepted
2) the use case notify_up=false is really needed
In that case, additional states would be needed in if-status state machine.

>
> > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2059253
> > Signed-off-by: Xavier Simonart <xsimonar@redhat.com>
> > ---
> >  controller/binding.c        | 124 ++++++++++++++++++++++++++----------
> >  controller/binding.h        |   9 ++-
> >  controller/if-status.c      |  31 +++++++--
> >  controller/if-status.h      |   6 +-
> >  controller/ovn-controller.c |   6 +-
> >  tests/ovn.at                |  55 +++++++++++++++-
> >  6 files changed, 184 insertions(+), 47 deletions(-)
> >
> > diff --git a/controller/binding.c b/controller/binding.c
> > index e5ba56b25..d917d8775 100644
> > --- a/controller/binding.c
> > +++ b/controller/binding.c
> > @@ -657,11 +657,15 @@ local_binding_get_lport_ofport(const struct shash
> *local_bindings,
> >  }
> >
> >  bool
> > -local_binding_is_up(struct shash *local_bindings, const char *pb_name)
> > +local_binding_is_up(struct shash *local_bindings, const char *pb_name,
> > +                    const struct sbrec_chassis *chassis_rec)
> >  {
> >      struct local_binding *lbinding =
> >          local_binding_find(local_bindings, pb_name);
> >      struct binding_lport *b_lport =
> local_binding_get_primary_lport(lbinding);
> > +    if (b_lport && b_lport->pb->chassis != chassis_rec) {
> > +        return false;
> > +    }
> >      if (lbinding && b_lport && lbinding->iface) {
> >          if (b_lport->pb->n_up && !b_lport->pb->up[0]) {
> >              return false;
> > @@ -673,13 +677,23 @@ local_binding_is_up(struct shash *local_bindings,
> const char *pb_name)
> >  }
> >
> >  bool
> > -local_binding_is_down(struct shash *local_bindings, const char *pb_name)
> > +local_binding_is_down(struct shash *local_bindings, const char *pb_name,
> > +                      const struct sbrec_chassis *chassis_rec)
> >  {
> >      struct local_binding *lbinding =
> >          local_binding_find(local_bindings, pb_name);
> >
> >      struct binding_lport *b_lport =
> local_binding_get_primary_lport(lbinding);
> >
> > +    if (b_lport) {
> > +        if (b_lport->pb->chassis == chassis_rec) {
> > +            return false;
> > +        } else if (b_lport->pb->chassis) {
> > +            VLOG_DBG("lport %s already claimed by other chassis",
> > +                     b_lport->pb->logical_port);
> > +        }
> > +    }
> > +
> >      if (!lbinding) {
> >          return true;
> >      }
> > @@ -894,6 +908,48 @@ get_lport_type_str(enum en_lport_type lport_type)
> >      OVS_NOT_REACHED();
> >  }
> >
> > +static void set_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb,
>
> Nit: this should be:
>
> static void
> set_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb,
>
> > +                          const struct sbrec_chassis *chassis_rec)
> > +{
> > +    if (pb->chassis != chassis_rec) {
> > +        if (pb->chassis) {
> > +            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> > +                    pb->logical_port, pb->chassis->name,
> > +                    chassis_rec->name);
>
> Nit: indentation.
>
> > +        } else {
> > +            VLOG_INFO("Claiming lport %s for this chassis.",
> pb->logical_port);
> > +        }
> > +        for (int i = 0; i < pb->n_mac; i++) {
> > +            VLOG_INFO("%s: Claiming %s", pb->logical_port, pb->mac[i]);
> > +        }
> > +        sbrec_port_binding_set_chassis(pb, chassis_rec);
> > +    }
> > +}
> > +
> > +static void clear_pb_chassis_in_sbrec(const struct sbrec_port_binding
> *pb)
>
> Here too, the function name should start on a new line.
>
> > +{
> > +    if (pb->chassis) {
> > +        sbrec_port_binding_set_chassis(pb, NULL);
> > +    }
> > +}
> > +
> > +void
> > +local_binding_set_pb(struct shash *local_bindings, const char *pb_name,
> > +                    const struct sbrec_chassis *chassis_rec)
>
> Nit: indentation.
>
> > +{
> > +    struct local_binding *lbinding =
> > +        local_binding_find(local_bindings, pb_name);
> > +    struct binding_lport *b_lport =
> local_binding_get_primary_lport(lbinding);
> > +
> > +    if (b_lport) {
> > +        if (chassis_rec) {
> > +            set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec);
> > +        } else {
> > +            clear_pb_chassis_in_sbrec(b_lport->pb);
> > +        }
> > +    }
> > +}
> > +
> >  /* For newly claimed ports, if 'notify_up' is 'false':
> >   * - set the 'pb.up' field to true if 'pb' has no 'parent_pb'.
> >   * - set the 'pb.up' field to true if 'parent_pb.up' is 'true' (e.g.,
> for
> > @@ -906,23 +962,37 @@ get_lport_type_str(enum en_lport_type lport_type)
> >   *   it's explicitly set to 'false'.
> >   *   This is to ensure compatibility with older versions of ovn-northd.
> >   */
> > -static void
> > +static bool
> >  claimed_lport_set_up(const struct sbrec_port_binding *pb,
> >                       const struct sbrec_port_binding *parent_pb,
> >                       const struct sbrec_chassis *chassis_rec,
> > -                     bool notify_up, struct if_status_mgr *if_mgr)
> > +                     bool notify_up, struct if_status_mgr *if_mgr,
> > +                     bool sb_readonly)
> >  {
> > +
>
> Extra line not needed.
>
> >      if (!notify_up) {
> >          bool up = true;
> >          if (!parent_pb || (parent_pb->n_up && parent_pb->up[0])) {
> > -            sbrec_port_binding_set_up(pb, &up, 1);
> > +            if (!sb_readonly) {
> > +                sbrec_port_binding_set_up(pb, &up, 1);
> > +            } else if (pb->n_up && !pb->up[0]) {
> > +                return false;
> > +            }
> >          }
> > -        return;
> > +        if (sb_readonly && (pb->chassis != chassis_rec)) {
> > +            /* Handle the cases where if_status_mgr does not claim the
> > +             * interface.In those cases, if we do not set pb->chassis
> in sb
> > +             * now (as readonly), we might not do it later ...
> > +             */
> > +            return false;
> > +        }
> > +        return true;
> >      }
> >
> >      if (pb->chassis != chassis_rec || (pb->n_up && !pb->up[0])) {
> >          if_status_mgr_claim_iface(if_mgr, pb->logical_port);
> >      }
> > +    return true;
> >  }
> >
> >  /* Returns false if lport is not claimed due to 'sb_readonly'.
> > @@ -937,27 +1007,15 @@ claim_lport(const struct sbrec_port_binding *pb,
> >              struct hmap *tracked_datapaths,
> >              struct if_status_mgr *if_mgr)
> >  {
> > -    if (!sb_readonly) {
> > -        claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
> if_mgr);
> > -    }
> >
> > +    if (!claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
> > +           if_mgr, sb_readonly)) {
> > +        return false;
> > +    }
> >      if (pb->chassis != chassis_rec) {
> > -        if (sb_readonly) {
> > -            return false;
> > -        }
> > -
> > -        if (pb->chassis) {
> > -            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> > -                    pb->logical_port, pb->chassis->name,
> > -                    chassis_rec->name);
> > -        } else {
> > -            VLOG_INFO("Claiming lport %s for this chassis.",
> pb->logical_port);
> > +        if (!sb_readonly) {
> > +            set_pb_chassis_in_sbrec(pb, chassis_rec);
> >          }
> > -        for (int i = 0; i < pb->n_mac; i++) {
> > -            VLOG_INFO("%s: Claiming %s", pb->logical_port, pb->mac[i]);
> > -        }
> > -
> > -        sbrec_port_binding_set_chassis(pb, chassis_rec);
> >
> >          if (tracked_datapaths) {
> >              update_lport_tracking(pb, tracked_datapaths, true);
> > @@ -974,7 +1032,6 @@ claim_lport(const struct sbrec_port_binding *pb,
> >          }
> >          sbrec_port_binding_set_encap(pb, encap_rec);
> >      }
> > -
>
> Unrelated?
>
>

> >      return true;
> >  }
> >
> > @@ -986,7 +1043,8 @@ claim_lport(const struct sbrec_port_binding *pb,
> >   * Caller should make sure that this is the case.
> >   */
> >  static bool
> > -release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly)
> > +release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly,
> > +               struct if_status_mgr *if_mgr)
> >  {
> >      if (pb->encap) {
> >          if (sb_readonly) {
> > @@ -995,11 +1053,13 @@ release_lport_(const struct sbrec_port_binding
> *pb, bool sb_readonly)
> >          sbrec_port_binding_set_encap(pb, NULL);
> >      }
> >
> > +    /* if sb readonly, pb->chassis unset through if-status if present */
> >      if (pb->chassis) {
> > -        if (sb_readonly) {
> > +        if (!sb_readonly) {
> > +            sbrec_port_binding_set_chassis(pb, NULL);
> > +        } else if (!if_status_mgr_iface_is_present(if_mgr,
> pb->logical_port)) {
> >              return false;
> >          }
> > -        sbrec_port_binding_set_chassis(pb, NULL);
> >      }
> >
> >      if (pb->virtual_parent) {
> > @@ -1009,7 +1069,8 @@ release_lport_(const struct sbrec_port_binding
> *pb, bool sb_readonly)
> >          sbrec_port_binding_set_virtual_parent(pb, NULL);
> >      }
> >
> > -    VLOG_INFO("Releasing lport %s from this chassis.",
> pb->logical_port);
> > +    VLOG_INFO("Releasing lport %s from this chassis (sb_readonly=%d)",
> > +              pb->logical_port, sb_readonly);
> >      return true;
> >  }
> >
> > @@ -1017,7 +1078,7 @@ static bool
> >  release_lport(const struct sbrec_port_binding *pb, bool sb_readonly,
> >                struct hmap *tracked_datapaths, struct if_status_mgr
> *if_mgr)
> >  {
> > -    if (!release_lport_(pb, sb_readonly)) {
> > +    if (!release_lport_(pb, sb_readonly, if_mgr)) {
> >          return false;
> >      }
> >
> > @@ -1342,10 +1403,9 @@ consider_localport(const struct
> sbrec_port_binding *pb,
> >      /* If the port binding is claimed, then release it as localport is
> claimed
> >       * by any ovn-controller. */
> >      if (pb->chassis == b_ctx_in->chassis_rec) {
> > -        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn)) {
> > +        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn,
> b_ctx_out->if_mgr)) {
> >              return false;
> >          }
> > -
>
> Unrelated?
>
> >          remove_related_lport(pb, b_ctx_out);
> >      }
> >
> > diff --git a/controller/binding.h b/controller/binding.h
> > index 430a8d9b1..45202a321 100644
> > --- a/controller/binding.h
> > +++ b/controller/binding.h
> > @@ -151,14 +151,17 @@ const struct sbrec_port_binding
> *local_binding_get_primary_pb(
> >  ofp_port_t local_binding_get_lport_ofport(const struct shash
> *local_bindings,
> >                                            const char *pb_name);
> >
> > -bool local_binding_is_up(struct shash *local_bindings, const char
> *pb_name);
> > -bool local_binding_is_down(struct shash *local_bindings, const char
> *pb_name);
> > +bool local_binding_is_up(struct shash *local_bindings, const char
> *pb_name,
> > +                         const struct sbrec_chassis *);
> > +bool local_binding_is_down(struct shash *local_bindings, const char
> *pb_name,
> > +                         const struct sbrec_chassis *);
> >  void local_binding_set_up(struct shash *local_bindings, const char
> *pb_name,
> >                            const char *ts_now_str, bool sb_readonly,
> >                            bool ovs_readonly);
> >  void local_binding_set_down(struct shash *local_bindings, const char
> *pb_name,
> >                              bool sb_readonly, bool ovs_readonly);
> > -
> > +void local_binding_set_pb(struct shash *local_bindings, const char
> *pb_name,
> > +                         const struct sbrec_chassis *chassis_rec);
> >  void binding_register_ovs_idl(struct ovsdb_idl *);
> >  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
> >  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > diff --git a/controller/if-status.c b/controller/if-status.c
> > index dbdf8b66f..e36df22a0 100644
> > --- a/controller/if-status.c
> > +++ b/controller/if-status.c
> > @@ -115,6 +115,7 @@ static void ovs_iface_set_state(struct if_status_mgr
> *, struct ovs_iface *,
> >
> >  static void if_status_mgr_update_bindings(
> >      struct if_status_mgr *mgr, struct local_binding_data *binding_data,
> > +    const struct sbrec_chassis *chassis_rec,
> >      bool sb_readonly, bool ovs_readonly);
> >
> >  struct if_status_mgr *
> > @@ -181,6 +182,13 @@ if_status_mgr_claim_iface(struct if_status_mgr
> *mgr, const char *iface_id)
> >      }
> >  }
> >
> > +bool
> > +if_status_mgr_iface_is_present(struct if_status_mgr *mgr, const char
> *iface_id)
> > +{
> > +    struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id);
> > +    return (!!iface);
> > +}
> > +
> >  void
> >  if_status_mgr_release_iface(struct if_status_mgr *mgr, const char
> *iface_id)
> >  {
> > @@ -247,7 +255,8 @@ if_status_mgr_delete_iface(struct if_status_mgr
> *mgr, const char *iface_id)
> >
> >  void
> >  if_status_mgr_update(struct if_status_mgr *mgr,
> > -                     struct local_binding_data *binding_data)
> > +                     struct local_binding_data *binding_data,
> > +                     const struct sbrec_chassis *chassis_rec)
> >  {
> >      if (!binding_data) {
> >          return;
> > @@ -262,7 +271,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
> >      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
> >          struct ovs_iface *iface = node->data;
> >
> > -        if (local_binding_is_up(bindings, iface->id)) {
> > +        if (local_binding_is_up(bindings, iface->id, chassis_rec)) {
> >              ovs_iface_set_state(mgr, iface, OIF_INSTALLED);
> >          }
> >      }
> > @@ -273,7 +282,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
> >      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
> >          struct ovs_iface *iface = node->data;
> >
> > -        if (local_binding_is_down(bindings, iface->id)) {
> > +        if (local_binding_is_down(bindings, iface->id, chassis_rec)) {
> >              ovs_iface_destroy(mgr, iface);
> >          }
> >      }
> > @@ -306,6 +315,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
> >  void
> >  if_status_mgr_run(struct if_status_mgr *mgr,
> >                    struct local_binding_data *binding_data,
> > +                  const struct sbrec_chassis *chassis_rec,
> >                    bool sb_readonly, bool ovs_readonly)
> >  {
> >      struct ofctrl_acked_seqnos *acked_seqnos =
> > @@ -328,8 +338,8 @@ if_status_mgr_run(struct if_status_mgr *mgr,
> >      ofctrl_acked_seqnos_destroy(acked_seqnos);
> >
> >      /* Update binding states. */
> > -    if_status_mgr_update_bindings(mgr, binding_data, sb_readonly,
> > -                                  ovs_readonly);
> > +    if_status_mgr_update_bindings(mgr, binding_data, chassis_rec,
> > +                                  sb_readonly, ovs_readonly);
> >  }
> >
> >  static void
> > @@ -390,6 +400,7 @@ ovs_iface_set_state(struct if_status_mgr *mgr,
> struct ovs_iface *iface,
> >  static void
> >  if_status_mgr_update_bindings(struct if_status_mgr *mgr,
> >                                struct local_binding_data *binding_data,
> > +                              const struct sbrec_chassis *chassis_rec,
> >                                bool sb_readonly, bool ovs_readonly)
> >  {
> >      if (!binding_data) {
> > @@ -404,6 +415,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
> *mgr,
> >       */
> >      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_INSTALL_FLOWS]) {
> >          struct ovs_iface *iface = node->data;
> > +        if (!sb_readonly) {
> > +            local_binding_set_pb(bindings, iface->id, chassis_rec);
> > +        }
> >
> >          local_binding_set_down(bindings, iface->id, sb_readonly,
> ovs_readonly);
> >      }
> > @@ -415,7 +429,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
> *mgr,
> >      char *ts_now_str = xasprintf("%lld", time_wall_msec());
> >      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
> >          struct ovs_iface *iface = node->data;
> > -
> > +        if (!sb_readonly) {
> > +            local_binding_set_pb(bindings, iface->id, chassis_rec);
> > +        }
> >          local_binding_set_up(bindings, iface->id, ts_now_str,
> >                               sb_readonly, ovs_readonly);
> >      }
> > @@ -426,6 +442,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
> *mgr,
> >       */
> >      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
> >          struct ovs_iface *iface = node->data;
> > +        if (!sb_readonly) {
> > +            local_binding_set_pb(bindings, iface->id, NULL);
> > +        }
> >
> >          local_binding_set_down(bindings, iface->id, sb_readonly,
> ovs_readonly);
> >      }
> > diff --git a/controller/if-status.h b/controller/if-status.h
> > index ff4aa760e..2c55eb18e 100644
> > --- a/controller/if-status.h
> > +++ b/controller/if-status.h
> > @@ -31,10 +31,14 @@ void if_status_mgr_claim_iface(struct if_status_mgr
> *, const char *iface_id);
> >  void if_status_mgr_release_iface(struct if_status_mgr *, const char
> *iface_id);
> >  void if_status_mgr_delete_iface(struct if_status_mgr *, const char
> *iface_id);
> >
> > -void if_status_mgr_update(struct if_status_mgr *, struct
> local_binding_data *);
> > +void if_status_mgr_update(struct if_status_mgr *, struct
> local_binding_data *,
> > +                          const struct sbrec_chassis *chassis);
> >  void if_status_mgr_run(struct if_status_mgr *mgr, struct
> local_binding_data *,
> > +                       const struct sbrec_chassis *chassis,
> >                         bool sb_readonly, bool ovs_readonly);
> >  void if_status_mgr_get_memory_usage(struct if_status_mgr *mgr,
> >                                      struct simap *usage);
> > +bool if_status_mgr_iface_is_present(struct if_status_mgr *mgr,
> > +                                    const char *iface_id);
> >
> >  # endif /* controller/if-status.h */
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 5a6274eb2..994aebdfb 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -3912,7 +3912,7 @@ main(int argc, char *argv[])
> >                          runtime_data ? &runtime_data->lbinding_data :
> NULL;
> >                      stopwatch_start(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
> >                                      time_msec());
> > -                    if_status_mgr_update(if_mgr, binding_data);
> > +                    if_status_mgr_update(if_mgr, binding_data, chassis);
> >                      stopwatch_stop(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
> >                                     time_msec());
> >
> > @@ -3938,8 +3938,8 @@ main(int argc, char *argv[])
> >                                     time_msec());
> >                      stopwatch_start(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
> >                                      time_msec());
> > -                    if_status_mgr_run(if_mgr, binding_data,
> !ovnsb_idl_txn,
> > -                                      !ovs_idl_txn);
> > +                    if_status_mgr_run(if_mgr, binding_data, chassis,
> > +                                      !ovnsb_idl_txn, !ovs_idl_txn);
> >                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
> >                                     time_msec());
> >                  }
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index 6a0a169c1..403fbc85f 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -13934,6 +13934,7 @@ ovn_attach n1 br-phys 192.168.0.11
> >  ovs-vsctl -- add-port br-int hv1-vif0 -- \
> >  set Interface hv1-vif0 ofport-request=1
> >
> > +
>
> Unrelated?
>
> >  sim_add hv2
> >  as hv2
> >  ovs-vsctl add-br br-phys
> > @@ -13983,7 +13984,10 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
> table=65 | grep actions=output:1],
> >  echo "verifying that lsp0 binding moves when requested-chassis is
> changed"
> >
> >  ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
> > -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
> chassis" hv1/ovn-controller.log)])
> > +
> > +# We might see multiple "Releasing lport ...", when sb is read only
> > +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
> chassis" hv1/ovn-controller.log)])
> > +
> >  wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
> >
> >  # (6) Chassis hv2 should add flows and hv1 should not.
> > @@ -14029,7 +14033,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
> table=0 | grep in_port=1], [0], [ig
> >  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep
> actions=output:1], [0], [ignore])
> >
> >  check ovn-nbctl --wait=hv lsp-set-options lsp0
> requested-chassis=non-existant-chassis
> > -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
> chassis" hv1/ovn-controller.log)])
> > +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
> chassis" hv1/ovn-controller.log)])
> >  check ovn-nbctl --wait=hv sync
> >  wait_column '' Port_Binding chasssi logical_port=lsp0
> >  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep in_port=1],
> [1], [])
> > @@ -30595,3 +30599,50 @@ test_mac_binding_flows hv2
> 00\:00\:00\:00\:10\:10 1
> >  OVN_CLEANUP([hv1], [hv2])
> >  AT_CLEANUP
> >  ])
> > +
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([recomputes])
> > +ovn_start
> > +
> > +# Add chassis
> > +net_add n1
> > +sim_add hv1
> > +as hv1
> > +ovs-vsctl add-br br-phys
> > +ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> > +ovn_attach n1 br-phys 192.168.0.1
> > +
> > +add_switch_ports() {
> > +    for port in $(seq $1 $2); do
> > +        logical_switch_port=lsp${port}
> > +        check ovn-nbctl lsp-add ls1 $logical_switch_port
> > +        check ovn-nbctl lsp-set-addresses $logical_switch_port dynamic
> > +
> > +        as hv1 ovs-vsctl \
> > +            -- add-port br-int vif${port} \
> > +            -- set Interface vif${port}
> external_ids:iface-id=$logical_switch_port
> > +    done
>
> Don't we need a wait_for_ports_up here to make sure the new ports are
> claimed and processed?
>
> > +
> > +    check ovn-nbctl --wait=hv sync
> > +    AT_CHECK([test $(ovn-appctl -t ovn-controller coverage/read-counter
> lflow_run) == $lflow_run])
> > +}
> > +
> > +check ovn-nbctl ls-add ls1
> > +check ovn-nbctl set Logical_Switch ls1 other_config:subnet=10.1.0.0/16
> > +check ovn-nbctl set Logical_Switch ls1
> other_config:exclude_ips=10.1.255.254
> > +
> > +check ovn-nbctl lr-add lr1
> > +check ovn-nbctl lsp-add ls1 lsp0 -- set Logical_Switch_Port lsp0
> type=router options:router-port=lrp0 addresses=dynamic
> > +check ovn-nbctl lrp-add lr1 lrp0 "f0:00:00:01:00:01" 10.1.255.254/16
> > +check ovn-nbctl lr-nat-add lr1 snat 10.2.0.1 10.1.0.0/16
> > +
> > +check ovn-nbctl --wait=hv sync
> > +lflow_run=$(ovn-appctl -t ovn-controller coverage/read-counter
> lflow_run)
> > +
> > +add_switch_ports 1 50
> > +add_switch_ports 51 100
> > +add_switch_ports 101 150
> > +
> > +OVN_CLEANUP([hv1])
> > +AT_CLEANUP
> > +])
>
> Thanks,
> Dumitru
>
>
Dumitru Ceara May 13, 2022, 7:34 p.m. UTC | #3
On 5/13/22 15:41, Xavier Simonart wrote:
> Hi Dumitru
> 
> Thanks for the review.
> I'll go through your suggestions and submit a v2.
> 
> Just commented here on your question - see embedded.
> Thanks
> Xavier
> 
> On Fri, May 13, 2022 at 2:46 PM Dumitru Ceara <dceara@redhat.com> wrote:
> 
>> Hi Xavier,
>>
>> On 5/12/22 11:04, Xavier Simonart wrote:
>>> When VIF ports are claimed on a chassis, SBDB Port_Binding table is
>> updated.
>>> If the SBDB IDL is still is read-only ("in transaction") when such a
>> update
>>> is required, the update is not possible and recompute is triggered
>> through
>>> I+P failure.
>>>
>>> This situation can happen:
>>> - after updating Port_Binding->chassis to SBDB for one port, in a
>> following
>>>   iteration, ovn-controller handles Interface:external_ids:ovn-installed
>>>   (for the same port) while SBDB is still read-only.
>>> - after updating Port_Binding->chassis to SBDB for one port, in a
>> following
>>>   iteration, ovn-controller updates Port_Binding->chassis for another
>> port,
>>>   while SBDB is still read-only.
>>>
>>> This patch prevent the recompute, by having the if-status module
>>> updating the Port_Binding chassis (if needed), when possible.
>>> This does not delay Port_Binding chassis update compared to before this
>>> patch: Port_Binding chassis will be updated as soon as SBDB is again
>>> writable, as it was the case, through a recompute, before this patch.
>>
>> Just to confirm, with your patch the Port_Binding chassis will be
>> updated when the SBDB is again writable and will *not* cause a
>> recompute, right?
>>
>> Yes, confirmed. I can reformulate as:
> 
> This patch prevents the recompute, by having the if-status module
> updating the Port_Binding chassis (if needed), when possible.
> This does not delay Port_Binding chassis update compared to before this
> patch.
> - With the patch, Port_Binding chassis will be updated as soon as SBDB is
> again
> writable, without recompute.
> - Without the patch, Port_Binding chassis will be updated as soon as SBDB
> is again
> writable, through a recompute.
> 

Thanks for the confirmation!

> As a slide note, the patch only prevents the recomputes when up is set
> through notification (notify_up=true).
> I think we can later fix the recomputes in the (notify_up=false) case if
> needed, but I thought it was better to see if
> 1) the approach used by the patch is accepted

IMO the approach is fine.

> 2) the use case notify_up=false is really needed

Container and virtual ports are used by OpenStack/Neutron so we might
want to try to fix this case too.

> In that case, additional states would be needed in if-status state machine.
> 

Why can't we reuse the state machine by adding a flag to the ovs_iface
to mark that this is a child interface?  In which case the transition
from OIF_INSTALL_FLOWS to OIF_MARK_UP would happen only if the parent
interface is already in OIF_MARK_UP.

OIF_CLAIMED -> OIF_INSTALL_FLOWS -> OIF_MARK_UP -> OIF_INSTALLED

I'm OK to address this second case (notify_up=false) as a follow up though.

Thanks,
Dumitru

>>
>>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2059253
>>> Signed-off-by: Xavier Simonart <xsimonar@redhat.com>
>>> ---
>>>  controller/binding.c        | 124 ++++++++++++++++++++++++++----------
>>>  controller/binding.h        |   9 ++-
>>>  controller/if-status.c      |  31 +++++++--
>>>  controller/if-status.h      |   6 +-
>>>  controller/ovn-controller.c |   6 +-
>>>  tests/ovn.at                |  55 +++++++++++++++-
>>>  6 files changed, 184 insertions(+), 47 deletions(-)
>>>
>>> diff --git a/controller/binding.c b/controller/binding.c
>>> index e5ba56b25..d917d8775 100644
>>> --- a/controller/binding.c
>>> +++ b/controller/binding.c
>>> @@ -657,11 +657,15 @@ local_binding_get_lport_ofport(const struct shash
>> *local_bindings,
>>>  }
>>>
>>>  bool
>>> -local_binding_is_up(struct shash *local_bindings, const char *pb_name)
>>> +local_binding_is_up(struct shash *local_bindings, const char *pb_name,
>>> +                    const struct sbrec_chassis *chassis_rec)
>>>  {
>>>      struct local_binding *lbinding =
>>>          local_binding_find(local_bindings, pb_name);
>>>      struct binding_lport *b_lport =
>> local_binding_get_primary_lport(lbinding);
>>> +    if (b_lport && b_lport->pb->chassis != chassis_rec) {
>>> +        return false;
>>> +    }
>>>      if (lbinding && b_lport && lbinding->iface) {
>>>          if (b_lport->pb->n_up && !b_lport->pb->up[0]) {
>>>              return false;
>>> @@ -673,13 +677,23 @@ local_binding_is_up(struct shash *local_bindings,
>> const char *pb_name)
>>>  }
>>>
>>>  bool
>>> -local_binding_is_down(struct shash *local_bindings, const char *pb_name)
>>> +local_binding_is_down(struct shash *local_bindings, const char *pb_name,
>>> +                      const struct sbrec_chassis *chassis_rec)
>>>  {
>>>      struct local_binding *lbinding =
>>>          local_binding_find(local_bindings, pb_name);
>>>
>>>      struct binding_lport *b_lport =
>> local_binding_get_primary_lport(lbinding);
>>>
>>> +    if (b_lport) {
>>> +        if (b_lport->pb->chassis == chassis_rec) {
>>> +            return false;
>>> +        } else if (b_lport->pb->chassis) {
>>> +            VLOG_DBG("lport %s already claimed by other chassis",
>>> +                     b_lport->pb->logical_port);
>>> +        }
>>> +    }
>>> +
>>>      if (!lbinding) {
>>>          return true;
>>>      }
>>> @@ -894,6 +908,48 @@ get_lport_type_str(enum en_lport_type lport_type)
>>>      OVS_NOT_REACHED();
>>>  }
>>>
>>> +static void set_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb,
>>
>> Nit: this should be:
>>
>> static void
>> set_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb,
>>
>>> +                          const struct sbrec_chassis *chassis_rec)
>>> +{
>>> +    if (pb->chassis != chassis_rec) {
>>> +        if (pb->chassis) {
>>> +            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
>>> +                    pb->logical_port, pb->chassis->name,
>>> +                    chassis_rec->name);
>>
>> Nit: indentation.
>>
>>> +        } else {
>>> +            VLOG_INFO("Claiming lport %s for this chassis.",
>> pb->logical_port);
>>> +        }
>>> +        for (int i = 0; i < pb->n_mac; i++) {
>>> +            VLOG_INFO("%s: Claiming %s", pb->logical_port, pb->mac[i]);
>>> +        }
>>> +        sbrec_port_binding_set_chassis(pb, chassis_rec);
>>> +    }
>>> +}
>>> +
>>> +static void clear_pb_chassis_in_sbrec(const struct sbrec_port_binding
>> *pb)
>>
>> Here too, the function name should start on a new line.
>>
>>> +{
>>> +    if (pb->chassis) {
>>> +        sbrec_port_binding_set_chassis(pb, NULL);
>>> +    }
>>> +}
>>> +
>>> +void
>>> +local_binding_set_pb(struct shash *local_bindings, const char *pb_name,
>>> +                    const struct sbrec_chassis *chassis_rec)
>>
>> Nit: indentation.
>>
>>> +{
>>> +    struct local_binding *lbinding =
>>> +        local_binding_find(local_bindings, pb_name);
>>> +    struct binding_lport *b_lport =
>> local_binding_get_primary_lport(lbinding);
>>> +
>>> +    if (b_lport) {
>>> +        if (chassis_rec) {
>>> +            set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec);
>>> +        } else {
>>> +            clear_pb_chassis_in_sbrec(b_lport->pb);
>>> +        }
>>> +    }
>>> +}
>>> +
>>>  /* For newly claimed ports, if 'notify_up' is 'false':
>>>   * - set the 'pb.up' field to true if 'pb' has no 'parent_pb'.
>>>   * - set the 'pb.up' field to true if 'parent_pb.up' is 'true' (e.g.,
>> for
>>> @@ -906,23 +962,37 @@ get_lport_type_str(enum en_lport_type lport_type)
>>>   *   it's explicitly set to 'false'.
>>>   *   This is to ensure compatibility with older versions of ovn-northd.
>>>   */
>>> -static void
>>> +static bool
>>>  claimed_lport_set_up(const struct sbrec_port_binding *pb,
>>>                       const struct sbrec_port_binding *parent_pb,
>>>                       const struct sbrec_chassis *chassis_rec,
>>> -                     bool notify_up, struct if_status_mgr *if_mgr)
>>> +                     bool notify_up, struct if_status_mgr *if_mgr,
>>> +                     bool sb_readonly)
>>>  {
>>> +
>>
>> Extra line not needed.
>>
>>>      if (!notify_up) {
>>>          bool up = true;
>>>          if (!parent_pb || (parent_pb->n_up && parent_pb->up[0])) {
>>> -            sbrec_port_binding_set_up(pb, &up, 1);
>>> +            if (!sb_readonly) {
>>> +                sbrec_port_binding_set_up(pb, &up, 1);
>>> +            } else if (pb->n_up && !pb->up[0]) {
>>> +                return false;
>>> +            }
>>>          }
>>> -        return;
>>> +        if (sb_readonly && (pb->chassis != chassis_rec)) {
>>> +            /* Handle the cases where if_status_mgr does not claim the
>>> +             * interface.In those cases, if we do not set pb->chassis
>> in sb
>>> +             * now (as readonly), we might not do it later ...
>>> +             */
>>> +            return false;
>>> +        }
>>> +        return true;
>>>      }
>>>
>>>      if (pb->chassis != chassis_rec || (pb->n_up && !pb->up[0])) {
>>>          if_status_mgr_claim_iface(if_mgr, pb->logical_port);
>>>      }
>>> +    return true;
>>>  }
>>>
>>>  /* Returns false if lport is not claimed due to 'sb_readonly'.
>>> @@ -937,27 +1007,15 @@ claim_lport(const struct sbrec_port_binding *pb,
>>>              struct hmap *tracked_datapaths,
>>>              struct if_status_mgr *if_mgr)
>>>  {
>>> -    if (!sb_readonly) {
>>> -        claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
>> if_mgr);
>>> -    }
>>>
>>> +    if (!claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
>>> +           if_mgr, sb_readonly)) {
>>> +        return false;
>>> +    }
>>>      if (pb->chassis != chassis_rec) {
>>> -        if (sb_readonly) {
>>> -            return false;
>>> -        }
>>> -
>>> -        if (pb->chassis) {
>>> -            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
>>> -                    pb->logical_port, pb->chassis->name,
>>> -                    chassis_rec->name);
>>> -        } else {
>>> -            VLOG_INFO("Claiming lport %s for this chassis.",
>> pb->logical_port);
>>> +        if (!sb_readonly) {
>>> +            set_pb_chassis_in_sbrec(pb, chassis_rec);
>>>          }
>>> -        for (int i = 0; i < pb->n_mac; i++) {
>>> -            VLOG_INFO("%s: Claiming %s", pb->logical_port, pb->mac[i]);
>>> -        }
>>> -
>>> -        sbrec_port_binding_set_chassis(pb, chassis_rec);
>>>
>>>          if (tracked_datapaths) {
>>>              update_lport_tracking(pb, tracked_datapaths, true);
>>> @@ -974,7 +1032,6 @@ claim_lport(const struct sbrec_port_binding *pb,
>>>          }
>>>          sbrec_port_binding_set_encap(pb, encap_rec);
>>>      }
>>> -
>>
>> Unrelated?
>>
>>
> 
>>>      return true;
>>>  }
>>>
>>> @@ -986,7 +1043,8 @@ claim_lport(const struct sbrec_port_binding *pb,
>>>   * Caller should make sure that this is the case.
>>>   */
>>>  static bool
>>> -release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly)
>>> +release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly,
>>> +               struct if_status_mgr *if_mgr)
>>>  {
>>>      if (pb->encap) {
>>>          if (sb_readonly) {
>>> @@ -995,11 +1053,13 @@ release_lport_(const struct sbrec_port_binding
>> *pb, bool sb_readonly)
>>>          sbrec_port_binding_set_encap(pb, NULL);
>>>      }
>>>
>>> +    /* if sb readonly, pb->chassis unset through if-status if present */
>>>      if (pb->chassis) {
>>> -        if (sb_readonly) {
>>> +        if (!sb_readonly) {
>>> +            sbrec_port_binding_set_chassis(pb, NULL);
>>> +        } else if (!if_status_mgr_iface_is_present(if_mgr,
>> pb->logical_port)) {
>>>              return false;
>>>          }
>>> -        sbrec_port_binding_set_chassis(pb, NULL);
>>>      }
>>>
>>>      if (pb->virtual_parent) {
>>> @@ -1009,7 +1069,8 @@ release_lport_(const struct sbrec_port_binding
>> *pb, bool sb_readonly)
>>>          sbrec_port_binding_set_virtual_parent(pb, NULL);
>>>      }
>>>
>>> -    VLOG_INFO("Releasing lport %s from this chassis.",
>> pb->logical_port);
>>> +    VLOG_INFO("Releasing lport %s from this chassis (sb_readonly=%d)",
>>> +              pb->logical_port, sb_readonly);
>>>      return true;
>>>  }
>>>
>>> @@ -1017,7 +1078,7 @@ static bool
>>>  release_lport(const struct sbrec_port_binding *pb, bool sb_readonly,
>>>                struct hmap *tracked_datapaths, struct if_status_mgr
>> *if_mgr)
>>>  {
>>> -    if (!release_lport_(pb, sb_readonly)) {
>>> +    if (!release_lport_(pb, sb_readonly, if_mgr)) {
>>>          return false;
>>>      }
>>>
>>> @@ -1342,10 +1403,9 @@ consider_localport(const struct
>> sbrec_port_binding *pb,
>>>      /* If the port binding is claimed, then release it as localport is
>> claimed
>>>       * by any ovn-controller. */
>>>      if (pb->chassis == b_ctx_in->chassis_rec) {
>>> -        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn)) {
>>> +        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn,
>> b_ctx_out->if_mgr)) {
>>>              return false;
>>>          }
>>> -
>>
>> Unrelated?
>>
>>>          remove_related_lport(pb, b_ctx_out);
>>>      }
>>>
>>> diff --git a/controller/binding.h b/controller/binding.h
>>> index 430a8d9b1..45202a321 100644
>>> --- a/controller/binding.h
>>> +++ b/controller/binding.h
>>> @@ -151,14 +151,17 @@ const struct sbrec_port_binding
>> *local_binding_get_primary_pb(
>>>  ofp_port_t local_binding_get_lport_ofport(const struct shash
>> *local_bindings,
>>>                                            const char *pb_name);
>>>
>>> -bool local_binding_is_up(struct shash *local_bindings, const char
>> *pb_name);
>>> -bool local_binding_is_down(struct shash *local_bindings, const char
>> *pb_name);
>>> +bool local_binding_is_up(struct shash *local_bindings, const char
>> *pb_name,
>>> +                         const struct sbrec_chassis *);
>>> +bool local_binding_is_down(struct shash *local_bindings, const char
>> *pb_name,
>>> +                         const struct sbrec_chassis *);
>>>  void local_binding_set_up(struct shash *local_bindings, const char
>> *pb_name,
>>>                            const char *ts_now_str, bool sb_readonly,
>>>                            bool ovs_readonly);
>>>  void local_binding_set_down(struct shash *local_bindings, const char
>> *pb_name,
>>>                              bool sb_readonly, bool ovs_readonly);
>>> -
>>> +void local_binding_set_pb(struct shash *local_bindings, const char
>> *pb_name,
>>> +                         const struct sbrec_chassis *chassis_rec);
>>>  void binding_register_ovs_idl(struct ovsdb_idl *);
>>>  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
>>>  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>> diff --git a/controller/if-status.c b/controller/if-status.c
>>> index dbdf8b66f..e36df22a0 100644
>>> --- a/controller/if-status.c
>>> +++ b/controller/if-status.c
>>> @@ -115,6 +115,7 @@ static void ovs_iface_set_state(struct if_status_mgr
>> *, struct ovs_iface *,
>>>
>>>  static void if_status_mgr_update_bindings(
>>>      struct if_status_mgr *mgr, struct local_binding_data *binding_data,
>>> +    const struct sbrec_chassis *chassis_rec,
>>>      bool sb_readonly, bool ovs_readonly);
>>>
>>>  struct if_status_mgr *
>>> @@ -181,6 +182,13 @@ if_status_mgr_claim_iface(struct if_status_mgr
>> *mgr, const char *iface_id)
>>>      }
>>>  }
>>>
>>> +bool
>>> +if_status_mgr_iface_is_present(struct if_status_mgr *mgr, const char
>> *iface_id)
>>> +{
>>> +    struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id);
>>> +    return (!!iface);
>>> +}
>>> +
>>>  void
>>>  if_status_mgr_release_iface(struct if_status_mgr *mgr, const char
>> *iface_id)
>>>  {
>>> @@ -247,7 +255,8 @@ if_status_mgr_delete_iface(struct if_status_mgr
>> *mgr, const char *iface_id)
>>>
>>>  void
>>>  if_status_mgr_update(struct if_status_mgr *mgr,
>>> -                     struct local_binding_data *binding_data)
>>> +                     struct local_binding_data *binding_data,
>>> +                     const struct sbrec_chassis *chassis_rec)
>>>  {
>>>      if (!binding_data) {
>>>          return;
>>> @@ -262,7 +271,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>>>      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
>>>          struct ovs_iface *iface = node->data;
>>>
>>> -        if (local_binding_is_up(bindings, iface->id)) {
>>> +        if (local_binding_is_up(bindings, iface->id, chassis_rec)) {
>>>              ovs_iface_set_state(mgr, iface, OIF_INSTALLED);
>>>          }
>>>      }
>>> @@ -273,7 +282,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>>>      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
>>>          struct ovs_iface *iface = node->data;
>>>
>>> -        if (local_binding_is_down(bindings, iface->id)) {
>>> +        if (local_binding_is_down(bindings, iface->id, chassis_rec)) {
>>>              ovs_iface_destroy(mgr, iface);
>>>          }
>>>      }
>>> @@ -306,6 +315,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>>>  void
>>>  if_status_mgr_run(struct if_status_mgr *mgr,
>>>                    struct local_binding_data *binding_data,
>>> +                  const struct sbrec_chassis *chassis_rec,
>>>                    bool sb_readonly, bool ovs_readonly)
>>>  {
>>>      struct ofctrl_acked_seqnos *acked_seqnos =
>>> @@ -328,8 +338,8 @@ if_status_mgr_run(struct if_status_mgr *mgr,
>>>      ofctrl_acked_seqnos_destroy(acked_seqnos);
>>>
>>>      /* Update binding states. */
>>> -    if_status_mgr_update_bindings(mgr, binding_data, sb_readonly,
>>> -                                  ovs_readonly);
>>> +    if_status_mgr_update_bindings(mgr, binding_data, chassis_rec,
>>> +                                  sb_readonly, ovs_readonly);
>>>  }
>>>
>>>  static void
>>> @@ -390,6 +400,7 @@ ovs_iface_set_state(struct if_status_mgr *mgr,
>> struct ovs_iface *iface,
>>>  static void
>>>  if_status_mgr_update_bindings(struct if_status_mgr *mgr,
>>>                                struct local_binding_data *binding_data,
>>> +                              const struct sbrec_chassis *chassis_rec,
>>>                                bool sb_readonly, bool ovs_readonly)
>>>  {
>>>      if (!binding_data) {
>>> @@ -404,6 +415,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
>> *mgr,
>>>       */
>>>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_INSTALL_FLOWS]) {
>>>          struct ovs_iface *iface = node->data;
>>> +        if (!sb_readonly) {
>>> +            local_binding_set_pb(bindings, iface->id, chassis_rec);
>>> +        }
>>>
>>>          local_binding_set_down(bindings, iface->id, sb_readonly,
>> ovs_readonly);
>>>      }
>>> @@ -415,7 +429,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
>> *mgr,
>>>      char *ts_now_str = xasprintf("%lld", time_wall_msec());
>>>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
>>>          struct ovs_iface *iface = node->data;
>>> -
>>> +        if (!sb_readonly) {
>>> +            local_binding_set_pb(bindings, iface->id, chassis_rec);
>>> +        }
>>>          local_binding_set_up(bindings, iface->id, ts_now_str,
>>>                               sb_readonly, ovs_readonly);
>>>      }
>>> @@ -426,6 +442,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
>> *mgr,
>>>       */
>>>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
>>>          struct ovs_iface *iface = node->data;
>>> +        if (!sb_readonly) {
>>> +            local_binding_set_pb(bindings, iface->id, NULL);
>>> +        }
>>>
>>>          local_binding_set_down(bindings, iface->id, sb_readonly,
>> ovs_readonly);
>>>      }
>>> diff --git a/controller/if-status.h b/controller/if-status.h
>>> index ff4aa760e..2c55eb18e 100644
>>> --- a/controller/if-status.h
>>> +++ b/controller/if-status.h
>>> @@ -31,10 +31,14 @@ void if_status_mgr_claim_iface(struct if_status_mgr
>> *, const char *iface_id);
>>>  void if_status_mgr_release_iface(struct if_status_mgr *, const char
>> *iface_id);
>>>  void if_status_mgr_delete_iface(struct if_status_mgr *, const char
>> *iface_id);
>>>
>>> -void if_status_mgr_update(struct if_status_mgr *, struct
>> local_binding_data *);
>>> +void if_status_mgr_update(struct if_status_mgr *, struct
>> local_binding_data *,
>>> +                          const struct sbrec_chassis *chassis);
>>>  void if_status_mgr_run(struct if_status_mgr *mgr, struct
>> local_binding_data *,
>>> +                       const struct sbrec_chassis *chassis,
>>>                         bool sb_readonly, bool ovs_readonly);
>>>  void if_status_mgr_get_memory_usage(struct if_status_mgr *mgr,
>>>                                      struct simap *usage);
>>> +bool if_status_mgr_iface_is_present(struct if_status_mgr *mgr,
>>> +                                    const char *iface_id);
>>>
>>>  # endif /* controller/if-status.h */
>>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>>> index 5a6274eb2..994aebdfb 100644
>>> --- a/controller/ovn-controller.c
>>> +++ b/controller/ovn-controller.c
>>> @@ -3912,7 +3912,7 @@ main(int argc, char *argv[])
>>>                          runtime_data ? &runtime_data->lbinding_data :
>> NULL;
>>>                      stopwatch_start(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
>>>                                      time_msec());
>>> -                    if_status_mgr_update(if_mgr, binding_data);
>>> +                    if_status_mgr_update(if_mgr, binding_data, chassis);
>>>                      stopwatch_stop(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
>>>                                     time_msec());
>>>
>>> @@ -3938,8 +3938,8 @@ main(int argc, char *argv[])
>>>                                     time_msec());
>>>                      stopwatch_start(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>>>                                      time_msec());
>>> -                    if_status_mgr_run(if_mgr, binding_data,
>> !ovnsb_idl_txn,
>>> -                                      !ovs_idl_txn);
>>> +                    if_status_mgr_run(if_mgr, binding_data, chassis,
>>> +                                      !ovnsb_idl_txn, !ovs_idl_txn);
>>>                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>>>                                     time_msec());
>>>                  }
>>> diff --git a/tests/ovn.at b/tests/ovn.at
>>> index 6a0a169c1..403fbc85f 100644
>>> --- a/tests/ovn.at
>>> +++ b/tests/ovn.at
>>> @@ -13934,6 +13934,7 @@ ovn_attach n1 br-phys 192.168.0.11
>>>  ovs-vsctl -- add-port br-int hv1-vif0 -- \
>>>  set Interface hv1-vif0 ofport-request=1
>>>
>>> +
>>
>> Unrelated?
>>
>>>  sim_add hv2
>>>  as hv2
>>>  ovs-vsctl add-br br-phys
>>> @@ -13983,7 +13984,10 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
>> table=65 | grep actions=output:1],
>>>  echo "verifying that lsp0 binding moves when requested-chassis is
>> changed"
>>>
>>>  ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
>>> -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
>> chassis" hv1/ovn-controller.log)])
>>> +
>>> +# We might see multiple "Releasing lport ...", when sb is read only
>>> +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
>> chassis" hv1/ovn-controller.log)])
>>> +
>>>  wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
>>>
>>>  # (6) Chassis hv2 should add flows and hv1 should not.
>>> @@ -14029,7 +14033,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
>> table=0 | grep in_port=1], [0], [ig
>>>  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep
>> actions=output:1], [0], [ignore])
>>>
>>>  check ovn-nbctl --wait=hv lsp-set-options lsp0
>> requested-chassis=non-existant-chassis
>>> -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
>> chassis" hv1/ovn-controller.log)])
>>> +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
>> chassis" hv1/ovn-controller.log)])
>>>  check ovn-nbctl --wait=hv sync
>>>  wait_column '' Port_Binding chasssi logical_port=lsp0
>>>  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep in_port=1],
>> [1], [])
>>> @@ -30595,3 +30599,50 @@ test_mac_binding_flows hv2
>> 00\:00\:00\:00\:10\:10 1
>>>  OVN_CLEANUP([hv1], [hv2])
>>>  AT_CLEANUP
>>>  ])
>>> +
>>> +OVN_FOR_EACH_NORTHD([
>>> +AT_SETUP([recomputes])
>>> +ovn_start
>>> +
>>> +# Add chassis
>>> +net_add n1
>>> +sim_add hv1
>>> +as hv1
>>> +ovs-vsctl add-br br-phys
>>> +ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
>>> +ovn_attach n1 br-phys 192.168.0.1
>>> +
>>> +add_switch_ports() {
>>> +    for port in $(seq $1 $2); do
>>> +        logical_switch_port=lsp${port}
>>> +        check ovn-nbctl lsp-add ls1 $logical_switch_port
>>> +        check ovn-nbctl lsp-set-addresses $logical_switch_port dynamic
>>> +
>>> +        as hv1 ovs-vsctl \
>>> +            -- add-port br-int vif${port} \
>>> +            -- set Interface vif${port}
>> external_ids:iface-id=$logical_switch_port
>>> +    done
>>
>> Don't we need a wait_for_ports_up here to make sure the new ports are
>> claimed and processed?
>>
>>> +
>>> +    check ovn-nbctl --wait=hv sync
>>> +    AT_CHECK([test $(ovn-appctl -t ovn-controller coverage/read-counter
>> lflow_run) == $lflow_run])
>>> +}
>>> +
>>> +check ovn-nbctl ls-add ls1
>>> +check ovn-nbctl set Logical_Switch ls1 other_config:subnet=10.1.0.0/16
>>> +check ovn-nbctl set Logical_Switch ls1
>> other_config:exclude_ips=10.1.255.254
>>> +
>>> +check ovn-nbctl lr-add lr1
>>> +check ovn-nbctl lsp-add ls1 lsp0 -- set Logical_Switch_Port lsp0
>> type=router options:router-port=lrp0 addresses=dynamic
>>> +check ovn-nbctl lrp-add lr1 lrp0 "f0:00:00:01:00:01" 10.1.255.254/16
>>> +check ovn-nbctl lr-nat-add lr1 snat 10.2.0.1 10.1.0.0/16
>>> +
>>> +check ovn-nbctl --wait=hv sync
>>> +lflow_run=$(ovn-appctl -t ovn-controller coverage/read-counter
>> lflow_run)
>>> +
>>> +add_switch_ports 1 50
>>> +add_switch_ports 51 100
>>> +add_switch_ports 101 150
>>> +
>>> +OVN_CLEANUP([hv1])
>>> +AT_CLEANUP
>>> +])
>>
>> Thanks,
>> Dumitru
>>
>>
>
Han Zhou May 17, 2022, 6:03 a.m. UTC | #4
On Thu, May 12, 2022 at 2:04 AM Xavier Simonart <xsimonar@redhat.com> wrote:
>
> When VIF ports are claimed on a chassis, SBDB Port_Binding table is
updated.
> If the SBDB IDL is still is read-only ("in transaction") when such a
update
> is required, the update is not possible and recompute is triggered through
> I+P failure.
>
> This situation can happen:
> - after updating Port_Binding->chassis to SBDB for one port, in a
following
>   iteration, ovn-controller handles Interface:external_ids:ovn-installed
>   (for the same port) while SBDB is still read-only.
> - after updating Port_Binding->chassis to SBDB for one port, in a
following
>   iteration, ovn-controller updates Port_Binding->chassis for another
port,
>   while SBDB is still read-only.
>
> This patch prevent the recompute, by having the if-status module
> updating the Port_Binding chassis (if needed), when possible.
> This does not delay Port_Binding chassis update compared to before this
> patch: Port_Binding chassis will be updated as soon as SBDB is again
> writable, as it was the case, through a recompute, before this patch.
>
Thanks Xavier. I think the approach is good: moving the SB update from the
I-P engine to if-status module, so it wouldn't need to trigger I-P engine
recompute, and just let the if-status module update the SB as soon as it is
writable, through the if-status's state-machine.
However, I have some comments/questions regarding the implementation
details, primarily confusion on the state transitions. Please see below.

> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2059253
> Signed-off-by: Xavier Simonart <xsimonar@redhat.com>
> ---
>  controller/binding.c        | 124 ++++++++++++++++++++++++++----------
>  controller/binding.h        |   9 ++-
>  controller/if-status.c      |  31 +++++++--
>  controller/if-status.h      |   6 +-
>  controller/ovn-controller.c |   6 +-
>  tests/ovn.at                |  55 +++++++++++++++-
>  6 files changed, 184 insertions(+), 47 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index e5ba56b25..d917d8775 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -657,11 +657,15 @@ local_binding_get_lport_ofport(const struct shash
*local_bindings,
>  }
>
>  bool
> -local_binding_is_up(struct shash *local_bindings, const char *pb_name)
> +local_binding_is_up(struct shash *local_bindings, const char *pb_name,
> +                    const struct sbrec_chassis *chassis_rec)
>  {
>      struct local_binding *lbinding =
>          local_binding_find(local_bindings, pb_name);
>      struct binding_lport *b_lport =
local_binding_get_primary_lport(lbinding);
> +    if (b_lport && b_lport->pb->chassis != chassis_rec) {
> +        return false;
> +    }

Why need the change here? I understand that it is obvious that the binding
should not be considered up if the chassis is not updated in SB
port_binding, but I wonder why we need the change in *this* patch.
The function is called to see if an interface state should be moved from
MARK_UP to INSTALLED, but if the chassis is not set, it shouldn't even be
moved to MARK_UP.

>      if (lbinding && b_lport && lbinding->iface) {
>          if (b_lport->pb->n_up && !b_lport->pb->up[0]) {
>              return false;
> @@ -673,13 +677,23 @@ local_binding_is_up(struct shash *local_bindings,
const char *pb_name)
>  }
>
>  bool
> -local_binding_is_down(struct shash *local_bindings, const char *pb_name)
> +local_binding_is_down(struct shash *local_bindings, const char *pb_name,
> +                      const struct sbrec_chassis *chassis_rec)
>  {
>      struct local_binding *lbinding =
>          local_binding_find(local_bindings, pb_name);
>
>      struct binding_lport *b_lport =
local_binding_get_primary_lport(lbinding);
>
> +    if (b_lport) {
> +        if (b_lport->pb->chassis == chassis_rec) {
> +            return false;
> +        } else if (b_lport->pb->chassis) {
> +            VLOG_DBG("lport %s already claimed by other chassis",
> +                     b_lport->pb->logical_port);
> +        }
> +    }
> +

Same as above, it is not clear to me why this change here. It would be
better to clarify the state transition first.

>      if (!lbinding) {
>          return true;
>      }
> @@ -894,6 +908,48 @@ get_lport_type_str(enum en_lport_type lport_type)
>      OVS_NOT_REACHED();
>  }
>
> +static void set_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb,
> +                          const struct sbrec_chassis *chassis_rec)
> +{
> +    if (pb->chassis != chassis_rec) {
> +        if (pb->chassis) {
> +            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> +                    pb->logical_port, pb->chassis->name,
> +                    chassis_rec->name);
> +        } else {
> +            VLOG_INFO("Claiming lport %s for this chassis.",
pb->logical_port);
> +        }
> +        for (int i = 0; i < pb->n_mac; i++) {
> +            VLOG_INFO("%s: Claiming %s", pb->logical_port, pb->mac[i]);
> +        }
> +        sbrec_port_binding_set_chassis(pb, chassis_rec);
> +    }
> +}
> +
> +static void clear_pb_chassis_in_sbrec(const struct sbrec_port_binding
*pb)
> +{
> +    if (pb->chassis) {
> +        sbrec_port_binding_set_chassis(pb, NULL);
> +    }
> +}
> +
> +void
> +local_binding_set_pb(struct shash *local_bindings, const char *pb_name,
> +                    const struct sbrec_chassis *chassis_rec)
> +{
> +    struct local_binding *lbinding =
> +        local_binding_find(local_bindings, pb_name);
> +    struct binding_lport *b_lport =
local_binding_get_primary_lport(lbinding);
> +
> +    if (b_lport) {
> +        if (chassis_rec) {
> +            set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec);
> +        } else {
> +            clear_pb_chassis_in_sbrec(b_lport->pb);
> +        }

It looks like the logic can be simplified to just:
    set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec);
This way we don't need the if/else, and the function
clear_pb_chassis_in_sbrec() can be removed.

> +    }
> +}
> +
>  /* For newly claimed ports, if 'notify_up' is 'false':
>   * - set the 'pb.up' field to true if 'pb' has no 'parent_pb'.
>   * - set the 'pb.up' field to true if 'parent_pb.up' is 'true' (e.g., for
> @@ -906,23 +962,37 @@ get_lport_type_str(enum en_lport_type lport_type)
>   *   it's explicitly set to 'false'.
>   *   This is to ensure compatibility with older versions of ovn-northd.
>   */
> -static void
> +static bool
>  claimed_lport_set_up(const struct sbrec_port_binding *pb,
>                       const struct sbrec_port_binding *parent_pb,
>                       const struct sbrec_chassis *chassis_rec,
> -                     bool notify_up, struct if_status_mgr *if_mgr)
> +                     bool notify_up, struct if_status_mgr *if_mgr,
> +                     bool sb_readonly)
>  {
> +
>      if (!notify_up) {
>          bool up = true;
>          if (!parent_pb || (parent_pb->n_up && parent_pb->up[0])) {
> -            sbrec_port_binding_set_up(pb, &up, 1);
> +            if (!sb_readonly) {
> +                sbrec_port_binding_set_up(pb, &up, 1);
> +            } else if (pb->n_up && !pb->up[0]) {
> +                return false;
> +            }
>          }
> -        return;
> +        if (sb_readonly && (pb->chassis != chassis_rec)) {
> +            /* Handle the cases where if_status_mgr does not claim the
> +             * interface.In those cases, if we do not set pb->chassis in
sb
> +             * now (as readonly), we might not do it later ...
> +             */

Thanks for the comments, but I am sorry that I still couldn't understand,
maybe just because I am not familiar with the existing logic of container
port handling. Any more detailed explanation would be appreciated.

> +            return false;
> +        }
> +        return true;
>      }
>
>      if (pb->chassis != chassis_rec || (pb->n_up && !pb->up[0])) {
>          if_status_mgr_claim_iface(if_mgr, pb->logical_port);

Since we are now updating SB port_binding's chassis in the if-status
module, we should move to CLAIMED status there when the if-status module
updates SB DB (when it is not read-only). The state transition looks
incorrect if we call the function here and move the state to CLAIMED before
it updates chassis to SB DB. My understanding of CLAIMED here is that an
update is sent to SB for the PB's chassis column. I think we should add a
new state before the CLAIMED state, because this is a new state that is
introduced by this patch: an interface that is supposed to be claimed by
the chassis, but the update to SB is not sent because of the read-only SB.

Or, we can still use the state name CLAIMED for the newly bound interface,
but add a new state between CLAIMED and INSTALL_FLOWS: CLAIMED (but SB PB's
chassis not updated) -> SB_CHASSIS_SENT -> INSTALL_FLOWS -> MARK_UP ->
INSTALLED

>      }
> +    return true;
>  }
>
>  /* Returns false if lport is not claimed due to 'sb_readonly'.
> @@ -937,27 +1007,15 @@ claim_lport(const struct sbrec_port_binding *pb,
>              struct hmap *tracked_datapaths,
>              struct if_status_mgr *if_mgr)
>  {
> -    if (!sb_readonly) {
> -        claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
if_mgr);
> -    }
>
> +    if (!claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
> +           if_mgr, sb_readonly)) {
> +        return false;
> +    }
>      if (pb->chassis != chassis_rec) {
> -        if (sb_readonly) {
> -            return false;
> -        }
> -
> -        if (pb->chassis) {
> -            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> -                    pb->logical_port, pb->chassis->name,
> -                    chassis_rec->name);
> -        } else {
> -            VLOG_INFO("Claiming lport %s for this chassis.",
pb->logical_port);
> +        if (!sb_readonly) {
> +            set_pb_chassis_in_sbrec(pb, chassis_rec);

I am confused that if the if-status module is going to handle the update to
SB DB, why would we set it here? I think it is better to make the
responsibility of the modules clear and handle it in just one place.

>          }
> -        for (int i = 0; i < pb->n_mac; i++) {
> -            VLOG_INFO("%s: Claiming %s", pb->logical_port, pb->mac[i]);
> -        }
> -
> -        sbrec_port_binding_set_chassis(pb, chassis_rec);
>
>          if (tracked_datapaths) {
>              update_lport_tracking(pb, tracked_datapaths, true);
> @@ -974,7 +1032,6 @@ claim_lport(const struct sbrec_port_binding *pb,
>          }
>          sbrec_port_binding_set_encap(pb, encap_rec);
>      }
> -
>      return true;
>  }
>
> @@ -986,7 +1043,8 @@ claim_lport(const struct sbrec_port_binding *pb,
>   * Caller should make sure that this is the case.
>   */
>  static bool
> -release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly)
> +release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly,
> +               struct if_status_mgr *if_mgr)
>  {
>      if (pb->encap) {
>          if (sb_readonly) {
> @@ -995,11 +1053,13 @@ release_lport_(const struct sbrec_port_binding
*pb, bool sb_readonly)
>          sbrec_port_binding_set_encap(pb, NULL);
>      }
>
> +    /* if sb readonly, pb->chassis unset through if-status if present */
>      if (pb->chassis) {
> -        if (sb_readonly) {
> +        if (!sb_readonly) {
> +            sbrec_port_binding_set_chassis(pb, NULL);
> +        } else if (!if_status_mgr_iface_is_present(if_mgr,
pb->logical_port)) {

Same here. Also, if we follow the state transition properly, we shouldn't
worry that the if-status is not present before the interface is released,
right?

Thanks,
Han

>              return false;
>          }
> -        sbrec_port_binding_set_chassis(pb, NULL);
>      }
>
>      if (pb->virtual_parent) {
> @@ -1009,7 +1069,8 @@ release_lport_(const struct sbrec_port_binding *pb,
bool sb_readonly)
>          sbrec_port_binding_set_virtual_parent(pb, NULL);
>      }
>
> -    VLOG_INFO("Releasing lport %s from this chassis.", pb->logical_port);
> +    VLOG_INFO("Releasing lport %s from this chassis (sb_readonly=%d)",
> +              pb->logical_port, sb_readonly);
>      return true;
>  }
>
> @@ -1017,7 +1078,7 @@ static bool
>  release_lport(const struct sbrec_port_binding *pb, bool sb_readonly,
>                struct hmap *tracked_datapaths, struct if_status_mgr
*if_mgr)
>  {
> -    if (!release_lport_(pb, sb_readonly)) {
> +    if (!release_lport_(pb, sb_readonly, if_mgr)) {
>          return false;
>      }
>
> @@ -1342,10 +1403,9 @@ consider_localport(const struct sbrec_port_binding
*pb,
>      /* If the port binding is claimed, then release it as localport is
claimed
>       * by any ovn-controller. */
>      if (pb->chassis == b_ctx_in->chassis_rec) {
> -        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn)) {
> +        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn,
b_ctx_out->if_mgr)) {
>              return false;
>          }
> -
>          remove_related_lport(pb, b_ctx_out);
>      }
>
> diff --git a/controller/binding.h b/controller/binding.h
> index 430a8d9b1..45202a321 100644
> --- a/controller/binding.h
> +++ b/controller/binding.h
> @@ -151,14 +151,17 @@ const struct sbrec_port_binding
*local_binding_get_primary_pb(
>  ofp_port_t local_binding_get_lport_ofport(const struct shash
*local_bindings,
>                                            const char *pb_name);
>
> -bool local_binding_is_up(struct shash *local_bindings, const char
*pb_name);
> -bool local_binding_is_down(struct shash *local_bindings, const char
*pb_name);
> +bool local_binding_is_up(struct shash *local_bindings, const char
*pb_name,
> +                         const struct sbrec_chassis *);
> +bool local_binding_is_down(struct shash *local_bindings, const char
*pb_name,
> +                         const struct sbrec_chassis *);
>  void local_binding_set_up(struct shash *local_bindings, const char
*pb_name,
>                            const char *ts_now_str, bool sb_readonly,
>                            bool ovs_readonly);
>  void local_binding_set_down(struct shash *local_bindings, const char
*pb_name,
>                              bool sb_readonly, bool ovs_readonly);
> -
> +void local_binding_set_pb(struct shash *local_bindings, const char
*pb_name,
> +                         const struct sbrec_chassis *chassis_rec);
>  void binding_register_ovs_idl(struct ovsdb_idl *);
>  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
>  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> diff --git a/controller/if-status.c b/controller/if-status.c
> index dbdf8b66f..e36df22a0 100644
> --- a/controller/if-status.c
> +++ b/controller/if-status.c
> @@ -115,6 +115,7 @@ static void ovs_iface_set_state(struct if_status_mgr
*, struct ovs_iface *,
>
>  static void if_status_mgr_update_bindings(
>      struct if_status_mgr *mgr, struct local_binding_data *binding_data,
> +    const struct sbrec_chassis *chassis_rec,
>      bool sb_readonly, bool ovs_readonly);
>
>  struct if_status_mgr *
> @@ -181,6 +182,13 @@ if_status_mgr_claim_iface(struct if_status_mgr *mgr,
const char *iface_id)
>      }
>  }
>
> +bool
> +if_status_mgr_iface_is_present(struct if_status_mgr *mgr, const char
*iface_id)
> +{
> +    struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id);
> +    return (!!iface);
> +}
> +
>  void
>  if_status_mgr_release_iface(struct if_status_mgr *mgr, const char
*iface_id)
>  {
> @@ -247,7 +255,8 @@ if_status_mgr_delete_iface(struct if_status_mgr *mgr,
const char *iface_id)
>
>  void
>  if_status_mgr_update(struct if_status_mgr *mgr,
> -                     struct local_binding_data *binding_data)
> +                     struct local_binding_data *binding_data,
> +                     const struct sbrec_chassis *chassis_rec)
>  {
>      if (!binding_data) {
>          return;
> @@ -262,7 +271,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
>          struct ovs_iface *iface = node->data;
>
> -        if (local_binding_is_up(bindings, iface->id)) {
> +        if (local_binding_is_up(bindings, iface->id, chassis_rec)) {
>              ovs_iface_set_state(mgr, iface, OIF_INSTALLED);
>          }
>      }
> @@ -273,7 +282,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
>          struct ovs_iface *iface = node->data;
>
> -        if (local_binding_is_down(bindings, iface->id)) {
> +        if (local_binding_is_down(bindings, iface->id, chassis_rec)) {
>              ovs_iface_destroy(mgr, iface);
>          }
>      }
> @@ -306,6 +315,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>  void
>  if_status_mgr_run(struct if_status_mgr *mgr,
>                    struct local_binding_data *binding_data,
> +                  const struct sbrec_chassis *chassis_rec,
>                    bool sb_readonly, bool ovs_readonly)
>  {
>      struct ofctrl_acked_seqnos *acked_seqnos =
> @@ -328,8 +338,8 @@ if_status_mgr_run(struct if_status_mgr *mgr,
>      ofctrl_acked_seqnos_destroy(acked_seqnos);
>
>      /* Update binding states. */
> -    if_status_mgr_update_bindings(mgr, binding_data, sb_readonly,
> -                                  ovs_readonly);
> +    if_status_mgr_update_bindings(mgr, binding_data, chassis_rec,
> +                                  sb_readonly, ovs_readonly);
>  }
>
>  static void
> @@ -390,6 +400,7 @@ ovs_iface_set_state(struct if_status_mgr *mgr, struct
ovs_iface *iface,
>  static void
>  if_status_mgr_update_bindings(struct if_status_mgr *mgr,
>                                struct local_binding_data *binding_data,
> +                              const struct sbrec_chassis *chassis_rec,
>                                bool sb_readonly, bool ovs_readonly)
>  {
>      if (!binding_data) {
> @@ -404,6 +415,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
*mgr,
>       */
>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_INSTALL_FLOWS]) {
>          struct ovs_iface *iface = node->data;
> +        if (!sb_readonly) {
> +            local_binding_set_pb(bindings, iface->id, chassis_rec);
> +        }
>
>          local_binding_set_down(bindings, iface->id, sb_readonly,
ovs_readonly);
>      }
> @@ -415,7 +429,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
*mgr,
>      char *ts_now_str = xasprintf("%lld", time_wall_msec());
>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
>          struct ovs_iface *iface = node->data;
> -
> +        if (!sb_readonly) {
> +            local_binding_set_pb(bindings, iface->id, chassis_rec);
> +        }
>          local_binding_set_up(bindings, iface->id, ts_now_str,
>                               sb_readonly, ovs_readonly);
>      }
> @@ -426,6 +442,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
*mgr,
>       */
>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
>          struct ovs_iface *iface = node->data;
> +        if (!sb_readonly) {
> +            local_binding_set_pb(bindings, iface->id, NULL);
> +        }
>
>          local_binding_set_down(bindings, iface->id, sb_readonly,
ovs_readonly);
>      }
> diff --git a/controller/if-status.h b/controller/if-status.h
> index ff4aa760e..2c55eb18e 100644
> --- a/controller/if-status.h
> +++ b/controller/if-status.h
> @@ -31,10 +31,14 @@ void if_status_mgr_claim_iface(struct if_status_mgr
*, const char *iface_id);
>  void if_status_mgr_release_iface(struct if_status_mgr *, const char
*iface_id);
>  void if_status_mgr_delete_iface(struct if_status_mgr *, const char
*iface_id);
>
> -void if_status_mgr_update(struct if_status_mgr *, struct
local_binding_data *);
> +void if_status_mgr_update(struct if_status_mgr *, struct
local_binding_data *,
> +                          const struct sbrec_chassis *chassis);
>  void if_status_mgr_run(struct if_status_mgr *mgr, struct
local_binding_data *,
> +                       const struct sbrec_chassis *chassis,
>                         bool sb_readonly, bool ovs_readonly);
>  void if_status_mgr_get_memory_usage(struct if_status_mgr *mgr,
>                                      struct simap *usage);
> +bool if_status_mgr_iface_is_present(struct if_status_mgr *mgr,
> +                                    const char *iface_id);
>
>  # endif /* controller/if-status.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 5a6274eb2..994aebdfb 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -3912,7 +3912,7 @@ main(int argc, char *argv[])
>                          runtime_data ? &runtime_data->lbinding_data :
NULL;
>                      stopwatch_start(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
>                                      time_msec());
> -                    if_status_mgr_update(if_mgr, binding_data);
> +                    if_status_mgr_update(if_mgr, binding_data, chassis);
>                      stopwatch_stop(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
>                                     time_msec());
>
> @@ -3938,8 +3938,8 @@ main(int argc, char *argv[])
>                                     time_msec());
>                      stopwatch_start(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>                                      time_msec());
> -                    if_status_mgr_run(if_mgr, binding_data,
!ovnsb_idl_txn,
> -                                      !ovs_idl_txn);
> +                    if_status_mgr_run(if_mgr, binding_data, chassis,
> +                                      !ovnsb_idl_txn, !ovs_idl_txn);
>                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>                                     time_msec());
>                  }
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 6a0a169c1..403fbc85f 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -13934,6 +13934,7 @@ ovn_attach n1 br-phys 192.168.0.11
>  ovs-vsctl -- add-port br-int hv1-vif0 -- \
>  set Interface hv1-vif0 ofport-request=1
>
> +
>  sim_add hv2
>  as hv2
>  ovs-vsctl add-br br-phys
> @@ -13983,7 +13984,10 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
table=65 | grep actions=output:1],
>  echo "verifying that lsp0 binding moves when requested-chassis is
changed"
>
>  ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
> -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
chassis" hv1/ovn-controller.log)])
> +
> +# We might see multiple "Releasing lport ...", when sb is read only
> +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
chassis" hv1/ovn-controller.log)])
> +
>  wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
>
>  # (6) Chassis hv2 should add flows and hv1 should not.
> @@ -14029,7 +14033,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
table=0 | grep in_port=1], [0], [ig
>  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep
actions=output:1], [0], [ignore])
>
>  check ovn-nbctl --wait=hv lsp-set-options lsp0
requested-chassis=non-existant-chassis
> -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
chassis" hv1/ovn-controller.log)])
> +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
chassis" hv1/ovn-controller.log)])
>  check ovn-nbctl --wait=hv sync
>  wait_column '' Port_Binding chasssi logical_port=lsp0
>  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep in_port=1],
[1], [])
> @@ -30595,3 +30599,50 @@ test_mac_binding_flows hv2
00\:00\:00\:00\:10\:10 1
>  OVN_CLEANUP([hv1], [hv2])
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([recomputes])
> +ovn_start
> +
> +# Add chassis
> +net_add n1
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +
> +add_switch_ports() {
> +    for port in $(seq $1 $2); do
> +        logical_switch_port=lsp${port}
> +        check ovn-nbctl lsp-add ls1 $logical_switch_port
> +        check ovn-nbctl lsp-set-addresses $logical_switch_port dynamic
> +
> +        as hv1 ovs-vsctl \
> +            -- add-port br-int vif${port} \
> +            -- set Interface vif${port}
external_ids:iface-id=$logical_switch_port
> +    done
> +
> +    check ovn-nbctl --wait=hv sync
> +    AT_CHECK([test $(ovn-appctl -t ovn-controller coverage/read-counter
lflow_run) == $lflow_run])
> +}
> +
> +check ovn-nbctl ls-add ls1
> +check ovn-nbctl set Logical_Switch ls1 other_config:subnet=10.1.0.0/16
> +check ovn-nbctl set Logical_Switch ls1
other_config:exclude_ips=10.1.255.254
> +
> +check ovn-nbctl lr-add lr1
> +check ovn-nbctl lsp-add ls1 lsp0 -- set Logical_Switch_Port lsp0
type=router options:router-port=lrp0 addresses=dynamic
> +check ovn-nbctl lrp-add lr1 lrp0 "f0:00:00:01:00:01" 10.1.255.254/16
> +check ovn-nbctl lr-nat-add lr1 snat 10.2.0.1 10.1.0.0/16
> +
> +check ovn-nbctl --wait=hv sync
> +lflow_run=$(ovn-appctl -t ovn-controller coverage/read-counter lflow_run)
> +
> +add_switch_ports 1 50
> +add_switch_ports 51 100
> +add_switch_ports 101 150
> +
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> +])
> --
> 2.31.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Xavier Simonart May 17, 2022, 9:40 a.m. UTC | #5
Hi Han
Thanks for looking into this and for your feedback.
Please see comments below

On Tue, May 17, 2022 at 8:03 AM Han Zhou <hzhou@ovn.org> wrote:

>
>
> On Thu, May 12, 2022 at 2:04 AM Xavier Simonart <xsimonar@redhat.com>
> wrote:
> >
> > When VIF ports are claimed on a chassis, SBDB Port_Binding table is
> updated.
> > If the SBDB IDL is still is read-only ("in transaction") when such a
> update
> > is required, the update is not possible and recompute is triggered
> through
> > I+P failure.
> >
> > This situation can happen:
> > - after updating Port_Binding->chassis to SBDB for one port, in a
> following
> >   iteration, ovn-controller handles Interface:external_ids:ovn-installed
> >   (for the same port) while SBDB is still read-only.
> > - after updating Port_Binding->chassis to SBDB for one port, in a
> following
> >   iteration, ovn-controller updates Port_Binding->chassis for another
> port,
> >   while SBDB is still read-only.
> >
> > This patch prevent the recompute, by having the if-status module
> > updating the Port_Binding chassis (if needed), when possible.
> > This does not delay Port_Binding chassis update compared to before this
> > patch: Port_Binding chassis will be updated as soon as SBDB is again
> > writable, as it was the case, through a recompute, before this patch.
> >
> Thanks Xavier. I think the approach is good: moving the SB update from the
> I-P engine to if-status module, so it wouldn't need to trigger I-P engine
> recompute, and just let the if-status module update the SB as soon as it is
> writable, through the if-status's state-machine.
> However, I have some comments/questions regarding the implementation
> details, primarily confusion on the state transitions. Please see below.
>
> > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2059253
> > Signed-off-by: Xavier Simonart <xsimonar@redhat.com>
> > ---
> >  controller/binding.c        | 124 ++++++++++++++++++++++++++----------
> >  controller/binding.h        |   9 ++-
> >  controller/if-status.c      |  31 +++++++--
> >  controller/if-status.h      |   6 +-
> >  controller/ovn-controller.c |   6 +-
> >  tests/ovn.at                |  55 +++++++++++++++-
> >  6 files changed, 184 insertions(+), 47 deletions(-)
> >
> > diff --git a/controller/binding.c b/controller/binding.c
> > index e5ba56b25..d917d8775 100644
> > --- a/controller/binding.c
> > +++ b/controller/binding.c
> > @@ -657,11 +657,15 @@ local_binding_get_lport_ofport(const struct shash
> *local_bindings,
> >  }
> >
> >  bool
> > -local_binding_is_up(struct shash *local_bindings, const char *pb_name)
> > +local_binding_is_up(struct shash *local_bindings, const char *pb_name,
> > +                    const struct sbrec_chassis *chassis_rec)
> >  {
> >      struct local_binding *lbinding =
> >          local_binding_find(local_bindings, pb_name);
> >      struct binding_lport *b_lport =
> local_binding_get_primary_lport(lbinding);
> > +    if (b_lport && b_lport->pb->chassis != chassis_rec) {
> > +        return false;
> > +    }
>
> Why need the change here? I understand that it is obvious that the binding
> should not be considered up if the chassis is not updated in SB
> port_binding, but I wonder why we need the change in *this* patch.
> The function is called to see if an interface state should be moved from
> MARK_UP to INSTALLED, but if the chassis is not set, it shouldn't even be
> moved to MARK_UP.
>
> I probably need to provide a better explanation of the states.
In fact, my two goals were: 1) avoid/decrease recomputes and 2) do not
increase latency (time between interface is claimed and it's up in SB and
ovn-installed in OVS).
There is not really an additional state covering the fact that the chassis
is set in sbdb.
As soon as the state is CLAIMED, we try to set the pb->chassis (by 'try' I
mean we update pb->chassis if sb is not readonly).
The fact that sb is readonly in CLAIMED state does not prevent us to move
to INSTALL_FLOWS state, where we are installing flows in OVS.
As soon as the seqno for those flows was received, we moved to MARK_UP
state.
In those three states, we can update pb->chassis (if not already done).
In MARK_UP we set the interface up.
We moved to INSTALLED when both up and chassis have been written.

>      if (lbinding && b_lport && lbinding->iface) {
> >          if (b_lport->pb->n_up && !b_lport->pb->up[0]) {
> >              return false;
> > @@ -673,13 +677,23 @@ local_binding_is_up(struct shash *local_bindings,
> const char *pb_name)
> >  }
> >
> >  bool
> > -local_binding_is_down(struct shash *local_bindings, const char *pb_name)
> > +local_binding_is_down(struct shash *local_bindings, const char *pb_name,
> > +                      const struct sbrec_chassis *chassis_rec)
> >  {
> >      struct local_binding *lbinding =
> >          local_binding_find(local_bindings, pb_name);
> >
> >      struct binding_lport *b_lport =
> local_binding_get_primary_lport(lbinding);
> >
> > +    if (b_lport) {
> > +        if (b_lport->pb->chassis == chassis_rec) {
> > +            return false;
> > +        } else if (b_lport->pb->chassis) {
> > +            VLOG_DBG("lport %s already claimed by other chassis",
> > +                     b_lport->pb->logical_port);
> > +        }
> > +    }
> > +
>
> Same as above, it is not clear to me why this change here. It would be
> better to clarify the state transition first.
>
> >      if (!lbinding) {
> >          return true;
> >      }
> > @@ -894,6 +908,48 @@ get_lport_type_str(enum en_lport_type lport_type)
> >      OVS_NOT_REACHED();
> >  }
> >
> > +static void set_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb,
> > +                          const struct sbrec_chassis *chassis_rec)
> > +{
> > +    if (pb->chassis != chassis_rec) {
> > +        if (pb->chassis) {
> > +            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> > +                    pb->logical_port, pb->chassis->name,
> > +                    chassis_rec->name);
> > +        } else {
> > +            VLOG_INFO("Claiming lport %s for this chassis.",
> pb->logical_port);
> > +        }
> > +        for (int i = 0; i < pb->n_mac; i++) {
> > +            VLOG_INFO("%s: Claiming %s", pb->logical_port, pb->mac[i]);
> > +        }
> > +        sbrec_port_binding_set_chassis(pb, chassis_rec);
> > +    }
> > +}
> > +
> > +static void clear_pb_chassis_in_sbrec(const struct sbrec_port_binding
> *pb)
> > +{
> > +    if (pb->chassis) {
> > +        sbrec_port_binding_set_chassis(pb, NULL);
> > +    }
> > +}
> > +
> > +void
> > +local_binding_set_pb(struct shash *local_bindings, const char *pb_name,
> > +                    const struct sbrec_chassis *chassis_rec)
> > +{
> > +    struct local_binding *lbinding =
> > +        local_binding_find(local_bindings, pb_name);
> > +    struct binding_lport *b_lport =
> local_binding_get_primary_lport(lbinding);
> > +
> > +    if (b_lport) {
> > +        if (chassis_rec) {
> > +            set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec);
> > +        } else {
> > +            clear_pb_chassis_in_sbrec(b_lport->pb);
> > +        }
>
> It looks like the logic can be simplified to just:
>     set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec);
> This way we don't need the if/else, and the function
> clear_pb_chassis_in_sbrec() can be removed.
>
> We can clearly combine set_pb_chassis_in_sbrec and
clear_pb_chassis_in_sbrec.
I had an impression that having the 'clear' function made the 'set'
function easier to understand (avoid an additional if (chassis_rec) in
'set'.
But I am fine w/ changing it.


> > +    }
> > +}
> > +
> >  /* For newly claimed ports, if 'notify_up' is 'false':
> >   * - set the 'pb.up' field to true if 'pb' has no 'parent_pb'.
> >   * - set the 'pb.up' field to true if 'parent_pb.up' is 'true' (e.g.,
> for
> > @@ -906,23 +962,37 @@ get_lport_type_str(enum en_lport_type lport_type)
> >   *   it's explicitly set to 'false'.
> >   *   This is to ensure compatibility with older versions of ovn-northd.
> >   */
> > -static void
> > +static bool
> >  claimed_lport_set_up(const struct sbrec_port_binding *pb,
> >                       const struct sbrec_port_binding *parent_pb,
> >                       const struct sbrec_chassis *chassis_rec,
> > -                     bool notify_up, struct if_status_mgr *if_mgr)
> > +                     bool notify_up, struct if_status_mgr *if_mgr,
> > +                     bool sb_readonly)
> >  {
> > +
> >      if (!notify_up) {
> >          bool up = true;
> >          if (!parent_pb || (parent_pb->n_up && parent_pb->up[0])) {
> > -            sbrec_port_binding_set_up(pb, &up, 1);
> > +            if (!sb_readonly) {
> > +                sbrec_port_binding_set_up(pb, &up, 1);
> > +            } else if (pb->n_up && !pb->up[0]) {
> > +                return false;
> > +            }
> >          }
> > -        return;
> > +        if (sb_readonly && (pb->chassis != chassis_rec)) {
> > +            /* Handle the cases where if_status_mgr does not claim the
> > +             * interface.In those cases, if we do not set pb->chassis
> in sb
> > +             * now (as readonly), we might not do it later ...
> > +             */
>
> Thanks for the comments, but I am sorry that I still couldn't understand,
> maybe just because I am not familiar with the existing logic of container
> port handling. Any more detailed explanation would be appreciated.
>
> Some ports, such as container and virtual ports, are set up directly
(notify_up=false), when claimed, when their parent is up. Hence for those
ports we do not create/use the if-status state machine, and we will not
have an opportunity later to set them up. if sb was readonly.
So, for such ports, if sb is readonly, we will recompute.
We could extend the if-status logic and handle such ports as well - this is
also a comment from Dumitru. I did not implement it in this patch to not
complicate the if-status state machine, as I think it will require either
an additional state and/or a flag.
If/when we are fine with this patch, the goal is to start another patch
handling such container/virtual ports interfaces through the if-status
state machine .

> +            return false;
> > +        }
> > +        return true;
> >      }
> >
> >      if (pb->chassis != chassis_rec || (pb->n_up && !pb->up[0])) {
> >          if_status_mgr_claim_iface(if_mgr, pb->logical_port);
>
> Since we are now updating SB port_binding's chassis in the if-status
> module, we should move to CLAIMED status there when the if-status module
> updates SB DB (when it is not read-only). The state transition looks
> incorrect if we call the function here and move the state to CLAIMED before
> it updates chassis to SB DB. My understanding of CLAIMED here is that an
> update is sent to SB for the PB's chassis column. I think we should add a
> new state before the CLAIMED state, because this is a new state that is
> introduced by this patch: an interface that is supposed to be claimed by
> the chassis, but the update to SB is not sent because of the read-only SB.
>
> Or, we can still use the state name CLAIMED for the newly bound interface,
> but add a new state between CLAIMED and INSTALL_FLOWS: CLAIMED (but SB PB's
> chassis not updated) -> SB_CHASSIS_SENT -> INSTALL_FLOWS -> MARK_UP ->
> INSTALLED
>
> This is where we diverge. I see CLAIMED as a state where we are ready to
update the chassis in SBDB. We update it if we can (not readonly), but it
does not block us in our state progression if we cannot. We do not add an
additional state, as this would add one round trip w/ SBDB. Chassis will be
updated in SBDB in CLAIMED, INSTALL_FLOWS or MARK_UP state, before or at
the same time as port up.


> >      }
> > +    return true;
> >  }
> >
> >  /* Returns false if lport is not claimed due to 'sb_readonly'.
> > @@ -937,27 +1007,15 @@ claim_lport(const struct sbrec_port_binding *pb,
> >              struct hmap *tracked_datapaths,
> >              struct if_status_mgr *if_mgr)
> >  {
> > -    if (!sb_readonly) {
> > -        claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
> if_mgr);
> > -    }
> >
> > +    if (!claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
> > +           if_mgr, sb_readonly)) {
> > +        return false;
> > +    }
> >      if (pb->chassis != chassis_rec) {
> > -        if (sb_readonly) {
> > -            return false;
> > -        }
> > -
> > -        if (pb->chassis) {
> > -            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> > -                    pb->logical_port, pb->chassis->name,
> > -                    chassis_rec->name);
> > -        } else {
> > -            VLOG_INFO("Claiming lport %s for this chassis.",
> pb->logical_port);
> > +        if (!sb_readonly) {
> > +            set_pb_chassis_in_sbrec(pb, chassis_rec);
>
> I am confused that if the if-status module is going to handle the update
> to SB DB, why would we set it here? I think it is better to make the
> responsibility of the modules clear and handle it in just one place.
>
> if-status handles updates... when the binding module was not able to do it
immediately... (in a similar way as, before this patch, it is handling the
port up when not done by the binding module).
But I agree with your point about clarity of having the if-status machine
handling all cases. This is however only possible when all ports (including
virtual ports) have such an interface (which is not the case now).
If fine with you, I'd prefer handling such ports (virtual, container) in a
subsequent patch, which could then also make sure the chassis update
becomes the responsibility of the if-status module.

>          }
> > -        for (int i = 0; i < pb->n_mac; i++) {
> > -            VLOG_INFO("%s: Claiming %s", pb->logical_port, pb->mac[i]);
> > -        }
> > -
> > -        sbrec_port_binding_set_chassis(pb, chassis_rec);
> >
> >          if (tracked_datapaths) {
> >              update_lport_tracking(pb, tracked_datapaths, true);
> > @@ -974,7 +1032,6 @@ claim_lport(const struct sbrec_port_binding *pb,
> >          }
> >          sbrec_port_binding_set_encap(pb, encap_rec);
> >      }
> > -
> >      return true;
> >  }
> >
> > @@ -986,7 +1043,8 @@ claim_lport(const struct sbrec_port_binding *pb,
> >   * Caller should make sure that this is the case.
> >   */
> >  static bool
> > -release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly)
> > +release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly,
> > +               struct if_status_mgr *if_mgr)
> >  {
> >      if (pb->encap) {
> >          if (sb_readonly) {
> > @@ -995,11 +1053,13 @@ release_lport_(const struct sbrec_port_binding
> *pb, bool sb_readonly)
> >          sbrec_port_binding_set_encap(pb, NULL);
> >      }
> >
> > +    /* if sb readonly, pb->chassis unset through if-status if present */
> >      if (pb->chassis) {
> > -        if (sb_readonly) {
> > +        if (!sb_readonly) {
> > +            sbrec_port_binding_set_chassis(pb, NULL);
> > +        } else if (!if_status_mgr_iface_is_present(if_mgr,
> pb->logical_port)) {
>
> Same here. Also, if we follow the state transition properly, we shouldn't
> worry that the if-status is not present before the interface is released,
> right?
>
> Some ports (virtual, container) do not have such a state. As mentioned
earlier  I would prefer handling those ports in a subsequent patch, to
isolate the state machine changes related to them.

Thanks,
> Han
>
> Thanks again for your feedback.
Xavier

> >              return false;
> >          }
> > -        sbrec_port_binding_set_chassis(pb, NULL);
> >      }
> >
> >      if (pb->virtual_parent) {
> > @@ -1009,7 +1069,8 @@ release_lport_(const struct sbrec_port_binding
> *pb, bool sb_readonly)
> >          sbrec_port_binding_set_virtual_parent(pb, NULL);
> >      }
> >
> > -    VLOG_INFO("Releasing lport %s from this chassis.",
> pb->logical_port);
> > +    VLOG_INFO("Releasing lport %s from this chassis (sb_readonly=%d)",
> > +              pb->logical_port, sb_readonly);
> >      return true;
> >  }
> >
> > @@ -1017,7 +1078,7 @@ static bool
> >  release_lport(const struct sbrec_port_binding *pb, bool sb_readonly,
> >                struct hmap *tracked_datapaths, struct if_status_mgr
> *if_mgr)
> >  {
> > -    if (!release_lport_(pb, sb_readonly)) {
> > +    if (!release_lport_(pb, sb_readonly, if_mgr)) {
> >          return false;
> >      }
> >
> > @@ -1342,10 +1403,9 @@ consider_localport(const struct
> sbrec_port_binding *pb,
> >      /* If the port binding is claimed, then release it as localport is
> claimed
> >       * by any ovn-controller. */
> >      if (pb->chassis == b_ctx_in->chassis_rec) {
> > -        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn)) {
> > +        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn,
> b_ctx_out->if_mgr)) {
> >              return false;
> >          }
> > -
> >          remove_related_lport(pb, b_ctx_out);
> >      }
> >
> > diff --git a/controller/binding.h b/controller/binding.h
> > index 430a8d9b1..45202a321 100644
> > --- a/controller/binding.h
> > +++ b/controller/binding.h
> > @@ -151,14 +151,17 @@ const struct sbrec_port_binding
> *local_binding_get_primary_pb(
> >  ofp_port_t local_binding_get_lport_ofport(const struct shash
> *local_bindings,
> >                                            const char *pb_name);
> >
> > -bool local_binding_is_up(struct shash *local_bindings, const char
> *pb_name);
> > -bool local_binding_is_down(struct shash *local_bindings, const char
> *pb_name);
> > +bool local_binding_is_up(struct shash *local_bindings, const char
> *pb_name,
> > +                         const struct sbrec_chassis *);
> > +bool local_binding_is_down(struct shash *local_bindings, const char
> *pb_name,
> > +                         const struct sbrec_chassis *);
> >  void local_binding_set_up(struct shash *local_bindings, const char
> *pb_name,
> >                            const char *ts_now_str, bool sb_readonly,
> >                            bool ovs_readonly);
> >  void local_binding_set_down(struct shash *local_bindings, const char
> *pb_name,
> >                              bool sb_readonly, bool ovs_readonly);
> > -
> > +void local_binding_set_pb(struct shash *local_bindings, const char
> *pb_name,
> > +                         const struct sbrec_chassis *chassis_rec);
> >  void binding_register_ovs_idl(struct ovsdb_idl *);
> >  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
> >  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > diff --git a/controller/if-status.c b/controller/if-status.c
> > index dbdf8b66f..e36df22a0 100644
> > --- a/controller/if-status.c
> > +++ b/controller/if-status.c
> > @@ -115,6 +115,7 @@ static void ovs_iface_set_state(struct if_status_mgr
> *, struct ovs_iface *,
> >
> >  static void if_status_mgr_update_bindings(
> >      struct if_status_mgr *mgr, struct local_binding_data *binding_data,
> > +    const struct sbrec_chassis *chassis_rec,
> >      bool sb_readonly, bool ovs_readonly);
> >
> >  struct if_status_mgr *
> > @@ -181,6 +182,13 @@ if_status_mgr_claim_iface(struct if_status_mgr
> *mgr, const char *iface_id)
> >      }
> >  }
> >
> > +bool
> > +if_status_mgr_iface_is_present(struct if_status_mgr *mgr, const char
> *iface_id)
> > +{
> > +    struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id);
> > +    return (!!iface);
> > +}
> > +
> >  void
> >  if_status_mgr_release_iface(struct if_status_mgr *mgr, const char
> *iface_id)
> >  {
> > @@ -247,7 +255,8 @@ if_status_mgr_delete_iface(struct if_status_mgr
> *mgr, const char *iface_id)
> >
> >  void
> >  if_status_mgr_update(struct if_status_mgr *mgr,
> > -                     struct local_binding_data *binding_data)
> > +                     struct local_binding_data *binding_data,
> > +                     const struct sbrec_chassis *chassis_rec)
> >  {
> >      if (!binding_data) {
> >          return;
> > @@ -262,7 +271,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
> >      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
> >          struct ovs_iface *iface = node->data;
> >
> > -        if (local_binding_is_up(bindings, iface->id)) {
> > +        if (local_binding_is_up(bindings, iface->id, chassis_rec)) {
> >              ovs_iface_set_state(mgr, iface, OIF_INSTALLED);
> >          }
> >      }
> > @@ -273,7 +282,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
> >      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
> >          struct ovs_iface *iface = node->data;
> >
> > -        if (local_binding_is_down(bindings, iface->id)) {
> > +        if (local_binding_is_down(bindings, iface->id, chassis_rec)) {
> >              ovs_iface_destroy(mgr, iface);
> >          }
> >      }
> > @@ -306,6 +315,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
> >  void
> >  if_status_mgr_run(struct if_status_mgr *mgr,
> >                    struct local_binding_data *binding_data,
> > +                  const struct sbrec_chassis *chassis_rec,
> >                    bool sb_readonly, bool ovs_readonly)
> >  {
> >      struct ofctrl_acked_seqnos *acked_seqnos =
> > @@ -328,8 +338,8 @@ if_status_mgr_run(struct if_status_mgr *mgr,
> >      ofctrl_acked_seqnos_destroy(acked_seqnos);
> >
> >      /* Update binding states. */
> > -    if_status_mgr_update_bindings(mgr, binding_data, sb_readonly,
> > -                                  ovs_readonly);
> > +    if_status_mgr_update_bindings(mgr, binding_data, chassis_rec,
> > +                                  sb_readonly, ovs_readonly);
> >  }
> >
> >  static void
> > @@ -390,6 +400,7 @@ ovs_iface_set_state(struct if_status_mgr *mgr,
> struct ovs_iface *iface,
> >  static void
> >  if_status_mgr_update_bindings(struct if_status_mgr *mgr,
> >                                struct local_binding_data *binding_data,
> > +                              const struct sbrec_chassis *chassis_rec,
> >                                bool sb_readonly, bool ovs_readonly)
> >  {
> >      if (!binding_data) {
> > @@ -404,6 +415,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
> *mgr,
> >       */
> >      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_INSTALL_FLOWS]) {
> >          struct ovs_iface *iface = node->data;
> > +        if (!sb_readonly) {
> > +            local_binding_set_pb(bindings, iface->id, chassis_rec);
> > +        }
> >
> >          local_binding_set_down(bindings, iface->id, sb_readonly,
> ovs_readonly);
> >      }
> > @@ -415,7 +429,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
> *mgr,
> >      char *ts_now_str = xasprintf("%lld", time_wall_msec());
> >      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
> >          struct ovs_iface *iface = node->data;
> > -
> > +        if (!sb_readonly) {
> > +            local_binding_set_pb(bindings, iface->id, chassis_rec);
> > +        }
> >          local_binding_set_up(bindings, iface->id, ts_now_str,
> >                               sb_readonly, ovs_readonly);
> >      }
> > @@ -426,6 +442,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
> *mgr,
> >       */
> >      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
> >          struct ovs_iface *iface = node->data;
> > +        if (!sb_readonly) {
> > +            local_binding_set_pb(bindings, iface->id, NULL);
> > +        }
> >
> >          local_binding_set_down(bindings, iface->id, sb_readonly,
> ovs_readonly);
> >      }
> > diff --git a/controller/if-status.h b/controller/if-status.h
> > index ff4aa760e..2c55eb18e 100644
> > --- a/controller/if-status.h
> > +++ b/controller/if-status.h
> > @@ -31,10 +31,14 @@ void if_status_mgr_claim_iface(struct if_status_mgr
> *, const char *iface_id);
> >  void if_status_mgr_release_iface(struct if_status_mgr *, const char
> *iface_id);
> >  void if_status_mgr_delete_iface(struct if_status_mgr *, const char
> *iface_id);
> >
> > -void if_status_mgr_update(struct if_status_mgr *, struct
> local_binding_data *);
> > +void if_status_mgr_update(struct if_status_mgr *, struct
> local_binding_data *,
> > +                          const struct sbrec_chassis *chassis);
> >  void if_status_mgr_run(struct if_status_mgr *mgr, struct
> local_binding_data *,
> > +                       const struct sbrec_chassis *chassis,
> >                         bool sb_readonly, bool ovs_readonly);
> >  void if_status_mgr_get_memory_usage(struct if_status_mgr *mgr,
> >                                      struct simap *usage);
> > +bool if_status_mgr_iface_is_present(struct if_status_mgr *mgr,
> > +                                    const char *iface_id);
> >
> >  # endif /* controller/if-status.h */
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 5a6274eb2..994aebdfb 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -3912,7 +3912,7 @@ main(int argc, char *argv[])
> >                          runtime_data ? &runtime_data->lbinding_data :
> NULL;
> >                      stopwatch_start(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
> >                                      time_msec());
> > -                    if_status_mgr_update(if_mgr, binding_data);
> > +                    if_status_mgr_update(if_mgr, binding_data, chassis);
> >                      stopwatch_stop(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
> >                                     time_msec());
> >
> > @@ -3938,8 +3938,8 @@ main(int argc, char *argv[])
> >                                     time_msec());
> >                      stopwatch_start(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
> >                                      time_msec());
> > -                    if_status_mgr_run(if_mgr, binding_data,
> !ovnsb_idl_txn,
> > -                                      !ovs_idl_txn);
> > +                    if_status_mgr_run(if_mgr, binding_data, chassis,
> > +                                      !ovnsb_idl_txn, !ovs_idl_txn);
> >                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
> >                                     time_msec());
> >                  }
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index 6a0a169c1..403fbc85f 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -13934,6 +13934,7 @@ ovn_attach n1 br-phys 192.168.0.11
> >  ovs-vsctl -- add-port br-int hv1-vif0 -- \
> >  set Interface hv1-vif0 ofport-request=1
> >
> > +
> >  sim_add hv2
> >  as hv2
> >  ovs-vsctl add-br br-phys
> > @@ -13983,7 +13984,10 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
> table=65 | grep actions=output:1],
> >  echo "verifying that lsp0 binding moves when requested-chassis is
> changed"
> >
> >  ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
> > -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
> chassis" hv1/ovn-controller.log)])
> > +
> > +# We might see multiple "Releasing lport ...", when sb is read only
> > +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
> chassis" hv1/ovn-controller.log)])
> > +
> >  wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
> >
> >  # (6) Chassis hv2 should add flows and hv1 should not.
> > @@ -14029,7 +14033,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
> table=0 | grep in_port=1], [0], [ig
> >  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep
> actions=output:1], [0], [ignore])
> >
> >  check ovn-nbctl --wait=hv lsp-set-options lsp0
> requested-chassis=non-existant-chassis
> > -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
> chassis" hv1/ovn-controller.log)])
> > +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
> chassis" hv1/ovn-controller.log)])
> >  check ovn-nbctl --wait=hv sync
> >  wait_column '' Port_Binding chasssi logical_port=lsp0
> >  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep in_port=1],
> [1], [])
> > @@ -30595,3 +30599,50 @@ test_mac_binding_flows hv2
> 00\:00\:00\:00\:10\:10 1
> >  OVN_CLEANUP([hv1], [hv2])
> >  AT_CLEANUP
> >  ])
> > +
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([recomputes])
> > +ovn_start
> > +
> > +# Add chassis
> > +net_add n1
> > +sim_add hv1
> > +as hv1
> > +ovs-vsctl add-br br-phys
> > +ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> > +ovn_attach n1 br-phys 192.168.0.1
> > +
> > +add_switch_ports() {
> > +    for port in $(seq $1 $2); do
> > +        logical_switch_port=lsp${port}
> > +        check ovn-nbctl lsp-add ls1 $logical_switch_port
> > +        check ovn-nbctl lsp-set-addresses $logical_switch_port dynamic
> > +
> > +        as hv1 ovs-vsctl \
> > +            -- add-port br-int vif${port} \
> > +            -- set Interface vif${port}
> external_ids:iface-id=$logical_switch_port
> > +    done
> > +
> > +    check ovn-nbctl --wait=hv sync
> > +    AT_CHECK([test $(ovn-appctl -t ovn-controller coverage/read-counter
> lflow_run) == $lflow_run])
> > +}
> > +
> > +check ovn-nbctl ls-add ls1
> > +check ovn-nbctl set Logical_Switch ls1 other_config:subnet=10.1.0.0/16
> > +check ovn-nbctl set Logical_Switch ls1
> other_config:exclude_ips=10.1.255.254
> > +
> > +check ovn-nbctl lr-add lr1
> > +check ovn-nbctl lsp-add ls1 lsp0 -- set Logical_Switch_Port lsp0
> type=router options:router-port=lrp0 addresses=dynamic
> > +check ovn-nbctl lrp-add lr1 lrp0 "f0:00:00:01:00:01" 10.1.255.254/16
> > +check ovn-nbctl lr-nat-add lr1 snat 10.2.0.1 10.1.0.0/16
> > +
> > +check ovn-nbctl --wait=hv sync
> > +lflow_run=$(ovn-appctl -t ovn-controller coverage/read-counter
> lflow_run)
> > +
> > +add_switch_ports 1 50
> > +add_switch_ports 51 100
> > +add_switch_ports 101 150
> > +
> > +OVN_CLEANUP([hv1])
> > +AT_CLEANUP
> > +])
> > --
> > 2.31.1
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Han Zhou May 17, 2022, 5:36 p.m. UTC | #6
On Tue, May 17, 2022 at 2:41 AM Xavier Simonart <xsimonar@redhat.com> wrote:
>
> Hi Han
> Thanks for looking into this and for your feedback.
> Please see comments below
>
> On Tue, May 17, 2022 at 8:03 AM Han Zhou <hzhou@ovn.org> wrote:
>>
>>
>>
>> On Thu, May 12, 2022 at 2:04 AM Xavier Simonart <xsimonar@redhat.com>
wrote:
>> >
>> > When VIF ports are claimed on a chassis, SBDB Port_Binding table is
updated.
>> > If the SBDB IDL is still is read-only ("in transaction") when such a
update
>> > is required, the update is not possible and recompute is triggered
through
>> > I+P failure.
>> >
>> > This situation can happen:
>> > - after updating Port_Binding->chassis to SBDB for one port, in a
following
>> >   iteration, ovn-controller handles
Interface:external_ids:ovn-installed
>> >   (for the same port) while SBDB is still read-only.
>> > - after updating Port_Binding->chassis to SBDB for one port, in a
following
>> >   iteration, ovn-controller updates Port_Binding->chassis for another
port,
>> >   while SBDB is still read-only.
>> >
>> > This patch prevent the recompute, by having the if-status module
>> > updating the Port_Binding chassis (if needed), when possible.
>> > This does not delay Port_Binding chassis update compared to before this
>> > patch: Port_Binding chassis will be updated as soon as SBDB is again
>> > writable, as it was the case, through a recompute, before this patch.
>> >
>> Thanks Xavier. I think the approach is good: moving the SB update from
the I-P engine to if-status module, so it wouldn't need to trigger I-P
engine recompute, and just let the if-status module update the SB as soon
as it is writable, through the if-status's state-machine.
>> However, I have some comments/questions regarding the implementation
details, primarily confusion on the state transitions. Please see below.
>>
>> > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2059253
>> > Signed-off-by: Xavier Simonart <xsimonar@redhat.com>
>> > ---
>> >  controller/binding.c        | 124 ++++++++++++++++++++++++++----------
>> >  controller/binding.h        |   9 ++-
>> >  controller/if-status.c      |  31 +++++++--
>> >  controller/if-status.h      |   6 +-
>> >  controller/ovn-controller.c |   6 +-
>> >  tests/ovn.at                |  55 +++++++++++++++-
>> >  6 files changed, 184 insertions(+), 47 deletions(-)
>> >
>> > diff --git a/controller/binding.c b/controller/binding.c
>> > index e5ba56b25..d917d8775 100644
>> > --- a/controller/binding.c
>> > +++ b/controller/binding.c
>> > @@ -657,11 +657,15 @@ local_binding_get_lport_ofport(const struct
shash *local_bindings,
>> >  }
>> >
>> >  bool
>> > -local_binding_is_up(struct shash *local_bindings, const char *pb_name)
>> > +local_binding_is_up(struct shash *local_bindings, const char *pb_name,
>> > +                    const struct sbrec_chassis *chassis_rec)
>> >  {
>> >      struct local_binding *lbinding =
>> >          local_binding_find(local_bindings, pb_name);
>> >      struct binding_lport *b_lport =
local_binding_get_primary_lport(lbinding);
>> > +    if (b_lport && b_lport->pb->chassis != chassis_rec) {
>> > +        return false;
>> > +    }
>>
>> Why need the change here? I understand that it is obvious that the
binding should not be considered up if the chassis is not updated in SB
port_binding, but I wonder why we need the change in *this* patch.
>> The function is called to see if an interface state should be moved from
MARK_UP to INSTALLED, but if the chassis is not set, it shouldn't even be
moved to MARK_UP.
>>
> I probably need to provide a better explanation of the states.
> In fact, my two goals were: 1) avoid/decrease recomputes and 2) do not
increase latency (time between interface is claimed and it's up in SB and
ovn-installed in OVS).
> There is not really an additional state covering the fact that the
chassis is set in sbdb.
> As soon as the state is CLAIMED, we try to set the pb->chassis (by 'try'
I mean we update pb->chassis if sb is not readonly).
> The fact that sb is readonly in CLAIMED state does not prevent us to move
to INSTALL_FLOWS state, where we are installing flows in OVS.

This may be a problem. If we haven't updated pb->chassis, then the physical
flows being installed are incomplete, and if we move the state forward, we
would end up updating the port status (SB pb->up and OVS
interface->external_ids:installed) too early, which completely void the
original purpose of the if-statue module, which is to manage the interface
state and tell CMS when a lport is ready to receive traffic.

> As soon as the seqno for those flows was received, we moved to MARK_UP
state.
> In those three states, we can update pb->chassis (if not already done).
> In MARK_UP we set the interface up.
> We moved to INSTALLED when both up and chassis have been written.
>
>> >      if (lbinding && b_lport && lbinding->iface) {
>> >          if (b_lport->pb->n_up && !b_lport->pb->up[0]) {
>> >              return false;
>> > @@ -673,13 +677,23 @@ local_binding_is_up(struct shash
*local_bindings, const char *pb_name)
>> >  }
>> >
>> >  bool
>> > -local_binding_is_down(struct shash *local_bindings, const char
*pb_name)
>> > +local_binding_is_down(struct shash *local_bindings, const char
*pb_name,
>> > +                      const struct sbrec_chassis *chassis_rec)
>> >  {
>> >      struct local_binding *lbinding =
>> >          local_binding_find(local_bindings, pb_name);
>> >
>> >      struct binding_lport *b_lport =
local_binding_get_primary_lport(lbinding);
>> >
>> > +    if (b_lport) {
>> > +        if (b_lport->pb->chassis == chassis_rec) {
>> > +            return false;
>> > +        } else if (b_lport->pb->chassis) {
>> > +            VLOG_DBG("lport %s already claimed by other chassis",
>> > +                     b_lport->pb->logical_port);
>> > +        }
>> > +    }
>> > +
>>
>> Same as above, it is not clear to me why this change here. It would be
better to clarify the state transition first.
>>
>> >      if (!lbinding) {
>> >          return true;
>> >      }
>> > @@ -894,6 +908,48 @@ get_lport_type_str(enum en_lport_type lport_type)
>> >      OVS_NOT_REACHED();
>> >  }
>> >
>> > +static void set_pb_chassis_in_sbrec(const struct sbrec_port_binding
*pb,
>> > +                          const struct sbrec_chassis *chassis_rec)
>> > +{
>> > +    if (pb->chassis != chassis_rec) {
>> > +        if (pb->chassis) {
>> > +            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
>> > +                    pb->logical_port, pb->chassis->name,
>> > +                    chassis_rec->name);
>> > +        } else {
>> > +            VLOG_INFO("Claiming lport %s for this chassis.",
pb->logical_port);
>> > +        }
>> > +        for (int i = 0; i < pb->n_mac; i++) {
>> > +            VLOG_INFO("%s: Claiming %s", pb->logical_port,
pb->mac[i]);
>> > +        }
>> > +        sbrec_port_binding_set_chassis(pb, chassis_rec);
>> > +    }
>> > +}
>> > +
>> > +static void clear_pb_chassis_in_sbrec(const struct sbrec_port_binding
*pb)
>> > +{
>> > +    if (pb->chassis) {
>> > +        sbrec_port_binding_set_chassis(pb, NULL);
>> > +    }
>> > +}
>> > +
>> > +void
>> > +local_binding_set_pb(struct shash *local_bindings, const char
*pb_name,
>> > +                    const struct sbrec_chassis *chassis_rec)
>> > +{
>> > +    struct local_binding *lbinding =
>> > +        local_binding_find(local_bindings, pb_name);
>> > +    struct binding_lport *b_lport =
local_binding_get_primary_lport(lbinding);
>> > +
>> > +    if (b_lport) {
>> > +        if (chassis_rec) {
>> > +            set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec);
>> > +        } else {
>> > +            clear_pb_chassis_in_sbrec(b_lport->pb);
>> > +        }
>>
>> It looks like the logic can be simplified to just:
>>     set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec);
>> This way we don't need the if/else, and the function
clear_pb_chassis_in_sbrec() can be removed.
>>
> We can clearly combine set_pb_chassis_in_sbrec and
clear_pb_chassis_in_sbrec.
> I had an impression that having the 'clear' function made the 'set'
function easier to understand (avoid an additional if (chassis_rec) in
'set'.
> But I am fine w/ changing it.
>
>>
>> > +    }
>> > +}
>> > +
>> >  /* For newly claimed ports, if 'notify_up' is 'false':
>> >   * - set the 'pb.up' field to true if 'pb' has no 'parent_pb'.
>> >   * - set the 'pb.up' field to true if 'parent_pb.up' is 'true' (e.g.,
for
>> > @@ -906,23 +962,37 @@ get_lport_type_str(enum en_lport_type lport_type)
>> >   *   it's explicitly set to 'false'.
>> >   *   This is to ensure compatibility with older versions of
ovn-northd.
>> >   */
>> > -static void
>> > +static bool
>> >  claimed_lport_set_up(const struct sbrec_port_binding *pb,
>> >                       const struct sbrec_port_binding *parent_pb,
>> >                       const struct sbrec_chassis *chassis_rec,
>> > -                     bool notify_up, struct if_status_mgr *if_mgr)
>> > +                     bool notify_up, struct if_status_mgr *if_mgr,
>> > +                     bool sb_readonly)
>> >  {
>> > +
>> >      if (!notify_up) {
>> >          bool up = true;
>> >          if (!parent_pb || (parent_pb->n_up && parent_pb->up[0])) {
>> > -            sbrec_port_binding_set_up(pb, &up, 1);
>> > +            if (!sb_readonly) {
>> > +                sbrec_port_binding_set_up(pb, &up, 1);
>> > +            } else if (pb->n_up && !pb->up[0]) {
>> > +                return false;
>> > +            }
>> >          }
>> > -        return;
>> > +        if (sb_readonly && (pb->chassis != chassis_rec)) {
>> > +            /* Handle the cases where if_status_mgr does not claim the
>> > +             * interface.In those cases, if we do not set pb->chassis
in sb
>> > +             * now (as readonly), we might not do it later ...
>> > +             */
>>
>> Thanks for the comments, but I am sorry that I still couldn't
understand, maybe just because I am not familiar with the existing logic of
container port handling. Any more detailed explanation would be appreciated.
>>
> Some ports, such as container and virtual ports, are set up directly
(notify_up=false), when claimed, when their parent is up. Hence for those
ports we do not create/use the if-status state machine, and we will not
have an opportunity later to set them up. if sb was readonly.
> So, for such ports, if sb is readonly, we will recompute.
> We could extend the if-status logic and handle such ports as well - this
is also a comment from Dumitru. I did not implement it in this patch to not
complicate the if-status state machine, as I think it will require either
an additional state and/or a flag.
> If/when we are fine with this patch, the goal is to start another patch
handling such container/virtual ports interfaces through the if-status
state machine .
>
Make sense, thanks! It would be better to document these special
considerations (probably requires a refactor patch before your patch).

>> > +            return false;
>> > +        }
>> > +        return true;
>> >      }
>> >
>> >      if (pb->chassis != chassis_rec || (pb->n_up && !pb->up[0])) {
>> >          if_status_mgr_claim_iface(if_mgr, pb->logical_port);
>>
>> Since we are now updating SB port_binding's chassis in the if-status
module, we should move to CLAIMED status there when the if-status module
updates SB DB (when it is not read-only). The state transition looks
incorrect if we call the function here and move the state to CLAIMED before
it updates chassis to SB DB. My understanding of CLAIMED here is that an
update is sent to SB for the PB's chassis column. I think we should add a
new state before the CLAIMED state, because this is a new state that is
introduced by this patch: an interface that is supposed to be claimed by
the chassis, but the update to SB is not sent because of the read-only SB.
>>
>> Or, we can still use the state name CLAIMED for the newly bound
interface, but add a new state between CLAIMED and INSTALL_FLOWS: CLAIMED
(but SB PB's chassis not updated) -> SB_CHASSIS_SENT -> INSTALL_FLOWS ->
MARK_UP -> INSTALLED
>>
> This is where we diverge. I see CLAIMED as a state where we are ready to
update the chassis in SBDB. We update it if we can (not readonly), but it
does not block us in our state progression if we cannot. We do not add an
additional state, as this would add one round trip w/ SBDB. Chassis will be
updated in SBDB in CLAIMED, INSTALL_FLOWS or MARK_UP state, before or at
the same time as port up.
>
Please see my comment above for why I think it is important to separate the
state. Regarding "this would add one round trip w/ SBDB", I don't think it
*add* any extra round trip. The round trip is necessary, because without it
the flows installed are incomplete. Before this change, we also wait until
the next recompute to move state forward and declare the port as
UP/INSTALLED.

>>
>> >      }
>> > +    return true;
>> >  }
>> >
>> >  /* Returns false if lport is not claimed due to 'sb_readonly'.
>> > @@ -937,27 +1007,15 @@ claim_lport(const struct sbrec_port_binding *pb,
>> >              struct hmap *tracked_datapaths,
>> >              struct if_status_mgr *if_mgr)
>> >  {
>> > -    if (!sb_readonly) {
>> > -        claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
if_mgr);
>> > -    }
>> >
>> > +    if (!claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
>> > +           if_mgr, sb_readonly)) {
>> > +        return false;
>> > +    }
>> >      if (pb->chassis != chassis_rec) {
>> > -        if (sb_readonly) {
>> > -            return false;
>> > -        }
>> > -
>> > -        if (pb->chassis) {
>> > -            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
>> > -                    pb->logical_port, pb->chassis->name,
>> > -                    chassis_rec->name);
>> > -        } else {
>> > -            VLOG_INFO("Claiming lport %s for this chassis.",
pb->logical_port);
>> > +        if (!sb_readonly) {
>> > +            set_pb_chassis_in_sbrec(pb, chassis_rec);
>>
>> I am confused that if the if-status module is going to handle the update
to SB DB, why would we set it here? I think it is better to make the
responsibility of the modules clear and handle it in just one place.
>>
> if-status handles updates... when the binding module was not able to do
it immediately... (in a similar way as, before this patch, it is handling
the port up when not done by the binding module).

If I understand correctly, before this patch, the port "up" is updated by
the if-status module only, in if_status_mgr_update_bindings(), not by the
binding module.

> But I agree with your point about clarity of having the if-status machine
handling all cases. This is however only possible when all ports (including
virtual ports) have such an interface (which is not the case now).
> If fine with you, I'd prefer handling such ports (virtual, container) in
a subsequent patch, which could then also make sure the chassis update
becomes the responsibility of the if-status module.
>
I am ok with handling virtual/container ports in a later patch. Can this
line be removed at that point? It would be good to add a comment here if
that's the case.

Thanks,
Han

>> >          }
>> > -        for (int i = 0; i < pb->n_mac; i++) {
>> > -            VLOG_INFO("%s: Claiming %s", pb->logical_port,
pb->mac[i]);
>> > -        }
>> > -
>> > -        sbrec_port_binding_set_chassis(pb, chassis_rec);
>> >
>> >          if (tracked_datapaths) {
>> >              update_lport_tracking(pb, tracked_datapaths, true);
>> > @@ -974,7 +1032,6 @@ claim_lport(const struct sbrec_port_binding *pb,
>> >          }
>> >          sbrec_port_binding_set_encap(pb, encap_rec);
>> >      }
>> > -
>> >      return true;
>> >  }
>> >
>> > @@ -986,7 +1043,8 @@ claim_lport(const struct sbrec_port_binding *pb,
>> >   * Caller should make sure that this is the case.
>> >   */
>> >  static bool
>> > -release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly)
>> > +release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly,
>> > +               struct if_status_mgr *if_mgr)
>> >  {
>> >      if (pb->encap) {
>> >          if (sb_readonly) {
>> > @@ -995,11 +1053,13 @@ release_lport_(const struct sbrec_port_binding
*pb, bool sb_readonly)
>> >          sbrec_port_binding_set_encap(pb, NULL);
>> >      }
>> >
>> > +    /* if sb readonly, pb->chassis unset through if-status if present
*/
>> >      if (pb->chassis) {
>> > -        if (sb_readonly) {
>> > +        if (!sb_readonly) {
>> > +            sbrec_port_binding_set_chassis(pb, NULL);
>> > +        } else if (!if_status_mgr_iface_is_present(if_mgr,
pb->logical_port)) {
>>
>> Same here. Also, if we follow the state transition properly, we
shouldn't worry that the if-status is not present before the interface is
released, right?
>>
> Some ports (virtual, container) do not have such a state. As mentioned
earlier  I would prefer handling those ports in a subsequent patch, to
isolate the state machine changes related to them.
>
>> Thanks,
>> Han
>>
> Thanks again for your feedback.
> Xavier
>>
>> >              return false;
>> >          }
>> > -        sbrec_port_binding_set_chassis(pb, NULL);
>> >      }
>> >
>> >      if (pb->virtual_parent) {
>> > @@ -1009,7 +1069,8 @@ release_lport_(const struct sbrec_port_binding
*pb, bool sb_readonly)
>> >          sbrec_port_binding_set_virtual_parent(pb, NULL);
>> >      }
>> >
>> > -    VLOG_INFO("Releasing lport %s from this chassis.",
pb->logical_port);
>> > +    VLOG_INFO("Releasing lport %s from this chassis (sb_readonly=%d)",
>> > +              pb->logical_port, sb_readonly);
>> >      return true;
>> >  }
>> >
>> > @@ -1017,7 +1078,7 @@ static bool
>> >  release_lport(const struct sbrec_port_binding *pb, bool sb_readonly,
>> >                struct hmap *tracked_datapaths, struct if_status_mgr
*if_mgr)
>> >  {
>> > -    if (!release_lport_(pb, sb_readonly)) {
>> > +    if (!release_lport_(pb, sb_readonly, if_mgr)) {
>> >          return false;
>> >      }
>> >
>> > @@ -1342,10 +1403,9 @@ consider_localport(const struct
sbrec_port_binding *pb,
>> >      /* If the port binding is claimed, then release it as localport
is claimed
>> >       * by any ovn-controller. */
>> >      if (pb->chassis == b_ctx_in->chassis_rec) {
>> > -        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn)) {
>> > +        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn,
b_ctx_out->if_mgr)) {
>> >              return false;
>> >          }
>> > -
>> >          remove_related_lport(pb, b_ctx_out);
>> >      }
>> >
>> > diff --git a/controller/binding.h b/controller/binding.h
>> > index 430a8d9b1..45202a321 100644
>> > --- a/controller/binding.h
>> > +++ b/controller/binding.h
>> > @@ -151,14 +151,17 @@ const struct sbrec_port_binding
*local_binding_get_primary_pb(
>> >  ofp_port_t local_binding_get_lport_ofport(const struct shash
*local_bindings,
>> >                                            const char *pb_name);
>> >
>> > -bool local_binding_is_up(struct shash *local_bindings, const char
*pb_name);
>> > -bool local_binding_is_down(struct shash *local_bindings, const char
*pb_name);
>> > +bool local_binding_is_up(struct shash *local_bindings, const char
*pb_name,
>> > +                         const struct sbrec_chassis *);
>> > +bool local_binding_is_down(struct shash *local_bindings, const char
*pb_name,
>> > +                         const struct sbrec_chassis *);
>> >  void local_binding_set_up(struct shash *local_bindings, const char
*pb_name,
>> >                            const char *ts_now_str, bool sb_readonly,
>> >                            bool ovs_readonly);
>> >  void local_binding_set_down(struct shash *local_bindings, const char
*pb_name,
>> >                              bool sb_readonly, bool ovs_readonly);
>> > -
>> > +void local_binding_set_pb(struct shash *local_bindings, const char
*pb_name,
>> > +                         const struct sbrec_chassis *chassis_rec);
>> >  void binding_register_ovs_idl(struct ovsdb_idl *);
>> >  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
>> >  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
>> > diff --git a/controller/if-status.c b/controller/if-status.c
>> > index dbdf8b66f..e36df22a0 100644
>> > --- a/controller/if-status.c
>> > +++ b/controller/if-status.c
>> > @@ -115,6 +115,7 @@ static void ovs_iface_set_state(struct
if_status_mgr *, struct ovs_iface *,
>> >
>> >  static void if_status_mgr_update_bindings(
>> >      struct if_status_mgr *mgr, struct local_binding_data
*binding_data,
>> > +    const struct sbrec_chassis *chassis_rec,
>> >      bool sb_readonly, bool ovs_readonly);
>> >
>> >  struct if_status_mgr *
>> > @@ -181,6 +182,13 @@ if_status_mgr_claim_iface(struct if_status_mgr
*mgr, const char *iface_id)
>> >      }
>> >  }
>> >
>> > +bool
>> > +if_status_mgr_iface_is_present(struct if_status_mgr *mgr, const char
*iface_id)
>> > +{
>> > +    struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id);
>> > +    return (!!iface);
>> > +}
>> > +
>> >  void
>> >  if_status_mgr_release_iface(struct if_status_mgr *mgr, const char
*iface_id)
>> >  {
>> > @@ -247,7 +255,8 @@ if_status_mgr_delete_iface(struct if_status_mgr
*mgr, const char *iface_id)
>> >
>> >  void
>> >  if_status_mgr_update(struct if_status_mgr *mgr,
>> > -                     struct local_binding_data *binding_data)
>> > +                     struct local_binding_data *binding_data,
>> > +                     const struct sbrec_chassis *chassis_rec)
>> >  {
>> >      if (!binding_data) {
>> >          return;
>> > @@ -262,7 +271,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>> >      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
>> >          struct ovs_iface *iface = node->data;
>> >
>> > -        if (local_binding_is_up(bindings, iface->id)) {
>> > +        if (local_binding_is_up(bindings, iface->id, chassis_rec)) {
>> >              ovs_iface_set_state(mgr, iface, OIF_INSTALLED);
>> >          }
>> >      }
>> > @@ -273,7 +282,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>> >      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_DOWN])
{
>> >          struct ovs_iface *iface = node->data;
>> >
>> > -        if (local_binding_is_down(bindings, iface->id)) {
>> > +        if (local_binding_is_down(bindings, iface->id, chassis_rec)) {
>> >              ovs_iface_destroy(mgr, iface);
>> >          }
>> >      }
>> > @@ -306,6 +315,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>> >  void
>> >  if_status_mgr_run(struct if_status_mgr *mgr,
>> >                    struct local_binding_data *binding_data,
>> > +                  const struct sbrec_chassis *chassis_rec,
>> >                    bool sb_readonly, bool ovs_readonly)
>> >  {
>> >      struct ofctrl_acked_seqnos *acked_seqnos =
>> > @@ -328,8 +338,8 @@ if_status_mgr_run(struct if_status_mgr *mgr,
>> >      ofctrl_acked_seqnos_destroy(acked_seqnos);
>> >
>> >      /* Update binding states. */
>> > -    if_status_mgr_update_bindings(mgr, binding_data, sb_readonly,
>> > -                                  ovs_readonly);
>> > +    if_status_mgr_update_bindings(mgr, binding_data, chassis_rec,
>> > +                                  sb_readonly, ovs_readonly);
>> >  }
>> >
>> >  static void
>> > @@ -390,6 +400,7 @@ ovs_iface_set_state(struct if_status_mgr *mgr,
struct ovs_iface *iface,
>> >  static void
>> >  if_status_mgr_update_bindings(struct if_status_mgr *mgr,
>> >                                struct local_binding_data *binding_data,
>> > +                              const struct sbrec_chassis *chassis_rec,
>> >                                bool sb_readonly, bool ovs_readonly)
>> >  {
>> >      if (!binding_data) {
>> > @@ -404,6 +415,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
*mgr,
>> >       */
>> >      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_INSTALL_FLOWS]) {
>> >          struct ovs_iface *iface = node->data;
>> > +        if (!sb_readonly) {
>> > +            local_binding_set_pb(bindings, iface->id, chassis_rec);
>> > +        }
>> >
>> >          local_binding_set_down(bindings, iface->id, sb_readonly,
ovs_readonly);
>> >      }
>> > @@ -415,7 +429,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
*mgr,
>> >      char *ts_now_str = xasprintf("%lld", time_wall_msec());
>> >      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
>> >          struct ovs_iface *iface = node->data;
>> > -
>> > +        if (!sb_readonly) {
>> > +            local_binding_set_pb(bindings, iface->id, chassis_rec);
>> > +        }
>> >          local_binding_set_up(bindings, iface->id, ts_now_str,
>> >                               sb_readonly, ovs_readonly);
>> >      }
>> > @@ -426,6 +442,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
*mgr,
>> >       */
>> >      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
>> >          struct ovs_iface *iface = node->data;
>> > +        if (!sb_readonly) {
>> > +            local_binding_set_pb(bindings, iface->id, NULL);
>> > +        }
>> >
>> >          local_binding_set_down(bindings, iface->id, sb_readonly,
ovs_readonly);
>> >      }
>> > diff --git a/controller/if-status.h b/controller/if-status.h
>> > index ff4aa760e..2c55eb18e 100644
>> > --- a/controller/if-status.h
>> > +++ b/controller/if-status.h
>> > @@ -31,10 +31,14 @@ void if_status_mgr_claim_iface(struct
if_status_mgr *, const char *iface_id);
>> >  void if_status_mgr_release_iface(struct if_status_mgr *, const char
*iface_id);
>> >  void if_status_mgr_delete_iface(struct if_status_mgr *, const char
*iface_id);
>> >
>> > -void if_status_mgr_update(struct if_status_mgr *, struct
local_binding_data *);
>> > +void if_status_mgr_update(struct if_status_mgr *, struct
local_binding_data *,
>> > +                          const struct sbrec_chassis *chassis);
>> >  void if_status_mgr_run(struct if_status_mgr *mgr, struct
local_binding_data *,
>> > +                       const struct sbrec_chassis *chassis,
>> >                         bool sb_readonly, bool ovs_readonly);
>> >  void if_status_mgr_get_memory_usage(struct if_status_mgr *mgr,
>> >                                      struct simap *usage);
>> > +bool if_status_mgr_iface_is_present(struct if_status_mgr *mgr,
>> > +                                    const char *iface_id);
>> >
>> >  # endif /* controller/if-status.h */
>> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>> > index 5a6274eb2..994aebdfb 100644
>> > --- a/controller/ovn-controller.c
>> > +++ b/controller/ovn-controller.c
>> > @@ -3912,7 +3912,7 @@ main(int argc, char *argv[])
>> >                          runtime_data ? &runtime_data->lbinding_data :
NULL;
>> >
 stopwatch_start(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
>> >                                      time_msec());
>> > -                    if_status_mgr_update(if_mgr, binding_data);
>> > +                    if_status_mgr_update(if_mgr, binding_data,
chassis);
>> >
 stopwatch_stop(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
>> >                                     time_msec());
>> >
>> > @@ -3938,8 +3938,8 @@ main(int argc, char *argv[])
>> >                                     time_msec());
>> >                      stopwatch_start(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>> >                                      time_msec());
>> > -                    if_status_mgr_run(if_mgr, binding_data,
!ovnsb_idl_txn,
>> > -                                      !ovs_idl_txn);
>> > +                    if_status_mgr_run(if_mgr, binding_data, chassis,
>> > +                                      !ovnsb_idl_txn, !ovs_idl_txn);
>> >                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>> >                                     time_msec());
>> >                  }
>> > diff --git a/tests/ovn.at b/tests/ovn.at
>> > index 6a0a169c1..403fbc85f 100644
>> > --- a/tests/ovn.at
>> > +++ b/tests/ovn.at
>> > @@ -13934,6 +13934,7 @@ ovn_attach n1 br-phys 192.168.0.11
>> >  ovs-vsctl -- add-port br-int hv1-vif0 -- \
>> >  set Interface hv1-vif0 ofport-request=1
>> >
>> > +
>> >  sim_add hv2
>> >  as hv2
>> >  ovs-vsctl add-br br-phys
>> > @@ -13983,7 +13984,10 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
table=65 | grep actions=output:1],
>> >  echo "verifying that lsp0 binding moves when requested-chassis is
changed"
>> >
>> >  ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
>> > -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
chassis" hv1/ovn-controller.log)])
>> > +
>> > +# We might see multiple "Releasing lport ...", when sb is read only
>> > +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
chassis" hv1/ovn-controller.log)])
>> > +
>> >  wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
>> >
>> >  # (6) Chassis hv2 should add flows and hv1 should not.
>> > @@ -14029,7 +14033,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
table=0 | grep in_port=1], [0], [ig
>> >  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep
actions=output:1], [0], [ignore])
>> >
>> >  check ovn-nbctl --wait=hv lsp-set-options lsp0
requested-chassis=non-existant-chassis
>> > -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
chassis" hv1/ovn-controller.log)])
>> > +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
chassis" hv1/ovn-controller.log)])
>> >  check ovn-nbctl --wait=hv sync
>> >  wait_column '' Port_Binding chasssi logical_port=lsp0
>> >  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep
in_port=1], [1], [])
>> > @@ -30595,3 +30599,50 @@ test_mac_binding_flows hv2
00\:00\:00\:00\:10\:10 1
>> >  OVN_CLEANUP([hv1], [hv2])
>> >  AT_CLEANUP
>> >  ])
>> > +
>> > +OVN_FOR_EACH_NORTHD([
>> > +AT_SETUP([recomputes])
>> > +ovn_start
>> > +
>> > +# Add chassis
>> > +net_add n1
>> > +sim_add hv1
>> > +as hv1
>> > +ovs-vsctl add-br br-phys
>> > +ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
>> > +ovn_attach n1 br-phys 192.168.0.1
>> > +
>> > +add_switch_ports() {
>> > +    for port in $(seq $1 $2); do
>> > +        logical_switch_port=lsp${port}
>> > +        check ovn-nbctl lsp-add ls1 $logical_switch_port
>> > +        check ovn-nbctl lsp-set-addresses $logical_switch_port dynamic
>> > +
>> > +        as hv1 ovs-vsctl \
>> > +            -- add-port br-int vif${port} \
>> > +            -- set Interface vif${port}
external_ids:iface-id=$logical_switch_port
>> > +    done
>> > +
>> > +    check ovn-nbctl --wait=hv sync
>> > +    AT_CHECK([test $(ovn-appctl -t ovn-controller
coverage/read-counter lflow_run) == $lflow_run])
>> > +}
>> > +
>> > +check ovn-nbctl ls-add ls1
>> > +check ovn-nbctl set Logical_Switch ls1 other_config:subnet=10.1.0.0/16
>> > +check ovn-nbctl set Logical_Switch ls1
other_config:exclude_ips=10.1.255.254
>> > +
>> > +check ovn-nbctl lr-add lr1
>> > +check ovn-nbctl lsp-add ls1 lsp0 -- set Logical_Switch_Port lsp0
type=router options:router-port=lrp0 addresses=dynamic
>> > +check ovn-nbctl lrp-add lr1 lrp0 "f0:00:00:01:00:01" 10.1.255.254/16
>> > +check ovn-nbctl lr-nat-add lr1 snat 10.2.0.1 10.1.0.0/16
>> > +
>> > +check ovn-nbctl --wait=hv sync
>> > +lflow_run=$(ovn-appctl -t ovn-controller coverage/read-counter
lflow_run)
>> > +
>> > +add_switch_ports 1 50
>> > +add_switch_ports 51 100
>> > +add_switch_ports 101 150
>> > +
>> > +OVN_CLEANUP([hv1])
>> > +AT_CLEANUP
>> > +])
>> > --
>> > 2.31.1
>> >
>> > _______________________________________________
>> > dev mailing list
>> > dev@openvswitch.org
>> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Dumitru Ceara May 19, 2022, 10:30 a.m. UTC | #7
On 5/17/22 19:36, Han Zhou wrote:
> On Tue, May 17, 2022 at 2:41 AM Xavier Simonart <xsimonar@redhat.com> wrote:
>>
>> Hi Han

Hi Xavier, Han,


>> Thanks for looking into this and for your feedback.
>> Please see comments below
>>
>> On Tue, May 17, 2022 at 8:03 AM Han Zhou <hzhou@ovn.org> wrote:
>>>
>>>
>>>
>>> On Thu, May 12, 2022 at 2:04 AM Xavier Simonart <xsimonar@redhat.com>
> wrote:
>>>>
>>>> When VIF ports are claimed on a chassis, SBDB Port_Binding table is
> updated.
>>>> If the SBDB IDL is still is read-only ("in transaction") when such a
> update
>>>> is required, the update is not possible and recompute is triggered
> through
>>>> I+P failure.
>>>>
>>>> This situation can happen:
>>>> - after updating Port_Binding->chassis to SBDB for one port, in a
> following
>>>>   iteration, ovn-controller handles
> Interface:external_ids:ovn-installed
>>>>   (for the same port) while SBDB is still read-only.
>>>> - after updating Port_Binding->chassis to SBDB for one port, in a
> following
>>>>   iteration, ovn-controller updates Port_Binding->chassis for another
> port,
>>>>   while SBDB is still read-only.
>>>>
>>>> This patch prevent the recompute, by having the if-status module
>>>> updating the Port_Binding chassis (if needed), when possible.
>>>> This does not delay Port_Binding chassis update compared to before this
>>>> patch: Port_Binding chassis will be updated as soon as SBDB is again
>>>> writable, as it was the case, through a recompute, before this patch.
>>>>
>>> Thanks Xavier. I think the approach is good: moving the SB update from
> the I-P engine to if-status module, so it wouldn't need to trigger I-P
> engine recompute, and just let the if-status module update the SB as soon
> as it is writable, through the if-status's state-machine.
>>> However, I have some comments/questions regarding the implementation
> details, primarily confusion on the state transitions. Please see below.
>>>
>>>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2059253
>>>> Signed-off-by: Xavier Simonart <xsimonar@redhat.com>
>>>> ---
>>>>  controller/binding.c        | 124 ++++++++++++++++++++++++++----------
>>>>  controller/binding.h        |   9 ++-
>>>>  controller/if-status.c      |  31 +++++++--
>>>>  controller/if-status.h      |   6 +-
>>>>  controller/ovn-controller.c |   6 +-
>>>>  tests/ovn.at                |  55 +++++++++++++++-
>>>>  6 files changed, 184 insertions(+), 47 deletions(-)
>>>>
>>>> diff --git a/controller/binding.c b/controller/binding.c
>>>> index e5ba56b25..d917d8775 100644
>>>> --- a/controller/binding.c
>>>> +++ b/controller/binding.c
>>>> @@ -657,11 +657,15 @@ local_binding_get_lport_ofport(const struct
> shash *local_bindings,
>>>>  }
>>>>
>>>>  bool
>>>> -local_binding_is_up(struct shash *local_bindings, const char *pb_name)
>>>> +local_binding_is_up(struct shash *local_bindings, const char *pb_name,
>>>> +                    const struct sbrec_chassis *chassis_rec)
>>>>  {
>>>>      struct local_binding *lbinding =
>>>>          local_binding_find(local_bindings, pb_name);
>>>>      struct binding_lport *b_lport =
> local_binding_get_primary_lport(lbinding);
>>>> +    if (b_lport && b_lport->pb->chassis != chassis_rec) {
>>>> +        return false;
>>>> +    }
>>>
>>> Why need the change here? I understand that it is obvious that the
> binding should not be considered up if the chassis is not updated in SB
> port_binding, but I wonder why we need the change in *this* patch.
>>> The function is called to see if an interface state should be moved from
> MARK_UP to INSTALLED, but if the chassis is not set, it shouldn't even be
> moved to MARK_UP.
>>>
>> I probably need to provide a better explanation of the states.
>> In fact, my two goals were: 1) avoid/decrease recomputes and 2) do not
> increase latency (time between interface is claimed and it's up in SB and
> ovn-installed in OVS).
>> There is not really an additional state covering the fact that the
> chassis is set in sbdb.
>> As soon as the state is CLAIMED, we try to set the pb->chassis (by 'try'
> I mean we update pb->chassis if sb is not readonly).
>> The fact that sb is readonly in CLAIMED state does not prevent us to move
> to INSTALL_FLOWS state, where we are installing flows in OVS.
> 
> This may be a problem. If we haven't updated pb->chassis, then the physical
> flows being installed are incomplete, and if we move the state forward, we
> would end up updating the port status (SB pb->up and OVS
> interface->external_ids:installed) too early, which completely void the
> original purpose of the if-statue module, which is to manage the interface
> state and tell CMS when a lport is ready to receive traffic.
> 

Sounds like an issue indeed.

>> As soon as the seqno for those flows was received, we moved to MARK_UP
> state.
>> In those three states, we can update pb->chassis (if not already done).
>> In MARK_UP we set the interface up.
>> We moved to INSTALLED when both up and chassis have been written.
>>
>>>>      if (lbinding && b_lport && lbinding->iface) {
>>>>          if (b_lport->pb->n_up && !b_lport->pb->up[0]) {
>>>>              return false;
>>>> @@ -673,13 +677,23 @@ local_binding_is_up(struct shash
> *local_bindings, const char *pb_name)
>>>>  }
>>>>
>>>>  bool
>>>> -local_binding_is_down(struct shash *local_bindings, const char
> *pb_name)
>>>> +local_binding_is_down(struct shash *local_bindings, const char
> *pb_name,
>>>> +                      const struct sbrec_chassis *chassis_rec)
>>>>  {
>>>>      struct local_binding *lbinding =
>>>>          local_binding_find(local_bindings, pb_name);
>>>>
>>>>      struct binding_lport *b_lport =
> local_binding_get_primary_lport(lbinding);
>>>>
>>>> +    if (b_lport) {
>>>> +        if (b_lport->pb->chassis == chassis_rec) {
>>>> +            return false;
>>>> +        } else if (b_lport->pb->chassis) {
>>>> +            VLOG_DBG("lport %s already claimed by other chassis",
>>>> +                     b_lport->pb->logical_port);
>>>> +        }
>>>> +    }
>>>> +
>>>
>>> Same as above, it is not clear to me why this change here. It would be
> better to clarify the state transition first.
>>>
>>>>      if (!lbinding) {
>>>>          return true;
>>>>      }
>>>> @@ -894,6 +908,48 @@ get_lport_type_str(enum en_lport_type lport_type)
>>>>      OVS_NOT_REACHED();
>>>>  }
>>>>
>>>> +static void set_pb_chassis_in_sbrec(const struct sbrec_port_binding
> *pb,
>>>> +                          const struct sbrec_chassis *chassis_rec)
>>>> +{
>>>> +    if (pb->chassis != chassis_rec) {
>>>> +        if (pb->chassis) {
>>>> +            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
>>>> +                    pb->logical_port, pb->chassis->name,
>>>> +                    chassis_rec->name);
>>>> +        } else {
>>>> +            VLOG_INFO("Claiming lport %s for this chassis.",
> pb->logical_port);
>>>> +        }
>>>> +        for (int i = 0; i < pb->n_mac; i++) {
>>>> +            VLOG_INFO("%s: Claiming %s", pb->logical_port,
> pb->mac[i]);
>>>> +        }
>>>> +        sbrec_port_binding_set_chassis(pb, chassis_rec);
>>>> +    }
>>>> +}
>>>> +
>>>> +static void clear_pb_chassis_in_sbrec(const struct sbrec_port_binding
> *pb)
>>>> +{
>>>> +    if (pb->chassis) {
>>>> +        sbrec_port_binding_set_chassis(pb, NULL);
>>>> +    }
>>>> +}
>>>> +
>>>> +void
>>>> +local_binding_set_pb(struct shash *local_bindings, const char
> *pb_name,
>>>> +                    const struct sbrec_chassis *chassis_rec)
>>>> +{
>>>> +    struct local_binding *lbinding =
>>>> +        local_binding_find(local_bindings, pb_name);
>>>> +    struct binding_lport *b_lport =
> local_binding_get_primary_lport(lbinding);
>>>> +
>>>> +    if (b_lport) {
>>>> +        if (chassis_rec) {
>>>> +            set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec);
>>>> +        } else {
>>>> +            clear_pb_chassis_in_sbrec(b_lport->pb);
>>>> +        }
>>>
>>> It looks like the logic can be simplified to just:
>>>     set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec);
>>> This way we don't need the if/else, and the function
> clear_pb_chassis_in_sbrec() can be removed.
>>>
>> We can clearly combine set_pb_chassis_in_sbrec and
> clear_pb_chassis_in_sbrec.
>> I had an impression that having the 'clear' function made the 'set'
> function easier to understand (avoid an additional if (chassis_rec) in
> 'set'.
>> But I am fine w/ changing it.
>>
>>>
>>>> +    }
>>>> +}
>>>> +
>>>>  /* For newly claimed ports, if 'notify_up' is 'false':
>>>>   * - set the 'pb.up' field to true if 'pb' has no 'parent_pb'.
>>>>   * - set the 'pb.up' field to true if 'parent_pb.up' is 'true' (e.g.,
> for
>>>> @@ -906,23 +962,37 @@ get_lport_type_str(enum en_lport_type lport_type)
>>>>   *   it's explicitly set to 'false'.
>>>>   *   This is to ensure compatibility with older versions of
> ovn-northd.
>>>>   */
>>>> -static void
>>>> +static bool
>>>>  claimed_lport_set_up(const struct sbrec_port_binding *pb,
>>>>                       const struct sbrec_port_binding *parent_pb,
>>>>                       const struct sbrec_chassis *chassis_rec,
>>>> -                     bool notify_up, struct if_status_mgr *if_mgr)
>>>> +                     bool notify_up, struct if_status_mgr *if_mgr,
>>>> +                     bool sb_readonly)
>>>>  {
>>>> +
>>>>      if (!notify_up) {
>>>>          bool up = true;
>>>>          if (!parent_pb || (parent_pb->n_up && parent_pb->up[0])) {
>>>> -            sbrec_port_binding_set_up(pb, &up, 1);
>>>> +            if (!sb_readonly) {
>>>> +                sbrec_port_binding_set_up(pb, &up, 1);
>>>> +            } else if (pb->n_up && !pb->up[0]) {
>>>> +                return false;
>>>> +            }
>>>>          }
>>>> -        return;
>>>> +        if (sb_readonly && (pb->chassis != chassis_rec)) {
>>>> +            /* Handle the cases where if_status_mgr does not claim the
>>>> +             * interface.In those cases, if we do not set pb->chassis
> in sb
>>>> +             * now (as readonly), we might not do it later ...
>>>> +             */
>>>
>>> Thanks for the comments, but I am sorry that I still couldn't
> understand, maybe just because I am not familiar with the existing logic of
> container port handling. Any more detailed explanation would be appreciated.
>>>
>> Some ports, such as container and virtual ports, are set up directly
> (notify_up=false), when claimed, when their parent is up. Hence for those
> ports we do not create/use the if-status state machine, and we will not
> have an opportunity later to set them up. if sb was readonly.
>> So, for such ports, if sb is readonly, we will recompute.
>> We could extend the if-status logic and handle such ports as well - this
> is also a comment from Dumitru. I did not implement it in this patch to not
> complicate the if-status state machine, as I think it will require either
> an additional state and/or a flag.
>> If/when we are fine with this patch, the goal is to start another patch
> handling such container/virtual ports interfaces through the if-status
> state machine .
>>
> Make sense, thanks! It would be better to document these special
> considerations (probably requires a refactor patch before your patch).
> 
>>>> +            return false;
>>>> +        }
>>>> +        return true;
>>>>      }
>>>>
>>>>      if (pb->chassis != chassis_rec || (pb->n_up && !pb->up[0])) {
>>>>          if_status_mgr_claim_iface(if_mgr, pb->logical_port);
>>>
>>> Since we are now updating SB port_binding's chassis in the if-status
> module, we should move to CLAIMED status there when the if-status module
> updates SB DB (when it is not read-only). The state transition looks
> incorrect if we call the function here and move the state to CLAIMED before
> it updates chassis to SB DB. My understanding of CLAIMED here is that an
> update is sent to SB for the PB's chassis column. I think we should add a
> new state before the CLAIMED state, because this is a new state that is
> introduced by this patch: an interface that is supposed to be claimed by
> the chassis, but the update to SB is not sent because of the read-only SB.
>>>
>>> Or, we can still use the state name CLAIMED for the newly bound
> interface, but add a new state between CLAIMED and INSTALL_FLOWS: CLAIMED
> (but SB PB's chassis not updated) -> SB_CHASSIS_SENT -> INSTALL_FLOWS ->
> MARK_UP -> INSTALLED
>>>
>> This is where we diverge. I see CLAIMED as a state where we are ready to
> update the chassis in SBDB. We update it if we can (not readonly), but it
> does not block us in our state progression if we cannot. We do not add an
> additional state, as this would add one round trip w/ SBDB. Chassis will be
> updated in SBDB in CLAIMED, INSTALL_FLOWS or MARK_UP state, before or at
> the same time as port up.
>>
> Please see my comment above for why I think it is important to separate the
> state. Regarding "this would add one round trip w/ SBDB", I don't think it
> *add* any extra round trip. The round trip is necessary, because without it
> the flows installed are incomplete. Before this change, we also wait until
> the next recompute to move state forward and declare the port as
> UP/INSTALLED.

I think I agree with Han on this, sounds like we need the extra state.

I know it might get outdated quickly but, with the thought that this
state machine shouldn't change too much in the future, would it make
sense to add an ascii diagram of the state machine for future reference?

> 
>>>
>>>>      }
>>>> +    return true;
>>>>  }
>>>>
>>>>  /* Returns false if lport is not claimed due to 'sb_readonly'.
>>>> @@ -937,27 +1007,15 @@ claim_lport(const struct sbrec_port_binding *pb,
>>>>              struct hmap *tracked_datapaths,
>>>>              struct if_status_mgr *if_mgr)
>>>>  {
>>>> -    if (!sb_readonly) {
>>>> -        claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
> if_mgr);
>>>> -    }
>>>>
>>>> +    if (!claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
>>>> +           if_mgr, sb_readonly)) {
>>>> +        return false;
>>>> +    }
>>>>      if (pb->chassis != chassis_rec) {
>>>> -        if (sb_readonly) {
>>>> -            return false;
>>>> -        }
>>>> -
>>>> -        if (pb->chassis) {
>>>> -            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
>>>> -                    pb->logical_port, pb->chassis->name,
>>>> -                    chassis_rec->name);
>>>> -        } else {
>>>> -            VLOG_INFO("Claiming lport %s for this chassis.",
> pb->logical_port);
>>>> +        if (!sb_readonly) {
>>>> +            set_pb_chassis_in_sbrec(pb, chassis_rec);
>>>
>>> I am confused that if the if-status module is going to handle the update
> to SB DB, why would we set it here? I think it is better to make the
> responsibility of the modules clear and handle it in just one place.
>>>
>> if-status handles updates... when the binding module was not able to do
> it immediately... (in a similar way as, before this patch, it is handling
> the port up when not done by the binding module).
> 
> If I understand correctly, before this patch, the port "up" is updated by
> the if-status module only, in if_status_mgr_update_bindings(), not by the
> binding module.
> 
>> But I agree with your point about clarity of having the if-status machine
> handling all cases. This is however only possible when all ports (including
> virtual ports) have such an interface (which is not the case now).
>> If fine with you, I'd prefer handling such ports (virtual, container) in
> a subsequent patch, which could then also make sure the chassis update
> becomes the responsibility of the if-status module.
>>
> I am ok with handling virtual/container ports in a later patch. Can this
> line be removed at that point? It would be good to add a comment here if
> that's the case.

+1

> 
> Thanks,
> Han
> 

Thanks,
Dumitru

>>>>          }
>>>> -        for (int i = 0; i < pb->n_mac; i++) {
>>>> -            VLOG_INFO("%s: Claiming %s", pb->logical_port,
> pb->mac[i]);
>>>> -        }
>>>> -
>>>> -        sbrec_port_binding_set_chassis(pb, chassis_rec);
>>>>
>>>>          if (tracked_datapaths) {
>>>>              update_lport_tracking(pb, tracked_datapaths, true);
>>>> @@ -974,7 +1032,6 @@ claim_lport(const struct sbrec_port_binding *pb,
>>>>          }
>>>>          sbrec_port_binding_set_encap(pb, encap_rec);
>>>>      }
>>>> -
>>>>      return true;
>>>>  }
>>>>
>>>> @@ -986,7 +1043,8 @@ claim_lport(const struct sbrec_port_binding *pb,
>>>>   * Caller should make sure that this is the case.
>>>>   */
>>>>  static bool
>>>> -release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly)
>>>> +release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly,
>>>> +               struct if_status_mgr *if_mgr)
>>>>  {
>>>>      if (pb->encap) {
>>>>          if (sb_readonly) {
>>>> @@ -995,11 +1053,13 @@ release_lport_(const struct sbrec_port_binding
> *pb, bool sb_readonly)
>>>>          sbrec_port_binding_set_encap(pb, NULL);
>>>>      }
>>>>
>>>> +    /* if sb readonly, pb->chassis unset through if-status if present
> */
>>>>      if (pb->chassis) {
>>>> -        if (sb_readonly) {
>>>> +        if (!sb_readonly) {
>>>> +            sbrec_port_binding_set_chassis(pb, NULL);
>>>> +        } else if (!if_status_mgr_iface_is_present(if_mgr,
> pb->logical_port)) {
>>>
>>> Same here. Also, if we follow the state transition properly, we
> shouldn't worry that the if-status is not present before the interface is
> released, right?
>>>
>> Some ports (virtual, container) do not have such a state. As mentioned
> earlier  I would prefer handling those ports in a subsequent patch, to
> isolate the state machine changes related to them.
>>
>>> Thanks,
>>> Han
>>>
>> Thanks again for your feedback.
>> Xavier
>>>
>>>>              return false;
>>>>          }
>>>> -        sbrec_port_binding_set_chassis(pb, NULL);
>>>>      }
>>>>
>>>>      if (pb->virtual_parent) {
>>>> @@ -1009,7 +1069,8 @@ release_lport_(const struct sbrec_port_binding
> *pb, bool sb_readonly)
>>>>          sbrec_port_binding_set_virtual_parent(pb, NULL);
>>>>      }
>>>>
>>>> -    VLOG_INFO("Releasing lport %s from this chassis.",
> pb->logical_port);
>>>> +    VLOG_INFO("Releasing lport %s from this chassis (sb_readonly=%d)",
>>>> +              pb->logical_port, sb_readonly);
>>>>      return true;
>>>>  }
>>>>
>>>> @@ -1017,7 +1078,7 @@ static bool
>>>>  release_lport(const struct sbrec_port_binding *pb, bool sb_readonly,
>>>>                struct hmap *tracked_datapaths, struct if_status_mgr
> *if_mgr)
>>>>  {
>>>> -    if (!release_lport_(pb, sb_readonly)) {
>>>> +    if (!release_lport_(pb, sb_readonly, if_mgr)) {
>>>>          return false;
>>>>      }
>>>>
>>>> @@ -1342,10 +1403,9 @@ consider_localport(const struct
> sbrec_port_binding *pb,
>>>>      /* If the port binding is claimed, then release it as localport
> is claimed
>>>>       * by any ovn-controller. */
>>>>      if (pb->chassis == b_ctx_in->chassis_rec) {
>>>> -        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn)) {
>>>> +        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn,
> b_ctx_out->if_mgr)) {
>>>>              return false;
>>>>          }
>>>> -
>>>>          remove_related_lport(pb, b_ctx_out);
>>>>      }
>>>>
>>>> diff --git a/controller/binding.h b/controller/binding.h
>>>> index 430a8d9b1..45202a321 100644
>>>> --- a/controller/binding.h
>>>> +++ b/controller/binding.h
>>>> @@ -151,14 +151,17 @@ const struct sbrec_port_binding
> *local_binding_get_primary_pb(
>>>>  ofp_port_t local_binding_get_lport_ofport(const struct shash
> *local_bindings,
>>>>                                            const char *pb_name);
>>>>
>>>> -bool local_binding_is_up(struct shash *local_bindings, const char
> *pb_name);
>>>> -bool local_binding_is_down(struct shash *local_bindings, const char
> *pb_name);
>>>> +bool local_binding_is_up(struct shash *local_bindings, const char
> *pb_name,
>>>> +                         const struct sbrec_chassis *);
>>>> +bool local_binding_is_down(struct shash *local_bindings, const char
> *pb_name,
>>>> +                         const struct sbrec_chassis *);
>>>>  void local_binding_set_up(struct shash *local_bindings, const char
> *pb_name,
>>>>                            const char *ts_now_str, bool sb_readonly,
>>>>                            bool ovs_readonly);
>>>>  void local_binding_set_down(struct shash *local_bindings, const char
> *pb_name,
>>>>                              bool sb_readonly, bool ovs_readonly);
>>>> -
>>>> +void local_binding_set_pb(struct shash *local_bindings, const char
> *pb_name,
>>>> +                         const struct sbrec_chassis *chassis_rec);
>>>>  void binding_register_ovs_idl(struct ovsdb_idl *);
>>>>  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
>>>>  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>> diff --git a/controller/if-status.c b/controller/if-status.c
>>>> index dbdf8b66f..e36df22a0 100644
>>>> --- a/controller/if-status.c
>>>> +++ b/controller/if-status.c
>>>> @@ -115,6 +115,7 @@ static void ovs_iface_set_state(struct
> if_status_mgr *, struct ovs_iface *,
>>>>
>>>>  static void if_status_mgr_update_bindings(
>>>>      struct if_status_mgr *mgr, struct local_binding_data
> *binding_data,
>>>> +    const struct sbrec_chassis *chassis_rec,
>>>>      bool sb_readonly, bool ovs_readonly);
>>>>
>>>>  struct if_status_mgr *
>>>> @@ -181,6 +182,13 @@ if_status_mgr_claim_iface(struct if_status_mgr
> *mgr, const char *iface_id)
>>>>      }
>>>>  }
>>>>
>>>> +bool
>>>> +if_status_mgr_iface_is_present(struct if_status_mgr *mgr, const char
> *iface_id)
>>>> +{
>>>> +    struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id);
>>>> +    return (!!iface);
>>>> +}
>>>> +
>>>>  void
>>>>  if_status_mgr_release_iface(struct if_status_mgr *mgr, const char
> *iface_id)
>>>>  {
>>>> @@ -247,7 +255,8 @@ if_status_mgr_delete_iface(struct if_status_mgr
> *mgr, const char *iface_id)
>>>>
>>>>  void
>>>>  if_status_mgr_update(struct if_status_mgr *mgr,
>>>> -                     struct local_binding_data *binding_data)
>>>> +                     struct local_binding_data *binding_data,
>>>> +                     const struct sbrec_chassis *chassis_rec)
>>>>  {
>>>>      if (!binding_data) {
>>>>          return;
>>>> @@ -262,7 +271,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>>>>      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
>>>>          struct ovs_iface *iface = node->data;
>>>>
>>>> -        if (local_binding_is_up(bindings, iface->id)) {
>>>> +        if (local_binding_is_up(bindings, iface->id, chassis_rec)) {
>>>>              ovs_iface_set_state(mgr, iface, OIF_INSTALLED);
>>>>          }
>>>>      }
>>>> @@ -273,7 +282,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>>>>      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_DOWN])
> {
>>>>          struct ovs_iface *iface = node->data;
>>>>
>>>> -        if (local_binding_is_down(bindings, iface->id)) {
>>>> +        if (local_binding_is_down(bindings, iface->id, chassis_rec)) {
>>>>              ovs_iface_destroy(mgr, iface);
>>>>          }
>>>>      }
>>>> @@ -306,6 +315,7 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>>>>  void
>>>>  if_status_mgr_run(struct if_status_mgr *mgr,
>>>>                    struct local_binding_data *binding_data,
>>>> +                  const struct sbrec_chassis *chassis_rec,
>>>>                    bool sb_readonly, bool ovs_readonly)
>>>>  {
>>>>      struct ofctrl_acked_seqnos *acked_seqnos =
>>>> @@ -328,8 +338,8 @@ if_status_mgr_run(struct if_status_mgr *mgr,
>>>>      ofctrl_acked_seqnos_destroy(acked_seqnos);
>>>>
>>>>      /* Update binding states. */
>>>> -    if_status_mgr_update_bindings(mgr, binding_data, sb_readonly,
>>>> -                                  ovs_readonly);
>>>> +    if_status_mgr_update_bindings(mgr, binding_data, chassis_rec,
>>>> +                                  sb_readonly, ovs_readonly);
>>>>  }
>>>>
>>>>  static void
>>>> @@ -390,6 +400,7 @@ ovs_iface_set_state(struct if_status_mgr *mgr,
> struct ovs_iface *iface,
>>>>  static void
>>>>  if_status_mgr_update_bindings(struct if_status_mgr *mgr,
>>>>                                struct local_binding_data *binding_data,
>>>> +                              const struct sbrec_chassis *chassis_rec,
>>>>                                bool sb_readonly, bool ovs_readonly)
>>>>  {
>>>>      if (!binding_data) {
>>>> @@ -404,6 +415,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
> *mgr,
>>>>       */
>>>>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_INSTALL_FLOWS]) {
>>>>          struct ovs_iface *iface = node->data;
>>>> +        if (!sb_readonly) {
>>>> +            local_binding_set_pb(bindings, iface->id, chassis_rec);
>>>> +        }
>>>>
>>>>          local_binding_set_down(bindings, iface->id, sb_readonly,
> ovs_readonly);
>>>>      }
>>>> @@ -415,7 +429,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
> *mgr,
>>>>      char *ts_now_str = xasprintf("%lld", time_wall_msec());
>>>>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
>>>>          struct ovs_iface *iface = node->data;
>>>> -
>>>> +        if (!sb_readonly) {
>>>> +            local_binding_set_pb(bindings, iface->id, chassis_rec);
>>>> +        }
>>>>          local_binding_set_up(bindings, iface->id, ts_now_str,
>>>>                               sb_readonly, ovs_readonly);
>>>>      }
>>>> @@ -426,6 +442,9 @@ if_status_mgr_update_bindings(struct if_status_mgr
> *mgr,
>>>>       */
>>>>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
>>>>          struct ovs_iface *iface = node->data;
>>>> +        if (!sb_readonly) {
>>>> +            local_binding_set_pb(bindings, iface->id, NULL);
>>>> +        }
>>>>
>>>>          local_binding_set_down(bindings, iface->id, sb_readonly,
> ovs_readonly);
>>>>      }
>>>> diff --git a/controller/if-status.h b/controller/if-status.h
>>>> index ff4aa760e..2c55eb18e 100644
>>>> --- a/controller/if-status.h
>>>> +++ b/controller/if-status.h
>>>> @@ -31,10 +31,14 @@ void if_status_mgr_claim_iface(struct
> if_status_mgr *, const char *iface_id);
>>>>  void if_status_mgr_release_iface(struct if_status_mgr *, const char
> *iface_id);
>>>>  void if_status_mgr_delete_iface(struct if_status_mgr *, const char
> *iface_id);
>>>>
>>>> -void if_status_mgr_update(struct if_status_mgr *, struct
> local_binding_data *);
>>>> +void if_status_mgr_update(struct if_status_mgr *, struct
> local_binding_data *,
>>>> +                          const struct sbrec_chassis *chassis);
>>>>  void if_status_mgr_run(struct if_status_mgr *mgr, struct
> local_binding_data *,
>>>> +                       const struct sbrec_chassis *chassis,
>>>>                         bool sb_readonly, bool ovs_readonly);
>>>>  void if_status_mgr_get_memory_usage(struct if_status_mgr *mgr,
>>>>                                      struct simap *usage);
>>>> +bool if_status_mgr_iface_is_present(struct if_status_mgr *mgr,
>>>> +                                    const char *iface_id);
>>>>
>>>>  # endif /* controller/if-status.h */
>>>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>>>> index 5a6274eb2..994aebdfb 100644
>>>> --- a/controller/ovn-controller.c
>>>> +++ b/controller/ovn-controller.c
>>>> @@ -3912,7 +3912,7 @@ main(int argc, char *argv[])
>>>>                          runtime_data ? &runtime_data->lbinding_data :
> NULL;
>>>>
>  stopwatch_start(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
>>>>                                      time_msec());
>>>> -                    if_status_mgr_update(if_mgr, binding_data);
>>>> +                    if_status_mgr_update(if_mgr, binding_data,
> chassis);
>>>>
>  stopwatch_stop(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
>>>>                                     time_msec());
>>>>
>>>> @@ -3938,8 +3938,8 @@ main(int argc, char *argv[])
>>>>                                     time_msec());
>>>>                      stopwatch_start(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>>>>                                      time_msec());
>>>> -                    if_status_mgr_run(if_mgr, binding_data,
> !ovnsb_idl_txn,
>>>> -                                      !ovs_idl_txn);
>>>> +                    if_status_mgr_run(if_mgr, binding_data, chassis,
>>>> +                                      !ovnsb_idl_txn, !ovs_idl_txn);
>>>>                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>>>>                                     time_msec());
>>>>                  }
>>>> diff --git a/tests/ovn.at b/tests/ovn.at
>>>> index 6a0a169c1..403fbc85f 100644
>>>> --- a/tests/ovn.at
>>>> +++ b/tests/ovn.at
>>>> @@ -13934,6 +13934,7 @@ ovn_attach n1 br-phys 192.168.0.11
>>>>  ovs-vsctl -- add-port br-int hv1-vif0 -- \
>>>>  set Interface hv1-vif0 ofport-request=1
>>>>
>>>> +
>>>>  sim_add hv2
>>>>  as hv2
>>>>  ovs-vsctl add-br br-phys
>>>> @@ -13983,7 +13984,10 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
> table=65 | grep actions=output:1],
>>>>  echo "verifying that lsp0 binding moves when requested-chassis is
> changed"
>>>>
>>>>  ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
>>>> -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
> chassis" hv1/ovn-controller.log)])
>>>> +
>>>> +# We might see multiple "Releasing lport ...", when sb is read only
>>>> +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
> chassis" hv1/ovn-controller.log)])
>>>> +
>>>>  wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
>>>>
>>>>  # (6) Chassis hv2 should add flows and hv1 should not.
>>>> @@ -14029,7 +14033,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
> table=0 | grep in_port=1], [0], [ig
>>>>  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep
> actions=output:1], [0], [ignore])
>>>>
>>>>  check ovn-nbctl --wait=hv lsp-set-options lsp0
> requested-chassis=non-existant-chassis
>>>> -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
> chassis" hv1/ovn-controller.log)])
>>>> +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
> chassis" hv1/ovn-controller.log)])
>>>>  check ovn-nbctl --wait=hv sync
>>>>  wait_column '' Port_Binding chasssi logical_port=lsp0
>>>>  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep
> in_port=1], [1], [])
>>>> @@ -30595,3 +30599,50 @@ test_mac_binding_flows hv2
> 00\:00\:00\:00\:10\:10 1
>>>>  OVN_CLEANUP([hv1], [hv2])
>>>>  AT_CLEANUP
>>>>  ])
>>>> +
>>>> +OVN_FOR_EACH_NORTHD([
>>>> +AT_SETUP([recomputes])
>>>> +ovn_start
>>>> +
>>>> +# Add chassis
>>>> +net_add n1
>>>> +sim_add hv1
>>>> +as hv1
>>>> +ovs-vsctl add-br br-phys
>>>> +ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
>>>> +ovn_attach n1 br-phys 192.168.0.1
>>>> +
>>>> +add_switch_ports() {
>>>> +    for port in $(seq $1 $2); do
>>>> +        logical_switch_port=lsp${port}
>>>> +        check ovn-nbctl lsp-add ls1 $logical_switch_port
>>>> +        check ovn-nbctl lsp-set-addresses $logical_switch_port dynamic
>>>> +
>>>> +        as hv1 ovs-vsctl \
>>>> +            -- add-port br-int vif${port} \
>>>> +            -- set Interface vif${port}
> external_ids:iface-id=$logical_switch_port
>>>> +    done
>>>> +
>>>> +    check ovn-nbctl --wait=hv sync
>>>> +    AT_CHECK([test $(ovn-appctl -t ovn-controller
> coverage/read-counter lflow_run) == $lflow_run])
>>>> +}
>>>> +
>>>> +check ovn-nbctl ls-add ls1
>>>> +check ovn-nbctl set Logical_Switch ls1 other_config:subnet=10.1.0.0/16
>>>> +check ovn-nbctl set Logical_Switch ls1
> other_config:exclude_ips=10.1.255.254
>>>> +
>>>> +check ovn-nbctl lr-add lr1
>>>> +check ovn-nbctl lsp-add ls1 lsp0 -- set Logical_Switch_Port lsp0
> type=router options:router-port=lrp0 addresses=dynamic
>>>> +check ovn-nbctl lrp-add lr1 lrp0 "f0:00:00:01:00:01" 10.1.255.254/16
>>>> +check ovn-nbctl lr-nat-add lr1 snat 10.2.0.1 10.1.0.0/16
>>>> +
>>>> +check ovn-nbctl --wait=hv sync
>>>> +lflow_run=$(ovn-appctl -t ovn-controller coverage/read-counter
> lflow_run)
>>>> +
>>>> +add_switch_ports 1 50
>>>> +add_switch_ports 51 100
>>>> +add_switch_ports 101 150
>>>> +
>>>> +OVN_CLEANUP([hv1])
>>>> +AT_CLEANUP
>>>> +])
>>>> --
>>>> 2.31.1
>>>>
>>>> _______________________________________________
>>>> dev mailing list
>>>> dev@openvswitch.org
>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox series

Patch

diff --git a/controller/binding.c b/controller/binding.c
index e5ba56b25..d917d8775 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -657,11 +657,15 @@  local_binding_get_lport_ofport(const struct shash *local_bindings,
 }
 
 bool
-local_binding_is_up(struct shash *local_bindings, const char *pb_name)
+local_binding_is_up(struct shash *local_bindings, const char *pb_name,
+                    const struct sbrec_chassis *chassis_rec)
 {
     struct local_binding *lbinding =
         local_binding_find(local_bindings, pb_name);
     struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding);
+    if (b_lport && b_lport->pb->chassis != chassis_rec) {
+        return false;
+    }
     if (lbinding && b_lport && lbinding->iface) {
         if (b_lport->pb->n_up && !b_lport->pb->up[0]) {
             return false;
@@ -673,13 +677,23 @@  local_binding_is_up(struct shash *local_bindings, const char *pb_name)
 }
 
 bool
-local_binding_is_down(struct shash *local_bindings, const char *pb_name)
+local_binding_is_down(struct shash *local_bindings, const char *pb_name,
+                      const struct sbrec_chassis *chassis_rec)
 {
     struct local_binding *lbinding =
         local_binding_find(local_bindings, pb_name);
 
     struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding);
 
+    if (b_lport) {
+        if (b_lport->pb->chassis == chassis_rec) {
+            return false;
+        } else if (b_lport->pb->chassis) {
+            VLOG_DBG("lport %s already claimed by other chassis",
+                     b_lport->pb->logical_port);
+        }
+    }
+
     if (!lbinding) {
         return true;
     }
@@ -894,6 +908,48 @@  get_lport_type_str(enum en_lport_type lport_type)
     OVS_NOT_REACHED();
 }
 
+static void set_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb,
+                          const struct sbrec_chassis *chassis_rec)
+{
+    if (pb->chassis != chassis_rec) {
+        if (pb->chassis) {
+            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
+                    pb->logical_port, pb->chassis->name,
+                    chassis_rec->name);
+        } else {
+            VLOG_INFO("Claiming lport %s for this chassis.", pb->logical_port);
+        }
+        for (int i = 0; i < pb->n_mac; i++) {
+            VLOG_INFO("%s: Claiming %s", pb->logical_port, pb->mac[i]);
+        }
+        sbrec_port_binding_set_chassis(pb, chassis_rec);
+    }
+}
+
+static void clear_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb)
+{
+    if (pb->chassis) {
+        sbrec_port_binding_set_chassis(pb, NULL);
+    }
+}
+
+void
+local_binding_set_pb(struct shash *local_bindings, const char *pb_name,
+                    const struct sbrec_chassis *chassis_rec)
+{
+    struct local_binding *lbinding =
+        local_binding_find(local_bindings, pb_name);
+    struct binding_lport *b_lport = local_binding_get_primary_lport(lbinding);
+
+    if (b_lport) {
+        if (chassis_rec) {
+            set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec);
+        } else {
+            clear_pb_chassis_in_sbrec(b_lport->pb);
+        }
+    }
+}
+
 /* For newly claimed ports, if 'notify_up' is 'false':
  * - set the 'pb.up' field to true if 'pb' has no 'parent_pb'.
  * - set the 'pb.up' field to true if 'parent_pb.up' is 'true' (e.g., for
@@ -906,23 +962,37 @@  get_lport_type_str(enum en_lport_type lport_type)
  *   it's explicitly set to 'false'.
  *   This is to ensure compatibility with older versions of ovn-northd.
  */
-static void
+static bool
 claimed_lport_set_up(const struct sbrec_port_binding *pb,
                      const struct sbrec_port_binding *parent_pb,
                      const struct sbrec_chassis *chassis_rec,
-                     bool notify_up, struct if_status_mgr *if_mgr)
+                     bool notify_up, struct if_status_mgr *if_mgr,
+                     bool sb_readonly)
 {
+
     if (!notify_up) {
         bool up = true;
         if (!parent_pb || (parent_pb->n_up && parent_pb->up[0])) {
-            sbrec_port_binding_set_up(pb, &up, 1);
+            if (!sb_readonly) {
+                sbrec_port_binding_set_up(pb, &up, 1);
+            } else if (pb->n_up && !pb->up[0]) {
+                return false;
+            }
         }
-        return;
+        if (sb_readonly && (pb->chassis != chassis_rec)) {
+            /* Handle the cases where if_status_mgr does not claim the
+             * interface.In those cases, if we do not set pb->chassis in sb
+             * now (as readonly), we might not do it later ...
+             */
+            return false;
+        }
+        return true;
     }
 
     if (pb->chassis != chassis_rec || (pb->n_up && !pb->up[0])) {
         if_status_mgr_claim_iface(if_mgr, pb->logical_port);
     }
+    return true;
 }
 
 /* Returns false if lport is not claimed due to 'sb_readonly'.
@@ -937,27 +1007,15 @@  claim_lport(const struct sbrec_port_binding *pb,
             struct hmap *tracked_datapaths,
             struct if_status_mgr *if_mgr)
 {
-    if (!sb_readonly) {
-        claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up, if_mgr);
-    }
 
+    if (!claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
+           if_mgr, sb_readonly)) {
+        return false;
+    }
     if (pb->chassis != chassis_rec) {
-        if (sb_readonly) {
-            return false;
-        }
-
-        if (pb->chassis) {
-            VLOG_INFO("Changing chassis for lport %s from %s to %s.",
-                    pb->logical_port, pb->chassis->name,
-                    chassis_rec->name);
-        } else {
-            VLOG_INFO("Claiming lport %s for this chassis.", pb->logical_port);
+        if (!sb_readonly) {
+            set_pb_chassis_in_sbrec(pb, chassis_rec);
         }
-        for (int i = 0; i < pb->n_mac; i++) {
-            VLOG_INFO("%s: Claiming %s", pb->logical_port, pb->mac[i]);
-        }
-
-        sbrec_port_binding_set_chassis(pb, chassis_rec);
 
         if (tracked_datapaths) {
             update_lport_tracking(pb, tracked_datapaths, true);
@@ -974,7 +1032,6 @@  claim_lport(const struct sbrec_port_binding *pb,
         }
         sbrec_port_binding_set_encap(pb, encap_rec);
     }
-
     return true;
 }
 
@@ -986,7 +1043,8 @@  claim_lport(const struct sbrec_port_binding *pb,
  * Caller should make sure that this is the case.
  */
 static bool
-release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly)
+release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly,
+               struct if_status_mgr *if_mgr)
 {
     if (pb->encap) {
         if (sb_readonly) {
@@ -995,11 +1053,13 @@  release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly)
         sbrec_port_binding_set_encap(pb, NULL);
     }
 
+    /* if sb readonly, pb->chassis unset through if-status if present */
     if (pb->chassis) {
-        if (sb_readonly) {
+        if (!sb_readonly) {
+            sbrec_port_binding_set_chassis(pb, NULL);
+        } else if (!if_status_mgr_iface_is_present(if_mgr, pb->logical_port)) {
             return false;
         }
-        sbrec_port_binding_set_chassis(pb, NULL);
     }
 
     if (pb->virtual_parent) {
@@ -1009,7 +1069,8 @@  release_lport_(const struct sbrec_port_binding *pb, bool sb_readonly)
         sbrec_port_binding_set_virtual_parent(pb, NULL);
     }
 
-    VLOG_INFO("Releasing lport %s from this chassis.", pb->logical_port);
+    VLOG_INFO("Releasing lport %s from this chassis (sb_readonly=%d)",
+              pb->logical_port, sb_readonly);
     return true;
 }
 
@@ -1017,7 +1078,7 @@  static bool
 release_lport(const struct sbrec_port_binding *pb, bool sb_readonly,
               struct hmap *tracked_datapaths, struct if_status_mgr *if_mgr)
 {
-    if (!release_lport_(pb, sb_readonly)) {
+    if (!release_lport_(pb, sb_readonly, if_mgr)) {
         return false;
     }
 
@@ -1342,10 +1403,9 @@  consider_localport(const struct sbrec_port_binding *pb,
     /* If the port binding is claimed, then release it as localport is claimed
      * by any ovn-controller. */
     if (pb->chassis == b_ctx_in->chassis_rec) {
-        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn)) {
+        if (!release_lport_(pb, !b_ctx_in->ovnsb_idl_txn, b_ctx_out->if_mgr)) {
             return false;
         }
-
         remove_related_lport(pb, b_ctx_out);
     }
 
diff --git a/controller/binding.h b/controller/binding.h
index 430a8d9b1..45202a321 100644
--- a/controller/binding.h
+++ b/controller/binding.h
@@ -151,14 +151,17 @@  const struct sbrec_port_binding *local_binding_get_primary_pb(
 ofp_port_t local_binding_get_lport_ofport(const struct shash *local_bindings,
                                           const char *pb_name);
 
-bool local_binding_is_up(struct shash *local_bindings, const char *pb_name);
-bool local_binding_is_down(struct shash *local_bindings, const char *pb_name);
+bool local_binding_is_up(struct shash *local_bindings, const char *pb_name,
+                         const struct sbrec_chassis *);
+bool local_binding_is_down(struct shash *local_bindings, const char *pb_name,
+                         const struct sbrec_chassis *);
 void local_binding_set_up(struct shash *local_bindings, const char *pb_name,
                           const char *ts_now_str, bool sb_readonly,
                           bool ovs_readonly);
 void local_binding_set_down(struct shash *local_bindings, const char *pb_name,
                             bool sb_readonly, bool ovs_readonly);
-
+void local_binding_set_pb(struct shash *local_bindings, const char *pb_name,
+                         const struct sbrec_chassis *chassis_rec);
 void binding_register_ovs_idl(struct ovsdb_idl *);
 void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
 bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
diff --git a/controller/if-status.c b/controller/if-status.c
index dbdf8b66f..e36df22a0 100644
--- a/controller/if-status.c
+++ b/controller/if-status.c
@@ -115,6 +115,7 @@  static void ovs_iface_set_state(struct if_status_mgr *, struct ovs_iface *,
 
 static void if_status_mgr_update_bindings(
     struct if_status_mgr *mgr, struct local_binding_data *binding_data,
+    const struct sbrec_chassis *chassis_rec,
     bool sb_readonly, bool ovs_readonly);
 
 struct if_status_mgr *
@@ -181,6 +182,13 @@  if_status_mgr_claim_iface(struct if_status_mgr *mgr, const char *iface_id)
     }
 }
 
+bool
+if_status_mgr_iface_is_present(struct if_status_mgr *mgr, const char *iface_id)
+{
+    struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id);
+    return (!!iface);
+}
+
 void
 if_status_mgr_release_iface(struct if_status_mgr *mgr, const char *iface_id)
 {
@@ -247,7 +255,8 @@  if_status_mgr_delete_iface(struct if_status_mgr *mgr, const char *iface_id)
 
 void
 if_status_mgr_update(struct if_status_mgr *mgr,
-                     struct local_binding_data *binding_data)
+                     struct local_binding_data *binding_data,
+                     const struct sbrec_chassis *chassis_rec)
 {
     if (!binding_data) {
         return;
@@ -262,7 +271,7 @@  if_status_mgr_update(struct if_status_mgr *mgr,
     HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
         struct ovs_iface *iface = node->data;
 
-        if (local_binding_is_up(bindings, iface->id)) {
+        if (local_binding_is_up(bindings, iface->id, chassis_rec)) {
             ovs_iface_set_state(mgr, iface, OIF_INSTALLED);
         }
     }
@@ -273,7 +282,7 @@  if_status_mgr_update(struct if_status_mgr *mgr,
     HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
         struct ovs_iface *iface = node->data;
 
-        if (local_binding_is_down(bindings, iface->id)) {
+        if (local_binding_is_down(bindings, iface->id, chassis_rec)) {
             ovs_iface_destroy(mgr, iface);
         }
     }
@@ -306,6 +315,7 @@  if_status_mgr_update(struct if_status_mgr *mgr,
 void
 if_status_mgr_run(struct if_status_mgr *mgr,
                   struct local_binding_data *binding_data,
+                  const struct sbrec_chassis *chassis_rec,
                   bool sb_readonly, bool ovs_readonly)
 {
     struct ofctrl_acked_seqnos *acked_seqnos =
@@ -328,8 +338,8 @@  if_status_mgr_run(struct if_status_mgr *mgr,
     ofctrl_acked_seqnos_destroy(acked_seqnos);
 
     /* Update binding states. */
-    if_status_mgr_update_bindings(mgr, binding_data, sb_readonly,
-                                  ovs_readonly);
+    if_status_mgr_update_bindings(mgr, binding_data, chassis_rec,
+                                  sb_readonly, ovs_readonly);
 }
 
 static void
@@ -390,6 +400,7 @@  ovs_iface_set_state(struct if_status_mgr *mgr, struct ovs_iface *iface,
 static void
 if_status_mgr_update_bindings(struct if_status_mgr *mgr,
                               struct local_binding_data *binding_data,
+                              const struct sbrec_chassis *chassis_rec,
                               bool sb_readonly, bool ovs_readonly)
 {
     if (!binding_data) {
@@ -404,6 +415,9 @@  if_status_mgr_update_bindings(struct if_status_mgr *mgr,
      */
     HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_INSTALL_FLOWS]) {
         struct ovs_iface *iface = node->data;
+        if (!sb_readonly) {
+            local_binding_set_pb(bindings, iface->id, chassis_rec);
+        }
 
         local_binding_set_down(bindings, iface->id, sb_readonly, ovs_readonly);
     }
@@ -415,7 +429,9 @@  if_status_mgr_update_bindings(struct if_status_mgr *mgr,
     char *ts_now_str = xasprintf("%lld", time_wall_msec());
     HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
         struct ovs_iface *iface = node->data;
-
+        if (!sb_readonly) {
+            local_binding_set_pb(bindings, iface->id, chassis_rec);
+        }
         local_binding_set_up(bindings, iface->id, ts_now_str,
                              sb_readonly, ovs_readonly);
     }
@@ -426,6 +442,9 @@  if_status_mgr_update_bindings(struct if_status_mgr *mgr,
      */
     HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
         struct ovs_iface *iface = node->data;
+        if (!sb_readonly) {
+            local_binding_set_pb(bindings, iface->id, NULL);
+        }
 
         local_binding_set_down(bindings, iface->id, sb_readonly, ovs_readonly);
     }
diff --git a/controller/if-status.h b/controller/if-status.h
index ff4aa760e..2c55eb18e 100644
--- a/controller/if-status.h
+++ b/controller/if-status.h
@@ -31,10 +31,14 @@  void if_status_mgr_claim_iface(struct if_status_mgr *, const char *iface_id);
 void if_status_mgr_release_iface(struct if_status_mgr *, const char *iface_id);
 void if_status_mgr_delete_iface(struct if_status_mgr *, const char *iface_id);
 
-void if_status_mgr_update(struct if_status_mgr *, struct local_binding_data *);
+void if_status_mgr_update(struct if_status_mgr *, struct local_binding_data *,
+                          const struct sbrec_chassis *chassis);
 void if_status_mgr_run(struct if_status_mgr *mgr, struct local_binding_data *,
+                       const struct sbrec_chassis *chassis,
                        bool sb_readonly, bool ovs_readonly);
 void if_status_mgr_get_memory_usage(struct if_status_mgr *mgr,
                                     struct simap *usage);
+bool if_status_mgr_iface_is_present(struct if_status_mgr *mgr,
+                                    const char *iface_id);
 
 # endif /* controller/if-status.h */
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 5a6274eb2..994aebdfb 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -3912,7 +3912,7 @@  main(int argc, char *argv[])
                         runtime_data ? &runtime_data->lbinding_data : NULL;
                     stopwatch_start(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
                                     time_msec());
-                    if_status_mgr_update(if_mgr, binding_data);
+                    if_status_mgr_update(if_mgr, binding_data, chassis);
                     stopwatch_stop(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
                                    time_msec());
 
@@ -3938,8 +3938,8 @@  main(int argc, char *argv[])
                                    time_msec());
                     stopwatch_start(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
                                     time_msec());
-                    if_status_mgr_run(if_mgr, binding_data, !ovnsb_idl_txn,
-                                      !ovs_idl_txn);
+                    if_status_mgr_run(if_mgr, binding_data, chassis,
+                                      !ovnsb_idl_txn, !ovs_idl_txn);
                     stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
                                    time_msec());
                 }
diff --git a/tests/ovn.at b/tests/ovn.at
index 6a0a169c1..403fbc85f 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -13934,6 +13934,7 @@  ovn_attach n1 br-phys 192.168.0.11
 ovs-vsctl -- add-port br-int hv1-vif0 -- \
 set Interface hv1-vif0 ofport-request=1
 
+
 sim_add hv2
 as hv2
 ovs-vsctl add-br br-phys
@@ -13983,7 +13984,10 @@  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep actions=output:1],
 echo "verifying that lsp0 binding moves when requested-chassis is changed"
 
 ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
-OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this chassis" hv1/ovn-controller.log)])
+
+# We might see multiple "Releasing lport ...", when sb is read only
+OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this chassis" hv1/ovn-controller.log)])
+
 wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
 
 # (6) Chassis hv2 should add flows and hv1 should not.
@@ -14029,7 +14033,7 @@  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep in_port=1], [0], [ig
 AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep actions=output:1], [0], [ignore])
 
 check ovn-nbctl --wait=hv lsp-set-options lsp0 requested-chassis=non-existant-chassis
-OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this chassis" hv1/ovn-controller.log)])
+OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this chassis" hv1/ovn-controller.log)])
 check ovn-nbctl --wait=hv sync
 wait_column '' Port_Binding chasssi logical_port=lsp0
 AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep in_port=1], [1], [])
@@ -30595,3 +30599,50 @@  test_mac_binding_flows hv2 00\:00\:00\:00\:10\:10 1
 OVN_CLEANUP([hv1], [hv2])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([recomputes])
+ovn_start
+
+# Add chassis
+net_add n1
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+add_switch_ports() {
+    for port in $(seq $1 $2); do
+        logical_switch_port=lsp${port}
+        check ovn-nbctl lsp-add ls1 $logical_switch_port
+        check ovn-nbctl lsp-set-addresses $logical_switch_port dynamic
+
+        as hv1 ovs-vsctl \
+            -- add-port br-int vif${port} \
+            -- set Interface vif${port} external_ids:iface-id=$logical_switch_port
+    done
+
+    check ovn-nbctl --wait=hv sync
+    AT_CHECK([test $(ovn-appctl -t ovn-controller coverage/read-counter lflow_run) == $lflow_run])
+}
+
+check ovn-nbctl ls-add ls1
+check ovn-nbctl set Logical_Switch ls1 other_config:subnet=10.1.0.0/16
+check ovn-nbctl set Logical_Switch ls1 other_config:exclude_ips=10.1.255.254
+
+check ovn-nbctl lr-add lr1
+check ovn-nbctl lsp-add ls1 lsp0 -- set Logical_Switch_Port lsp0 type=router options:router-port=lrp0 addresses=dynamic
+check ovn-nbctl lrp-add lr1 lrp0 "f0:00:00:01:00:01" 10.1.255.254/16
+check ovn-nbctl lr-nat-add lr1 snat 10.2.0.1 10.1.0.0/16
+
+check ovn-nbctl --wait=hv sync
+lflow_run=$(ovn-appctl -t ovn-controller coverage/read-counter lflow_run)
+
+add_switch_ports 1 50
+add_switch_ports 51 100
+add_switch_ports 101 150
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])