diff mbox series

[ovs-dev,v6,2/2] controller: throttle port claim attempts

Message ID 20220809182503.955660-2-ihrachys@redhat.com
State Accepted
Headers show
Series [ovs-dev,v6,1/2] Split out code to handle port binding db updates | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Ihar Hrachyshka Aug. 9, 2022, 6:25 p.m. UTC
When multiple chassis are fighting for the same port (requested-chassis
is not set, e.g. for gateway ports), they may produce an unreasonable
number of chassis field updates in a very short time frame (hundreds of
updates in several seconds). This puts unnecessary load on OVN as well
as any db notification consumers trying to keep up with the barrage.

This patch throttles port claim attempts so that they don't happen more
frequently than once per 0.5 seconds.

Reported: https://bugzilla.redhat.com/show_bug.cgi?id=1974898
Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
Acked-by: Mark Michelson <mmichels@redhat.com>
---
v1: initial version
v2: don't postpone claim when port is unclaimed (chassis == nil)
v2: don't postpone claim as an additional chassis for a multichassis
port
v2: fixed memory corruption when modifying sset while iterating over
it
v3: rebased to resolve a git conflict
v4: added opportunistic cleanup for claimed_ports shash
v4: made a debug message in the new test case more intelligible
v5: fixed a memleak in cleanup_claimed_port_timestamps (node->data not
    freed)
v6: rebased, added Mark's ack.
v6: removed poll_wait calls from engine handler, moved them to
    binding_wait.
---
 controller/binding.c        | 127 ++++++++++++++++++++++++++++++++++--
 controller/binding.h        |  10 +++
 controller/ovn-controller.c |  49 ++++++++++++++
 tests/ovn.at                |  41 ++++++++++++
 4 files changed, 222 insertions(+), 5 deletions(-)

Comments

Numan Siddique Aug. 10, 2022, 12:50 a.m. UTC | #1
On Wed, Aug 10, 2022 at 4:25 AM Ihar Hrachyshka <ihrachys@redhat.com> wrote:
>
> When multiple chassis are fighting for the same port (requested-chassis
> is not set, e.g. for gateway ports), they may produce an unreasonable
> number of chassis field updates in a very short time frame (hundreds of
> updates in several seconds). This puts unnecessary load on OVN as well
> as any db notification consumers trying to keep up with the barrage.
>
> This patch throttles port claim attempts so that they don't happen more
> frequently than once per 0.5 seconds.
>
> Reported: https://bugzilla.redhat.com/show_bug.cgi?id=1974898
> Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
> Acked-by: Mark Michelson <mmichels@redhat.com>

Thanks for the v6.  I applied both the patches to the main.

Numan

> ---
> v1: initial version
> v2: don't postpone claim when port is unclaimed (chassis == nil)
> v2: don't postpone claim as an additional chassis for a multichassis
> port
> v2: fixed memory corruption when modifying sset while iterating over
> it
> v3: rebased to resolve a git conflict
> v4: added opportunistic cleanup for claimed_ports shash
> v4: made a debug message in the new test case more intelligible
> v5: fixed a memleak in cleanup_claimed_port_timestamps (node->data not
>     freed)
> v6: rebased, added Mark's ack.
> v6: removed poll_wait calls from engine handler, moved them to
>     binding_wait.
> ---
>  controller/binding.c        | 127 ++++++++++++++++++++++++++++++++++--
>  controller/binding.h        |  10 +++
>  controller/ovn-controller.c |  49 ++++++++++++++
>  tests/ovn.at                |  41 ++++++++++++
>  4 files changed, 222 insertions(+), 5 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index 96a158225..9f5393a92 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -48,6 +48,67 @@ VLOG_DEFINE_THIS_MODULE(binding);
>
>  #define OVN_QOS_TYPE "linux-htb"
>
> +#define CLAIM_TIME_THRESHOLD_MS 500
> +
> +struct claimed_port {
> +    long long int last_claimed;
> +};
> +
> +static struct shash _claimed_ports = SHASH_INITIALIZER(&_claimed_ports);
> +static struct sset _postponed_ports = SSET_INITIALIZER(&_postponed_ports);
> +
> +struct sset *
> +get_postponed_ports(void)
> +{
> +    return &_postponed_ports;
> +}
> +
> +static long long int
> +get_claim_timestamp(const char *port_name)
> +{
> +    struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
> +    return cp ? cp->last_claimed : 0;
> +}
> +
> +static void
> +register_claim_timestamp(const char *port_name, long long int t)
> +{
> +    struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
> +    if (!cp) {
> +        cp = xzalloc(sizeof *cp);
> +        shash_add(&_claimed_ports, port_name, cp);
> +    }
> +    cp->last_claimed = t;
> +}
> +
> +static void
> +cleanup_claimed_port_timestamps(void)
> +{
> +    long long int now = time_msec();
> +    struct shash_node *node;
> +    SHASH_FOR_EACH_SAFE (node, &_claimed_ports) {
> +        struct claimed_port *cp = (struct claimed_port *) node->data;
> +        if (now - cp->last_claimed >= 5 * CLAIM_TIME_THRESHOLD_MS) {
> +            free(cp);
> +            shash_delete(&_claimed_ports, node);
> +        }
> +    }
> +}
> +
> +/* Schedule any pending binding work. Runs with in the main ovn-controller
> + * thread context.*/
> +void
> +binding_wait(void)
> +{
> +    const char *port_name;
> +    SSET_FOR_EACH (port_name, &_postponed_ports) {
> +        long long int t = get_claim_timestamp(port_name);
> +        if (t) {
> +            poll_timer_wait_until(t + CLAIM_TIME_THRESHOLD_MS);
> +        }
> +    }
> +}
> +
>  struct qos_queue {
>      struct hmap_node node;
>      uint32_t queue_id;
> @@ -996,6 +1057,21 @@ remove_additional_chassis(const struct sbrec_port_binding *pb,
>      remove_additional_encap_for_chassis(pb, chassis_rec);
>  }
>
> +static bool
> +lport_maybe_postpone(const char *port_name, long long int now,
> +                     struct sset *postponed_ports)
> +{
> +    long long int last_claimed = get_claim_timestamp(port_name);
> +    if (now - last_claimed >= CLAIM_TIME_THRESHOLD_MS) {
> +        return false;
> +    }
> +
> +    sset_add(postponed_ports, port_name);
> +    VLOG_DBG("Postponed claim on logical port %s.", port_name);
> +
> +    return true;
> +}
> +
>  /* Returns false if lport is not claimed due to 'sb_readonly'.
>   * Returns true otherwise.
>   */
> @@ -1006,7 +1082,8 @@ claim_lport(const struct sbrec_port_binding *pb,
>              const struct ovsrec_interface *iface_rec,
>              bool sb_readonly, bool notify_up,
>              struct hmap *tracked_datapaths,
> -            struct if_status_mgr *if_mgr)
> +            struct if_status_mgr *if_mgr,
> +            struct sset *postponed_ports)
>  {
>      if (!sb_readonly) {
>          claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up, if_mgr);
> @@ -1021,7 +1098,12 @@ claim_lport(const struct sbrec_port_binding *pb,
>                  return false;
>              }
>
> +            long long int now = time_msec();
>              if (pb->chassis) {
> +                if (lport_maybe_postpone(pb->logical_port, now,
> +                                         postponed_ports)) {
> +                    return true;
> +                }
>                  VLOG_INFO("Changing chassis for lport %s from %s to %s.",
>                          pb->logical_port, pb->chassis->name,
>                          chassis_rec->name);
> @@ -1038,6 +1120,9 @@ claim_lport(const struct sbrec_port_binding *pb,
>                  remove_additional_chassis(pb, chassis_rec);
>              }
>              update_tracked = true;
> +
> +            register_claim_timestamp(pb->logical_port, now);
> +            sset_find_and_delete(postponed_ports, pb->logical_port);
>          }
>      } else if (can_bind == CAN_BIND_AS_ADDITIONAL) {
>          if (!is_additional_chassis(pb, chassis_rec)) {
> @@ -1060,8 +1145,10 @@ claim_lport(const struct sbrec_port_binding *pb,
>          }
>      }
>
> -    if (update_tracked && tracked_datapaths) {
> -        update_lport_tracking(pb, tracked_datapaths, true);
> +    if (update_tracked) {
> +        if (tracked_datapaths) {
> +            update_lport_tracking(pb, tracked_datapaths, true);
> +        }
>      }
>
>      /* Check if the port encap binding, if any, has changed */
> @@ -1223,7 +1310,8 @@ consider_vif_lport_(const struct sbrec_port_binding *pb,
>                               b_lport->lbinding->iface,
>                               !b_ctx_in->ovnsb_idl_txn,
>                               !parent_pb, b_ctx_out->tracked_dp_bindings,
> -                             b_ctx_out->if_mgr)){
> +                             b_ctx_out->if_mgr,
> +                             b_ctx_out->postponed_ports)) {
>                  return false;
>              }
>
> @@ -1519,7 +1607,8 @@ consider_nonvif_lport_(const struct sbrec_port_binding *pb,
>          return claim_lport(pb, NULL, b_ctx_in->chassis_rec, NULL,
>                             !b_ctx_in->ovnsb_idl_txn, false,
>                             b_ctx_out->tracked_dp_bindings,
> -                           b_ctx_out->if_mgr);
> +                           b_ctx_out->if_mgr,
> +                           b_ctx_out->postponed_ports);
>      }
>
>      if (pb->chassis == b_ctx_in->chassis_rec ||
> @@ -1843,6 +1932,8 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
>      }
>
>      destroy_qos_map(&qos_map);
> +
> +    cleanup_claimed_port_timestamps();
>  }
>
>  /* Returns true if the database is all cleaned up, false if more work is
> @@ -2740,6 +2831,25 @@ delete_done:
>          }
>      }
>
> +    /* Also handle any postponed (throttled) ports. */
> +    const char *port_name;
> +    struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports);
> +    sset_clone(&postponed_ports, b_ctx_out->postponed_ports);
> +    SSET_FOR_EACH (port_name, &postponed_ports) {
> +        pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name,
> +                                  port_name);
> +        if (!pb) {
> +            sset_find_and_delete(b_ctx_out->postponed_ports, port_name);
> +            continue;
> +        }
> +        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, qos_map_ptr);
> +        if (!handled) {
> +            break;
> +        }
> +    }
> +    sset_destroy(&postponed_ports);
> +    cleanup_claimed_port_timestamps();
> +
>      if (handled && qos_map_ptr && set_noop_qos(b_ctx_in->ovs_idl_txn,
>                                                 b_ctx_in->port_table,
>                                                 b_ctx_in->qos_table,
> @@ -3182,3 +3292,10 @@ ovs_iface_matches_lport_iface_id_ver(const struct ovsrec_interface *iface,
>
>      return true;
>  }
> +
> +void
> +binding_destroy(void)
> +{
> +    shash_destroy_free_data(&_claimed_ports);
> +    sset_clear(&_postponed_ports);
> +}
> diff --git a/controller/binding.h b/controller/binding.h
> index 1fed06674..b2360bac2 100644
> --- a/controller/binding.h
> +++ b/controller/binding.h
> @@ -103,6 +103,8 @@ struct binding_ctx_out {
>      struct hmap *tracked_dp_bindings;
>
>      struct if_status_mgr *if_mgr;
> +
> +    struct sset *postponed_ports;
>  };
>
>  /* Local bindings. binding.c module binds the logical port (represented by
> @@ -219,4 +221,12 @@ struct binding_lport {
>      size_t n_port_security;
>  };
>
> +struct sset *get_postponed_ports(void);
> +
> +/* Schedule any pending binding work. */
> +void binding_wait(void);
> +
> +/* Clean up module state. */
> +void binding_destroy(void);
> +
>  #endif /* controller/binding.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 5449743e8..8268726e6 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1175,6 +1175,41 @@ en_activated_ports_run(struct engine_node *node, void *data_)
>      engine_set_node_state(node, state);
>  }
>
> +struct ed_type_postponed_ports {
> +    struct sset *postponed_ports;
> +};
> +
> +static void *
> +en_postponed_ports_init(struct engine_node *node OVS_UNUSED,
> +                        struct engine_arg *arg OVS_UNUSED)
> +{
> +    struct ed_type_postponed_ports *data = xzalloc(sizeof *data);
> +    data->postponed_ports = get_postponed_ports();
> +    return data;
> +}
> +
> +static void
> +en_postponed_ports_cleanup(void *data_)
> +{
> +    struct ed_type_postponed_ports *data = data_;
> +    if (!data->postponed_ports) {
> +        return;
> +    }
> +    data->postponed_ports = NULL;
> +}
> +
> +static void
> +en_postponed_ports_run(struct engine_node *node, void *data_)
> +{
> +    struct ed_type_postponed_ports *data = data_;
> +    enum engine_node_state state = EN_UNCHANGED;
> +    data->postponed_ports = get_postponed_ports();
> +    if (!sset_is_empty(data->postponed_ports)) {
> +        state = EN_UPDATED;
> +    }
> +    engine_set_node_state(node, state);
> +}
> +
>  struct ed_type_runtime_data {
>      /* Contains "struct local_datapath" nodes. */
>      struct hmap local_datapaths;
> @@ -1205,6 +1240,8 @@ struct ed_type_runtime_data {
>
>      struct shash local_active_ports_ipv6_pd;
>      struct shash local_active_ports_ras;
> +
> +    struct sset *postponed_ports;
>  };
>
>  /* struct ed_type_runtime_data has the below members for tracking the
> @@ -1405,6 +1442,7 @@ init_binding_ctx(struct engine_node *node,
>      b_ctx_out->egress_ifaces = &rt_data->egress_ifaces;
>      b_ctx_out->lbinding_data = &rt_data->lbinding_data;
>      b_ctx_out->local_iface_ids = &rt_data->local_iface_ids;
> +    b_ctx_out->postponed_ports = rt_data->postponed_ports;
>      b_ctx_out->tracked_dp_bindings = NULL;
>      b_ctx_out->if_mgr = ctrl_ctx->if_mgr;
>  }
> @@ -1442,6 +1480,10 @@ en_runtime_data_run(struct engine_node *node, void *data)
>          local_binding_data_init(&rt_data->lbinding_data);
>      }
>
> +    struct ed_type_postponed_ports *pp_data =
> +        engine_get_input_data("postponed_ports", node);
> +    rt_data->postponed_ports = pp_data->postponed_ports;
> +
>      struct binding_ctx_in b_ctx_in;
>      struct binding_ctx_out b_ctx_out;
>      init_binding_ctx(node, rt_data, &b_ctx_in, &b_ctx_out);
> @@ -3542,6 +3584,7 @@ main(int argc, char *argv[])
>      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
>      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
>      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(activated_ports, "activated_ports");
> +    ENGINE_NODE(postponed_ports, "postponed_ports");
>      ENGINE_NODE(pflow_output, "physical_flow_output");
>      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lflow_output, "logical_flow_output");
>      ENGINE_NODE(flow_output, "flow_output");
> @@ -3681,6 +3724,9 @@ main(int argc, char *argv[])
>                       runtime_data_sb_datapath_binding_handler);
>      engine_add_input(&en_runtime_data, &en_sb_port_binding,
>                       runtime_data_sb_port_binding_handler);
> +    /* Reuse the same handler for any previously postponed ports. */
> +    engine_add_input(&en_runtime_data, &en_postponed_ports,
> +                     runtime_data_sb_port_binding_handler);
>
>      /* The OVS interface handler for runtime_data changes MUST be executed
>       * after the sb_port_binding_handler as port_binding deletes must be
> @@ -4191,6 +4237,8 @@ main(int argc, char *argv[])
>                  ofctrl_wait();
>                  pinctrl_wait(ovnsb_idl_txn);
>              }
> +
> +            binding_wait();
>          }
>
>          if (!northd_version_match && br_int) {
> @@ -4318,6 +4366,7 @@ loop_done:
>      lflow_destroy();
>      ofctrl_destroy();
>      pinctrl_destroy();
> +    binding_destroy();
>      patch_destroy();
>      if_status_mgr_destroy(if_mgr);
>      shash_destroy(&vif_plug_deleted_iface_ids);
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 23b205791..c8cc8cde4 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -15274,6 +15274,47 @@ OVN_CLEANUP([hv1],[hv2])
>  AT_CLEANUP
>  ])
>
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([tug-of-war between two chassis for the same port])
> +ovn_start
> +
> +ovn-nbctl ls-add ls0
> +ovn-nbctl lsp-add ls0 lsp0
> +
> +net_add n1
> +for i in 1 2; do
> +    sim_add hv$i
> +    as hv$i
> +    ovs-vsctl add-br br-phys
> +    ovn_attach n1 br-phys 192.168.0.$i
> +done
> +
> +for i in 1 2; do
> +    as hv$i
> +    ovs-vsctl -- add-port br-int vif \
> +              -- set Interface vif external-ids:iface-id=lsp0
> +done
> +
> +# give controllers some time to fight for the port binding
> +sleep 3
> +
> +# calculate the number of port claims registered by each fighting chassis
> +hv1_claims=$(as hv1 grep -c 'Claiming\|Changing chassis' hv1/ovn-controller.log)
> +hv2_claims=$(as hv2 grep -c 'Claiming\|Changing chassis' hv2/ovn-controller.log)
> +
> +echo "hv1 claimed ${hv1_claims} times"
> +echo "hv2 claimed ${hv2_claims} times"
> +
> +# check that neither registered an outrageous number of port claims
> +max_claims=10
> +AT_CHECK([test "${hv1_claims}" -le "${max_claims}"], [0], [])
> +AT_CHECK([test "${hv2_claims}" -le "${max_claims}"], [0], [])
> +
> +OVN_CLEANUP([hv1],[hv2])
> +
> +AT_CLEANUP
> +])
> +
>  OVN_FOR_EACH_NORTHD([
>  AT_SETUP([options:requested-chassis with hostname])
>
> --
> 2.34.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Ihar Hrachyshka Aug. 10, 2022, 6:07 p.m. UTC | #2
Numan, thank you!

Is it backport material? I know there may be some conflicts and am
happy to handle them if we agree this fix can be backported.

Thanks again.

Ihar

On Tue, Aug 9, 2022 at 8:50 PM Numan Siddique <numans@ovn.org> wrote:
>
> On Wed, Aug 10, 2022 at 4:25 AM Ihar Hrachyshka <ihrachys@redhat.com> wrote:
> >
> > When multiple chassis are fighting for the same port (requested-chassis
> > is not set, e.g. for gateway ports), they may produce an unreasonable
> > number of chassis field updates in a very short time frame (hundreds of
> > updates in several seconds). This puts unnecessary load on OVN as well
> > as any db notification consumers trying to keep up with the barrage.
> >
> > This patch throttles port claim attempts so that they don't happen more
> > frequently than once per 0.5 seconds.
> >
> > Reported: https://bugzilla.redhat.com/show_bug.cgi?id=1974898
> > Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
> > Acked-by: Mark Michelson <mmichels@redhat.com>
>
> Thanks for the v6.  I applied both the patches to the main.
>
> Numan
>
> > ---
> > v1: initial version
> > v2: don't postpone claim when port is unclaimed (chassis == nil)
> > v2: don't postpone claim as an additional chassis for a multichassis
> > port
> > v2: fixed memory corruption when modifying sset while iterating over
> > it
> > v3: rebased to resolve a git conflict
> > v4: added opportunistic cleanup for claimed_ports shash
> > v4: made a debug message in the new test case more intelligible
> > v5: fixed a memleak in cleanup_claimed_port_timestamps (node->data not
> >     freed)
> > v6: rebased, added Mark's ack.
> > v6: removed poll_wait calls from engine handler, moved them to
> >     binding_wait.
> > ---
> >  controller/binding.c        | 127 ++++++++++++++++++++++++++++++++++--
> >  controller/binding.h        |  10 +++
> >  controller/ovn-controller.c |  49 ++++++++++++++
> >  tests/ovn.at                |  41 ++++++++++++
> >  4 files changed, 222 insertions(+), 5 deletions(-)
> >
> > diff --git a/controller/binding.c b/controller/binding.c
> > index 96a158225..9f5393a92 100644
> > --- a/controller/binding.c
> > +++ b/controller/binding.c
> > @@ -48,6 +48,67 @@ VLOG_DEFINE_THIS_MODULE(binding);
> >
> >  #define OVN_QOS_TYPE "linux-htb"
> >
> > +#define CLAIM_TIME_THRESHOLD_MS 500
> > +
> > +struct claimed_port {
> > +    long long int last_claimed;
> > +};
> > +
> > +static struct shash _claimed_ports = SHASH_INITIALIZER(&_claimed_ports);
> > +static struct sset _postponed_ports = SSET_INITIALIZER(&_postponed_ports);
> > +
> > +struct sset *
> > +get_postponed_ports(void)
> > +{
> > +    return &_postponed_ports;
> > +}
> > +
> > +static long long int
> > +get_claim_timestamp(const char *port_name)
> > +{
> > +    struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
> > +    return cp ? cp->last_claimed : 0;
> > +}
> > +
> > +static void
> > +register_claim_timestamp(const char *port_name, long long int t)
> > +{
> > +    struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
> > +    if (!cp) {
> > +        cp = xzalloc(sizeof *cp);
> > +        shash_add(&_claimed_ports, port_name, cp);
> > +    }
> > +    cp->last_claimed = t;
> > +}
> > +
> > +static void
> > +cleanup_claimed_port_timestamps(void)
> > +{
> > +    long long int now = time_msec();
> > +    struct shash_node *node;
> > +    SHASH_FOR_EACH_SAFE (node, &_claimed_ports) {
> > +        struct claimed_port *cp = (struct claimed_port *) node->data;
> > +        if (now - cp->last_claimed >= 5 * CLAIM_TIME_THRESHOLD_MS) {
> > +            free(cp);
> > +            shash_delete(&_claimed_ports, node);
> > +        }
> > +    }
> > +}
> > +
> > +/* Schedule any pending binding work. Runs with in the main ovn-controller
> > + * thread context.*/
> > +void
> > +binding_wait(void)
> > +{
> > +    const char *port_name;
> > +    SSET_FOR_EACH (port_name, &_postponed_ports) {
> > +        long long int t = get_claim_timestamp(port_name);
> > +        if (t) {
> > +            poll_timer_wait_until(t + CLAIM_TIME_THRESHOLD_MS);
> > +        }
> > +    }
> > +}
> > +
> >  struct qos_queue {
> >      struct hmap_node node;
> >      uint32_t queue_id;
> > @@ -996,6 +1057,21 @@ remove_additional_chassis(const struct sbrec_port_binding *pb,
> >      remove_additional_encap_for_chassis(pb, chassis_rec);
> >  }
> >
> > +static bool
> > +lport_maybe_postpone(const char *port_name, long long int now,
> > +                     struct sset *postponed_ports)
> > +{
> > +    long long int last_claimed = get_claim_timestamp(port_name);
> > +    if (now - last_claimed >= CLAIM_TIME_THRESHOLD_MS) {
> > +        return false;
> > +    }
> > +
> > +    sset_add(postponed_ports, port_name);
> > +    VLOG_DBG("Postponed claim on logical port %s.", port_name);
> > +
> > +    return true;
> > +}
> > +
> >  /* Returns false if lport is not claimed due to 'sb_readonly'.
> >   * Returns true otherwise.
> >   */
> > @@ -1006,7 +1082,8 @@ claim_lport(const struct sbrec_port_binding *pb,
> >              const struct ovsrec_interface *iface_rec,
> >              bool sb_readonly, bool notify_up,
> >              struct hmap *tracked_datapaths,
> > -            struct if_status_mgr *if_mgr)
> > +            struct if_status_mgr *if_mgr,
> > +            struct sset *postponed_ports)
> >  {
> >      if (!sb_readonly) {
> >          claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up, if_mgr);
> > @@ -1021,7 +1098,12 @@ claim_lport(const struct sbrec_port_binding *pb,
> >                  return false;
> >              }
> >
> > +            long long int now = time_msec();
> >              if (pb->chassis) {
> > +                if (lport_maybe_postpone(pb->logical_port, now,
> > +                                         postponed_ports)) {
> > +                    return true;
> > +                }
> >                  VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> >                          pb->logical_port, pb->chassis->name,
> >                          chassis_rec->name);
> > @@ -1038,6 +1120,9 @@ claim_lport(const struct sbrec_port_binding *pb,
> >                  remove_additional_chassis(pb, chassis_rec);
> >              }
> >              update_tracked = true;
> > +
> > +            register_claim_timestamp(pb->logical_port, now);
> > +            sset_find_and_delete(postponed_ports, pb->logical_port);
> >          }
> >      } else if (can_bind == CAN_BIND_AS_ADDITIONAL) {
> >          if (!is_additional_chassis(pb, chassis_rec)) {
> > @@ -1060,8 +1145,10 @@ claim_lport(const struct sbrec_port_binding *pb,
> >          }
> >      }
> >
> > -    if (update_tracked && tracked_datapaths) {
> > -        update_lport_tracking(pb, tracked_datapaths, true);
> > +    if (update_tracked) {
> > +        if (tracked_datapaths) {
> > +            update_lport_tracking(pb, tracked_datapaths, true);
> > +        }
> >      }
> >
> >      /* Check if the port encap binding, if any, has changed */
> > @@ -1223,7 +1310,8 @@ consider_vif_lport_(const struct sbrec_port_binding *pb,
> >                               b_lport->lbinding->iface,
> >                               !b_ctx_in->ovnsb_idl_txn,
> >                               !parent_pb, b_ctx_out->tracked_dp_bindings,
> > -                             b_ctx_out->if_mgr)){
> > +                             b_ctx_out->if_mgr,
> > +                             b_ctx_out->postponed_ports)) {
> >                  return false;
> >              }
> >
> > @@ -1519,7 +1607,8 @@ consider_nonvif_lport_(const struct sbrec_port_binding *pb,
> >          return claim_lport(pb, NULL, b_ctx_in->chassis_rec, NULL,
> >                             !b_ctx_in->ovnsb_idl_txn, false,
> >                             b_ctx_out->tracked_dp_bindings,
> > -                           b_ctx_out->if_mgr);
> > +                           b_ctx_out->if_mgr,
> > +                           b_ctx_out->postponed_ports);
> >      }
> >
> >      if (pb->chassis == b_ctx_in->chassis_rec ||
> > @@ -1843,6 +1932,8 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
> >      }
> >
> >      destroy_qos_map(&qos_map);
> > +
> > +    cleanup_claimed_port_timestamps();
> >  }
> >
> >  /* Returns true if the database is all cleaned up, false if more work is
> > @@ -2740,6 +2831,25 @@ delete_done:
> >          }
> >      }
> >
> > +    /* Also handle any postponed (throttled) ports. */
> > +    const char *port_name;
> > +    struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports);
> > +    sset_clone(&postponed_ports, b_ctx_out->postponed_ports);
> > +    SSET_FOR_EACH (port_name, &postponed_ports) {
> > +        pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name,
> > +                                  port_name);
> > +        if (!pb) {
> > +            sset_find_and_delete(b_ctx_out->postponed_ports, port_name);
> > +            continue;
> > +        }
> > +        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, qos_map_ptr);
> > +        if (!handled) {
> > +            break;
> > +        }
> > +    }
> > +    sset_destroy(&postponed_ports);
> > +    cleanup_claimed_port_timestamps();
> > +
> >      if (handled && qos_map_ptr && set_noop_qos(b_ctx_in->ovs_idl_txn,
> >                                                 b_ctx_in->port_table,
> >                                                 b_ctx_in->qos_table,
> > @@ -3182,3 +3292,10 @@ ovs_iface_matches_lport_iface_id_ver(const struct ovsrec_interface *iface,
> >
> >      return true;
> >  }
> > +
> > +void
> > +binding_destroy(void)
> > +{
> > +    shash_destroy_free_data(&_claimed_ports);
> > +    sset_clear(&_postponed_ports);
> > +}
> > diff --git a/controller/binding.h b/controller/binding.h
> > index 1fed06674..b2360bac2 100644
> > --- a/controller/binding.h
> > +++ b/controller/binding.h
> > @@ -103,6 +103,8 @@ struct binding_ctx_out {
> >      struct hmap *tracked_dp_bindings;
> >
> >      struct if_status_mgr *if_mgr;
> > +
> > +    struct sset *postponed_ports;
> >  };
> >
> >  /* Local bindings. binding.c module binds the logical port (represented by
> > @@ -219,4 +221,12 @@ struct binding_lport {
> >      size_t n_port_security;
> >  };
> >
> > +struct sset *get_postponed_ports(void);
> > +
> > +/* Schedule any pending binding work. */
> > +void binding_wait(void);
> > +
> > +/* Clean up module state. */
> > +void binding_destroy(void);
> > +
> >  #endif /* controller/binding.h */
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 5449743e8..8268726e6 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -1175,6 +1175,41 @@ en_activated_ports_run(struct engine_node *node, void *data_)
> >      engine_set_node_state(node, state);
> >  }
> >
> > +struct ed_type_postponed_ports {
> > +    struct sset *postponed_ports;
> > +};
> > +
> > +static void *
> > +en_postponed_ports_init(struct engine_node *node OVS_UNUSED,
> > +                        struct engine_arg *arg OVS_UNUSED)
> > +{
> > +    struct ed_type_postponed_ports *data = xzalloc(sizeof *data);
> > +    data->postponed_ports = get_postponed_ports();
> > +    return data;
> > +}
> > +
> > +static void
> > +en_postponed_ports_cleanup(void *data_)
> > +{
> > +    struct ed_type_postponed_ports *data = data_;
> > +    if (!data->postponed_ports) {
> > +        return;
> > +    }
> > +    data->postponed_ports = NULL;
> > +}
> > +
> > +static void
> > +en_postponed_ports_run(struct engine_node *node, void *data_)
> > +{
> > +    struct ed_type_postponed_ports *data = data_;
> > +    enum engine_node_state state = EN_UNCHANGED;
> > +    data->postponed_ports = get_postponed_ports();
> > +    if (!sset_is_empty(data->postponed_ports)) {
> > +        state = EN_UPDATED;
> > +    }
> > +    engine_set_node_state(node, state);
> > +}
> > +
> >  struct ed_type_runtime_data {
> >      /* Contains "struct local_datapath" nodes. */
> >      struct hmap local_datapaths;
> > @@ -1205,6 +1240,8 @@ struct ed_type_runtime_data {
> >
> >      struct shash local_active_ports_ipv6_pd;
> >      struct shash local_active_ports_ras;
> > +
> > +    struct sset *postponed_ports;
> >  };
> >
> >  /* struct ed_type_runtime_data has the below members for tracking the
> > @@ -1405,6 +1442,7 @@ init_binding_ctx(struct engine_node *node,
> >      b_ctx_out->egress_ifaces = &rt_data->egress_ifaces;
> >      b_ctx_out->lbinding_data = &rt_data->lbinding_data;
> >      b_ctx_out->local_iface_ids = &rt_data->local_iface_ids;
> > +    b_ctx_out->postponed_ports = rt_data->postponed_ports;
> >      b_ctx_out->tracked_dp_bindings = NULL;
> >      b_ctx_out->if_mgr = ctrl_ctx->if_mgr;
> >  }
> > @@ -1442,6 +1480,10 @@ en_runtime_data_run(struct engine_node *node, void *data)
> >          local_binding_data_init(&rt_data->lbinding_data);
> >      }
> >
> > +    struct ed_type_postponed_ports *pp_data =
> > +        engine_get_input_data("postponed_ports", node);
> > +    rt_data->postponed_ports = pp_data->postponed_ports;
> > +
> >      struct binding_ctx_in b_ctx_in;
> >      struct binding_ctx_out b_ctx_out;
> >      init_binding_ctx(node, rt_data, &b_ctx_in, &b_ctx_out);
> > @@ -3542,6 +3584,7 @@ main(int argc, char *argv[])
> >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> >      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(activated_ports, "activated_ports");
> > +    ENGINE_NODE(postponed_ports, "postponed_ports");
> >      ENGINE_NODE(pflow_output, "physical_flow_output");
> >      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lflow_output, "logical_flow_output");
> >      ENGINE_NODE(flow_output, "flow_output");
> > @@ -3681,6 +3724,9 @@ main(int argc, char *argv[])
> >                       runtime_data_sb_datapath_binding_handler);
> >      engine_add_input(&en_runtime_data, &en_sb_port_binding,
> >                       runtime_data_sb_port_binding_handler);
> > +    /* Reuse the same handler for any previously postponed ports. */
> > +    engine_add_input(&en_runtime_data, &en_postponed_ports,
> > +                     runtime_data_sb_port_binding_handler);
> >
> >      /* The OVS interface handler for runtime_data changes MUST be executed
> >       * after the sb_port_binding_handler as port_binding deletes must be
> > @@ -4191,6 +4237,8 @@ main(int argc, char *argv[])
> >                  ofctrl_wait();
> >                  pinctrl_wait(ovnsb_idl_txn);
> >              }
> > +
> > +            binding_wait();
> >          }
> >
> >          if (!northd_version_match && br_int) {
> > @@ -4318,6 +4366,7 @@ loop_done:
> >      lflow_destroy();
> >      ofctrl_destroy();
> >      pinctrl_destroy();
> > +    binding_destroy();
> >      patch_destroy();
> >      if_status_mgr_destroy(if_mgr);
> >      shash_destroy(&vif_plug_deleted_iface_ids);
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index 23b205791..c8cc8cde4 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -15274,6 +15274,47 @@ OVN_CLEANUP([hv1],[hv2])
> >  AT_CLEANUP
> >  ])
> >
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([tug-of-war between two chassis for the same port])
> > +ovn_start
> > +
> > +ovn-nbctl ls-add ls0
> > +ovn-nbctl lsp-add ls0 lsp0
> > +
> > +net_add n1
> > +for i in 1 2; do
> > +    sim_add hv$i
> > +    as hv$i
> > +    ovs-vsctl add-br br-phys
> > +    ovn_attach n1 br-phys 192.168.0.$i
> > +done
> > +
> > +for i in 1 2; do
> > +    as hv$i
> > +    ovs-vsctl -- add-port br-int vif \
> > +              -- set Interface vif external-ids:iface-id=lsp0
> > +done
> > +
> > +# give controllers some time to fight for the port binding
> > +sleep 3
> > +
> > +# calculate the number of port claims registered by each fighting chassis
> > +hv1_claims=$(as hv1 grep -c 'Claiming\|Changing chassis' hv1/ovn-controller.log)
> > +hv2_claims=$(as hv2 grep -c 'Claiming\|Changing chassis' hv2/ovn-controller.log)
> > +
> > +echo "hv1 claimed ${hv1_claims} times"
> > +echo "hv2 claimed ${hv2_claims} times"
> > +
> > +# check that neither registered an outrageous number of port claims
> > +max_claims=10
> > +AT_CHECK([test "${hv1_claims}" -le "${max_claims}"], [0], [])
> > +AT_CHECK([test "${hv2_claims}" -le "${max_claims}"], [0], [])
> > +
> > +OVN_CLEANUP([hv1],[hv2])
> > +
> > +AT_CLEANUP
> > +])
> > +
> >  OVN_FOR_EACH_NORTHD([
> >  AT_SETUP([options:requested-chassis with hostname])
> >
> > --
> > 2.34.1
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
>
Numan Siddique Aug. 15, 2022, 12:52 a.m. UTC | #3
On Thu, Aug 11, 2022 at 4:07 AM Ihar Hrachyshka <ihrachys@redhat.com> wrote:
>
> Numan, thank you!
>
> Is it backport material? I know there may be some conflicts and am
> happy to handle them if we agree this fix can be backported.

@Mark Michelson  @Han Zhou Any comments or any objections on this ?

Thanks
Numan


>
> Thanks again.
>
> Ihar
>
> On Tue, Aug 9, 2022 at 8:50 PM Numan Siddique <numans@ovn.org> wrote:
> >
> > On Wed, Aug 10, 2022 at 4:25 AM Ihar Hrachyshka <ihrachys@redhat.com> wrote:
> > >
> > > When multiple chassis are fighting for the same port (requested-chassis
> > > is not set, e.g. for gateway ports), they may produce an unreasonable
> > > number of chassis field updates in a very short time frame (hundreds of
> > > updates in several seconds). This puts unnecessary load on OVN as well
> > > as any db notification consumers trying to keep up with the barrage.
> > >
> > > This patch throttles port claim attempts so that they don't happen more
> > > frequently than once per 0.5 seconds.
> > >
> > > Reported: https://bugzilla.redhat.com/show_bug.cgi?id=1974898
> > > Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
> > > Acked-by: Mark Michelson <mmichels@redhat.com>
> >
> > Thanks for the v6.  I applied both the patches to the main.
> >
> > Numan
> >
> > > ---
> > > v1: initial version
> > > v2: don't postpone claim when port is unclaimed (chassis == nil)
> > > v2: don't postpone claim as an additional chassis for a multichassis
> > > port
> > > v2: fixed memory corruption when modifying sset while iterating over
> > > it
> > > v3: rebased to resolve a git conflict
> > > v4: added opportunistic cleanup for claimed_ports shash
> > > v4: made a debug message in the new test case more intelligible
> > > v5: fixed a memleak in cleanup_claimed_port_timestamps (node->data not
> > >     freed)
> > > v6: rebased, added Mark's ack.
> > > v6: removed poll_wait calls from engine handler, moved them to
> > >     binding_wait.
> > > ---
> > >  controller/binding.c        | 127 ++++++++++++++++++++++++++++++++++--
> > >  controller/binding.h        |  10 +++
> > >  controller/ovn-controller.c |  49 ++++++++++++++
> > >  tests/ovn.at                |  41 ++++++++++++
> > >  4 files changed, 222 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/controller/binding.c b/controller/binding.c
> > > index 96a158225..9f5393a92 100644
> > > --- a/controller/binding.c
> > > +++ b/controller/binding.c
> > > @@ -48,6 +48,67 @@ VLOG_DEFINE_THIS_MODULE(binding);
> > >
> > >  #define OVN_QOS_TYPE "linux-htb"
> > >
> > > +#define CLAIM_TIME_THRESHOLD_MS 500
> > > +
> > > +struct claimed_port {
> > > +    long long int last_claimed;
> > > +};
> > > +
> > > +static struct shash _claimed_ports = SHASH_INITIALIZER(&_claimed_ports);
> > > +static struct sset _postponed_ports = SSET_INITIALIZER(&_postponed_ports);
> > > +
> > > +struct sset *
> > > +get_postponed_ports(void)
> > > +{
> > > +    return &_postponed_ports;
> > > +}
> > > +
> > > +static long long int
> > > +get_claim_timestamp(const char *port_name)
> > > +{
> > > +    struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
> > > +    return cp ? cp->last_claimed : 0;
> > > +}
> > > +
> > > +static void
> > > +register_claim_timestamp(const char *port_name, long long int t)
> > > +{
> > > +    struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
> > > +    if (!cp) {
> > > +        cp = xzalloc(sizeof *cp);
> > > +        shash_add(&_claimed_ports, port_name, cp);
> > > +    }
> > > +    cp->last_claimed = t;
> > > +}
> > > +
> > > +static void
> > > +cleanup_claimed_port_timestamps(void)
> > > +{
> > > +    long long int now = time_msec();
> > > +    struct shash_node *node;
> > > +    SHASH_FOR_EACH_SAFE (node, &_claimed_ports) {
> > > +        struct claimed_port *cp = (struct claimed_port *) node->data;
> > > +        if (now - cp->last_claimed >= 5 * CLAIM_TIME_THRESHOLD_MS) {
> > > +            free(cp);
> > > +            shash_delete(&_claimed_ports, node);
> > > +        }
> > > +    }
> > > +}
> > > +
> > > +/* Schedule any pending binding work. Runs with in the main ovn-controller
> > > + * thread context.*/
> > > +void
> > > +binding_wait(void)
> > > +{
> > > +    const char *port_name;
> > > +    SSET_FOR_EACH (port_name, &_postponed_ports) {
> > > +        long long int t = get_claim_timestamp(port_name);
> > > +        if (t) {
> > > +            poll_timer_wait_until(t + CLAIM_TIME_THRESHOLD_MS);
> > > +        }
> > > +    }
> > > +}
> > > +
> > >  struct qos_queue {
> > >      struct hmap_node node;
> > >      uint32_t queue_id;
> > > @@ -996,6 +1057,21 @@ remove_additional_chassis(const struct sbrec_port_binding *pb,
> > >      remove_additional_encap_for_chassis(pb, chassis_rec);
> > >  }
> > >
> > > +static bool
> > > +lport_maybe_postpone(const char *port_name, long long int now,
> > > +                     struct sset *postponed_ports)
> > > +{
> > > +    long long int last_claimed = get_claim_timestamp(port_name);
> > > +    if (now - last_claimed >= CLAIM_TIME_THRESHOLD_MS) {
> > > +        return false;
> > > +    }
> > > +
> > > +    sset_add(postponed_ports, port_name);
> > > +    VLOG_DBG("Postponed claim on logical port %s.", port_name);
> > > +
> > > +    return true;
> > > +}
> > > +
> > >  /* Returns false if lport is not claimed due to 'sb_readonly'.
> > >   * Returns true otherwise.
> > >   */
> > > @@ -1006,7 +1082,8 @@ claim_lport(const struct sbrec_port_binding *pb,
> > >              const struct ovsrec_interface *iface_rec,
> > >              bool sb_readonly, bool notify_up,
> > >              struct hmap *tracked_datapaths,
> > > -            struct if_status_mgr *if_mgr)
> > > +            struct if_status_mgr *if_mgr,
> > > +            struct sset *postponed_ports)
> > >  {
> > >      if (!sb_readonly) {
> > >          claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up, if_mgr);
> > > @@ -1021,7 +1098,12 @@ claim_lport(const struct sbrec_port_binding *pb,
> > >                  return false;
> > >              }
> > >
> > > +            long long int now = time_msec();
> > >              if (pb->chassis) {
> > > +                if (lport_maybe_postpone(pb->logical_port, now,
> > > +                                         postponed_ports)) {
> > > +                    return true;
> > > +                }
> > >                  VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> > >                          pb->logical_port, pb->chassis->name,
> > >                          chassis_rec->name);
> > > @@ -1038,6 +1120,9 @@ claim_lport(const struct sbrec_port_binding *pb,
> > >                  remove_additional_chassis(pb, chassis_rec);
> > >              }
> > >              update_tracked = true;
> > > +
> > > +            register_claim_timestamp(pb->logical_port, now);
> > > +            sset_find_and_delete(postponed_ports, pb->logical_port);
> > >          }
> > >      } else if (can_bind == CAN_BIND_AS_ADDITIONAL) {
> > >          if (!is_additional_chassis(pb, chassis_rec)) {
> > > @@ -1060,8 +1145,10 @@ claim_lport(const struct sbrec_port_binding *pb,
> > >          }
> > >      }
> > >
> > > -    if (update_tracked && tracked_datapaths) {
> > > -        update_lport_tracking(pb, tracked_datapaths, true);
> > > +    if (update_tracked) {
> > > +        if (tracked_datapaths) {
> > > +            update_lport_tracking(pb, tracked_datapaths, true);
> > > +        }
> > >      }
> > >
> > >      /* Check if the port encap binding, if any, has changed */
> > > @@ -1223,7 +1310,8 @@ consider_vif_lport_(const struct sbrec_port_binding *pb,
> > >                               b_lport->lbinding->iface,
> > >                               !b_ctx_in->ovnsb_idl_txn,
> > >                               !parent_pb, b_ctx_out->tracked_dp_bindings,
> > > -                             b_ctx_out->if_mgr)){
> > > +                             b_ctx_out->if_mgr,
> > > +                             b_ctx_out->postponed_ports)) {
> > >                  return false;
> > >              }
> > >
> > > @@ -1519,7 +1607,8 @@ consider_nonvif_lport_(const struct sbrec_port_binding *pb,
> > >          return claim_lport(pb, NULL, b_ctx_in->chassis_rec, NULL,
> > >                             !b_ctx_in->ovnsb_idl_txn, false,
> > >                             b_ctx_out->tracked_dp_bindings,
> > > -                           b_ctx_out->if_mgr);
> > > +                           b_ctx_out->if_mgr,
> > > +                           b_ctx_out->postponed_ports);
> > >      }
> > >
> > >      if (pb->chassis == b_ctx_in->chassis_rec ||
> > > @@ -1843,6 +1932,8 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
> > >      }
> > >
> > >      destroy_qos_map(&qos_map);
> > > +
> > > +    cleanup_claimed_port_timestamps();
> > >  }
> > >
> > >  /* Returns true if the database is all cleaned up, false if more work is
> > > @@ -2740,6 +2831,25 @@ delete_done:
> > >          }
> > >      }
> > >
> > > +    /* Also handle any postponed (throttled) ports. */
> > > +    const char *port_name;
> > > +    struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports);
> > > +    sset_clone(&postponed_ports, b_ctx_out->postponed_ports);
> > > +    SSET_FOR_EACH (port_name, &postponed_ports) {
> > > +        pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name,
> > > +                                  port_name);
> > > +        if (!pb) {
> > > +            sset_find_and_delete(b_ctx_out->postponed_ports, port_name);
> > > +            continue;
> > > +        }
> > > +        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, qos_map_ptr);
> > > +        if (!handled) {
> > > +            break;
> > > +        }
> > > +    }
> > > +    sset_destroy(&postponed_ports);
> > > +    cleanup_claimed_port_timestamps();
> > > +
> > >      if (handled && qos_map_ptr && set_noop_qos(b_ctx_in->ovs_idl_txn,
> > >                                                 b_ctx_in->port_table,
> > >                                                 b_ctx_in->qos_table,
> > > @@ -3182,3 +3292,10 @@ ovs_iface_matches_lport_iface_id_ver(const struct ovsrec_interface *iface,
> > >
> > >      return true;
> > >  }
> > > +
> > > +void
> > > +binding_destroy(void)
> > > +{
> > > +    shash_destroy_free_data(&_claimed_ports);
> > > +    sset_clear(&_postponed_ports);
> > > +}
> > > diff --git a/controller/binding.h b/controller/binding.h
> > > index 1fed06674..b2360bac2 100644
> > > --- a/controller/binding.h
> > > +++ b/controller/binding.h
> > > @@ -103,6 +103,8 @@ struct binding_ctx_out {
> > >      struct hmap *tracked_dp_bindings;
> > >
> > >      struct if_status_mgr *if_mgr;
> > > +
> > > +    struct sset *postponed_ports;
> > >  };
> > >
> > >  /* Local bindings. binding.c module binds the logical port (represented by
> > > @@ -219,4 +221,12 @@ struct binding_lport {
> > >      size_t n_port_security;
> > >  };
> > >
> > > +struct sset *get_postponed_ports(void);
> > > +
> > > +/* Schedule any pending binding work. */
> > > +void binding_wait(void);
> > > +
> > > +/* Clean up module state. */
> > > +void binding_destroy(void);
> > > +
> > >  #endif /* controller/binding.h */
> > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > > index 5449743e8..8268726e6 100644
> > > --- a/controller/ovn-controller.c
> > > +++ b/controller/ovn-controller.c
> > > @@ -1175,6 +1175,41 @@ en_activated_ports_run(struct engine_node *node, void *data_)
> > >      engine_set_node_state(node, state);
> > >  }
> > >
> > > +struct ed_type_postponed_ports {
> > > +    struct sset *postponed_ports;
> > > +};
> > > +
> > > +static void *
> > > +en_postponed_ports_init(struct engine_node *node OVS_UNUSED,
> > > +                        struct engine_arg *arg OVS_UNUSED)
> > > +{
> > > +    struct ed_type_postponed_ports *data = xzalloc(sizeof *data);
> > > +    data->postponed_ports = get_postponed_ports();
> > > +    return data;
> > > +}
> > > +
> > > +static void
> > > +en_postponed_ports_cleanup(void *data_)
> > > +{
> > > +    struct ed_type_postponed_ports *data = data_;
> > > +    if (!data->postponed_ports) {
> > > +        return;
> > > +    }
> > > +    data->postponed_ports = NULL;
> > > +}
> > > +
> > > +static void
> > > +en_postponed_ports_run(struct engine_node *node, void *data_)
> > > +{
> > > +    struct ed_type_postponed_ports *data = data_;
> > > +    enum engine_node_state state = EN_UNCHANGED;
> > > +    data->postponed_ports = get_postponed_ports();
> > > +    if (!sset_is_empty(data->postponed_ports)) {
> > > +        state = EN_UPDATED;
> > > +    }
> > > +    engine_set_node_state(node, state);
> > > +}
> > > +
> > >  struct ed_type_runtime_data {
> > >      /* Contains "struct local_datapath" nodes. */
> > >      struct hmap local_datapaths;
> > > @@ -1205,6 +1240,8 @@ struct ed_type_runtime_data {
> > >
> > >      struct shash local_active_ports_ipv6_pd;
> > >      struct shash local_active_ports_ras;
> > > +
> > > +    struct sset *postponed_ports;
> > >  };
> > >
> > >  /* struct ed_type_runtime_data has the below members for tracking the
> > > @@ -1405,6 +1442,7 @@ init_binding_ctx(struct engine_node *node,
> > >      b_ctx_out->egress_ifaces = &rt_data->egress_ifaces;
> > >      b_ctx_out->lbinding_data = &rt_data->lbinding_data;
> > >      b_ctx_out->local_iface_ids = &rt_data->local_iface_ids;
> > > +    b_ctx_out->postponed_ports = rt_data->postponed_ports;
> > >      b_ctx_out->tracked_dp_bindings = NULL;
> > >      b_ctx_out->if_mgr = ctrl_ctx->if_mgr;
> > >  }
> > > @@ -1442,6 +1480,10 @@ en_runtime_data_run(struct engine_node *node, void *data)
> > >          local_binding_data_init(&rt_data->lbinding_data);
> > >      }
> > >
> > > +    struct ed_type_postponed_ports *pp_data =
> > > +        engine_get_input_data("postponed_ports", node);
> > > +    rt_data->postponed_ports = pp_data->postponed_ports;
> > > +
> > >      struct binding_ctx_in b_ctx_in;
> > >      struct binding_ctx_out b_ctx_out;
> > >      init_binding_ctx(node, rt_data, &b_ctx_in, &b_ctx_out);
> > > @@ -3542,6 +3584,7 @@ main(int argc, char *argv[])
> > >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> > >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> > >      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(activated_ports, "activated_ports");
> > > +    ENGINE_NODE(postponed_ports, "postponed_ports");
> > >      ENGINE_NODE(pflow_output, "physical_flow_output");
> > >      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lflow_output, "logical_flow_output");
> > >      ENGINE_NODE(flow_output, "flow_output");
> > > @@ -3681,6 +3724,9 @@ main(int argc, char *argv[])
> > >                       runtime_data_sb_datapath_binding_handler);
> > >      engine_add_input(&en_runtime_data, &en_sb_port_binding,
> > >                       runtime_data_sb_port_binding_handler);
> > > +    /* Reuse the same handler for any previously postponed ports. */
> > > +    engine_add_input(&en_runtime_data, &en_postponed_ports,
> > > +                     runtime_data_sb_port_binding_handler);
> > >
> > >      /* The OVS interface handler for runtime_data changes MUST be executed
> > >       * after the sb_port_binding_handler as port_binding deletes must be
> > > @@ -4191,6 +4237,8 @@ main(int argc, char *argv[])
> > >                  ofctrl_wait();
> > >                  pinctrl_wait(ovnsb_idl_txn);
> > >              }
> > > +
> > > +            binding_wait();
> > >          }
> > >
> > >          if (!northd_version_match && br_int) {
> > > @@ -4318,6 +4366,7 @@ loop_done:
> > >      lflow_destroy();
> > >      ofctrl_destroy();
> > >      pinctrl_destroy();
> > > +    binding_destroy();
> > >      patch_destroy();
> > >      if_status_mgr_destroy(if_mgr);
> > >      shash_destroy(&vif_plug_deleted_iface_ids);
> > > diff --git a/tests/ovn.at b/tests/ovn.at
> > > index 23b205791..c8cc8cde4 100644
> > > --- a/tests/ovn.at
> > > +++ b/tests/ovn.at
> > > @@ -15274,6 +15274,47 @@ OVN_CLEANUP([hv1],[hv2])
> > >  AT_CLEANUP
> > >  ])
> > >
> > > +OVN_FOR_EACH_NORTHD([
> > > +AT_SETUP([tug-of-war between two chassis for the same port])
> > > +ovn_start
> > > +
> > > +ovn-nbctl ls-add ls0
> > > +ovn-nbctl lsp-add ls0 lsp0
> > > +
> > > +net_add n1
> > > +for i in 1 2; do
> > > +    sim_add hv$i
> > > +    as hv$i
> > > +    ovs-vsctl add-br br-phys
> > > +    ovn_attach n1 br-phys 192.168.0.$i
> > > +done
> > > +
> > > +for i in 1 2; do
> > > +    as hv$i
> > > +    ovs-vsctl -- add-port br-int vif \
> > > +              -- set Interface vif external-ids:iface-id=lsp0
> > > +done
> > > +
> > > +# give controllers some time to fight for the port binding
> > > +sleep 3
> > > +
> > > +# calculate the number of port claims registered by each fighting chassis
> > > +hv1_claims=$(as hv1 grep -c 'Claiming\|Changing chassis' hv1/ovn-controller.log)
> > > +hv2_claims=$(as hv2 grep -c 'Claiming\|Changing chassis' hv2/ovn-controller.log)
> > > +
> > > +echo "hv1 claimed ${hv1_claims} times"
> > > +echo "hv2 claimed ${hv2_claims} times"
> > > +
> > > +# check that neither registered an outrageous number of port claims
> > > +max_claims=10
> > > +AT_CHECK([test "${hv1_claims}" -le "${max_claims}"], [0], [])
> > > +AT_CHECK([test "${hv2_claims}" -le "${max_claims}"], [0], [])
> > > +
> > > +OVN_CLEANUP([hv1],[hv2])
> > > +
> > > +AT_CLEANUP
> > > +])
> > > +
> > >  OVN_FOR_EACH_NORTHD([
> > >  AT_SETUP([options:requested-chassis with hostname])
> > >
> > > --
> > > 2.34.1
> > >
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > >
> >
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Ihar Hrachyshka Aug. 17, 2022, 5:26 p.m. UTC | #4
Thanks for bringing this up to Mark and Han. I just posted 22.06 and
22.03 branch backport series in case they are ok'ed to go.

Ihar

On Sun, Aug 14, 2022 at 8:53 PM Numan Siddique <numans@ovn.org> wrote:
>
> On Thu, Aug 11, 2022 at 4:07 AM Ihar Hrachyshka <ihrachys@redhat.com> wrote:
> >
> > Numan, thank you!
> >
> > Is it backport material? I know there may be some conflicts and am
> > happy to handle them if we agree this fix can be backported.
>
> @Mark Michelson  @Han Zhou Any comments or any objections on this ?
>
> Thanks
> Numan
>
>
> >
> > Thanks again.
> >
> > Ihar
> >
> > On Tue, Aug 9, 2022 at 8:50 PM Numan Siddique <numans@ovn.org> wrote:
> > >
> > > On Wed, Aug 10, 2022 at 4:25 AM Ihar Hrachyshka <ihrachys@redhat.com> wrote:
> > > >
> > > > When multiple chassis are fighting for the same port (requested-chassis
> > > > is not set, e.g. for gateway ports), they may produce an unreasonable
> > > > number of chassis field updates in a very short time frame (hundreds of
> > > > updates in several seconds). This puts unnecessary load on OVN as well
> > > > as any db notification consumers trying to keep up with the barrage.
> > > >
> > > > This patch throttles port claim attempts so that they don't happen more
> > > > frequently than once per 0.5 seconds.
> > > >
> > > > Reported: https://bugzilla.redhat.com/show_bug.cgi?id=1974898
> > > > Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
> > > > Acked-by: Mark Michelson <mmichels@redhat.com>
> > >
> > > Thanks for the v6.  I applied both the patches to the main.
> > >
> > > Numan
> > >
> > > > ---
> > > > v1: initial version
> > > > v2: don't postpone claim when port is unclaimed (chassis == nil)
> > > > v2: don't postpone claim as an additional chassis for a multichassis
> > > > port
> > > > v2: fixed memory corruption when modifying sset while iterating over
> > > > it
> > > > v3: rebased to resolve a git conflict
> > > > v4: added opportunistic cleanup for claimed_ports shash
> > > > v4: made a debug message in the new test case more intelligible
> > > > v5: fixed a memleak in cleanup_claimed_port_timestamps (node->data not
> > > >     freed)
> > > > v6: rebased, added Mark's ack.
> > > > v6: removed poll_wait calls from engine handler, moved them to
> > > >     binding_wait.
> > > > ---
> > > >  controller/binding.c        | 127 ++++++++++++++++++++++++++++++++++--
> > > >  controller/binding.h        |  10 +++
> > > >  controller/ovn-controller.c |  49 ++++++++++++++
> > > >  tests/ovn.at                |  41 ++++++++++++
> > > >  4 files changed, 222 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/controller/binding.c b/controller/binding.c
> > > > index 96a158225..9f5393a92 100644
> > > > --- a/controller/binding.c
> > > > +++ b/controller/binding.c
> > > > @@ -48,6 +48,67 @@ VLOG_DEFINE_THIS_MODULE(binding);
> > > >
> > > >  #define OVN_QOS_TYPE "linux-htb"
> > > >
> > > > +#define CLAIM_TIME_THRESHOLD_MS 500
> > > > +
> > > > +struct claimed_port {
> > > > +    long long int last_claimed;
> > > > +};
> > > > +
> > > > +static struct shash _claimed_ports = SHASH_INITIALIZER(&_claimed_ports);
> > > > +static struct sset _postponed_ports = SSET_INITIALIZER(&_postponed_ports);
> > > > +
> > > > +struct sset *
> > > > +get_postponed_ports(void)
> > > > +{
> > > > +    return &_postponed_ports;
> > > > +}
> > > > +
> > > > +static long long int
> > > > +get_claim_timestamp(const char *port_name)
> > > > +{
> > > > +    struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
> > > > +    return cp ? cp->last_claimed : 0;
> > > > +}
> > > > +
> > > > +static void
> > > > +register_claim_timestamp(const char *port_name, long long int t)
> > > > +{
> > > > +    struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
> > > > +    if (!cp) {
> > > > +        cp = xzalloc(sizeof *cp);
> > > > +        shash_add(&_claimed_ports, port_name, cp);
> > > > +    }
> > > > +    cp->last_claimed = t;
> > > > +}
> > > > +
> > > > +static void
> > > > +cleanup_claimed_port_timestamps(void)
> > > > +{
> > > > +    long long int now = time_msec();
> > > > +    struct shash_node *node;
> > > > +    SHASH_FOR_EACH_SAFE (node, &_claimed_ports) {
> > > > +        struct claimed_port *cp = (struct claimed_port *) node->data;
> > > > +        if (now - cp->last_claimed >= 5 * CLAIM_TIME_THRESHOLD_MS) {
> > > > +            free(cp);
> > > > +            shash_delete(&_claimed_ports, node);
> > > > +        }
> > > > +    }
> > > > +}
> > > > +
> > > > +/* Schedule any pending binding work. Runs with in the main ovn-controller
> > > > + * thread context.*/
> > > > +void
> > > > +binding_wait(void)
> > > > +{
> > > > +    const char *port_name;
> > > > +    SSET_FOR_EACH (port_name, &_postponed_ports) {
> > > > +        long long int t = get_claim_timestamp(port_name);
> > > > +        if (t) {
> > > > +            poll_timer_wait_until(t + CLAIM_TIME_THRESHOLD_MS);
> > > > +        }
> > > > +    }
> > > > +}
> > > > +
> > > >  struct qos_queue {
> > > >      struct hmap_node node;
> > > >      uint32_t queue_id;
> > > > @@ -996,6 +1057,21 @@ remove_additional_chassis(const struct sbrec_port_binding *pb,
> > > >      remove_additional_encap_for_chassis(pb, chassis_rec);
> > > >  }
> > > >
> > > > +static bool
> > > > +lport_maybe_postpone(const char *port_name, long long int now,
> > > > +                     struct sset *postponed_ports)
> > > > +{
> > > > +    long long int last_claimed = get_claim_timestamp(port_name);
> > > > +    if (now - last_claimed >= CLAIM_TIME_THRESHOLD_MS) {
> > > > +        return false;
> > > > +    }
> > > > +
> > > > +    sset_add(postponed_ports, port_name);
> > > > +    VLOG_DBG("Postponed claim on logical port %s.", port_name);
> > > > +
> > > > +    return true;
> > > > +}
> > > > +
> > > >  /* Returns false if lport is not claimed due to 'sb_readonly'.
> > > >   * Returns true otherwise.
> > > >   */
> > > > @@ -1006,7 +1082,8 @@ claim_lport(const struct sbrec_port_binding *pb,
> > > >              const struct ovsrec_interface *iface_rec,
> > > >              bool sb_readonly, bool notify_up,
> > > >              struct hmap *tracked_datapaths,
> > > > -            struct if_status_mgr *if_mgr)
> > > > +            struct if_status_mgr *if_mgr,
> > > > +            struct sset *postponed_ports)
> > > >  {
> > > >      if (!sb_readonly) {
> > > >          claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up, if_mgr);
> > > > @@ -1021,7 +1098,12 @@ claim_lport(const struct sbrec_port_binding *pb,
> > > >                  return false;
> > > >              }
> > > >
> > > > +            long long int now = time_msec();
> > > >              if (pb->chassis) {
> > > > +                if (lport_maybe_postpone(pb->logical_port, now,
> > > > +                                         postponed_ports)) {
> > > > +                    return true;
> > > > +                }
> > > >                  VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> > > >                          pb->logical_port, pb->chassis->name,
> > > >                          chassis_rec->name);
> > > > @@ -1038,6 +1120,9 @@ claim_lport(const struct sbrec_port_binding *pb,
> > > >                  remove_additional_chassis(pb, chassis_rec);
> > > >              }
> > > >              update_tracked = true;
> > > > +
> > > > +            register_claim_timestamp(pb->logical_port, now);
> > > > +            sset_find_and_delete(postponed_ports, pb->logical_port);
> > > >          }
> > > >      } else if (can_bind == CAN_BIND_AS_ADDITIONAL) {
> > > >          if (!is_additional_chassis(pb, chassis_rec)) {
> > > > @@ -1060,8 +1145,10 @@ claim_lport(const struct sbrec_port_binding *pb,
> > > >          }
> > > >      }
> > > >
> > > > -    if (update_tracked && tracked_datapaths) {
> > > > -        update_lport_tracking(pb, tracked_datapaths, true);
> > > > +    if (update_tracked) {
> > > > +        if (tracked_datapaths) {
> > > > +            update_lport_tracking(pb, tracked_datapaths, true);
> > > > +        }
> > > >      }
> > > >
> > > >      /* Check if the port encap binding, if any, has changed */
> > > > @@ -1223,7 +1310,8 @@ consider_vif_lport_(const struct sbrec_port_binding *pb,
> > > >                               b_lport->lbinding->iface,
> > > >                               !b_ctx_in->ovnsb_idl_txn,
> > > >                               !parent_pb, b_ctx_out->tracked_dp_bindings,
> > > > -                             b_ctx_out->if_mgr)){
> > > > +                             b_ctx_out->if_mgr,
> > > > +                             b_ctx_out->postponed_ports)) {
> > > >                  return false;
> > > >              }
> > > >
> > > > @@ -1519,7 +1607,8 @@ consider_nonvif_lport_(const struct sbrec_port_binding *pb,
> > > >          return claim_lport(pb, NULL, b_ctx_in->chassis_rec, NULL,
> > > >                             !b_ctx_in->ovnsb_idl_txn, false,
> > > >                             b_ctx_out->tracked_dp_bindings,
> > > > -                           b_ctx_out->if_mgr);
> > > > +                           b_ctx_out->if_mgr,
> > > > +                           b_ctx_out->postponed_ports);
> > > >      }
> > > >
> > > >      if (pb->chassis == b_ctx_in->chassis_rec ||
> > > > @@ -1843,6 +1932,8 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
> > > >      }
> > > >
> > > >      destroy_qos_map(&qos_map);
> > > > +
> > > > +    cleanup_claimed_port_timestamps();
> > > >  }
> > > >
> > > >  /* Returns true if the database is all cleaned up, false if more work is
> > > > @@ -2740,6 +2831,25 @@ delete_done:
> > > >          }
> > > >      }
> > > >
> > > > +    /* Also handle any postponed (throttled) ports. */
> > > > +    const char *port_name;
> > > > +    struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports);
> > > > +    sset_clone(&postponed_ports, b_ctx_out->postponed_ports);
> > > > +    SSET_FOR_EACH (port_name, &postponed_ports) {
> > > > +        pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name,
> > > > +                                  port_name);
> > > > +        if (!pb) {
> > > > +            sset_find_and_delete(b_ctx_out->postponed_ports, port_name);
> > > > +            continue;
> > > > +        }
> > > > +        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, qos_map_ptr);
> > > > +        if (!handled) {
> > > > +            break;
> > > > +        }
> > > > +    }
> > > > +    sset_destroy(&postponed_ports);
> > > > +    cleanup_claimed_port_timestamps();
> > > > +
> > > >      if (handled && qos_map_ptr && set_noop_qos(b_ctx_in->ovs_idl_txn,
> > > >                                                 b_ctx_in->port_table,
> > > >                                                 b_ctx_in->qos_table,
> > > > @@ -3182,3 +3292,10 @@ ovs_iface_matches_lport_iface_id_ver(const struct ovsrec_interface *iface,
> > > >
> > > >      return true;
> > > >  }
> > > > +
> > > > +void
> > > > +binding_destroy(void)
> > > > +{
> > > > +    shash_destroy_free_data(&_claimed_ports);
> > > > +    sset_clear(&_postponed_ports);
> > > > +}
> > > > diff --git a/controller/binding.h b/controller/binding.h
> > > > index 1fed06674..b2360bac2 100644
> > > > --- a/controller/binding.h
> > > > +++ b/controller/binding.h
> > > > @@ -103,6 +103,8 @@ struct binding_ctx_out {
> > > >      struct hmap *tracked_dp_bindings;
> > > >
> > > >      struct if_status_mgr *if_mgr;
> > > > +
> > > > +    struct sset *postponed_ports;
> > > >  };
> > > >
> > > >  /* Local bindings. binding.c module binds the logical port (represented by
> > > > @@ -219,4 +221,12 @@ struct binding_lport {
> > > >      size_t n_port_security;
> > > >  };
> > > >
> > > > +struct sset *get_postponed_ports(void);
> > > > +
> > > > +/* Schedule any pending binding work. */
> > > > +void binding_wait(void);
> > > > +
> > > > +/* Clean up module state. */
> > > > +void binding_destroy(void);
> > > > +
> > > >  #endif /* controller/binding.h */
> > > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > > > index 5449743e8..8268726e6 100644
> > > > --- a/controller/ovn-controller.c
> > > > +++ b/controller/ovn-controller.c
> > > > @@ -1175,6 +1175,41 @@ en_activated_ports_run(struct engine_node *node, void *data_)
> > > >      engine_set_node_state(node, state);
> > > >  }
> > > >
> > > > +struct ed_type_postponed_ports {
> > > > +    struct sset *postponed_ports;
> > > > +};
> > > > +
> > > > +static void *
> > > > +en_postponed_ports_init(struct engine_node *node OVS_UNUSED,
> > > > +                        struct engine_arg *arg OVS_UNUSED)
> > > > +{
> > > > +    struct ed_type_postponed_ports *data = xzalloc(sizeof *data);
> > > > +    data->postponed_ports = get_postponed_ports();
> > > > +    return data;
> > > > +}
> > > > +
> > > > +static void
> > > > +en_postponed_ports_cleanup(void *data_)
> > > > +{
> > > > +    struct ed_type_postponed_ports *data = data_;
> > > > +    if (!data->postponed_ports) {
> > > > +        return;
> > > > +    }
> > > > +    data->postponed_ports = NULL;
> > > > +}
> > > > +
> > > > +static void
> > > > +en_postponed_ports_run(struct engine_node *node, void *data_)
> > > > +{
> > > > +    struct ed_type_postponed_ports *data = data_;
> > > > +    enum engine_node_state state = EN_UNCHANGED;
> > > > +    data->postponed_ports = get_postponed_ports();
> > > > +    if (!sset_is_empty(data->postponed_ports)) {
> > > > +        state = EN_UPDATED;
> > > > +    }
> > > > +    engine_set_node_state(node, state);
> > > > +}
> > > > +
> > > >  struct ed_type_runtime_data {
> > > >      /* Contains "struct local_datapath" nodes. */
> > > >      struct hmap local_datapaths;
> > > > @@ -1205,6 +1240,8 @@ struct ed_type_runtime_data {
> > > >
> > > >      struct shash local_active_ports_ipv6_pd;
> > > >      struct shash local_active_ports_ras;
> > > > +
> > > > +    struct sset *postponed_ports;
> > > >  };
> > > >
> > > >  /* struct ed_type_runtime_data has the below members for tracking the
> > > > @@ -1405,6 +1442,7 @@ init_binding_ctx(struct engine_node *node,
> > > >      b_ctx_out->egress_ifaces = &rt_data->egress_ifaces;
> > > >      b_ctx_out->lbinding_data = &rt_data->lbinding_data;
> > > >      b_ctx_out->local_iface_ids = &rt_data->local_iface_ids;
> > > > +    b_ctx_out->postponed_ports = rt_data->postponed_ports;
> > > >      b_ctx_out->tracked_dp_bindings = NULL;
> > > >      b_ctx_out->if_mgr = ctrl_ctx->if_mgr;
> > > >  }
> > > > @@ -1442,6 +1480,10 @@ en_runtime_data_run(struct engine_node *node, void *data)
> > > >          local_binding_data_init(&rt_data->lbinding_data);
> > > >      }
> > > >
> > > > +    struct ed_type_postponed_ports *pp_data =
> > > > +        engine_get_input_data("postponed_ports", node);
> > > > +    rt_data->postponed_ports = pp_data->postponed_ports;
> > > > +
> > > >      struct binding_ctx_in b_ctx_in;
> > > >      struct binding_ctx_out b_ctx_out;
> > > >      init_binding_ctx(node, rt_data, &b_ctx_in, &b_ctx_out);
> > > > @@ -3542,6 +3584,7 @@ main(int argc, char *argv[])
> > > >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> > > >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> > > >      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(activated_ports, "activated_ports");
> > > > +    ENGINE_NODE(postponed_ports, "postponed_ports");
> > > >      ENGINE_NODE(pflow_output, "physical_flow_output");
> > > >      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lflow_output, "logical_flow_output");
> > > >      ENGINE_NODE(flow_output, "flow_output");
> > > > @@ -3681,6 +3724,9 @@ main(int argc, char *argv[])
> > > >                       runtime_data_sb_datapath_binding_handler);
> > > >      engine_add_input(&en_runtime_data, &en_sb_port_binding,
> > > >                       runtime_data_sb_port_binding_handler);
> > > > +    /* Reuse the same handler for any previously postponed ports. */
> > > > +    engine_add_input(&en_runtime_data, &en_postponed_ports,
> > > > +                     runtime_data_sb_port_binding_handler);
> > > >
> > > >      /* The OVS interface handler for runtime_data changes MUST be executed
> > > >       * after the sb_port_binding_handler as port_binding deletes must be
> > > > @@ -4191,6 +4237,8 @@ main(int argc, char *argv[])
> > > >                  ofctrl_wait();
> > > >                  pinctrl_wait(ovnsb_idl_txn);
> > > >              }
> > > > +
> > > > +            binding_wait();
> > > >          }
> > > >
> > > >          if (!northd_version_match && br_int) {
> > > > @@ -4318,6 +4366,7 @@ loop_done:
> > > >      lflow_destroy();
> > > >      ofctrl_destroy();
> > > >      pinctrl_destroy();
> > > > +    binding_destroy();
> > > >      patch_destroy();
> > > >      if_status_mgr_destroy(if_mgr);
> > > >      shash_destroy(&vif_plug_deleted_iface_ids);
> > > > diff --git a/tests/ovn.at b/tests/ovn.at
> > > > index 23b205791..c8cc8cde4 100644
> > > > --- a/tests/ovn.at
> > > > +++ b/tests/ovn.at
> > > > @@ -15274,6 +15274,47 @@ OVN_CLEANUP([hv1],[hv2])
> > > >  AT_CLEANUP
> > > >  ])
> > > >
> > > > +OVN_FOR_EACH_NORTHD([
> > > > +AT_SETUP([tug-of-war between two chassis for the same port])
> > > > +ovn_start
> > > > +
> > > > +ovn-nbctl ls-add ls0
> > > > +ovn-nbctl lsp-add ls0 lsp0
> > > > +
> > > > +net_add n1
> > > > +for i in 1 2; do
> > > > +    sim_add hv$i
> > > > +    as hv$i
> > > > +    ovs-vsctl add-br br-phys
> > > > +    ovn_attach n1 br-phys 192.168.0.$i
> > > > +done
> > > > +
> > > > +for i in 1 2; do
> > > > +    as hv$i
> > > > +    ovs-vsctl -- add-port br-int vif \
> > > > +              -- set Interface vif external-ids:iface-id=lsp0
> > > > +done
> > > > +
> > > > +# give controllers some time to fight for the port binding
> > > > +sleep 3
> > > > +
> > > > +# calculate the number of port claims registered by each fighting chassis
> > > > +hv1_claims=$(as hv1 grep -c 'Claiming\|Changing chassis' hv1/ovn-controller.log)
> > > > +hv2_claims=$(as hv2 grep -c 'Claiming\|Changing chassis' hv2/ovn-controller.log)
> > > > +
> > > > +echo "hv1 claimed ${hv1_claims} times"
> > > > +echo "hv2 claimed ${hv2_claims} times"
> > > > +
> > > > +# check that neither registered an outrageous number of port claims
> > > > +max_claims=10
> > > > +AT_CHECK([test "${hv1_claims}" -le "${max_claims}"], [0], [])
> > > > +AT_CHECK([test "${hv2_claims}" -le "${max_claims}"], [0], [])
> > > > +
> > > > +OVN_CLEANUP([hv1],[hv2])
> > > > +
> > > > +AT_CLEANUP
> > > > +])
> > > > +
> > > >  OVN_FOR_EACH_NORTHD([
> > > >  AT_SETUP([options:requested-chassis with hostname])
> > > >
> > > > --
> > > > 2.34.1
> > > >
> > > > _______________________________________________
> > > > dev mailing list
> > > > dev@openvswitch.org
> > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > > >
> > >
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
>
Han Zhou Aug. 18, 2022, 10 p.m. UTC | #5
On Wed, Aug 17, 2022 at 10:27 AM Ihar Hrachyshka <ihrachys@redhat.com>
wrote:
>
> Thanks for bringing this up to Mark and Han. I just posted 22.06 and
> 22.03 branch backport series in case they are ok'ed to go.
>
> Ihar
>
> On Sun, Aug 14, 2022 at 8:53 PM Numan Siddique <numans@ovn.org> wrote:
> >
> > On Thu, Aug 11, 2022 at 4:07 AM Ihar Hrachyshka <ihrachys@redhat.com>
wrote:
> > >
> > > Numan, thank you!
> > >
> > > Is it backport material? I know there may be some conflicts and am
> > > happy to handle them if we agree this fix can be backported.
> >
> > @Mark Michelson  @Han Zhou Any comments or any objections on this ?
> >

I am ok with backporting

Thanks,
Han

> > Thanks
> > Numan
> >
> >
> > >
> > > Thanks again.
> > >
> > > Ihar
> > >
> > > On Tue, Aug 9, 2022 at 8:50 PM Numan Siddique <numans@ovn.org> wrote:
> > > >
> > > > On Wed, Aug 10, 2022 at 4:25 AM Ihar Hrachyshka <ihrachys@redhat.com>
wrote:
> > > > >
> > > > > When multiple chassis are fighting for the same port
(requested-chassis
> > > > > is not set, e.g. for gateway ports), they may produce an
unreasonable
> > > > > number of chassis field updates in a very short time frame
(hundreds of
> > > > > updates in several seconds). This puts unnecessary load on OVN as
well
> > > > > as any db notification consumers trying to keep up with the
barrage.
> > > > >
> > > > > This patch throttles port claim attempts so that they don't
happen more
> > > > > frequently than once per 0.5 seconds.
> > > > >
> > > > > Reported: https://bugzilla.redhat.com/show_bug.cgi?id=1974898
> > > > > Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
> > > > > Acked-by: Mark Michelson <mmichels@redhat.com>
> > > >
> > > > Thanks for the v6.  I applied both the patches to the main.
> > > >
> > > > Numan
> > > >
> > > > > ---
> > > > > v1: initial version
> > > > > v2: don't postpone claim when port is unclaimed (chassis == nil)
> > > > > v2: don't postpone claim as an additional chassis for a
multichassis
> > > > > port
> > > > > v2: fixed memory corruption when modifying sset while iterating
over
> > > > > it
> > > > > v3: rebased to resolve a git conflict
> > > > > v4: added opportunistic cleanup for claimed_ports shash
> > > > > v4: made a debug message in the new test case more intelligible
> > > > > v5: fixed a memleak in cleanup_claimed_port_timestamps
(node->data not
> > > > >     freed)
> > > > > v6: rebased, added Mark's ack.
> > > > > v6: removed poll_wait calls from engine handler, moved them to
> > > > >     binding_wait.
> > > > > ---
> > > > >  controller/binding.c        | 127
++++++++++++++++++++++++++++++++++--
> > > > >  controller/binding.h        |  10 +++
> > > > >  controller/ovn-controller.c |  49 ++++++++++++++
> > > > >  tests/ovn.at                |  41 ++++++++++++
> > > > >  4 files changed, 222 insertions(+), 5 deletions(-)
> > > > >
> > > > > diff --git a/controller/binding.c b/controller/binding.c
> > > > > index 96a158225..9f5393a92 100644
> > > > > --- a/controller/binding.c
> > > > > +++ b/controller/binding.c
> > > > > @@ -48,6 +48,67 @@ VLOG_DEFINE_THIS_MODULE(binding);
> > > > >
> > > > >  #define OVN_QOS_TYPE "linux-htb"
> > > > >
> > > > > +#define CLAIM_TIME_THRESHOLD_MS 500
> > > > > +
> > > > > +struct claimed_port {
> > > > > +    long long int last_claimed;
> > > > > +};
> > > > > +
> > > > > +static struct shash _claimed_ports =
SHASH_INITIALIZER(&_claimed_ports);
> > > > > +static struct sset _postponed_ports =
SSET_INITIALIZER(&_postponed_ports);
> > > > > +
> > > > > +struct sset *
> > > > > +get_postponed_ports(void)
> > > > > +{
> > > > > +    return &_postponed_ports;
> > > > > +}
> > > > > +
> > > > > +static long long int
> > > > > +get_claim_timestamp(const char *port_name)
> > > > > +{
> > > > > +    struct claimed_port *cp = shash_find_data(&_claimed_ports,
port_name);
> > > > > +    return cp ? cp->last_claimed : 0;
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +register_claim_timestamp(const char *port_name, long long int t)
> > > > > +{
> > > > > +    struct claimed_port *cp = shash_find_data(&_claimed_ports,
port_name);
> > > > > +    if (!cp) {
> > > > > +        cp = xzalloc(sizeof *cp);
> > > > > +        shash_add(&_claimed_ports, port_name, cp);
> > > > > +    }
> > > > > +    cp->last_claimed = t;
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +cleanup_claimed_port_timestamps(void)
> > > > > +{
> > > > > +    long long int now = time_msec();
> > > > > +    struct shash_node *node;
> > > > > +    SHASH_FOR_EACH_SAFE (node, &_claimed_ports) {
> > > > > +        struct claimed_port *cp = (struct claimed_port *)
node->data;
> > > > > +        if (now - cp->last_claimed >= 5 *
CLAIM_TIME_THRESHOLD_MS) {
> > > > > +            free(cp);
> > > > > +            shash_delete(&_claimed_ports, node);
> > > > > +        }
> > > > > +    }
> > > > > +}
> > > > > +
> > > > > +/* Schedule any pending binding work. Runs with in the main
ovn-controller
> > > > > + * thread context.*/
> > > > > +void
> > > > > +binding_wait(void)
> > > > > +{
> > > > > +    const char *port_name;
> > > > > +    SSET_FOR_EACH (port_name, &_postponed_ports) {
> > > > > +        long long int t = get_claim_timestamp(port_name);
> > > > > +        if (t) {
> > > > > +            poll_timer_wait_until(t + CLAIM_TIME_THRESHOLD_MS);
> > > > > +        }
> > > > > +    }
> > > > > +}
> > > > > +
> > > > >  struct qos_queue {
> > > > >      struct hmap_node node;
> > > > >      uint32_t queue_id;
> > > > > @@ -996,6 +1057,21 @@ remove_additional_chassis(const struct
sbrec_port_binding *pb,
> > > > >      remove_additional_encap_for_chassis(pb, chassis_rec);
> > > > >  }
> > > > >
> > > > > +static bool
> > > > > +lport_maybe_postpone(const char *port_name, long long int now,
> > > > > +                     struct sset *postponed_ports)
> > > > > +{
> > > > > +    long long int last_claimed = get_claim_timestamp(port_name);
> > > > > +    if (now - last_claimed >= CLAIM_TIME_THRESHOLD_MS) {
> > > > > +        return false;
> > > > > +    }
> > > > > +
> > > > > +    sset_add(postponed_ports, port_name);
> > > > > +    VLOG_DBG("Postponed claim on logical port %s.", port_name);
> > > > > +
> > > > > +    return true;
> > > > > +}
> > > > > +
> > > > >  /* Returns false if lport is not claimed due to 'sb_readonly'.
> > > > >   * Returns true otherwise.
> > > > >   */
> > > > > @@ -1006,7 +1082,8 @@ claim_lport(const struct sbrec_port_binding
*pb,
> > > > >              const struct ovsrec_interface *iface_rec,
> > > > >              bool sb_readonly, bool notify_up,
> > > > >              struct hmap *tracked_datapaths,
> > > > > -            struct if_status_mgr *if_mgr)
> > > > > +            struct if_status_mgr *if_mgr,
> > > > > +            struct sset *postponed_ports)
> > > > >  {
> > > > >      if (!sb_readonly) {
> > > > >          claimed_lport_set_up(pb, parent_pb, chassis_rec,
notify_up, if_mgr);
> > > > > @@ -1021,7 +1098,12 @@ claim_lport(const struct
sbrec_port_binding *pb,
> > > > >                  return false;
> > > > >              }
> > > > >
> > > > > +            long long int now = time_msec();
> > > > >              if (pb->chassis) {
> > > > > +                if (lport_maybe_postpone(pb->logical_port, now,
> > > > > +                                         postponed_ports)) {
> > > > > +                    return true;
> > > > > +                }
> > > > >                  VLOG_INFO("Changing chassis for lport %s from %s
to %s.",
> > > > >                          pb->logical_port, pb->chassis->name,
> > > > >                          chassis_rec->name);
> > > > > @@ -1038,6 +1120,9 @@ claim_lport(const struct sbrec_port_binding
*pb,
> > > > >                  remove_additional_chassis(pb, chassis_rec);
> > > > >              }
> > > > >              update_tracked = true;
> > > > > +
> > > > > +            register_claim_timestamp(pb->logical_port, now);
> > > > > +            sset_find_and_delete(postponed_ports,
pb->logical_port);
> > > > >          }
> > > > >      } else if (can_bind == CAN_BIND_AS_ADDITIONAL) {
> > > > >          if (!is_additional_chassis(pb, chassis_rec)) {
> > > > > @@ -1060,8 +1145,10 @@ claim_lport(const struct
sbrec_port_binding *pb,
> > > > >          }
> > > > >      }
> > > > >
> > > > > -    if (update_tracked && tracked_datapaths) {
> > > > > -        update_lport_tracking(pb, tracked_datapaths, true);
> > > > > +    if (update_tracked) {
> > > > > +        if (tracked_datapaths) {
> > > > > +            update_lport_tracking(pb, tracked_datapaths, true);
> > > > > +        }
> > > > >      }
> > > > >
> > > > >      /* Check if the port encap binding, if any, has changed */
> > > > > @@ -1223,7 +1310,8 @@ consider_vif_lport_(const struct
sbrec_port_binding *pb,
> > > > >                               b_lport->lbinding->iface,
> > > > >                               !b_ctx_in->ovnsb_idl_txn,
> > > > >                               !parent_pb,
b_ctx_out->tracked_dp_bindings,
> > > > > -                             b_ctx_out->if_mgr)){
> > > > > +                             b_ctx_out->if_mgr,
> > > > > +                             b_ctx_out->postponed_ports)) {
> > > > >                  return false;
> > > > >              }
> > > > >
> > > > > @@ -1519,7 +1607,8 @@ consider_nonvif_lport_(const struct
sbrec_port_binding *pb,
> > > > >          return claim_lport(pb, NULL, b_ctx_in->chassis_rec, NULL,
> > > > >                             !b_ctx_in->ovnsb_idl_txn, false,
> > > > >                             b_ctx_out->tracked_dp_bindings,
> > > > > -                           b_ctx_out->if_mgr);
> > > > > +                           b_ctx_out->if_mgr,
> > > > > +                           b_ctx_out->postponed_ports);
> > > > >      }
> > > > >
> > > > >      if (pb->chassis == b_ctx_in->chassis_rec ||
> > > > > @@ -1843,6 +1932,8 @@ binding_run(struct binding_ctx_in
*b_ctx_in, struct binding_ctx_out *b_ctx_out)
> > > > >      }
> > > > >
> > > > >      destroy_qos_map(&qos_map);
> > > > > +
> > > > > +    cleanup_claimed_port_timestamps();
> > > > >  }
> > > > >
> > > > >  /* Returns true if the database is all cleaned up, false if more
work is
> > > > > @@ -2740,6 +2831,25 @@ delete_done:
> > > > >          }
> > > > >      }
> > > > >
> > > > > +    /* Also handle any postponed (throttled) ports. */
> > > > > +    const char *port_name;
> > > > > +    struct sset postponed_ports =
SSET_INITIALIZER(&postponed_ports);
> > > > > +    sset_clone(&postponed_ports, b_ctx_out->postponed_ports);
> > > > > +    SSET_FOR_EACH (port_name, &postponed_ports) {
> > > > > +        pb =
lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name,
> > > > > +                                  port_name);
> > > > > +        if (!pb) {
> > > > > +            sset_find_and_delete(b_ctx_out->postponed_ports,
port_name);
> > > > > +            continue;
> > > > > +        }
> > > > > +        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb,
qos_map_ptr);
> > > > > +        if (!handled) {
> > > > > +            break;
> > > > > +        }
> > > > > +    }
> > > > > +    sset_destroy(&postponed_ports);
> > > > > +    cleanup_claimed_port_timestamps();
> > > > > +
> > > > >      if (handled && qos_map_ptr &&
set_noop_qos(b_ctx_in->ovs_idl_txn,
> > > > >
b_ctx_in->port_table,
> > > > >
b_ctx_in->qos_table,
> > > > > @@ -3182,3 +3292,10 @@ ovs_iface_matches_lport_iface_id_ver(const
struct ovsrec_interface *iface,
> > > > >
> > > > >      return true;
> > > > >  }
> > > > > +
> > > > > +void
> > > > > +binding_destroy(void)
> > > > > +{
> > > > > +    shash_destroy_free_data(&_claimed_ports);
> > > > > +    sset_clear(&_postponed_ports);
> > > > > +}
> > > > > diff --git a/controller/binding.h b/controller/binding.h
> > > > > index 1fed06674..b2360bac2 100644
> > > > > --- a/controller/binding.h
> > > > > +++ b/controller/binding.h
> > > > > @@ -103,6 +103,8 @@ struct binding_ctx_out {
> > > > >      struct hmap *tracked_dp_bindings;
> > > > >
> > > > >      struct if_status_mgr *if_mgr;
> > > > > +
> > > > > +    struct sset *postponed_ports;
> > > > >  };
> > > > >
> > > > >  /* Local bindings. binding.c module binds the logical port
(represented by
> > > > > @@ -219,4 +221,12 @@ struct binding_lport {
> > > > >      size_t n_port_security;
> > > > >  };
> > > > >
> > > > > +struct sset *get_postponed_ports(void);
> > > > > +
> > > > > +/* Schedule any pending binding work. */
> > > > > +void binding_wait(void);
> > > > > +
> > > > > +/* Clean up module state. */
> > > > > +void binding_destroy(void);
> > > > > +
> > > > >  #endif /* controller/binding.h */
> > > > > diff --git a/controller/ovn-controller.c
b/controller/ovn-controller.c
> > > > > index 5449743e8..8268726e6 100644
> > > > > --- a/controller/ovn-controller.c
> > > > > +++ b/controller/ovn-controller.c
> > > > > @@ -1175,6 +1175,41 @@ en_activated_ports_run(struct engine_node
*node, void *data_)
> > > > >      engine_set_node_state(node, state);
> > > > >  }
> > > > >
> > > > > +struct ed_type_postponed_ports {
> > > > > +    struct sset *postponed_ports;
> > > > > +};
> > > > > +
> > > > > +static void *
> > > > > +en_postponed_ports_init(struct engine_node *node OVS_UNUSED,
> > > > > +                        struct engine_arg *arg OVS_UNUSED)
> > > > > +{
> > > > > +    struct ed_type_postponed_ports *data = xzalloc(sizeof *data);
> > > > > +    data->postponed_ports = get_postponed_ports();
> > > > > +    return data;
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +en_postponed_ports_cleanup(void *data_)
> > > > > +{
> > > > > +    struct ed_type_postponed_ports *data = data_;
> > > > > +    if (!data->postponed_ports) {
> > > > > +        return;
> > > > > +    }
> > > > > +    data->postponed_ports = NULL;
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +en_postponed_ports_run(struct engine_node *node, void *data_)
> > > > > +{
> > > > > +    struct ed_type_postponed_ports *data = data_;
> > > > > +    enum engine_node_state state = EN_UNCHANGED;
> > > > > +    data->postponed_ports = get_postponed_ports();
> > > > > +    if (!sset_is_empty(data->postponed_ports)) {
> > > > > +        state = EN_UPDATED;
> > > > > +    }
> > > > > +    engine_set_node_state(node, state);
> > > > > +}
> > > > > +
> > > > >  struct ed_type_runtime_data {
> > > > >      /* Contains "struct local_datapath" nodes. */
> > > > >      struct hmap local_datapaths;
> > > > > @@ -1205,6 +1240,8 @@ struct ed_type_runtime_data {
> > > > >
> > > > >      struct shash local_active_ports_ipv6_pd;
> > > > >      struct shash local_active_ports_ras;
> > > > > +
> > > > > +    struct sset *postponed_ports;
> > > > >  };
> > > > >
> > > > >  /* struct ed_type_runtime_data has the below members for
tracking the
> > > > > @@ -1405,6 +1442,7 @@ init_binding_ctx(struct engine_node *node,
> > > > >      b_ctx_out->egress_ifaces = &rt_data->egress_ifaces;
> > > > >      b_ctx_out->lbinding_data = &rt_data->lbinding_data;
> > > > >      b_ctx_out->local_iface_ids = &rt_data->local_iface_ids;
> > > > > +    b_ctx_out->postponed_ports = rt_data->postponed_ports;
> > > > >      b_ctx_out->tracked_dp_bindings = NULL;
> > > > >      b_ctx_out->if_mgr = ctrl_ctx->if_mgr;
> > > > >  }
> > > > > @@ -1442,6 +1480,10 @@ en_runtime_data_run(struct engine_node
*node, void *data)
> > > > >          local_binding_data_init(&rt_data->lbinding_data);
> > > > >      }
> > > > >
> > > > > +    struct ed_type_postponed_ports *pp_data =
> > > > > +        engine_get_input_data("postponed_ports", node);
> > > > > +    rt_data->postponed_ports = pp_data->postponed_ports;
> > > > > +
> > > > >      struct binding_ctx_in b_ctx_in;
> > > > >      struct binding_ctx_out b_ctx_out;
> > > > >      init_binding_ctx(node, rt_data, &b_ctx_in, &b_ctx_out);
> > > > > @@ -3542,6 +3584,7 @@ main(int argc, char *argv[])
> > > > >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> > > > >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> > > > >      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(activated_ports,
"activated_ports");
> > > > > +    ENGINE_NODE(postponed_ports, "postponed_ports");
> > > > >      ENGINE_NODE(pflow_output, "physical_flow_output");
> > > > >      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lflow_output,
"logical_flow_output");
> > > > >      ENGINE_NODE(flow_output, "flow_output");
> > > > > @@ -3681,6 +3724,9 @@ main(int argc, char *argv[])
> > > > >                       runtime_data_sb_datapath_binding_handler);
> > > > >      engine_add_input(&en_runtime_data, &en_sb_port_binding,
> > > > >                       runtime_data_sb_port_binding_handler);
> > > > > +    /* Reuse the same handler for any previously postponed
ports. */
> > > > > +    engine_add_input(&en_runtime_data, &en_postponed_ports,
> > > > > +                     runtime_data_sb_port_binding_handler);
> > > > >
> > > > >      /* The OVS interface handler for runtime_data changes MUST
be executed
> > > > >       * after the sb_port_binding_handler as port_binding deletes
must be
> > > > > @@ -4191,6 +4237,8 @@ main(int argc, char *argv[])
> > > > >                  ofctrl_wait();
> > > > >                  pinctrl_wait(ovnsb_idl_txn);
> > > > >              }
> > > > > +
> > > > > +            binding_wait();
> > > > >          }
> > > > >
> > > > >          if (!northd_version_match && br_int) {
> > > > > @@ -4318,6 +4366,7 @@ loop_done:
> > > > >      lflow_destroy();
> > > > >      ofctrl_destroy();
> > > > >      pinctrl_destroy();
> > > > > +    binding_destroy();
> > > > >      patch_destroy();
> > > > >      if_status_mgr_destroy(if_mgr);
> > > > >      shash_destroy(&vif_plug_deleted_iface_ids);
> > > > > diff --git a/tests/ovn.at b/tests/ovn.at
> > > > > index 23b205791..c8cc8cde4 100644
> > > > > --- a/tests/ovn.at
> > > > > +++ b/tests/ovn.at
> > > > > @@ -15274,6 +15274,47 @@ OVN_CLEANUP([hv1],[hv2])
> > > > >  AT_CLEANUP
> > > > >  ])
> > > > >
> > > > > +OVN_FOR_EACH_NORTHD([
> > > > > +AT_SETUP([tug-of-war between two chassis for the same port])
> > > > > +ovn_start
> > > > > +
> > > > > +ovn-nbctl ls-add ls0
> > > > > +ovn-nbctl lsp-add ls0 lsp0
> > > > > +
> > > > > +net_add n1
> > > > > +for i in 1 2; do
> > > > > +    sim_add hv$i
> > > > > +    as hv$i
> > > > > +    ovs-vsctl add-br br-phys
> > > > > +    ovn_attach n1 br-phys 192.168.0.$i
> > > > > +done
> > > > > +
> > > > > +for i in 1 2; do
> > > > > +    as hv$i
> > > > > +    ovs-vsctl -- add-port br-int vif \
> > > > > +              -- set Interface vif external-ids:iface-id=lsp0
> > > > > +done
> > > > > +
> > > > > +# give controllers some time to fight for the port binding
> > > > > +sleep 3
> > > > > +
> > > > > +# calculate the number of port claims registered by each
fighting chassis
> > > > > +hv1_claims=$(as hv1 grep -c 'Claiming\|Changing chassis'
hv1/ovn-controller.log)
> > > > > +hv2_claims=$(as hv2 grep -c 'Claiming\|Changing chassis'
hv2/ovn-controller.log)
> > > > > +
> > > > > +echo "hv1 claimed ${hv1_claims} times"
> > > > > +echo "hv2 claimed ${hv2_claims} times"
> > > > > +
> > > > > +# check that neither registered an outrageous number of port
claims
> > > > > +max_claims=10
> > > > > +AT_CHECK([test "${hv1_claims}" -le "${max_claims}"], [0], [])
> > > > > +AT_CHECK([test "${hv2_claims}" -le "${max_claims}"], [0], [])
> > > > > +
> > > > > +OVN_CLEANUP([hv1],[hv2])
> > > > > +
> > > > > +AT_CLEANUP
> > > > > +])
> > > > > +
> > > > >  OVN_FOR_EACH_NORTHD([
> > > > >  AT_SETUP([options:requested-chassis with hostname])
> > > > >
> > > > > --
> > > > > 2.34.1
> > > > >
> > > > > _______________________________________________
> > > > > dev mailing list
> > > > > dev@openvswitch.org
> > > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > > > >
> > > >
> > >
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > >
> >
>
Han Zhou Aug. 19, 2022, 12:33 a.m. UTC | #6
On Thu, Aug 18, 2022 at 3:00 PM Han Zhou <hzhou@ovn.org> wrote:
>
>
>
> On Wed, Aug 17, 2022 at 10:27 AM Ihar Hrachyshka <ihrachys@redhat.com>
wrote:
> >
> > Thanks for bringing this up to Mark and Han. I just posted 22.06 and
> > 22.03 branch backport series in case they are ok'ed to go.
> >
> > Ihar
> >
> > On Sun, Aug 14, 2022 at 8:53 PM Numan Siddique <numans@ovn.org> wrote:
> > >
> > > On Thu, Aug 11, 2022 at 4:07 AM Ihar Hrachyshka <ihrachys@redhat.com>
wrote:
> > > >
> > > > Numan, thank you!
> > > >
> > > > Is it backport material? I know there may be some conflicts and am
> > > > happy to handle them if we agree this fix can be backported.
> > >
> > > @Mark Michelson  @Han Zhou Any comments or any objections on this ?
> > >
>
> I am ok with backporting
>
Sorry, just found a minor problem for patch 1 when rebasing my "avoid patch
port delete & recreate" series. I added a fix to that as part of my series:
https://patchwork.ozlabs.org/project/ovn/patch/20220819002049.513127-2-hzhou@ovn.org/
Please take a look, and consider backporting if that fix is ok.

Thanks,
Han

> Thanks,
> Han
>
> > > Thanks
> > > Numan
> > >
> > >
> > > >
> > > > Thanks again.
> > > >
> > > > Ihar
> > > >
> > > > On Tue, Aug 9, 2022 at 8:50 PM Numan Siddique <numans@ovn.org>
wrote:
> > > > >
> > > > > On Wed, Aug 10, 2022 at 4:25 AM Ihar Hrachyshka <
ihrachys@redhat.com> wrote:
> > > > > >
> > > > > > When multiple chassis are fighting for the same port
(requested-chassis
> > > > > > is not set, e.g. for gateway ports), they may produce an
unreasonable
> > > > > > number of chassis field updates in a very short time frame
(hundreds of
> > > > > > updates in several seconds). This puts unnecessary load on OVN
as well
> > > > > > as any db notification consumers trying to keep up with the
barrage.
> > > > > >
> > > > > > This patch throttles port claim attempts so that they don't
happen more
> > > > > > frequently than once per 0.5 seconds.
> > > > > >
> > > > > > Reported: https://bugzilla.redhat.com/show_bug.cgi?id=1974898
> > > > > > Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
> > > > > > Acked-by: Mark Michelson <mmichels@redhat.com>
> > > > >
> > > > > Thanks for the v6.  I applied both the patches to the main.
> > > > >
> > > > > Numan
> > > > >
> > > > > > ---
> > > > > > v1: initial version
> > > > > > v2: don't postpone claim when port is unclaimed (chassis == nil)
> > > > > > v2: don't postpone claim as an additional chassis for a
multichassis
> > > > > > port
> > > > > > v2: fixed memory corruption when modifying sset while iterating
over
> > > > > > it
> > > > > > v3: rebased to resolve a git conflict
> > > > > > v4: added opportunistic cleanup for claimed_ports shash
> > > > > > v4: made a debug message in the new test case more intelligible
> > > > > > v5: fixed a memleak in cleanup_claimed_port_timestamps
(node->data not
> > > > > >     freed)
> > > > > > v6: rebased, added Mark's ack.
> > > > > > v6: removed poll_wait calls from engine handler, moved them to
> > > > > >     binding_wait.
> > > > > > ---
> > > > > >  controller/binding.c        | 127
++++++++++++++++++++++++++++++++++--
> > > > > >  controller/binding.h        |  10 +++
> > > > > >  controller/ovn-controller.c |  49 ++++++++++++++
> > > > > >  tests/ovn.at                |  41 ++++++++++++
> > > > > >  4 files changed, 222 insertions(+), 5 deletions(-)
> > > > > >
> > > > > > diff --git a/controller/binding.c b/controller/binding.c
> > > > > > index 96a158225..9f5393a92 100644
> > > > > > --- a/controller/binding.c
> > > > > > +++ b/controller/binding.c
> > > > > > @@ -48,6 +48,67 @@ VLOG_DEFINE_THIS_MODULE(binding);
> > > > > >
> > > > > >  #define OVN_QOS_TYPE "linux-htb"
> > > > > >
> > > > > > +#define CLAIM_TIME_THRESHOLD_MS 500
> > > > > > +
> > > > > > +struct claimed_port {
> > > > > > +    long long int last_claimed;
> > > > > > +};
> > > > > > +
> > > > > > +static struct shash _claimed_ports =
SHASH_INITIALIZER(&_claimed_ports);
> > > > > > +static struct sset _postponed_ports =
SSET_INITIALIZER(&_postponed_ports);
> > > > > > +
> > > > > > +struct sset *
> > > > > > +get_postponed_ports(void)
> > > > > > +{
> > > > > > +    return &_postponed_ports;
> > > > > > +}
> > > > > > +
> > > > > > +static long long int
> > > > > > +get_claim_timestamp(const char *port_name)
> > > > > > +{
> > > > > > +    struct claimed_port *cp = shash_find_data(&_claimed_ports,
port_name);
> > > > > > +    return cp ? cp->last_claimed : 0;
> > > > > > +}
> > > > > > +
> > > > > > +static void
> > > > > > +register_claim_timestamp(const char *port_name, long long int
t)
> > > > > > +{
> > > > > > +    struct claimed_port *cp = shash_find_data(&_claimed_ports,
port_name);
> > > > > > +    if (!cp) {
> > > > > > +        cp = xzalloc(sizeof *cp);
> > > > > > +        shash_add(&_claimed_ports, port_name, cp);
> > > > > > +    }
> > > > > > +    cp->last_claimed = t;
> > > > > > +}
> > > > > > +
> > > > > > +static void
> > > > > > +cleanup_claimed_port_timestamps(void)
> > > > > > +{
> > > > > > +    long long int now = time_msec();
> > > > > > +    struct shash_node *node;
> > > > > > +    SHASH_FOR_EACH_SAFE (node, &_claimed_ports) {
> > > > > > +        struct claimed_port *cp = (struct claimed_port *)
node->data;
> > > > > > +        if (now - cp->last_claimed >= 5 *
CLAIM_TIME_THRESHOLD_MS) {
> > > > > > +            free(cp);
> > > > > > +            shash_delete(&_claimed_ports, node);
> > > > > > +        }
> > > > > > +    }
> > > > > > +}
> > > > > > +
> > > > > > +/* Schedule any pending binding work. Runs with in the main
ovn-controller
> > > > > > + * thread context.*/
> > > > > > +void
> > > > > > +binding_wait(void)
> > > > > > +{
> > > > > > +    const char *port_name;
> > > > > > +    SSET_FOR_EACH (port_name, &_postponed_ports) {
> > > > > > +        long long int t = get_claim_timestamp(port_name);
> > > > > > +        if (t) {
> > > > > > +            poll_timer_wait_until(t + CLAIM_TIME_THRESHOLD_MS);
> > > > > > +        }
> > > > > > +    }
> > > > > > +}
> > > > > > +
> > > > > >  struct qos_queue {
> > > > > >      struct hmap_node node;
> > > > > >      uint32_t queue_id;
> > > > > > @@ -996,6 +1057,21 @@ remove_additional_chassis(const struct
sbrec_port_binding *pb,
> > > > > >      remove_additional_encap_for_chassis(pb, chassis_rec);
> > > > > >  }
> > > > > >
> > > > > > +static bool
> > > > > > +lport_maybe_postpone(const char *port_name, long long int now,
> > > > > > +                     struct sset *postponed_ports)
> > > > > > +{
> > > > > > +    long long int last_claimed =
get_claim_timestamp(port_name);
> > > > > > +    if (now - last_claimed >= CLAIM_TIME_THRESHOLD_MS) {
> > > > > > +        return false;
> > > > > > +    }
> > > > > > +
> > > > > > +    sset_add(postponed_ports, port_name);
> > > > > > +    VLOG_DBG("Postponed claim on logical port %s.", port_name);
> > > > > > +
> > > > > > +    return true;
> > > > > > +}
> > > > > > +
> > > > > >  /* Returns false if lport is not claimed due to 'sb_readonly'.
> > > > > >   * Returns true otherwise.
> > > > > >   */
> > > > > > @@ -1006,7 +1082,8 @@ claim_lport(const struct
sbrec_port_binding *pb,
> > > > > >              const struct ovsrec_interface *iface_rec,
> > > > > >              bool sb_readonly, bool notify_up,
> > > > > >              struct hmap *tracked_datapaths,
> > > > > > -            struct if_status_mgr *if_mgr)
> > > > > > +            struct if_status_mgr *if_mgr,
> > > > > > +            struct sset *postponed_ports)
> > > > > >  {
> > > > > >      if (!sb_readonly) {
> > > > > >          claimed_lport_set_up(pb, parent_pb, chassis_rec,
notify_up, if_mgr);
> > > > > > @@ -1021,7 +1098,12 @@ claim_lport(const struct
sbrec_port_binding *pb,
> > > > > >                  return false;
> > > > > >              }
> > > > > >
> > > > > > +            long long int now = time_msec();
> > > > > >              if (pb->chassis) {
> > > > > > +                if (lport_maybe_postpone(pb->logical_port, now,
> > > > > > +                                         postponed_ports)) {
> > > > > > +                    return true;
> > > > > > +                }
> > > > > >                  VLOG_INFO("Changing chassis for lport %s from
%s to %s.",
> > > > > >                          pb->logical_port, pb->chassis->name,
> > > > > >                          chassis_rec->name);
> > > > > > @@ -1038,6 +1120,9 @@ claim_lport(const struct
sbrec_port_binding *pb,
> > > > > >                  remove_additional_chassis(pb, chassis_rec);
> > > > > >              }
> > > > > >              update_tracked = true;
> > > > > > +
> > > > > > +            register_claim_timestamp(pb->logical_port, now);
> > > > > > +            sset_find_and_delete(postponed_ports,
pb->logical_port);
> > > > > >          }
> > > > > >      } else if (can_bind == CAN_BIND_AS_ADDITIONAL) {
> > > > > >          if (!is_additional_chassis(pb, chassis_rec)) {
> > > > > > @@ -1060,8 +1145,10 @@ claim_lport(const struct
sbrec_port_binding *pb,
> > > > > >          }
> > > > > >      }
> > > > > >
> > > > > > -    if (update_tracked && tracked_datapaths) {
> > > > > > -        update_lport_tracking(pb, tracked_datapaths, true);
> > > > > > +    if (update_tracked) {
> > > > > > +        if (tracked_datapaths) {
> > > > > > +            update_lport_tracking(pb, tracked_datapaths, true);
> > > > > > +        }
> > > > > >      }
> > > > > >
> > > > > >      /* Check if the port encap binding, if any, has changed */
> > > > > > @@ -1223,7 +1310,8 @@ consider_vif_lport_(const struct
sbrec_port_binding *pb,
> > > > > >                               b_lport->lbinding->iface,
> > > > > >                               !b_ctx_in->ovnsb_idl_txn,
> > > > > >                               !parent_pb,
b_ctx_out->tracked_dp_bindings,
> > > > > > -                             b_ctx_out->if_mgr)){
> > > > > > +                             b_ctx_out->if_mgr,
> > > > > > +                             b_ctx_out->postponed_ports)) {
> > > > > >                  return false;
> > > > > >              }
> > > > > >
> > > > > > @@ -1519,7 +1607,8 @@ consider_nonvif_lport_(const struct
sbrec_port_binding *pb,
> > > > > >          return claim_lport(pb, NULL, b_ctx_in->chassis_rec,
NULL,
> > > > > >                             !b_ctx_in->ovnsb_idl_txn, false,
> > > > > >                             b_ctx_out->tracked_dp_bindings,
> > > > > > -                           b_ctx_out->if_mgr);
> > > > > > +                           b_ctx_out->if_mgr,
> > > > > > +                           b_ctx_out->postponed_ports);
> > > > > >      }
> > > > > >
> > > > > >      if (pb->chassis == b_ctx_in->chassis_rec ||
> > > > > > @@ -1843,6 +1932,8 @@ binding_run(struct binding_ctx_in
*b_ctx_in, struct binding_ctx_out *b_ctx_out)
> > > > > >      }
> > > > > >
> > > > > >      destroy_qos_map(&qos_map);
> > > > > > +
> > > > > > +    cleanup_claimed_port_timestamps();
> > > > > >  }
> > > > > >
> > > > > >  /* Returns true if the database is all cleaned up, false if
more work is
> > > > > > @@ -2740,6 +2831,25 @@ delete_done:
> > > > > >          }
> > > > > >      }
> > > > > >
> > > > > > +    /* Also handle any postponed (throttled) ports. */
> > > > > > +    const char *port_name;
> > > > > > +    struct sset postponed_ports =
SSET_INITIALIZER(&postponed_ports);
> > > > > > +    sset_clone(&postponed_ports, b_ctx_out->postponed_ports);
> > > > > > +    SSET_FOR_EACH (port_name, &postponed_ports) {
> > > > > > +        pb =
lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name,
> > > > > > +                                  port_name);
> > > > > > +        if (!pb) {
> > > > > > +            sset_find_and_delete(b_ctx_out->postponed_ports,
port_name);
> > > > > > +            continue;
> > > > > > +        }
> > > > > > +        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb,
qos_map_ptr);
> > > > > > +        if (!handled) {
> > > > > > +            break;
> > > > > > +        }
> > > > > > +    }
> > > > > > +    sset_destroy(&postponed_ports);
> > > > > > +    cleanup_claimed_port_timestamps();
> > > > > > +
> > > > > >      if (handled && qos_map_ptr &&
set_noop_qos(b_ctx_in->ovs_idl_txn,
> > > > > >
b_ctx_in->port_table,
> > > > > >
b_ctx_in->qos_table,
> > > > > > @@ -3182,3 +3292,10 @@
ovs_iface_matches_lport_iface_id_ver(const struct ovsrec_interface *iface,
> > > > > >
> > > > > >      return true;
> > > > > >  }
> > > > > > +
> > > > > > +void
> > > > > > +binding_destroy(void)
> > > > > > +{
> > > > > > +    shash_destroy_free_data(&_claimed_ports);
> > > > > > +    sset_clear(&_postponed_ports);
> > > > > > +}
> > > > > > diff --git a/controller/binding.h b/controller/binding.h
> > > > > > index 1fed06674..b2360bac2 100644
> > > > > > --- a/controller/binding.h
> > > > > > +++ b/controller/binding.h
> > > > > > @@ -103,6 +103,8 @@ struct binding_ctx_out {
> > > > > >      struct hmap *tracked_dp_bindings;
> > > > > >
> > > > > >      struct if_status_mgr *if_mgr;
> > > > > > +
> > > > > > +    struct sset *postponed_ports;
> > > > > >  };
> > > > > >
> > > > > >  /* Local bindings. binding.c module binds the logical port
(represented by
> > > > > > @@ -219,4 +221,12 @@ struct binding_lport {
> > > > > >      size_t n_port_security;
> > > > > >  };
> > > > > >
> > > > > > +struct sset *get_postponed_ports(void);
> > > > > > +
> > > > > > +/* Schedule any pending binding work. */
> > > > > > +void binding_wait(void);
> > > > > > +
> > > > > > +/* Clean up module state. */
> > > > > > +void binding_destroy(void);
> > > > > > +
> > > > > >  #endif /* controller/binding.h */
> > > > > > diff --git a/controller/ovn-controller.c
b/controller/ovn-controller.c
> > > > > > index 5449743e8..8268726e6 100644
> > > > > > --- a/controller/ovn-controller.c
> > > > > > +++ b/controller/ovn-controller.c
> > > > > > @@ -1175,6 +1175,41 @@ en_activated_ports_run(struct
engine_node *node, void *data_)
> > > > > >      engine_set_node_state(node, state);
> > > > > >  }
> > > > > >
> > > > > > +struct ed_type_postponed_ports {
> > > > > > +    struct sset *postponed_ports;
> > > > > > +};
> > > > > > +
> > > > > > +static void *
> > > > > > +en_postponed_ports_init(struct engine_node *node OVS_UNUSED,
> > > > > > +                        struct engine_arg *arg OVS_UNUSED)
> > > > > > +{
> > > > > > +    struct ed_type_postponed_ports *data = xzalloc(sizeof
*data);
> > > > > > +    data->postponed_ports = get_postponed_ports();
> > > > > > +    return data;
> > > > > > +}
> > > > > > +
> > > > > > +static void
> > > > > > +en_postponed_ports_cleanup(void *data_)
> > > > > > +{
> > > > > > +    struct ed_type_postponed_ports *data = data_;
> > > > > > +    if (!data->postponed_ports) {
> > > > > > +        return;
> > > > > > +    }
> > > > > > +    data->postponed_ports = NULL;
> > > > > > +}
> > > > > > +
> > > > > > +static void
> > > > > > +en_postponed_ports_run(struct engine_node *node, void *data_)
> > > > > > +{
> > > > > > +    struct ed_type_postponed_ports *data = data_;
> > > > > > +    enum engine_node_state state = EN_UNCHANGED;
> > > > > > +    data->postponed_ports = get_postponed_ports();
> > > > > > +    if (!sset_is_empty(data->postponed_ports)) {
> > > > > > +        state = EN_UPDATED;
> > > > > > +    }
> > > > > > +    engine_set_node_state(node, state);
> > > > > > +}
> > > > > > +
> > > > > >  struct ed_type_runtime_data {
> > > > > >      /* Contains "struct local_datapath" nodes. */
> > > > > >      struct hmap local_datapaths;
> > > > > > @@ -1205,6 +1240,8 @@ struct ed_type_runtime_data {
> > > > > >
> > > > > >      struct shash local_active_ports_ipv6_pd;
> > > > > >      struct shash local_active_ports_ras;
> > > > > > +
> > > > > > +    struct sset *postponed_ports;
> > > > > >  };
> > > > > >
> > > > > >  /* struct ed_type_runtime_data has the below members for
tracking the
> > > > > > @@ -1405,6 +1442,7 @@ init_binding_ctx(struct engine_node *node,
> > > > > >      b_ctx_out->egress_ifaces = &rt_data->egress_ifaces;
> > > > > >      b_ctx_out->lbinding_data = &rt_data->lbinding_data;
> > > > > >      b_ctx_out->local_iface_ids = &rt_data->local_iface_ids;
> > > > > > +    b_ctx_out->postponed_ports = rt_data->postponed_ports;
> > > > > >      b_ctx_out->tracked_dp_bindings = NULL;
> > > > > >      b_ctx_out->if_mgr = ctrl_ctx->if_mgr;
> > > > > >  }
> > > > > > @@ -1442,6 +1480,10 @@ en_runtime_data_run(struct engine_node
*node, void *data)
> > > > > >          local_binding_data_init(&rt_data->lbinding_data);
> > > > > >      }
> > > > > >
> > > > > > +    struct ed_type_postponed_ports *pp_data =
> > > > > > +        engine_get_input_data("postponed_ports", node);
> > > > > > +    rt_data->postponed_ports = pp_data->postponed_ports;
> > > > > > +
> > > > > >      struct binding_ctx_in b_ctx_in;
> > > > > >      struct binding_ctx_out b_ctx_out;
> > > > > >      init_binding_ctx(node, rt_data, &b_ctx_in, &b_ctx_out);
> > > > > > @@ -3542,6 +3584,7 @@ main(int argc, char *argv[])
> > > > > >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> > > > > >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> > > > > >      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(activated_ports,
"activated_ports");
> > > > > > +    ENGINE_NODE(postponed_ports, "postponed_ports");
> > > > > >      ENGINE_NODE(pflow_output, "physical_flow_output");
> > > > > >      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lflow_output,
"logical_flow_output");
> > > > > >      ENGINE_NODE(flow_output, "flow_output");
> > > > > > @@ -3681,6 +3724,9 @@ main(int argc, char *argv[])
> > > > > >                       runtime_data_sb_datapath_binding_handler);
> > > > > >      engine_add_input(&en_runtime_data, &en_sb_port_binding,
> > > > > >                       runtime_data_sb_port_binding_handler);
> > > > > > +    /* Reuse the same handler for any previously postponed
ports. */
> > > > > > +    engine_add_input(&en_runtime_data, &en_postponed_ports,
> > > > > > +                     runtime_data_sb_port_binding_handler);
> > > > > >
> > > > > >      /* The OVS interface handler for runtime_data changes MUST
be executed
> > > > > >       * after the sb_port_binding_handler as port_binding
deletes must be
> > > > > > @@ -4191,6 +4237,8 @@ main(int argc, char *argv[])
> > > > > >                  ofctrl_wait();
> > > > > >                  pinctrl_wait(ovnsb_idl_txn);
> > > > > >              }
> > > > > > +
> > > > > > +            binding_wait();
> > > > > >          }
> > > > > >
> > > > > >          if (!northd_version_match && br_int) {
> > > > > > @@ -4318,6 +4366,7 @@ loop_done:
> > > > > >      lflow_destroy();
> > > > > >      ofctrl_destroy();
> > > > > >      pinctrl_destroy();
> > > > > > +    binding_destroy();
> > > > > >      patch_destroy();
> > > > > >      if_status_mgr_destroy(if_mgr);
> > > > > >      shash_destroy(&vif_plug_deleted_iface_ids);
> > > > > > diff --git a/tests/ovn.at b/tests/ovn.at
> > > > > > index 23b205791..c8cc8cde4 100644
> > > > > > --- a/tests/ovn.at
> > > > > > +++ b/tests/ovn.at
> > > > > > @@ -15274,6 +15274,47 @@ OVN_CLEANUP([hv1],[hv2])
> > > > > >  AT_CLEANUP
> > > > > >  ])
> > > > > >
> > > > > > +OVN_FOR_EACH_NORTHD([
> > > > > > +AT_SETUP([tug-of-war between two chassis for the same port])
> > > > > > +ovn_start
> > > > > > +
> > > > > > +ovn-nbctl ls-add ls0
> > > > > > +ovn-nbctl lsp-add ls0 lsp0
> > > > > > +
> > > > > > +net_add n1
> > > > > > +for i in 1 2; do
> > > > > > +    sim_add hv$i
> > > > > > +    as hv$i
> > > > > > +    ovs-vsctl add-br br-phys
> > > > > > +    ovn_attach n1 br-phys 192.168.0.$i
> > > > > > +done
> > > > > > +
> > > > > > +for i in 1 2; do
> > > > > > +    as hv$i
> > > > > > +    ovs-vsctl -- add-port br-int vif \
> > > > > > +              -- set Interface vif external-ids:iface-id=lsp0
> > > > > > +done
> > > > > > +
> > > > > > +# give controllers some time to fight for the port binding
> > > > > > +sleep 3
> > > > > > +
> > > > > > +# calculate the number of port claims registered by each
fighting chassis
> > > > > > +hv1_claims=$(as hv1 grep -c 'Claiming\|Changing chassis'
hv1/ovn-controller.log)
> > > > > > +hv2_claims=$(as hv2 grep -c 'Claiming\|Changing chassis'
hv2/ovn-controller.log)
> > > > > > +
> > > > > > +echo "hv1 claimed ${hv1_claims} times"
> > > > > > +echo "hv2 claimed ${hv2_claims} times"
> > > > > > +
> > > > > > +# check that neither registered an outrageous number of port
claims
> > > > > > +max_claims=10
> > > > > > +AT_CHECK([test "${hv1_claims}" -le "${max_claims}"], [0], [])
> > > > > > +AT_CHECK([test "${hv2_claims}" -le "${max_claims}"], [0], [])
> > > > > > +
> > > > > > +OVN_CLEANUP([hv1],[hv2])
> > > > > > +
> > > > > > +AT_CLEANUP
> > > > > > +])
> > > > > > +
> > > > > >  OVN_FOR_EACH_NORTHD([
> > > > > >  AT_SETUP([options:requested-chassis with hostname])
> > > > > >
> > > > > > --
> > > > > > 2.34.1
> > > > > >
> > > > > > _______________________________________________
> > > > > > dev mailing list
> > > > > > dev@openvswitch.org
> > > > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > > > > >
> > > > >
> > > >
> > > > _______________________________________________
> > > > dev mailing list
> > > > dev@openvswitch.org
> > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > > >
> > >
> >
diff mbox series

Patch

diff --git a/controller/binding.c b/controller/binding.c
index 96a158225..9f5393a92 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -48,6 +48,67 @@  VLOG_DEFINE_THIS_MODULE(binding);
 
 #define OVN_QOS_TYPE "linux-htb"
 
+#define CLAIM_TIME_THRESHOLD_MS 500
+
+struct claimed_port {
+    long long int last_claimed;
+};
+
+static struct shash _claimed_ports = SHASH_INITIALIZER(&_claimed_ports);
+static struct sset _postponed_ports = SSET_INITIALIZER(&_postponed_ports);
+
+struct sset *
+get_postponed_ports(void)
+{
+    return &_postponed_ports;
+}
+
+static long long int
+get_claim_timestamp(const char *port_name)
+{
+    struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
+    return cp ? cp->last_claimed : 0;
+}
+
+static void
+register_claim_timestamp(const char *port_name, long long int t)
+{
+    struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
+    if (!cp) {
+        cp = xzalloc(sizeof *cp);
+        shash_add(&_claimed_ports, port_name, cp);
+    }
+    cp->last_claimed = t;
+}
+
+static void
+cleanup_claimed_port_timestamps(void)
+{
+    long long int now = time_msec();
+    struct shash_node *node;
+    SHASH_FOR_EACH_SAFE (node, &_claimed_ports) {
+        struct claimed_port *cp = (struct claimed_port *) node->data;
+        if (now - cp->last_claimed >= 5 * CLAIM_TIME_THRESHOLD_MS) {
+            free(cp);
+            shash_delete(&_claimed_ports, node);
+        }
+    }
+}
+
+/* Schedule any pending binding work. Runs with in the main ovn-controller
+ * thread context.*/
+void
+binding_wait(void)
+{
+    const char *port_name;
+    SSET_FOR_EACH (port_name, &_postponed_ports) {
+        long long int t = get_claim_timestamp(port_name);
+        if (t) {
+            poll_timer_wait_until(t + CLAIM_TIME_THRESHOLD_MS);
+        }
+    }
+}
+
 struct qos_queue {
     struct hmap_node node;
     uint32_t queue_id;
@@ -996,6 +1057,21 @@  remove_additional_chassis(const struct sbrec_port_binding *pb,
     remove_additional_encap_for_chassis(pb, chassis_rec);
 }
 
+static bool
+lport_maybe_postpone(const char *port_name, long long int now,
+                     struct sset *postponed_ports)
+{
+    long long int last_claimed = get_claim_timestamp(port_name);
+    if (now - last_claimed >= CLAIM_TIME_THRESHOLD_MS) {
+        return false;
+    }
+
+    sset_add(postponed_ports, port_name);
+    VLOG_DBG("Postponed claim on logical port %s.", port_name);
+
+    return true;
+}
+
 /* Returns false if lport is not claimed due to 'sb_readonly'.
  * Returns true otherwise.
  */
@@ -1006,7 +1082,8 @@  claim_lport(const struct sbrec_port_binding *pb,
             const struct ovsrec_interface *iface_rec,
             bool sb_readonly, bool notify_up,
             struct hmap *tracked_datapaths,
-            struct if_status_mgr *if_mgr)
+            struct if_status_mgr *if_mgr,
+            struct sset *postponed_ports)
 {
     if (!sb_readonly) {
         claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up, if_mgr);
@@ -1021,7 +1098,12 @@  claim_lport(const struct sbrec_port_binding *pb,
                 return false;
             }
 
+            long long int now = time_msec();
             if (pb->chassis) {
+                if (lport_maybe_postpone(pb->logical_port, now,
+                                         postponed_ports)) {
+                    return true;
+                }
                 VLOG_INFO("Changing chassis for lport %s from %s to %s.",
                         pb->logical_port, pb->chassis->name,
                         chassis_rec->name);
@@ -1038,6 +1120,9 @@  claim_lport(const struct sbrec_port_binding *pb,
                 remove_additional_chassis(pb, chassis_rec);
             }
             update_tracked = true;
+
+            register_claim_timestamp(pb->logical_port, now);
+            sset_find_and_delete(postponed_ports, pb->logical_port);
         }
     } else if (can_bind == CAN_BIND_AS_ADDITIONAL) {
         if (!is_additional_chassis(pb, chassis_rec)) {
@@ -1060,8 +1145,10 @@  claim_lport(const struct sbrec_port_binding *pb,
         }
     }
 
-    if (update_tracked && tracked_datapaths) {
-        update_lport_tracking(pb, tracked_datapaths, true);
+    if (update_tracked) {
+        if (tracked_datapaths) {
+            update_lport_tracking(pb, tracked_datapaths, true);
+        }
     }
 
     /* Check if the port encap binding, if any, has changed */
@@ -1223,7 +1310,8 @@  consider_vif_lport_(const struct sbrec_port_binding *pb,
                              b_lport->lbinding->iface,
                              !b_ctx_in->ovnsb_idl_txn,
                              !parent_pb, b_ctx_out->tracked_dp_bindings,
-                             b_ctx_out->if_mgr)){
+                             b_ctx_out->if_mgr,
+                             b_ctx_out->postponed_ports)) {
                 return false;
             }
 
@@ -1519,7 +1607,8 @@  consider_nonvif_lport_(const struct sbrec_port_binding *pb,
         return claim_lport(pb, NULL, b_ctx_in->chassis_rec, NULL,
                            !b_ctx_in->ovnsb_idl_txn, false,
                            b_ctx_out->tracked_dp_bindings,
-                           b_ctx_out->if_mgr);
+                           b_ctx_out->if_mgr,
+                           b_ctx_out->postponed_ports);
     }
 
     if (pb->chassis == b_ctx_in->chassis_rec ||
@@ -1843,6 +1932,8 @@  binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out)
     }
 
     destroy_qos_map(&qos_map);
+
+    cleanup_claimed_port_timestamps();
 }
 
 /* Returns true if the database is all cleaned up, false if more work is
@@ -2740,6 +2831,25 @@  delete_done:
         }
     }
 
+    /* Also handle any postponed (throttled) ports. */
+    const char *port_name;
+    struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports);
+    sset_clone(&postponed_ports, b_ctx_out->postponed_ports);
+    SSET_FOR_EACH (port_name, &postponed_ports) {
+        pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name,
+                                  port_name);
+        if (!pb) {
+            sset_find_and_delete(b_ctx_out->postponed_ports, port_name);
+            continue;
+        }
+        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, qos_map_ptr);
+        if (!handled) {
+            break;
+        }
+    }
+    sset_destroy(&postponed_ports);
+    cleanup_claimed_port_timestamps();
+
     if (handled && qos_map_ptr && set_noop_qos(b_ctx_in->ovs_idl_txn,
                                                b_ctx_in->port_table,
                                                b_ctx_in->qos_table,
@@ -3182,3 +3292,10 @@  ovs_iface_matches_lport_iface_id_ver(const struct ovsrec_interface *iface,
 
     return true;
 }
+
+void
+binding_destroy(void)
+{
+    shash_destroy_free_data(&_claimed_ports);
+    sset_clear(&_postponed_ports);
+}
diff --git a/controller/binding.h b/controller/binding.h
index 1fed06674..b2360bac2 100644
--- a/controller/binding.h
+++ b/controller/binding.h
@@ -103,6 +103,8 @@  struct binding_ctx_out {
     struct hmap *tracked_dp_bindings;
 
     struct if_status_mgr *if_mgr;
+
+    struct sset *postponed_ports;
 };
 
 /* Local bindings. binding.c module binds the logical port (represented by
@@ -219,4 +221,12 @@  struct binding_lport {
     size_t n_port_security;
 };
 
+struct sset *get_postponed_ports(void);
+
+/* Schedule any pending binding work. */
+void binding_wait(void);
+
+/* Clean up module state. */
+void binding_destroy(void);
+
 #endif /* controller/binding.h */
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 5449743e8..8268726e6 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -1175,6 +1175,41 @@  en_activated_ports_run(struct engine_node *node, void *data_)
     engine_set_node_state(node, state);
 }
 
+struct ed_type_postponed_ports {
+    struct sset *postponed_ports;
+};
+
+static void *
+en_postponed_ports_init(struct engine_node *node OVS_UNUSED,
+                        struct engine_arg *arg OVS_UNUSED)
+{
+    struct ed_type_postponed_ports *data = xzalloc(sizeof *data);
+    data->postponed_ports = get_postponed_ports();
+    return data;
+}
+
+static void
+en_postponed_ports_cleanup(void *data_)
+{
+    struct ed_type_postponed_ports *data = data_;
+    if (!data->postponed_ports) {
+        return;
+    }
+    data->postponed_ports = NULL;
+}
+
+static void
+en_postponed_ports_run(struct engine_node *node, void *data_)
+{
+    struct ed_type_postponed_ports *data = data_;
+    enum engine_node_state state = EN_UNCHANGED;
+    data->postponed_ports = get_postponed_ports();
+    if (!sset_is_empty(data->postponed_ports)) {
+        state = EN_UPDATED;
+    }
+    engine_set_node_state(node, state);
+}
+
 struct ed_type_runtime_data {
     /* Contains "struct local_datapath" nodes. */
     struct hmap local_datapaths;
@@ -1205,6 +1240,8 @@  struct ed_type_runtime_data {
 
     struct shash local_active_ports_ipv6_pd;
     struct shash local_active_ports_ras;
+
+    struct sset *postponed_ports;
 };
 
 /* struct ed_type_runtime_data has the below members for tracking the
@@ -1405,6 +1442,7 @@  init_binding_ctx(struct engine_node *node,
     b_ctx_out->egress_ifaces = &rt_data->egress_ifaces;
     b_ctx_out->lbinding_data = &rt_data->lbinding_data;
     b_ctx_out->local_iface_ids = &rt_data->local_iface_ids;
+    b_ctx_out->postponed_ports = rt_data->postponed_ports;
     b_ctx_out->tracked_dp_bindings = NULL;
     b_ctx_out->if_mgr = ctrl_ctx->if_mgr;
 }
@@ -1442,6 +1480,10 @@  en_runtime_data_run(struct engine_node *node, void *data)
         local_binding_data_init(&rt_data->lbinding_data);
     }
 
+    struct ed_type_postponed_ports *pp_data =
+        engine_get_input_data("postponed_ports", node);
+    rt_data->postponed_ports = pp_data->postponed_ports;
+
     struct binding_ctx_in b_ctx_in;
     struct binding_ctx_out b_ctx_out;
     init_binding_ctx(node, rt_data, &b_ctx_in, &b_ctx_out);
@@ -3542,6 +3584,7 @@  main(int argc, char *argv[])
     ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
     ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
     ENGINE_NODE_WITH_CLEAR_TRACK_DATA(activated_ports, "activated_ports");
+    ENGINE_NODE(postponed_ports, "postponed_ports");
     ENGINE_NODE(pflow_output, "physical_flow_output");
     ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lflow_output, "logical_flow_output");
     ENGINE_NODE(flow_output, "flow_output");
@@ -3681,6 +3724,9 @@  main(int argc, char *argv[])
                      runtime_data_sb_datapath_binding_handler);
     engine_add_input(&en_runtime_data, &en_sb_port_binding,
                      runtime_data_sb_port_binding_handler);
+    /* Reuse the same handler for any previously postponed ports. */
+    engine_add_input(&en_runtime_data, &en_postponed_ports,
+                     runtime_data_sb_port_binding_handler);
 
     /* The OVS interface handler for runtime_data changes MUST be executed
      * after the sb_port_binding_handler as port_binding deletes must be
@@ -4191,6 +4237,8 @@  main(int argc, char *argv[])
                 ofctrl_wait();
                 pinctrl_wait(ovnsb_idl_txn);
             }
+
+            binding_wait();
         }
 
         if (!northd_version_match && br_int) {
@@ -4318,6 +4366,7 @@  loop_done:
     lflow_destroy();
     ofctrl_destroy();
     pinctrl_destroy();
+    binding_destroy();
     patch_destroy();
     if_status_mgr_destroy(if_mgr);
     shash_destroy(&vif_plug_deleted_iface_ids);
diff --git a/tests/ovn.at b/tests/ovn.at
index 23b205791..c8cc8cde4 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -15274,6 +15274,47 @@  OVN_CLEANUP([hv1],[hv2])
 AT_CLEANUP
 ])
 
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([tug-of-war between two chassis for the same port])
+ovn_start
+
+ovn-nbctl ls-add ls0
+ovn-nbctl lsp-add ls0 lsp0
+
+net_add n1
+for i in 1 2; do
+    sim_add hv$i
+    as hv$i
+    ovs-vsctl add-br br-phys
+    ovn_attach n1 br-phys 192.168.0.$i
+done
+
+for i in 1 2; do
+    as hv$i
+    ovs-vsctl -- add-port br-int vif \
+              -- set Interface vif external-ids:iface-id=lsp0
+done
+
+# give controllers some time to fight for the port binding
+sleep 3
+
+# calculate the number of port claims registered by each fighting chassis
+hv1_claims=$(as hv1 grep -c 'Claiming\|Changing chassis' hv1/ovn-controller.log)
+hv2_claims=$(as hv2 grep -c 'Claiming\|Changing chassis' hv2/ovn-controller.log)
+
+echo "hv1 claimed ${hv1_claims} times"
+echo "hv2 claimed ${hv2_claims} times"
+
+# check that neither registered an outrageous number of port claims
+max_claims=10
+AT_CHECK([test "${hv1_claims}" -le "${max_claims}"], [0], [])
+AT_CHECK([test "${hv2_claims}" -le "${max_claims}"], [0], [])
+
+OVN_CLEANUP([hv1],[hv2])
+
+AT_CLEANUP
+])
+
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([options:requested-chassis with hostname])