diff mbox series

[ovs-dev,ovn,6/6] ovn-controller: I-P for ct zone and OVS interface changes in flow output stage.

Message ID 20200316111619.1687790-1-numans@ovn.org
State Superseded
Headers show
Series Incremental processing improvements. | expand

Commit Message

Numan Siddique March 16, 2020, 11:16 a.m. UTC
From: Numan Siddique <numans@ovn.org>

This patch handles ct zone changes and OVS interface changes incrementally
in the flow output stage.

Any changes to ct zone can be handled by running physical_run() instead of running
flow_output_run(). And any changes to OVS interfaces can be either handled
incrementally (for OVS interfaces representing VIFs) or just running
physical_run() (for tunnel and other types of interfaces).

To better handle this, a new engine node 'physical_flow_changes' is added which
handles changes to ct zone and OVS interfaces.

After this patch, We don't trigger full recompute in flow output stage for
runtime data changes. There's noop handler for this. With this we avoid
calls to lflow_run() a lot.

For the below commands the lflow_run counter without this patch is 16
and with this patch is 3.

-----------------------
ovn-nbctl ls-add sw0
ovn-nbctl lsp-add sw0 sw0-port1
ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 192.168.0.2"

ovn-nbctl ls-add sw1
ovn-nbctl lsp-add sw1 sw1-port1
ovn-nbctl lsp-set-addresses sw1-port1 "50:54:00:00:00:03 11.0.0.2"

ovn-nbctl lr-add lr0
ovn-nbctl lrp-add lr0 lrp0 00:00:00:00:ff:01 192.168.0.1/24
ovn-nbctl lsp-add sw0 lrp0-attachment
ovn-nbctl lsp-set-type lrp0-attachment router
ovn-nbctl lsp-set-addresses lrp0-attachment 00:00:00:00:ff:01
ovn-nbctl lsp-set-options lrp0-attachment router-port=lrp0
ovn-nbctl lrp-add lr0 lrp1 00:00:00:00:ff:02 11.0.0.1/24
ovn-nbctl lsp-add sw1 lrp1-attachment
ovn-nbctl lsp-set-type lrp1-attachment router
ovn-nbctl lsp-set-addresses lrp1-attachment 00:00:00:00:ff:02
ovn-nbctl lsp-set-options lrp1-attachment router-port=lrp1

ovs-vsctl add-port br-int p1 -- \
    set Interface p1 external_ids:iface-id=sw0-port1
ovs-vsctl add-port br-int p2 -- \
    set Interface p2 external_ids:iface-id=sw1-port1
ovn-appctl -t ovn-controller coverage/read-counter lflow_run
-------------------

Signed-off-by: Numan Siddique <numans@ovn.org>
---
 controller/binding.c        | 22 ---------
 controller/lflow.c          | 13 +++++
 controller/lflow.h          |  2 +
 controller/ovn-controller.c | 96 ++++++++++++++++++++++++++++++++++++-
 controller/ovn-controller.h | 22 +++++++++
 controller/physical.c       | 48 +++++++++++++++++++
 controller/physical.h       |  5 +-
 tests/ovn-performance.at    | 14 +++---
 8 files changed, 190 insertions(+), 32 deletions(-)

Comments

Han Zhou March 25, 2020, 6:51 a.m. UTC | #1
On Mon, Mar 16, 2020 at 4:16 AM <numans@ovn.org> wrote:
>
> From: Numan Siddique <numans@ovn.org>
>
> This patch handles ct zone changes and OVS interface changes incrementally
> in the flow output stage.
>
> Any changes to ct zone can be handled by running physical_run() instead
of running
> flow_output_run(). And any changes to OVS interfaces can be either handled
> incrementally (for OVS interfaces representing VIFs) or just running
> physical_run() (for tunnel and other types of interfaces).
>
> To better handle this, a new engine node 'physical_flow_changes' is added
which
> handles changes to ct zone and OVS interfaces.
>
> After this patch, We don't trigger full recompute in flow output stage for
> runtime data changes. There's noop handler for this. With this we avoid
> calls to lflow_run() a lot.
>
> For the below commands the lflow_run counter without this patch is 16
> and with this patch is 3.
>
> -----------------------
> ovn-nbctl ls-add sw0
> ovn-nbctl lsp-add sw0 sw0-port1
> ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 192.168.0.2"
>
> ovn-nbctl ls-add sw1
> ovn-nbctl lsp-add sw1 sw1-port1
> ovn-nbctl lsp-set-addresses sw1-port1 "50:54:00:00:00:03 11.0.0.2"
>
> ovn-nbctl lr-add lr0
> ovn-nbctl lrp-add lr0 lrp0 00:00:00:00:ff:01 192.168.0.1/24
> ovn-nbctl lsp-add sw0 lrp0-attachment
> ovn-nbctl lsp-set-type lrp0-attachment router
> ovn-nbctl lsp-set-addresses lrp0-attachment 00:00:00:00:ff:01
> ovn-nbctl lsp-set-options lrp0-attachment router-port=lrp0
> ovn-nbctl lrp-add lr0 lrp1 00:00:00:00:ff:02 11.0.0.1/24
> ovn-nbctl lsp-add sw1 lrp1-attachment
> ovn-nbctl lsp-set-type lrp1-attachment router
> ovn-nbctl lsp-set-addresses lrp1-attachment 00:00:00:00:ff:02
> ovn-nbctl lsp-set-options lrp1-attachment router-port=lrp1
>
> ovs-vsctl add-port br-int p1 -- \
>     set Interface p1 external_ids:iface-id=sw0-port1
> ovs-vsctl add-port br-int p2 -- \
>     set Interface p2 external_ids:iface-id=sw1-port1
> ovn-appctl -t ovn-controller coverage/read-counter lflow_run
> -------------------
>
> Signed-off-by: Numan Siddique <numans@ovn.org>
> ---
>  controller/binding.c        | 22 ---------
>  controller/lflow.c          | 13 +++++
>  controller/lflow.h          |  2 +
>  controller/ovn-controller.c | 96 ++++++++++++++++++++++++++++++++++++-
>  controller/ovn-controller.h | 22 +++++++++
>  controller/physical.c       | 48 +++++++++++++++++++
>  controller/physical.h       |  5 +-
>  tests/ovn-performance.at    | 14 +++---
>  8 files changed, 190 insertions(+), 32 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index ce4467f31..9dc1c3f38 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -501,22 +501,6 @@ remove_local_lport_ids(const struct
sbrec_port_binding *binding_rec,
>          sset_find_and_delete(local_lport_ids, buf);
>  }
>
> -enum local_binding_type {
> -    BT_VIF,
> -    BT_CHILD,
> -    BT_VIRTUAL
> -};
> -
> -struct local_binding {
> -    struct ovs_list node;       /* In parent if any. */
> -    char *name;
> -    enum local_binding_type type;
> -    const struct ovsrec_interface *iface;
> -    const struct sbrec_port_binding *pb;
> -
> -    struct ovs_list children;
> -};
> -
>  static struct local_binding *
>  local_binding_create(const char *name, const struct ovsrec_interface
*iface,
>                       const struct sbrec_port_binding *pb,
> @@ -537,12 +521,6 @@ local_binding_add(struct shash *local_bindings,
struct local_binding *lbinding)
>      shash_add(local_bindings, lbinding->name, lbinding);
>  }
>
> -static struct local_binding *
> -local_binding_find(struct shash *local_bindings, const char *name)
> -{
> -    return shash_find_data(local_bindings, name);
> -}
> -
>  static void
>  local_binding_destroy(struct local_binding *lbinding)
>  {
> diff --git a/controller/lflow.c b/controller/lflow.c
> index 01214a3a6..512258cd3 100644
> --- a/controller/lflow.c
> +++ b/controller/lflow.c
> @@ -847,3 +847,16 @@ lflow_destroy(void)
>      expr_symtab_destroy(&symtab);
>      shash_destroy(&symtab);
>  }
> +
> +bool
> +lflow_evaluate_pb_changes(const struct sbrec_port_binding_table
*pb_table)
> +{
> +    const struct sbrec_port_binding *binding;
> +    SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (binding, pb_table) {
> +        if (!strcmp(binding->type, "remote")) {
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> diff --git a/controller/lflow.h b/controller/lflow.h
> index f02f709d7..fa54d4de2 100644
> --- a/controller/lflow.h
> +++ b/controller/lflow.h
> @@ -48,6 +48,7 @@ struct sbrec_dhcp_options_table;
>  struct sbrec_dhcpv6_options_table;
>  struct sbrec_logical_flow_table;
>  struct sbrec_mac_binding_table;
> +struct sbrec_port_binding_table;
>  struct simap;
>  struct sset;
>  struct uuid;
> @@ -153,6 +154,7 @@ void lflow_handle_changed_neighbors(
>      const struct hmap *local_datapaths,
>      struct ovn_desired_flow_table *);
>
> +bool lflow_evaluate_pb_changes(const struct sbrec_port_binding_table *);
>  void lflow_destroy(void);
>
>  void lflow_expr_destroy(struct hmap *lflow_expr_cache);
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 07823f020..448e9908e 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1360,6 +1360,10 @@ static void init_physical_ctx(struct engine_node
*node,
>
>      ovs_assert(br_int && chassis);
>
> +    struct ovsrec_interface_table *iface_table =
> +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> +            engine_get_input("OVS_interface", node));
> +
>      struct ed_type_ct_zones *ct_zones_data =
>          engine_get_input_data("ct_zones", node);
>      struct simap *ct_zones = &ct_zones_data->current;
> @@ -1369,12 +1373,14 @@ static void init_physical_ctx(struct engine_node
*node,
>      p_ctx->mc_group_table = multicast_group_table;
>      p_ctx->br_int = br_int;
>      p_ctx->chassis_table = chassis_table;
> +    p_ctx->iface_table = iface_table;
>      p_ctx->chassis = chassis;
>      p_ctx->active_tunnels = &rt_data->active_tunnels;
>      p_ctx->local_datapaths = &rt_data->local_datapaths;
>      p_ctx->local_lports = &rt_data->local_lports;
>      p_ctx->ct_zones = ct_zones;
>      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
> +    p_ctx->local_bindings = &rt_data->local_bindings;
>  }
>
>  static void init_lflow_ctx(struct engine_node *node,
> @@ -1637,6 +1643,8 @@ flow_output_sb_port_binding_handler(struct
engine_node *node, void *data)
>       *    always logical router port, and any change to logical router
port
>       *    would just trigger recompute.
>       *
> +     *  - When a port binding of type 'remote' is updated by ovn-ic.
> +     *
>       * Although there is no correctness issue so far (except the unusual
ACL
>       * use case, which doesn't seem to be a real problem), it might be
better
>       * to handle this more gracefully, without the need to consider these
> @@ -1647,6 +1655,14 @@ flow_output_sb_port_binding_handler(struct
engine_node *node, void *data)
>      struct physical_ctx p_ctx;
>      init_physical_ctx(node, rt_data, &p_ctx);
>
> +    if (!lflow_evaluate_pb_changes(p_ctx.port_binding_table)) {
> +        /* If this returns false, it means there is an impact on
> +         * the logical flow processing because of some changes in
> +         * port bindings. Return false so that recompute is triggered
> +         * for this stage. */
> +        return false;
> +    }
> +
>      physical_handle_port_binding_changes(&p_ctx, flow_table);
>
>      engine_set_node_state(node, EN_UPDATED);
> @@ -1782,6 +1798,70 @@ flow_output_port_groups_handler(struct engine_node
*node, void *data)
>      return _flow_output_resource_ref_handler(node, data,
REF_TYPE_PORTGROUP);
>  }
>
> +struct ed_type_physical_flow_changes {
> +    bool ct_zones_changed;
> +};
> +
> +static bool
> +flow_output_physical_flow_changes_handler(struct engine_node *node, void
*data)
> +{
> +    struct ed_type_physical_flow_changes *pfc =
> +        engine_get_input_data("physical_flow_changes", node);
> +    struct ed_type_runtime_data *rt_data =
> +        engine_get_input_data("runtime_data", node);
> +
> +    struct ed_type_flow_output *fo = data;
> +    struct physical_ctx p_ctx;
> +    init_physical_ctx(node, rt_data, &p_ctx);
> +
> +    engine_set_node_state(node, EN_UPDATED);
> +    if (pfc->ct_zones_changed) {
> +        pfc->ct_zones_changed = false;
> +        physical_run(&p_ctx, &fo->flow_table);
> +        return true;
> +    }
> +
> +    return physical_handle_ovs_iface_changes(&p_ctx, &fo->flow_table);
> +}
> +
> +static void *
> +en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
> +                             struct engine_arg *arg OVS_UNUSED)
> +{
> +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof *data);
> +    return data;
> +}
> +
> +static void
> +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
> +{
> +
> +}
> +
> +static void
> +en_physical_flow_changes_run(struct engine_node *node, void *data
OVS_UNUSED)
> +{
> +    engine_set_node_state(node, EN_UPDATED);
> +}
> +
> +static bool
> +physical_flow_changes_ct_zones_handler(struct engine_node *node
OVS_UNUSED,
> +                                      void *data OVS_UNUSED)
> +{
> +    struct ed_type_physical_flow_changes *pfc = data;
> +    pfc->ct_zones_changed = true;
> +    engine_set_node_state(node, EN_UPDATED);
> +    return false;
> +}
> +
> +static bool
> +flow_output_noop_handler(struct engine_node *node OVS_UNUSED,
> +                          void *data OVS_UNUSED)
> +{
> +    return true;
> +}
> +
> +
>  struct ovn_controller_exit_args {
>      bool *exiting;
>      bool *restart;
> @@ -1904,6 +1984,7 @@ main(int argc, char *argv[])
>      ENGINE_NODE(runtime_data, "runtime_data");
>      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
>      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
>      ENGINE_NODE(flow_output, "flow_output");
>      ENGINE_NODE(addr_sets, "addr_sets");
>      ENGINE_NODE(port_groups, "port_groups");
> @@ -1923,16 +2004,27 @@ main(int argc, char *argv[])
>      engine_add_input(&en_port_groups, &en_sb_port_group,
>                       port_groups_sb_port_group_handler);
>
> +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
> +                     physical_flow_changes_ct_zones_handler);
> +    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
> +                     NULL);
> +
>      engine_add_input(&en_flow_output, &en_addr_sets,
>                       flow_output_addr_sets_handler);
>      engine_add_input(&en_flow_output, &en_port_groups,
>                       flow_output_port_groups_handler);
> -    engine_add_input(&en_flow_output, &en_runtime_data, NULL);
> -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
> +    engine_add_input(&en_flow_output, &en_runtime_data,
> +                     flow_output_noop_handler);

I think this breaks the dependency and would result in correctness problem.
Because flow_output depends on runtime_data, which includes lots of data
structures such as local_lports, local_datapaths, active_tunnels, etc., all
needed for flow compute to produce correct OVS flow output. Now with the
noop handler, the changes in these data structures are ignored. How could
we make sure the flows computed are still up to date?

From my point of view, to handle the runtime data changes incrementally, we
should split the runtime data node into separate engine nodes, so that
different changes can be handled differently for flow output compute. Some
of the changes may be handled incrementally if it is easy to handle and if
it is frequently changed, and leave those less frequent and hard-to-handle
changes and let them trigger recompute.

>      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
> +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
> +                     flow_output_physical_flow_changes_handler);
>
>      engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
>      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
> +    engine_add_input(&en_flow_output, &en_ovs_interface,
> +                     flow_output_noop_handler);
> +    engine_add_input(&en_flow_output, &en_ct_zones,
> +                     flow_output_noop_handler);
>
>      engine_add_input(&en_flow_output, &en_sb_chassis, NULL);
>      engine_add_input(&en_flow_output, &en_sb_encap, NULL);
> diff --git a/controller/ovn-controller.h b/controller/ovn-controller.h
> index 5d9466880..40c358cef 100644
> --- a/controller/ovn-controller.h
> +++ b/controller/ovn-controller.h
> @@ -72,6 +72,28 @@ struct local_datapath {
>  struct local_datapath *get_local_datapath(const struct hmap *,
>                                            uint32_t tunnel_key);
>
> +enum local_binding_type {
> +    BT_VIF,
> +    BT_CHILD,
> +    BT_VIRTUAL
> +};
> +
> +struct local_binding {
> +    struct ovs_list node;       /* In parent if any. */
> +    char *name;
> +    enum local_binding_type type;
> +    const struct ovsrec_interface *iface;
> +    const struct sbrec_port_binding *pb;
> +
> +    struct ovs_list children;
> +};
> +
> +static inline struct local_binding *
> +local_binding_find(struct shash *local_bindings, const char *name)
> +{
> +    return shash_find_data(local_bindings, name);
> +}
> +
>  const struct ovsrec_bridge *get_bridge(const struct ovsrec_bridge_table
*,
>                                         const char *br_name);
>
> diff --git a/controller/physical.c b/controller/physical.c
> index 144aeb7bd..09a5d9b39 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -1780,6 +1780,54 @@ physical_run(struct physical_ctx *p_ctx,
>      simap_destroy(&new_tunnel_to_ofport);
>  }
>
> +bool
> +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
> +                                  struct ovn_desired_flow_table
*flow_table)
> +{
> +    const struct ovsrec_interface *iface_rec;
> +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
p_ctx->iface_table) {
> +        if (!strcmp(iface_rec->type, "geneve") ||
> +            !strcmp(iface_rec->type, "patch")) {
> +            return false;
> +        }
> +    }
> +
> +    struct ofpbuf ofpacts;
> +    ofpbuf_init(&ofpacts, 0);
> +
> +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
p_ctx->iface_table) {
> +        const char *iface_id = smap_get(&iface_rec->external_ids,
"iface-id");
> +        if (!iface_id) {
> +            continue;
> +        }
> +
> +        const struct local_binding *lb =
> +            local_binding_find(p_ctx->local_bindings, iface_id);
> +
> +        if (!lb || !lb->pb) {
> +            continue;
> +        }
> +
> +        if (ovsrec_interface_is_deleted(iface_rec)) {
> +            ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
> +        } else {
> +            if (!ovsrec_interface_is_new(iface_rec)) {
> +                ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
> +            }
> +
> +            consider_port_binding(p_ctx->sbrec_port_binding_by_name,
> +                                  p_ctx->mff_ovn_geneve, p_ctx->ct_zones,
> +                                  p_ctx->active_tunnels,
> +                                  p_ctx->local_datapaths,
> +                                  lb->pb, p_ctx->chassis,
> +                                  flow_table, &ofpacts);
> +        }
> +    }
> +
> +    ofpbuf_uninit(&ofpacts);
> +    return true;
> +}
> +
>  bool
>  get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t
*ofport)
>  {
> diff --git a/controller/physical.h b/controller/physical.h
> index dadf84ea1..9ca34436a 100644
> --- a/controller/physical.h
> +++ b/controller/physical.h
> @@ -49,11 +49,13 @@ struct physical_ctx {
>      const struct ovsrec_bridge *br_int;
>      const struct sbrec_chassis_table *chassis_table;
>      const struct sbrec_chassis *chassis;
> +    const struct ovsrec_interface_table *iface_table;
>      const struct sset *active_tunnels;
>      struct hmap *local_datapaths;
>      struct sset *local_lports;
>      const struct simap *ct_zones;
>      enum mf_field_id mff_ovn_geneve;
> +    struct shash *local_bindings;
>  };
>
>  void physical_register_ovs_idl(struct ovsdb_idl *);
> @@ -63,7 +65,8 @@ void physical_handle_port_binding_changes(struct
physical_ctx *,
>                                            struct ovn_desired_flow_table
*);
>  void physical_handle_mc_group_changes(struct physical_ctx *,
>                                        struct ovn_desired_flow_table *);
> -
> +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
> +                                       struct ovn_desired_flow_table *);
>  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
>                         ofp_port_t *ofport);
>  #endif /* controller/physical.h */
> diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at
> index 5c21f6dd7..555434484 100644
> --- a/tests/ovn-performance.at
> +++ b/tests/ovn-performance.at
> @@ -263,7 +263,7 @@ for i in 1 2; do
>      )
>
>      # Add router port to $ls
> -    OVN_CONTROLLER_EXPECT_HIT(
> +    OVN_CONTROLLER_EXPECT_NO_HIT(
>          [hv1 hv2], [lflow_run],
>          [ovn-nbctl --wait=hv lrp-add lr1 $lrp 02:00:00:00:0$i:01
10.0.$i.1/24]
>      )
> @@ -271,15 +271,15 @@ for i in 1 2; do
>          [hv1 hv2], [lflow_run],
>          [ovn-nbctl --wait=hv lsp-add $ls $lsp]
>      )
> -    OVN_CONTROLLER_EXPECT_HIT(
> +    OVN_CONTROLLER_EXPECT_NO_HIT(
>          [hv1 hv2], [lflow_run],
>          [ovn-nbctl --wait=hv lsp-set-type $lsp router]
>      )
> -    OVN_CONTROLLER_EXPECT_HIT(
> +    OVN_CONTROLLER_EXPECT_NO_HIT(
>          [hv1 hv2], [lflow_run],
>          [ovn-nbctl --wait=hv lsp-set-options $lsp router-port=$lrp]
>      )
> -    OVN_CONTROLLER_EXPECT_HIT(
> +    OVN_CONTROLLER_EXPECT_NO_HIT(
>          [hv1 hv2], [lflow_run],
>          [ovn-nbctl --wait=hv lsp-set-addresses $lsp router]
>      )
> @@ -393,8 +393,8 @@ for i in 1 2; do
>      lp=lp$i
>
>      # Delete port $lp
> -    OVN_CONTROLLER_EXPECT_HIT_COND(
> -        [hv$i hv$j], [lflow_run], [>0 =0],
> +    OVN_CONTROLLER_EXPECT_NO_HIT(
> +        [hv$i hv$j], [lflow_run],
>          [ovn-nbctl --wait=hv lsp-del $lp]
>      )
>  done
> @@ -409,7 +409,7 @@ for i in 1 2; do
>      ls=ls$i
>
>      # Delete switch $ls
> -    OVN_CONTROLLER_EXPECT_HIT(
> +    OVN_CONTROLLER_EXPECT_NO_HIT(
>          [hv1 hv2], [lflow_run],
>          [ovn-nbctl --wait=hv ls-del $ls]
>      )
> --
> 2.24.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Numan Siddique March 31, 2020, 12:40 p.m. UTC | #2
On Wed, Mar 25, 2020 at 12:23 PM Han Zhou <hzhou@ovn.org> wrote:
>
> On Mon, Mar 16, 2020 at 4:16 AM <numans@ovn.org> wrote:
> >
> > From: Numan Siddique <numans@ovn.org>
> >
> > This patch handles ct zone changes and OVS interface changes incrementally
> > in the flow output stage.
> >
> > Any changes to ct zone can be handled by running physical_run() instead
> of running
> > flow_output_run(). And any changes to OVS interfaces can be either handled
> > incrementally (for OVS interfaces representing VIFs) or just running
> > physical_run() (for tunnel and other types of interfaces).
> >
> > To better handle this, a new engine node 'physical_flow_changes' is added
> which
> > handles changes to ct zone and OVS interfaces.
> >
> > After this patch, We don't trigger full recompute in flow output stage for
> > runtime data changes. There's noop handler for this. With this we avoid
> > calls to lflow_run() a lot.
> >
> > For the below commands the lflow_run counter without this patch is 16
> > and with this patch is 3.
> >
> > -----------------------
> > ovn-nbctl ls-add sw0
> > ovn-nbctl lsp-add sw0 sw0-port1
> > ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 192.168.0.2"
> >
> > ovn-nbctl ls-add sw1
> > ovn-nbctl lsp-add sw1 sw1-port1
> > ovn-nbctl lsp-set-addresses sw1-port1 "50:54:00:00:00:03 11.0.0.2"
> >
> > ovn-nbctl lr-add lr0
> > ovn-nbctl lrp-add lr0 lrp0 00:00:00:00:ff:01 192.168.0.1/24
> > ovn-nbctl lsp-add sw0 lrp0-attachment
> > ovn-nbctl lsp-set-type lrp0-attachment router
> > ovn-nbctl lsp-set-addresses lrp0-attachment 00:00:00:00:ff:01
> > ovn-nbctl lsp-set-options lrp0-attachment router-port=lrp0
> > ovn-nbctl lrp-add lr0 lrp1 00:00:00:00:ff:02 11.0.0.1/24
> > ovn-nbctl lsp-add sw1 lrp1-attachment
> > ovn-nbctl lsp-set-type lrp1-attachment router
> > ovn-nbctl lsp-set-addresses lrp1-attachment 00:00:00:00:ff:02
> > ovn-nbctl lsp-set-options lrp1-attachment router-port=lrp1
> >
> > ovs-vsctl add-port br-int p1 -- \
> >     set Interface p1 external_ids:iface-id=sw0-port1
> > ovs-vsctl add-port br-int p2 -- \
> >     set Interface p2 external_ids:iface-id=sw1-port1
> > ovn-appctl -t ovn-controller coverage/read-counter lflow_run
> > -------------------
> >
> > Signed-off-by: Numan Siddique <numans@ovn.org>
> > ---
> >  controller/binding.c        | 22 ---------
> >  controller/lflow.c          | 13 +++++
> >  controller/lflow.h          |  2 +
> >  controller/ovn-controller.c | 96 ++++++++++++++++++++++++++++++++++++-
> >  controller/ovn-controller.h | 22 +++++++++
> >  controller/physical.c       | 48 +++++++++++++++++++
> >  controller/physical.h       |  5 +-
> >  tests/ovn-performance.at    | 14 +++---
> >  8 files changed, 190 insertions(+), 32 deletions(-)
> >
> > diff --git a/controller/binding.c b/controller/binding.c
> > index ce4467f31..9dc1c3f38 100644
> > --- a/controller/binding.c
> > +++ b/controller/binding.c
> > @@ -501,22 +501,6 @@ remove_local_lport_ids(const struct
> sbrec_port_binding *binding_rec,
> >          sset_find_and_delete(local_lport_ids, buf);
> >  }
> >
> > -enum local_binding_type {
> > -    BT_VIF,
> > -    BT_CHILD,
> > -    BT_VIRTUAL
> > -};
> > -
> > -struct local_binding {
> > -    struct ovs_list node;       /* In parent if any. */
> > -    char *name;
> > -    enum local_binding_type type;
> > -    const struct ovsrec_interface *iface;
> > -    const struct sbrec_port_binding *pb;
> > -
> > -    struct ovs_list children;
> > -};
> > -
> >  static struct local_binding *
> >  local_binding_create(const char *name, const struct ovsrec_interface
> *iface,
> >                       const struct sbrec_port_binding *pb,
> > @@ -537,12 +521,6 @@ local_binding_add(struct shash *local_bindings,
> struct local_binding *lbinding)
> >      shash_add(local_bindings, lbinding->name, lbinding);
> >  }
> >
> > -static struct local_binding *
> > -local_binding_find(struct shash *local_bindings, const char *name)
> > -{
> > -    return shash_find_data(local_bindings, name);
> > -}
> > -
> >  static void
> >  local_binding_destroy(struct local_binding *lbinding)
> >  {
> > diff --git a/controller/lflow.c b/controller/lflow.c
> > index 01214a3a6..512258cd3 100644
> > --- a/controller/lflow.c
> > +++ b/controller/lflow.c
> > @@ -847,3 +847,16 @@ lflow_destroy(void)
> >      expr_symtab_destroy(&symtab);
> >      shash_destroy(&symtab);
> >  }
> > +
> > +bool
> > +lflow_evaluate_pb_changes(const struct sbrec_port_binding_table
> *pb_table)
> > +{
> > +    const struct sbrec_port_binding *binding;
> > +    SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (binding, pb_table) {
> > +        if (!strcmp(binding->type, "remote")) {
> > +            return false;
> > +        }
> > +    }
> > +
> > +    return true;
> > +}
> > diff --git a/controller/lflow.h b/controller/lflow.h
> > index f02f709d7..fa54d4de2 100644
> > --- a/controller/lflow.h
> > +++ b/controller/lflow.h
> > @@ -48,6 +48,7 @@ struct sbrec_dhcp_options_table;
> >  struct sbrec_dhcpv6_options_table;
> >  struct sbrec_logical_flow_table;
> >  struct sbrec_mac_binding_table;
> > +struct sbrec_port_binding_table;
> >  struct simap;
> >  struct sset;
> >  struct uuid;
> > @@ -153,6 +154,7 @@ void lflow_handle_changed_neighbors(
> >      const struct hmap *local_datapaths,
> >      struct ovn_desired_flow_table *);
> >
> > +bool lflow_evaluate_pb_changes(const struct sbrec_port_binding_table *);
> >  void lflow_destroy(void);
> >
> >  void lflow_expr_destroy(struct hmap *lflow_expr_cache);
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 07823f020..448e9908e 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -1360,6 +1360,10 @@ static void init_physical_ctx(struct engine_node
> *node,
> >
> >      ovs_assert(br_int && chassis);
> >
> > +    struct ovsrec_interface_table *iface_table =
> > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> > +            engine_get_input("OVS_interface", node));
> > +
> >      struct ed_type_ct_zones *ct_zones_data =
> >          engine_get_input_data("ct_zones", node);
> >      struct simap *ct_zones = &ct_zones_data->current;
> > @@ -1369,12 +1373,14 @@ static void init_physical_ctx(struct engine_node
> *node,
> >      p_ctx->mc_group_table = multicast_group_table;
> >      p_ctx->br_int = br_int;
> >      p_ctx->chassis_table = chassis_table;
> > +    p_ctx->iface_table = iface_table;
> >      p_ctx->chassis = chassis;
> >      p_ctx->active_tunnels = &rt_data->active_tunnels;
> >      p_ctx->local_datapaths = &rt_data->local_datapaths;
> >      p_ctx->local_lports = &rt_data->local_lports;
> >      p_ctx->ct_zones = ct_zones;
> >      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
> > +    p_ctx->local_bindings = &rt_data->local_bindings;
> >  }
> >
> >  static void init_lflow_ctx(struct engine_node *node,
> > @@ -1637,6 +1643,8 @@ flow_output_sb_port_binding_handler(struct
> engine_node *node, void *data)
> >       *    always logical router port, and any change to logical router
> port
> >       *    would just trigger recompute.
> >       *
> > +     *  - When a port binding of type 'remote' is updated by ovn-ic.
> > +     *
> >       * Although there is no correctness issue so far (except the unusual
> ACL
> >       * use case, which doesn't seem to be a real problem), it might be
> better
> >       * to handle this more gracefully, without the need to consider these
> > @@ -1647,6 +1655,14 @@ flow_output_sb_port_binding_handler(struct
> engine_node *node, void *data)
> >      struct physical_ctx p_ctx;
> >      init_physical_ctx(node, rt_data, &p_ctx);
> >
> > +    if (!lflow_evaluate_pb_changes(p_ctx.port_binding_table)) {
> > +        /* If this returns false, it means there is an impact on
> > +         * the logical flow processing because of some changes in
> > +         * port bindings. Return false so that recompute is triggered
> > +         * for this stage. */
> > +        return false;
> > +    }
> > +
> >      physical_handle_port_binding_changes(&p_ctx, flow_table);
> >
> >      engine_set_node_state(node, EN_UPDATED);
> > @@ -1782,6 +1798,70 @@ flow_output_port_groups_handler(struct engine_node
> *node, void *data)
> >      return _flow_output_resource_ref_handler(node, data,
> REF_TYPE_PORTGROUP);
> >  }
> >
> > +struct ed_type_physical_flow_changes {
> > +    bool ct_zones_changed;
> > +};
> > +
> > +static bool
> > +flow_output_physical_flow_changes_handler(struct engine_node *node, void
> *data)
> > +{
> > +    struct ed_type_physical_flow_changes *pfc =
> > +        engine_get_input_data("physical_flow_changes", node);
> > +    struct ed_type_runtime_data *rt_data =
> > +        engine_get_input_data("runtime_data", node);
> > +
> > +    struct ed_type_flow_output *fo = data;
> > +    struct physical_ctx p_ctx;
> > +    init_physical_ctx(node, rt_data, &p_ctx);
> > +
> > +    engine_set_node_state(node, EN_UPDATED);
> > +    if (pfc->ct_zones_changed) {
> > +        pfc->ct_zones_changed = false;
> > +        physical_run(&p_ctx, &fo->flow_table);
> > +        return true;
> > +    }
> > +
> > +    return physical_handle_ovs_iface_changes(&p_ctx, &fo->flow_table);
> > +}
> > +
> > +static void *
> > +en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
> > +                             struct engine_arg *arg OVS_UNUSED)
> > +{
> > +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof *data);
> > +    return data;
> > +}
> > +
> > +static void
> > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
> > +{
> > +
> > +}
> > +
> > +static void
> > +en_physical_flow_changes_run(struct engine_node *node, void *data
> OVS_UNUSED)
> > +{
> > +    engine_set_node_state(node, EN_UPDATED);
> > +}
> > +
> > +static bool
> > +physical_flow_changes_ct_zones_handler(struct engine_node *node
> OVS_UNUSED,
> > +                                      void *data OVS_UNUSED)
> > +{
> > +    struct ed_type_physical_flow_changes *pfc = data;
> > +    pfc->ct_zones_changed = true;
> > +    engine_set_node_state(node, EN_UPDATED);
> > +    return false;
> > +}
> > +
> > +static bool
> > +flow_output_noop_handler(struct engine_node *node OVS_UNUSED,
> > +                          void *data OVS_UNUSED)
> > +{
> > +    return true;
> > +}
> > +
> > +
> >  struct ovn_controller_exit_args {
> >      bool *exiting;
> >      bool *restart;
> > @@ -1904,6 +1984,7 @@ main(int argc, char *argv[])
> >      ENGINE_NODE(runtime_data, "runtime_data");
> >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> > +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
> >      ENGINE_NODE(flow_output, "flow_output");
> >      ENGINE_NODE(addr_sets, "addr_sets");
> >      ENGINE_NODE(port_groups, "port_groups");
> > @@ -1923,16 +2004,27 @@ main(int argc, char *argv[])
> >      engine_add_input(&en_port_groups, &en_sb_port_group,
> >                       port_groups_sb_port_group_handler);
> >
> > +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
> > +                     physical_flow_changes_ct_zones_handler);
> > +    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
> > +                     NULL);
> > +
> >      engine_add_input(&en_flow_output, &en_addr_sets,
> >                       flow_output_addr_sets_handler);
> >      engine_add_input(&en_flow_output, &en_port_groups,
> >                       flow_output_port_groups_handler);
> > -    engine_add_input(&en_flow_output, &en_runtime_data, NULL);
> > -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
> > +    engine_add_input(&en_flow_output, &en_runtime_data,
> > +                     flow_output_noop_handler);
>
> I think this breaks the dependency and would result in correctness problem.
> Because flow_output depends on runtime_data, which includes lots of data
> structures such as local_lports, local_datapaths, active_tunnels, etc., all
> needed for flow compute to produce correct OVS flow output. Now with the
> noop handler, the changes in these data structures are ignored. How could
> we make sure the flows computed are still up to date?
>

Thanks for the comments Han. I get your point.

Let's say we have data structures 'A' and 'B' in runtime data. And if
events 'X' and 'Y' result in the modifications of these data structures in
runtime_data handlers, then handling (properly) these events  - 'X'
and 'Y' in the
flow_output engine should be enough to ensure the correctness of the
flows right ?

The present master has the below data structures in run_time data
  - local_datapaths (hmap)
  - local_lports (sset)
  - local_lport_ids (sset)
  - active_tunnels (sset)

This I-P series has the below data structures in run_time data
  - local_datapaths (hmap)
  - local_lports (sset)
  - local_lport_ids (sset)
  - local_bindings (shash)
  - active_tunnels (sset)
  - egress_ifaces (sset)
  - local_iface_ids (smap)

Except for active_tunnels, rest of the data structures are calculated
in binding.c
and in ovn-controller.c (during init, run and destroy of runtime_data)
And these data structures are modified in binding_run and in the ovs
interface change
handler and SB port binding change handler.

I think if we handle the events - ovs interface changes and sb port
binding changes in
flow output engine, then it should ensure correctness provided we
handle these changes
properly.

This patch series handles ovs interface changes  in flow_output
engine. We already
handle port binding changes, but we don't do any flow handling. I
think if we can
enhance this, we can solve this problem.

The present patch series definitely has issues with the runtime noop handler,
but it still has no issues with correctness because the ovn sb idl txn is NULL
for any updates ovn-controller does to SB DB, resulting in flow recomputation.
But of course this is wrong. And we should probably enhance the IDL tracker.

> From my point of view, to handle the runtime data changes incrementally, we
> should split the runtime data node into separate engine nodes, so that
> different changes can be handled differently for flow output compute. Some
> of the changes may be handled incrementally if it is easy to handle and if
> it is frequently changed, and leave those less frequent and hard-to-handle
> changes and let them trigger recompute.

I don't think this is straightforward to split them. If you see, most
of the runtime data
is related to port binding and is updated during port binding.

Please let me know what you think about the approach I mentioned above to
handle the correctness issue.

I have addressed your comments for the other patches.
Shall I go ahead and submit v2 of first 4 patches of this series ?
I think these patches can be considered.

Thanks
Numan

>
> >      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
> > +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
> > +                     flow_output_physical_flow_changes_handler);
> >
> >      engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
> >      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
> > +    engine_add_input(&en_flow_output, &en_ovs_interface,
> > +                     flow_output_noop_handler);
> > +    engine_add_input(&en_flow_output, &en_ct_zones,
> > +                     flow_output_noop_handler);
> >
> >      engine_add_input(&en_flow_output, &en_sb_chassis, NULL);
> >      engine_add_input(&en_flow_output, &en_sb_encap, NULL);
> > diff --git a/controller/ovn-controller.h b/controller/ovn-controller.h
> > index 5d9466880..40c358cef 100644
> > --- a/controller/ovn-controller.h
> > +++ b/controller/ovn-controller.h
> > @@ -72,6 +72,28 @@ struct local_datapath {
> >  struct local_datapath *get_local_datapath(const struct hmap *,
> >                                            uint32_t tunnel_key);
> >
> > +enum local_binding_type {
> > +    BT_VIF,
> > +    BT_CHILD,
> > +    BT_VIRTUAL
> > +};
> > +
> > +struct local_binding {
> > +    struct ovs_list node;       /* In parent if any. */
> > +    char *name;
> > +    enum local_binding_type type;
> > +    const struct ovsrec_interface *iface;
> > +    const struct sbrec_port_binding *pb;
> > +
> > +    struct ovs_list children;
> > +};
> > +
> > +static inline struct local_binding *
> > +local_binding_find(struct shash *local_bindings, const char *name)
> > +{
> > +    return shash_find_data(local_bindings, name);
> > +}
> > +
> >  const struct ovsrec_bridge *get_bridge(const struct ovsrec_bridge_table
> *,
> >                                         const char *br_name);
> >
> > diff --git a/controller/physical.c b/controller/physical.c
> > index 144aeb7bd..09a5d9b39 100644
> > --- a/controller/physical.c
> > +++ b/controller/physical.c
> > @@ -1780,6 +1780,54 @@ physical_run(struct physical_ctx *p_ctx,
> >      simap_destroy(&new_tunnel_to_ofport);
> >  }
> >
> > +bool
> > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
> > +                                  struct ovn_desired_flow_table
> *flow_table)
> > +{
> > +    const struct ovsrec_interface *iface_rec;
> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> p_ctx->iface_table) {
> > +        if (!strcmp(iface_rec->type, "geneve") ||
> > +            !strcmp(iface_rec->type, "patch")) {
> > +            return false;
> > +        }
> > +    }
> > +
> > +    struct ofpbuf ofpacts;
> > +    ofpbuf_init(&ofpacts, 0);
> > +
> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> p_ctx->iface_table) {
> > +        const char *iface_id = smap_get(&iface_rec->external_ids,
> "iface-id");
> > +        if (!iface_id) {
> > +            continue;
> > +        }
> > +
> > +        const struct local_binding *lb =
> > +            local_binding_find(p_ctx->local_bindings, iface_id);
> > +
> > +        if (!lb || !lb->pb) {
> > +            continue;
> > +        }
> > +
> > +        if (ovsrec_interface_is_deleted(iface_rec)) {
> > +            ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
> > +        } else {
> > +            if (!ovsrec_interface_is_new(iface_rec)) {
> > +                ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
> > +            }
> > +
> > +            consider_port_binding(p_ctx->sbrec_port_binding_by_name,
> > +                                  p_ctx->mff_ovn_geneve, p_ctx->ct_zones,
> > +                                  p_ctx->active_tunnels,
> > +                                  p_ctx->local_datapaths,
> > +                                  lb->pb, p_ctx->chassis,
> > +                                  flow_table, &ofpacts);
> > +        }
> > +    }
> > +
> > +    ofpbuf_uninit(&ofpacts);
> > +    return true;
> > +}
> > +
> >  bool
> >  get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t
> *ofport)
> >  {
> > diff --git a/controller/physical.h b/controller/physical.h
> > index dadf84ea1..9ca34436a 100644
> > --- a/controller/physical.h
> > +++ b/controller/physical.h
> > @@ -49,11 +49,13 @@ struct physical_ctx {
> >      const struct ovsrec_bridge *br_int;
> >      const struct sbrec_chassis_table *chassis_table;
> >      const struct sbrec_chassis *chassis;
> > +    const struct ovsrec_interface_table *iface_table;
> >      const struct sset *active_tunnels;
> >      struct hmap *local_datapaths;
> >      struct sset *local_lports;
> >      const struct simap *ct_zones;
> >      enum mf_field_id mff_ovn_geneve;
> > +    struct shash *local_bindings;
> >  };
> >
> >  void physical_register_ovs_idl(struct ovsdb_idl *);
> > @@ -63,7 +65,8 @@ void physical_handle_port_binding_changes(struct
> physical_ctx *,
> >                                            struct ovn_desired_flow_table
> *);
> >  void physical_handle_mc_group_changes(struct physical_ctx *,
> >                                        struct ovn_desired_flow_table *);
> > -
> > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
> > +                                       struct ovn_desired_flow_table *);
> >  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> >                         ofp_port_t *ofport);
> >  #endif /* controller/physical.h */
> > diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at
> > index 5c21f6dd7..555434484 100644
> > --- a/tests/ovn-performance.at
> > +++ b/tests/ovn-performance.at
> > @@ -263,7 +263,7 @@ for i in 1 2; do
> >      )
> >
> >      # Add router port to $ls
> > -    OVN_CONTROLLER_EXPECT_HIT(
> > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> >          [hv1 hv2], [lflow_run],
> >          [ovn-nbctl --wait=hv lrp-add lr1 $lrp 02:00:00:00:0$i:01
> 10.0.$i.1/24]
> >      )
> > @@ -271,15 +271,15 @@ for i in 1 2; do
> >          [hv1 hv2], [lflow_run],
> >          [ovn-nbctl --wait=hv lsp-add $ls $lsp]
> >      )
> > -    OVN_CONTROLLER_EXPECT_HIT(
> > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> >          [hv1 hv2], [lflow_run],
> >          [ovn-nbctl --wait=hv lsp-set-type $lsp router]
> >      )
> > -    OVN_CONTROLLER_EXPECT_HIT(
> > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> >          [hv1 hv2], [lflow_run],
> >          [ovn-nbctl --wait=hv lsp-set-options $lsp router-port=$lrp]
> >      )
> > -    OVN_CONTROLLER_EXPECT_HIT(
> > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> >          [hv1 hv2], [lflow_run],
> >          [ovn-nbctl --wait=hv lsp-set-addresses $lsp router]
> >      )
> > @@ -393,8 +393,8 @@ for i in 1 2; do
> >      lp=lp$i
> >
> >      # Delete port $lp
> > -    OVN_CONTROLLER_EXPECT_HIT_COND(
> > -        [hv$i hv$j], [lflow_run], [>0 =0],
> > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > +        [hv$i hv$j], [lflow_run],
> >          [ovn-nbctl --wait=hv lsp-del $lp]
> >      )
> >  done
> > @@ -409,7 +409,7 @@ for i in 1 2; do
> >      ls=ls$i
> >
> >      # Delete switch $ls
> > -    OVN_CONTROLLER_EXPECT_HIT(
> > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> >          [hv1 hv2], [lflow_run],
> >          [ovn-nbctl --wait=hv ls-del $ls]
> >      )
> > --
> > 2.24.1
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Han Zhou March 31, 2020, 5:53 p.m. UTC | #3
On Tue, Mar 31, 2020 at 5:40 AM Numan Siddique <numans@ovn.org> wrote:
>
> On Wed, Mar 25, 2020 at 12:23 PM Han Zhou <hzhou@ovn.org> wrote:
> >
> > On Mon, Mar 16, 2020 at 4:16 AM <numans@ovn.org> wrote:
> > >
> > > From: Numan Siddique <numans@ovn.org>
> > >
> > > This patch handles ct zone changes and OVS interface changes
incrementally
> > > in the flow output stage.
> > >
> > > Any changes to ct zone can be handled by running physical_run()
instead
> > of running
> > > flow_output_run(). And any changes to OVS interfaces can be either
handled
> > > incrementally (for OVS interfaces representing VIFs) or just running
> > > physical_run() (for tunnel and other types of interfaces).
> > >
> > > To better handle this, a new engine node 'physical_flow_changes' is
added
> > which
> > > handles changes to ct zone and OVS interfaces.
> > >
> > > After this patch, We don't trigger full recompute in flow output
stage for
> > > runtime data changes. There's noop handler for this. With this we
avoid
> > > calls to lflow_run() a lot.
> > >
> > > For the below commands the lflow_run counter without this patch is 16
> > > and with this patch is 3.
> > >
> > > -----------------------
> > > ovn-nbctl ls-add sw0
> > > ovn-nbctl lsp-add sw0 sw0-port1
> > > ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 192.168.0.2"
> > >
> > > ovn-nbctl ls-add sw1
> > > ovn-nbctl lsp-add sw1 sw1-port1
> > > ovn-nbctl lsp-set-addresses sw1-port1 "50:54:00:00:00:03 11.0.0.2"
> > >
> > > ovn-nbctl lr-add lr0
> > > ovn-nbctl lrp-add lr0 lrp0 00:00:00:00:ff:01 192.168.0.1/24
> > > ovn-nbctl lsp-add sw0 lrp0-attachment
> > > ovn-nbctl lsp-set-type lrp0-attachment router
> > > ovn-nbctl lsp-set-addresses lrp0-attachment 00:00:00:00:ff:01
> > > ovn-nbctl lsp-set-options lrp0-attachment router-port=lrp0
> > > ovn-nbctl lrp-add lr0 lrp1 00:00:00:00:ff:02 11.0.0.1/24
> > > ovn-nbctl lsp-add sw1 lrp1-attachment
> > > ovn-nbctl lsp-set-type lrp1-attachment router
> > > ovn-nbctl lsp-set-addresses lrp1-attachment 00:00:00:00:ff:02
> > > ovn-nbctl lsp-set-options lrp1-attachment router-port=lrp1
> > >
> > > ovs-vsctl add-port br-int p1 -- \
> > >     set Interface p1 external_ids:iface-id=sw0-port1
> > > ovs-vsctl add-port br-int p2 -- \
> > >     set Interface p2 external_ids:iface-id=sw1-port1
> > > ovn-appctl -t ovn-controller coverage/read-counter lflow_run
> > > -------------------
> > >
> > > Signed-off-by: Numan Siddique <numans@ovn.org>
> > > ---
> > >  controller/binding.c        | 22 ---------
> > >  controller/lflow.c          | 13 +++++
> > >  controller/lflow.h          |  2 +
> > >  controller/ovn-controller.c | 96
++++++++++++++++++++++++++++++++++++-
> > >  controller/ovn-controller.h | 22 +++++++++
> > >  controller/physical.c       | 48 +++++++++++++++++++
> > >  controller/physical.h       |  5 +-
> > >  tests/ovn-performance.at    | 14 +++---
> > >  8 files changed, 190 insertions(+), 32 deletions(-)
> > >
> > > diff --git a/controller/binding.c b/controller/binding.c
> > > index ce4467f31..9dc1c3f38 100644
> > > --- a/controller/binding.c
> > > +++ b/controller/binding.c
> > > @@ -501,22 +501,6 @@ remove_local_lport_ids(const struct
> > sbrec_port_binding *binding_rec,
> > >          sset_find_and_delete(local_lport_ids, buf);
> > >  }
> > >
> > > -enum local_binding_type {
> > > -    BT_VIF,
> > > -    BT_CHILD,
> > > -    BT_VIRTUAL
> > > -};
> > > -
> > > -struct local_binding {
> > > -    struct ovs_list node;       /* In parent if any. */
> > > -    char *name;
> > > -    enum local_binding_type type;
> > > -    const struct ovsrec_interface *iface;
> > > -    const struct sbrec_port_binding *pb;
> > > -
> > > -    struct ovs_list children;
> > > -};
> > > -
> > >  static struct local_binding *
> > >  local_binding_create(const char *name, const struct ovsrec_interface
> > *iface,
> > >                       const struct sbrec_port_binding *pb,
> > > @@ -537,12 +521,6 @@ local_binding_add(struct shash *local_bindings,
> > struct local_binding *lbinding)
> > >      shash_add(local_bindings, lbinding->name, lbinding);
> > >  }
> > >
> > > -static struct local_binding *
> > > -local_binding_find(struct shash *local_bindings, const char *name)
> > > -{
> > > -    return shash_find_data(local_bindings, name);
> > > -}
> > > -
> > >  static void
> > >  local_binding_destroy(struct local_binding *lbinding)
> > >  {
> > > diff --git a/controller/lflow.c b/controller/lflow.c
> > > index 01214a3a6..512258cd3 100644
> > > --- a/controller/lflow.c
> > > +++ b/controller/lflow.c
> > > @@ -847,3 +847,16 @@ lflow_destroy(void)
> > >      expr_symtab_destroy(&symtab);
> > >      shash_destroy(&symtab);
> > >  }
> > > +
> > > +bool
> > > +lflow_evaluate_pb_changes(const struct sbrec_port_binding_table
> > *pb_table)
> > > +{
> > > +    const struct sbrec_port_binding *binding;
> > > +    SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (binding, pb_table) {
> > > +        if (!strcmp(binding->type, "remote")) {
> > > +            return false;
> > > +        }
> > > +    }
> > > +
> > > +    return true;
> > > +}
> > > diff --git a/controller/lflow.h b/controller/lflow.h
> > > index f02f709d7..fa54d4de2 100644
> > > --- a/controller/lflow.h
> > > +++ b/controller/lflow.h
> > > @@ -48,6 +48,7 @@ struct sbrec_dhcp_options_table;
> > >  struct sbrec_dhcpv6_options_table;
> > >  struct sbrec_logical_flow_table;
> > >  struct sbrec_mac_binding_table;
> > > +struct sbrec_port_binding_table;
> > >  struct simap;
> > >  struct sset;
> > >  struct uuid;
> > > @@ -153,6 +154,7 @@ void lflow_handle_changed_neighbors(
> > >      const struct hmap *local_datapaths,
> > >      struct ovn_desired_flow_table *);
> > >
> > > +bool lflow_evaluate_pb_changes(const struct sbrec_port_binding_table
*);
> > >  void lflow_destroy(void);
> > >
> > >  void lflow_expr_destroy(struct hmap *lflow_expr_cache);
> > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > > index 07823f020..448e9908e 100644
> > > --- a/controller/ovn-controller.c
> > > +++ b/controller/ovn-controller.c
> > > @@ -1360,6 +1360,10 @@ static void init_physical_ctx(struct
engine_node
> > *node,
> > >
> > >      ovs_assert(br_int && chassis);
> > >
> > > +    struct ovsrec_interface_table *iface_table =
> > > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> > > +            engine_get_input("OVS_interface", node));
> > > +
> > >      struct ed_type_ct_zones *ct_zones_data =
> > >          engine_get_input_data("ct_zones", node);
> > >      struct simap *ct_zones = &ct_zones_data->current;
> > > @@ -1369,12 +1373,14 @@ static void init_physical_ctx(struct
engine_node
> > *node,
> > >      p_ctx->mc_group_table = multicast_group_table;
> > >      p_ctx->br_int = br_int;
> > >      p_ctx->chassis_table = chassis_table;
> > > +    p_ctx->iface_table = iface_table;
> > >      p_ctx->chassis = chassis;
> > >      p_ctx->active_tunnels = &rt_data->active_tunnels;
> > >      p_ctx->local_datapaths = &rt_data->local_datapaths;
> > >      p_ctx->local_lports = &rt_data->local_lports;
> > >      p_ctx->ct_zones = ct_zones;
> > >      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
> > > +    p_ctx->local_bindings = &rt_data->local_bindings;
> > >  }
> > >
> > >  static void init_lflow_ctx(struct engine_node *node,
> > > @@ -1637,6 +1643,8 @@ flow_output_sb_port_binding_handler(struct
> > engine_node *node, void *data)
> > >       *    always logical router port, and any change to logical
router
> > port
> > >       *    would just trigger recompute.
> > >       *
> > > +     *  - When a port binding of type 'remote' is updated by ovn-ic.
> > > +     *
> > >       * Although there is no correctness issue so far (except the
unusual
> > ACL
> > >       * use case, which doesn't seem to be a real problem), it might
be
> > better
> > >       * to handle this more gracefully, without the need to consider
these
> > > @@ -1647,6 +1655,14 @@ flow_output_sb_port_binding_handler(struct
> > engine_node *node, void *data)
> > >      struct physical_ctx p_ctx;
> > >      init_physical_ctx(node, rt_data, &p_ctx);
> > >
> > > +    if (!lflow_evaluate_pb_changes(p_ctx.port_binding_table)) {
> > > +        /* If this returns false, it means there is an impact on
> > > +         * the logical flow processing because of some changes in
> > > +         * port bindings. Return false so that recompute is triggered
> > > +         * for this stage. */
> > > +        return false;
> > > +    }
> > > +
> > >      physical_handle_port_binding_changes(&p_ctx, flow_table);
> > >
> > >      engine_set_node_state(node, EN_UPDATED);
> > > @@ -1782,6 +1798,70 @@ flow_output_port_groups_handler(struct
engine_node
> > *node, void *data)
> > >      return _flow_output_resource_ref_handler(node, data,
> > REF_TYPE_PORTGROUP);
> > >  }
> > >
> > > +struct ed_type_physical_flow_changes {
> > > +    bool ct_zones_changed;
> > > +};
> > > +
> > > +static bool
> > > +flow_output_physical_flow_changes_handler(struct engine_node *node,
void
> > *data)
> > > +{
> > > +    struct ed_type_physical_flow_changes *pfc =
> > > +        engine_get_input_data("physical_flow_changes", node);
> > > +    struct ed_type_runtime_data *rt_data =
> > > +        engine_get_input_data("runtime_data", node);
> > > +
> > > +    struct ed_type_flow_output *fo = data;
> > > +    struct physical_ctx p_ctx;
> > > +    init_physical_ctx(node, rt_data, &p_ctx);
> > > +
> > > +    engine_set_node_state(node, EN_UPDATED);
> > > +    if (pfc->ct_zones_changed) {
> > > +        pfc->ct_zones_changed = false;
> > > +        physical_run(&p_ctx, &fo->flow_table);
> > > +        return true;
> > > +    }
> > > +
> > > +    return physical_handle_ovs_iface_changes(&p_ctx,
&fo->flow_table);
> > > +}
> > > +
> > > +static void *
> > > +en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
> > > +                             struct engine_arg *arg OVS_UNUSED)
> > > +{
> > > +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof
*data);
> > > +    return data;
> > > +}
> > > +
> > > +static void
> > > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
> > > +{
> > > +
> > > +}
> > > +
> > > +static void
> > > +en_physical_flow_changes_run(struct engine_node *node, void *data
> > OVS_UNUSED)
> > > +{
> > > +    engine_set_node_state(node, EN_UPDATED);
> > > +}
> > > +
> > > +static bool
> > > +physical_flow_changes_ct_zones_handler(struct engine_node *node
> > OVS_UNUSED,
> > > +                                      void *data OVS_UNUSED)
> > > +{
> > > +    struct ed_type_physical_flow_changes *pfc = data;
> > > +    pfc->ct_zones_changed = true;
> > > +    engine_set_node_state(node, EN_UPDATED);
> > > +    return false;
> > > +}
> > > +
> > > +static bool
> > > +flow_output_noop_handler(struct engine_node *node OVS_UNUSED,
> > > +                          void *data OVS_UNUSED)
> > > +{
> > > +    return true;
> > > +}
> > > +
> > > +
> > >  struct ovn_controller_exit_args {
> > >      bool *exiting;
> > >      bool *restart;
> > > @@ -1904,6 +1984,7 @@ main(int argc, char *argv[])
> > >      ENGINE_NODE(runtime_data, "runtime_data");
> > >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> > >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> > > +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
> > >      ENGINE_NODE(flow_output, "flow_output");
> > >      ENGINE_NODE(addr_sets, "addr_sets");
> > >      ENGINE_NODE(port_groups, "port_groups");
> > > @@ -1923,16 +2004,27 @@ main(int argc, char *argv[])
> > >      engine_add_input(&en_port_groups, &en_sb_port_group,
> > >                       port_groups_sb_port_group_handler);
> > >
> > > +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
> > > +                     physical_flow_changes_ct_zones_handler);
> > > +    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
> > > +                     NULL);
> > > +
> > >      engine_add_input(&en_flow_output, &en_addr_sets,
> > >                       flow_output_addr_sets_handler);
> > >      engine_add_input(&en_flow_output, &en_port_groups,
> > >                       flow_output_port_groups_handler);
> > > -    engine_add_input(&en_flow_output, &en_runtime_data, NULL);
> > > -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
> > > +    engine_add_input(&en_flow_output, &en_runtime_data,
> > > +                     flow_output_noop_handler);
> >
> > I think this breaks the dependency and would result in correctness
problem.
> > Because flow_output depends on runtime_data, which includes lots of data
> > structures such as local_lports, local_datapaths, active_tunnels, etc.,
all
> > needed for flow compute to produce correct OVS flow output. Now with the
> > noop handler, the changes in these data structures are ignored. How
could
> > we make sure the flows computed are still up to date?
> >
>
> Thanks for the comments Han. I get your point.
>
> Let's say we have data structures 'A' and 'B' in runtime data. And if
> events 'X' and 'Y' result in the modifications of these data structures in
> runtime_data handlers, then handling (properly) these events  - 'X'
> and 'Y' in the
> flow_output engine should be enough to ensure the correctness of the
> flows right ?
>
Well, this is not how the I-P engine supposed to work. The engine is purely
data-driven. In your example, X or Y is just input data (another engine
node) change of runtime data. They need to be handled in runtime data node.
If flow_output engine also directly depend on X or Y, of course flow_output
node also need to handle the change of X or Y, in addition to handle the
change of runtime data. But handling X and Y in flow_output doesn't replace
the handling in runtime data. A short cut is, if handling of X or Y always
trigger recompute in flow_output, of course we can say it is still correct
even if they are not handled in runtime data. I hope this clarifies the
design.

> The present master has the below data structures in run_time data
>   - local_datapaths (hmap)
>   - local_lports (sset)
>   - local_lport_ids (sset)
>   - active_tunnels (sset)
>
> This I-P series has the below data structures in run_time data
>   - local_datapaths (hmap)
>   - local_lports (sset)
>   - local_lport_ids (sset)
>   - local_bindings (shash)
>   - active_tunnels (sset)
>   - egress_ifaces (sset)
>   - local_iface_ids (smap)
>
> Except for active_tunnels, rest of the data structures are calculated
> in binding.c
> and in ovn-controller.c (during init, run and destroy of runtime_data)
> And these data structures are modified in binding_run and in the ovs
> interface change
> handler and SB port binding change handler.
>
> I think if we handle the events - ovs interface changes and sb port
> binding changes in
> flow output engine, then it should ensure correctness provided we
> handle these changes
> properly.
>
Since flow_output depends on runtime data, and it take into consideration
all the data structures you listed above for flow computing, it means if
any of these data changes we need to evaluate them in flow_output.
Otherwise, it is either incorrect or some data in runtime data node is
useless. It doesn't seem right that the data is needed but we just ignore
the change. If, some of the data is needed as output but not needed as
input of flow_output, then we can simply split them out from runtime data
and don't add them as input of flow_output node. In this case, not handling
those changes is not a problem.

> This patch series handles ovs interface changes  in flow_output
> engine. We already
> handle port binding changes, but we don't do any flow handling. I
> think if we can
> enhance this, we can solve this problem.
>
> The present patch series definitely has issues with the runtime noop
handler,
> but it still has no issues with correctness because the ovn sb idl txn is
NULL
> for any updates ovn-controller does to SB DB, resulting in flow
recomputation.
> But of course this is wrong. And we should probably enhance the IDL
tracker.
>
> > From my point of view, to handle the runtime data changes
incrementally, we
> > should split the runtime data node into separate engine nodes, so that
> > different changes can be handled differently for flow output compute.
Some
> > of the changes may be handled incrementally if it is easy to handle and
if
> > it is frequently changed, and leave those less frequent and
hard-to-handle
> > changes and let them trigger recompute.
>
> I don't think this is straightforward to split them. If you see, most
> of the runtime data
> is related to port binding and is updated during port binding.

If a data structure depends on port_binding, it is straightforward to split
it as a new node and add the SB_port_binding node as its input node, and
then add itself as input node for flow_output. Then implement the run() and
change_handler for SB_port_binding.
Usually it may be not that simple because the new node may also depend on
some other data structures in runtime data, so the dependency needs to be
added and handled as well.
The split strategy should be based on the need of I-P, i.e. split the part
of data that triggers most recomputes, and handle it incrementally in most
situation. This ensures least effort for best outcome.

>
> Please let me know what you think about the approach I mentioned above to
> handle the correctness issue.
>
> I have addressed your comments for the other patches.
> Shall I go ahead and submit v2 of first 4 patches of this series ?
> I think these patches can be considered.

Since those are not bug fixes and more related to the rest of the series,
I'd suggest to submit v2 together for the whole series in case you want to
change them in a different way.

Thanks again for taking the heavy-lift part of I-P!

>
> Thanks
> Numan
>
> >
> > >      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
> > > +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
> > > +                     flow_output_physical_flow_changes_handler);
> > >
> > >      engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
> > >      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
> > > +    engine_add_input(&en_flow_output, &en_ovs_interface,
> > > +                     flow_output_noop_handler);
> > > +    engine_add_input(&en_flow_output, &en_ct_zones,
> > > +                     flow_output_noop_handler);
> > >
> > >      engine_add_input(&en_flow_output, &en_sb_chassis, NULL);
> > >      engine_add_input(&en_flow_output, &en_sb_encap, NULL);
> > > diff --git a/controller/ovn-controller.h b/controller/ovn-controller.h
> > > index 5d9466880..40c358cef 100644
> > > --- a/controller/ovn-controller.h
> > > +++ b/controller/ovn-controller.h
> > > @@ -72,6 +72,28 @@ struct local_datapath {
> > >  struct local_datapath *get_local_datapath(const struct hmap *,
> > >                                            uint32_t tunnel_key);
> > >
> > > +enum local_binding_type {
> > > +    BT_VIF,
> > > +    BT_CHILD,
> > > +    BT_VIRTUAL
> > > +};
> > > +
> > > +struct local_binding {
> > > +    struct ovs_list node;       /* In parent if any. */
> > > +    char *name;
> > > +    enum local_binding_type type;
> > > +    const struct ovsrec_interface *iface;
> > > +    const struct sbrec_port_binding *pb;
> > > +
> > > +    struct ovs_list children;
> > > +};
> > > +
> > > +static inline struct local_binding *
> > > +local_binding_find(struct shash *local_bindings, const char *name)
> > > +{
> > > +    return shash_find_data(local_bindings, name);
> > > +}
> > > +
> > >  const struct ovsrec_bridge *get_bridge(const struct
ovsrec_bridge_table
> > *,
> > >                                         const char *br_name);
> > >
> > > diff --git a/controller/physical.c b/controller/physical.c
> > > index 144aeb7bd..09a5d9b39 100644
> > > --- a/controller/physical.c
> > > +++ b/controller/physical.c
> > > @@ -1780,6 +1780,54 @@ physical_run(struct physical_ctx *p_ctx,
> > >      simap_destroy(&new_tunnel_to_ofport);
> > >  }
> > >
> > > +bool
> > > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
> > > +                                  struct ovn_desired_flow_table
> > *flow_table)
> > > +{
> > > +    const struct ovsrec_interface *iface_rec;
> > > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> > p_ctx->iface_table) {
> > > +        if (!strcmp(iface_rec->type, "geneve") ||
> > > +            !strcmp(iface_rec->type, "patch")) {
> > > +            return false;
> > > +        }
> > > +    }
> > > +
> > > +    struct ofpbuf ofpacts;
> > > +    ofpbuf_init(&ofpacts, 0);
> > > +
> > > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> > p_ctx->iface_table) {
> > > +        const char *iface_id = smap_get(&iface_rec->external_ids,
> > "iface-id");
> > > +        if (!iface_id) {
> > > +            continue;
> > > +        }
> > > +
> > > +        const struct local_binding *lb =
> > > +            local_binding_find(p_ctx->local_bindings, iface_id);
> > > +
> > > +        if (!lb || !lb->pb) {
> > > +            continue;
> > > +        }
> > > +
> > > +        if (ovsrec_interface_is_deleted(iface_rec)) {
> > > +            ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
> > > +        } else {
> > > +            if (!ovsrec_interface_is_new(iface_rec)) {
> > > +                ofctrl_remove_flows(flow_table,
&lb->pb->header_.uuid);
> > > +            }
> > > +
> > > +            consider_port_binding(p_ctx->sbrec_port_binding_by_name,
> > > +                                  p_ctx->mff_ovn_geneve,
p_ctx->ct_zones,
> > > +                                  p_ctx->active_tunnels,
> > > +                                  p_ctx->local_datapaths,
> > > +                                  lb->pb, p_ctx->chassis,
> > > +                                  flow_table, &ofpacts);
> > > +        }
> > > +    }
> > > +
> > > +    ofpbuf_uninit(&ofpacts);
> > > +    return true;
> > > +}
> > > +
> > >  bool
> > >  get_tunnel_ofport(const char *chassis_name, char *encap_ip,
ofp_port_t
> > *ofport)
> > >  {
> > > diff --git a/controller/physical.h b/controller/physical.h
> > > index dadf84ea1..9ca34436a 100644
> > > --- a/controller/physical.h
> > > +++ b/controller/physical.h
> > > @@ -49,11 +49,13 @@ struct physical_ctx {
> > >      const struct ovsrec_bridge *br_int;
> > >      const struct sbrec_chassis_table *chassis_table;
> > >      const struct sbrec_chassis *chassis;
> > > +    const struct ovsrec_interface_table *iface_table;
> > >      const struct sset *active_tunnels;
> > >      struct hmap *local_datapaths;
> > >      struct sset *local_lports;
> > >      const struct simap *ct_zones;
> > >      enum mf_field_id mff_ovn_geneve;
> > > +    struct shash *local_bindings;
> > >  };
> > >
> > >  void physical_register_ovs_idl(struct ovsdb_idl *);
> > > @@ -63,7 +65,8 @@ void physical_handle_port_binding_changes(struct
> > physical_ctx *,
> > >                                            struct
ovn_desired_flow_table
> > *);
> > >  void physical_handle_mc_group_changes(struct physical_ctx *,
> > >                                        struct ovn_desired_flow_table
*);
> > > -
> > > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
> > > +                                       struct ovn_desired_flow_table
*);
> > >  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> > >                         ofp_port_t *ofport);
> > >  #endif /* controller/physical.h */
> > > diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at
> > > index 5c21f6dd7..555434484 100644
> > > --- a/tests/ovn-performance.at
> > > +++ b/tests/ovn-performance.at
> > > @@ -263,7 +263,7 @@ for i in 1 2; do
> > >      )
> > >
> > >      # Add router port to $ls
> > > -    OVN_CONTROLLER_EXPECT_HIT(
> > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > >          [hv1 hv2], [lflow_run],
> > >          [ovn-nbctl --wait=hv lrp-add lr1 $lrp 02:00:00:00:0$i:01
> > 10.0.$i.1/24]
> > >      )
> > > @@ -271,15 +271,15 @@ for i in 1 2; do
> > >          [hv1 hv2], [lflow_run],
> > >          [ovn-nbctl --wait=hv lsp-add $ls $lsp]
> > >      )
> > > -    OVN_CONTROLLER_EXPECT_HIT(
> > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > >          [hv1 hv2], [lflow_run],
> > >          [ovn-nbctl --wait=hv lsp-set-type $lsp router]
> > >      )
> > > -    OVN_CONTROLLER_EXPECT_HIT(
> > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > >          [hv1 hv2], [lflow_run],
> > >          [ovn-nbctl --wait=hv lsp-set-options $lsp router-port=$lrp]
> > >      )
> > > -    OVN_CONTROLLER_EXPECT_HIT(
> > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > >          [hv1 hv2], [lflow_run],
> > >          [ovn-nbctl --wait=hv lsp-set-addresses $lsp router]
> > >      )
> > > @@ -393,8 +393,8 @@ for i in 1 2; do
> > >      lp=lp$i
> > >
> > >      # Delete port $lp
> > > -    OVN_CONTROLLER_EXPECT_HIT_COND(
> > > -        [hv$i hv$j], [lflow_run], [>0 =0],
> > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > > +        [hv$i hv$j], [lflow_run],
> > >          [ovn-nbctl --wait=hv lsp-del $lp]
> > >      )
> > >  done
> > > @@ -409,7 +409,7 @@ for i in 1 2; do
> > >      ls=ls$i
> > >
> > >      # Delete switch $ls
> > > -    OVN_CONTROLLER_EXPECT_HIT(
> > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > >          [hv1 hv2], [lflow_run],
> > >          [ovn-nbctl --wait=hv ls-del $ls]
> > >      )
> > > --
> > > 2.24.1
> > >
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
Numan Siddique March 31, 2020, 7:23 p.m. UTC | #4
Thanks Han for the comments. I will go through the comments and come back to
you with further comments or questions tomorrow.

I have one comment on the last part. Please see below.

Thanks
Numan


On Tue, Mar 31, 2020, 11:25 PM Han Zhou <hzhou@ovn.org> wrote:

> On Tue, Mar 31, 2020 at 5:40 AM Numan Siddique <numans@ovn.org> wrote:
> >
> > On Wed, Mar 25, 2020 at 12:23 PM Han Zhou <hzhou@ovn.org> wrote:
> > >
> > > On Mon, Mar 16, 2020 at 4:16 AM <numans@ovn.org> wrote:
> > > >
> > > > From: Numan Siddique <numans@ovn.org>
> > > >
> > > > This patch handles ct zone changes and OVS interface changes
> incrementally
> > > > in the flow output stage.
> > > >
> > > > Any changes to ct zone can be handled by running physical_run()
> instead
> > > of running
> > > > flow_output_run(). And any changes to OVS interfaces can be either
> handled
> > > > incrementally (for OVS interfaces representing VIFs) or just running
> > > > physical_run() (for tunnel and other types of interfaces).
> > > >
> > > > To better handle this, a new engine node 'physical_flow_changes' is
> added
> > > which
> > > > handles changes to ct zone and OVS interfaces.
> > > >
> > > > After this patch, We don't trigger full recompute in flow output
> stage for
> > > > runtime data changes. There's noop handler for this. With this we
> avoid
> > > > calls to lflow_run() a lot.
> > > >
> > > > For the below commands the lflow_run counter without this patch is 16
> > > > and with this patch is 3.
> > > >
> > > > -----------------------
> > > > ovn-nbctl ls-add sw0
> > > > ovn-nbctl lsp-add sw0 sw0-port1
> > > > ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 192.168.0.2"
> > > >
> > > > ovn-nbctl ls-add sw1
> > > > ovn-nbctl lsp-add sw1 sw1-port1
> > > > ovn-nbctl lsp-set-addresses sw1-port1 "50:54:00:00:00:03 11.0.0.2"
> > > >
> > > > ovn-nbctl lr-add lr0
> > > > ovn-nbctl lrp-add lr0 lrp0 00:00:00:00:ff:01 192.168.0.1/24
> > > > ovn-nbctl lsp-add sw0 lrp0-attachment
> > > > ovn-nbctl lsp-set-type lrp0-attachment router
> > > > ovn-nbctl lsp-set-addresses lrp0-attachment 00:00:00:00:ff:01
> > > > ovn-nbctl lsp-set-options lrp0-attachment router-port=lrp0
> > > > ovn-nbctl lrp-add lr0 lrp1 00:00:00:00:ff:02 11.0.0.1/24
> > > > ovn-nbctl lsp-add sw1 lrp1-attachment
> > > > ovn-nbctl lsp-set-type lrp1-attachment router
> > > > ovn-nbctl lsp-set-addresses lrp1-attachment 00:00:00:00:ff:02
> > > > ovn-nbctl lsp-set-options lrp1-attachment router-port=lrp1
> > > >
> > > > ovs-vsctl add-port br-int p1 -- \
> > > >     set Interface p1 external_ids:iface-id=sw0-port1
> > > > ovs-vsctl add-port br-int p2 -- \
> > > >     set Interface p2 external_ids:iface-id=sw1-port1
> > > > ovn-appctl -t ovn-controller coverage/read-counter lflow_run
> > > > -------------------
> > > >
> > > > Signed-off-by: Numan Siddique <numans@ovn.org>
> > > > ---
> > > >  controller/binding.c        | 22 ---------
> > > >  controller/lflow.c          | 13 +++++
> > > >  controller/lflow.h          |  2 +
> > > >  controller/ovn-controller.c | 96
> ++++++++++++++++++++++++++++++++++++-
> > > >  controller/ovn-controller.h | 22 +++++++++
> > > >  controller/physical.c       | 48 +++++++++++++++++++
> > > >  controller/physical.h       |  5 +-
> > > >  tests/ovn-performance.at    | 14 +++---
> > > >  8 files changed, 190 insertions(+), 32 deletions(-)
> > > >
> > > > diff --git a/controller/binding.c b/controller/binding.c
> > > > index ce4467f31..9dc1c3f38 100644
> > > > --- a/controller/binding.c
> > > > +++ b/controller/binding.c
> > > > @@ -501,22 +501,6 @@ remove_local_lport_ids(const struct
> > > sbrec_port_binding *binding_rec,
> > > >          sset_find_and_delete(local_lport_ids, buf);
> > > >  }
> > > >
> > > > -enum local_binding_type {
> > > > -    BT_VIF,
> > > > -    BT_CHILD,
> > > > -    BT_VIRTUAL
> > > > -};
> > > > -
> > > > -struct local_binding {
> > > > -    struct ovs_list node;       /* In parent if any. */
> > > > -    char *name;
> > > > -    enum local_binding_type type;
> > > > -    const struct ovsrec_interface *iface;
> > > > -    const struct sbrec_port_binding *pb;
> > > > -
> > > > -    struct ovs_list children;
> > > > -};
> > > > -
> > > >  static struct local_binding *
> > > >  local_binding_create(const char *name, const struct ovsrec_interface
> > > *iface,
> > > >                       const struct sbrec_port_binding *pb,
> > > > @@ -537,12 +521,6 @@ local_binding_add(struct shash *local_bindings,
> > > struct local_binding *lbinding)
> > > >      shash_add(local_bindings, lbinding->name, lbinding);
> > > >  }
> > > >
> > > > -static struct local_binding *
> > > > -local_binding_find(struct shash *local_bindings, const char *name)
> > > > -{
> > > > -    return shash_find_data(local_bindings, name);
> > > > -}
> > > > -
> > > >  static void
> > > >  local_binding_destroy(struct local_binding *lbinding)
> > > >  {
> > > > diff --git a/controller/lflow.c b/controller/lflow.c
> > > > index 01214a3a6..512258cd3 100644
> > > > --- a/controller/lflow.c
> > > > +++ b/controller/lflow.c
> > > > @@ -847,3 +847,16 @@ lflow_destroy(void)
> > > >      expr_symtab_destroy(&symtab);
> > > >      shash_destroy(&symtab);
> > > >  }
> > > > +
> > > > +bool
> > > > +lflow_evaluate_pb_changes(const struct sbrec_port_binding_table
> > > *pb_table)
> > > > +{
> > > > +    const struct sbrec_port_binding *binding;
> > > > +    SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (binding, pb_table) {
> > > > +        if (!strcmp(binding->type, "remote")) {
> > > > +            return false;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    return true;
> > > > +}
> > > > diff --git a/controller/lflow.h b/controller/lflow.h
> > > > index f02f709d7..fa54d4de2 100644
> > > > --- a/controller/lflow.h
> > > > +++ b/controller/lflow.h
> > > > @@ -48,6 +48,7 @@ struct sbrec_dhcp_options_table;
> > > >  struct sbrec_dhcpv6_options_table;
> > > >  struct sbrec_logical_flow_table;
> > > >  struct sbrec_mac_binding_table;
> > > > +struct sbrec_port_binding_table;
> > > >  struct simap;
> > > >  struct sset;
> > > >  struct uuid;
> > > > @@ -153,6 +154,7 @@ void lflow_handle_changed_neighbors(
> > > >      const struct hmap *local_datapaths,
> > > >      struct ovn_desired_flow_table *);
> > > >
> > > > +bool lflow_evaluate_pb_changes(const struct sbrec_port_binding_table
> *);
> > > >  void lflow_destroy(void);
> > > >
> > > >  void lflow_expr_destroy(struct hmap *lflow_expr_cache);
> > > > diff --git a/controller/ovn-controller.c
> b/controller/ovn-controller.c
> > > > index 07823f020..448e9908e 100644
> > > > --- a/controller/ovn-controller.c
> > > > +++ b/controller/ovn-controller.c
> > > > @@ -1360,6 +1360,10 @@ static void init_physical_ctx(struct
> engine_node
> > > *node,
> > > >
> > > >      ovs_assert(br_int && chassis);
> > > >
> > > > +    struct ovsrec_interface_table *iface_table =
> > > > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> > > > +            engine_get_input("OVS_interface", node));
> > > > +
> > > >      struct ed_type_ct_zones *ct_zones_data =
> > > >          engine_get_input_data("ct_zones", node);
> > > >      struct simap *ct_zones = &ct_zones_data->current;
> > > > @@ -1369,12 +1373,14 @@ static void init_physical_ctx(struct
> engine_node
> > > *node,
> > > >      p_ctx->mc_group_table = multicast_group_table;
> > > >      p_ctx->br_int = br_int;
> > > >      p_ctx->chassis_table = chassis_table;
> > > > +    p_ctx->iface_table = iface_table;
> > > >      p_ctx->chassis = chassis;
> > > >      p_ctx->active_tunnels = &rt_data->active_tunnels;
> > > >      p_ctx->local_datapaths = &rt_data->local_datapaths;
> > > >      p_ctx->local_lports = &rt_data->local_lports;
> > > >      p_ctx->ct_zones = ct_zones;
> > > >      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
> > > > +    p_ctx->local_bindings = &rt_data->local_bindings;
> > > >  }
> > > >
> > > >  static void init_lflow_ctx(struct engine_node *node,
> > > > @@ -1637,6 +1643,8 @@ flow_output_sb_port_binding_handler(struct
> > > engine_node *node, void *data)
> > > >       *    always logical router port, and any change to logical
> router
> > > port
> > > >       *    would just trigger recompute.
> > > >       *
> > > > +     *  - When a port binding of type 'remote' is updated by ovn-ic.
> > > > +     *
> > > >       * Although there is no correctness issue so far (except the
> unusual
> > > ACL
> > > >       * use case, which doesn't seem to be a real problem), it might
> be
> > > better
> > > >       * to handle this more gracefully, without the need to consider
> these
> > > > @@ -1647,6 +1655,14 @@ flow_output_sb_port_binding_handler(struct
> > > engine_node *node, void *data)
> > > >      struct physical_ctx p_ctx;
> > > >      init_physical_ctx(node, rt_data, &p_ctx);
> > > >
> > > > +    if (!lflow_evaluate_pb_changes(p_ctx.port_binding_table)) {
> > > > +        /* If this returns false, it means there is an impact on
> > > > +         * the logical flow processing because of some changes in
> > > > +         * port bindings. Return false so that recompute is
> triggered
> > > > +         * for this stage. */
> > > > +        return false;
> > > > +    }
> > > > +
> > > >      physical_handle_port_binding_changes(&p_ctx, flow_table);
> > > >
> > > >      engine_set_node_state(node, EN_UPDATED);
> > > > @@ -1782,6 +1798,70 @@ flow_output_port_groups_handler(struct
> engine_node
> > > *node, void *data)
> > > >      return _flow_output_resource_ref_handler(node, data,
> > > REF_TYPE_PORTGROUP);
> > > >  }
> > > >
> > > > +struct ed_type_physical_flow_changes {
> > > > +    bool ct_zones_changed;
> > > > +};
> > > > +
> > > > +static bool
> > > > +flow_output_physical_flow_changes_handler(struct engine_node *node,
> void
> > > *data)
> > > > +{
> > > > +    struct ed_type_physical_flow_changes *pfc =
> > > > +        engine_get_input_data("physical_flow_changes", node);
> > > > +    struct ed_type_runtime_data *rt_data =
> > > > +        engine_get_input_data("runtime_data", node);
> > > > +
> > > > +    struct ed_type_flow_output *fo = data;
> > > > +    struct physical_ctx p_ctx;
> > > > +    init_physical_ctx(node, rt_data, &p_ctx);
> > > > +
> > > > +    engine_set_node_state(node, EN_UPDATED);
> > > > +    if (pfc->ct_zones_changed) {
> > > > +        pfc->ct_zones_changed = false;
> > > > +        physical_run(&p_ctx, &fo->flow_table);
> > > > +        return true;
> > > > +    }
> > > > +
> > > > +    return physical_handle_ovs_iface_changes(&p_ctx,
> &fo->flow_table);
> > > > +}
> > > > +
> > > > +static void *
> > > > +en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
> > > > +                             struct engine_arg *arg OVS_UNUSED)
> > > > +{
> > > > +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof
> *data);
> > > > +    return data;
> > > > +}
> > > > +
> > > > +static void
> > > > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
> > > > +{
> > > > +
> > > > +}
> > > > +
> > > > +static void
> > > > +en_physical_flow_changes_run(struct engine_node *node, void *data
> > > OVS_UNUSED)
> > > > +{
> > > > +    engine_set_node_state(node, EN_UPDATED);
> > > > +}
> > > > +
> > > > +static bool
> > > > +physical_flow_changes_ct_zones_handler(struct engine_node *node
> > > OVS_UNUSED,
> > > > +                                      void *data OVS_UNUSED)
> > > > +{
> > > > +    struct ed_type_physical_flow_changes *pfc = data;
> > > > +    pfc->ct_zones_changed = true;
> > > > +    engine_set_node_state(node, EN_UPDATED);
> > > > +    return false;
> > > > +}
> > > > +
> > > > +static bool
> > > > +flow_output_noop_handler(struct engine_node *node OVS_UNUSED,
> > > > +                          void *data OVS_UNUSED)
> > > > +{
> > > > +    return true;
> > > > +}
> > > > +
> > > > +
> > > >  struct ovn_controller_exit_args {
> > > >      bool *exiting;
> > > >      bool *restart;
> > > > @@ -1904,6 +1984,7 @@ main(int argc, char *argv[])
> > > >      ENGINE_NODE(runtime_data, "runtime_data");
> > > >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> > > >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> > > > +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
> > > >      ENGINE_NODE(flow_output, "flow_output");
> > > >      ENGINE_NODE(addr_sets, "addr_sets");
> > > >      ENGINE_NODE(port_groups, "port_groups");
> > > > @@ -1923,16 +2004,27 @@ main(int argc, char *argv[])
> > > >      engine_add_input(&en_port_groups, &en_sb_port_group,
> > > >                       port_groups_sb_port_group_handler);
> > > >
> > > > +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
> > > > +                     physical_flow_changes_ct_zones_handler);
> > > > +    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
> > > > +                     NULL);
> > > > +
> > > >      engine_add_input(&en_flow_output, &en_addr_sets,
> > > >                       flow_output_addr_sets_handler);
> > > >      engine_add_input(&en_flow_output, &en_port_groups,
> > > >                       flow_output_port_groups_handler);
> > > > -    engine_add_input(&en_flow_output, &en_runtime_data, NULL);
> > > > -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
> > > > +    engine_add_input(&en_flow_output, &en_runtime_data,
> > > > +                     flow_output_noop_handler);
> > >
> > > I think this breaks the dependency and would result in correctness
> problem.
> > > Because flow_output depends on runtime_data, which includes lots of
> data
> > > structures such as local_lports, local_datapaths, active_tunnels, etc.,
> all
> > > needed for flow compute to produce correct OVS flow output. Now with
> the
> > > noop handler, the changes in these data structures are ignored. How
> could
> > > we make sure the flows computed are still up to date?
> > >
> >
> > Thanks for the comments Han. I get your point.
> >
> > Let's say we have data structures 'A' and 'B' in runtime data. And if
> > events 'X' and 'Y' result in the modifications of these data structures
> in
> > runtime_data handlers, then handling (properly) these events  - 'X'
> > and 'Y' in the
> > flow_output engine should be enough to ensure the correctness of the
> > flows right ?
> >
> Well, this is not how the I-P engine supposed to work. The engine is purely
> data-driven. In your example, X or Y is just input data (another engine
> node) change of runtime data. They need to be handled in runtime data node.
> If flow_output engine also directly depend on X or Y, of course flow_output
> node also need to handle the change of X or Y, in addition to handle the
> change of runtime data. But handling X and Y in flow_output doesn't replace
> the handling in runtime data. A short cut is, if handling of X or Y always
> trigger recompute in flow_output, of course we can say it is still correct
> even if they are not handled in runtime data. I hope this clarifies the
> design.
>



>
> > The present master has the below data structures in run_time data
> >   - local_datapaths (hmap)
> >   - local_lports (sset)
> >   - local_lport_ids (sset)
> >   - active_tunnels (sset)
> >
> > This I-P series has the below data structures in run_time data
> >   - local_datapaths (hmap)
> >   - local_lports (sset)
> >   - local_lport_ids (sset)
> >   - local_bindings (shash)
> >   - active_tunnels (sset)
> >   - egress_ifaces (sset)
> >   - local_iface_ids (smap)
> >
> > Except for active_tunnels, rest of the data structures are calculated
> > in binding.c
> > and in ovn-controller.c (during init, run and destroy of runtime_data)
> > And these data structures are modified in binding_run and in the ovs
> > interface change
> > handler and SB port binding change handler.
> >
> > I think if we handle the events - ovs interface changes and sb port
> > binding changes in
> > flow output engine, then it should ensure correctness provided we
> > handle these changes
> > properly.
> >
> Since flow_output depends on runtime data, and it take into consideration
> all the data structures you listed above for flow computing, it means if
> any of these data changes we need to evaluate them in flow_output.
> Otherwise, it is either incorrect or some data in runtime data node is
> useless. It doesn't seem right that the data is needed but we just ignore
> the change. If, some of the data is needed as output but not needed as
> input of flow_output, then we can simply split them out from runtime data
> and don't add them as input of flow_output node. In this case, not handling
> those changes is not a problem.
>
> > This patch series handles ovs interface changes  in flow_output
> > engine. We already
> > handle port binding changes, but we don't do any flow handling. I
> > think if we can
> > enhance this, we can solve this problem.
> >
> > The present patch series definitely has issues with the runtime noop
> handler,
> > but it still has no issues with correctness because the ovn sb idl txn is
> NULL
> > for any updates ovn-controller does to SB DB, resulting in flow
> recomputation.
> > But of course this is wrong. And we should probably enhance the IDL
> tracker.
> >
> > > From my point of view, to handle the runtime data changes
> incrementally, we
> > > should split the runtime data node into separate engine nodes, so that
> > > different changes can be handled differently for flow output compute.
> Some
> > > of the changes may be handled incrementally if it is easy to handle and
> if
> > > it is frequently changed, and leave those less frequent and
> hard-to-handle
> > > changes and let them trigger recompute.
> >
> > I don't think this is straightforward to split them. If you see, most
> > of the runtime data
> > is related to port binding and is updated during port binding.
>
> If a data structure depends on port_binding, it is straightforward to split
> it as a new node and add the SB_port_binding node as its input node, and
> then add itself as input node for flow_output. Then implement the run() and
> change_handler for SB_port_binding.
> Usually it may be not that simple because the new node may also depend on
> some other data structures in runtime data, so the dependency needs to be
> added and handled as well.
> The split strategy should be based on the need of I-P, i.e. split the part
> of data that triggers most recomputes, and handle it incrementally in most
> situation. This ensures least effort for best outcome.
>
> >
> > Please let me know what you think about the approach I mentioned above to
> > handle the correctness issue.
> >
> > I have addressed your comments for the other patches.
> > Shall I go ahead and submit v2 of first 4 patches of this series ?
> > I think these patches can be considered.
>
> Since those are not bug fixes and more related to the rest of the series,
> I'd suggest to submit v2 together for the whole series in case you want to
> change them in a different way.
>
>
Thanks for the comments. Patch 3 and 4 add I-P for ovs interface and port
binding
changes in the runtime data engine. Even if we drop patch 5 and 6 from the
series
(lets just assume for now that it just complicates further), I think we can
definitely
handle ovs interface and port binding changes for runtime data. This avoids
running binding_run(). In binding_run() we run a for loop for all the port
binding rows.

I will explore further on patch 6 approach. But I would like to pursue on
the rest
of the patches.


Thanks
Numan



Thanks again for taking the heavy-lift part of I-P!
>
> >
> > Thanks
> > Numan
> >
> > >
> > > >      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
> > > > +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
> > > > +                     flow_output_physical_flow_changes_handler);
> > > >
> > > >      engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
> > > >      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
> > > > +    engine_add_input(&en_flow_output, &en_ovs_interface,
> > > > +                     flow_output_noop_handler);
> > > > +    engine_add_input(&en_flow_output, &en_ct_zones,
> > > > +                     flow_output_noop_handler);
> > > >
> > > >      engine_add_input(&en_flow_output, &en_sb_chassis, NULL);
> > > >      engine_add_input(&en_flow_output, &en_sb_encap, NULL);
> > > > diff --git a/controller/ovn-controller.h
> b/controller/ovn-controller.h
> > > > index 5d9466880..40c358cef 100644
> > > > --- a/controller/ovn-controller.h
> > > > +++ b/controller/ovn-controller.h
> > > > @@ -72,6 +72,28 @@ struct local_datapath {
> > > >  struct local_datapath *get_local_datapath(const struct hmap *,
> > > >                                            uint32_t tunnel_key);
> > > >
> > > > +enum local_binding_type {
> > > > +    BT_VIF,
> > > > +    BT_CHILD,
> > > > +    BT_VIRTUAL
> > > > +};
> > > > +
> > > > +struct local_binding {
> > > > +    struct ovs_list node;       /* In parent if any. */
> > > > +    char *name;
> > > > +    enum local_binding_type type;
> > > > +    const struct ovsrec_interface *iface;
> > > > +    const struct sbrec_port_binding *pb;
> > > > +
> > > > +    struct ovs_list children;
> > > > +};
> > > > +
> > > > +static inline struct local_binding *
> > > > +local_binding_find(struct shash *local_bindings, const char *name)
> > > > +{
> > > > +    return shash_find_data(local_bindings, name);
> > > > +}
> > > > +
> > > >  const struct ovsrec_bridge *get_bridge(const struct
> ovsrec_bridge_table
> > > *,
> > > >                                         const char *br_name);
> > > >
> > > > diff --git a/controller/physical.c b/controller/physical.c
> > > > index 144aeb7bd..09a5d9b39 100644
> > > > --- a/controller/physical.c
> > > > +++ b/controller/physical.c
> > > > @@ -1780,6 +1780,54 @@ physical_run(struct physical_ctx *p_ctx,
> > > >      simap_destroy(&new_tunnel_to_ofport);
> > > >  }
> > > >
> > > > +bool
> > > > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
> > > > +                                  struct ovn_desired_flow_table
> > > *flow_table)
> > > > +{
> > > > +    const struct ovsrec_interface *iface_rec;
> > > > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> > > p_ctx->iface_table) {
> > > > +        if (!strcmp(iface_rec->type, "geneve") ||
> > > > +            !strcmp(iface_rec->type, "patch")) {
> > > > +            return false;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    struct ofpbuf ofpacts;
> > > > +    ofpbuf_init(&ofpacts, 0);
> > > > +
> > > > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> > > p_ctx->iface_table) {
> > > > +        const char *iface_id = smap_get(&iface_rec->external_ids,
> > > "iface-id");
> > > > +        if (!iface_id) {
> > > > +            continue;
> > > > +        }
> > > > +
> > > > +        const struct local_binding *lb =
> > > > +            local_binding_find(p_ctx->local_bindings, iface_id);
> > > > +
> > > > +        if (!lb || !lb->pb) {
> > > > +            continue;
> > > > +        }
> > > > +
> > > > +        if (ovsrec_interface_is_deleted(iface_rec)) {
> > > > +            ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
> > > > +        } else {
> > > > +            if (!ovsrec_interface_is_new(iface_rec)) {
> > > > +                ofctrl_remove_flows(flow_table,
> &lb->pb->header_.uuid);
> > > > +            }
> > > > +
> > > > +            consider_port_binding(p_ctx->sbrec_port_binding_by_name,
> > > > +                                  p_ctx->mff_ovn_geneve,
> p_ctx->ct_zones,
> > > > +                                  p_ctx->active_tunnels,
> > > > +                                  p_ctx->local_datapaths,
> > > > +                                  lb->pb, p_ctx->chassis,
> > > > +                                  flow_table, &ofpacts);
> > > > +        }
> > > > +    }
> > > > +
> > > > +    ofpbuf_uninit(&ofpacts);
> > > > +    return true;
> > > > +}
> > > > +
> > > >  bool
> > > >  get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> ofp_port_t
> > > *ofport)
> > > >  {
> > > > diff --git a/controller/physical.h b/controller/physical.h
> > > > index dadf84ea1..9ca34436a 100644
> > > > --- a/controller/physical.h
> > > > +++ b/controller/physical.h
> > > > @@ -49,11 +49,13 @@ struct physical_ctx {
> > > >      const struct ovsrec_bridge *br_int;
> > > >      const struct sbrec_chassis_table *chassis_table;
> > > >      const struct sbrec_chassis *chassis;
> > > > +    const struct ovsrec_interface_table *iface_table;
> > > >      const struct sset *active_tunnels;
> > > >      struct hmap *local_datapaths;
> > > >      struct sset *local_lports;
> > > >      const struct simap *ct_zones;
> > > >      enum mf_field_id mff_ovn_geneve;
> > > > +    struct shash *local_bindings;
> > > >  };
> > > >
> > > >  void physical_register_ovs_idl(struct ovsdb_idl *);
> > > > @@ -63,7 +65,8 @@ void physical_handle_port_binding_changes(struct
> > > physical_ctx *,
> > > >                                            struct
> ovn_desired_flow_table
> > > *);
> > > >  void physical_handle_mc_group_changes(struct physical_ctx *,
> > > >                                        struct ovn_desired_flow_table
> *);
> > > > -
> > > > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
> > > > +                                       struct ovn_desired_flow_table
> *);
> > > >  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> > > >                         ofp_port_t *ofport);
> > > >  #endif /* controller/physical.h */
> > > > diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at
> > > > index 5c21f6dd7..555434484 100644
> > > > --- a/tests/ovn-performance.at
> > > > +++ b/tests/ovn-performance.at
> > > > @@ -263,7 +263,7 @@ for i in 1 2; do
> > > >      )
> > > >
> > > >      # Add router port to $ls
> > > > -    OVN_CONTROLLER_EXPECT_HIT(
> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > > >          [hv1 hv2], [lflow_run],
> > > >          [ovn-nbctl --wait=hv lrp-add lr1 $lrp 02:00:00:00:0$i:01
> > > 10.0.$i.1/24]
> > > >      )
> > > > @@ -271,15 +271,15 @@ for i in 1 2; do
> > > >          [hv1 hv2], [lflow_run],
> > > >          [ovn-nbctl --wait=hv lsp-add $ls $lsp]
> > > >      )
> > > > -    OVN_CONTROLLER_EXPECT_HIT(
> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > > >          [hv1 hv2], [lflow_run],
> > > >          [ovn-nbctl --wait=hv lsp-set-type $lsp router]
> > > >      )
> > > > -    OVN_CONTROLLER_EXPECT_HIT(
> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > > >          [hv1 hv2], [lflow_run],
> > > >          [ovn-nbctl --wait=hv lsp-set-options $lsp router-port=$lrp]
> > > >      )
> > > > -    OVN_CONTROLLER_EXPECT_HIT(
> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > > >          [hv1 hv2], [lflow_run],
> > > >          [ovn-nbctl --wait=hv lsp-set-addresses $lsp router]
> > > >      )
> > > > @@ -393,8 +393,8 @@ for i in 1 2; do
> > > >      lp=lp$i
> > > >
> > > >      # Delete port $lp
> > > > -    OVN_CONTROLLER_EXPECT_HIT_COND(
> > > > -        [hv$i hv$j], [lflow_run], [>0 =0],
> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > > > +        [hv$i hv$j], [lflow_run],
> > > >          [ovn-nbctl --wait=hv lsp-del $lp]
> > > >      )
> > > >  done
> > > > @@ -409,7 +409,7 @@ for i in 1 2; do
> > > >      ls=ls$i
> > > >
> > > >      # Delete switch $ls
> > > > -    OVN_CONTROLLER_EXPECT_HIT(
> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> > > >          [hv1 hv2], [lflow_run],
> > > >          [ovn-nbctl --wait=hv ls-del $ls]
> > > >      )
> > > > --
> > > > 2.24.1
> > > >
> > > > _______________________________________________
> > > > dev mailing list
> > > > dev@openvswitch.org
> > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > >
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Han Zhou March 31, 2020, 7:43 p.m. UTC | #5
On Tue, Mar 31, 2020 at 12:23 PM Numan Siddique <numans@ovn.org> wrote:
>
>
> Thanks Han for the comments. I will go through the comments and come back
to
> you with further comments or questions tomorrow.
>
> I have one comment on the last part. Please see below.
>
> Thanks
> Numan
>
>
> On Tue, Mar 31, 2020, 11:25 PM Han Zhou <hzhou@ovn.org> wrote:
>>
>> On Tue, Mar 31, 2020 at 5:40 AM Numan Siddique <numans@ovn.org> wrote:
>> >
>> > On Wed, Mar 25, 2020 at 12:23 PM Han Zhou <hzhou@ovn.org> wrote:
>> > >
>> > > On Mon, Mar 16, 2020 at 4:16 AM <numans@ovn.org> wrote:
>> > > >
>> > > > From: Numan Siddique <numans@ovn.org>
>> > > >
>> > > > This patch handles ct zone changes and OVS interface changes
>> incrementally
>> > > > in the flow output stage.
>> > > >
>> > > > Any changes to ct zone can be handled by running physical_run()
>> instead
>> > > of running
>> > > > flow_output_run(). And any changes to OVS interfaces can be either
>> handled
>> > > > incrementally (for OVS interfaces representing VIFs) or just
running
>> > > > physical_run() (for tunnel and other types of interfaces).
>> > > >
>> > > > To better handle this, a new engine node 'physical_flow_changes' is
>> added
>> > > which
>> > > > handles changes to ct zone and OVS interfaces.
>> > > >
>> > > > After this patch, We don't trigger full recompute in flow output
>> stage for
>> > > > runtime data changes. There's noop handler for this. With this we
>> avoid
>> > > > calls to lflow_run() a lot.
>> > > >
>> > > > For the below commands the lflow_run counter without this patch is
16
>> > > > and with this patch is 3.
>> > > >
>> > > > -----------------------
>> > > > ovn-nbctl ls-add sw0
>> > > > ovn-nbctl lsp-add sw0 sw0-port1
>> > > > ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01
192.168.0.2"
>> > > >
>> > > > ovn-nbctl ls-add sw1
>> > > > ovn-nbctl lsp-add sw1 sw1-port1
>> > > > ovn-nbctl lsp-set-addresses sw1-port1 "50:54:00:00:00:03 11.0.0.2"
>> > > >
>> > > > ovn-nbctl lr-add lr0
>> > > > ovn-nbctl lrp-add lr0 lrp0 00:00:00:00:ff:01 192.168.0.1/24
>> > > > ovn-nbctl lsp-add sw0 lrp0-attachment
>> > > > ovn-nbctl lsp-set-type lrp0-attachment router
>> > > > ovn-nbctl lsp-set-addresses lrp0-attachment 00:00:00:00:ff:01
>> > > > ovn-nbctl lsp-set-options lrp0-attachment router-port=lrp0
>> > > > ovn-nbctl lrp-add lr0 lrp1 00:00:00:00:ff:02 11.0.0.1/24
>> > > > ovn-nbctl lsp-add sw1 lrp1-attachment
>> > > > ovn-nbctl lsp-set-type lrp1-attachment router
>> > > > ovn-nbctl lsp-set-addresses lrp1-attachment 00:00:00:00:ff:02
>> > > > ovn-nbctl lsp-set-options lrp1-attachment router-port=lrp1
>> > > >
>> > > > ovs-vsctl add-port br-int p1 -- \
>> > > >     set Interface p1 external_ids:iface-id=sw0-port1
>> > > > ovs-vsctl add-port br-int p2 -- \
>> > > >     set Interface p2 external_ids:iface-id=sw1-port1
>> > > > ovn-appctl -t ovn-controller coverage/read-counter lflow_run
>> > > > -------------------
>> > > >
>> > > > Signed-off-by: Numan Siddique <numans@ovn.org>
>> > > > ---
>> > > >  controller/binding.c        | 22 ---------
>> > > >  controller/lflow.c          | 13 +++++
>> > > >  controller/lflow.h          |  2 +
>> > > >  controller/ovn-controller.c | 96
>> ++++++++++++++++++++++++++++++++++++-
>> > > >  controller/ovn-controller.h | 22 +++++++++
>> > > >  controller/physical.c       | 48 +++++++++++++++++++
>> > > >  controller/physical.h       |  5 +-
>> > > >  tests/ovn-performance.at    | 14 +++---
>> > > >  8 files changed, 190 insertions(+), 32 deletions(-)
>> > > >
>> > > > diff --git a/controller/binding.c b/controller/binding.c
>> > > > index ce4467f31..9dc1c3f38 100644
>> > > > --- a/controller/binding.c
>> > > > +++ b/controller/binding.c
>> > > > @@ -501,22 +501,6 @@ remove_local_lport_ids(const struct
>> > > sbrec_port_binding *binding_rec,
>> > > >          sset_find_and_delete(local_lport_ids, buf);
>> > > >  }
>> > > >
>> > > > -enum local_binding_type {
>> > > > -    BT_VIF,
>> > > > -    BT_CHILD,
>> > > > -    BT_VIRTUAL
>> > > > -};
>> > > > -
>> > > > -struct local_binding {
>> > > > -    struct ovs_list node;       /* In parent if any. */
>> > > > -    char *name;
>> > > > -    enum local_binding_type type;
>> > > > -    const struct ovsrec_interface *iface;
>> > > > -    const struct sbrec_port_binding *pb;
>> > > > -
>> > > > -    struct ovs_list children;
>> > > > -};
>> > > > -
>> > > >  static struct local_binding *
>> > > >  local_binding_create(const char *name, const struct
ovsrec_interface
>> > > *iface,
>> > > >                       const struct sbrec_port_binding *pb,
>> > > > @@ -537,12 +521,6 @@ local_binding_add(struct shash
*local_bindings,
>> > > struct local_binding *lbinding)
>> > > >      shash_add(local_bindings, lbinding->name, lbinding);
>> > > >  }
>> > > >
>> > > > -static struct local_binding *
>> > > > -local_binding_find(struct shash *local_bindings, const char *name)
>> > > > -{
>> > > > -    return shash_find_data(local_bindings, name);
>> > > > -}
>> > > > -
>> > > >  static void
>> > > >  local_binding_destroy(struct local_binding *lbinding)
>> > > >  {
>> > > > diff --git a/controller/lflow.c b/controller/lflow.c
>> > > > index 01214a3a6..512258cd3 100644
>> > > > --- a/controller/lflow.c
>> > > > +++ b/controller/lflow.c
>> > > > @@ -847,3 +847,16 @@ lflow_destroy(void)
>> > > >      expr_symtab_destroy(&symtab);
>> > > >      shash_destroy(&symtab);
>> > > >  }
>> > > > +
>> > > > +bool
>> > > > +lflow_evaluate_pb_changes(const struct sbrec_port_binding_table
>> > > *pb_table)
>> > > > +{
>> > > > +    const struct sbrec_port_binding *binding;
>> > > > +    SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (binding, pb_table)
{
>> > > > +        if (!strcmp(binding->type, "remote")) {
>> > > > +            return false;
>> > > > +        }
>> > > > +    }
>> > > > +
>> > > > +    return true;
>> > > > +}
>> > > > diff --git a/controller/lflow.h b/controller/lflow.h
>> > > > index f02f709d7..fa54d4de2 100644
>> > > > --- a/controller/lflow.h
>> > > > +++ b/controller/lflow.h
>> > > > @@ -48,6 +48,7 @@ struct sbrec_dhcp_options_table;
>> > > >  struct sbrec_dhcpv6_options_table;
>> > > >  struct sbrec_logical_flow_table;
>> > > >  struct sbrec_mac_binding_table;
>> > > > +struct sbrec_port_binding_table;
>> > > >  struct simap;
>> > > >  struct sset;
>> > > >  struct uuid;
>> > > > @@ -153,6 +154,7 @@ void lflow_handle_changed_neighbors(
>> > > >      const struct hmap *local_datapaths,
>> > > >      struct ovn_desired_flow_table *);
>> > > >
>> > > > +bool lflow_evaluate_pb_changes(const struct
sbrec_port_binding_table
>> *);
>> > > >  void lflow_destroy(void);
>> > > >
>> > > >  void lflow_expr_destroy(struct hmap *lflow_expr_cache);
>> > > > diff --git a/controller/ovn-controller.c
b/controller/ovn-controller.c
>> > > > index 07823f020..448e9908e 100644
>> > > > --- a/controller/ovn-controller.c
>> > > > +++ b/controller/ovn-controller.c
>> > > > @@ -1360,6 +1360,10 @@ static void init_physical_ctx(struct
>> engine_node
>> > > *node,
>> > > >
>> > > >      ovs_assert(br_int && chassis);
>> > > >
>> > > > +    struct ovsrec_interface_table *iface_table =
>> > > > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
>> > > > +            engine_get_input("OVS_interface", node));
>> > > > +
>> > > >      struct ed_type_ct_zones *ct_zones_data =
>> > > >          engine_get_input_data("ct_zones", node);
>> > > >      struct simap *ct_zones = &ct_zones_data->current;
>> > > > @@ -1369,12 +1373,14 @@ static void init_physical_ctx(struct
>> engine_node
>> > > *node,
>> > > >      p_ctx->mc_group_table = multicast_group_table;
>> > > >      p_ctx->br_int = br_int;
>> > > >      p_ctx->chassis_table = chassis_table;
>> > > > +    p_ctx->iface_table = iface_table;
>> > > >      p_ctx->chassis = chassis;
>> > > >      p_ctx->active_tunnels = &rt_data->active_tunnels;
>> > > >      p_ctx->local_datapaths = &rt_data->local_datapaths;
>> > > >      p_ctx->local_lports = &rt_data->local_lports;
>> > > >      p_ctx->ct_zones = ct_zones;
>> > > >      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
>> > > > +    p_ctx->local_bindings = &rt_data->local_bindings;
>> > > >  }
>> > > >
>> > > >  static void init_lflow_ctx(struct engine_node *node,
>> > > > @@ -1637,6 +1643,8 @@ flow_output_sb_port_binding_handler(struct
>> > > engine_node *node, void *data)
>> > > >       *    always logical router port, and any change to logical
>> router
>> > > port
>> > > >       *    would just trigger recompute.
>> > > >       *
>> > > > +     *  - When a port binding of type 'remote' is updated by
ovn-ic.
>> > > > +     *
>> > > >       * Although there is no correctness issue so far (except the
>> unusual
>> > > ACL
>> > > >       * use case, which doesn't seem to be a real problem), it
might
>> be
>> > > better
>> > > >       * to handle this more gracefully, without the need to
consider
>> these
>> > > > @@ -1647,6 +1655,14 @@ flow_output_sb_port_binding_handler(struct
>> > > engine_node *node, void *data)
>> > > >      struct physical_ctx p_ctx;
>> > > >      init_physical_ctx(node, rt_data, &p_ctx);
>> > > >
>> > > > +    if (!lflow_evaluate_pb_changes(p_ctx.port_binding_table)) {
>> > > > +        /* If this returns false, it means there is an impact on
>> > > > +         * the logical flow processing because of some changes in
>> > > > +         * port bindings. Return false so that recompute is
triggered
>> > > > +         * for this stage. */
>> > > > +        return false;
>> > > > +    }
>> > > > +
>> > > >      physical_handle_port_binding_changes(&p_ctx, flow_table);
>> > > >
>> > > >      engine_set_node_state(node, EN_UPDATED);
>> > > > @@ -1782,6 +1798,70 @@ flow_output_port_groups_handler(struct
>> engine_node
>> > > *node, void *data)
>> > > >      return _flow_output_resource_ref_handler(node, data,
>> > > REF_TYPE_PORTGROUP);
>> > > >  }
>> > > >
>> > > > +struct ed_type_physical_flow_changes {
>> > > > +    bool ct_zones_changed;
>> > > > +};
>> > > > +
>> > > > +static bool
>> > > > +flow_output_physical_flow_changes_handler(struct engine_node
*node,
>> void
>> > > *data)
>> > > > +{
>> > > > +    struct ed_type_physical_flow_changes *pfc =
>> > > > +        engine_get_input_data("physical_flow_changes", node);
>> > > > +    struct ed_type_runtime_data *rt_data =
>> > > > +        engine_get_input_data("runtime_data", node);
>> > > > +
>> > > > +    struct ed_type_flow_output *fo = data;
>> > > > +    struct physical_ctx p_ctx;
>> > > > +    init_physical_ctx(node, rt_data, &p_ctx);
>> > > > +
>> > > > +    engine_set_node_state(node, EN_UPDATED);
>> > > > +    if (pfc->ct_zones_changed) {
>> > > > +        pfc->ct_zones_changed = false;
>> > > > +        physical_run(&p_ctx, &fo->flow_table);
>> > > > +        return true;
>> > > > +    }
>> > > > +
>> > > > +    return physical_handle_ovs_iface_changes(&p_ctx,
>> &fo->flow_table);
>> > > > +}
>> > > > +
>> > > > +static void *
>> > > > +en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
>> > > > +                             struct engine_arg *arg OVS_UNUSED)
>> > > > +{
>> > > > +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof
>> *data);
>> > > > +    return data;
>> > > > +}
>> > > > +
>> > > > +static void
>> > > > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
>> > > > +{
>> > > > +
>> > > > +}
>> > > > +
>> > > > +static void
>> > > > +en_physical_flow_changes_run(struct engine_node *node, void *data
>> > > OVS_UNUSED)
>> > > > +{
>> > > > +    engine_set_node_state(node, EN_UPDATED);
>> > > > +}
>> > > > +
>> > > > +static bool
>> > > > +physical_flow_changes_ct_zones_handler(struct engine_node *node
>> > > OVS_UNUSED,
>> > > > +                                      void *data OVS_UNUSED)
>> > > > +{
>> > > > +    struct ed_type_physical_flow_changes *pfc = data;
>> > > > +    pfc->ct_zones_changed = true;
>> > > > +    engine_set_node_state(node, EN_UPDATED);
>> > > > +    return false;
>> > > > +}
>> > > > +
>> > > > +static bool
>> > > > +flow_output_noop_handler(struct engine_node *node OVS_UNUSED,
>> > > > +                          void *data OVS_UNUSED)
>> > > > +{
>> > > > +    return true;
>> > > > +}
>> > > > +
>> > > > +
>> > > >  struct ovn_controller_exit_args {
>> > > >      bool *exiting;
>> > > >      bool *restart;
>> > > > @@ -1904,6 +1984,7 @@ main(int argc, char *argv[])
>> > > >      ENGINE_NODE(runtime_data, "runtime_data");
>> > > >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
>> > > >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
>> > > > +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
>> > > >      ENGINE_NODE(flow_output, "flow_output");
>> > > >      ENGINE_NODE(addr_sets, "addr_sets");
>> > > >      ENGINE_NODE(port_groups, "port_groups");
>> > > > @@ -1923,16 +2004,27 @@ main(int argc, char *argv[])
>> > > >      engine_add_input(&en_port_groups, &en_sb_port_group,
>> > > >                       port_groups_sb_port_group_handler);
>> > > >
>> > > > +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
>> > > > +                     physical_flow_changes_ct_zones_handler);
>> > > > +    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
>> > > > +                     NULL);
>> > > > +
>> > > >      engine_add_input(&en_flow_output, &en_addr_sets,
>> > > >                       flow_output_addr_sets_handler);
>> > > >      engine_add_input(&en_flow_output, &en_port_groups,
>> > > >                       flow_output_port_groups_handler);
>> > > > -    engine_add_input(&en_flow_output, &en_runtime_data, NULL);
>> > > > -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
>> > > > +    engine_add_input(&en_flow_output, &en_runtime_data,
>> > > > +                     flow_output_noop_handler);
>> > >
>> > > I think this breaks the dependency and would result in correctness
>> problem.
>> > > Because flow_output depends on runtime_data, which includes lots of
data
>> > > structures such as local_lports, local_datapaths, active_tunnels,
etc.,
>> all
>> > > needed for flow compute to produce correct OVS flow output. Now with
the
>> > > noop handler, the changes in these data structures are ignored. How
>> could
>> > > we make sure the flows computed are still up to date?
>> > >
>> >
>> > Thanks for the comments Han. I get your point.
>> >
>> > Let's say we have data structures 'A' and 'B' in runtime data. And if
>> > events 'X' and 'Y' result in the modifications of these data
structures in
>> > runtime_data handlers, then handling (properly) these events  - 'X'
>> > and 'Y' in the
>> > flow_output engine should be enough to ensure the correctness of the
>> > flows right ?
>> >
>> Well, this is not how the I-P engine supposed to work. The engine is
purely
>> data-driven. In your example, X or Y is just input data (another engine
>> node) change of runtime data. They need to be handled in runtime data
node.
>> If flow_output engine also directly depend on X or Y, of course
flow_output
>> node also need to handle the change of X or Y, in addition to handle the
>> change of runtime data. But handling X and Y in flow_output doesn't
replace
>> the handling in runtime data. A short cut is, if handling of X or Y
always
>> trigger recompute in flow_output, of course we can say it is still
correct
>> even if they are not handled in runtime data. I hope this clarifies the
>> design.
>
>
>
>>
>>
>> > The present master has the below data structures in run_time data
>> >   - local_datapaths (hmap)
>> >   - local_lports (sset)
>> >   - local_lport_ids (sset)
>> >   - active_tunnels (sset)
>> >
>> > This I-P series has the below data structures in run_time data
>> >   - local_datapaths (hmap)
>> >   - local_lports (sset)
>> >   - local_lport_ids (sset)
>> >   - local_bindings (shash)
>> >   - active_tunnels (sset)
>> >   - egress_ifaces (sset)
>> >   - local_iface_ids (smap)
>> >
>> > Except for active_tunnels, rest of the data structures are calculated
>> > in binding.c
>> > and in ovn-controller.c (during init, run and destroy of runtime_data)
>> > And these data structures are modified in binding_run and in the ovs
>> > interface change
>> > handler and SB port binding change handler.
>> >
>> > I think if we handle the events - ovs interface changes and sb port
>> > binding changes in
>> > flow output engine, then it should ensure correctness provided we
>> > handle these changes
>> > properly.
>> >
>> Since flow_output depends on runtime data, and it take into consideration
>> all the data structures you listed above for flow computing, it means if
>> any of these data changes we need to evaluate them in flow_output.
>> Otherwise, it is either incorrect or some data in runtime data node is
>> useless. It doesn't seem right that the data is needed but we just ignore
>> the change. If, some of the data is needed as output but not needed as
>> input of flow_output, then we can simply split them out from runtime data
>> and don't add them as input of flow_output node. In this case, not
handling
>> those changes is not a problem.
>>
>> > This patch series handles ovs interface changes  in flow_output
>> > engine. We already
>> > handle port binding changes, but we don't do any flow handling. I
>> > think if we can
>> > enhance this, we can solve this problem.
>> >
>> > The present patch series definitely has issues with the runtime noop
>> handler,
>> > but it still has no issues with correctness because the ovn sb idl txn
is
>> NULL
>> > for any updates ovn-controller does to SB DB, resulting in flow
>> recomputation.
>> > But of course this is wrong. And we should probably enhance the IDL
>> tracker.
>> >
>> > > From my point of view, to handle the runtime data changes
>> incrementally, we
>> > > should split the runtime data node into separate engine nodes, so
that
>> > > different changes can be handled differently for flow output compute.
>> Some
>> > > of the changes may be handled incrementally if it is easy to handle
and
>> if
>> > > it is frequently changed, and leave those less frequent and
>> hard-to-handle
>> > > changes and let them trigger recompute.
>> >
>> > I don't think this is straightforward to split them. If you see, most
>> > of the runtime data
>> > is related to port binding and is updated during port binding.
>>
>> If a data structure depends on port_binding, it is straightforward to
split
>> it as a new node and add the SB_port_binding node as its input node, and
>> then add itself as input node for flow_output. Then implement the run()
and
>> change_handler for SB_port_binding.
>> Usually it may be not that simple because the new node may also depend on
>> some other data structures in runtime data, so the dependency needs to be
>> added and handled as well.
>> The split strategy should be based on the need of I-P, i.e. split the
part
>> of data that triggers most recomputes, and handle it incrementally in
most
>> situation. This ensures least effort for best outcome.
>>
>> >
>> > Please let me know what you think about the approach I mentioned above
to
>> > handle the correctness issue.
>> >
>> > I have addressed your comments for the other patches.
>> > Shall I go ahead and submit v2 of first 4 patches of this series ?
>> > I think these patches can be considered.
>>
>> Since those are not bug fixes and more related to the rest of the series,
>> I'd suggest to submit v2 together for the whole series in case you want
to
>> change them in a different way.
>>
>
> Thanks for the comments. Patch 3 and 4 add I-P for ovs interface and port
binding
> changes in the runtime data engine. Even if we drop patch 5 and 6 from
the series
> (lets just assume for now that it just complicates further), I think we
can definitely
> handle ovs interface and port binding changes for runtime data. This
avoids
> running binding_run(). In binding_run() we run a for loop for all the
port binding rows.
>
> I will explore further on patch 6 approach. But I would like to pursue on
the rest
> of the patches.
>

Ok, I will take a closer look at patch 3 and 4 if you believe it is right
approach to proceed, but remember that even we handle changes incrementally
in runtime_data, since there is no change_handler for runtime_data in
flow_output, it will still trigger flow recompute, which makes the
improvement of avoiding binding_run() not quite obvious compare with the
cost of lflow_run. It would be effective only if the node is splited with
e2e I-P.

>
> Thanks
> Numan
>
>
>
>> Thanks again for taking the heavy-lift part of I-P!
>>
>> >
>> > Thanks
>> > Numan
>> >
>> > >
>> > > >      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
>> > > > +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
>> > > > +                     flow_output_physical_flow_changes_handler);
>> > > >
>> > > >      engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
>> > > >      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
>> > > > +    engine_add_input(&en_flow_output, &en_ovs_interface,
>> > > > +                     flow_output_noop_handler);
>> > > > +    engine_add_input(&en_flow_output, &en_ct_zones,
>> > > > +                     flow_output_noop_handler);
>> > > >
>> > > >      engine_add_input(&en_flow_output, &en_sb_chassis, NULL);
>> > > >      engine_add_input(&en_flow_output, &en_sb_encap, NULL);
>> > > > diff --git a/controller/ovn-controller.h
b/controller/ovn-controller.h
>> > > > index 5d9466880..40c358cef 100644
>> > > > --- a/controller/ovn-controller.h
>> > > > +++ b/controller/ovn-controller.h
>> > > > @@ -72,6 +72,28 @@ struct local_datapath {
>> > > >  struct local_datapath *get_local_datapath(const struct hmap *,
>> > > >                                            uint32_t tunnel_key);
>> > > >
>> > > > +enum local_binding_type {
>> > > > +    BT_VIF,
>> > > > +    BT_CHILD,
>> > > > +    BT_VIRTUAL
>> > > > +};
>> > > > +
>> > > > +struct local_binding {
>> > > > +    struct ovs_list node;       /* In parent if any. */
>> > > > +    char *name;
>> > > > +    enum local_binding_type type;
>> > > > +    const struct ovsrec_interface *iface;
>> > > > +    const struct sbrec_port_binding *pb;
>> > > > +
>> > > > +    struct ovs_list children;
>> > > > +};
>> > > > +
>> > > > +static inline struct local_binding *
>> > > > +local_binding_find(struct shash *local_bindings, const char *name)
>> > > > +{
>> > > > +    return shash_find_data(local_bindings, name);
>> > > > +}
>> > > > +
>> > > >  const struct ovsrec_bridge *get_bridge(const struct
>> ovsrec_bridge_table
>> > > *,
>> > > >                                         const char *br_name);
>> > > >
>> > > > diff --git a/controller/physical.c b/controller/physical.c
>> > > > index 144aeb7bd..09a5d9b39 100644
>> > > > --- a/controller/physical.c
>> > > > +++ b/controller/physical.c
>> > > > @@ -1780,6 +1780,54 @@ physical_run(struct physical_ctx *p_ctx,
>> > > >      simap_destroy(&new_tunnel_to_ofport);
>> > > >  }
>> > > >
>> > > > +bool
>> > > > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
>> > > > +                                  struct ovn_desired_flow_table
>> > > *flow_table)
>> > > > +{
>> > > > +    const struct ovsrec_interface *iface_rec;
>> > > > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
>> > > p_ctx->iface_table) {
>> > > > +        if (!strcmp(iface_rec->type, "geneve") ||
>> > > > +            !strcmp(iface_rec->type, "patch")) {
>> > > > +            return false;
>> > > > +        }
>> > > > +    }
>> > > > +
>> > > > +    struct ofpbuf ofpacts;
>> > > > +    ofpbuf_init(&ofpacts, 0);
>> > > > +
>> > > > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
>> > > p_ctx->iface_table) {
>> > > > +        const char *iface_id = smap_get(&iface_rec->external_ids,
>> > > "iface-id");
>> > > > +        if (!iface_id) {
>> > > > +            continue;
>> > > > +        }
>> > > > +
>> > > > +        const struct local_binding *lb =
>> > > > +            local_binding_find(p_ctx->local_bindings, iface_id);
>> > > > +
>> > > > +        if (!lb || !lb->pb) {
>> > > > +            continue;
>> > > > +        }
>> > > > +
>> > > > +        if (ovsrec_interface_is_deleted(iface_rec)) {
>> > > > +            ofctrl_remove_flows(flow_table,
&lb->pb->header_.uuid);
>> > > > +        } else {
>> > > > +            if (!ovsrec_interface_is_new(iface_rec)) {
>> > > > +                ofctrl_remove_flows(flow_table,
>> &lb->pb->header_.uuid);
>> > > > +            }
>> > > > +
>> > > > +
 consider_port_binding(p_ctx->sbrec_port_binding_by_name,
>> > > > +                                  p_ctx->mff_ovn_geneve,
>> p_ctx->ct_zones,
>> > > > +                                  p_ctx->active_tunnels,
>> > > > +                                  p_ctx->local_datapaths,
>> > > > +                                  lb->pb, p_ctx->chassis,
>> > > > +                                  flow_table, &ofpacts);
>> > > > +        }
>> > > > +    }
>> > > > +
>> > > > +    ofpbuf_uninit(&ofpacts);
>> > > > +    return true;
>> > > > +}
>> > > > +
>> > > >  bool
>> > > >  get_tunnel_ofport(const char *chassis_name, char *encap_ip,
>> ofp_port_t
>> > > *ofport)
>> > > >  {
>> > > > diff --git a/controller/physical.h b/controller/physical.h
>> > > > index dadf84ea1..9ca34436a 100644
>> > > > --- a/controller/physical.h
>> > > > +++ b/controller/physical.h
>> > > > @@ -49,11 +49,13 @@ struct physical_ctx {
>> > > >      const struct ovsrec_bridge *br_int;
>> > > >      const struct sbrec_chassis_table *chassis_table;
>> > > >      const struct sbrec_chassis *chassis;
>> > > > +    const struct ovsrec_interface_table *iface_table;
>> > > >      const struct sset *active_tunnels;
>> > > >      struct hmap *local_datapaths;
>> > > >      struct sset *local_lports;
>> > > >      const struct simap *ct_zones;
>> > > >      enum mf_field_id mff_ovn_geneve;
>> > > > +    struct shash *local_bindings;
>> > > >  };
>> > > >
>> > > >  void physical_register_ovs_idl(struct ovsdb_idl *);
>> > > > @@ -63,7 +65,8 @@ void physical_handle_port_binding_changes(struct
>> > > physical_ctx *,
>> > > >                                            struct
>> ovn_desired_flow_table
>> > > *);
>> > > >  void physical_handle_mc_group_changes(struct physical_ctx *,
>> > > >                                        struct
ovn_desired_flow_table
>> *);
>> > > > -
>> > > > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
>> > > > +                                       struct
ovn_desired_flow_table
>> *);
>> > > >  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
>> > > >                         ofp_port_t *ofport);
>> > > >  #endif /* controller/physical.h */
>> > > > diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at
>> > > > index 5c21f6dd7..555434484 100644
>> > > > --- a/tests/ovn-performance.at
>> > > > +++ b/tests/ovn-performance.at
>> > > > @@ -263,7 +263,7 @@ for i in 1 2; do
>> > > >      )
>> > > >
>> > > >      # Add router port to $ls
>> > > > -    OVN_CONTROLLER_EXPECT_HIT(
>> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
>> > > >          [hv1 hv2], [lflow_run],
>> > > >          [ovn-nbctl --wait=hv lrp-add lr1 $lrp 02:00:00:00:0$i:01
>> > > 10.0.$i.1/24]
>> > > >      )
>> > > > @@ -271,15 +271,15 @@ for i in 1 2; do
>> > > >          [hv1 hv2], [lflow_run],
>> > > >          [ovn-nbctl --wait=hv lsp-add $ls $lsp]
>> > > >      )
>> > > > -    OVN_CONTROLLER_EXPECT_HIT(
>> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
>> > > >          [hv1 hv2], [lflow_run],
>> > > >          [ovn-nbctl --wait=hv lsp-set-type $lsp router]
>> > > >      )
>> > > > -    OVN_CONTROLLER_EXPECT_HIT(
>> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
>> > > >          [hv1 hv2], [lflow_run],
>> > > >          [ovn-nbctl --wait=hv lsp-set-options $lsp
router-port=$lrp]
>> > > >      )
>> > > > -    OVN_CONTROLLER_EXPECT_HIT(
>> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
>> > > >          [hv1 hv2], [lflow_run],
>> > > >          [ovn-nbctl --wait=hv lsp-set-addresses $lsp router]
>> > > >      )
>> > > > @@ -393,8 +393,8 @@ for i in 1 2; do
>> > > >      lp=lp$i
>> > > >
>> > > >      # Delete port $lp
>> > > > -    OVN_CONTROLLER_EXPECT_HIT_COND(
>> > > > -        [hv$i hv$j], [lflow_run], [>0 =0],
>> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
>> > > > +        [hv$i hv$j], [lflow_run],
>> > > >          [ovn-nbctl --wait=hv lsp-del $lp]
>> > > >      )
>> > > >  done
>> > > > @@ -409,7 +409,7 @@ for i in 1 2; do
>> > > >      ls=ls$i
>> > > >
>> > > >      # Delete switch $ls
>> > > > -    OVN_CONTROLLER_EXPECT_HIT(
>> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
>> > > >          [hv1 hv2], [lflow_run],
>> > > >          [ovn-nbctl --wait=hv ls-del $ls]
>> > > >      )
>> > > > --
>> > > > 2.24.1
>> > > >
>> > > > _______________________________________________
>> > > > dev mailing list
>> > > > dev@openvswitch.org
>> > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> > > _______________________________________________
>> > > dev mailing list
>> > > dev@openvswitch.org
>> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> > >
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
Numan Siddique April 2, 2020, 4:49 p.m. UTC | #6
On Wed, Apr 1, 2020 at 1:14 AM Han Zhou <hzhou@ovn.org> wrote:
>
> On Tue, Mar 31, 2020 at 12:23 PM Numan Siddique <numans@ovn.org> wrote:
> >
> >
> > Thanks Han for the comments. I will go through the comments and come back
> to
> > you with further comments or questions tomorrow.
> >
> > I have one comment on the last part. Please see below.
> >
> > Thanks
> > Numan
> >
> >
> > On Tue, Mar 31, 2020, 11:25 PM Han Zhou <hzhou@ovn.org> wrote:
> >>
> >> On Tue, Mar 31, 2020 at 5:40 AM Numan Siddique <numans@ovn.org> wrote:
> >> >
> >> > On Wed, Mar 25, 2020 at 12:23 PM Han Zhou <hzhou@ovn.org> wrote:
> >> > >
> >> > > On Mon, Mar 16, 2020 at 4:16 AM <numans@ovn.org> wrote:
> >> > > >
> >> > > > From: Numan Siddique <numans@ovn.org>
> >> > > >
> >> > > > This patch handles ct zone changes and OVS interface changes
> >> incrementally
> >> > > > in the flow output stage.
> >> > > >
> >> > > > Any changes to ct zone can be handled by running physical_run()
> >> instead
> >> > > of running
> >> > > > flow_output_run(). And any changes to OVS interfaces can be either
> >> handled
> >> > > > incrementally (for OVS interfaces representing VIFs) or just
> running
> >> > > > physical_run() (for tunnel and other types of interfaces).
> >> > > >
> >> > > > To better handle this, a new engine node 'physical_flow_changes' is
> >> added
> >> > > which
> >> > > > handles changes to ct zone and OVS interfaces.
> >> > > >
> >> > > > After this patch, We don't trigger full recompute in flow output
> >> stage for
> >> > > > runtime data changes. There's noop handler for this. With this we
> >> avoid
> >> > > > calls to lflow_run() a lot.
> >> > > >
> >> > > > For the below commands the lflow_run counter without this patch is
> 16
> >> > > > and with this patch is 3.
> >> > > >
> >> > > > -----------------------
> >> > > > ovn-nbctl ls-add sw0
> >> > > > ovn-nbctl lsp-add sw0 sw0-port1
> >> > > > ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01
> 192.168.0.2"
> >> > > >
> >> > > > ovn-nbctl ls-add sw1
> >> > > > ovn-nbctl lsp-add sw1 sw1-port1
> >> > > > ovn-nbctl lsp-set-addresses sw1-port1 "50:54:00:00:00:03 11.0.0.2"
> >> > > >
> >> > > > ovn-nbctl lr-add lr0
> >> > > > ovn-nbctl lrp-add lr0 lrp0 00:00:00:00:ff:01 192.168.0.1/24
> >> > > > ovn-nbctl lsp-add sw0 lrp0-attachment
> >> > > > ovn-nbctl lsp-set-type lrp0-attachment router
> >> > > > ovn-nbctl lsp-set-addresses lrp0-attachment 00:00:00:00:ff:01
> >> > > > ovn-nbctl lsp-set-options lrp0-attachment router-port=lrp0
> >> > > > ovn-nbctl lrp-add lr0 lrp1 00:00:00:00:ff:02 11.0.0.1/24
> >> > > > ovn-nbctl lsp-add sw1 lrp1-attachment
> >> > > > ovn-nbctl lsp-set-type lrp1-attachment router
> >> > > > ovn-nbctl lsp-set-addresses lrp1-attachment 00:00:00:00:ff:02
> >> > > > ovn-nbctl lsp-set-options lrp1-attachment router-port=lrp1
> >> > > >
> >> > > > ovs-vsctl add-port br-int p1 -- \
> >> > > >     set Interface p1 external_ids:iface-id=sw0-port1
> >> > > > ovs-vsctl add-port br-int p2 -- \
> >> > > >     set Interface p2 external_ids:iface-id=sw1-port1
> >> > > > ovn-appctl -t ovn-controller coverage/read-counter lflow_run
> >> > > > -------------------
> >> > > >
> >> > > > Signed-off-by: Numan Siddique <numans@ovn.org>
> >> > > > ---
> >> > > >  controller/binding.c        | 22 ---------
> >> > > >  controller/lflow.c          | 13 +++++
> >> > > >  controller/lflow.h          |  2 +
> >> > > >  controller/ovn-controller.c | 96
> >> ++++++++++++++++++++++++++++++++++++-
> >> > > >  controller/ovn-controller.h | 22 +++++++++
> >> > > >  controller/physical.c       | 48 +++++++++++++++++++
> >> > > >  controller/physical.h       |  5 +-
> >> > > >  tests/ovn-performance.at    | 14 +++---
> >> > > >  8 files changed, 190 insertions(+), 32 deletions(-)
> >> > > >
> >> > > > diff --git a/controller/binding.c b/controller/binding.c
> >> > > > index ce4467f31..9dc1c3f38 100644
> >> > > > --- a/controller/binding.c
> >> > > > +++ b/controller/binding.c
> >> > > > @@ -501,22 +501,6 @@ remove_local_lport_ids(const struct
> >> > > sbrec_port_binding *binding_rec,
> >> > > >          sset_find_and_delete(local_lport_ids, buf);
> >> > > >  }
> >> > > >
> >> > > > -enum local_binding_type {
> >> > > > -    BT_VIF,
> >> > > > -    BT_CHILD,
> >> > > > -    BT_VIRTUAL
> >> > > > -};
> >> > > > -
> >> > > > -struct local_binding {
> >> > > > -    struct ovs_list node;       /* In parent if any. */
> >> > > > -    char *name;
> >> > > > -    enum local_binding_type type;
> >> > > > -    const struct ovsrec_interface *iface;
> >> > > > -    const struct sbrec_port_binding *pb;
> >> > > > -
> >> > > > -    struct ovs_list children;
> >> > > > -};
> >> > > > -
> >> > > >  static struct local_binding *
> >> > > >  local_binding_create(const char *name, const struct
> ovsrec_interface
> >> > > *iface,
> >> > > >                       const struct sbrec_port_binding *pb,
> >> > > > @@ -537,12 +521,6 @@ local_binding_add(struct shash
> *local_bindings,
> >> > > struct local_binding *lbinding)
> >> > > >      shash_add(local_bindings, lbinding->name, lbinding);
> >> > > >  }
> >> > > >
> >> > > > -static struct local_binding *
> >> > > > -local_binding_find(struct shash *local_bindings, const char *name)
> >> > > > -{
> >> > > > -    return shash_find_data(local_bindings, name);
> >> > > > -}
> >> > > > -
> >> > > >  static void
> >> > > >  local_binding_destroy(struct local_binding *lbinding)
> >> > > >  {
> >> > > > diff --git a/controller/lflow.c b/controller/lflow.c
> >> > > > index 01214a3a6..512258cd3 100644
> >> > > > --- a/controller/lflow.c
> >> > > > +++ b/controller/lflow.c
> >> > > > @@ -847,3 +847,16 @@ lflow_destroy(void)
> >> > > >      expr_symtab_destroy(&symtab);
> >> > > >      shash_destroy(&symtab);
> >> > > >  }
> >> > > > +
> >> > > > +bool
> >> > > > +lflow_evaluate_pb_changes(const struct sbrec_port_binding_table
> >> > > *pb_table)
> >> > > > +{
> >> > > > +    const struct sbrec_port_binding *binding;
> >> > > > +    SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (binding, pb_table)
> {
> >> > > > +        if (!strcmp(binding->type, "remote")) {
> >> > > > +            return false;
> >> > > > +        }
> >> > > > +    }
> >> > > > +
> >> > > > +    return true;
> >> > > > +}
> >> > > > diff --git a/controller/lflow.h b/controller/lflow.h
> >> > > > index f02f709d7..fa54d4de2 100644
> >> > > > --- a/controller/lflow.h
> >> > > > +++ b/controller/lflow.h
> >> > > > @@ -48,6 +48,7 @@ struct sbrec_dhcp_options_table;
> >> > > >  struct sbrec_dhcpv6_options_table;
> >> > > >  struct sbrec_logical_flow_table;
> >> > > >  struct sbrec_mac_binding_table;
> >> > > > +struct sbrec_port_binding_table;
> >> > > >  struct simap;
> >> > > >  struct sset;
> >> > > >  struct uuid;
> >> > > > @@ -153,6 +154,7 @@ void lflow_handle_changed_neighbors(
> >> > > >      const struct hmap *local_datapaths,
> >> > > >      struct ovn_desired_flow_table *);
> >> > > >
> >> > > > +bool lflow_evaluate_pb_changes(const struct
> sbrec_port_binding_table
> >> *);
> >> > > >  void lflow_destroy(void);
> >> > > >
> >> > > >  void lflow_expr_destroy(struct hmap *lflow_expr_cache);
> >> > > > diff --git a/controller/ovn-controller.c
> b/controller/ovn-controller.c
> >> > > > index 07823f020..448e9908e 100644
> >> > > > --- a/controller/ovn-controller.c
> >> > > > +++ b/controller/ovn-controller.c
> >> > > > @@ -1360,6 +1360,10 @@ static void init_physical_ctx(struct
> >> engine_node
> >> > > *node,
> >> > > >
> >> > > >      ovs_assert(br_int && chassis);
> >> > > >
> >> > > > +    struct ovsrec_interface_table *iface_table =
> >> > > > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> >> > > > +            engine_get_input("OVS_interface", node));
> >> > > > +
> >> > > >      struct ed_type_ct_zones *ct_zones_data =
> >> > > >          engine_get_input_data("ct_zones", node);
> >> > > >      struct simap *ct_zones = &ct_zones_data->current;
> >> > > > @@ -1369,12 +1373,14 @@ static void init_physical_ctx(struct
> >> engine_node
> >> > > *node,
> >> > > >      p_ctx->mc_group_table = multicast_group_table;
> >> > > >      p_ctx->br_int = br_int;
> >> > > >      p_ctx->chassis_table = chassis_table;
> >> > > > +    p_ctx->iface_table = iface_table;
> >> > > >      p_ctx->chassis = chassis;
> >> > > >      p_ctx->active_tunnels = &rt_data->active_tunnels;
> >> > > >      p_ctx->local_datapaths = &rt_data->local_datapaths;
> >> > > >      p_ctx->local_lports = &rt_data->local_lports;
> >> > > >      p_ctx->ct_zones = ct_zones;
> >> > > >      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
> >> > > > +    p_ctx->local_bindings = &rt_data->local_bindings;
> >> > > >  }
> >> > > >
> >> > > >  static void init_lflow_ctx(struct engine_node *node,
> >> > > > @@ -1637,6 +1643,8 @@ flow_output_sb_port_binding_handler(struct
> >> > > engine_node *node, void *data)
> >> > > >       *    always logical router port, and any change to logical
> >> router
> >> > > port
> >> > > >       *    would just trigger recompute.
> >> > > >       *
> >> > > > +     *  - When a port binding of type 'remote' is updated by
> ovn-ic.
> >> > > > +     *
> >> > > >       * Although there is no correctness issue so far (except the
> >> unusual
> >> > > ACL
> >> > > >       * use case, which doesn't seem to be a real problem), it
> might
> >> be
> >> > > better
> >> > > >       * to handle this more gracefully, without the need to
> consider
> >> these
> >> > > > @@ -1647,6 +1655,14 @@ flow_output_sb_port_binding_handler(struct
> >> > > engine_node *node, void *data)
> >> > > >      struct physical_ctx p_ctx;
> >> > > >      init_physical_ctx(node, rt_data, &p_ctx);
> >> > > >
> >> > > > +    if (!lflow_evaluate_pb_changes(p_ctx.port_binding_table)) {
> >> > > > +        /* If this returns false, it means there is an impact on
> >> > > > +         * the logical flow processing because of some changes in
> >> > > > +         * port bindings. Return false so that recompute is
> triggered
> >> > > > +         * for this stage. */
> >> > > > +        return false;
> >> > > > +    }
> >> > > > +
> >> > > >      physical_handle_port_binding_changes(&p_ctx, flow_table);
> >> > > >
> >> > > >      engine_set_node_state(node, EN_UPDATED);
> >> > > > @@ -1782,6 +1798,70 @@ flow_output_port_groups_handler(struct
> >> engine_node
> >> > > *node, void *data)
> >> > > >      return _flow_output_resource_ref_handler(node, data,
> >> > > REF_TYPE_PORTGROUP);
> >> > > >  }
> >> > > >
> >> > > > +struct ed_type_physical_flow_changes {
> >> > > > +    bool ct_zones_changed;
> >> > > > +};
> >> > > > +
> >> > > > +static bool
> >> > > > +flow_output_physical_flow_changes_handler(struct engine_node
> *node,
> >> void
> >> > > *data)
> >> > > > +{
> >> > > > +    struct ed_type_physical_flow_changes *pfc =
> >> > > > +        engine_get_input_data("physical_flow_changes", node);
> >> > > > +    struct ed_type_runtime_data *rt_data =
> >> > > > +        engine_get_input_data("runtime_data", node);
> >> > > > +
> >> > > > +    struct ed_type_flow_output *fo = data;
> >> > > > +    struct physical_ctx p_ctx;
> >> > > > +    init_physical_ctx(node, rt_data, &p_ctx);
> >> > > > +
> >> > > > +    engine_set_node_state(node, EN_UPDATED);
> >> > > > +    if (pfc->ct_zones_changed) {
> >> > > > +        pfc->ct_zones_changed = false;
> >> > > > +        physical_run(&p_ctx, &fo->flow_table);
> >> > > > +        return true;
> >> > > > +    }
> >> > > > +
> >> > > > +    return physical_handle_ovs_iface_changes(&p_ctx,
> >> &fo->flow_table);
> >> > > > +}
> >> > > > +
> >> > > > +static void *
> >> > > > +en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
> >> > > > +                             struct engine_arg *arg OVS_UNUSED)
> >> > > > +{
> >> > > > +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof
> >> *data);
> >> > > > +    return data;
> >> > > > +}
> >> > > > +
> >> > > > +static void
> >> > > > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
> >> > > > +{
> >> > > > +
> >> > > > +}
> >> > > > +
> >> > > > +static void
> >> > > > +en_physical_flow_changes_run(struct engine_node *node, void *data
> >> > > OVS_UNUSED)
> >> > > > +{
> >> > > > +    engine_set_node_state(node, EN_UPDATED);
> >> > > > +}
> >> > > > +
> >> > > > +static bool
> >> > > > +physical_flow_changes_ct_zones_handler(struct engine_node *node
> >> > > OVS_UNUSED,
> >> > > > +                                      void *data OVS_UNUSED)
> >> > > > +{
> >> > > > +    struct ed_type_physical_flow_changes *pfc = data;
> >> > > > +    pfc->ct_zones_changed = true;
> >> > > > +    engine_set_node_state(node, EN_UPDATED);
> >> > > > +    return false;
> >> > > > +}
> >> > > > +
> >> > > > +static bool
> >> > > > +flow_output_noop_handler(struct engine_node *node OVS_UNUSED,
> >> > > > +                          void *data OVS_UNUSED)
> >> > > > +{
> >> > > > +    return true;
> >> > > > +}
> >> > > > +
> >> > > > +
> >> > > >  struct ovn_controller_exit_args {
> >> > > >      bool *exiting;
> >> > > >      bool *restart;
> >> > > > @@ -1904,6 +1984,7 @@ main(int argc, char *argv[])
> >> > > >      ENGINE_NODE(runtime_data, "runtime_data");
> >> > > >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> >> > > >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> >> > > > +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
> >> > > >      ENGINE_NODE(flow_output, "flow_output");
> >> > > >      ENGINE_NODE(addr_sets, "addr_sets");
> >> > > >      ENGINE_NODE(port_groups, "port_groups");
> >> > > > @@ -1923,16 +2004,27 @@ main(int argc, char *argv[])
> >> > > >      engine_add_input(&en_port_groups, &en_sb_port_group,
> >> > > >                       port_groups_sb_port_group_handler);
> >> > > >
> >> > > > +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
> >> > > > +                     physical_flow_changes_ct_zones_handler);
> >> > > > +    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
> >> > > > +                     NULL);
> >> > > > +
> >> > > >      engine_add_input(&en_flow_output, &en_addr_sets,
> >> > > >                       flow_output_addr_sets_handler);
> >> > > >      engine_add_input(&en_flow_output, &en_port_groups,
> >> > > >                       flow_output_port_groups_handler);
> >> > > > -    engine_add_input(&en_flow_output, &en_runtime_data, NULL);
> >> > > > -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
> >> > > > +    engine_add_input(&en_flow_output, &en_runtime_data,
> >> > > > +                     flow_output_noop_handler);
> >> > >
> >> > > I think this breaks the dependency and would result in correctness
> >> problem.
> >> > > Because flow_output depends on runtime_data, which includes lots of
> data
> >> > > structures such as local_lports, local_datapaths, active_tunnels,
> etc.,
> >> all
> >> > > needed for flow compute to produce correct OVS flow output. Now with
> the
> >> > > noop handler, the changes in these data structures are ignored. How
> >> could
> >> > > we make sure the flows computed are still up to date?
> >> > >
> >> >
> >> > Thanks for the comments Han. I get your point.
> >> >
> >> > Let's say we have data structures 'A' and 'B' in runtime data. And if
> >> > events 'X' and 'Y' result in the modifications of these data
> structures in
> >> > runtime_data handlers, then handling (properly) these events  - 'X'
> >> > and 'Y' in the
> >> > flow_output engine should be enough to ensure the correctness of the
> >> > flows right ?
> >> >
> >> Well, this is not how the I-P engine supposed to work. The engine is
> purely
> >> data-driven. In your example, X or Y is just input data (another engine
> >> node) change of runtime data. They need to be handled in runtime data
> node.
> >> If flow_output engine also directly depend on X or Y, of course
> flow_output
> >> node also need to handle the change of X or Y, in addition to handle the
> >> change of runtime data. But handling X and Y in flow_output doesn't
> replace
> >> the handling in runtime data. A short cut is, if handling of X or Y
> always
> >> trigger recompute in flow_output, of course we can say it is still
> correct
> >> even if they are not handled in runtime data. I hope this clarifies the
> >> design.
> >
> >
> >
> >>
> >>
> >> > The present master has the below data structures in run_time data
> >> >   - local_datapaths (hmap)
> >> >   - local_lports (sset)
> >> >   - local_lport_ids (sset)
> >> >   - active_tunnels (sset)
> >> >
> >> > This I-P series has the below data structures in run_time data
> >> >   - local_datapaths (hmap)
> >> >   - local_lports (sset)
> >> >   - local_lport_ids (sset)
> >> >   - local_bindings (shash)
> >> >   - active_tunnels (sset)
> >> >   - egress_ifaces (sset)
> >> >   - local_iface_ids (smap)
> >> >
> >> > Except for active_tunnels, rest of the data structures are calculated
> >> > in binding.c
> >> > and in ovn-controller.c (during init, run and destroy of runtime_data)
> >> > And these data structures are modified in binding_run and in the ovs
> >> > interface change
> >> > handler and SB port binding change handler.
> >> >
> >> > I think if we handle the events - ovs interface changes and sb port
> >> > binding changes in
> >> > flow output engine, then it should ensure correctness provided we
> >> > handle these changes
> >> > properly.
> >> >
> >> Since flow_output depends on runtime data, and it take into consideration
> >> all the data structures you listed above for flow computing, it means if
> >> any of these data changes we need to evaluate them in flow_output.
> >> Otherwise, it is either incorrect or some data in runtime data node is
> >> useless. It doesn't seem right that the data is needed but we just ignore
> >> the change. If, some of the data is needed as output but not needed as
> >> input of flow_output, then we can simply split them out from runtime data
> >> and don't add them as input of flow_output node. In this case, not
> handling
> >> those changes is not a problem.
> >>
> >> > This patch series handles ovs interface changes  in flow_output
> >> > engine. We already
> >> > handle port binding changes, but we don't do any flow handling. I
> >> > think if we can
> >> > enhance this, we can solve this problem.
> >> >
> >> > The present patch series definitely has issues with the runtime noop
> >> handler,
> >> > but it still has no issues with correctness because the ovn sb idl txn
> is
> >> NULL
> >> > for any updates ovn-controller does to SB DB, resulting in flow
> >> recomputation.
> >> > But of course this is wrong. And we should probably enhance the IDL
> >> tracker.
> >> >
> >> > > From my point of view, to handle the runtime data changes
> >> incrementally, we
> >> > > should split the runtime data node into separate engine nodes, so
> that
> >> > > different changes can be handled differently for flow output compute.
> >> Some
> >> > > of the changes may be handled incrementally if it is easy to handle
> and
> >> if
> >> > > it is frequently changed, and leave those less frequent and
> >> hard-to-handle
> >> > > changes and let them trigger recompute.
> >> >
> >> > I don't think this is straightforward to split them. If you see, most
> >> > of the runtime data
> >> > is related to port binding and is updated during port binding.
> >>
> >> If a data structure depends on port_binding, it is straightforward to
> split
> >> it as a new node and add the SB_port_binding node as its input node, and
> >> then add itself as input node for flow_output. Then implement the run()
> and
> >> change_handler for SB_port_binding.
> >> Usually it may be not that simple because the new node may also depend on
> >> some other data structures in runtime data, so the dependency needs to be
> >> added and handled as well.
> >> The split strategy should be based on the need of I-P, i.e. split the
> part
> >> of data that triggers most recomputes, and handle it incrementally in
> most
> >> situation. This ensures least effort for best outcome.
> >>
> >> >
> >> > Please let me know what you think about the approach I mentioned above
> to
> >> > handle the correctness issue.
> >> >
> >> > I have addressed your comments for the other patches.
> >> > Shall I go ahead and submit v2 of first 4 patches of this series ?
> >> > I think these patches can be considered.
> >>
> >> Since those are not bug fixes and more related to the rest of the series,
> >> I'd suggest to submit v2 together for the whole series in case you want
> to
> >> change them in a different way.
> >>
> >
> > Thanks for the comments. Patch 3 and 4 add I-P for ovs interface and port
> binding
> > changes in the runtime data engine. Even if we drop patch 5 and 6 from
> the series
> > (lets just assume for now that it just complicates further), I think we
> can definitely
> > handle ovs interface and port binding changes for runtime data. This
> avoids
> > running binding_run(). In binding_run() we run a for loop for all the
> port binding rows.
> >
> > I will explore further on patch 6 approach. But I would like to pursue on
> the rest
> > of the patches.
> >
>
> Ok, I will take a closer look at patch 3 and 4 if you believe it is right
> approach to proceed, but remember that even we handle changes incrementally
> in runtime_data, since there is no change_handler for runtime_data in
> flow_output, it will still trigger flow recompute, which makes the
> improvement of avoiding binding_run() not quite obvious compare with the
> cost of lflow_run. It would be effective only if the node is splited with
> e2e I-P.
>

I've submitted v2 dropping patch 5 and 6  from the series.
Request to be please take a look -
https://patchwork.ozlabs.org/project/openvswitch/list/?series=168278

Thanks
Numan

> >
> > Thanks
> > Numan
> >
> >
> >
> >> Thanks again for taking the heavy-lift part of I-P!
> >>
> >> >
> >> > Thanks
> >> > Numan
> >> >
> >> > >
> >> > > >      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
> >> > > > +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
> >> > > > +                     flow_output_physical_flow_changes_handler);
> >> > > >
> >> > > >      engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
> >> > > >      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
> >> > > > +    engine_add_input(&en_flow_output, &en_ovs_interface,
> >> > > > +                     flow_output_noop_handler);
> >> > > > +    engine_add_input(&en_flow_output, &en_ct_zones,
> >> > > > +                     flow_output_noop_handler);
> >> > > >
> >> > > >      engine_add_input(&en_flow_output, &en_sb_chassis, NULL);
> >> > > >      engine_add_input(&en_flow_output, &en_sb_encap, NULL);
> >> > > > diff --git a/controller/ovn-controller.h
> b/controller/ovn-controller.h
> >> > > > index 5d9466880..40c358cef 100644
> >> > > > --- a/controller/ovn-controller.h
> >> > > > +++ b/controller/ovn-controller.h
> >> > > > @@ -72,6 +72,28 @@ struct local_datapath {
> >> > > >  struct local_datapath *get_local_datapath(const struct hmap *,
> >> > > >                                            uint32_t tunnel_key);
> >> > > >
> >> > > > +enum local_binding_type {
> >> > > > +    BT_VIF,
> >> > > > +    BT_CHILD,
> >> > > > +    BT_VIRTUAL
> >> > > > +};
> >> > > > +
> >> > > > +struct local_binding {
> >> > > > +    struct ovs_list node;       /* In parent if any. */
> >> > > > +    char *name;
> >> > > > +    enum local_binding_type type;
> >> > > > +    const struct ovsrec_interface *iface;
> >> > > > +    const struct sbrec_port_binding *pb;
> >> > > > +
> >> > > > +    struct ovs_list children;
> >> > > > +};
> >> > > > +
> >> > > > +static inline struct local_binding *
> >> > > > +local_binding_find(struct shash *local_bindings, const char *name)
> >> > > > +{
> >> > > > +    return shash_find_data(local_bindings, name);
> >> > > > +}
> >> > > > +
> >> > > >  const struct ovsrec_bridge *get_bridge(const struct
> >> ovsrec_bridge_table
> >> > > *,
> >> > > >                                         const char *br_name);
> >> > > >
> >> > > > diff --git a/controller/physical.c b/controller/physical.c
> >> > > > index 144aeb7bd..09a5d9b39 100644
> >> > > > --- a/controller/physical.c
> >> > > > +++ b/controller/physical.c
> >> > > > @@ -1780,6 +1780,54 @@ physical_run(struct physical_ctx *p_ctx,
> >> > > >      simap_destroy(&new_tunnel_to_ofport);
> >> > > >  }
> >> > > >
> >> > > > +bool
> >> > > > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
> >> > > > +                                  struct ovn_desired_flow_table
> >> > > *flow_table)
> >> > > > +{
> >> > > > +    const struct ovsrec_interface *iface_rec;
> >> > > > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> >> > > p_ctx->iface_table) {
> >> > > > +        if (!strcmp(iface_rec->type, "geneve") ||
> >> > > > +            !strcmp(iface_rec->type, "patch")) {
> >> > > > +            return false;
> >> > > > +        }
> >> > > > +    }
> >> > > > +
> >> > > > +    struct ofpbuf ofpacts;
> >> > > > +    ofpbuf_init(&ofpacts, 0);
> >> > > > +
> >> > > > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> >> > > p_ctx->iface_table) {
> >> > > > +        const char *iface_id = smap_get(&iface_rec->external_ids,
> >> > > "iface-id");
> >> > > > +        if (!iface_id) {
> >> > > > +            continue;
> >> > > > +        }
> >> > > > +
> >> > > > +        const struct local_binding *lb =
> >> > > > +            local_binding_find(p_ctx->local_bindings, iface_id);
> >> > > > +
> >> > > > +        if (!lb || !lb->pb) {
> >> > > > +            continue;
> >> > > > +        }
> >> > > > +
> >> > > > +        if (ovsrec_interface_is_deleted(iface_rec)) {
> >> > > > +            ofctrl_remove_flows(flow_table,
> &lb->pb->header_.uuid);
> >> > > > +        } else {
> >> > > > +            if (!ovsrec_interface_is_new(iface_rec)) {
> >> > > > +                ofctrl_remove_flows(flow_table,
> >> &lb->pb->header_.uuid);
> >> > > > +            }
> >> > > > +
> >> > > > +
>  consider_port_binding(p_ctx->sbrec_port_binding_by_name,
> >> > > > +                                  p_ctx->mff_ovn_geneve,
> >> p_ctx->ct_zones,
> >> > > > +                                  p_ctx->active_tunnels,
> >> > > > +                                  p_ctx->local_datapaths,
> >> > > > +                                  lb->pb, p_ctx->chassis,
> >> > > > +                                  flow_table, &ofpacts);
> >> > > > +        }
> >> > > > +    }
> >> > > > +
> >> > > > +    ofpbuf_uninit(&ofpacts);
> >> > > > +    return true;
> >> > > > +}
> >> > > > +
> >> > > >  bool
> >> > > >  get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> >> ofp_port_t
> >> > > *ofport)
> >> > > >  {
> >> > > > diff --git a/controller/physical.h b/controller/physical.h
> >> > > > index dadf84ea1..9ca34436a 100644
> >> > > > --- a/controller/physical.h
> >> > > > +++ b/controller/physical.h
> >> > > > @@ -49,11 +49,13 @@ struct physical_ctx {
> >> > > >      const struct ovsrec_bridge *br_int;
> >> > > >      const struct sbrec_chassis_table *chassis_table;
> >> > > >      const struct sbrec_chassis *chassis;
> >> > > > +    const struct ovsrec_interface_table *iface_table;
> >> > > >      const struct sset *active_tunnels;
> >> > > >      struct hmap *local_datapaths;
> >> > > >      struct sset *local_lports;
> >> > > >      const struct simap *ct_zones;
> >> > > >      enum mf_field_id mff_ovn_geneve;
> >> > > > +    struct shash *local_bindings;
> >> > > >  };
> >> > > >
> >> > > >  void physical_register_ovs_idl(struct ovsdb_idl *);
> >> > > > @@ -63,7 +65,8 @@ void physical_handle_port_binding_changes(struct
> >> > > physical_ctx *,
> >> > > >                                            struct
> >> ovn_desired_flow_table
> >> > > *);
> >> > > >  void physical_handle_mc_group_changes(struct physical_ctx *,
> >> > > >                                        struct
> ovn_desired_flow_table
> >> *);
> >> > > > -
> >> > > > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
> >> > > > +                                       struct
> ovn_desired_flow_table
> >> *);
> >> > > >  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> >> > > >                         ofp_port_t *ofport);
> >> > > >  #endif /* controller/physical.h */
> >> > > > diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at
> >> > > > index 5c21f6dd7..555434484 100644
> >> > > > --- a/tests/ovn-performance.at
> >> > > > +++ b/tests/ovn-performance.at
> >> > > > @@ -263,7 +263,7 @@ for i in 1 2; do
> >> > > >      )
> >> > > >
> >> > > >      # Add router port to $ls
> >> > > > -    OVN_CONTROLLER_EXPECT_HIT(
> >> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> >> > > >          [hv1 hv2], [lflow_run],
> >> > > >          [ovn-nbctl --wait=hv lrp-add lr1 $lrp 02:00:00:00:0$i:01
> >> > > 10.0.$i.1/24]
> >> > > >      )
> >> > > > @@ -271,15 +271,15 @@ for i in 1 2; do
> >> > > >          [hv1 hv2], [lflow_run],
> >> > > >          [ovn-nbctl --wait=hv lsp-add $ls $lsp]
> >> > > >      )
> >> > > > -    OVN_CONTROLLER_EXPECT_HIT(
> >> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> >> > > >          [hv1 hv2], [lflow_run],
> >> > > >          [ovn-nbctl --wait=hv lsp-set-type $lsp router]
> >> > > >      )
> >> > > > -    OVN_CONTROLLER_EXPECT_HIT(
> >> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> >> > > >          [hv1 hv2], [lflow_run],
> >> > > >          [ovn-nbctl --wait=hv lsp-set-options $lsp
> router-port=$lrp]
> >> > > >      )
> >> > > > -    OVN_CONTROLLER_EXPECT_HIT(
> >> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> >> > > >          [hv1 hv2], [lflow_run],
> >> > > >          [ovn-nbctl --wait=hv lsp-set-addresses $lsp router]
> >> > > >      )
> >> > > > @@ -393,8 +393,8 @@ for i in 1 2; do
> >> > > >      lp=lp$i
> >> > > >
> >> > > >      # Delete port $lp
> >> > > > -    OVN_CONTROLLER_EXPECT_HIT_COND(
> >> > > > -        [hv$i hv$j], [lflow_run], [>0 =0],
> >> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> >> > > > +        [hv$i hv$j], [lflow_run],
> >> > > >          [ovn-nbctl --wait=hv lsp-del $lp]
> >> > > >      )
> >> > > >  done
> >> > > > @@ -409,7 +409,7 @@ for i in 1 2; do
> >> > > >      ls=ls$i
> >> > > >
> >> > > >      # Delete switch $ls
> >> > > > -    OVN_CONTROLLER_EXPECT_HIT(
> >> > > > +    OVN_CONTROLLER_EXPECT_NO_HIT(
> >> > > >          [hv1 hv2], [lflow_run],
> >> > > >          [ovn-nbctl --wait=hv ls-del $ls]
> >> > > >      )
> >> > > > --
> >> > > > 2.24.1
> >> > > >
> >> > > > _______________________________________________
> >> > > > dev mailing list
> >> > > > dev@openvswitch.org
> >> > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >> > > _______________________________________________
> >> > > dev mailing list
> >> > > dev@openvswitch.org
> >> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >> > >
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox series

Patch

diff --git a/controller/binding.c b/controller/binding.c
index ce4467f31..9dc1c3f38 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -501,22 +501,6 @@  remove_local_lport_ids(const struct sbrec_port_binding *binding_rec,
         sset_find_and_delete(local_lport_ids, buf);
 }
 
-enum local_binding_type {
-    BT_VIF,
-    BT_CHILD,
-    BT_VIRTUAL
-};
-
-struct local_binding {
-    struct ovs_list node;       /* In parent if any. */
-    char *name;
-    enum local_binding_type type;
-    const struct ovsrec_interface *iface;
-    const struct sbrec_port_binding *pb;
-
-    struct ovs_list children;
-};
-
 static struct local_binding *
 local_binding_create(const char *name, const struct ovsrec_interface *iface,
                      const struct sbrec_port_binding *pb,
@@ -537,12 +521,6 @@  local_binding_add(struct shash *local_bindings, struct local_binding *lbinding)
     shash_add(local_bindings, lbinding->name, lbinding);
 }
 
-static struct local_binding *
-local_binding_find(struct shash *local_bindings, const char *name)
-{
-    return shash_find_data(local_bindings, name);
-}
-
 static void
 local_binding_destroy(struct local_binding *lbinding)
 {
diff --git a/controller/lflow.c b/controller/lflow.c
index 01214a3a6..512258cd3 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -847,3 +847,16 @@  lflow_destroy(void)
     expr_symtab_destroy(&symtab);
     shash_destroy(&symtab);
 }
+
+bool
+lflow_evaluate_pb_changes(const struct sbrec_port_binding_table *pb_table)
+{
+    const struct sbrec_port_binding *binding;
+    SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (binding, pb_table) {
+        if (!strcmp(binding->type, "remote")) {
+            return false;
+        }
+    }
+
+    return true;
+}
diff --git a/controller/lflow.h b/controller/lflow.h
index f02f709d7..fa54d4de2 100644
--- a/controller/lflow.h
+++ b/controller/lflow.h
@@ -48,6 +48,7 @@  struct sbrec_dhcp_options_table;
 struct sbrec_dhcpv6_options_table;
 struct sbrec_logical_flow_table;
 struct sbrec_mac_binding_table;
+struct sbrec_port_binding_table;
 struct simap;
 struct sset;
 struct uuid;
@@ -153,6 +154,7 @@  void lflow_handle_changed_neighbors(
     const struct hmap *local_datapaths,
     struct ovn_desired_flow_table *);
 
+bool lflow_evaluate_pb_changes(const struct sbrec_port_binding_table *);
 void lflow_destroy(void);
 
 void lflow_expr_destroy(struct hmap *lflow_expr_cache);
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 07823f020..448e9908e 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -1360,6 +1360,10 @@  static void init_physical_ctx(struct engine_node *node,
 
     ovs_assert(br_int && chassis);
 
+    struct ovsrec_interface_table *iface_table =
+        (struct ovsrec_interface_table *)EN_OVSDB_GET(
+            engine_get_input("OVS_interface", node));
+
     struct ed_type_ct_zones *ct_zones_data =
         engine_get_input_data("ct_zones", node);
     struct simap *ct_zones = &ct_zones_data->current;
@@ -1369,12 +1373,14 @@  static void init_physical_ctx(struct engine_node *node,
     p_ctx->mc_group_table = multicast_group_table;
     p_ctx->br_int = br_int;
     p_ctx->chassis_table = chassis_table;
+    p_ctx->iface_table = iface_table;
     p_ctx->chassis = chassis;
     p_ctx->active_tunnels = &rt_data->active_tunnels;
     p_ctx->local_datapaths = &rt_data->local_datapaths;
     p_ctx->local_lports = &rt_data->local_lports;
     p_ctx->ct_zones = ct_zones;
     p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
+    p_ctx->local_bindings = &rt_data->local_bindings;
 }
 
 static void init_lflow_ctx(struct engine_node *node,
@@ -1637,6 +1643,8 @@  flow_output_sb_port_binding_handler(struct engine_node *node, void *data)
      *    always logical router port, and any change to logical router port
      *    would just trigger recompute.
      *
+     *  - When a port binding of type 'remote' is updated by ovn-ic.
+     *
      * Although there is no correctness issue so far (except the unusual ACL
      * use case, which doesn't seem to be a real problem), it might be better
      * to handle this more gracefully, without the need to consider these
@@ -1647,6 +1655,14 @@  flow_output_sb_port_binding_handler(struct engine_node *node, void *data)
     struct physical_ctx p_ctx;
     init_physical_ctx(node, rt_data, &p_ctx);
 
+    if (!lflow_evaluate_pb_changes(p_ctx.port_binding_table)) {
+        /* If this returns false, it means there is an impact on
+         * the logical flow processing because of some changes in
+         * port bindings. Return false so that recompute is triggered
+         * for this stage. */
+        return false;
+    }
+
     physical_handle_port_binding_changes(&p_ctx, flow_table);
 
     engine_set_node_state(node, EN_UPDATED);
@@ -1782,6 +1798,70 @@  flow_output_port_groups_handler(struct engine_node *node, void *data)
     return _flow_output_resource_ref_handler(node, data, REF_TYPE_PORTGROUP);
 }
 
+struct ed_type_physical_flow_changes {
+    bool ct_zones_changed;
+};
+
+static bool
+flow_output_physical_flow_changes_handler(struct engine_node *node, void *data)
+{
+    struct ed_type_physical_flow_changes *pfc =
+        engine_get_input_data("physical_flow_changes", node);
+    struct ed_type_runtime_data *rt_data =
+        engine_get_input_data("runtime_data", node);
+
+    struct ed_type_flow_output *fo = data;
+    struct physical_ctx p_ctx;
+    init_physical_ctx(node, rt_data, &p_ctx);
+
+    engine_set_node_state(node, EN_UPDATED);
+    if (pfc->ct_zones_changed) {
+        pfc->ct_zones_changed = false;
+        physical_run(&p_ctx, &fo->flow_table);
+        return true;
+    }
+
+    return physical_handle_ovs_iface_changes(&p_ctx, &fo->flow_table);
+}
+
+static void *
+en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
+                             struct engine_arg *arg OVS_UNUSED)
+{
+    struct ed_type_physical_flow_changes *data = xzalloc(sizeof *data);
+    return data;
+}
+
+static void
+en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
+{
+
+}
+
+static void
+en_physical_flow_changes_run(struct engine_node *node, void *data OVS_UNUSED)
+{
+    engine_set_node_state(node, EN_UPDATED);
+}
+
+static bool
+physical_flow_changes_ct_zones_handler(struct engine_node *node OVS_UNUSED,
+                                      void *data OVS_UNUSED)
+{
+    struct ed_type_physical_flow_changes *pfc = data;
+    pfc->ct_zones_changed = true;
+    engine_set_node_state(node, EN_UPDATED);
+    return false;
+}
+
+static bool
+flow_output_noop_handler(struct engine_node *node OVS_UNUSED,
+                          void *data OVS_UNUSED)
+{
+    return true;
+}
+
+
 struct ovn_controller_exit_args {
     bool *exiting;
     bool *restart;
@@ -1904,6 +1984,7 @@  main(int argc, char *argv[])
     ENGINE_NODE(runtime_data, "runtime_data");
     ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
     ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
+    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
     ENGINE_NODE(flow_output, "flow_output");
     ENGINE_NODE(addr_sets, "addr_sets");
     ENGINE_NODE(port_groups, "port_groups");
@@ -1923,16 +2004,27 @@  main(int argc, char *argv[])
     engine_add_input(&en_port_groups, &en_sb_port_group,
                      port_groups_sb_port_group_handler);
 
+    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
+                     physical_flow_changes_ct_zones_handler);
+    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
+                     NULL);
+
     engine_add_input(&en_flow_output, &en_addr_sets,
                      flow_output_addr_sets_handler);
     engine_add_input(&en_flow_output, &en_port_groups,
                      flow_output_port_groups_handler);
-    engine_add_input(&en_flow_output, &en_runtime_data, NULL);
-    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
+    engine_add_input(&en_flow_output, &en_runtime_data,
+                     flow_output_noop_handler);
     engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
+    engine_add_input(&en_flow_output, &en_physical_flow_changes,
+                     flow_output_physical_flow_changes_handler);
 
     engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
     engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
+    engine_add_input(&en_flow_output, &en_ovs_interface,
+                     flow_output_noop_handler);
+    engine_add_input(&en_flow_output, &en_ct_zones,
+                     flow_output_noop_handler);
 
     engine_add_input(&en_flow_output, &en_sb_chassis, NULL);
     engine_add_input(&en_flow_output, &en_sb_encap, NULL);
diff --git a/controller/ovn-controller.h b/controller/ovn-controller.h
index 5d9466880..40c358cef 100644
--- a/controller/ovn-controller.h
+++ b/controller/ovn-controller.h
@@ -72,6 +72,28 @@  struct local_datapath {
 struct local_datapath *get_local_datapath(const struct hmap *,
                                           uint32_t tunnel_key);
 
+enum local_binding_type {
+    BT_VIF,
+    BT_CHILD,
+    BT_VIRTUAL
+};
+
+struct local_binding {
+    struct ovs_list node;       /* In parent if any. */
+    char *name;
+    enum local_binding_type type;
+    const struct ovsrec_interface *iface;
+    const struct sbrec_port_binding *pb;
+
+    struct ovs_list children;
+};
+
+static inline struct local_binding *
+local_binding_find(struct shash *local_bindings, const char *name)
+{
+    return shash_find_data(local_bindings, name);
+}
+
 const struct ovsrec_bridge *get_bridge(const struct ovsrec_bridge_table *,
                                        const char *br_name);
 
diff --git a/controller/physical.c b/controller/physical.c
index 144aeb7bd..09a5d9b39 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -1780,6 +1780,54 @@  physical_run(struct physical_ctx *p_ctx,
     simap_destroy(&new_tunnel_to_ofport);
 }
 
+bool
+physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
+                                  struct ovn_desired_flow_table *flow_table)
+{
+    const struct ovsrec_interface *iface_rec;
+    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec, p_ctx->iface_table) {
+        if (!strcmp(iface_rec->type, "geneve") ||
+            !strcmp(iface_rec->type, "patch")) {
+            return false;
+        }
+    }
+
+    struct ofpbuf ofpacts;
+    ofpbuf_init(&ofpacts, 0);
+
+    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec, p_ctx->iface_table) {
+        const char *iface_id = smap_get(&iface_rec->external_ids, "iface-id");
+        if (!iface_id) {
+            continue;
+        }
+
+        const struct local_binding *lb =
+            local_binding_find(p_ctx->local_bindings, iface_id);
+
+        if (!lb || !lb->pb) {
+            continue;
+        }
+
+        if (ovsrec_interface_is_deleted(iface_rec)) {
+            ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
+        } else {
+            if (!ovsrec_interface_is_new(iface_rec)) {
+                ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
+            }
+
+            consider_port_binding(p_ctx->sbrec_port_binding_by_name,
+                                  p_ctx->mff_ovn_geneve, p_ctx->ct_zones,
+                                  p_ctx->active_tunnels,
+                                  p_ctx->local_datapaths,
+                                  lb->pb, p_ctx->chassis,
+                                  flow_table, &ofpacts);
+        }
+    }
+
+    ofpbuf_uninit(&ofpacts);
+    return true;
+}
+
 bool
 get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t *ofport)
 {
diff --git a/controller/physical.h b/controller/physical.h
index dadf84ea1..9ca34436a 100644
--- a/controller/physical.h
+++ b/controller/physical.h
@@ -49,11 +49,13 @@  struct physical_ctx {
     const struct ovsrec_bridge *br_int;
     const struct sbrec_chassis_table *chassis_table;
     const struct sbrec_chassis *chassis;
+    const struct ovsrec_interface_table *iface_table;
     const struct sset *active_tunnels;
     struct hmap *local_datapaths;
     struct sset *local_lports;
     const struct simap *ct_zones;
     enum mf_field_id mff_ovn_geneve;
+    struct shash *local_bindings;
 };
 
 void physical_register_ovs_idl(struct ovsdb_idl *);
@@ -63,7 +65,8 @@  void physical_handle_port_binding_changes(struct physical_ctx *,
                                           struct ovn_desired_flow_table *);
 void physical_handle_mc_group_changes(struct physical_ctx *,
                                       struct ovn_desired_flow_table *);
-
+bool physical_handle_ovs_iface_changes(struct physical_ctx *,
+                                       struct ovn_desired_flow_table *);
 bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
                        ofp_port_t *ofport);
 #endif /* controller/physical.h */
diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at
index 5c21f6dd7..555434484 100644
--- a/tests/ovn-performance.at
+++ b/tests/ovn-performance.at
@@ -263,7 +263,7 @@  for i in 1 2; do
     )
 
     # Add router port to $ls
-    OVN_CONTROLLER_EXPECT_HIT(
+    OVN_CONTROLLER_EXPECT_NO_HIT(
         [hv1 hv2], [lflow_run],
         [ovn-nbctl --wait=hv lrp-add lr1 $lrp 02:00:00:00:0$i:01 10.0.$i.1/24]
     )
@@ -271,15 +271,15 @@  for i in 1 2; do
         [hv1 hv2], [lflow_run],
         [ovn-nbctl --wait=hv lsp-add $ls $lsp]
     )
-    OVN_CONTROLLER_EXPECT_HIT(
+    OVN_CONTROLLER_EXPECT_NO_HIT(
         [hv1 hv2], [lflow_run],
         [ovn-nbctl --wait=hv lsp-set-type $lsp router]
     )
-    OVN_CONTROLLER_EXPECT_HIT(
+    OVN_CONTROLLER_EXPECT_NO_HIT(
         [hv1 hv2], [lflow_run],
         [ovn-nbctl --wait=hv lsp-set-options $lsp router-port=$lrp]
     )
-    OVN_CONTROLLER_EXPECT_HIT(
+    OVN_CONTROLLER_EXPECT_NO_HIT(
         [hv1 hv2], [lflow_run],
         [ovn-nbctl --wait=hv lsp-set-addresses $lsp router]
     )
@@ -393,8 +393,8 @@  for i in 1 2; do
     lp=lp$i
 
     # Delete port $lp
-    OVN_CONTROLLER_EXPECT_HIT_COND(
-        [hv$i hv$j], [lflow_run], [>0 =0],
+    OVN_CONTROLLER_EXPECT_NO_HIT(
+        [hv$i hv$j], [lflow_run],
         [ovn-nbctl --wait=hv lsp-del $lp]
     )
 done
@@ -409,7 +409,7 @@  for i in 1 2; do
     ls=ls$i
 
     # Delete switch $ls
-    OVN_CONTROLLER_EXPECT_HIT(
+    OVN_CONTROLLER_EXPECT_NO_HIT(
         [hv1 hv2], [lflow_run],
         [ovn-nbctl --wait=hv ls-del $ls]
     )