diff mbox series

[ovs-dev,ovn,v7,5/9] ovn-controller: I-P for ct zone and OVS interface changes in flow output stage.

Message ID 20200520194007.2351985-1-numans@ovn.org
State Superseded
Headers show
Series Incremental processing improvements. | expand

Commit Message

Numan Siddique May 20, 2020, 7:40 p.m. UTC
From: Numan Siddique <numans@ovn.org>

This patch handles ct zone changes and OVS interface changes incrementally
in the flow output stage.

Any changes to ct zone can be handled by running physical_run() instead of running
flow_output_run(). And any changes to OVS interfaces can be either handled
incrementally (for OVS interfaces representing VIFs) or just running
physical_run() (for tunnel and other types of interfaces).

To better handle this, a new engine node 'physical_flow_changes' is added which
handles changes to ct zone and OVS interfaces.

Acked-by: Mark Michelson <mmichels@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
---
 controller/binding.c        | 23 +----------
 controller/binding.h        | 24 ++++++++++-
 controller/ovn-controller.c | 82 ++++++++++++++++++++++++++++++++++++-
 controller/physical.c       | 51 +++++++++++++++++++++++
 controller/physical.h       |  5 ++-
 5 files changed, 159 insertions(+), 26 deletions(-)

Comments

Han Zhou May 21, 2020, 6:31 a.m. UTC | #1
On Wed, May 20, 2020 at 12:41 PM <numans@ovn.org> wrote:
>
> From: Numan Siddique <numans@ovn.org>
>
> This patch handles ct zone changes and OVS interface changes incrementally
> in the flow output stage.
>
> Any changes to ct zone can be handled by running physical_run() instead
of running
> flow_output_run(). And any changes to OVS interfaces can be either handled
> incrementally (for OVS interfaces representing VIFs) or just running
> physical_run() (for tunnel and other types of interfaces).
>
> To better handle this, a new engine node 'physical_flow_changes' is added
which
> handles changes to ct zone and OVS interfaces.
>
Hi Numan,

The engine node physical_flow_changes looks weird because it doesn't really
have any data but merely to trigger some compute when VIF changes.
I think it can be handled in a more straightforward way. In fact there was
a miss in my initial I-P patch that there are still global variables used
in physical.c (my bad):
- localvif_to_ofport
- tunnels
In the principle of I-P engine global variable shouldn't be used because
otherwise there is no way to ensure the dependency graph is followed. The
current I-P is sitll correct only because interface changes (which in turn
causes the above global var changes) always triggers recompute.

Now to handle interface changes incrementally, we should simply move these
global vars to engine nodes, and add them as input for flow_output, and
also their input will be OVS_Interface (and probably some other inputs).
With this, the dependency graph would be clear and implementation of each
input handler will be straightforward. Otherwise, it would be really hard
to follow the logic and deduce the correctness for all corner cases,
although it may be correct for normal scenarios that I believe you have
done thorough tests. Do you mind refactoring it?

In addition, please see another comment inlined.

Thanks,
Han


> Acked-by: Mark Michelson <mmichels@redhat.com>
> Signed-off-by: Numan Siddique <numans@ovn.org>
> ---
>  controller/binding.c        | 23 +----------
>  controller/binding.h        | 24 ++++++++++-
>  controller/ovn-controller.c | 82 ++++++++++++++++++++++++++++++++++++-
>  controller/physical.c       | 51 +++++++++++++++++++++++
>  controller/physical.h       |  5 ++-
>  5 files changed, 159 insertions(+), 26 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index d5e8e4773..f00c975ce 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -504,7 +504,7 @@ remove_local_lport_ids(const struct
sbrec_port_binding *binding_rec,
>   * 'struct local_binding' is used. A shash of these local bindings is
>   * maintained with the 'external_ids:iface-id' as the key to the shash.
>   *
> - * struct local_binding has 3 main fields:
> + * struct local_binding (defined in binding.h) has 3 main fields:
>   *    - type
>   *    - OVS interface row object
>   *    - Port_Binding row object
> @@ -540,21 +540,6 @@ remove_local_lport_ids(const struct
sbrec_port_binding *binding_rec,
>   *              when it sees the ARP packet from the parent's VIF.
>   *
>   */
> -enum local_binding_type {
> -    BT_VIF,
> -    BT_CONTAINER,
> -    BT_VIRTUAL
> -};
> -
> -struct local_binding {
> -    char *name;
> -    enum local_binding_type type;
> -    const struct ovsrec_interface *iface;
> -    const struct sbrec_port_binding *pb;
> -
> -    /* shash of 'struct local_binding' representing children. */
> -    struct shash children;
> -};
>
>  static struct local_binding *
>  local_binding_create(const char *name, const struct ovsrec_interface
*iface,
> @@ -576,12 +561,6 @@ local_binding_add(struct shash *local_bindings,
struct local_binding *lbinding)
>      shash_add(local_bindings, lbinding->name, lbinding);
>  }
>
> -static struct local_binding *
> -local_binding_find(struct shash *local_bindings, const char *name)
> -{
> -    return shash_find_data(local_bindings, name);
> -}
> -
>  static void
>  local_binding_destroy(struct local_binding *lbinding)
>  {
> diff --git a/controller/binding.h b/controller/binding.h
> index f7ace6faf..21118ecd4 100644
> --- a/controller/binding.h
> +++ b/controller/binding.h
> @@ -18,6 +18,7 @@
>  #define OVN_BINDING_H 1
>
>  #include <stdbool.h>
> +#include "openvswitch/shash.h"
>
>  struct hmap;
>  struct ovsdb_idl;
> @@ -32,7 +33,6 @@ struct sbrec_chassis;
>  struct sbrec_port_binding_table;
>  struct sset;
>  struct sbrec_port_binding;
> -struct shash;
>
>  struct binding_ctx_in {
>      struct ovsdb_idl_txn *ovnsb_idl_txn;
> @@ -60,6 +60,28 @@ struct binding_ctx_out {
>      struct smap *local_iface_ids;
>  };
>
> +enum local_binding_type {
> +    BT_VIF,
> +    BT_CONTAINER,
> +    BT_VIRTUAL
> +};
> +
> +struct local_binding {
> +    char *name;
> +    enum local_binding_type type;
> +    const struct ovsrec_interface *iface;
> +    const struct sbrec_port_binding *pb;
> +
> +    /* shash of 'struct local_binding' representing children. */
> +    struct shash children;
> +};
> +
> +static inline struct local_binding *
> +local_binding_find(struct shash *local_bindings, const char *name)
> +{
> +    return shash_find_data(local_bindings, name);
> +}
> +
>  void binding_register_ovs_idl(struct ovsdb_idl *);
>  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
>  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 9ad5be1c6..8f95fff1f 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1369,8 +1369,13 @@ static void init_physical_ctx(struct engine_node
*node,
>
>      ovs_assert(br_int && chassis);
>
> +    struct ovsrec_interface_table *iface_table =
> +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> +            engine_get_input("OVS_interface",
> +                engine_get_input("physical_flow_changes", node)));

I think we shouldn't try to get indirect input data (i.e. input of the
input). The engine design didn't take this into consideration, although
nothing could prevent a developer to do this (except code reviewing:)).
If the input is required to compute the output, just add it as direct input
to the node.

>      struct ed_type_ct_zones *ct_zones_data =
> -        engine_get_input_data("ct_zones", node);
> +        engine_get_input_data("ct_zones",
> +            engine_get_input("physical_flow_changes", node));
>      struct simap *ct_zones = &ct_zones_data->current;
>
>      p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
> @@ -1378,12 +1383,14 @@ static void init_physical_ctx(struct engine_node
*node,
>      p_ctx->mc_group_table = multicast_group_table;
>      p_ctx->br_int = br_int;
>      p_ctx->chassis_table = chassis_table;
> +    p_ctx->iface_table = iface_table;
>      p_ctx->chassis = chassis;
>      p_ctx->active_tunnels = &rt_data->active_tunnels;
>      p_ctx->local_datapaths = &rt_data->local_datapaths;
>      p_ctx->local_lports = &rt_data->local_lports;
>      p_ctx->ct_zones = ct_zones;
>      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
> +    p_ctx->local_bindings = &rt_data->local_bindings;
>  }
>
>  static void init_lflow_ctx(struct engine_node *node,
> @@ -1791,6 +1798,70 @@ flow_output_port_groups_handler(struct engine_node
*node, void *data)
>      return _flow_output_resource_ref_handler(node, data,
REF_TYPE_PORTGROUP);
>  }
>
> +struct ed_type_physical_flow_changes {
> +    bool need_physical_run;
> +    bool ovs_ifaces_changed;
> +};
> +
> +static bool
> +flow_output_physical_flow_changes_handler(struct engine_node *node, void
*data)
> +{
> +    struct ed_type_physical_flow_changes *pfc =
> +        engine_get_input_data("physical_flow_changes", node);
> +    struct ed_type_runtime_data *rt_data =
> +        engine_get_input_data("runtime_data", node);
> +
> +    struct ed_type_flow_output *fo = data;
> +    struct physical_ctx p_ctx;
> +    init_physical_ctx(node, rt_data, &p_ctx);
> +
> +    engine_set_node_state(node, EN_UPDATED);
> +    if (pfc->need_physical_run) {
> +        pfc->need_physical_run = false;
> +        physical_run(&p_ctx, &fo->flow_table);
> +        return true;
> +    }
> +
> +    if (pfc->ovs_ifaces_changed) {
> +        pfc->ovs_ifaces_changed = false;
> +        return physical_handle_ovs_iface_changes(&p_ctx,
&fo->flow_table);
> +    }
> +
> +    return true;
> +}
> +
> +static void *
> +en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
> +                             struct engine_arg *arg OVS_UNUSED)
> +{
> +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof *data);
> +    return data;
> +}
> +
> +static void
> +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
> +{
> +
> +}
> +
> +static void
> +en_physical_flow_changes_run(struct engine_node *node, void *data
OVS_UNUSED)
> +{
> +    struct ed_type_physical_flow_changes *pfc = data;
> +    pfc->need_physical_run = true;
> +    engine_set_node_state(node, EN_UPDATED);
> +}
> +
> +static bool
> +physical_flow_changes_ovs_iface_handler(struct engine_node *node
OVS_UNUSED,
> +                                        void *data OVS_UNUSED)
> +{
> +    struct ed_type_physical_flow_changes *pfc = data;
> +    pfc->ovs_ifaces_changed = true;
> +    engine_set_node_state(node, EN_UPDATED);
> +    return true;
> +}
> +
>  struct ovn_controller_exit_args {
>      bool *exiting;
>      bool *restart;
> @@ -1914,6 +1985,7 @@ main(int argc, char *argv[])
>      ENGINE_NODE(runtime_data, "runtime_data");
>      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
>      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
>      ENGINE_NODE(flow_output, "flow_output");
>      ENGINE_NODE(addr_sets, "addr_sets");
>      ENGINE_NODE(port_groups, "port_groups");
> @@ -1933,13 +2005,19 @@ main(int argc, char *argv[])
>      engine_add_input(&en_port_groups, &en_sb_port_group,
>                       port_groups_sb_port_group_handler);
>
> +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
> +                     NULL);
> +    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
> +                     physical_flow_changes_ovs_iface_handler);
> +
>      engine_add_input(&en_flow_output, &en_addr_sets,
>                       flow_output_addr_sets_handler);
>      engine_add_input(&en_flow_output, &en_port_groups,
>                       flow_output_port_groups_handler);
>      engine_add_input(&en_flow_output, &en_runtime_data, NULL);
> -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
>      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
> +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
> +                     flow_output_physical_flow_changes_handler);
>
>      engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
>      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
> diff --git a/controller/physical.c b/controller/physical.c
> index 144aeb7bd..03eb677d1 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -1780,6 +1780,57 @@ physical_run(struct physical_ctx *p_ctx,
>      simap_destroy(&new_tunnel_to_ofport);
>  }
>
> +bool
> +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
> +                                  struct ovn_desired_flow_table
*flow_table)
> +{
> +    const struct ovsrec_interface *iface_rec;
> +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
p_ctx->iface_table) {
> +        if (!strcmp(iface_rec->type, "geneve") ||
> +            !strcmp(iface_rec->type, "patch")) {
> +            return false;
> +        }
> +    }
> +
> +    struct ofpbuf ofpacts;
> +    ofpbuf_init(&ofpacts, 0);
> +
> +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
p_ctx->iface_table) {
> +        const char *iface_id = smap_get(&iface_rec->external_ids,
"iface-id");
> +        if (!iface_id) {
> +            continue;
> +        }
> +
> +        const struct local_binding *lb =
> +            local_binding_find(p_ctx->local_bindings, iface_id);
> +
> +        if (!lb || !lb->pb) {
> +            continue;
> +        }
> +
> +        int64_t ofport = iface_rec->n_ofport ? *iface_rec->ofport : 0;
> +        if (ovsrec_interface_is_deleted(iface_rec)) {
> +            ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
> +            simap_find_and_delete(&localvif_to_ofport, iface_id);
> +        } else {
> +            if (!ovsrec_interface_is_new(iface_rec)) {
> +                ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
> +            }
> +
> +            simap_put(&localvif_to_ofport, iface_id, ofport);
> +            consider_port_binding(p_ctx->sbrec_port_binding_by_name,
> +                                  p_ctx->mff_ovn_geneve, p_ctx->ct_zones,
> +                                  p_ctx->active_tunnels,
> +                                  p_ctx->local_datapaths,
> +                                  lb->pb, p_ctx->chassis,
> +                                  flow_table, &ofpacts);
> +        }
> +    }
> +
> +    ofpbuf_uninit(&ofpacts);
> +    return true;
> +}
> +
>  bool
>  get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t
*ofport)
>  {
> diff --git a/controller/physical.h b/controller/physical.h
> index dadf84ea1..9ca34436a 100644
> --- a/controller/physical.h
> +++ b/controller/physical.h
> @@ -49,11 +49,13 @@ struct physical_ctx {
>      const struct ovsrec_bridge *br_int;
>      const struct sbrec_chassis_table *chassis_table;
>      const struct sbrec_chassis *chassis;
> +    const struct ovsrec_interface_table *iface_table;
>      const struct sset *active_tunnels;
>      struct hmap *local_datapaths;
>      struct sset *local_lports;
>      const struct simap *ct_zones;
>      enum mf_field_id mff_ovn_geneve;
> +    struct shash *local_bindings;
>  };
>
>  void physical_register_ovs_idl(struct ovsdb_idl *);
> @@ -63,7 +65,8 @@ void physical_handle_port_binding_changes(struct
physical_ctx *,
>                                            struct ovn_desired_flow_table
*);
>  void physical_handle_mc_group_changes(struct physical_ctx *,
>                                        struct ovn_desired_flow_table *);
> -
> +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
> +                                       struct ovn_desired_flow_table *);
>  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
>                         ofp_port_t *ofport);
>  #endif /* controller/physical.h */
> --
> 2.26.2
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Numan Siddique May 22, 2020, 5:29 p.m. UTC | #2
On Thu, May 21, 2020 at 12:02 PM Han Zhou <hzhou@ovn.org> wrote:

> On Wed, May 20, 2020 at 12:41 PM <numans@ovn.org> wrote:
> >
> > From: Numan Siddique <numans@ovn.org>
> >
> > This patch handles ct zone changes and OVS interface changes
> incrementally
> > in the flow output stage.
> >
> > Any changes to ct zone can be handled by running physical_run() instead
> of running
> > flow_output_run(). And any changes to OVS interfaces can be either
> handled
> > incrementally (for OVS interfaces representing VIFs) or just running
> > physical_run() (for tunnel and other types of interfaces).
> >
> > To better handle this, a new engine node 'physical_flow_changes' is added
> which
> > handles changes to ct zone and OVS interfaces.
> >
> Hi Numan,
>
> Hi Han,

Thanks for the review and comments. Please see below.



> The engine node physical_flow_changes looks weird because it doesn't really
> have any data but merely to trigger some compute when VIF changes.
> I think it can be handled in a more straightforward way. In fact there was
> a miss in my initial I-P patch that there are still global variables used
> in physical.c (my bad):
> - localvif_to_ofport
> - tunnels
> In the principle of I-P engine global variable shouldn't be used because
> otherwise there is no way to ensure the dependency graph is followed. The
> current I-P is sitll correct only because interface changes (which in turn
> causes the above global var changes) always triggers recompute.
>
> Now to handle interface changes incrementally, we should simply move these
> global vars to engine nodes, and add them as input for flow_output, and
> also their input will be OVS_Interface (and probably some other inputs).
> With this, the dependency graph would be clear and implementation of each
> input handler will be straightforward. Otherwise, it would be really hard
> to follow the logic and deduce the correctness for all corner cases,
> although it may be correct for normal scenarios that I believe you have
> done thorough tests. Do you mind refactoring it?
>

Right now, the flow_output node has an "en_ct_zones" engine as input and it
doesn't have
OVS_interface as input. But this is ok as any runtime data change will
trigger recomputing.

If en_ct_zones engine node changes, there is no need to trigger full
recompute and we
call "physical_run()", it should be enough. Although we can definitely
further improve it
and this can be tackled separately.

In my understanding, I think we can also handle ovs interface changes
incrementally and if we can't
we can just call "physical_run()" instead of full flow recompute.

With this patch, it adds an engine node for physical flow changes and the
input for that is en_ct_zone
and ovs interface. I understand and agree that it's a bit weird.

How to handle it when both
   - en_ct_zone changes
   -  some ovs interface changes.

If both these inputs are direct inputs to the flow_output engine, then we
may end up calling "physical_run()"
twice. And I wanted to avoid that and hence added a physical_flow_changes
node as an intermediate.
How do you suggest we handle this if we directly handle the ovs interface
changes in flow_output engine ?

Thanks
Numan



>
> In addition, please see another comment inlined.
>
> Thanks,
> Han
>
>
> > Acked-by: Mark Michelson <mmichels@redhat.com>
> > Signed-off-by: Numan Siddique <numans@ovn.org>
> > ---
> >  controller/binding.c        | 23 +----------
> >  controller/binding.h        | 24 ++++++++++-
> >  controller/ovn-controller.c | 82 ++++++++++++++++++++++++++++++++++++-
> >  controller/physical.c       | 51 +++++++++++++++++++++++
> >  controller/physical.h       |  5 ++-
> >  5 files changed, 159 insertions(+), 26 deletions(-)
> >
> > diff --git a/controller/binding.c b/controller/binding.c
> > index d5e8e4773..f00c975ce 100644
> > --- a/controller/binding.c
> > +++ b/controller/binding.c
> > @@ -504,7 +504,7 @@ remove_local_lport_ids(const struct
> sbrec_port_binding *binding_rec,
> >   * 'struct local_binding' is used. A shash of these local bindings is
> >   * maintained with the 'external_ids:iface-id' as the key to the shash.
> >   *
> > - * struct local_binding has 3 main fields:
> > + * struct local_binding (defined in binding.h) has 3 main fields:
> >   *    - type
> >   *    - OVS interface row object
> >   *    - Port_Binding row object
> > @@ -540,21 +540,6 @@ remove_local_lport_ids(const struct
> sbrec_port_binding *binding_rec,
> >   *              when it sees the ARP packet from the parent's VIF.
> >   *
> >   */
> > -enum local_binding_type {
> > -    BT_VIF,
> > -    BT_CONTAINER,
> > -    BT_VIRTUAL
> > -};
> > -
> > -struct local_binding {
> > -    char *name;
> > -    enum local_binding_type type;
> > -    const struct ovsrec_interface *iface;
> > -    const struct sbrec_port_binding *pb;
> > -
> > -    /* shash of 'struct local_binding' representing children. */
> > -    struct shash children;
> > -};
> >
> >  static struct local_binding *
> >  local_binding_create(const char *name, const struct ovsrec_interface
> *iface,
> > @@ -576,12 +561,6 @@ local_binding_add(struct shash *local_bindings,
> struct local_binding *lbinding)
> >      shash_add(local_bindings, lbinding->name, lbinding);
> >  }
> >
> > -static struct local_binding *
> > -local_binding_find(struct shash *local_bindings, const char *name)
> > -{
> > -    return shash_find_data(local_bindings, name);
> > -}
> > -
> >  static void
> >  local_binding_destroy(struct local_binding *lbinding)
> >  {
> > diff --git a/controller/binding.h b/controller/binding.h
> > index f7ace6faf..21118ecd4 100644
> > --- a/controller/binding.h
> > +++ b/controller/binding.h
> > @@ -18,6 +18,7 @@
> >  #define OVN_BINDING_H 1
> >
> >  #include <stdbool.h>
> > +#include "openvswitch/shash.h"
> >
> >  struct hmap;
> >  struct ovsdb_idl;
> > @@ -32,7 +33,6 @@ struct sbrec_chassis;
> >  struct sbrec_port_binding_table;
> >  struct sset;
> >  struct sbrec_port_binding;
> > -struct shash;
> >
> >  struct binding_ctx_in {
> >      struct ovsdb_idl_txn *ovnsb_idl_txn;
> > @@ -60,6 +60,28 @@ struct binding_ctx_out {
> >      struct smap *local_iface_ids;
> >  };
> >
> > +enum local_binding_type {
> > +    BT_VIF,
> > +    BT_CONTAINER,
> > +    BT_VIRTUAL
> > +};
> > +
> > +struct local_binding {
> > +    char *name;
> > +    enum local_binding_type type;
> > +    const struct ovsrec_interface *iface;
> > +    const struct sbrec_port_binding *pb;
> > +
> > +    /* shash of 'struct local_binding' representing children. */
> > +    struct shash children;
> > +};
> > +
> > +static inline struct local_binding *
> > +local_binding_find(struct shash *local_bindings, const char *name)
> > +{
> > +    return shash_find_data(local_bindings, name);
> > +}
> > +
> >  void binding_register_ovs_idl(struct ovsdb_idl *);
> >  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
> >  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 9ad5be1c6..8f95fff1f 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -1369,8 +1369,13 @@ static void init_physical_ctx(struct engine_node
> *node,
> >
> >      ovs_assert(br_int && chassis);
> >
> > +    struct ovsrec_interface_table *iface_table =
> > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> > +            engine_get_input("OVS_interface",
> > +                engine_get_input("physical_flow_changes", node)));
>
> I think we shouldn't try to get indirect input data (i.e. input of the
> input). The engine design didn't take this into consideration, although
> nothing could prevent a developer to do this (except code reviewing:)).
> If the input is required to compute the output, just add it as direct input
> to the node.
>

Sure I can do that. But we may need to have a no-op handler for these new
input nodes.
Is that OK ?

Thanks
Numan


>
> >      struct ed_type_ct_zones *ct_zones_data =
> > -        engine_get_input_data("ct_zones", node);
> > +        engine_get_input_data("ct_zones",
> > +            engine_get_input("physical_flow_changes", node));
> >      struct simap *ct_zones = &ct_zones_data->current;
> >
> >      p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
> > @@ -1378,12 +1383,14 @@ static void init_physical_ctx(struct engine_node
> *node,
> >      p_ctx->mc_group_table = multicast_group_table;
> >      p_ctx->br_int = br_int;
> >      p_ctx->chassis_table = chassis_table;
> > +    p_ctx->iface_table = iface_table;
> >      p_ctx->chassis = chassis;
> >      p_ctx->active_tunnels = &rt_data->active_tunnels;
> >      p_ctx->local_datapaths = &rt_data->local_datapaths;
> >      p_ctx->local_lports = &rt_data->local_lports;
> >      p_ctx->ct_zones = ct_zones;
> >      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
> > +    p_ctx->local_bindings = &rt_data->local_bindings;
> >  }
> >
> >  static void init_lflow_ctx(struct engine_node *node,
> > @@ -1791,6 +1798,70 @@ flow_output_port_groups_handler(struct engine_node
> *node, void *data)
> >      return _flow_output_resource_ref_handler(node, data,
> REF_TYPE_PORTGROUP);
> >  }
> >
> > +struct ed_type_physical_flow_changes {
> > +    bool need_physical_run;
> > +    bool ovs_ifaces_changed;
> > +};
> > +
> > +static bool
> > +flow_output_physical_flow_changes_handler(struct engine_node *node, void
> *data)
> > +{
> > +    struct ed_type_physical_flow_changes *pfc =
> > +        engine_get_input_data("physical_flow_changes", node);
> > +    struct ed_type_runtime_data *rt_data =
> > +        engine_get_input_data("runtime_data", node);
> > +
> > +    struct ed_type_flow_output *fo = data;
> > +    struct physical_ctx p_ctx;
> > +    init_physical_ctx(node, rt_data, &p_ctx);
> > +
> > +    engine_set_node_state(node, EN_UPDATED);
> > +    if (pfc->need_physical_run) {
> > +        pfc->need_physical_run = false;
> > +        physical_run(&p_ctx, &fo->flow_table);
> > +        return true;
> > +    }
> > +
> > +    if (pfc->ovs_ifaces_changed) {
> > +        pfc->ovs_ifaces_changed = false;
> > +        return physical_handle_ovs_iface_changes(&p_ctx,
> &fo->flow_table);
> > +    }
> > +
> > +    return true;
> > +}
> > +
> > +static void *
> > +en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
> > +                             struct engine_arg *arg OVS_UNUSED)
> > +{
> > +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof *data);
> > +    return data;
> > +}
> > +
> > +static void
> > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
> > +{
> > +
> > +}
> > +
> > +static void
> > +en_physical_flow_changes_run(struct engine_node *node, void *data
> OVS_UNUSED)
> > +{
> > +    struct ed_type_physical_flow_changes *pfc = data;
> > +    pfc->need_physical_run = true;
> > +    engine_set_node_state(node, EN_UPDATED);
> > +}
> > +
> > +static bool
> > +physical_flow_changes_ovs_iface_handler(struct engine_node *node
> OVS_UNUSED,
> > +                                        void *data OVS_UNUSED)
> > +{
> > +    struct ed_type_physical_flow_changes *pfc = data;
> > +    pfc->ovs_ifaces_changed = true;
> > +    engine_set_node_state(node, EN_UPDATED);
> > +    return true;
> > +}
> > +
> >  struct ovn_controller_exit_args {
> >      bool *exiting;
> >      bool *restart;
> > @@ -1914,6 +1985,7 @@ main(int argc, char *argv[])
> >      ENGINE_NODE(runtime_data, "runtime_data");
> >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> > +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
> >      ENGINE_NODE(flow_output, "flow_output");
> >      ENGINE_NODE(addr_sets, "addr_sets");
> >      ENGINE_NODE(port_groups, "port_groups");
> > @@ -1933,13 +2005,19 @@ main(int argc, char *argv[])
> >      engine_add_input(&en_port_groups, &en_sb_port_group,
> >                       port_groups_sb_port_group_handler);
> >
> > +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
> > +                     NULL);
> > +    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
> > +                     physical_flow_changes_ovs_iface_handler);
> > +
> >      engine_add_input(&en_flow_output, &en_addr_sets,
> >                       flow_output_addr_sets_handler);
> >      engine_add_input(&en_flow_output, &en_port_groups,
> >                       flow_output_port_groups_handler);
> >      engine_add_input(&en_flow_output, &en_runtime_data, NULL);
> > -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
> >      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
> > +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
> > +                     flow_output_physical_flow_changes_handler);
> >
> >      engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
> >      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
> > diff --git a/controller/physical.c b/controller/physical.c
> > index 144aeb7bd..03eb677d1 100644
> > --- a/controller/physical.c
> > +++ b/controller/physical.c
> > @@ -1780,6 +1780,57 @@ physical_run(struct physical_ctx *p_ctx,
> >      simap_destroy(&new_tunnel_to_ofport);
> >  }
> >
> > +bool
> > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
> > +                                  struct ovn_desired_flow_table
> *flow_table)
> > +{
> > +    const struct ovsrec_interface *iface_rec;
> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> p_ctx->iface_table) {
> > +        if (!strcmp(iface_rec->type, "geneve") ||
> > +            !strcmp(iface_rec->type, "patch")) {
> > +            return false;
> > +        }
> > +    }
> > +
> > +    struct ofpbuf ofpacts;
> > +    ofpbuf_init(&ofpacts, 0);
> > +
> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> p_ctx->iface_table) {
> > +        const char *iface_id = smap_get(&iface_rec->external_ids,
> "iface-id");
> > +        if (!iface_id) {
> > +            continue;
> > +        }
> > +
> > +        const struct local_binding *lb =
> > +            local_binding_find(p_ctx->local_bindings, iface_id);
> > +
> > +        if (!lb || !lb->pb) {
> > +            continue;
> > +        }
> > +
> > +        int64_t ofport = iface_rec->n_ofport ? *iface_rec->ofport : 0;
> > +        if (ovsrec_interface_is_deleted(iface_rec)) {
> > +            ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
> > +            simap_find_and_delete(&localvif_to_ofport, iface_id);
> > +        } else {
> > +            if (!ovsrec_interface_is_new(iface_rec)) {
> > +                ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
> > +            }
> > +
> > +            simap_put(&localvif_to_ofport, iface_id, ofport);
> > +            consider_port_binding(p_ctx->sbrec_port_binding_by_name,
> > +                                  p_ctx->mff_ovn_geneve,
> p_ctx->ct_zones,
> > +                                  p_ctx->active_tunnels,
> > +                                  p_ctx->local_datapaths,
> > +                                  lb->pb, p_ctx->chassis,
> > +                                  flow_table, &ofpacts);
> > +        }
> > +    }
> > +
> > +    ofpbuf_uninit(&ofpacts);
> > +    return true;
> > +}
> > +
> >  bool
> >  get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t
> *ofport)
> >  {
> > diff --git a/controller/physical.h b/controller/physical.h
> > index dadf84ea1..9ca34436a 100644
> > --- a/controller/physical.h
> > +++ b/controller/physical.h
> > @@ -49,11 +49,13 @@ struct physical_ctx {
> >      const struct ovsrec_bridge *br_int;
> >      const struct sbrec_chassis_table *chassis_table;
> >      const struct sbrec_chassis *chassis;
> > +    const struct ovsrec_interface_table *iface_table;
> >      const struct sset *active_tunnels;
> >      struct hmap *local_datapaths;
> >      struct sset *local_lports;
> >      const struct simap *ct_zones;
> >      enum mf_field_id mff_ovn_geneve;
> > +    struct shash *local_bindings;
> >  };
> >
> >  void physical_register_ovs_idl(struct ovsdb_idl *);
> > @@ -63,7 +65,8 @@ void physical_handle_port_binding_changes(struct
> physical_ctx *,
> >                                            struct ovn_desired_flow_table
> *);
> >  void physical_handle_mc_group_changes(struct physical_ctx *,
> >                                        struct ovn_desired_flow_table *);
> > -
> > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
> > +                                       struct ovn_desired_flow_table *);
> >  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> >                         ofp_port_t *ofport);
> >  #endif /* controller/physical.h */
> > --
> > 2.26.2
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Han Zhou May 23, 2020, 1:59 a.m. UTC | #3
On Fri, May 22, 2020 at 10:29 AM Numan Siddique <numans@ovn.org> wrote:
>
>
>
> On Thu, May 21, 2020 at 12:02 PM Han Zhou <hzhou@ovn.org> wrote:
>>
>> On Wed, May 20, 2020 at 12:41 PM <numans@ovn.org> wrote:
>> >
>> > From: Numan Siddique <numans@ovn.org>
>> >
>> > This patch handles ct zone changes and OVS interface changes
incrementally
>> > in the flow output stage.
>> >
>> > Any changes to ct zone can be handled by running physical_run() instead
>> of running
>> > flow_output_run(). And any changes to OVS interfaces can be either
handled
>> > incrementally (for OVS interfaces representing VIFs) or just running
>> > physical_run() (for tunnel and other types of interfaces).
>> >
>> > To better handle this, a new engine node 'physical_flow_changes' is
added
>> which
>> > handles changes to ct zone and OVS interfaces.
>> >
>> Hi Numan,
>>
> Hi Han,
>
> Thanks for the review and comments. Please see below.
>
>
>>
>> The engine node physical_flow_changes looks weird because it doesn't
really
>> have any data but merely to trigger some compute when VIF changes.
>> I think it can be handled in a more straightforward way. In fact there
was
>> a miss in my initial I-P patch that there are still global variables used
>> in physical.c (my bad):
>> - localvif_to_ofport
>> - tunnels
>> In the principle of I-P engine global variable shouldn't be used because
>> otherwise there is no way to ensure the dependency graph is followed. The
>> current I-P is sitll correct only because interface changes (which in
turn
>> causes the above global var changes) always triggers recompute.
>>
>> Now to handle interface changes incrementally, we should simply move
these
>> global vars to engine nodes, and add them as input for flow_output, and
>> also their input will be OVS_Interface (and probably some other inputs).
>> With this, the dependency graph would be clear and implementation of each
>> input handler will be straightforward. Otherwise, it would be really hard
>> to follow the logic and deduce the correctness for all corner cases,
>> although it may be correct for normal scenarios that I believe you have
>> done thorough tests. Do you mind refactoring it?
>
>
> Right now, the flow_output node has an "en_ct_zones" engine as input and
it doesn't have
> OVS_interface as input. But this is ok as any runtime data change will
trigger recomputing.
>
Do you mean OVS_interface should actually be input of "en_ct_zones", but it
is ok to not add it? But I don't understand why should it be input of
en_ct_zones. Maybe I missed something?

My point above is about the dependency between flow_output and
OVS_interface. flow_output actually depends on OVS_interface. The two
global variables allowed flow_output to use OVS_interface data without the
dependancy in the graph. Now since we incrementally procssing OVS_interface
changes, we'd better fix the dependency graph to ensure the correctness. I
had encounted very tricky bugs even when some very trivial part of the I-P
engine rule was not followed, and finally found out defining the engine
node properly was the easiest fix.

> If en_ct_zones engine node changes, there is no need to trigger full
recompute and we
> call "physical_run()", it should be enough. Although we can definitely
further improve it
> and this can be tackled separately.
>
> In my understanding, I think we can also handle ovs interface changes
incrementally and if we can't
> we can just call "physical_run()" instead of full flow recompute.
>
> With this patch, it adds an engine node for physical flow changes and the
input for that is en_ct_zone
> and ovs interface. I understand and agree that it's a bit weird.
>
> How to handle it when both
>    - en_ct_zone changes
>    -  some ovs interface changes.
>
> If both these inputs are direct inputs to the flow_output engine, then we
may end up calling "physical_run()"
> twice. And I wanted to avoid that and hence added a physical_flow_changes
node as an intermediate.
> How do you suggest we handle this if we directly handle the ovs interface
changes in flow_output engine ?
>

Not sure if I understand completely, but it seems you are suggesting we can
afford a recompute for physical flows, but not for logical flows, and in
some cases this can happen. Hoewver, you are trying to make a full phyical
computing appear as incremental processing, but you are not happy with this
kind of I-P because the cost is high, so you want to avoid calling it
multiple times, so a intermediate node is added for that purpose.

If this is the case, I'd suggest to handle it more cleanly with the I-P
engine. We can split the flow_output into separate nodes: logical_output
and physical_output, and add a dependency (fake) from physical_output to
logical_output just to make sure physical_output is the last node to
compute, so that even if it is recomputed it doesn't matter that much. Now
we call physical_run only in recompute of physical_output node, and since
it is recompute, it won't be called multiple times. What do you think?


> Thanks
> Numan
>
>
>>
>>
>> In addition, please see another comment inlined.
>>
>> Thanks,
>> Han
>>
>>
>> > Acked-by: Mark Michelson <mmichels@redhat.com>
>> > Signed-off-by: Numan Siddique <numans@ovn.org>
>> > ---
>> >  controller/binding.c        | 23 +----------
>> >  controller/binding.h        | 24 ++++++++++-
>> >  controller/ovn-controller.c | 82 ++++++++++++++++++++++++++++++++++++-
>> >  controller/physical.c       | 51 +++++++++++++++++++++++
>> >  controller/physical.h       |  5 ++-
>> >  5 files changed, 159 insertions(+), 26 deletions(-)
>> >
>> > diff --git a/controller/binding.c b/controller/binding.c
>> > index d5e8e4773..f00c975ce 100644
>> > --- a/controller/binding.c
>> > +++ b/controller/binding.c
>> > @@ -504,7 +504,7 @@ remove_local_lport_ids(const struct
>> sbrec_port_binding *binding_rec,
>> >   * 'struct local_binding' is used. A shash of these local bindings is
>> >   * maintained with the 'external_ids:iface-id' as the key to the
shash.
>> >   *
>> > - * struct local_binding has 3 main fields:
>> > + * struct local_binding (defined in binding.h) has 3 main fields:
>> >   *    - type
>> >   *    - OVS interface row object
>> >   *    - Port_Binding row object
>> > @@ -540,21 +540,6 @@ remove_local_lport_ids(const struct
>> sbrec_port_binding *binding_rec,
>> >   *              when it sees the ARP packet from the parent's VIF.
>> >   *
>> >   */
>> > -enum local_binding_type {
>> > -    BT_VIF,
>> > -    BT_CONTAINER,
>> > -    BT_VIRTUAL
>> > -};
>> > -
>> > -struct local_binding {
>> > -    char *name;
>> > -    enum local_binding_type type;
>> > -    const struct ovsrec_interface *iface;
>> > -    const struct sbrec_port_binding *pb;
>> > -
>> > -    /* shash of 'struct local_binding' representing children. */
>> > -    struct shash children;
>> > -};
>> >
>> >  static struct local_binding *
>> >  local_binding_create(const char *name, const struct ovsrec_interface
>> *iface,
>> > @@ -576,12 +561,6 @@ local_binding_add(struct shash *local_bindings,
>> struct local_binding *lbinding)
>> >      shash_add(local_bindings, lbinding->name, lbinding);
>> >  }
>> >
>> > -static struct local_binding *
>> > -local_binding_find(struct shash *local_bindings, const char *name)
>> > -{
>> > -    return shash_find_data(local_bindings, name);
>> > -}
>> > -
>> >  static void
>> >  local_binding_destroy(struct local_binding *lbinding)
>> >  {
>> > diff --git a/controller/binding.h b/controller/binding.h
>> > index f7ace6faf..21118ecd4 100644
>> > --- a/controller/binding.h
>> > +++ b/controller/binding.h
>> > @@ -18,6 +18,7 @@
>> >  #define OVN_BINDING_H 1
>> >
>> >  #include <stdbool.h>
>> > +#include "openvswitch/shash.h"
>> >
>> >  struct hmap;
>> >  struct ovsdb_idl;
>> > @@ -32,7 +33,6 @@ struct sbrec_chassis;
>> >  struct sbrec_port_binding_table;
>> >  struct sset;
>> >  struct sbrec_port_binding;
>> > -struct shash;
>> >
>> >  struct binding_ctx_in {
>> >      struct ovsdb_idl_txn *ovnsb_idl_txn;
>> > @@ -60,6 +60,28 @@ struct binding_ctx_out {
>> >      struct smap *local_iface_ids;
>> >  };
>> >
>> > +enum local_binding_type {
>> > +    BT_VIF,
>> > +    BT_CONTAINER,
>> > +    BT_VIRTUAL
>> > +};
>> > +
>> > +struct local_binding {
>> > +    char *name;
>> > +    enum local_binding_type type;
>> > +    const struct ovsrec_interface *iface;
>> > +    const struct sbrec_port_binding *pb;
>> > +
>> > +    /* shash of 'struct local_binding' representing children. */
>> > +    struct shash children;
>> > +};
>> > +
>> > +static inline struct local_binding *
>> > +local_binding_find(struct shash *local_bindings, const char *name)
>> > +{
>> > +    return shash_find_data(local_bindings, name);
>> > +}
>> > +
>> >  void binding_register_ovs_idl(struct ovsdb_idl *);
>> >  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
>> >  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
>> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>> > index 9ad5be1c6..8f95fff1f 100644
>> > --- a/controller/ovn-controller.c
>> > +++ b/controller/ovn-controller.c
>> > @@ -1369,8 +1369,13 @@ static void init_physical_ctx(struct engine_node
>> *node,
>> >
>> >      ovs_assert(br_int && chassis);
>> >
>> > +    struct ovsrec_interface_table *iface_table =
>> > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
>> > +            engine_get_input("OVS_interface",
>> > +                engine_get_input("physical_flow_changes", node)));
>>
>> I think we shouldn't try to get indirect input data (i.e. input of the
>> input). The engine design didn't take this into consideration, although
>> nothing could prevent a developer to do this (except code reviewing:)).
>> If the input is required to compute the output, just add it as direct
input
>> to the node.
>
>
> Sure I can do that. But we may need to have a no-op handler for these new
input nodes.
> Is that OK ?

Based on the above discussion, the node "physical_flow_change" is added
just to combine the "OVS_interface" and "ct_zones" node's data, so there is
no point to add the two nodes again directly as input to flow_output with
no-op handlers, which is meaningless. If we really want to have the
physical_flow_change node, I think the data should be passed in the
physical_flow_change node, just copy the references of the both input data
and it then becomes "physical_flow_changes" node's data, and flow_output
node and get it directly there. It achieves the same but not breaking I-P
engine rules.

>
> Thanks
> Numan
>
>>
>>
>> >      struct ed_type_ct_zones *ct_zones_data =
>> > -        engine_get_input_data("ct_zones", node);
>> > +        engine_get_input_data("ct_zones",
>> > +            engine_get_input("physical_flow_changes", node));
>> >      struct simap *ct_zones = &ct_zones_data->current;
>> >
>> >      p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
>> > @@ -1378,12 +1383,14 @@ static void init_physical_ctx(struct
engine_node
>> *node,
>> >      p_ctx->mc_group_table = multicast_group_table;
>> >      p_ctx->br_int = br_int;
>> >      p_ctx->chassis_table = chassis_table;
>> > +    p_ctx->iface_table = iface_table;
>> >      p_ctx->chassis = chassis;
>> >      p_ctx->active_tunnels = &rt_data->active_tunnels;
>> >      p_ctx->local_datapaths = &rt_data->local_datapaths;
>> >      p_ctx->local_lports = &rt_data->local_lports;
>> >      p_ctx->ct_zones = ct_zones;
>> >      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
>> > +    p_ctx->local_bindings = &rt_data->local_bindings;
>> >  }
>> >
>> >  static void init_lflow_ctx(struct engine_node *node,
>> > @@ -1791,6 +1798,70 @@ flow_output_port_groups_handler(struct
engine_node
>> *node, void *data)
>> >      return _flow_output_resource_ref_handler(node, data,
>> REF_TYPE_PORTGROUP);
>> >  }
>> >
>> > +struct ed_type_physical_flow_changes {
>> > +    bool need_physical_run;
>> > +    bool ovs_ifaces_changed;
>> > +};
>> > +
>> > +static bool
>> > +flow_output_physical_flow_changes_handler(struct engine_node *node,
void
>> *data)
>> > +{
>> > +    struct ed_type_physical_flow_changes *pfc =
>> > +        engine_get_input_data("physical_flow_changes", node);
>> > +    struct ed_type_runtime_data *rt_data =
>> > +        engine_get_input_data("runtime_data", node);
>> > +
>> > +    struct ed_type_flow_output *fo = data;
>> > +    struct physical_ctx p_ctx;
>> > +    init_physical_ctx(node, rt_data, &p_ctx);
>> > +
>> > +    engine_set_node_state(node, EN_UPDATED);
>> > +    if (pfc->need_physical_run) {
>> > +        pfc->need_physical_run = false;
>> > +        physical_run(&p_ctx, &fo->flow_table);
>> > +        return true;
>> > +    }
>> > +
>> > +    if (pfc->ovs_ifaces_changed) {
>> > +        pfc->ovs_ifaces_changed = false;
>> > +        return physical_handle_ovs_iface_changes(&p_ctx,
>> &fo->flow_table);
>> > +    }
>> > +
>> > +    return true;
>> > +}
>> > +
>> > +static void *
>> > +en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
>> > +                             struct engine_arg *arg OVS_UNUSED)
>> > +{
>> > +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof
*data);
>> > +    return data;
>> > +}
>> > +
>> > +static void
>> > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
>> > +{
>> > +
>> > +}
>> > +
>> > +static void
>> > +en_physical_flow_changes_run(struct engine_node *node, void *data
>> OVS_UNUSED)
>> > +{
>> > +    struct ed_type_physical_flow_changes *pfc = data;
>> > +    pfc->need_physical_run = true;
>> > +    engine_set_node_state(node, EN_UPDATED);
>> > +}
>> > +
>> > +static bool
>> > +physical_flow_changes_ovs_iface_handler(struct engine_node *node
>> OVS_UNUSED,
>> > +                                        void *data OVS_UNUSED)
>> > +{
>> > +    struct ed_type_physical_flow_changes *pfc = data;
>> > +    pfc->ovs_ifaces_changed = true;
>> > +    engine_set_node_state(node, EN_UPDATED);
>> > +    return true;
>> > +}
>> > +
>> >  struct ovn_controller_exit_args {
>> >      bool *exiting;
>> >      bool *restart;
>> > @@ -1914,6 +1985,7 @@ main(int argc, char *argv[])
>> >      ENGINE_NODE(runtime_data, "runtime_data");
>> >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
>> >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
>> > +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
>> >      ENGINE_NODE(flow_output, "flow_output");
>> >      ENGINE_NODE(addr_sets, "addr_sets");
>> >      ENGINE_NODE(port_groups, "port_groups");
>> > @@ -1933,13 +2005,19 @@ main(int argc, char *argv[])
>> >      engine_add_input(&en_port_groups, &en_sb_port_group,
>> >                       port_groups_sb_port_group_handler);
>> >
>> > +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
>> > +                     NULL);
>> > +    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
>> > +                     physical_flow_changes_ovs_iface_handler);
>> > +
>> >      engine_add_input(&en_flow_output, &en_addr_sets,
>> >                       flow_output_addr_sets_handler);
>> >      engine_add_input(&en_flow_output, &en_port_groups,
>> >                       flow_output_port_groups_handler);
>> >      engine_add_input(&en_flow_output, &en_runtime_data, NULL);
>> > -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
>> >      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
>> > +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
>> > +                     flow_output_physical_flow_changes_handler);
>> >
>> >      engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
>> >      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
>> > diff --git a/controller/physical.c b/controller/physical.c
>> > index 144aeb7bd..03eb677d1 100644
>> > --- a/controller/physical.c
>> > +++ b/controller/physical.c
>> > @@ -1780,6 +1780,57 @@ physical_run(struct physical_ctx *p_ctx,
>> >      simap_destroy(&new_tunnel_to_ofport);
>> >  }
>> >
>> > +bool
>> > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
>> > +                                  struct ovn_desired_flow_table
>> *flow_table)
>> > +{
>> > +    const struct ovsrec_interface *iface_rec;
>> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
>> p_ctx->iface_table) {
>> > +        if (!strcmp(iface_rec->type, "geneve") ||
>> > +            !strcmp(iface_rec->type, "patch")) {
>> > +            return false;
>> > +        }
>> > +    }
>> > +
>> > +    struct ofpbuf ofpacts;
>> > +    ofpbuf_init(&ofpacts, 0);
>> > +
>> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
>> p_ctx->iface_table) {
>> > +        const char *iface_id = smap_get(&iface_rec->external_ids,
>> "iface-id");
>> > +        if (!iface_id) {
>> > +            continue;
>> > +        }
>> > +
>> > +        const struct local_binding *lb =
>> > +            local_binding_find(p_ctx->local_bindings, iface_id);
>> > +
>> > +        if (!lb || !lb->pb) {
>> > +            continue;
>> > +        }
>> > +
>> > +        int64_t ofport = iface_rec->n_ofport ? *iface_rec->ofport : 0;
>> > +        if (ovsrec_interface_is_deleted(iface_rec)) {
>> > +            ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
>> > +            simap_find_and_delete(&localvif_to_ofport, iface_id);
>> > +        } else {
>> > +            if (!ovsrec_interface_is_new(iface_rec)) {
>> > +                ofctrl_remove_flows(flow_table,
&lb->pb->header_.uuid);
>> > +            }
>> > +
>> > +            simap_put(&localvif_to_ofport, iface_id, ofport);
>> > +            consider_port_binding(p_ctx->sbrec_port_binding_by_name,
>> > +                                  p_ctx->mff_ovn_geneve,
p_ctx->ct_zones,
>> > +                                  p_ctx->active_tunnels,
>> > +                                  p_ctx->local_datapaths,
>> > +                                  lb->pb, p_ctx->chassis,
>> > +                                  flow_table, &ofpacts);
>> > +        }
>> > +    }
>> > +
>> > +    ofpbuf_uninit(&ofpacts);
>> > +    return true;
>> > +}
>> > +
>> >  bool
>> >  get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t
>> *ofport)
>> >  {
>> > diff --git a/controller/physical.h b/controller/physical.h
>> > index dadf84ea1..9ca34436a 100644
>> > --- a/controller/physical.h
>> > +++ b/controller/physical.h
>> > @@ -49,11 +49,13 @@ struct physical_ctx {
>> >      const struct ovsrec_bridge *br_int;
>> >      const struct sbrec_chassis_table *chassis_table;
>> >      const struct sbrec_chassis *chassis;
>> > +    const struct ovsrec_interface_table *iface_table;
>> >      const struct sset *active_tunnels;
>> >      struct hmap *local_datapaths;
>> >      struct sset *local_lports;
>> >      const struct simap *ct_zones;
>> >      enum mf_field_id mff_ovn_geneve;
>> > +    struct shash *local_bindings;
>> >  };
>> >
>> >  void physical_register_ovs_idl(struct ovsdb_idl *);
>> > @@ -63,7 +65,8 @@ void physical_handle_port_binding_changes(struct
>> physical_ctx *,
>> >                                            struct
ovn_desired_flow_table
>> *);
>> >  void physical_handle_mc_group_changes(struct physical_ctx *,
>> >                                        struct ovn_desired_flow_table
*);
>> > -
>> > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
>> > +                                       struct ovn_desired_flow_table
*);
>> >  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
>> >                         ofp_port_t *ofport);
>> >  #endif /* controller/physical.h */
>> > --
>> > 2.26.2
>> >
>> > _______________________________________________
>> > dev mailing list
>> > dev@openvswitch.org
>> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
Numan Siddique May 23, 2020, 6:36 p.m. UTC | #4
On Sat, May 23, 2020 at 7:30 AM Han Zhou <hzhou@ovn.org> wrote:

> On Fri, May 22, 2020 at 10:29 AM Numan Siddique <numans@ovn.org> wrote:
> >
> >
> >
> > On Thu, May 21, 2020 at 12:02 PM Han Zhou <hzhou@ovn.org> wrote:
> >>
> >> On Wed, May 20, 2020 at 12:41 PM <numans@ovn.org> wrote:
> >> >
> >> > From: Numan Siddique <numans@ovn.org>
> >> >
> >> > This patch handles ct zone changes and OVS interface changes
> incrementally
> >> > in the flow output stage.
> >> >
> >> > Any changes to ct zone can be handled by running physical_run()
> instead
> >> of running
> >> > flow_output_run(). And any changes to OVS interfaces can be either
> handled
> >> > incrementally (for OVS interfaces representing VIFs) or just running
> >> > physical_run() (for tunnel and other types of interfaces).
> >> >
> >> > To better handle this, a new engine node 'physical_flow_changes' is
> added
> >> which
> >> > handles changes to ct zone and OVS interfaces.
> >> >
> >> Hi Numan,
> >>
> > Hi Han,
> >
> > Thanks for the review and comments. Please see below.
> >
> >
> >>
> >> The engine node physical_flow_changes looks weird because it doesn't
> really
> >> have any data but merely to trigger some compute when VIF changes.
> >> I think it can be handled in a more straightforward way. In fact there
> was
> >> a miss in my initial I-P patch that there are still global variables
> used
> >> in physical.c (my bad):
> >> - localvif_to_ofport
> >> - tunnels
> >> In the principle of I-P engine global variable shouldn't be used because
> >> otherwise there is no way to ensure the dependency graph is followed.
> The
> >> current I-P is sitll correct only because interface changes (which in
> turn
> >> causes the above global var changes) always triggers recompute.
> >>
> >> Now to handle interface changes incrementally, we should simply move
> these
> >> global vars to engine nodes, and add them as input for flow_output, and
> >> also their input will be OVS_Interface (and probably some other inputs).
> >> With this, the dependency graph would be clear and implementation of
> each
> >> input handler will be straightforward. Otherwise, it would be really
> hard
> >> to follow the logic and deduce the correctness for all corner cases,
> >> although it may be correct for normal scenarios that I believe you have
> >> done thorough tests. Do you mind refactoring it?
> >
> >
> > Right now, the flow_output node has an "en_ct_zones" engine as input and
> it doesn't have
> > OVS_interface as input. But this is ok as any runtime data change will
> trigger recomputing.
> >
> Do you mean OVS_interface should actually be input of "en_ct_zones", but it
> is ok to not add it? But I don't understand why should it be input of
> en_ct_zones. Maybe I missed something?
>

I mean right now, OVS Interface is not an input to flow_output stage.


> My point above is about the dependency between flow_output and
> OVS_interface. flow_output actually depends on OVS_interface. The two
> global variables allowed flow_output to use OVS_interface data without the
> dependancy in the graph. Now since we incrementally procssing OVS_interface
> changes, we'd better fix the dependency graph to ensure the correctness.


I agree.



> I
> had encounted very tricky bugs even when some very trivial part of the I-P
> engine rule was not followed, and finally found out defining the engine
> node properly was the easiest fix.
>
> > If en_ct_zones engine node changes, there is no need to trigger full
> recompute and we
> > call "physical_run()", it should be enough. Although we can definitely
> further improve it
> > and this can be tackled separately.
> >
> > In my understanding, I think we can also handle ovs interface changes
> incrementally and if we can't
> > we can just call "physical_run()" instead of full flow recompute.
> >
> > With this patch, it adds an engine node for physical flow changes and the
> input for that is en_ct_zone
> > and ovs interface. I understand and agree that it's a bit weird.
> >
> > How to handle it when both
> >    - en_ct_zone changes
> >    -  some ovs interface changes.
> >
> > If both these inputs are direct inputs to the flow_output engine, then we
> may end up calling "physical_run()"
> > twice. And I wanted to avoid that and hence added a physical_flow_changes
> node as an intermediate.
> > How do you suggest we handle this if we directly handle the ovs interface
> changes in flow_output engine ?
> >
>
> Not sure if I understand completely, but it seems you are suggesting we can
> afford a recompute for physical flows, but not for logical flows, and in
> some cases this can happen. Hoewver, you are trying to make a full phyical
> computing appear as incremental processing, but you are not happy with this
> kind of I-P because the cost is high, so you want to avoid calling it
> multiple times, so a intermediate node is added for that purpose.
>

That's right.


>
> If this is the case, I'd suggest to handle it more cleanly with the I-P
> engine. We can split the flow_output into separate nodes: logical_output
> and physical_output, and add a dependency (fake) from physical_output to
> logical_output just to make sure physical_output is the last node to
> compute, so that even if it is recomputed it doesn't matter that much. Now
> we call physical_run only in recompute of physical_output node, and since
> it is recompute, it won't be called multiple times. What do you think?
>
>
Below is what I understand from what you suggested. Can you please confirm
if the
understanding is correct. If not, can you please explain the graph path ?

flow_output
       --- physical_output
                -- en_ct_zones
                -- OVS interface
                -- runtime_data
                -- logical_output
       -- logical_output
             -- runtime_data
                     -- OVS interface
                     -- SB Port Binding
                     -- SB datapath binding
                     -- ...
                        ...
             -- SB flow outout
             -- Address set
             -- SB Port Groups
             -- ..
                ...

The physical_output_run() will call physical_run()
I'm not sure what the flow_output_physical_output_handler()  do ? Call
physical_run() or no-op ?
I need your input for this if my understanding is correct.

The logical_output_run() will call lflow_run()
I'm not sure what the flow_output_logical_output_handler()  do ? Call
lflow_run() or no-op ?
I need your input for this if my understanding is correct.

And the flow_output_run() will remain the same.

Thanks
Numan



The


>
> > Thanks
> > Numan
> >
> >
> >>
> >>
> >> In addition, please see another comment inlined.
> >>
> >> Thanks,
> >> Han
> >>
> >>
> >> > Acked-by: Mark Michelson <mmichels@redhat.com>
> >> > Signed-off-by: Numan Siddique <numans@ovn.org>
> >> > ---
> >> >  controller/binding.c        | 23 +----------
> >> >  controller/binding.h        | 24 ++++++++++-
> >> >  controller/ovn-controller.c | 82
> ++++++++++++++++++++++++++++++++++++-
> >> >  controller/physical.c       | 51 +++++++++++++++++++++++
> >> >  controller/physical.h       |  5 ++-
> >> >  5 files changed, 159 insertions(+), 26 deletions(-)
> >> >
> >> > diff --git a/controller/binding.c b/controller/binding.c
> >> > index d5e8e4773..f00c975ce 100644
> >> > --- a/controller/binding.c
> >> > +++ b/controller/binding.c
> >> > @@ -504,7 +504,7 @@ remove_local_lport_ids(const struct
> >> sbrec_port_binding *binding_rec,
> >> >   * 'struct local_binding' is used. A shash of these local bindings is
> >> >   * maintained with the 'external_ids:iface-id' as the key to the
> shash.
> >> >   *
> >> > - * struct local_binding has 3 main fields:
> >> > + * struct local_binding (defined in binding.h) has 3 main fields:
> >> >   *    - type
> >> >   *    - OVS interface row object
> >> >   *    - Port_Binding row object
> >> > @@ -540,21 +540,6 @@ remove_local_lport_ids(const struct
> >> sbrec_port_binding *binding_rec,
> >> >   *              when it sees the ARP packet from the parent's VIF.
> >> >   *
> >> >   */
> >> > -enum local_binding_type {
> >> > -    BT_VIF,
> >> > -    BT_CONTAINER,
> >> > -    BT_VIRTUAL
> >> > -};
> >> > -
> >> > -struct local_binding {
> >> > -    char *name;
> >> > -    enum local_binding_type type;
> >> > -    const struct ovsrec_interface *iface;
> >> > -    const struct sbrec_port_binding *pb;
> >> > -
> >> > -    /* shash of 'struct local_binding' representing children. */
> >> > -    struct shash children;
> >> > -};
> >> >
> >> >  static struct local_binding *
> >> >  local_binding_create(const char *name, const struct ovsrec_interface
> >> *iface,
> >> > @@ -576,12 +561,6 @@ local_binding_add(struct shash *local_bindings,
> >> struct local_binding *lbinding)
> >> >      shash_add(local_bindings, lbinding->name, lbinding);
> >> >  }
> >> >
> >> > -static struct local_binding *
> >> > -local_binding_find(struct shash *local_bindings, const char *name)
> >> > -{
> >> > -    return shash_find_data(local_bindings, name);
> >> > -}
> >> > -
> >> >  static void
> >> >  local_binding_destroy(struct local_binding *lbinding)
> >> >  {
> >> > diff --git a/controller/binding.h b/controller/binding.h
> >> > index f7ace6faf..21118ecd4 100644
> >> > --- a/controller/binding.h
> >> > +++ b/controller/binding.h
> >> > @@ -18,6 +18,7 @@
> >> >  #define OVN_BINDING_H 1
> >> >
> >> >  #include <stdbool.h>
> >> > +#include "openvswitch/shash.h"
> >> >
> >> >  struct hmap;
> >> >  struct ovsdb_idl;
> >> > @@ -32,7 +33,6 @@ struct sbrec_chassis;
> >> >  struct sbrec_port_binding_table;
> >> >  struct sset;
> >> >  struct sbrec_port_binding;
> >> > -struct shash;
> >> >
> >> >  struct binding_ctx_in {
> >> >      struct ovsdb_idl_txn *ovnsb_idl_txn;
> >> > @@ -60,6 +60,28 @@ struct binding_ctx_out {
> >> >      struct smap *local_iface_ids;
> >> >  };
> >> >
> >> > +enum local_binding_type {
> >> > +    BT_VIF,
> >> > +    BT_CONTAINER,
> >> > +    BT_VIRTUAL
> >> > +};
> >> > +
> >> > +struct local_binding {
> >> > +    char *name;
> >> > +    enum local_binding_type type;
> >> > +    const struct ovsrec_interface *iface;
> >> > +    const struct sbrec_port_binding *pb;
> >> > +
> >> > +    /* shash of 'struct local_binding' representing children. */
> >> > +    struct shash children;
> >> > +};
> >> > +
> >> > +static inline struct local_binding *
> >> > +local_binding_find(struct shash *local_bindings, const char *name)
> >> > +{
> >> > +    return shash_find_data(local_bindings, name);
> >> > +}
> >> > +
> >> >  void binding_register_ovs_idl(struct ovsdb_idl *);
> >> >  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
> >> >  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> >> > index 9ad5be1c6..8f95fff1f 100644
> >> > --- a/controller/ovn-controller.c
> >> > +++ b/controller/ovn-controller.c
> >> > @@ -1369,8 +1369,13 @@ static void init_physical_ctx(struct
> engine_node
> >> *node,
> >> >
> >> >      ovs_assert(br_int && chassis);
> >> >
> >> > +    struct ovsrec_interface_table *iface_table =
> >> > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> >> > +            engine_get_input("OVS_interface",
> >> > +                engine_get_input("physical_flow_changes", node)));
> >>
> >> I think we shouldn't try to get indirect input data (i.e. input of the
> >> input). The engine design didn't take this into consideration, although
> >> nothing could prevent a developer to do this (except code reviewing:)).
> >> If the input is required to compute the output, just add it as direct
> input
> >> to the node.
> >
> >
> > Sure I can do that. But we may need to have a no-op handler for these new
> input nodes.
> > Is that OK ?
>
> Based on the above discussion, the node "physical_flow_change" is added
> just to combine the "OVS_interface" and "ct_zones" node's data, so there is
> no point to add the two nodes again directly as input to flow_output with
> no-op handlers, which is meaningless. If we really want to have the
> physical_flow_change node, I think the data should be passed in the
> physical_flow_change node, just copy the references of the both input data
> and it then becomes "physical_flow_changes" node's data, and flow_output
> node and get it directly there. It achieves the same but not breaking I-P
> engine rules.
>
> >
> > Thanks
> > Numan
> >
> >>
> >>
> >> >      struct ed_type_ct_zones *ct_zones_data =
> >> > -        engine_get_input_data("ct_zones", node);
> >> > +        engine_get_input_data("ct_zones",
> >> > +            engine_get_input("physical_flow_changes", node));
> >> >      struct simap *ct_zones = &ct_zones_data->current;
> >> >
> >> >      p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
> >> > @@ -1378,12 +1383,14 @@ static void init_physical_ctx(struct
> engine_node
> >> *node,
> >> >      p_ctx->mc_group_table = multicast_group_table;
> >> >      p_ctx->br_int = br_int;
> >> >      p_ctx->chassis_table = chassis_table;
> >> > +    p_ctx->iface_table = iface_table;
> >> >      p_ctx->chassis = chassis;
> >> >      p_ctx->active_tunnels = &rt_data->active_tunnels;
> >> >      p_ctx->local_datapaths = &rt_data->local_datapaths;
> >> >      p_ctx->local_lports = &rt_data->local_lports;
> >> >      p_ctx->ct_zones = ct_zones;
> >> >      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
> >> > +    p_ctx->local_bindings = &rt_data->local_bindings;
> >> >  }
> >> >
> >> >  static void init_lflow_ctx(struct engine_node *node,
> >> > @@ -1791,6 +1798,70 @@ flow_output_port_groups_handler(struct
> engine_node
> >> *node, void *data)
> >> >      return _flow_output_resource_ref_handler(node, data,
> >> REF_TYPE_PORTGROUP);
> >> >  }
> >> >
> >> > +struct ed_type_physical_flow_changes {
> >> > +    bool need_physical_run;
> >> > +    bool ovs_ifaces_changed;
> >> > +};
> >> > +
> >> > +static bool
> >> > +flow_output_physical_flow_changes_handler(struct engine_node *node,
> void
> >> *data)
> >> > +{
> >> > +    struct ed_type_physical_flow_changes *pfc =
> >> > +        engine_get_input_data("physical_flow_changes", node);
> >> > +    struct ed_type_runtime_data *rt_data =
> >> > +        engine_get_input_data("runtime_data", node);
> >> > +
> >> > +    struct ed_type_flow_output *fo = data;
> >> > +    struct physical_ctx p_ctx;
> >> > +    init_physical_ctx(node, rt_data, &p_ctx);
> >> > +
> >> > +    engine_set_node_state(node, EN_UPDATED);
> >> > +    if (pfc->need_physical_run) {
> >> > +        pfc->need_physical_run = false;
> >> > +        physical_run(&p_ctx, &fo->flow_table);
> >> > +        return true;
> >> > +    }
> >> > +
> >> > +    if (pfc->ovs_ifaces_changed) {
> >> > +        pfc->ovs_ifaces_changed = false;
> >> > +        return physical_handle_ovs_iface_changes(&p_ctx,
> >> &fo->flow_table);
> >> > +    }
> >> > +
> >> > +    return true;
> >> > +}
> >> > +
> >> > +static void *
> >> > +en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
> >> > +                             struct engine_arg *arg OVS_UNUSED)
> >> > +{
> >> > +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof
> *data);
> >> > +    return data;
> >> > +}
> >> > +
> >> > +static void
> >> > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
> >> > +{
> >> > +
> >> > +}
> >> > +
> >> > +static void
> >> > +en_physical_flow_changes_run(struct engine_node *node, void *data
> >> OVS_UNUSED)
> >> > +{
> >> > +    struct ed_type_physical_flow_changes *pfc = data;
> >> > +    pfc->need_physical_run = true;
> >> > +    engine_set_node_state(node, EN_UPDATED);
> >> > +}
> >> > +
> >> > +static bool
> >> > +physical_flow_changes_ovs_iface_handler(struct engine_node *node
> >> OVS_UNUSED,
> >> > +                                        void *data OVS_UNUSED)
> >> > +{
> >> > +    struct ed_type_physical_flow_changes *pfc = data;
> >> > +    pfc->ovs_ifaces_changed = true;
> >> > +    engine_set_node_state(node, EN_UPDATED);
> >> > +    return true;
> >> > +}
> >> > +
> >> >  struct ovn_controller_exit_args {
> >> >      bool *exiting;
> >> >      bool *restart;
> >> > @@ -1914,6 +1985,7 @@ main(int argc, char *argv[])
> >> >      ENGINE_NODE(runtime_data, "runtime_data");
> >> >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> >> >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> >> > +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
> >> >      ENGINE_NODE(flow_output, "flow_output");
> >> >      ENGINE_NODE(addr_sets, "addr_sets");
> >> >      ENGINE_NODE(port_groups, "port_groups");
> >> > @@ -1933,13 +2005,19 @@ main(int argc, char *argv[])
> >> >      engine_add_input(&en_port_groups, &en_sb_port_group,
> >> >                       port_groups_sb_port_group_handler);
> >> >
> >> > +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
> >> > +                     NULL);
> >> > +    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
> >> > +                     physical_flow_changes_ovs_iface_handler);
> >> > +
> >> >      engine_add_input(&en_flow_output, &en_addr_sets,
> >> >                       flow_output_addr_sets_handler);
> >> >      engine_add_input(&en_flow_output, &en_port_groups,
> >> >                       flow_output_port_groups_handler);
> >> >      engine_add_input(&en_flow_output, &en_runtime_data, NULL);
> >> > -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
> >> >      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
> >> > +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
> >> > +                     flow_output_physical_flow_changes_handler);
> >> >
> >> >      engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
> >> >      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
> >> > diff --git a/controller/physical.c b/controller/physical.c
> >> > index 144aeb7bd..03eb677d1 100644
> >> > --- a/controller/physical.c
> >> > +++ b/controller/physical.c
> >> > @@ -1780,6 +1780,57 @@ physical_run(struct physical_ctx *p_ctx,
> >> >      simap_destroy(&new_tunnel_to_ofport);
> >> >  }
> >> >
> >> > +bool
> >> > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
> >> > +                                  struct ovn_desired_flow_table
> >> *flow_table)
> >> > +{
> >> > +    const struct ovsrec_interface *iface_rec;
> >> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> >> p_ctx->iface_table) {
> >> > +        if (!strcmp(iface_rec->type, "geneve") ||
> >> > +            !strcmp(iface_rec->type, "patch")) {
> >> > +            return false;
> >> > +        }
> >> > +    }
> >> > +
> >> > +    struct ofpbuf ofpacts;
> >> > +    ofpbuf_init(&ofpacts, 0);
> >> > +
> >> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> >> p_ctx->iface_table) {
> >> > +        const char *iface_id = smap_get(&iface_rec->external_ids,
> >> "iface-id");
> >> > +        if (!iface_id) {
> >> > +            continue;
> >> > +        }
> >> > +
> >> > +        const struct local_binding *lb =
> >> > +            local_binding_find(p_ctx->local_bindings, iface_id);
> >> > +
> >> > +        if (!lb || !lb->pb) {
> >> > +            continue;
> >> > +        }
> >> > +
> >> > +        int64_t ofport = iface_rec->n_ofport ? *iface_rec->ofport :
> 0;
> >> > +        if (ovsrec_interface_is_deleted(iface_rec)) {
> >> > +            ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
> >> > +            simap_find_and_delete(&localvif_to_ofport, iface_id);
> >> > +        } else {
> >> > +            if (!ovsrec_interface_is_new(iface_rec)) {
> >> > +                ofctrl_remove_flows(flow_table,
> &lb->pb->header_.uuid);
> >> > +            }
> >> > +
> >> > +            simap_put(&localvif_to_ofport, iface_id, ofport);
> >> > +            consider_port_binding(p_ctx->sbrec_port_binding_by_name,
> >> > +                                  p_ctx->mff_ovn_geneve,
> p_ctx->ct_zones,
> >> > +                                  p_ctx->active_tunnels,
> >> > +                                  p_ctx->local_datapaths,
> >> > +                                  lb->pb, p_ctx->chassis,
> >> > +                                  flow_table, &ofpacts);
> >> > +        }
> >> > +    }
> >> > +
> >> > +    ofpbuf_uninit(&ofpacts);
> >> > +    return true;
> >> > +}
> >> > +
> >> >  bool
> >> >  get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> ofp_port_t
> >> *ofport)
> >> >  {
> >> > diff --git a/controller/physical.h b/controller/physical.h
> >> > index dadf84ea1..9ca34436a 100644
> >> > --- a/controller/physical.h
> >> > +++ b/controller/physical.h
> >> > @@ -49,11 +49,13 @@ struct physical_ctx {
> >> >      const struct ovsrec_bridge *br_int;
> >> >      const struct sbrec_chassis_table *chassis_table;
> >> >      const struct sbrec_chassis *chassis;
> >> > +    const struct ovsrec_interface_table *iface_table;
> >> >      const struct sset *active_tunnels;
> >> >      struct hmap *local_datapaths;
> >> >      struct sset *local_lports;
> >> >      const struct simap *ct_zones;
> >> >      enum mf_field_id mff_ovn_geneve;
> >> > +    struct shash *local_bindings;
> >> >  };
> >> >
> >> >  void physical_register_ovs_idl(struct ovsdb_idl *);
> >> > @@ -63,7 +65,8 @@ void physical_handle_port_binding_changes(struct
> >> physical_ctx *,
> >> >                                            struct
> ovn_desired_flow_table
> >> *);
> >> >  void physical_handle_mc_group_changes(struct physical_ctx *,
> >> >                                        struct ovn_desired_flow_table
> *);
> >> > -
> >> > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
> >> > +                                       struct ovn_desired_flow_table
> *);
> >> >  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> >> >                         ofp_port_t *ofport);
> >> >  #endif /* controller/physical.h */
> >> > --
> >> > 2.26.2
> >> >
> >> > _______________________________________________
> >> > dev mailing list
> >> > dev@openvswitch.org
> >> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Han Zhou May 24, 2020, 6:02 a.m. UTC | #5
On Sat, May 23, 2020 at 11:37 AM Numan Siddique <numans@ovn.org> wrote:
>
>
>
> On Sat, May 23, 2020 at 7:30 AM Han Zhou <hzhou@ovn.org> wrote:
>>
>> On Fri, May 22, 2020 at 10:29 AM Numan Siddique <numans@ovn.org> wrote:
>> >
>> >
>> >
>> > On Thu, May 21, 2020 at 12:02 PM Han Zhou <hzhou@ovn.org> wrote:
>> >>
>> >> On Wed, May 20, 2020 at 12:41 PM <numans@ovn.org> wrote:
>> >> >
>> >> > From: Numan Siddique <numans@ovn.org>
>> >> >
>> >> > This patch handles ct zone changes and OVS interface changes
>> incrementally
>> >> > in the flow output stage.
>> >> >
>> >> > Any changes to ct zone can be handled by running physical_run()
instead
>> >> of running
>> >> > flow_output_run(). And any changes to OVS interfaces can be either
>> handled
>> >> > incrementally (for OVS interfaces representing VIFs) or just running
>> >> > physical_run() (for tunnel and other types of interfaces).
>> >> >
>> >> > To better handle this, a new engine node 'physical_flow_changes' is
>> added
>> >> which
>> >> > handles changes to ct zone and OVS interfaces.
>> >> >
>> >> Hi Numan,
>> >>
>> > Hi Han,
>> >
>> > Thanks for the review and comments. Please see below.
>> >
>> >
>> >>
>> >> The engine node physical_flow_changes looks weird because it doesn't
>> really
>> >> have any data but merely to trigger some compute when VIF changes.
>> >> I think it can be handled in a more straightforward way. In fact there
>> was
>> >> a miss in my initial I-P patch that there are still global variables
used
>> >> in physical.c (my bad):
>> >> - localvif_to_ofport
>> >> - tunnels
>> >> In the principle of I-P engine global variable shouldn't be used
because
>> >> otherwise there is no way to ensure the dependency graph is followed.
The
>> >> current I-P is sitll correct only because interface changes (which in
>> turn
>> >> causes the above global var changes) always triggers recompute.
>> >>
>> >> Now to handle interface changes incrementally, we should simply move
>> these
>> >> global vars to engine nodes, and add them as input for flow_output,
and
>> >> also their input will be OVS_Interface (and probably some other
inputs).
>> >> With this, the dependency graph would be clear and implementation of
each
>> >> input handler will be straightforward. Otherwise, it would be really
hard
>> >> to follow the logic and deduce the correctness for all corner cases,
>> >> although it may be correct for normal scenarios that I believe you
have
>> >> done thorough tests. Do you mind refactoring it?
>> >
>> >
>> > Right now, the flow_output node has an "en_ct_zones" engine as input
and
>> it doesn't have
>> > OVS_interface as input. But this is ok as any runtime data change will
>> trigger recomputing.
>> >
>> Do you mean OVS_interface should actually be input of "en_ct_zones", but
it
>> is ok to not add it? But I don't understand why should it be input of
>> en_ct_zones. Maybe I missed something?
>
>
> I mean right now, OVS Interface is not an input to flow_output stage.
>
>>
>> My point above is about the dependency between flow_output and
>> OVS_interface. flow_output actually depends on OVS_interface. The two
>> global variables allowed flow_output to use OVS_interface data without
the
>> dependancy in the graph. Now since we incrementally procssing
OVS_interface
>> changes, we'd better fix the dependency graph to ensure the correctness.
>
>
> I agree.
>
>
>>
>> I
>> had encounted very tricky bugs even when some very trivial part of the
I-P
>> engine rule was not followed, and finally found out defining the engine
>> node properly was the easiest fix.
>>
>> > If en_ct_zones engine node changes, there is no need to trigger full
>> recompute and we
>> > call "physical_run()", it should be enough. Although we can definitely
>> further improve it
>> > and this can be tackled separately.
>> >
>> > In my understanding, I think we can also handle ovs interface changes
>> incrementally and if we can't
>> > we can just call "physical_run()" instead of full flow recompute.
>> >
>> > With this patch, it adds an engine node for physical flow changes and
the
>> input for that is en_ct_zone
>> > and ovs interface. I understand and agree that it's a bit weird.
>> >
>> > How to handle it when both
>> >    - en_ct_zone changes
>> >    -  some ovs interface changes.
>> >
>> > If both these inputs are direct inputs to the flow_output engine, then
we
>> may end up calling "physical_run()"
>> > twice. And I wanted to avoid that and hence added a
physical_flow_changes
>> node as an intermediate.
>> > How do you suggest we handle this if we directly handle the ovs
interface
>> changes in flow_output engine ?
>> >
>>
>> Not sure if I understand completely, but it seems you are suggesting we
can
>> afford a recompute for physical flows, but not for logical flows, and in
>> some cases this can happen. Hoewver, you are trying to make a full
phyical
>> computing appear as incremental processing, but you are not happy with
this
>> kind of I-P because the cost is high, so you want to avoid calling it
>> multiple times, so a intermediate node is added for that purpose.
>
>
> That's right.
>
>>
>>
>> If this is the case, I'd suggest to handle it more cleanly with the I-P
>> engine. We can split the flow_output into separate nodes: logical_output
>> and physical_output, and add a dependency (fake) from physical_output to
>> logical_output just to make sure physical_output is the last node to
>> compute, so that even if it is recomputed it doesn't matter that much.
Now
>> we call physical_run only in recompute of physical_output node, and since
>> it is recompute, it won't be called multiple times. What do you think?
>>
>
> Below is what I understand from what you suggested. Can you please
confirm if the
> understanding is correct. If not, can you please explain the graph path ?
>
> flow_output
>        --- physical_output
>                 -- en_ct_zones
>                 -- OVS interface
>                 -- runtime_data
>                 -- logical_output
>        -- logical_output
>              -- runtime_data
>                      -- OVS interface
>                      -- SB Port Binding
>                      -- SB datapath binding
>                      -- ...
>                         ...
>              -- SB flow outout
>              -- Address set
>              -- SB Port Groups
>              -- ..
>                 ...
>
> The physical_output_run() will call physical_run()
> I'm not sure what the flow_output_physical_output_handler()  do ? Call
physical_run() or no-op ?
> I need your input for this if my understanding is correct.
>
> The logical_output_run() will call lflow_run()
> I'm not sure what the flow_output_logical_output_handler()  do ? Call
lflow_run() or no-op ?
> I need your input for this if my understanding is correct.
>
> And the flow_output_run() will remain the same.
>

Sorry that I didn't make it clear enough. What I meant is like below:

physical_output
  -> logical_output
        -> runtime_data
        -> ... (include all parameters of lflow_run())
  -> ... (include all parameters of physical_run())

The original flow_output is replaced by physical_output + logical_output.
The dependency phyical_output -> logical_output is fake, because there is
no real dependency (and change-handler can be no-op). It is just to ensure
physical_output is computed after logical_output, so that recompute in
physical_output doesn't trigger recompute of logical_output, which is
expensive.

I think the major part of this change is spliting the data of flow_output
into two parts, including spliting the desired_flow_table. I am not sure
how tricky this change is. If you think it is too big change we can stay
with your approach for now. We can change it in the future when necessary.

Thanks,
Han

> Thanks
> Numan
>
>
>
> The
>
>>
>>
>> > Thanks
>> > Numan
>> >
>> >
>> >>
>> >>
>> >> In addition, please see another comment inlined.
>> >>
>> >> Thanks,
>> >> Han
>> >>
>> >>
>> >> > Acked-by: Mark Michelson <mmichels@redhat.com>
>> >> > Signed-off-by: Numan Siddique <numans@ovn.org>
>> >> > ---
>> >> >  controller/binding.c        | 23 +----------
>> >> >  controller/binding.h        | 24 ++++++++++-
>> >> >  controller/ovn-controller.c | 82
++++++++++++++++++++++++++++++++++++-
>> >> >  controller/physical.c       | 51 +++++++++++++++++++++++
>> >> >  controller/physical.h       |  5 ++-
>> >> >  5 files changed, 159 insertions(+), 26 deletions(-)
>> >> >
>> >> > diff --git a/controller/binding.c b/controller/binding.c
>> >> > index d5e8e4773..f00c975ce 100644
>> >> > --- a/controller/binding.c
>> >> > +++ b/controller/binding.c
>> >> > @@ -504,7 +504,7 @@ remove_local_lport_ids(const struct
>> >> sbrec_port_binding *binding_rec,
>> >> >   * 'struct local_binding' is used. A shash of these local bindings
is
>> >> >   * maintained with the 'external_ids:iface-id' as the key to the
>> shash.
>> >> >   *
>> >> > - * struct local_binding has 3 main fields:
>> >> > + * struct local_binding (defined in binding.h) has 3 main fields:
>> >> >   *    - type
>> >> >   *    - OVS interface row object
>> >> >   *    - Port_Binding row object
>> >> > @@ -540,21 +540,6 @@ remove_local_lport_ids(const struct
>> >> sbrec_port_binding *binding_rec,
>> >> >   *              when it sees the ARP packet from the parent's VIF.
>> >> >   *
>> >> >   */
>> >> > -enum local_binding_type {
>> >> > -    BT_VIF,
>> >> > -    BT_CONTAINER,
>> >> > -    BT_VIRTUAL
>> >> > -};
>> >> > -
>> >> > -struct local_binding {
>> >> > -    char *name;
>> >> > -    enum local_binding_type type;
>> >> > -    const struct ovsrec_interface *iface;
>> >> > -    const struct sbrec_port_binding *pb;
>> >> > -
>> >> > -    /* shash of 'struct local_binding' representing children. */
>> >> > -    struct shash children;
>> >> > -};
>> >> >
>> >> >  static struct local_binding *
>> >> >  local_binding_create(const char *name, const struct
ovsrec_interface
>> >> *iface,
>> >> > @@ -576,12 +561,6 @@ local_binding_add(struct shash *local_bindings,
>> >> struct local_binding *lbinding)
>> >> >      shash_add(local_bindings, lbinding->name, lbinding);
>> >> >  }
>> >> >
>> >> > -static struct local_binding *
>> >> > -local_binding_find(struct shash *local_bindings, const char *name)
>> >> > -{
>> >> > -    return shash_find_data(local_bindings, name);
>> >> > -}
>> >> > -
>> >> >  static void
>> >> >  local_binding_destroy(struct local_binding *lbinding)
>> >> >  {
>> >> > diff --git a/controller/binding.h b/controller/binding.h
>> >> > index f7ace6faf..21118ecd4 100644
>> >> > --- a/controller/binding.h
>> >> > +++ b/controller/binding.h
>> >> > @@ -18,6 +18,7 @@
>> >> >  #define OVN_BINDING_H 1
>> >> >
>> >> >  #include <stdbool.h>
>> >> > +#include "openvswitch/shash.h"
>> >> >
>> >> >  struct hmap;
>> >> >  struct ovsdb_idl;
>> >> > @@ -32,7 +33,6 @@ struct sbrec_chassis;
>> >> >  struct sbrec_port_binding_table;
>> >> >  struct sset;
>> >> >  struct sbrec_port_binding;
>> >> > -struct shash;
>> >> >
>> >> >  struct binding_ctx_in {
>> >> >      struct ovsdb_idl_txn *ovnsb_idl_txn;
>> >> > @@ -60,6 +60,28 @@ struct binding_ctx_out {
>> >> >      struct smap *local_iface_ids;
>> >> >  };
>> >> >
>> >> > +enum local_binding_type {
>> >> > +    BT_VIF,
>> >> > +    BT_CONTAINER,
>> >> > +    BT_VIRTUAL
>> >> > +};
>> >> > +
>> >> > +struct local_binding {
>> >> > +    char *name;
>> >> > +    enum local_binding_type type;
>> >> > +    const struct ovsrec_interface *iface;
>> >> > +    const struct sbrec_port_binding *pb;
>> >> > +
>> >> > +    /* shash of 'struct local_binding' representing children. */
>> >> > +    struct shash children;
>> >> > +};
>> >> > +
>> >> > +static inline struct local_binding *
>> >> > +local_binding_find(struct shash *local_bindings, const char *name)
>> >> > +{
>> >> > +    return shash_find_data(local_bindings, name);
>> >> > +}
>> >> > +
>> >> >  void binding_register_ovs_idl(struct ovsdb_idl *);
>> >> >  void binding_run(struct binding_ctx_in *, struct binding_ctx_out
*);
>> >> >  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
>> >> > diff --git a/controller/ovn-controller.c
b/controller/ovn-controller.c
>> >> > index 9ad5be1c6..8f95fff1f 100644
>> >> > --- a/controller/ovn-controller.c
>> >> > +++ b/controller/ovn-controller.c
>> >> > @@ -1369,8 +1369,13 @@ static void init_physical_ctx(struct
engine_node
>> >> *node,
>> >> >
>> >> >      ovs_assert(br_int && chassis);
>> >> >
>> >> > +    struct ovsrec_interface_table *iface_table =
>> >> > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
>> >> > +            engine_get_input("OVS_interface",
>> >> > +                engine_get_input("physical_flow_changes", node)));
>> >>
>> >> I think we shouldn't try to get indirect input data (i.e. input of the
>> >> input). The engine design didn't take this into consideration,
although
>> >> nothing could prevent a developer to do this (except code
reviewing:)).
>> >> If the input is required to compute the output, just add it as direct
>> input
>> >> to the node.
>> >
>> >
>> > Sure I can do that. But we may need to have a no-op handler for these
new
>> input nodes.
>> > Is that OK ?
>>
>> Based on the above discussion, the node "physical_flow_change" is added
>> just to combine the "OVS_interface" and "ct_zones" node's data, so there
is
>> no point to add the two nodes again directly as input to flow_output with
>> no-op handlers, which is meaningless. If we really want to have the
>> physical_flow_change node, I think the data should be passed in the
>> physical_flow_change node, just copy the references of the both input
data
>> and it then becomes "physical_flow_changes" node's data, and flow_output
>> node and get it directly there. It achieves the same but not breaking I-P
>> engine rules.
>>
>> >
>> > Thanks
>> > Numan
>> >
>> >>
>> >>
>> >> >      struct ed_type_ct_zones *ct_zones_data =
>> >> > -        engine_get_input_data("ct_zones", node);
>> >> > +        engine_get_input_data("ct_zones",
>> >> > +            engine_get_input("physical_flow_changes", node));
>> >> >      struct simap *ct_zones = &ct_zones_data->current;
>> >> >
>> >> >      p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
>> >> > @@ -1378,12 +1383,14 @@ static void init_physical_ctx(struct
>> engine_node
>> >> *node,
>> >> >      p_ctx->mc_group_table = multicast_group_table;
>> >> >      p_ctx->br_int = br_int;
>> >> >      p_ctx->chassis_table = chassis_table;
>> >> > +    p_ctx->iface_table = iface_table;
>> >> >      p_ctx->chassis = chassis;
>> >> >      p_ctx->active_tunnels = &rt_data->active_tunnels;
>> >> >      p_ctx->local_datapaths = &rt_data->local_datapaths;
>> >> >      p_ctx->local_lports = &rt_data->local_lports;
>> >> >      p_ctx->ct_zones = ct_zones;
>> >> >      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
>> >> > +    p_ctx->local_bindings = &rt_data->local_bindings;
>> >> >  }
>> >> >
>> >> >  static void init_lflow_ctx(struct engine_node *node,
>> >> > @@ -1791,6 +1798,70 @@ flow_output_port_groups_handler(struct
>> engine_node
>> >> *node, void *data)
>> >> >      return _flow_output_resource_ref_handler(node, data,
>> >> REF_TYPE_PORTGROUP);
>> >> >  }
>> >> >
>> >> > +struct ed_type_physical_flow_changes {
>> >> > +    bool need_physical_run;
>> >> > +    bool ovs_ifaces_changed;
>> >> > +};
>> >> > +
>> >> > +static bool
>> >> > +flow_output_physical_flow_changes_handler(struct engine_node *node,
>> void
>> >> *data)
>> >> > +{
>> >> > +    struct ed_type_physical_flow_changes *pfc =
>> >> > +        engine_get_input_data("physical_flow_changes", node);
>> >> > +    struct ed_type_runtime_data *rt_data =
>> >> > +        engine_get_input_data("runtime_data", node);
>> >> > +
>> >> > +    struct ed_type_flow_output *fo = data;
>> >> > +    struct physical_ctx p_ctx;
>> >> > +    init_physical_ctx(node, rt_data, &p_ctx);
>> >> > +
>> >> > +    engine_set_node_state(node, EN_UPDATED);
>> >> > +    if (pfc->need_physical_run) {
>> >> > +        pfc->need_physical_run = false;
>> >> > +        physical_run(&p_ctx, &fo->flow_table);
>> >> > +        return true;
>> >> > +    }
>> >> > +
>> >> > +    if (pfc->ovs_ifaces_changed) {
>> >> > +        pfc->ovs_ifaces_changed = false;
>> >> > +        return physical_handle_ovs_iface_changes(&p_ctx,
>> >> &fo->flow_table);
>> >> > +    }
>> >> > +
>> >> > +    return true;
>> >> > +}
>> >> > +
>> >> > +static void *
>> >> > +en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
>> >> > +                             struct engine_arg *arg OVS_UNUSED)
>> >> > +{
>> >> > +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof
>> *data);
>> >> > +    return data;
>> >> > +}
>> >> > +
>> >> > +static void
>> >> > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
>> >> > +{
>> >> > +
>> >> > +}
>> >> > +
>> >> > +static void
>> >> > +en_physical_flow_changes_run(struct engine_node *node, void *data
>> >> OVS_UNUSED)
>> >> > +{
>> >> > +    struct ed_type_physical_flow_changes *pfc = data;
>> >> > +    pfc->need_physical_run = true;
>> >> > +    engine_set_node_state(node, EN_UPDATED);
>> >> > +}
>> >> > +
>> >> > +static bool
>> >> > +physical_flow_changes_ovs_iface_handler(struct engine_node *node
>> >> OVS_UNUSED,
>> >> > +                                        void *data OVS_UNUSED)
>> >> > +{
>> >> > +    struct ed_type_physical_flow_changes *pfc = data;
>> >> > +    pfc->ovs_ifaces_changed = true;
>> >> > +    engine_set_node_state(node, EN_UPDATED);
>> >> > +    return true;
>> >> > +}
>> >> > +
>> >> >  struct ovn_controller_exit_args {
>> >> >      bool *exiting;
>> >> >      bool *restart;
>> >> > @@ -1914,6 +1985,7 @@ main(int argc, char *argv[])
>> >> >      ENGINE_NODE(runtime_data, "runtime_data");
>> >> >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
>> >> >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
>> >> > +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
>> >> >      ENGINE_NODE(flow_output, "flow_output");
>> >> >      ENGINE_NODE(addr_sets, "addr_sets");
>> >> >      ENGINE_NODE(port_groups, "port_groups");
>> >> > @@ -1933,13 +2005,19 @@ main(int argc, char *argv[])
>> >> >      engine_add_input(&en_port_groups, &en_sb_port_group,
>> >> >                       port_groups_sb_port_group_handler);
>> >> >
>> >> > +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
>> >> > +                     NULL);
>> >> > +    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
>> >> > +                     physical_flow_changes_ovs_iface_handler);
>> >> > +
>> >> >      engine_add_input(&en_flow_output, &en_addr_sets,
>> >> >                       flow_output_addr_sets_handler);
>> >> >      engine_add_input(&en_flow_output, &en_port_groups,
>> >> >                       flow_output_port_groups_handler);
>> >> >      engine_add_input(&en_flow_output, &en_runtime_data, NULL);
>> >> > -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
>> >> >      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
>> >> > +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
>> >> > +                     flow_output_physical_flow_changes_handler);
>> >> >
>> >> >      engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
>> >> >      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
>> >> > diff --git a/controller/physical.c b/controller/physical.c
>> >> > index 144aeb7bd..03eb677d1 100644
>> >> > --- a/controller/physical.c
>> >> > +++ b/controller/physical.c
>> >> > @@ -1780,6 +1780,57 @@ physical_run(struct physical_ctx *p_ctx,
>> >> >      simap_destroy(&new_tunnel_to_ofport);
>> >> >  }
>> >> >
>> >> > +bool
>> >> > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
>> >> > +                                  struct ovn_desired_flow_table
>> >> *flow_table)
>> >> > +{
>> >> > +    const struct ovsrec_interface *iface_rec;
>> >> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
>> >> p_ctx->iface_table) {
>> >> > +        if (!strcmp(iface_rec->type, "geneve") ||
>> >> > +            !strcmp(iface_rec->type, "patch")) {
>> >> > +            return false;
>> >> > +        }
>> >> > +    }
>> >> > +
>> >> > +    struct ofpbuf ofpacts;
>> >> > +    ofpbuf_init(&ofpacts, 0);
>> >> > +
>> >> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
>> >> p_ctx->iface_table) {
>> >> > +        const char *iface_id = smap_get(&iface_rec->external_ids,
>> >> "iface-id");
>> >> > +        if (!iface_id) {
>> >> > +            continue;
>> >> > +        }
>> >> > +
>> >> > +        const struct local_binding *lb =
>> >> > +            local_binding_find(p_ctx->local_bindings, iface_id);
>> >> > +
>> >> > +        if (!lb || !lb->pb) {
>> >> > +            continue;
>> >> > +        }
>> >> > +
>> >> > +        int64_t ofport = iface_rec->n_ofport ? *iface_rec->ofport
: 0;
>> >> > +        if (ovsrec_interface_is_deleted(iface_rec)) {
>> >> > +            ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
>> >> > +            simap_find_and_delete(&localvif_to_ofport, iface_id);
>> >> > +        } else {
>> >> > +            if (!ovsrec_interface_is_new(iface_rec)) {
>> >> > +                ofctrl_remove_flows(flow_table,
>> &lb->pb->header_.uuid);
>> >> > +            }
>> >> > +
>> >> > +            simap_put(&localvif_to_ofport, iface_id, ofport);
>> >> > +
 consider_port_binding(p_ctx->sbrec_port_binding_by_name,
>> >> > +                                  p_ctx->mff_ovn_geneve,
>> p_ctx->ct_zones,
>> >> > +                                  p_ctx->active_tunnels,
>> >> > +                                  p_ctx->local_datapaths,
>> >> > +                                  lb->pb, p_ctx->chassis,
>> >> > +                                  flow_table, &ofpacts);
>> >> > +        }
>> >> > +    }
>> >> > +
>> >> > +    ofpbuf_uninit(&ofpacts);
>> >> > +    return true;
>> >> > +}
>> >> > +
>> >> >  bool
>> >> >  get_tunnel_ofport(const char *chassis_name, char *encap_ip,
ofp_port_t
>> >> *ofport)
>> >> >  {
>> >> > diff --git a/controller/physical.h b/controller/physical.h
>> >> > index dadf84ea1..9ca34436a 100644
>> >> > --- a/controller/physical.h
>> >> > +++ b/controller/physical.h
>> >> > @@ -49,11 +49,13 @@ struct physical_ctx {
>> >> >      const struct ovsrec_bridge *br_int;
>> >> >      const struct sbrec_chassis_table *chassis_table;
>> >> >      const struct sbrec_chassis *chassis;
>> >> > +    const struct ovsrec_interface_table *iface_table;
>> >> >      const struct sset *active_tunnels;
>> >> >      struct hmap *local_datapaths;
>> >> >      struct sset *local_lports;
>> >> >      const struct simap *ct_zones;
>> >> >      enum mf_field_id mff_ovn_geneve;
>> >> > +    struct shash *local_bindings;
>> >> >  };
>> >> >
>> >> >  void physical_register_ovs_idl(struct ovsdb_idl *);
>> >> > @@ -63,7 +65,8 @@ void physical_handle_port_binding_changes(struct
>> >> physical_ctx *,
>> >> >                                            struct
>> ovn_desired_flow_table
>> >> *);
>> >> >  void physical_handle_mc_group_changes(struct physical_ctx *,
>> >> >                                        struct ovn_desired_flow_table
>> *);
>> >> > -
>> >> > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
>> >> > +                                       struct
ovn_desired_flow_table
>> *);
>> >> >  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
>> >> >                         ofp_port_t *ofport);
>> >> >  #endif /* controller/physical.h */
>> >> > --
>> >> > 2.26.2
>> >> >
>> >> > _______________________________________________
>> >> > dev mailing list
>> >> > dev@openvswitch.org
>> >> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> >> _______________________________________________
>> >> dev mailing list
>> >> dev@openvswitch.org
>> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> >>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
Numan Siddique May 25, 2020, 3:02 p.m. UTC | #6
On Sun, May 24, 2020 at 11:32 AM Han Zhou <hzhou@ovn.org> wrote:

> On Sat, May 23, 2020 at 11:37 AM Numan Siddique <numans@ovn.org> wrote:
> >
> >
> >
> > On Sat, May 23, 2020 at 7:30 AM Han Zhou <hzhou@ovn.org> wrote:
> >>
> >> On Fri, May 22, 2020 at 10:29 AM Numan Siddique <numans@ovn.org> wrote:
> >> >
> >> >
> >> >
> >> > On Thu, May 21, 2020 at 12:02 PM Han Zhou <hzhou@ovn.org> wrote:
> >> >>
> >> >> On Wed, May 20, 2020 at 12:41 PM <numans@ovn.org> wrote:
> >> >> >
> >> >> > From: Numan Siddique <numans@ovn.org>
> >> >> >
> >> >> > This patch handles ct zone changes and OVS interface changes
> >> incrementally
> >> >> > in the flow output stage.
> >> >> >
> >> >> > Any changes to ct zone can be handled by running physical_run()
> instead
> >> >> of running
> >> >> > flow_output_run(). And any changes to OVS interfaces can be either
> >> handled
> >> >> > incrementally (for OVS interfaces representing VIFs) or just
> running
> >> >> > physical_run() (for tunnel and other types of interfaces).
> >> >> >
> >> >> > To better handle this, a new engine node 'physical_flow_changes' is
> >> added
> >> >> which
> >> >> > handles changes to ct zone and OVS interfaces.
> >> >> >
> >> >> Hi Numan,
> >> >>
> >> > Hi Han,
> >> >
> >> > Thanks for the review and comments. Please see below.
> >> >
> >> >
> >> >>
> >> >> The engine node physical_flow_changes looks weird because it doesn't
> >> really
> >> >> have any data but merely to trigger some compute when VIF changes.
> >> >> I think it can be handled in a more straightforward way. In fact
> there
> >> was
> >> >> a miss in my initial I-P patch that there are still global variables
> used
> >> >> in physical.c (my bad):
> >> >> - localvif_to_ofport
> >> >> - tunnels
> >> >> In the principle of I-P engine global variable shouldn't be used
> because
> >> >> otherwise there is no way to ensure the dependency graph is followed.
> The
> >> >> current I-P is sitll correct only because interface changes (which in
> >> turn
> >> >> causes the above global var changes) always triggers recompute.
> >> >>
> >> >> Now to handle interface changes incrementally, we should simply move
> >> these
> >> >> global vars to engine nodes, and add them as input for flow_output,
> and
> >> >> also their input will be OVS_Interface (and probably some other
> inputs).
> >> >> With this, the dependency graph would be clear and implementation of
> each
> >> >> input handler will be straightforward. Otherwise, it would be really
> hard
> >> >> to follow the logic and deduce the correctness for all corner cases,
> >> >> although it may be correct for normal scenarios that I believe you
> have
> >> >> done thorough tests. Do you mind refactoring it?
> >> >
> >> >
> >> > Right now, the flow_output node has an "en_ct_zones" engine as input
> and
> >> it doesn't have
> >> > OVS_interface as input. But this is ok as any runtime data change will
> >> trigger recomputing.
> >> >
> >> Do you mean OVS_interface should actually be input of "en_ct_zones", but
> it
> >> is ok to not add it? But I don't understand why should it be input of
> >> en_ct_zones. Maybe I missed something?
> >
> >
> > I mean right now, OVS Interface is not an input to flow_output stage.
> >
> >>
> >> My point above is about the dependency between flow_output and
> >> OVS_interface. flow_output actually depends on OVS_interface. The two
> >> global variables allowed flow_output to use OVS_interface data without
> the
> >> dependancy in the graph. Now since we incrementally procssing
> OVS_interface
> >> changes, we'd better fix the dependency graph to ensure the correctness.
> >
> >
> > I agree.
> >
> >
> >>
> >> I
> >> had encounted very tricky bugs even when some very trivial part of the
> I-P
> >> engine rule was not followed, and finally found out defining the engine
> >> node properly was the easiest fix.
> >>
> >> > If en_ct_zones engine node changes, there is no need to trigger full
> >> recompute and we
> >> > call "physical_run()", it should be enough. Although we can definitely
> >> further improve it
> >> > and this can be tackled separately.
> >> >
> >> > In my understanding, I think we can also handle ovs interface changes
> >> incrementally and if we can't
> >> > we can just call "physical_run()" instead of full flow recompute.
> >> >
> >> > With this patch, it adds an engine node for physical flow changes and
> the
> >> input for that is en_ct_zone
> >> > and ovs interface. I understand and agree that it's a bit weird.
> >> >
> >> > How to handle it when both
> >> >    - en_ct_zone changes
> >> >    -  some ovs interface changes.
> >> >
> >> > If both these inputs are direct inputs to the flow_output engine, then
> we
> >> may end up calling "physical_run()"
> >> > twice. And I wanted to avoid that and hence added a
> physical_flow_changes
> >> node as an intermediate.
> >> > How do you suggest we handle this if we directly handle the ovs
> interface
> >> changes in flow_output engine ?
> >> >
> >>
> >> Not sure if I understand completely, but it seems you are suggesting we
> can
> >> afford a recompute for physical flows, but not for logical flows, and in
> >> some cases this can happen. Hoewver, you are trying to make a full
> phyical
> >> computing appear as incremental processing, but you are not happy with
> this
> >> kind of I-P because the cost is high, so you want to avoid calling it
> >> multiple times, so a intermediate node is added for that purpose.
> >
> >
> > That's right.
> >
> >>
> >>
> >> If this is the case, I'd suggest to handle it more cleanly with the I-P
> >> engine. We can split the flow_output into separate nodes: logical_output
> >> and physical_output, and add a dependency (fake) from physical_output to
> >> logical_output just to make sure physical_output is the last node to
> >> compute, so that even if it is recomputed it doesn't matter that much.
> Now
> >> we call physical_run only in recompute of physical_output node, and
> since
> >> it is recompute, it won't be called multiple times. What do you think?
> >>
> >
> > Below is what I understand from what you suggested. Can you please
> confirm if the
> > understanding is correct. If not, can you please explain the graph path ?
> >
> > flow_output
> >        --- physical_output
> >                 -- en_ct_zones
> >                 -- OVS interface
> >                 -- runtime_data
> >                 -- logical_output
> >        -- logical_output
> >              -- runtime_data
> >                      -- OVS interface
> >                      -- SB Port Binding
> >                      -- SB datapath binding
> >                      -- ...
> >                         ...
> >              -- SB flow outout
> >              -- Address set
> >              -- SB Port Groups
> >              -- ..
> >                 ...
> >
> > The physical_output_run() will call physical_run()
> > I'm not sure what the flow_output_physical_output_handler()  do ? Call
> physical_run() or no-op ?
> > I need your input for this if my understanding is correct.
> >
> > The logical_output_run() will call lflow_run()
> > I'm not sure what the flow_output_logical_output_handler()  do ? Call
> lflow_run() or no-op ?
> > I need your input for this if my understanding is correct.
> >
> > And the flow_output_run() will remain the same.
> >
>
> Sorry that I didn't make it clear enough. What I meant is like below:
>
> physical_output
>   -> logical_output
>         -> runtime_data
>         -> ... (include all parameters of lflow_run())
>   -> ... (include all parameters of physical_run())
>
> The original flow_output is replaced by physical_output + logical_output.
> The dependency phyical_output -> logical_output is fake, because there is
> no real dependency (and change-handler can be no-op). It is just to ensure
> physical_output is computed after logical_output, so that recompute in
> physical_output doesn't trigger recompute of logical_output, which is
> expensive.
>
> I think the major part of this change is spliting the data of flow_output
> into two parts, including spliting the desired_flow_table. I am not sure
> how tricky this change is. If you think it is too big change we can stay
> with your approach for now. We can change it in the future when necessary.
>

Hi Han,

Thanks for the explanation. I think it will be a significant change, If
you're OK
with the present approach i,e the patch 5 of this series, then I'll
definitely work
on the follow up patch i.e to split to physical_output and logical_output as
soon as this patch series is reviewed and accepted.

Instead of
physical_output
  -> logical_output
        -> runtime_data
  ...

I was thinking to have something like:

flow_output
   -> physical_output
       ...
   -> logical_output

But we can discuss this later :).


My question is are you fine with the present patch 5 or you want me to work
on removing the global localvif_to_ofport simap and make it as part of
engine data
in v8.

Thanks.
Numan



> Thanks,
> Han
>
> > Thanks
> > Numan
> >
> >
> >
> > The
> >
> >>
> >>
> >> > Thanks
> >> > Numan
> >> >
> >> >
> >> >>
> >> >>
> >> >> In addition, please see another comment inlined.
> >> >>
> >> >> Thanks,
> >> >> Han
> >> >>
> >> >>
> >> >> > Acked-by: Mark Michelson <mmichels@redhat.com>
> >> >> > Signed-off-by: Numan Siddique <numans@ovn.org>
> >> >> > ---
> >> >> >  controller/binding.c        | 23 +----------
> >> >> >  controller/binding.h        | 24 ++++++++++-
> >> >> >  controller/ovn-controller.c | 82
> ++++++++++++++++++++++++++++++++++++-
> >> >> >  controller/physical.c       | 51 +++++++++++++++++++++++
> >> >> >  controller/physical.h       |  5 ++-
> >> >> >  5 files changed, 159 insertions(+), 26 deletions(-)
> >> >> >
> >> >> > diff --git a/controller/binding.c b/controller/binding.c
> >> >> > index d5e8e4773..f00c975ce 100644
> >> >> > --- a/controller/binding.c
> >> >> > +++ b/controller/binding.c
> >> >> > @@ -504,7 +504,7 @@ remove_local_lport_ids(const struct
> >> >> sbrec_port_binding *binding_rec,
> >> >> >   * 'struct local_binding' is used. A shash of these local bindings
> is
> >> >> >   * maintained with the 'external_ids:iface-id' as the key to the
> >> shash.
> >> >> >   *
> >> >> > - * struct local_binding has 3 main fields:
> >> >> > + * struct local_binding (defined in binding.h) has 3 main fields:
> >> >> >   *    - type
> >> >> >   *    - OVS interface row object
> >> >> >   *    - Port_Binding row object
> >> >> > @@ -540,21 +540,6 @@ remove_local_lport_ids(const struct
> >> >> sbrec_port_binding *binding_rec,
> >> >> >   *              when it sees the ARP packet from the parent's VIF.
> >> >> >   *
> >> >> >   */
> >> >> > -enum local_binding_type {
> >> >> > -    BT_VIF,
> >> >> > -    BT_CONTAINER,
> >> >> > -    BT_VIRTUAL
> >> >> > -};
> >> >> > -
> >> >> > -struct local_binding {
> >> >> > -    char *name;
> >> >> > -    enum local_binding_type type;
> >> >> > -    const struct ovsrec_interface *iface;
> >> >> > -    const struct sbrec_port_binding *pb;
> >> >> > -
> >> >> > -    /* shash of 'struct local_binding' representing children. */
> >> >> > -    struct shash children;
> >> >> > -};
> >> >> >
> >> >> >  static struct local_binding *
> >> >> >  local_binding_create(const char *name, const struct
> ovsrec_interface
> >> >> *iface,
> >> >> > @@ -576,12 +561,6 @@ local_binding_add(struct shash
> *local_bindings,
> >> >> struct local_binding *lbinding)
> >> >> >      shash_add(local_bindings, lbinding->name, lbinding);
> >> >> >  }
> >> >> >
> >> >> > -static struct local_binding *
> >> >> > -local_binding_find(struct shash *local_bindings, const char *name)
> >> >> > -{
> >> >> > -    return shash_find_data(local_bindings, name);
> >> >> > -}
> >> >> > -
> >> >> >  static void
> >> >> >  local_binding_destroy(struct local_binding *lbinding)
> >> >> >  {
> >> >> > diff --git a/controller/binding.h b/controller/binding.h
> >> >> > index f7ace6faf..21118ecd4 100644
> >> >> > --- a/controller/binding.h
> >> >> > +++ b/controller/binding.h
> >> >> > @@ -18,6 +18,7 @@
> >> >> >  #define OVN_BINDING_H 1
> >> >> >
> >> >> >  #include <stdbool.h>
> >> >> > +#include "openvswitch/shash.h"
> >> >> >
> >> >> >  struct hmap;
> >> >> >  struct ovsdb_idl;
> >> >> > @@ -32,7 +33,6 @@ struct sbrec_chassis;
> >> >> >  struct sbrec_port_binding_table;
> >> >> >  struct sset;
> >> >> >  struct sbrec_port_binding;
> >> >> > -struct shash;
> >> >> >
> >> >> >  struct binding_ctx_in {
> >> >> >      struct ovsdb_idl_txn *ovnsb_idl_txn;
> >> >> > @@ -60,6 +60,28 @@ struct binding_ctx_out {
> >> >> >      struct smap *local_iface_ids;
> >> >> >  };
> >> >> >
> >> >> > +enum local_binding_type {
> >> >> > +    BT_VIF,
> >> >> > +    BT_CONTAINER,
> >> >> > +    BT_VIRTUAL
> >> >> > +};
> >> >> > +
> >> >> > +struct local_binding {
> >> >> > +    char *name;
> >> >> > +    enum local_binding_type type;
> >> >> > +    const struct ovsrec_interface *iface;
> >> >> > +    const struct sbrec_port_binding *pb;
> >> >> > +
> >> >> > +    /* shash of 'struct local_binding' representing children. */
> >> >> > +    struct shash children;
> >> >> > +};
> >> >> > +
> >> >> > +static inline struct local_binding *
> >> >> > +local_binding_find(struct shash *local_bindings, const char *name)
> >> >> > +{
> >> >> > +    return shash_find_data(local_bindings, name);
> >> >> > +}
> >> >> > +
> >> >> >  void binding_register_ovs_idl(struct ovsdb_idl *);
> >> >> >  void binding_run(struct binding_ctx_in *, struct binding_ctx_out
> *);
> >> >> >  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >> >> > diff --git a/controller/ovn-controller.c
> b/controller/ovn-controller.c
> >> >> > index 9ad5be1c6..8f95fff1f 100644
> >> >> > --- a/controller/ovn-controller.c
> >> >> > +++ b/controller/ovn-controller.c
> >> >> > @@ -1369,8 +1369,13 @@ static void init_physical_ctx(struct
> engine_node
> >> >> *node,
> >> >> >
> >> >> >      ovs_assert(br_int && chassis);
> >> >> >
> >> >> > +    struct ovsrec_interface_table *iface_table =
> >> >> > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> >> >> > +            engine_get_input("OVS_interface",
> >> >> > +                engine_get_input("physical_flow_changes", node)));
> >> >>
> >> >> I think we shouldn't try to get indirect input data (i.e. input of
> the
> >> >> input). The engine design didn't take this into consideration,
> although
> >> >> nothing could prevent a developer to do this (except code
> reviewing:)).
> >> >> If the input is required to compute the output, just add it as direct
> >> input
> >> >> to the node.
> >> >
> >> >
> >> > Sure I can do that. But we may need to have a no-op handler for these
> new
> >> input nodes.
> >> > Is that OK ?
> >>
> >> Based on the above discussion, the node "physical_flow_change" is added
> >> just to combine the "OVS_interface" and "ct_zones" node's data, so there
> is
> >> no point to add the two nodes again directly as input to flow_output
> with
> >> no-op handlers, which is meaningless. If we really want to have the
> >> physical_flow_change node, I think the data should be passed in the
> >> physical_flow_change node, just copy the references of the both input
> data
> >> and it then becomes "physical_flow_changes" node's data, and flow_output
> >> node and get it directly there. It achieves the same but not breaking
> I-P
> >> engine rules.
> >>
> >> >
> >> > Thanks
> >> > Numan
> >> >
> >> >>
> >> >>
> >> >> >      struct ed_type_ct_zones *ct_zones_data =
> >> >> > -        engine_get_input_data("ct_zones", node);
> >> >> > +        engine_get_input_data("ct_zones",
> >> >> > +            engine_get_input("physical_flow_changes", node));
> >> >> >      struct simap *ct_zones = &ct_zones_data->current;
> >> >> >
> >> >> >      p_ctx->sbrec_port_binding_by_name =
> sbrec_port_binding_by_name;
> >> >> > @@ -1378,12 +1383,14 @@ static void init_physical_ctx(struct
> >> engine_node
> >> >> *node,
> >> >> >      p_ctx->mc_group_table = multicast_group_table;
> >> >> >      p_ctx->br_int = br_int;
> >> >> >      p_ctx->chassis_table = chassis_table;
> >> >> > +    p_ctx->iface_table = iface_table;
> >> >> >      p_ctx->chassis = chassis;
> >> >> >      p_ctx->active_tunnels = &rt_data->active_tunnels;
> >> >> >      p_ctx->local_datapaths = &rt_data->local_datapaths;
> >> >> >      p_ctx->local_lports = &rt_data->local_lports;
> >> >> >      p_ctx->ct_zones = ct_zones;
> >> >> >      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
> >> >> > +    p_ctx->local_bindings = &rt_data->local_bindings;
> >> >> >  }
> >> >> >
> >> >> >  static void init_lflow_ctx(struct engine_node *node,
> >> >> > @@ -1791,6 +1798,70 @@ flow_output_port_groups_handler(struct
> >> engine_node
> >> >> *node, void *data)
> >> >> >      return _flow_output_resource_ref_handler(node, data,
> >> >> REF_TYPE_PORTGROUP);
> >> >> >  }
> >> >> >
> >> >> > +struct ed_type_physical_flow_changes {
> >> >> > +    bool need_physical_run;
> >> >> > +    bool ovs_ifaces_changed;
> >> >> > +};
> >> >> > +
> >> >> > +static bool
> >> >> > +flow_output_physical_flow_changes_handler(struct engine_node
> *node,
> >> void
> >> >> *data)
> >> >> > +{
> >> >> > +    struct ed_type_physical_flow_changes *pfc =
> >> >> > +        engine_get_input_data("physical_flow_changes", node);
> >> >> > +    struct ed_type_runtime_data *rt_data =
> >> >> > +        engine_get_input_data("runtime_data", node);
> >> >> > +
> >> >> > +    struct ed_type_flow_output *fo = data;
> >> >> > +    struct physical_ctx p_ctx;
> >> >> > +    init_physical_ctx(node, rt_data, &p_ctx);
> >> >> > +
> >> >> > +    engine_set_node_state(node, EN_UPDATED);
> >> >> > +    if (pfc->need_physical_run) {
> >> >> > +        pfc->need_physical_run = false;
> >> >> > +        physical_run(&p_ctx, &fo->flow_table);
> >> >> > +        return true;
> >> >> > +    }
> >> >> > +
> >> >> > +    if (pfc->ovs_ifaces_changed) {
> >> >> > +        pfc->ovs_ifaces_changed = false;
> >> >> > +        return physical_handle_ovs_iface_changes(&p_ctx,
> >> >> &fo->flow_table);
> >> >> > +    }
> >> >> > +
> >> >> > +    return true;
> >> >> > +}
> >> >> > +
> >> >> > +static void *
> >> >> > +en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
> >> >> > +                             struct engine_arg *arg OVS_UNUSED)
> >> >> > +{
> >> >> > +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof
> >> *data);
> >> >> > +    return data;
> >> >> > +}
> >> >> > +
> >> >> > +static void
> >> >> > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
> >> >> > +{
> >> >> > +
> >> >> > +}
> >> >> > +
> >> >> > +static void
> >> >> > +en_physical_flow_changes_run(struct engine_node *node, void *data
> >> >> OVS_UNUSED)
> >> >> > +{
> >> >> > +    struct ed_type_physical_flow_changes *pfc = data;
> >> >> > +    pfc->need_physical_run = true;
> >> >> > +    engine_set_node_state(node, EN_UPDATED);
> >> >> > +}
> >> >> > +
> >> >> > +static bool
> >> >> > +physical_flow_changes_ovs_iface_handler(struct engine_node *node
> >> >> OVS_UNUSED,
> >> >> > +                                        void *data OVS_UNUSED)
> >> >> > +{
> >> >> > +    struct ed_type_physical_flow_changes *pfc = data;
> >> >> > +    pfc->ovs_ifaces_changed = true;
> >> >> > +    engine_set_node_state(node, EN_UPDATED);
> >> >> > +    return true;
> >> >> > +}
> >> >> > +
> >> >> >  struct ovn_controller_exit_args {
> >> >> >      bool *exiting;
> >> >> >      bool *restart;
> >> >> > @@ -1914,6 +1985,7 @@ main(int argc, char *argv[])
> >> >> >      ENGINE_NODE(runtime_data, "runtime_data");
> >> >> >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> >> >> >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> >> >> > +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
> >> >> >      ENGINE_NODE(flow_output, "flow_output");
> >> >> >      ENGINE_NODE(addr_sets, "addr_sets");
> >> >> >      ENGINE_NODE(port_groups, "port_groups");
> >> >> > @@ -1933,13 +2005,19 @@ main(int argc, char *argv[])
> >> >> >      engine_add_input(&en_port_groups, &en_sb_port_group,
> >> >> >                       port_groups_sb_port_group_handler);
> >> >> >
> >> >> > +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
> >> >> > +                     NULL);
> >> >> > +    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
> >> >> > +                     physical_flow_changes_ovs_iface_handler);
> >> >> > +
> >> >> >      engine_add_input(&en_flow_output, &en_addr_sets,
> >> >> >                       flow_output_addr_sets_handler);
> >> >> >      engine_add_input(&en_flow_output, &en_port_groups,
> >> >> >                       flow_output_port_groups_handler);
> >> >> >      engine_add_input(&en_flow_output, &en_runtime_data, NULL);
> >> >> > -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
> >> >> >      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
> >> >> > +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
> >> >> > +                     flow_output_physical_flow_changes_handler);
> >> >> >
> >> >> >      engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
> >> >> >      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
> >> >> > diff --git a/controller/physical.c b/controller/physical.c
> >> >> > index 144aeb7bd..03eb677d1 100644
> >> >> > --- a/controller/physical.c
> >> >> > +++ b/controller/physical.c
> >> >> > @@ -1780,6 +1780,57 @@ physical_run(struct physical_ctx *p_ctx,
> >> >> >      simap_destroy(&new_tunnel_to_ofport);
> >> >> >  }
> >> >> >
> >> >> > +bool
> >> >> > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
> >> >> > +                                  struct ovn_desired_flow_table
> >> >> *flow_table)
> >> >> > +{
> >> >> > +    const struct ovsrec_interface *iface_rec;
> >> >> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> >> >> p_ctx->iface_table) {
> >> >> > +        if (!strcmp(iface_rec->type, "geneve") ||
> >> >> > +            !strcmp(iface_rec->type, "patch")) {
> >> >> > +            return false;
> >> >> > +        }
> >> >> > +    }
> >> >> > +
> >> >> > +    struct ofpbuf ofpacts;
> >> >> > +    ofpbuf_init(&ofpacts, 0);
> >> >> > +
> >> >> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> >> >> p_ctx->iface_table) {
> >> >> > +        const char *iface_id = smap_get(&iface_rec->external_ids,
> >> >> "iface-id");
> >> >> > +        if (!iface_id) {
> >> >> > +            continue;
> >> >> > +        }
> >> >> > +
> >> >> > +        const struct local_binding *lb =
> >> >> > +            local_binding_find(p_ctx->local_bindings, iface_id);
> >> >> > +
> >> >> > +        if (!lb || !lb->pb) {
> >> >> > +            continue;
> >> >> > +        }
> >> >> > +
> >> >> > +        int64_t ofport = iface_rec->n_ofport ? *iface_rec->ofport
> : 0;
> >> >> > +        if (ovsrec_interface_is_deleted(iface_rec)) {
> >> >> > +            ofctrl_remove_flows(flow_table,
> &lb->pb->header_.uuid);
> >> >> > +            simap_find_and_delete(&localvif_to_ofport, iface_id);
> >> >> > +        } else {
> >> >> > +            if (!ovsrec_interface_is_new(iface_rec)) {
> >> >> > +                ofctrl_remove_flows(flow_table,
> >> &lb->pb->header_.uuid);
> >> >> > +            }
> >> >> > +
> >> >> > +            simap_put(&localvif_to_ofport, iface_id, ofport);
> >> >> > +
>  consider_port_binding(p_ctx->sbrec_port_binding_by_name,
> >> >> > +                                  p_ctx->mff_ovn_geneve,
> >> p_ctx->ct_zones,
> >> >> > +                                  p_ctx->active_tunnels,
> >> >> > +                                  p_ctx->local_datapaths,
> >> >> > +                                  lb->pb, p_ctx->chassis,
> >> >> > +                                  flow_table, &ofpacts);
> >> >> > +        }
> >> >> > +    }
> >> >> > +
> >> >> > +    ofpbuf_uninit(&ofpacts);
> >> >> > +    return true;
> >> >> > +}
> >> >> > +
> >> >> >  bool
> >> >> >  get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> ofp_port_t
> >> >> *ofport)
> >> >> >  {
> >> >> > diff --git a/controller/physical.h b/controller/physical.h
> >> >> > index dadf84ea1..9ca34436a 100644
> >> >> > --- a/controller/physical.h
> >> >> > +++ b/controller/physical.h
> >> >> > @@ -49,11 +49,13 @@ struct physical_ctx {
> >> >> >      const struct ovsrec_bridge *br_int;
> >> >> >      const struct sbrec_chassis_table *chassis_table;
> >> >> >      const struct sbrec_chassis *chassis;
> >> >> > +    const struct ovsrec_interface_table *iface_table;
> >> >> >      const struct sset *active_tunnels;
> >> >> >      struct hmap *local_datapaths;
> >> >> >      struct sset *local_lports;
> >> >> >      const struct simap *ct_zones;
> >> >> >      enum mf_field_id mff_ovn_geneve;
> >> >> > +    struct shash *local_bindings;
> >> >> >  };
> >> >> >
> >> >> >  void physical_register_ovs_idl(struct ovsdb_idl *);
> >> >> > @@ -63,7 +65,8 @@ void physical_handle_port_binding_changes(struct
> >> >> physical_ctx *,
> >> >> >                                            struct
> >> ovn_desired_flow_table
> >> >> *);
> >> >> >  void physical_handle_mc_group_changes(struct physical_ctx *,
> >> >> >                                        struct
> ovn_desired_flow_table
> >> *);
> >> >> > -
> >> >> > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
> >> >> > +                                       struct
> ovn_desired_flow_table
> >> *);
> >> >> >  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> >> >> >                         ofp_port_t *ofport);
> >> >> >  #endif /* controller/physical.h */
> >> >> > --
> >> >> > 2.26.2
> >> >> >
> >> >> > _______________________________________________
> >> >> > dev mailing list
> >> >> > dev@openvswitch.org
> >> >> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >> >> _______________________________________________
> >> >> dev mailing list
> >> >> dev@openvswitch.org
> >> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >> >>
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Han Zhou May 26, 2020, 2:34 a.m. UTC | #7
On Mon, May 25, 2020 at 8:02 AM Numan Siddique <numans@ovn.org> wrote:
>
>
>
> On Sun, May 24, 2020 at 11:32 AM Han Zhou <hzhou@ovn.org> wrote:
>>
>> On Sat, May 23, 2020 at 11:37 AM Numan Siddique <numans@ovn.org> wrote:
>> >
>> >
>> >
>> > On Sat, May 23, 2020 at 7:30 AM Han Zhou <hzhou@ovn.org> wrote:
>> >>
>> >> On Fri, May 22, 2020 at 10:29 AM Numan Siddique <numans@ovn.org>
wrote:
>> >> >
>> >> >
>> >> >
>> >> > On Thu, May 21, 2020 at 12:02 PM Han Zhou <hzhou@ovn.org> wrote:
>> >> >>
>> >> >> On Wed, May 20, 2020 at 12:41 PM <numans@ovn.org> wrote:
>> >> >> >
>> >> >> > From: Numan Siddique <numans@ovn.org>
>> >> >> >
>> >> >> > This patch handles ct zone changes and OVS interface changes
>> >> incrementally
>> >> >> > in the flow output stage.
>> >> >> >
>> >> >> > Any changes to ct zone can be handled by running physical_run()
>> instead
>> >> >> of running
>> >> >> > flow_output_run(). And any changes to OVS interfaces can be
either
>> >> handled
>> >> >> > incrementally (for OVS interfaces representing VIFs) or just
running
>> >> >> > physical_run() (for tunnel and other types of interfaces).
>> >> >> >
>> >> >> > To better handle this, a new engine node 'physical_flow_changes'
is
>> >> added
>> >> >> which
>> >> >> > handles changes to ct zone and OVS interfaces.
>> >> >> >
>> >> >> Hi Numan,
>> >> >>
>> >> > Hi Han,
>> >> >
>> >> > Thanks for the review and comments. Please see below.
>> >> >
>> >> >
>> >> >>
>> >> >> The engine node physical_flow_changes looks weird because it
doesn't
>> >> really
>> >> >> have any data but merely to trigger some compute when VIF changes.
>> >> >> I think it can be handled in a more straightforward way. In fact
there
>> >> was
>> >> >> a miss in my initial I-P patch that there are still global
variables
>> used
>> >> >> in physical.c (my bad):
>> >> >> - localvif_to_ofport
>> >> >> - tunnels
>> >> >> In the principle of I-P engine global variable shouldn't be used
>> because
>> >> >> otherwise there is no way to ensure the dependency graph is
followed.
>> The
>> >> >> current I-P is sitll correct only because interface changes (which
in
>> >> turn
>> >> >> causes the above global var changes) always triggers recompute.
>> >> >>
>> >> >> Now to handle interface changes incrementally, we should simply
move
>> >> these
>> >> >> global vars to engine nodes, and add them as input for flow_output,
>> and
>> >> >> also their input will be OVS_Interface (and probably some other
>> inputs).
>> >> >> With this, the dependency graph would be clear and implementation
of
>> each
>> >> >> input handler will be straightforward. Otherwise, it would be
really
>> hard
>> >> >> to follow the logic and deduce the correctness for all corner
cases,
>> >> >> although it may be correct for normal scenarios that I believe you
>> have
>> >> >> done thorough tests. Do you mind refactoring it?
>> >> >
>> >> >
>> >> > Right now, the flow_output node has an "en_ct_zones" engine as input
>> and
>> >> it doesn't have
>> >> > OVS_interface as input. But this is ok as any runtime data change
will
>> >> trigger recomputing.
>> >> >
>> >> Do you mean OVS_interface should actually be input of "en_ct_zones",
but
>> it
>> >> is ok to not add it? But I don't understand why should it be input of
>> >> en_ct_zones. Maybe I missed something?
>> >
>> >
>> > I mean right now, OVS Interface is not an input to flow_output stage.
>> >
>> >>
>> >> My point above is about the dependency between flow_output and
>> >> OVS_interface. flow_output actually depends on OVS_interface. The two
>> >> global variables allowed flow_output to use OVS_interface data without
>> the
>> >> dependancy in the graph. Now since we incrementally procssing
>> OVS_interface
>> >> changes, we'd better fix the dependency graph to ensure the
correctness.
>> >
>> >
>> > I agree.
>> >
>> >
>> >>
>> >> I
>> >> had encounted very tricky bugs even when some very trivial part of the
>> I-P
>> >> engine rule was not followed, and finally found out defining the
engine
>> >> node properly was the easiest fix.
>> >>
>> >> > If en_ct_zones engine node changes, there is no need to trigger full
>> >> recompute and we
>> >> > call "physical_run()", it should be enough. Although we can
definitely
>> >> further improve it
>> >> > and this can be tackled separately.
>> >> >
>> >> > In my understanding, I think we can also handle ovs interface
changes
>> >> incrementally and if we can't
>> >> > we can just call "physical_run()" instead of full flow recompute.
>> >> >
>> >> > With this patch, it adds an engine node for physical flow changes
and
>> the
>> >> input for that is en_ct_zone
>> >> > and ovs interface. I understand and agree that it's a bit weird.
>> >> >
>> >> > How to handle it when both
>> >> >    - en_ct_zone changes
>> >> >    -  some ovs interface changes.
>> >> >
>> >> > If both these inputs are direct inputs to the flow_output engine,
then
>> we
>> >> may end up calling "physical_run()"
>> >> > twice. And I wanted to avoid that and hence added a
>> physical_flow_changes
>> >> node as an intermediate.
>> >> > How do you suggest we handle this if we directly handle the ovs
>> interface
>> >> changes in flow_output engine ?
>> >> >
>> >>
>> >> Not sure if I understand completely, but it seems you are suggesting
we
>> can
>> >> afford a recompute for physical flows, but not for logical flows, and
in
>> >> some cases this can happen. Hoewver, you are trying to make a full
>> phyical
>> >> computing appear as incremental processing, but you are not happy with
>> this
>> >> kind of I-P because the cost is high, so you want to avoid calling it
>> >> multiple times, so a intermediate node is added for that purpose.
>> >
>> >
>> > That's right.
>> >
>> >>
>> >>
>> >> If this is the case, I'd suggest to handle it more cleanly with the
I-P
>> >> engine. We can split the flow_output into separate nodes:
logical_output
>> >> and physical_output, and add a dependency (fake) from physical_output
to
>> >> logical_output just to make sure physical_output is the last node to
>> >> compute, so that even if it is recomputed it doesn't matter that much.
>> Now
>> >> we call physical_run only in recompute of physical_output node, and
since
>> >> it is recompute, it won't be called multiple times. What do you think?
>> >>
>> >
>> > Below is what I understand from what you suggested. Can you please
>> confirm if the
>> > understanding is correct. If not, can you please explain the graph
path ?
>> >
>> > flow_output
>> >        --- physical_output
>> >                 -- en_ct_zones
>> >                 -- OVS interface
>> >                 -- runtime_data
>> >                 -- logical_output
>> >        -- logical_output
>> >              -- runtime_data
>> >                      -- OVS interface
>> >                      -- SB Port Binding
>> >                      -- SB datapath binding
>> >                      -- ...
>> >                         ...
>> >              -- SB flow outout
>> >              -- Address set
>> >              -- SB Port Groups
>> >              -- ..
>> >                 ...
>> >
>> > The physical_output_run() will call physical_run()
>> > I'm not sure what the flow_output_physical_output_handler()  do ? Call
>> physical_run() or no-op ?
>> > I need your input for this if my understanding is correct.
>> >
>> > The logical_output_run() will call lflow_run()
>> > I'm not sure what the flow_output_logical_output_handler()  do ? Call
>> lflow_run() or no-op ?
>> > I need your input for this if my understanding is correct.
>> >
>> > And the flow_output_run() will remain the same.
>> >
>>
>> Sorry that I didn't make it clear enough. What I meant is like below:
>>
>> physical_output
>>   -> logical_output
>>         -> runtime_data
>>         -> ... (include all parameters of lflow_run())
>>   -> ... (include all parameters of physical_run())
>>
>> The original flow_output is replaced by physical_output + logical_output.
>> The dependency phyical_output -> logical_output is fake, because there is
>> no real dependency (and change-handler can be no-op). It is just to
ensure
>> physical_output is computed after logical_output, so that recompute in
>> physical_output doesn't trigger recompute of logical_output, which is
>> expensive.
>>
>> I think the major part of this change is spliting the data of flow_output
>> into two parts, including spliting the desired_flow_table. I am not sure
>> how tricky this change is. If you think it is too big change we can stay
>> with your approach for now. We can change it in the future when
necessary.
>
>
> Hi Han,
>
> Thanks for the explanation. I think it will be a significant change, If
you're OK
> with the present approach i,e the patch 5 of this series, then I'll
definitely work
> on the follow up patch i.e to split to physical_output and logical_output
as
> soon as this patch series is reviewed and accepted.
>
> Instead of
> physical_output
>   -> logical_output
>         -> runtime_data
>   ...
>
> I was thinking to have something like:
>
> flow_output
>    -> physical_output
>        ...
>    -> logical_output
>
> But we can discuss this later :).
>
>
> My question is are you fine with the present patch 5 or you want me to
work
> on removing the global localvif_to_ofport simap and make it as part of
engine data
> in v8.
>

It would be better to have the correct dependency graph since we are
handling interface changes incrementally, but if it's urgent to get the
current improvement released in 20.06, I am ok with it. So please take your
judgement (I suggest to evaluate this carefully since this change is big.
Usually such big changes would take sometime to get stable).

If we decide to continue with the current approach, could you:
1. Add comments to describe the special consideration of the node
physical_flow_changes.
2. Avoid accessing data of indirect dependency inputs, or at least add
comments with XXX mentioning the reason behind this - mostly due to that
the node physical_flow_changes is added to wrap the inputs.
3. I saw that the data of physical_flow_changes is modified directly from
the node flow_output's change handler:
    pfc->need_physical_run = false;
    ...
    pfc->ovs_ifaces_changed = false;
This violates the I-P engine rules, and it can be changed to reset these
flags to false at the node physical_flow_changes' run() or change handlers.

Thanks,
Han

> Thanks.
> Numan
>
>
>>
>> Thanks,
>> Han
>>
>> > Thanks
>> > Numan
>> >
>> >
>> >
>> > The
>> >
>> >>
>> >>
>> >> > Thanks
>> >> > Numan
>> >> >
>> >> >
>> >> >>
>> >> >>
>> >> >> In addition, please see another comment inlined.
>> >> >>
>> >> >> Thanks,
>> >> >> Han
>> >> >>
>> >> >>
>> >> >> > Acked-by: Mark Michelson <mmichels@redhat.com>
>> >> >> > Signed-off-by: Numan Siddique <numans@ovn.org>
>> >> >> > ---
>> >> >> >  controller/binding.c        | 23 +----------
>> >> >> >  controller/binding.h        | 24 ++++++++++-
>> >> >> >  controller/ovn-controller.c | 82
>> ++++++++++++++++++++++++++++++++++++-
>> >> >> >  controller/physical.c       | 51 +++++++++++++++++++++++
>> >> >> >  controller/physical.h       |  5 ++-
>> >> >> >  5 files changed, 159 insertions(+), 26 deletions(-)
>> >> >> >
>> >> >> > diff --git a/controller/binding.c b/controller/binding.c
>> >> >> > index d5e8e4773..f00c975ce 100644
>> >> >> > --- a/controller/binding.c
>> >> >> > +++ b/controller/binding.c
>> >> >> > @@ -504,7 +504,7 @@ remove_local_lport_ids(const struct
>> >> >> sbrec_port_binding *binding_rec,
>> >> >> >   * 'struct local_binding' is used. A shash of these local
bindings
>> is
>> >> >> >   * maintained with the 'external_ids:iface-id' as the key to the
>> >> shash.
>> >> >> >   *
>> >> >> > - * struct local_binding has 3 main fields:
>> >> >> > + * struct local_binding (defined in binding.h) has 3 main
fields:
>> >> >> >   *    - type
>> >> >> >   *    - OVS interface row object
>> >> >> >   *    - Port_Binding row object
>> >> >> > @@ -540,21 +540,6 @@ remove_local_lport_ids(const struct
>> >> >> sbrec_port_binding *binding_rec,
>> >> >> >   *              when it sees the ARP packet from the parent's
VIF.
>> >> >> >   *
>> >> >> >   */
>> >> >> > -enum local_binding_type {
>> >> >> > -    BT_VIF,
>> >> >> > -    BT_CONTAINER,
>> >> >> > -    BT_VIRTUAL
>> >> >> > -};
>> >> >> > -
>> >> >> > -struct local_binding {
>> >> >> > -    char *name;
>> >> >> > -    enum local_binding_type type;
>> >> >> > -    const struct ovsrec_interface *iface;
>> >> >> > -    const struct sbrec_port_binding *pb;
>> >> >> > -
>> >> >> > -    /* shash of 'struct local_binding' representing children. */
>> >> >> > -    struct shash children;
>> >> >> > -};
>> >> >> >
>> >> >> >  static struct local_binding *
>> >> >> >  local_binding_create(const char *name, const struct
>> ovsrec_interface
>> >> >> *iface,
>> >> >> > @@ -576,12 +561,6 @@ local_binding_add(struct shash
*local_bindings,
>> >> >> struct local_binding *lbinding)
>> >> >> >      shash_add(local_bindings, lbinding->name, lbinding);
>> >> >> >  }
>> >> >> >
>> >> >> > -static struct local_binding *
>> >> >> > -local_binding_find(struct shash *local_bindings, const char
*name)
>> >> >> > -{
>> >> >> > -    return shash_find_data(local_bindings, name);
>> >> >> > -}
>> >> >> > -
>> >> >> >  static void
>> >> >> >  local_binding_destroy(struct local_binding *lbinding)
>> >> >> >  {
>> >> >> > diff --git a/controller/binding.h b/controller/binding.h
>> >> >> > index f7ace6faf..21118ecd4 100644
>> >> >> > --- a/controller/binding.h
>> >> >> > +++ b/controller/binding.h
>> >> >> > @@ -18,6 +18,7 @@
>> >> >> >  #define OVN_BINDING_H 1
>> >> >> >
>> >> >> >  #include <stdbool.h>
>> >> >> > +#include "openvswitch/shash.h"
>> >> >> >
>> >> >> >  struct hmap;
>> >> >> >  struct ovsdb_idl;
>> >> >> > @@ -32,7 +33,6 @@ struct sbrec_chassis;
>> >> >> >  struct sbrec_port_binding_table;
>> >> >> >  struct sset;
>> >> >> >  struct sbrec_port_binding;
>> >> >> > -struct shash;
>> >> >> >
>> >> >> >  struct binding_ctx_in {
>> >> >> >      struct ovsdb_idl_txn *ovnsb_idl_txn;
>> >> >> > @@ -60,6 +60,28 @@ struct binding_ctx_out {
>> >> >> >      struct smap *local_iface_ids;
>> >> >> >  };
>> >> >> >
>> >> >> > +enum local_binding_type {
>> >> >> > +    BT_VIF,
>> >> >> > +    BT_CONTAINER,
>> >> >> > +    BT_VIRTUAL
>> >> >> > +};
>> >> >> > +
>> >> >> > +struct local_binding {
>> >> >> > +    char *name;
>> >> >> > +    enum local_binding_type type;
>> >> >> > +    const struct ovsrec_interface *iface;
>> >> >> > +    const struct sbrec_port_binding *pb;
>> >> >> > +
>> >> >> > +    /* shash of 'struct local_binding' representing children. */
>> >> >> > +    struct shash children;
>> >> >> > +};
>> >> >> > +
>> >> >> > +static inline struct local_binding *
>> >> >> > +local_binding_find(struct shash *local_bindings, const char
*name)
>> >> >> > +{
>> >> >> > +    return shash_find_data(local_bindings, name);
>> >> >> > +}
>> >> >> > +
>> >> >> >  void binding_register_ovs_idl(struct ovsdb_idl *);
>> >> >> >  void binding_run(struct binding_ctx_in *, struct binding_ctx_out
>> *);
>> >> >> >  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
>> >> >> > diff --git a/controller/ovn-controller.c
>> b/controller/ovn-controller.c
>> >> >> > index 9ad5be1c6..8f95fff1f 100644
>> >> >> > --- a/controller/ovn-controller.c
>> >> >> > +++ b/controller/ovn-controller.c
>> >> >> > @@ -1369,8 +1369,13 @@ static void init_physical_ctx(struct
>> engine_node
>> >> >> *node,
>> >> >> >
>> >> >> >      ovs_assert(br_int && chassis);
>> >> >> >
>> >> >> > +    struct ovsrec_interface_table *iface_table =
>> >> >> > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
>> >> >> > +            engine_get_input("OVS_interface",
>> >> >> > +                engine_get_input("physical_flow_changes",
node)));
>> >> >>
>> >> >> I think we shouldn't try to get indirect input data (i.e. input of
the
>> >> >> input). The engine design didn't take this into consideration,
>> although
>> >> >> nothing could prevent a developer to do this (except code
>> reviewing:)).
>> >> >> If the input is required to compute the output, just add it as
direct
>> >> input
>> >> >> to the node.
>> >> >
>> >> >
>> >> > Sure I can do that. But we may need to have a no-op handler for
these
>> new
>> >> input nodes.
>> >> > Is that OK ?
>> >>
>> >> Based on the above discussion, the node "physical_flow_change" is
added
>> >> just to combine the "OVS_interface" and "ct_zones" node's data, so
there
>> is
>> >> no point to add the two nodes again directly as input to flow_output
with
>> >> no-op handlers, which is meaningless. If we really want to have the
>> >> physical_flow_change node, I think the data should be passed in the
>> >> physical_flow_change node, just copy the references of the both input
>> data
>> >> and it then becomes "physical_flow_changes" node's data, and
flow_output
>> >> node and get it directly there. It achieves the same but not breaking
I-P
>> >> engine rules.
>> >>
>> >> >
>> >> > Thanks
>> >> > Numan
>> >> >
>> >> >>
>> >> >>
>> >> >> >      struct ed_type_ct_zones *ct_zones_data =
>> >> >> > -        engine_get_input_data("ct_zones", node);
>> >> >> > +        engine_get_input_data("ct_zones",
>> >> >> > +            engine_get_input("physical_flow_changes", node));
>> >> >> >      struct simap *ct_zones = &ct_zones_data->current;
>> >> >> >
>> >> >> >      p_ctx->sbrec_port_binding_by_name =
sbrec_port_binding_by_name;
>> >> >> > @@ -1378,12 +1383,14 @@ static void init_physical_ctx(struct
>> >> engine_node
>> >> >> *node,
>> >> >> >      p_ctx->mc_group_table = multicast_group_table;
>> >> >> >      p_ctx->br_int = br_int;
>> >> >> >      p_ctx->chassis_table = chassis_table;
>> >> >> > +    p_ctx->iface_table = iface_table;
>> >> >> >      p_ctx->chassis = chassis;
>> >> >> >      p_ctx->active_tunnels = &rt_data->active_tunnels;
>> >> >> >      p_ctx->local_datapaths = &rt_data->local_datapaths;
>> >> >> >      p_ctx->local_lports = &rt_data->local_lports;
>> >> >> >      p_ctx->ct_zones = ct_zones;
>> >> >> >      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
>> >> >> > +    p_ctx->local_bindings = &rt_data->local_bindings;
>> >> >> >  }
>> >> >> >
>> >> >> >  static void init_lflow_ctx(struct engine_node *node,
>> >> >> > @@ -1791,6 +1798,70 @@ flow_output_port_groups_handler(struct
>> >> engine_node
>> >> >> *node, void *data)
>> >> >> >      return _flow_output_resource_ref_handler(node, data,
>> >> >> REF_TYPE_PORTGROUP);
>> >> >> >  }
>> >> >> >
>> >> >> > +struct ed_type_physical_flow_changes {
>> >> >> > +    bool need_physical_run;
>> >> >> > +    bool ovs_ifaces_changed;
>> >> >> > +};
>> >> >> > +
>> >> >> > +static bool
>> >> >> > +flow_output_physical_flow_changes_handler(struct engine_node
*node,
>> >> void
>> >> >> *data)
>> >> >> > +{
>> >> >> > +    struct ed_type_physical_flow_changes *pfc =
>> >> >> > +        engine_get_input_data("physical_flow_changes", node);
>> >> >> > +    struct ed_type_runtime_data *rt_data =
>> >> >> > +        engine_get_input_data("runtime_data", node);
>> >> >> > +
>> >> >> > +    struct ed_type_flow_output *fo = data;
>> >> >> > +    struct physical_ctx p_ctx;
>> >> >> > +    init_physical_ctx(node, rt_data, &p_ctx);
>> >> >> > +
>> >> >> > +    engine_set_node_state(node, EN_UPDATED);
>> >> >> > +    if (pfc->need_physical_run) {
>> >> >> > +        pfc->need_physical_run = false;
>> >> >> > +        physical_run(&p_ctx, &fo->flow_table);
>> >> >> > +        return true;
>> >> >> > +    }
>> >> >> > +
>> >> >> > +    if (pfc->ovs_ifaces_changed) {
>> >> >> > +        pfc->ovs_ifaces_changed = false;
>> >> >> > +        return physical_handle_ovs_iface_changes(&p_ctx,
>> >> >> &fo->flow_table);
>> >> >> > +    }
>> >> >> > +
>> >> >> > +    return true;
>> >> >> > +}
>> >> >> > +
>> >> >> > +static void *
>> >> >> > +en_physical_flow_changes_init(struct engine_node *node
OVS_UNUSED,
>> >> >> > +                             struct engine_arg *arg OVS_UNUSED)
>> >> >> > +{
>> >> >> > +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof
>> >> *data);
>> >> >> > +    return data;
>> >> >> > +}
>> >> >> > +
>> >> >> > +static void
>> >> >> > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
>> >> >> > +{
>> >> >> > +
>> >> >> > +}
>> >> >> > +
>> >> >> > +static void
>> >> >> > +en_physical_flow_changes_run(struct engine_node *node, void
*data
>> >> >> OVS_UNUSED)
>> >> >> > +{
>> >> >> > +    struct ed_type_physical_flow_changes *pfc = data;
>> >> >> > +    pfc->need_physical_run = true;
>> >> >> > +    engine_set_node_state(node, EN_UPDATED);
>> >> >> > +}
>> >> >> > +
>> >> >> > +static bool
>> >> >> > +physical_flow_changes_ovs_iface_handler(struct engine_node *node
>> >> >> OVS_UNUSED,
>> >> >> > +                                        void *data OVS_UNUSED)
>> >> >> > +{
>> >> >> > +    struct ed_type_physical_flow_changes *pfc = data;
>> >> >> > +    pfc->ovs_ifaces_changed = true;
>> >> >> > +    engine_set_node_state(node, EN_UPDATED);
>> >> >> > +    return true;
>> >> >> > +}
>> >> >> > +
>> >> >> >  struct ovn_controller_exit_args {
>> >> >> >      bool *exiting;
>> >> >> >      bool *restart;
>> >> >> > @@ -1914,6 +1985,7 @@ main(int argc, char *argv[])
>> >> >> >      ENGINE_NODE(runtime_data, "runtime_data");
>> >> >> >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
>> >> >> >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
>> >> >> > +    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
>> >> >> >      ENGINE_NODE(flow_output, "flow_output");
>> >> >> >      ENGINE_NODE(addr_sets, "addr_sets");
>> >> >> >      ENGINE_NODE(port_groups, "port_groups");
>> >> >> > @@ -1933,13 +2005,19 @@ main(int argc, char *argv[])
>> >> >> >      engine_add_input(&en_port_groups, &en_sb_port_group,
>> >> >> >                       port_groups_sb_port_group_handler);
>> >> >> >
>> >> >> > +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
>> >> >> > +                     NULL);
>> >> >> > +    engine_add_input(&en_physical_flow_changes,
&en_ovs_interface,
>> >> >> > +                     physical_flow_changes_ovs_iface_handler);
>> >> >> > +
>> >> >> >      engine_add_input(&en_flow_output, &en_addr_sets,
>> >> >> >                       flow_output_addr_sets_handler);
>> >> >> >      engine_add_input(&en_flow_output, &en_port_groups,
>> >> >> >                       flow_output_port_groups_handler);
>> >> >> >      engine_add_input(&en_flow_output, &en_runtime_data, NULL);
>> >> >> > -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
>> >> >> >      engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
>> >> >> > +    engine_add_input(&en_flow_output, &en_physical_flow_changes,
>> >> >> > +                     flow_output_physical_flow_changes_handler);
>> >> >> >
>> >> >> >      engine_add_input(&en_flow_output, &en_ovs_open_vswitch,
NULL);
>> >> >> >      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
>> >> >> > diff --git a/controller/physical.c b/controller/physical.c
>> >> >> > index 144aeb7bd..03eb677d1 100644
>> >> >> > --- a/controller/physical.c
>> >> >> > +++ b/controller/physical.c
>> >> >> > @@ -1780,6 +1780,57 @@ physical_run(struct physical_ctx *p_ctx,
>> >> >> >      simap_destroy(&new_tunnel_to_ofport);
>> >> >> >  }
>> >> >> >
>> >> >> > +bool
>> >> >> > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
>> >> >> > +                                  struct ovn_desired_flow_table
>> >> >> *flow_table)
>> >> >> > +{
>> >> >> > +    const struct ovsrec_interface *iface_rec;
>> >> >> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
>> >> >> p_ctx->iface_table) {
>> >> >> > +        if (!strcmp(iface_rec->type, "geneve") ||
>> >> >> > +            !strcmp(iface_rec->type, "patch")) {
>> >> >> > +            return false;
>> >> >> > +        }
>> >> >> > +    }
>> >> >> > +
>> >> >> > +    struct ofpbuf ofpacts;
>> >> >> > +    ofpbuf_init(&ofpacts, 0);
>> >> >> > +
>> >> >> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
>> >> >> p_ctx->iface_table) {
>> >> >> > +        const char *iface_id =
smap_get(&iface_rec->external_ids,
>> >> >> "iface-id");
>> >> >> > +        if (!iface_id) {
>> >> >> > +            continue;
>> >> >> > +        }
>> >> >> > +
>> >> >> > +        const struct local_binding *lb =
>> >> >> > +            local_binding_find(p_ctx->local_bindings, iface_id);
>> >> >> > +
>> >> >> > +        if (!lb || !lb->pb) {
>> >> >> > +            continue;
>> >> >> > +        }
>> >> >> > +
>> >> >> > +        int64_t ofport = iface_rec->n_ofport ?
*iface_rec->ofport
>> : 0;
>> >> >> > +        if (ovsrec_interface_is_deleted(iface_rec)) {
>> >> >> > +            ofctrl_remove_flows(flow_table,
&lb->pb->header_.uuid);
>> >> >> > +            simap_find_and_delete(&localvif_to_ofport,
iface_id);
>> >> >> > +        } else {
>> >> >> > +            if (!ovsrec_interface_is_new(iface_rec)) {
>> >> >> > +                ofctrl_remove_flows(flow_table,
>> >> &lb->pb->header_.uuid);
>> >> >> > +            }
>> >> >> > +
>> >> >> > +            simap_put(&localvif_to_ofport, iface_id, ofport);
>> >> >> > +
>>  consider_port_binding(p_ctx->sbrec_port_binding_by_name,
>> >> >> > +                                  p_ctx->mff_ovn_geneve,
>> >> p_ctx->ct_zones,
>> >> >> > +                                  p_ctx->active_tunnels,
>> >> >> > +                                  p_ctx->local_datapaths,
>> >> >> > +                                  lb->pb, p_ctx->chassis,
>> >> >> > +                                  flow_table, &ofpacts);
>> >> >> > +        }
>> >> >> > +    }
>> >> >> > +
>> >> >> > +    ofpbuf_uninit(&ofpacts);
>> >> >> > +    return true;
>> >> >> > +}
>> >> >> > +
>> >> >> >  bool
>> >> >> >  get_tunnel_ofport(const char *chassis_name, char *encap_ip,
>> ofp_port_t
>> >> >> *ofport)
>> >> >> >  {
>> >> >> > diff --git a/controller/physical.h b/controller/physical.h
>> >> >> > index dadf84ea1..9ca34436a 100644
>> >> >> > --- a/controller/physical.h
>> >> >> > +++ b/controller/physical.h
>> >> >> > @@ -49,11 +49,13 @@ struct physical_ctx {
>> >> >> >      const struct ovsrec_bridge *br_int;
>> >> >> >      const struct sbrec_chassis_table *chassis_table;
>> >> >> >      const struct sbrec_chassis *chassis;
>> >> >> > +    const struct ovsrec_interface_table *iface_table;
>> >> >> >      const struct sset *active_tunnels;
>> >> >> >      struct hmap *local_datapaths;
>> >> >> >      struct sset *local_lports;
>> >> >> >      const struct simap *ct_zones;
>> >> >> >      enum mf_field_id mff_ovn_geneve;
>> >> >> > +    struct shash *local_bindings;
>> >> >> >  };
>> >> >> >
>> >> >> >  void physical_register_ovs_idl(struct ovsdb_idl *);
>> >> >> > @@ -63,7 +65,8 @@ void
physical_handle_port_binding_changes(struct
>> >> >> physical_ctx *,
>> >> >> >                                            struct
>> >> ovn_desired_flow_table
>> >> >> *);
>> >> >> >  void physical_handle_mc_group_changes(struct physical_ctx *,
>> >> >> >                                        struct
ovn_desired_flow_table
>> >> *);
>> >> >> > -
>> >> >> > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
>> >> >> > +                                       struct
>> ovn_desired_flow_table
>> >> *);
>> >> >> >  bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
>> >> >> >                         ofp_port_t *ofport);
>> >> >> >  #endif /* controller/physical.h */
>> >> >> > --
>> >> >> > 2.26.2
>> >> >> >
>> >> >> > _______________________________________________
>> >> >> > dev mailing list
>> >> >> > dev@openvswitch.org
>> >> >> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> >> >> _______________________________________________
>> >> >> dev mailing list
>> >> >> dev@openvswitch.org
>> >> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> >> >>
>> >> _______________________________________________
>> >> dev mailing list
>> >> dev@openvswitch.org
>> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> >>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
Numan Siddique May 26, 2020, 12:57 p.m. UTC | #8
On Tue, May 26, 2020 at 8:05 AM Han Zhou <hzhou@ovn.org> wrote:

> On Mon, May 25, 2020 at 8:02 AM Numan Siddique <numans@ovn.org> wrote:
> >
> >
> >
> > On Sun, May 24, 2020 at 11:32 AM Han Zhou <hzhou@ovn.org> wrote:
> >>
> >> On Sat, May 23, 2020 at 11:37 AM Numan Siddique <numans@ovn.org> wrote:
> >> >
> >> >
> >> >
> >> > On Sat, May 23, 2020 at 7:30 AM Han Zhou <hzhou@ovn.org> wrote:
> >> >>
> >> >> On Fri, May 22, 2020 at 10:29 AM Numan Siddique <numans@ovn.org>
> wrote:
> >> >> >
> >> >> >
> >> >> >
> >> >> > On Thu, May 21, 2020 at 12:02 PM Han Zhou <hzhou@ovn.org> wrote:
> >> >> >>
> >> >> >> On Wed, May 20, 2020 at 12:41 PM <numans@ovn.org> wrote:
> >> >> >> >
> >> >> >> > From: Numan Siddique <numans@ovn.org>
> >> >> >> >
> >> >> >> > This patch handles ct zone changes and OVS interface changes
> >> >> incrementally
> >> >> >> > in the flow output stage.
> >> >> >> >
> >> >> >> > Any changes to ct zone can be handled by running physical_run()
> >> instead
> >> >> >> of running
> >> >> >> > flow_output_run(). And any changes to OVS interfaces can be
> either
> >> >> handled
> >> >> >> > incrementally (for OVS interfaces representing VIFs) or just
> running
> >> >> >> > physical_run() (for tunnel and other types of interfaces).
> >> >> >> >
> >> >> >> > To better handle this, a new engine node 'physical_flow_changes'
> is
> >> >> added
> >> >> >> which
> >> >> >> > handles changes to ct zone and OVS interfaces.
> >> >> >> >
> >> >> >> Hi Numan,
> >> >> >>
> >> >> > Hi Han,
> >> >> >
> >> >> > Thanks for the review and comments. Please see below.
> >> >> >
> >> >> >
> >> >> >>
> >> >> >> The engine node physical_flow_changes looks weird because it
> doesn't
> >> >> really
> >> >> >> have any data but merely to trigger some compute when VIF changes.
> >> >> >> I think it can be handled in a more straightforward way. In fact
> there
> >> >> was
> >> >> >> a miss in my initial I-P patch that there are still global
> variables
> >> used
> >> >> >> in physical.c (my bad):
> >> >> >> - localvif_to_ofport
> >> >> >> - tunnels
> >> >> >> In the principle of I-P engine global variable shouldn't be used
> >> because
> >> >> >> otherwise there is no way to ensure the dependency graph is
> followed.
> >> The
> >> >> >> current I-P is sitll correct only because interface changes (which
> in
> >> >> turn
> >> >> >> causes the above global var changes) always triggers recompute.
> >> >> >>
> >> >> >> Now to handle interface changes incrementally, we should simply
> move
> >> >> these
> >> >> >> global vars to engine nodes, and add them as input for
> flow_output,
> >> and
> >> >> >> also their input will be OVS_Interface (and probably some other
> >> inputs).
> >> >> >> With this, the dependency graph would be clear and implementation
> of
> >> each
> >> >> >> input handler will be straightforward. Otherwise, it would be
> really
> >> hard
> >> >> >> to follow the logic and deduce the correctness for all corner
> cases,
> >> >> >> although it may be correct for normal scenarios that I believe you
> >> have
> >> >> >> done thorough tests. Do you mind refactoring it?
> >> >> >
> >> >> >
> >> >> > Right now, the flow_output node has an "en_ct_zones" engine as
> input
> >> and
> >> >> it doesn't have
> >> >> > OVS_interface as input. But this is ok as any runtime data change
> will
> >> >> trigger recomputing.
> >> >> >
> >> >> Do you mean OVS_interface should actually be input of "en_ct_zones",
> but
> >> it
> >> >> is ok to not add it? But I don't understand why should it be input of
> >> >> en_ct_zones. Maybe I missed something?
> >> >
> >> >
> >> > I mean right now, OVS Interface is not an input to flow_output stage.
> >> >
> >> >>
> >> >> My point above is about the dependency between flow_output and
> >> >> OVS_interface. flow_output actually depends on OVS_interface. The two
> >> >> global variables allowed flow_output to use OVS_interface data
> without
> >> the
> >> >> dependancy in the graph. Now since we incrementally procssing
> >> OVS_interface
> >> >> changes, we'd better fix the dependency graph to ensure the
> correctness.
> >> >
> >> >
> >> > I agree.
> >> >
> >> >
> >> >>
> >> >> I
> >> >> had encounted very tricky bugs even when some very trivial part of
> the
> >> I-P
> >> >> engine rule was not followed, and finally found out defining the
> engine
> >> >> node properly was the easiest fix.
> >> >>
> >> >> > If en_ct_zones engine node changes, there is no need to trigger
> full
> >> >> recompute and we
> >> >> > call "physical_run()", it should be enough. Although we can
> definitely
> >> >> further improve it
> >> >> > and this can be tackled separately.
> >> >> >
> >> >> > In my understanding, I think we can also handle ovs interface
> changes
> >> >> incrementally and if we can't
> >> >> > we can just call "physical_run()" instead of full flow recompute.
> >> >> >
> >> >> > With this patch, it adds an engine node for physical flow changes
> and
> >> the
> >> >> input for that is en_ct_zone
> >> >> > and ovs interface. I understand and agree that it's a bit weird.
> >> >> >
> >> >> > How to handle it when both
> >> >> >    - en_ct_zone changes
> >> >> >    -  some ovs interface changes.
> >> >> >
> >> >> > If both these inputs are direct inputs to the flow_output engine,
> then
> >> we
> >> >> may end up calling "physical_run()"
> >> >> > twice. And I wanted to avoid that and hence added a
> >> physical_flow_changes
> >> >> node as an intermediate.
> >> >> > How do you suggest we handle this if we directly handle the ovs
> >> interface
> >> >> changes in flow_output engine ?
> >> >> >
> >> >>
> >> >> Not sure if I understand completely, but it seems you are suggesting
> we
> >> can
> >> >> afford a recompute for physical flows, but not for logical flows, and
> in
> >> >> some cases this can happen. Hoewver, you are trying to make a full
> >> phyical
> >> >> computing appear as incremental processing, but you are not happy
> with
> >> this
> >> >> kind of I-P because the cost is high, so you want to avoid calling it
> >> >> multiple times, so a intermediate node is added for that purpose.
> >> >
> >> >
> >> > That's right.
> >> >
> >> >>
> >> >>
> >> >> If this is the case, I'd suggest to handle it more cleanly with the
> I-P
> >> >> engine. We can split the flow_output into separate nodes:
> logical_output
> >> >> and physical_output, and add a dependency (fake) from physical_output
> to
> >> >> logical_output just to make sure physical_output is the last node to
> >> >> compute, so that even if it is recomputed it doesn't matter that
> much.
> >> Now
> >> >> we call physical_run only in recompute of physical_output node, and
> since
> >> >> it is recompute, it won't be called multiple times. What do you
> think?
> >> >>
> >> >
> >> > Below is what I understand from what you suggested. Can you please
> >> confirm if the
> >> > understanding is correct. If not, can you please explain the graph
> path ?
> >> >
> >> > flow_output
> >> >        --- physical_output
> >> >                 -- en_ct_zones
> >> >                 -- OVS interface
> >> >                 -- runtime_data
> >> >                 -- logical_output
> >> >        -- logical_output
> >> >              -- runtime_data
> >> >                      -- OVS interface
> >> >                      -- SB Port Binding
> >> >                      -- SB datapath binding
> >> >                      -- ...
> >> >                         ...
> >> >              -- SB flow outout
> >> >              -- Address set
> >> >              -- SB Port Groups
> >> >              -- ..
> >> >                 ...
> >> >
> >> > The physical_output_run() will call physical_run()
> >> > I'm not sure what the flow_output_physical_output_handler()  do ? Call
> >> physical_run() or no-op ?
> >> > I need your input for this if my understanding is correct.
> >> >
> >> > The logical_output_run() will call lflow_run()
> >> > I'm not sure what the flow_output_logical_output_handler()  do ? Call
> >> lflow_run() or no-op ?
> >> > I need your input for this if my understanding is correct.
> >> >
> >> > And the flow_output_run() will remain the same.
> >> >
> >>
> >> Sorry that I didn't make it clear enough. What I meant is like below:
> >>
> >> physical_output
> >>   -> logical_output
> >>         -> runtime_data
> >>         -> ... (include all parameters of lflow_run())
> >>   -> ... (include all parameters of physical_run())
> >>
> >> The original flow_output is replaced by physical_output +
> logical_output.
> >> The dependency phyical_output -> logical_output is fake, because there
> is
> >> no real dependency (and change-handler can be no-op). It is just to
> ensure
> >> physical_output is computed after logical_output, so that recompute in
> >> physical_output doesn't trigger recompute of logical_output, which is
> >> expensive.
> >>
> >> I think the major part of this change is spliting the data of
> flow_output
> >> into two parts, including spliting the desired_flow_table. I am not sure
> >> how tricky this change is. If you think it is too big change we can stay
> >> with your approach for now. We can change it in the future when
> necessary.
> >
> >
> > Hi Han,
> >
> > Thanks for the explanation. I think it will be a significant change, If
> you're OK
> > with the present approach i,e the patch 5 of this series, then I'll
> definitely work
> > on the follow up patch i.e to split to physical_output and logical_output
> as
> > soon as this patch series is reviewed and accepted.
> >
> > Instead of
> > physical_output
> >   -> logical_output
> >         -> runtime_data
> >   ...
> >
> > I was thinking to have something like:
> >
> > flow_output
> >    -> physical_output
> >        ...
> >    -> logical_output
> >
> > But we can discuss this later :).
> >
> >
> > My question is are you fine with the present patch 5 or you want me to
> work
> > on removing the global localvif_to_ofport simap and make it as part of
> engine data
> > in v8.
> >
>
> It would be better to have the correct dependency graph since we are
> handling interface changes incrementally, but if it's urgent to get the
> current improvement released in 20.06, I am ok with it. So please take your
> judgement (I suggest to evaluate this carefully since this change is big.
> Usually such big changes would take sometime to get stable).
>
> If we decide to continue with the current approach, could you:
> 1. Add comments to describe the special consideration of the node
> physical_flow_changes.
> 2. Avoid accessing data of indirect dependency inputs, or at least add
> comments with XXX mentioning the reason behind this - mostly due to that
> the node physical_flow_changes is added to wrap the inputs.
>

Thanks. I submitted v8 just a while back. I was fairly confident of the
correctness
of these patches as we did some internal scale testing and at this point I
was
not confident of refactoring the suggestions here.

I've started working on the splitting flow_output ti physical output and
logical output.
So I'll definitely address all the pending ones and submit a new series.

Request to take a look at v8.


> 3. I saw that the data of physical_flow_changes is modified directly from
> the node flow_output's change handler:
>     pfc->need_physical_run = false;
>     ...
>     pfc->ovs_ifaces_changed = false;
>

If an engine_run doesn't result in either recompute of physical_flow_changes
or any of its handlers, the old values will not be cleared.

Hence I moved this data to the tracked data so that these gets cleared at
the
end of engine_run.


> This violates the I-P engine rules, and it can be changed to reset these
> flags to false at the node physical_flow_changes' run() or change handlers.
>

If you get some time can you please submit a document patch about the
I-P rules. This would benefit new contributors and also me :)/
 If the document already exists, can you please point me to that.


Thanks
Numan


Thanks,
> Han
>
> > Thanks.
> > Numan
> >
> >
> >>
> >> Thanks,
> >> Han
> >>
> >> > Thanks
> >> > Numan
> >> >
> >> >
> >> >
> >> > The
> >> >
> >> >>
> >> >>
> >> >> > Thanks
> >> >> > Numan
> >> >> >
> >> >> >
> >> >> >>
> >> >> >>
> >> >> >> In addition, please see another comment inlined.
> >> >> >>
> >> >> >> Thanks,
> >> >> >> Han
> >> >> >>
> >> >> >>
> >> >> >> > Acked-by: Mark Michelson <mmichels@redhat.com>
> >> >> >> > Signed-off-by: Numan Siddique <numans@ovn.org>
> >> >> >> > ---
> >> >> >> >  controller/binding.c        | 23 +----------
> >> >> >> >  controller/binding.h        | 24 ++++++++++-
> >> >> >> >  controller/ovn-controller.c | 82
> >> ++++++++++++++++++++++++++++++++++++-
> >> >> >> >  controller/physical.c       | 51 +++++++++++++++++++++++
> >> >> >> >  controller/physical.h       |  5 ++-
> >> >> >> >  5 files changed, 159 insertions(+), 26 deletions(-)
> >> >> >> >
> >> >> >> > diff --git a/controller/binding.c b/controller/binding.c
> >> >> >> > index d5e8e4773..f00c975ce 100644
> >> >> >> > --- a/controller/binding.c
> >> >> >> > +++ b/controller/binding.c
> >> >> >> > @@ -504,7 +504,7 @@ remove_local_lport_ids(const struct
> >> >> >> sbrec_port_binding *binding_rec,
> >> >> >> >   * 'struct local_binding' is used. A shash of these local
> bindings
> >> is
> >> >> >> >   * maintained with the 'external_ids:iface-id' as the key to
> the
> >> >> shash.
> >> >> >> >   *
> >> >> >> > - * struct local_binding has 3 main fields:
> >> >> >> > + * struct local_binding (defined in binding.h) has 3 main
> fields:
> >> >> >> >   *    - type
> >> >> >> >   *    - OVS interface row object
> >> >> >> >   *    - Port_Binding row object
> >> >> >> > @@ -540,21 +540,6 @@ remove_local_lport_ids(const struct
> >> >> >> sbrec_port_binding *binding_rec,
> >> >> >> >   *              when it sees the ARP packet from the parent's
> VIF.
> >> >> >> >   *
> >> >> >> >   */
> >> >> >> > -enum local_binding_type {
> >> >> >> > -    BT_VIF,
> >> >> >> > -    BT_CONTAINER,
> >> >> >> > -    BT_VIRTUAL
> >> >> >> > -};
> >> >> >> > -
> >> >> >> > -struct local_binding {
> >> >> >> > -    char *name;
> >> >> >> > -    enum local_binding_type type;
> >> >> >> > -    const struct ovsrec_interface *iface;
> >> >> >> > -    const struct sbrec_port_binding *pb;
> >> >> >> > -
> >> >> >> > -    /* shash of 'struct local_binding' representing children.
> */
> >> >> >> > -    struct shash children;
> >> >> >> > -};
> >> >> >> >
> >> >> >> >  static struct local_binding *
> >> >> >> >  local_binding_create(const char *name, const struct
> >> ovsrec_interface
> >> >> >> *iface,
> >> >> >> > @@ -576,12 +561,6 @@ local_binding_add(struct shash
> *local_bindings,
> >> >> >> struct local_binding *lbinding)
> >> >> >> >      shash_add(local_bindings, lbinding->name, lbinding);
> >> >> >> >  }
> >> >> >> >
> >> >> >> > -static struct local_binding *
> >> >> >> > -local_binding_find(struct shash *local_bindings, const char
> *name)
> >> >> >> > -{
> >> >> >> > -    return shash_find_data(local_bindings, name);
> >> >> >> > -}
> >> >> >> > -
> >> >> >> >  static void
> >> >> >> >  local_binding_destroy(struct local_binding *lbinding)
> >> >> >> >  {
> >> >> >> > diff --git a/controller/binding.h b/controller/binding.h
> >> >> >> > index f7ace6faf..21118ecd4 100644
> >> >> >> > --- a/controller/binding.h
> >> >> >> > +++ b/controller/binding.h
> >> >> >> > @@ -18,6 +18,7 @@
> >> >> >> >  #define OVN_BINDING_H 1
> >> >> >> >
> >> >> >> >  #include <stdbool.h>
> >> >> >> > +#include "openvswitch/shash.h"
> >> >> >> >
> >> >> >> >  struct hmap;
> >> >> >> >  struct ovsdb_idl;
> >> >> >> > @@ -32,7 +33,6 @@ struct sbrec_chassis;
> >> >> >> >  struct sbrec_port_binding_table;
> >> >> >> >  struct sset;
> >> >> >> >  struct sbrec_port_binding;
> >> >> >> > -struct shash;
> >> >> >> >
> >> >> >> >  struct binding_ctx_in {
> >> >> >> >      struct ovsdb_idl_txn *ovnsb_idl_txn;
> >> >> >> > @@ -60,6 +60,28 @@ struct binding_ctx_out {
> >> >> >> >      struct smap *local_iface_ids;
> >> >> >> >  };
> >> >> >> >
> >> >> >> > +enum local_binding_type {
> >> >> >> > +    BT_VIF,
> >> >> >> > +    BT_CONTAINER,
> >> >> >> > +    BT_VIRTUAL
> >> >> >> > +};
> >> >> >> > +
> >> >> >> > +struct local_binding {
> >> >> >> > +    char *name;
> >> >> >> > +    enum local_binding_type type;
> >> >> >> > +    const struct ovsrec_interface *iface;
> >> >> >> > +    const struct sbrec_port_binding *pb;
> >> >> >> > +
> >> >> >> > +    /* shash of 'struct local_binding' representing children.
> */
> >> >> >> > +    struct shash children;
> >> >> >> > +};
> >> >> >> > +
> >> >> >> > +static inline struct local_binding *
> >> >> >> > +local_binding_find(struct shash *local_bindings, const char
> *name)
> >> >> >> > +{
> >> >> >> > +    return shash_find_data(local_bindings, name);
> >> >> >> > +}
> >> >> >> > +
> >> >> >> >  void binding_register_ovs_idl(struct ovsdb_idl *);
> >> >> >> >  void binding_run(struct binding_ctx_in *, struct
> binding_ctx_out
> >> *);
> >> >> >> >  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >> >> >> > diff --git a/controller/ovn-controller.c
> >> b/controller/ovn-controller.c
> >> >> >> > index 9ad5be1c6..8f95fff1f 100644
> >> >> >> > --- a/controller/ovn-controller.c
> >> >> >> > +++ b/controller/ovn-controller.c
> >> >> >> > @@ -1369,8 +1369,13 @@ static void init_physical_ctx(struct
> >> engine_node
> >> >> >> *node,
> >> >> >> >
> >> >> >> >      ovs_assert(br_int && chassis);
> >> >> >> >
> >> >> >> > +    struct ovsrec_interface_table *iface_table =
> >> >> >> > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> >> >> >> > +            engine_get_input("OVS_interface",
> >> >> >> > +                engine_get_input("physical_flow_changes",
> node)));
> >> >> >>
> >> >> >> I think we shouldn't try to get indirect input data (i.e. input of
> the
> >> >> >> input). The engine design didn't take this into consideration,
> >> although
> >> >> >> nothing could prevent a developer to do this (except code
> >> reviewing:)).
> >> >> >> If the input is required to compute the output, just add it as
> direct
> >> >> input
> >> >> >> to the node.
> >> >> >
> >> >> >
> >> >> > Sure I can do that. But we may need to have a no-op handler for
> these
> >> new
> >> >> input nodes.
> >> >> > Is that OK ?
> >> >>
> >> >> Based on the above discussion, the node "physical_flow_change" is
> added
> >> >> just to combine the "OVS_interface" and "ct_zones" node's data, so
> there
> >> is
> >> >> no point to add the two nodes again directly as input to flow_output
> with
> >> >> no-op handlers, which is meaningless. If we really want to have the
> >> >> physical_flow_change node, I think the data should be passed in the
> >> >> physical_flow_change node, just copy the references of the both input
> >> data
> >> >> and it then becomes "physical_flow_changes" node's data, and
> flow_output
> >> >> node and get it directly there. It achieves the same but not breaking
> I-P
> >> >> engine rules.
> >> >>
> >> >> >
> >> >> > Thanks
> >> >> > Numan
> >> >> >
> >> >> >>
> >> >> >>
> >> >> >> >      struct ed_type_ct_zones *ct_zones_data =
> >> >> >> > -        engine_get_input_data("ct_zones", node);
> >> >> >> > +        engine_get_input_data("ct_zones",
> >> >> >> > +            engine_get_input("physical_flow_changes", node));
> >> >> >> >      struct simap *ct_zones = &ct_zones_data->current;
> >> >> >> >
> >> >> >> >      p_ctx->sbrec_port_binding_by_name =
> sbrec_port_binding_by_name;
> >> >> >> > @@ -1378,12 +1383,14 @@ static void init_physical_ctx(struct
> >> >> engine_node
> >> >> >> *node,
> >> >> >> >      p_ctx->mc_group_table = multicast_group_table;
> >> >> >> >      p_ctx->br_int = br_int;
> >> >> >> >      p_ctx->chassis_table = chassis_table;
> >> >> >> > +    p_ctx->iface_table = iface_table;
> >> >> >> >      p_ctx->chassis = chassis;
> >> >> >> >      p_ctx->active_tunnels = &rt_data->active_tunnels;
> >> >> >> >      p_ctx->local_datapaths = &rt_data->local_datapaths;
> >> >> >> >      p_ctx->local_lports = &rt_data->local_lports;
> >> >> >> >      p_ctx->ct_zones = ct_zones;
> >> >> >> >      p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
> >> >> >> > +    p_ctx->local_bindings = &rt_data->local_bindings;
> >> >> >> >  }
> >> >> >> >
> >> >> >> >  static void init_lflow_ctx(struct engine_node *node,
> >> >> >> > @@ -1791,6 +1798,70 @@ flow_output_port_groups_handler(struct
> >> >> engine_node
> >> >> >> *node, void *data)
> >> >> >> >      return _flow_output_resource_ref_handler(node, data,
> >> >> >> REF_TYPE_PORTGROUP);
> >> >> >> >  }
> >> >> >> >
> >> >> >> > +struct ed_type_physical_flow_changes {
> >> >> >> > +    bool need_physical_run;
> >> >> >> > +    bool ovs_ifaces_changed;
> >> >> >> > +};
> >> >> >> > +
> >> >> >> > +static bool
> >> >> >> > +flow_output_physical_flow_changes_handler(struct engine_node
> *node,
> >> >> void
> >> >> >> *data)
> >> >> >> > +{
> >> >> >> > +    struct ed_type_physical_flow_changes *pfc =
> >> >> >> > +        engine_get_input_data("physical_flow_changes", node);
> >> >> >> > +    struct ed_type_runtime_data *rt_data =
> >> >> >> > +        engine_get_input_data("runtime_data", node);
> >> >> >> > +
> >> >> >> > +    struct ed_type_flow_output *fo = data;
> >> >> >> > +    struct physical_ctx p_ctx;
> >> >> >> > +    init_physical_ctx(node, rt_data, &p_ctx);
> >> >> >> > +
> >> >> >> > +    engine_set_node_state(node, EN_UPDATED);
> >> >> >> > +    if (pfc->need_physical_run) {
> >> >> >> > +        pfc->need_physical_run = false;
> >> >> >> > +        physical_run(&p_ctx, &fo->flow_table);
> >> >> >> > +        return true;
> >> >> >> > +    }
> >> >> >> > +
> >> >> >> > +    if (pfc->ovs_ifaces_changed) {
> >> >> >> > +        pfc->ovs_ifaces_changed = false;
> >> >> >> > +        return physical_handle_ovs_iface_changes(&p_ctx,
> >> >> >> &fo->flow_table);
> >> >> >> > +    }
> >> >> >> > +
> >> >> >> > +    return true;
> >> >> >> > +}
> >> >> >> > +
> >> >> >> > +static void *
> >> >> >> > +en_physical_flow_changes_init(struct engine_node *node
> OVS_UNUSED,
> >> >> >> > +                             struct engine_arg *arg OVS_UNUSED)
> >> >> >> > +{
> >> >> >> > +    struct ed_type_physical_flow_changes *data = xzalloc(sizeof
> >> >> *data);
> >> >> >> > +    return data;
> >> >> >> > +}
> >> >> >> > +
> >> >> >> > +static void
> >> >> >> > +en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
> >> >> >> > +{
> >> >> >> > +
> >> >> >> > +}
> >> >> >> > +
> >> >> >> > +static void
> >> >> >> > +en_physical_flow_changes_run(struct engine_node *node, void
> *data
> >> >> >> OVS_UNUSED)
> >> >> >> > +{
> >> >> >> > +    struct ed_type_physical_flow_changes *pfc = data;
> >> >> >> > +    pfc->need_physical_run = true;
> >> >> >> > +    engine_set_node_state(node, EN_UPDATED);
> >> >> >> > +}
> >> >> >> > +
> >> >> >> > +static bool
> >> >> >> > +physical_flow_changes_ovs_iface_handler(struct engine_node
> *node
> >> >> >> OVS_UNUSED,
> >> >> >> > +                                        void *data OVS_UNUSED)
> >> >> >> > +{
> >> >> >> > +    struct ed_type_physical_flow_changes *pfc = data;
> >> >> >> > +    pfc->ovs_ifaces_changed = true;
> >> >> >> > +    engine_set_node_state(node, EN_UPDATED);
> >> >> >> > +    return true;
> >> >> >> > +}
> >> >> >> > +
> >> >> >> >  struct ovn_controller_exit_args {
> >> >> >> >      bool *exiting;
> >> >> >> >      bool *restart;
> >> >> >> > @@ -1914,6 +1985,7 @@ main(int argc, char *argv[])
> >> >> >> >      ENGINE_NODE(runtime_data, "runtime_data");
> >> >> >> >      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> >> >> >> >      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> >> >> >> > +    ENGINE_NODE(physical_flow_changes,
> "physical_flow_changes");
> >> >> >> >      ENGINE_NODE(flow_output, "flow_output");
> >> >> >> >      ENGINE_NODE(addr_sets, "addr_sets");
> >> >> >> >      ENGINE_NODE(port_groups, "port_groups");
> >> >> >> > @@ -1933,13 +2005,19 @@ main(int argc, char *argv[])
> >> >> >> >      engine_add_input(&en_port_groups, &en_sb_port_group,
> >> >> >> >                       port_groups_sb_port_group_handler);
> >> >> >> >
> >> >> >> > +    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
> >> >> >> > +                     NULL);
> >> >> >> > +    engine_add_input(&en_physical_flow_changes,
> &en_ovs_interface,
> >> >> >> > +                     physical_flow_changes_ovs_iface_handler);
> >> >> >> > +
> >> >> >> >      engine_add_input(&en_flow_output, &en_addr_sets,
> >> >> >> >                       flow_output_addr_sets_handler);
> >> >> >> >      engine_add_input(&en_flow_output, &en_port_groups,
> >> >> >> >                       flow_output_port_groups_handler);
> >> >> >> >      engine_add_input(&en_flow_output, &en_runtime_data, NULL);
> >> >> >> > -    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
> >> >> >> >      engine_add_input(&en_flow_output, &en_mff_ovn_geneve,
> NULL);
> >> >> >> > +    engine_add_input(&en_flow_output,
> &en_physical_flow_changes,
> >> >> >> > +
>  flow_output_physical_flow_changes_handler);
> >> >> >> >
> >> >> >> >      engine_add_input(&en_flow_output, &en_ovs_open_vswitch,
> NULL);
> >> >> >> >      engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
> >> >> >> > diff --git a/controller/physical.c b/controller/physical.c
> >> >> >> > index 144aeb7bd..03eb677d1 100644
> >> >> >> > --- a/controller/physical.c
> >> >> >> > +++ b/controller/physical.c
> >> >> >> > @@ -1780,6 +1780,57 @@ physical_run(struct physical_ctx *p_ctx,
> >> >> >> >      simap_destroy(&new_tunnel_to_ofport);
> >> >> >> >  }
> >> >> >> >
> >> >> >> > +bool
> >> >> >> > +physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
> >> >> >> > +                                  struct ovn_desired_flow_table
> >> >> >> *flow_table)
> >> >> >> > +{
> >> >> >> > +    const struct ovsrec_interface *iface_rec;
> >> >> >> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> >> >> >> p_ctx->iface_table) {
> >> >> >> > +        if (!strcmp(iface_rec->type, "geneve") ||
> >> >> >> > +            !strcmp(iface_rec->type, "patch")) {
> >> >> >> > +            return false;
> >> >> >> > +        }
> >> >> >> > +    }
> >> >> >> > +
> >> >> >> > +    struct ofpbuf ofpacts;
> >> >> >> > +    ofpbuf_init(&ofpacts, 0);
> >> >> >> > +
> >> >> >> > +    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec,
> >> >> >> p_ctx->iface_table) {
> >> >> >> > +        const char *iface_id =
> smap_get(&iface_rec->external_ids,
> >> >> >> "iface-id");
> >> >> >> > +        if (!iface_id) {
> >> >> >> > +            continue;
> >> >> >> > +        }
> >> >> >> > +
> >> >> >> > +        const struct local_binding *lb =
> >> >> >> > +            local_binding_find(p_ctx->local_bindings,
> iface_id);
> >> >> >> > +
> >> >> >> > +        if (!lb || !lb->pb) {
> >> >> >> > +            continue;
> >> >> >> > +        }
> >> >> >> > +
> >> >> >> > +        int64_t ofport = iface_rec->n_ofport ?
> *iface_rec->ofport
> >> : 0;
> >> >> >> > +        if (ovsrec_interface_is_deleted(iface_rec)) {
> >> >> >> > +            ofctrl_remove_flows(flow_table,
> &lb->pb->header_.uuid);
> >> >> >> > +            simap_find_and_delete(&localvif_to_ofport,
> iface_id);
> >> >> >> > +        } else {
> >> >> >> > +            if (!ovsrec_interface_is_new(iface_rec)) {
> >> >> >> > +                ofctrl_remove_flows(flow_table,
> >> >> &lb->pb->header_.uuid);
> >> >> >> > +            }
> >> >> >> > +
> >> >> >> > +            simap_put(&localvif_to_ofport, iface_id, ofport);
> >> >> >> > +
> >>  consider_port_binding(p_ctx->sbrec_port_binding_by_name,
> >> >> >> > +                                  p_ctx->mff_ovn_geneve,
> >> >> p_ctx->ct_zones,
> >> >> >> > +                                  p_ctx->active_tunnels,
> >> >> >> > +                                  p_ctx->local_datapaths,
> >> >> >> > +                                  lb->pb, p_ctx->chassis,
> >> >> >> > +                                  flow_table, &ofpacts);
> >> >> >> > +        }
> >> >> >> > +    }
> >> >> >> > +
> >> >> >> > +    ofpbuf_uninit(&ofpacts);
> >> >> >> > +    return true;
> >> >> >> > +}
> >> >> >> > +
> >> >> >> >  bool
> >> >> >> >  get_tunnel_ofport(const char *chassis_name, char *encap_ip,
> >> ofp_port_t
> >> >> >> *ofport)
> >> >> >> >  {
> >> >> >> > diff --git a/controller/physical.h b/controller/physical.h
> >> >> >> > index dadf84ea1..9ca34436a 100644
> >> >> >> > --- a/controller/physical.h
> >> >> >> > +++ b/controller/physical.h
> >> >> >> > @@ -49,11 +49,13 @@ struct physical_ctx {
> >> >> >> >      const struct ovsrec_bridge *br_int;
> >> >> >> >      const struct sbrec_chassis_table *chassis_table;
> >> >> >> >      const struct sbrec_chassis *chassis;
> >> >> >> > +    const struct ovsrec_interface_table *iface_table;
> >> >> >> >      const struct sset *active_tunnels;
> >> >> >> >      struct hmap *local_datapaths;
> >> >> >> >      struct sset *local_lports;
> >> >> >> >      const struct simap *ct_zones;
> >> >> >> >      enum mf_field_id mff_ovn_geneve;
> >> >> >> > +    struct shash *local_bindings;
> >> >> >> >  };
> >> >> >> >
> >> >> >> >  void physical_register_ovs_idl(struct ovsdb_idl *);
> >> >> >> > @@ -63,7 +65,8 @@ void
> physical_handle_port_binding_changes(struct
> >> >> >> physical_ctx *,
> >> >> >> >                                            struct
> >> >> ovn_desired_flow_table
> >> >> >> *);
> >> >> >> >  void physical_handle_mc_group_changes(struct physical_ctx *,
> >> >> >> >                                        struct
> ovn_desired_flow_table
> >> >> *);
> >> >> >> > -
> >> >> >> > +bool physical_handle_ovs_iface_changes(struct physical_ctx *,
> >> >> >> > +                                       struct
> >> ovn_desired_flow_table
> >> >> *);
> >> >> >> >  bool get_tunnel_ofport(const char *chassis_name, char
> *encap_ip,
> >> >> >> >                         ofp_port_t *ofport);
> >> >> >> >  #endif /* controller/physical.h */
> >> >> >> > --
> >> >> >> > 2.26.2
> >> >> >> >
> >> >> >> > _______________________________________________
> >> >> >> > dev mailing list
> >> >> >> > dev@openvswitch.org
> >> >> >> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >> >> >> _______________________________________________
> >> >> >> dev mailing list
> >> >> >> dev@openvswitch.org
> >> >> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >> >> >>
> >> >> _______________________________________________
> >> >> dev mailing list
> >> >> dev@openvswitch.org
> >> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >> >>
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
diff mbox series

Patch

diff --git a/controller/binding.c b/controller/binding.c
index d5e8e4773..f00c975ce 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -504,7 +504,7 @@  remove_local_lport_ids(const struct sbrec_port_binding *binding_rec,
  * 'struct local_binding' is used. A shash of these local bindings is
  * maintained with the 'external_ids:iface-id' as the key to the shash.
  *
- * struct local_binding has 3 main fields:
+ * struct local_binding (defined in binding.h) has 3 main fields:
  *    - type
  *    - OVS interface row object
  *    - Port_Binding row object
@@ -540,21 +540,6 @@  remove_local_lport_ids(const struct sbrec_port_binding *binding_rec,
  *              when it sees the ARP packet from the parent's VIF.
  *
  */
-enum local_binding_type {
-    BT_VIF,
-    BT_CONTAINER,
-    BT_VIRTUAL
-};
-
-struct local_binding {
-    char *name;
-    enum local_binding_type type;
-    const struct ovsrec_interface *iface;
-    const struct sbrec_port_binding *pb;
-
-    /* shash of 'struct local_binding' representing children. */
-    struct shash children;
-};
 
 static struct local_binding *
 local_binding_create(const char *name, const struct ovsrec_interface *iface,
@@ -576,12 +561,6 @@  local_binding_add(struct shash *local_bindings, struct local_binding *lbinding)
     shash_add(local_bindings, lbinding->name, lbinding);
 }
 
-static struct local_binding *
-local_binding_find(struct shash *local_bindings, const char *name)
-{
-    return shash_find_data(local_bindings, name);
-}
-
 static void
 local_binding_destroy(struct local_binding *lbinding)
 {
diff --git a/controller/binding.h b/controller/binding.h
index f7ace6faf..21118ecd4 100644
--- a/controller/binding.h
+++ b/controller/binding.h
@@ -18,6 +18,7 @@ 
 #define OVN_BINDING_H 1
 
 #include <stdbool.h>
+#include "openvswitch/shash.h"
 
 struct hmap;
 struct ovsdb_idl;
@@ -32,7 +33,6 @@  struct sbrec_chassis;
 struct sbrec_port_binding_table;
 struct sset;
 struct sbrec_port_binding;
-struct shash;
 
 struct binding_ctx_in {
     struct ovsdb_idl_txn *ovnsb_idl_txn;
@@ -60,6 +60,28 @@  struct binding_ctx_out {
     struct smap *local_iface_ids;
 };
 
+enum local_binding_type {
+    BT_VIF,
+    BT_CONTAINER,
+    BT_VIRTUAL
+};
+
+struct local_binding {
+    char *name;
+    enum local_binding_type type;
+    const struct ovsrec_interface *iface;
+    const struct sbrec_port_binding *pb;
+
+    /* shash of 'struct local_binding' representing children. */
+    struct shash children;
+};
+
+static inline struct local_binding *
+local_binding_find(struct shash *local_bindings, const char *name)
+{
+    return shash_find_data(local_bindings, name);
+}
+
 void binding_register_ovs_idl(struct ovsdb_idl *);
 void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
 bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 9ad5be1c6..8f95fff1f 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -1369,8 +1369,13 @@  static void init_physical_ctx(struct engine_node *node,
 
     ovs_assert(br_int && chassis);
 
+    struct ovsrec_interface_table *iface_table =
+        (struct ovsrec_interface_table *)EN_OVSDB_GET(
+            engine_get_input("OVS_interface",
+                engine_get_input("physical_flow_changes", node)));
     struct ed_type_ct_zones *ct_zones_data =
-        engine_get_input_data("ct_zones", node);
+        engine_get_input_data("ct_zones",
+            engine_get_input("physical_flow_changes", node));
     struct simap *ct_zones = &ct_zones_data->current;
 
     p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
@@ -1378,12 +1383,14 @@  static void init_physical_ctx(struct engine_node *node,
     p_ctx->mc_group_table = multicast_group_table;
     p_ctx->br_int = br_int;
     p_ctx->chassis_table = chassis_table;
+    p_ctx->iface_table = iface_table;
     p_ctx->chassis = chassis;
     p_ctx->active_tunnels = &rt_data->active_tunnels;
     p_ctx->local_datapaths = &rt_data->local_datapaths;
     p_ctx->local_lports = &rt_data->local_lports;
     p_ctx->ct_zones = ct_zones;
     p_ctx->mff_ovn_geneve = ed_mff_ovn_geneve->mff_ovn_geneve;
+    p_ctx->local_bindings = &rt_data->local_bindings;
 }
 
 static void init_lflow_ctx(struct engine_node *node,
@@ -1791,6 +1798,70 @@  flow_output_port_groups_handler(struct engine_node *node, void *data)
     return _flow_output_resource_ref_handler(node, data, REF_TYPE_PORTGROUP);
 }
 
+struct ed_type_physical_flow_changes {
+    bool need_physical_run;
+    bool ovs_ifaces_changed;
+};
+
+static bool
+flow_output_physical_flow_changes_handler(struct engine_node *node, void *data)
+{
+    struct ed_type_physical_flow_changes *pfc =
+        engine_get_input_data("physical_flow_changes", node);
+    struct ed_type_runtime_data *rt_data =
+        engine_get_input_data("runtime_data", node);
+
+    struct ed_type_flow_output *fo = data;
+    struct physical_ctx p_ctx;
+    init_physical_ctx(node, rt_data, &p_ctx);
+
+    engine_set_node_state(node, EN_UPDATED);
+    if (pfc->need_physical_run) {
+        pfc->need_physical_run = false;
+        physical_run(&p_ctx, &fo->flow_table);
+        return true;
+    }
+
+    if (pfc->ovs_ifaces_changed) {
+        pfc->ovs_ifaces_changed = false;
+        return physical_handle_ovs_iface_changes(&p_ctx, &fo->flow_table);
+    }
+
+    return true;
+}
+
+static void *
+en_physical_flow_changes_init(struct engine_node *node OVS_UNUSED,
+                             struct engine_arg *arg OVS_UNUSED)
+{
+    struct ed_type_physical_flow_changes *data = xzalloc(sizeof *data);
+    return data;
+}
+
+static void
+en_physical_flow_changes_cleanup(void *data OVS_UNUSED)
+{
+
+}
+
+static void
+en_physical_flow_changes_run(struct engine_node *node, void *data OVS_UNUSED)
+{
+    struct ed_type_physical_flow_changes *pfc = data;
+    pfc->need_physical_run = true;
+    engine_set_node_state(node, EN_UPDATED);
+}
+
+static bool
+physical_flow_changes_ovs_iface_handler(struct engine_node *node OVS_UNUSED,
+                                        void *data OVS_UNUSED)
+{
+    struct ed_type_physical_flow_changes *pfc = data;
+    pfc->ovs_ifaces_changed = true;
+    engine_set_node_state(node, EN_UPDATED);
+    return true;
+}
+
 struct ovn_controller_exit_args {
     bool *exiting;
     bool *restart;
@@ -1914,6 +1985,7 @@  main(int argc, char *argv[])
     ENGINE_NODE(runtime_data, "runtime_data");
     ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
     ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
+    ENGINE_NODE(physical_flow_changes, "physical_flow_changes");
     ENGINE_NODE(flow_output, "flow_output");
     ENGINE_NODE(addr_sets, "addr_sets");
     ENGINE_NODE(port_groups, "port_groups");
@@ -1933,13 +2005,19 @@  main(int argc, char *argv[])
     engine_add_input(&en_port_groups, &en_sb_port_group,
                      port_groups_sb_port_group_handler);
 
+    engine_add_input(&en_physical_flow_changes, &en_ct_zones,
+                     NULL);
+    engine_add_input(&en_physical_flow_changes, &en_ovs_interface,
+                     physical_flow_changes_ovs_iface_handler);
+
     engine_add_input(&en_flow_output, &en_addr_sets,
                      flow_output_addr_sets_handler);
     engine_add_input(&en_flow_output, &en_port_groups,
                      flow_output_port_groups_handler);
     engine_add_input(&en_flow_output, &en_runtime_data, NULL);
-    engine_add_input(&en_flow_output, &en_ct_zones, NULL);
     engine_add_input(&en_flow_output, &en_mff_ovn_geneve, NULL);
+    engine_add_input(&en_flow_output, &en_physical_flow_changes,
+                     flow_output_physical_flow_changes_handler);
 
     engine_add_input(&en_flow_output, &en_ovs_open_vswitch, NULL);
     engine_add_input(&en_flow_output, &en_ovs_bridge, NULL);
diff --git a/controller/physical.c b/controller/physical.c
index 144aeb7bd..03eb677d1 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -1780,6 +1780,57 @@  physical_run(struct physical_ctx *p_ctx,
     simap_destroy(&new_tunnel_to_ofport);
 }
 
+bool
+physical_handle_ovs_iface_changes(struct physical_ctx *p_ctx,
+                                  struct ovn_desired_flow_table *flow_table)
+{
+    const struct ovsrec_interface *iface_rec;
+    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec, p_ctx->iface_table) {
+        if (!strcmp(iface_rec->type, "geneve") ||
+            !strcmp(iface_rec->type, "patch")) {
+            return false;
+        }
+    }
+
+    struct ofpbuf ofpacts;
+    ofpbuf_init(&ofpacts, 0);
+
+    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface_rec, p_ctx->iface_table) {
+        const char *iface_id = smap_get(&iface_rec->external_ids, "iface-id");
+        if (!iface_id) {
+            continue;
+        }
+
+        const struct local_binding *lb =
+            local_binding_find(p_ctx->local_bindings, iface_id);
+
+        if (!lb || !lb->pb) {
+            continue;
+        }
+
+        int64_t ofport = iface_rec->n_ofport ? *iface_rec->ofport : 0;
+        if (ovsrec_interface_is_deleted(iface_rec)) {
+            ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
+            simap_find_and_delete(&localvif_to_ofport, iface_id);
+        } else {
+            if (!ovsrec_interface_is_new(iface_rec)) {
+                ofctrl_remove_flows(flow_table, &lb->pb->header_.uuid);
+            }
+
+            simap_put(&localvif_to_ofport, iface_id, ofport);
+            consider_port_binding(p_ctx->sbrec_port_binding_by_name,
+                                  p_ctx->mff_ovn_geneve, p_ctx->ct_zones,
+                                  p_ctx->active_tunnels,
+                                  p_ctx->local_datapaths,
+                                  lb->pb, p_ctx->chassis,
+                                  flow_table, &ofpacts);
+        }
+    }
+
+    ofpbuf_uninit(&ofpacts);
+    return true;
+}
+
 bool
 get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t *ofport)
 {
diff --git a/controller/physical.h b/controller/physical.h
index dadf84ea1..9ca34436a 100644
--- a/controller/physical.h
+++ b/controller/physical.h
@@ -49,11 +49,13 @@  struct physical_ctx {
     const struct ovsrec_bridge *br_int;
     const struct sbrec_chassis_table *chassis_table;
     const struct sbrec_chassis *chassis;
+    const struct ovsrec_interface_table *iface_table;
     const struct sset *active_tunnels;
     struct hmap *local_datapaths;
     struct sset *local_lports;
     const struct simap *ct_zones;
     enum mf_field_id mff_ovn_geneve;
+    struct shash *local_bindings;
 };
 
 void physical_register_ovs_idl(struct ovsdb_idl *);
@@ -63,7 +65,8 @@  void physical_handle_port_binding_changes(struct physical_ctx *,
                                           struct ovn_desired_flow_table *);
 void physical_handle_mc_group_changes(struct physical_ctx *,
                                       struct ovn_desired_flow_table *);
-
+bool physical_handle_ovs_iface_changes(struct physical_ctx *,
+                                       struct ovn_desired_flow_table *);
 bool get_tunnel_ofport(const char *chassis_name, char *encap_ip,
                        ofp_port_t *ofport);
 #endif /* controller/physical.h */