diff mbox series

[ovs-dev,v7,12/12] controller: Consider plugging of ports on CMS request.

Message ID 20211019101348.3833652-13-frode.nordahl@canonical.com
State Handled Elsewhere
Headers show
Series [ovs-dev,v7,01/12] ovn-sb: Add requested_chassis column to Port_Binding. | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning

Commit Message

Frode Nordahl Oct. 19, 2021, 10:13 a.m. UTC
When OVN is linked with an appropriate plugging implementation,
CMS can request OVN to plug individual lports into the local
Open vSwitch instance.

The port and instance record will be maintained during the lifetime
of the lport and it will be removed on release of lport.

Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
---
 controller/ovn-controller.c | 222 +++++++++++++++++++++++++++++++++++-
 ovn-nb.xml                  |  21 ++++
 tests/ovn-controller.at     |   1 -
 tests/ovn-macros.at         |   2 +-
 tests/ovn.at                | 121 ++++++++++++++++++++
 5 files changed, 364 insertions(+), 3 deletions(-)

Comments

0-day Robot Oct. 19, 2021, 10:34 a.m. UTC | #1
Bleep bloop.  Greetings Frode Nordahl, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


Patch skipped due to previous failure.

Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
Han Zhou Oct. 28, 2021, 6:51 a.m. UTC | #2
On Tue, Oct 19, 2021 at 3:14 AM Frode Nordahl <frode.nordahl@canonical.com>
wrote:
>
> When OVN is linked with an appropriate plugging implementation,
> CMS can request OVN to plug individual lports into the local
> Open vSwitch instance.
>
> The port and instance record will be maintained during the lifetime
> of the lport and it will be removed on release of lport.
>
> Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
> ---
>  controller/ovn-controller.c | 222 +++++++++++++++++++++++++++++++++++-
>  ovn-nb.xml                  |  21 ++++
>  tests/ovn-controller.at     |   1 -
>  tests/ovn-macros.at         |   2 +-
>  tests/ovn.at                | 121 ++++++++++++++++++++
>  5 files changed, 364 insertions(+), 3 deletions(-)
>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index ef2bdadc8..a92d2820a 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -937,6 +937,7 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
>      binding_register_ovs_idl(ovs_idl);
>      bfd_register_ovs_idl(ovs_idl);
>      physical_register_ovs_idl(ovs_idl);
> +    plug_register_ovs_idl(ovs_idl);
>  }
>
>  #define SB_NODES \
> @@ -2978,6 +2979,180 @@ flow_output_lflow_output_handler(struct
engine_node *node,
>      return true;
>  }
>
> +static void *
> +en_plug_provider_lookup_init(struct engine_node *node OVS_UNUSED,
> +                             struct engine_arg *arg OVS_UNUSED)
> +{
> +    return NULL;
> +}
> +
> +static void
> +en_plug_provider_lookup_cleanup(void *data OVS_UNUSED)
> +{
> +
> +}
> +
> +static void
> +en_plug_provider_lookup_run(struct engine_node *node,
> +                            void *data OVS_UNUSED)
> +{
> +    if (!plug_provider_has_providers()) {
> +        engine_set_node_state(node, EN_UNCHANGED);
> +        return;
> +    }
> +
> +    if (plug_provider_run_all()) {
> +        engine_set_node_state(node, EN_UPDATED);

Formally, this engine node should have its own data, which may be simply a
boolean flag, telling if something has changed in the "provider's internal
lookup table", and the plug_provider node would take this as its input and
read the flag. But it looks ok this way since there is no change handler
needed for this input and it would always trigger a full run (recompute) in
the plug_provider node. So I am fine with it.

> +    } else {
> +        engine_set_node_state(node, EN_UNCHANGED);
> +    }
> +}
> +
> +
> +struct ed_type_plug_provider {
> +    struct shash deleted_iface_ids;
> +    struct shash changed_iface_ids;
> +    bool pb_handler_has_run;
> +};
> +
> +static void *
> +en_plug_provider_init(struct engine_node *node OVS_UNUSED,
> +                      struct engine_arg *arg OVS_UNUSED)
> +{
> +    struct ed_type_plug_provider *data = xzalloc(sizeof *data);
> +
> +    shash_init(&data->deleted_iface_ids);
> +    shash_init(&data->changed_iface_ids);
> +    data->pb_handler_has_run = false;
> +    return data;
> +}
> +
> +static void
> +en_plug_provider_cleanup(void *data)
> +{
> +    struct ed_type_plug_provider *plug_provider_data = data;
> +
> +    shash_destroy_free_data(&plug_provider_data->deleted_iface_ids);
> +    shash_destroy_free_data(&plug_provider_data->changed_iface_ids);
> +}
> +
> +static void
> +init_plug_ctx(struct engine_node *node,
> +              void *data,
> +              struct plug_ctx_in *plug_ctx_in,
> +              struct plug_ctx_out *plug_ctx_out)
> +{
> +    struct ovsrec_open_vswitch_table *ovs_table =
> +        (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
> +            engine_get_input("OVS_open_vswitch", node));
> +    struct ovsrec_bridge_table *bridge_table =
> +        (struct ovsrec_bridge_table *)EN_OVSDB_GET(
> +            engine_get_input("OVS_bridge", node));
> +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
> +    const struct ovsrec_bridge *br_int = get_br_int(bridge_table,
ovs_table);
> +
> +    ovs_assert(br_int && chassis_id);
> +
> +    struct ovsdb_idl_index *sbrec_chassis_by_name =
> +        engine_ovsdb_node_get_index(
> +                engine_get_input("SB_chassis", node),
> +                "name");
> +
> +    const struct sbrec_chassis *chassis
> +        = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
> +    ovs_assert(chassis);
> +
> +    struct ovsrec_interface_table *iface_table =
> +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> +            engine_get_input("OVS_interface", node));
> +
> +    struct sbrec_port_binding_table *pb_table =
> +        (struct sbrec_port_binding_table *)EN_OVSDB_GET(
> +            engine_get_input("SB_port_binding", node));
> +
> +    struct ovsdb_idl_index *sbrec_port_binding_by_name =
> +        engine_ovsdb_node_get_index(
> +                engine_get_input("SB_port_binding", node),
> +                "name");
> +
> +    struct ovsdb_idl_index *ovsrec_port_by_interfaces =
> +        engine_ovsdb_node_get_index(
> +                engine_get_input("OVS_port", node),
> +                "interfaces");
> +
> +    struct ed_type_runtime_data *rt_data =
> +        engine_get_input_data("runtime_data", node);
> +
> +    struct ed_type_plug_provider *plug_provider_data = data;
> +
> +    plug_ctx_in->ovs_idl_txn = engine_get_context()->ovs_idl_txn;
> +    plug_ctx_in->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
> +    plug_ctx_in->ovsrec_port_by_interfaces = ovsrec_port_by_interfaces;
> +    plug_ctx_in->ovs_table = ovs_table;
> +    plug_ctx_in->br_int = br_int;
> +    plug_ctx_in->iface_table = iface_table;
> +    plug_ctx_in->chassis_rec = chassis;
> +    plug_ctx_in->port_binding_table = pb_table;
> +    plug_ctx_in->local_bindings = &rt_data->lbinding_data.bindings;
> +    plug_ctx_in->pb_handler_has_run =
plug_provider_data->pb_handler_has_run;
> +
> +    plug_ctx_out->deleted_iface_ids =
&plug_provider_data->deleted_iface_ids;
> +    plug_ctx_out->changed_iface_ids =
&plug_provider_data->changed_iface_ids;
> +}
> +
> +static void
> +en_plug_provider_run(struct engine_node *node,
> +                     void *data)
> +{
> +    if (!plug_provider_has_providers()) {
> +        engine_set_node_state(node, EN_UNCHANGED);
> +        return;
> +    }
> +    struct plug_ctx_in plug_ctx_in;
> +    struct plug_ctx_out plug_ctx_out;
> +    init_plug_ctx(node, data, &plug_ctx_in, &plug_ctx_out);
> +    plug_run(&plug_ctx_in, &plug_ctx_out);
> +    engine_set_node_state(node, EN_UPDATED);
> +}
> +
> +static bool
> +plug_provider_port_binding_handler(struct engine_node *node,
> +                                   void *data)
> +{
> +    if (!plug_provider_has_providers()) {
> +        engine_set_node_state(node, EN_UNCHANGED);
> +        return true;
> +    }
> +    struct plug_ctx_in plug_ctx_in;
> +    struct plug_ctx_out plug_ctx_out;
> +    init_plug_ctx(node, data, &plug_ctx_in, &plug_ctx_out);
> +
> +    struct ed_type_plug_provider *plug_provider_data = data;
> +
> +    bool handled = plug_handle_port_binding_changes(&plug_ctx_in,
> +                                                    &plug_ctx_out);
> +
> +    plug_provider_data->pb_handler_has_run = true;
> +
> +    return handled;
> +}
> +
> +static bool
> +plug_provider_ovs_interface_handler(struct engine_node *node,
> +                                    void *data)
> +{
> +    if (!plug_provider_has_providers()) {
> +        engine_set_node_state(node, EN_UNCHANGED);
> +        return true;
> +    }
> +
> +    struct plug_ctx_in plug_ctx_in;
> +    struct plug_ctx_out plug_ctx_out;
> +    init_plug_ctx(node, data, &plug_ctx_in, &plug_ctx_out);
> +
> +    return plug_handle_ovs_interface_changes(&plug_ctx_in,
&plug_ctx_out);
> +}
> +
>  struct ovn_controller_exit_args {
>      bool *exiting;
>      bool *restart;
> @@ -3214,6 +3389,11 @@ main(int argc, char *argv[])
>      ENGINE_NODE(flow_output, "flow_output");
>      ENGINE_NODE(addr_sets, "addr_sets");
>      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(port_groups, "port_groups");
> +    /* The plug provider has two engine nodes.  One that checks for and
reacts
> +     * to change to plug provider lookup tables, and another that reacts
to
> +     * change to OVS interface, OVN Port_Binding and runtime data */
> +    ENGINE_NODE(plug_provider_lookup, "plug_provider_lookup");
> +    ENGINE_NODE(plug_provider, "plug_provider");
>
>  #define SB_NODE(NAME, NAME_STR) ENGINE_NODE_SB(NAME, NAME_STR);
>      SB_NODES
> @@ -3240,6 +3420,24 @@ main(int argc, char *argv[])
>      engine_add_input(&en_non_vif_data, &en_ovs_interface,
>                       non_vif_data_ovs_iface_handler);
>
> +    engine_add_input(&en_plug_provider, &en_plug_provider_lookup, NULL);
> +    engine_add_input(&en_plug_provider, &en_ovs_open_vswitch, NULL);
> +    engine_add_input(&en_plug_provider, &en_ovs_bridge, NULL);
> +    engine_add_input(&en_plug_provider, &en_sb_chassis, NULL);
> +    engine_add_input(&en_plug_provider, &en_runtime_data,
> +                     engine_noop_handler);
> +    engine_add_input(&en_plug_provider, &en_sb_port_binding,
> +                     plug_provider_port_binding_handler);
> +    engine_add_input(&en_plug_provider, &en_ovs_port,
> +                     engine_noop_handler);

Could you add a comment for each noop handler (en_runtime_data and
en_ovs_port) added? It is always special when a node depends on an input
but doesn't need to handle the changes.
For en_ovs_port, I guess it is because it always changes together with
en_ovs_interface.
For en_runtime_data, I don't understand. The plug_ctx_in->local_binding
belongs to en_runtime_data. If it is changed, should the VIF plugs be
reconsidered?

In addition, I'd like to point out that it seems to me not quite necessary
to add the VIF plugging handling to the I-P engine. Initially in an earlier
version you had the logic embedded in the binding module, which had some
problems for I-P. Now that you moved the whole logic out, which is much
cleaner, I think it is not really necessary to get involved in the I-P
engine any more, unless a performance issue is foreseeable. Given
that the number of interfaces on a node is quite limited (hundreds would be
very big already), it doesn't seem so. The only thing would be iterating
the whole SB port-binding table, which I commented in patch 10 that we may
use an IDL index on the requested-chassis column to make it more efficient.
I don't have a strong opinion to move VIF plugging away from I-P because it
is implemented already and looks good to me, but just want to point out
that there is an alternative which could simplify things, especially if any
of my comments would make it hard to handle in I-P engine.

Thanks again for working on this feature!

Han

> +    engine_add_input(&en_plug_provider, &en_ovs_interface,
> +                     plug_provider_ovs_interface_handler);
> +    /* The node needs somewhere to output to run */
> +    engine_add_input(&en_flow_output, &en_plug_provider,
> +                     engine_noop_handler);
> +
> +
> +
>      /* Note: The order of inputs is important, all OVS interface changes
must
>       * be handled before any ct_zone changes.
>       */
> @@ -3800,7 +3998,18 @@ main(int argc, char *argv[])
>              engine_set_force_recompute(true);
>          }
>
> -        if (ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop) == 1) {
> +        int ovs_txn_status =
ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop);
> +        if (!ovs_txn_status) {
> +            /* The transaction failed. */
> +            struct ed_type_plug_provider *plug_provider_data =
engine_get_data(
> +                &en_plug_provider);
> +            if (plug_provider_data) {
> +
 plug_clear_deleted(&plug_provider_data->deleted_iface_ids);
> +
 plug_clear_changed(&plug_provider_data->changed_iface_ids);
> +            }
> +        } else if (ovs_txn_status == 1) {
> +            /* The transaction committed successfully
> +             * (or it did not change anything in the database). */
>              ct_zones_data = engine_get_data(&en_ct_zones);
>              if (ct_zones_data) {
>                  struct shash_node *iter, *iter_next;
> @@ -3813,6 +4022,17 @@ main(int argc, char *argv[])
>                      }
>                  }
>              }
> +
> +            struct ed_type_plug_provider *plug_provider_data =
engine_get_data(
> +                &en_plug_provider);
> +            if (plug_provider_data) {
> +
 plug_finish_deleted(&plug_provider_data->deleted_iface_ids);
> +
 plug_finish_changed(&plug_provider_data->changed_iface_ids);
> +            }
> +        } else if (ovs_txn_status == -1) {
> +            /* The commit is still in progress */
> +        } else {
> +            OVS_NOT_REACHED();
>          }
>
>          ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index e31578fb6..c619f6bdd 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -1031,6 +1031,27 @@
>              DHCP reply.
>            </p>
>          </column>
> +
> +        <group title="VIF Plugging Options">
> +          <column name="options" key="plug-type">
> +            If set, OVN will attempt to perform plugging of this VIF.
In order
> +            to get this port plugged by the OVN controller, OVN must be
built
> +            with support for VIF plugging.  The default behavior is for
the CMS
> +            to do the VIF plugging.  Each plug provider have their own
options
> +            namespaced by name, for example "plug:representor:key".
> +
> +            Please refer to the plug provider documentation located in
> +            Documentation/topics/plug_providers/ for more information.
> +          </column>
> +
> +          <column name="options" key="plug-mtu-request">
> +            Requested MTU for plugged interfaces.  When set the OVN
controller
> +            will fill the <ref table="Interface" column="mtu_request"/>
column
> +            of the Open vSwitch database's
> +            <ref table="Interface" db="vswitch"/> table.  This in turn
will
> +            make OVS vswitchd update the MTU of the linked interface.
> +          </column>
> +        </group>
>        </group>
>
>        <group title="Virtual port Options">
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index 7c683cbcc..41443a30a 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -803,6 +803,5 @@ OVN_CLEANUP_SBOX([hv])
>  OVN_CLEANUP_VSWITCH([main])
>  as ovn-sb
>  OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> -
>  AT_CLEANUP
>  ])
> diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
> index f06f2e68e..b3c2d72fa 100644
> --- a/tests/ovn-macros.at
> +++ b/tests/ovn-macros.at
> @@ -327,7 +327,7 @@ ovn_az_attach() {
>          -- --may-exist add-br br-int \
>          -- set bridge br-int fail-mode=secure
other-config:disable-in-band=true \
>          || return 1
> -    start_daemon ovn-controller || return 1
> +    start_daemon ovn-controller --enable-dummy-plug || return 1
>  }
>
>  # ovn_attach NETWORK BRIDGE IP [MASKLEN]
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 7cfdede77..168a0b301 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -28789,3 +28789,124 @@ AT_CHECK([grep -q "Not claiming"
hv1/ovn-controller.log], [1], [])
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn-controller - plugging])
> +AT_KEYWORDS([plugging])
> +
> +ovn_start
> +
> +net_add n1
> +sim_add hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +
> +sim_add hv2
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.2
> +
> +check ovn-nbctl ls-add lsw0
> +
> +check ovn-nbctl lsp-add lsw0 lsp1
> +check ovn-nbctl lsp-set-addresses lsp1 "f0:00:00:00:00:01 172.16.0.101"
> +check ovn-nbctl lsp-set-options lsp1 \
> +    requested-chassis=hv1 \
> +    plug-type=dummy \
> +    plug-mtu-request=42
> +
> +check ovn-nbctl lsp-add lsw0 lsp2
> +check ovn-nbctl lsp-set-addresses lsp2 "f0:00:00:00:00:02 172.16.0.102"
> +check ovn-nbctl lsp-set-options lsp2 \
> +    requested-chassis=hv2 \
> +    plug-type=dummy \
> +    plug-mtu-request=42
> +
> +wait_for_ports_up lsp1 lsp2
> +
> +lsp1_uuid=$(ovn-nbctl --bare --columns _uuid find Logical_Switch_Port
name=lsp1)
> +iface1_uuid=$(as hv1 ovs-vsctl --bare --columns _uuid find Interface
name=lsp1)
> +
> +lsp2_uuid=$(ovn-nbctl --bare --columns _uuid find Logical_Switch_Port
name=lsp2)
> +iface2_uuid=$(as hv2 ovs-vsctl --bare --columns _uuid find Interface
name=lsp2)
> +
> +# Check that the lport was plugged
> +AT_CHECK([test xvalue = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid}
options:plug-dummy-option)], [0], [])
> +AT_CHECK([test x42 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid}
mtu_request)], [0], [])
> +
> +# Check that updating the lport updates the local iface
> +check ovn-nbctl --wait=hv lsp-set-options lsp1 \
> +    requested-chassis=hv1 \
> +    plug-type=dummy \
> +    plug-mtu-request=43
> +OVS_WAIT_UNTIL([
> +    test x43 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid}
mtu_request)
> +])
> +
> +# Check that local modification of iface will trigger ovn-controller to
update
> +# the iface record
> +check as hv1 ovs-vsctl set interface ${iface1_uuid} mtu_request=44
> +OVS_WAIT_UNTIL([
> +    test x43 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid}
mtu_request)
> +])
> +
> +# Check that pointing requested-chassis somewhere else will unplug the
port
> +check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 \
> +    options:requested-chassis=non-existent-chassis
> +OVS_WAIT_UNTIL([
> +    ! as hv1 ovs-vsctl get Interface ${iface1_uuid} _uuid
> +])
> +
> +# Check that removing an lport will unplug it
> +AT_CHECK([test x${iface2_uuid} = x$(as hv2 ovs-vsctl get Interface
${iface2_uuid} _uuid)], [0], [])
> +check ovn-nbctl --wait=hv lsp-del ${lsp2_uuid}
> +OVS_WAIT_UNTIL([
> +    ! as hv2 ovs-vsctl get Interface ${iface2_uuid} _uuid
> +])
> +
> +# Check that port is unplugged when we simulate presence of a port
previously
> +# plugged by us in local OVSDB with no record in SB DB.
> +check as hv2 ovs-vsctl \
> +    -- add-port br-int vif1
> +
> +# From one moment it's there...
> +vif1_uuid=$(as hv2 ovs-vsctl --bare --columns _uuid find Interface
name=vif1)
> +OVS_WAIT_UNTIL([
> +    as hv2 ovs-vsctl get Interface ${vif1_uuid} _uuid
> +])
> +
> +# Add the external-ids we expect
> +check as hv2 ovs-vsctl \
> +    -- set Interface ${vif1_uuid} \
> +           external-ids:ovn-plugged=dummy \
> +           external-ids:iface-id=non-existing-lsp
> +
> +# ...to the next moment it's gone.
> +OVS_WAIT_UNTIL([
> +    ! as hv2 ovs-vsctl get Interface ${vif1_uuid} _uuid
> +])
> +
> +# Check that a warning is logged when CMS requests plugging of an
interface
> +# with lbinding already plugged by someone else.
> +check as hv2 ovs-vsctl \
> +    -- add-port br-int vif3 \
> +    -- set Interface vif3 \
> +       external-ids:iface-id=lsp3
> +
> +check ovn-nbctl lsp-add lsw0 lsp3
> +check ovn-nbctl lsp-set-addresses lsp3 "f0:00:00:00:00:03 172.16.0.103"
> +check ovn-nbctl lsp-set-options lsp3 \
> +    requested-chassis=hv2
> +
> +wait_for_ports_up lsp3
> +
> +check ovn-nbctl --wait=hv lsp-set-options lsp3 \
> +    requested-chassis=hv2 \
> +    plug-type=dummy
> +
> +OVS_WAIT_UNTIL([
> +    grep -q "CMS requested plugging of lport lsp3" hv2/ovn-controller.log
> +])
> +
> +OVN_CLEANUP([hv1],[hv2])
> +AT_CLEANUP
> +])
> --
> 2.32.0
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Frode Nordahl Oct. 28, 2021, 8:19 a.m. UTC | #3
On Thu, Oct 28, 2021 at 8:51 AM Han Zhou <hzhou@ovn.org> wrote:
>
>
>
> On Tue, Oct 19, 2021 at 3:14 AM Frode Nordahl <frode.nordahl@canonical.com> wrote:
> >
> > When OVN is linked with an appropriate plugging implementation,
> > CMS can request OVN to plug individual lports into the local
> > Open vSwitch instance.
> >
> > The port and instance record will be maintained during the lifetime
> > of the lport and it will be removed on release of lport.
> >
> > Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
> > ---
> >  controller/ovn-controller.c | 222 +++++++++++++++++++++++++++++++++++-
> >  ovn-nb.xml                  |  21 ++++
> >  tests/ovn-controller.at     |   1 -
> >  tests/ovn-macros.at         |   2 +-
> >  tests/ovn.at                | 121 ++++++++++++++++++++
> >  5 files changed, 364 insertions(+), 3 deletions(-)
> >
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index ef2bdadc8..a92d2820a 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -937,6 +937,7 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
> >      binding_register_ovs_idl(ovs_idl);
> >      bfd_register_ovs_idl(ovs_idl);
> >      physical_register_ovs_idl(ovs_idl);
> > +    plug_register_ovs_idl(ovs_idl);
> >  }
> >
> >  #define SB_NODES \
> > @@ -2978,6 +2979,180 @@ flow_output_lflow_output_handler(struct engine_node *node,
> >      return true;
> >  }
> >
> > +static void *
> > +en_plug_provider_lookup_init(struct engine_node *node OVS_UNUSED,
> > +                             struct engine_arg *arg OVS_UNUSED)
> > +{
> > +    return NULL;
> > +}
> > +
> > +static void
> > +en_plug_provider_lookup_cleanup(void *data OVS_UNUSED)
> > +{
> > +
> > +}
> > +
> > +static void
> > +en_plug_provider_lookup_run(struct engine_node *node,
> > +                            void *data OVS_UNUSED)
> > +{
> > +    if (!plug_provider_has_providers()) {
> > +        engine_set_node_state(node, EN_UNCHANGED);
> > +        return;
> > +    }
> > +
> > +    if (plug_provider_run_all()) {
> > +        engine_set_node_state(node, EN_UPDATED);
>
> Formally, this engine node should have its data, which may be simply a boolean flag, telling if something has changed in the "provider's internal lookup table", and the plug_provider node would take this as its input and read the flag. But it looks ok this way since there is no change handler needed for this input and it would always trigger a full run (recompute) in the plug_provider node. So I am fine with it.

Thank you for providing this insight.

> > +    } else {
> > +        engine_set_node_state(node, EN_UNCHANGED);
> > +    }
> > +}
> > +
> > +
> > +struct ed_type_plug_provider {
> > +    struct shash deleted_iface_ids;
> > +    struct shash changed_iface_ids;
> > +    bool pb_handler_has_run;
> > +};
> > +
> > +static void *
> > +en_plug_provider_init(struct engine_node *node OVS_UNUSED,
> > +                      struct engine_arg *arg OVS_UNUSED)
> > +{
> > +    struct ed_type_plug_provider *data = xzalloc(sizeof *data);
> > +
> > +    shash_init(&data->deleted_iface_ids);
> > +    shash_init(&data->changed_iface_ids);
> > +    data->pb_handler_has_run = false;
> > +    return data;
> > +}
> > +
> > +static void
> > +en_plug_provider_cleanup(void *data)
> > +{
> > +    struct ed_type_plug_provider *plug_provider_data = data;
> > +
> > +    shash_destroy_free_data(&plug_provider_data->deleted_iface_ids);
> > +    shash_destroy_free_data(&plug_provider_data->changed_iface_ids);
> > +}
> > +
> > +static void
> > +init_plug_ctx(struct engine_node *node,
> > +              void *data,
> > +              struct plug_ctx_in *plug_ctx_in,
> > +              struct plug_ctx_out *plug_ctx_out)
> > +{
> > +    struct ovsrec_open_vswitch_table *ovs_table =
> > +        (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
> > +            engine_get_input("OVS_open_vswitch", node));
> > +    struct ovsrec_bridge_table *bridge_table =
> > +        (struct ovsrec_bridge_table *)EN_OVSDB_GET(
> > +            engine_get_input("OVS_bridge", node));
> > +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
> > +    const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> > +
> > +    ovs_assert(br_int && chassis_id);
> > +
> > +    struct ovsdb_idl_index *sbrec_chassis_by_name =
> > +        engine_ovsdb_node_get_index(
> > +                engine_get_input("SB_chassis", node),
> > +                "name");
> > +
> > +    const struct sbrec_chassis *chassis
> > +        = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
> > +    ovs_assert(chassis);
> > +
> > +    struct ovsrec_interface_table *iface_table =
> > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> > +            engine_get_input("OVS_interface", node));
> > +
> > +    struct sbrec_port_binding_table *pb_table =
> > +        (struct sbrec_port_binding_table *)EN_OVSDB_GET(
> > +            engine_get_input("SB_port_binding", node));
> > +
> > +    struct ovsdb_idl_index *sbrec_port_binding_by_name =
> > +        engine_ovsdb_node_get_index(
> > +                engine_get_input("SB_port_binding", node),
> > +                "name");
> > +
> > +    struct ovsdb_idl_index *ovsrec_port_by_interfaces =
> > +        engine_ovsdb_node_get_index(
> > +                engine_get_input("OVS_port", node),
> > +                "interfaces");
> > +
> > +    struct ed_type_runtime_data *rt_data =
> > +        engine_get_input_data("runtime_data", node);
> > +
> > +    struct ed_type_plug_provider *plug_provider_data = data;
> > +
> > +    plug_ctx_in->ovs_idl_txn = engine_get_context()->ovs_idl_txn;
> > +    plug_ctx_in->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
> > +    plug_ctx_in->ovsrec_port_by_interfaces = ovsrec_port_by_interfaces;
> > +    plug_ctx_in->ovs_table = ovs_table;
> > +    plug_ctx_in->br_int = br_int;
> > +    plug_ctx_in->iface_table = iface_table;
> > +    plug_ctx_in->chassis_rec = chassis;
> > +    plug_ctx_in->port_binding_table = pb_table;
> > +    plug_ctx_in->local_bindings = &rt_data->lbinding_data.bindings;
> > +    plug_ctx_in->pb_handler_has_run = plug_provider_data->pb_handler_has_run;
> > +
> > +    plug_ctx_out->deleted_iface_ids = &plug_provider_data->deleted_iface_ids;
> > +    plug_ctx_out->changed_iface_ids = &plug_provider_data->changed_iface_ids;
> > +}
> > +
> > +static void
> > +en_plug_provider_run(struct engine_node *node,
> > +                     void *data)
> > +{
> > +    if (!plug_provider_has_providers()) {
> > +        engine_set_node_state(node, EN_UNCHANGED);
> > +        return;
> > +    }
> > +    struct plug_ctx_in plug_ctx_in;
> > +    struct plug_ctx_out plug_ctx_out;
> > +    init_plug_ctx(node, data, &plug_ctx_in, &plug_ctx_out);
> > +    plug_run(&plug_ctx_in, &plug_ctx_out);
> > +    engine_set_node_state(node, EN_UPDATED);
> > +}
> > +
> > +static bool
> > +plug_provider_port_binding_handler(struct engine_node *node,
> > +                                   void *data)
> > +{
> > +    if (!plug_provider_has_providers()) {
> > +        engine_set_node_state(node, EN_UNCHANGED);
> > +        return true;
> > +    }
> > +    struct plug_ctx_in plug_ctx_in;
> > +    struct plug_ctx_out plug_ctx_out;
> > +    init_plug_ctx(node, data, &plug_ctx_in, &plug_ctx_out);
> > +
> > +    struct ed_type_plug_provider *plug_provider_data = data;
> > +
> > +    bool handled = plug_handle_port_binding_changes(&plug_ctx_in,
> > +                                                    &plug_ctx_out);
> > +
> > +    plug_provider_data->pb_handler_has_run = true;
> > +
> > +    return handled;
> > +}
> > +
> > +static bool
> > +plug_provider_ovs_interface_handler(struct engine_node *node,
> > +                                    void *data)
> > +{
> > +    if (!plug_provider_has_providers()) {
> > +        engine_set_node_state(node, EN_UNCHANGED);
> > +        return true;
> > +    }
> > +
> > +    struct plug_ctx_in plug_ctx_in;
> > +    struct plug_ctx_out plug_ctx_out;
> > +    init_plug_ctx(node, data, &plug_ctx_in, &plug_ctx_out);
> > +
> > +    return plug_handle_ovs_interface_changes(&plug_ctx_in, &plug_ctx_out);
> > +}
> > +
> >  struct ovn_controller_exit_args {
> >      bool *exiting;
> >      bool *restart;
> > @@ -3214,6 +3389,11 @@ main(int argc, char *argv[])
> >      ENGINE_NODE(flow_output, "flow_output");
> >      ENGINE_NODE(addr_sets, "addr_sets");
> >      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(port_groups, "port_groups");
> > +    /* The plug provider has two engine nodes.  One that checks for and reacts
> > +     * to change to plug provider lookup tables, and another that reacts to
> > +     * change to OVS interface, OVN Port_Binding and runtime data */
> > +    ENGINE_NODE(plug_provider_lookup, "plug_provider_lookup");
> > +    ENGINE_NODE(plug_provider, "plug_provider");
> >
> >  #define SB_NODE(NAME, NAME_STR) ENGINE_NODE_SB(NAME, NAME_STR);
> >      SB_NODES
> > @@ -3240,6 +3420,24 @@ main(int argc, char *argv[])
> >      engine_add_input(&en_non_vif_data, &en_ovs_interface,
> >                       non_vif_data_ovs_iface_handler);
> >
> > +    engine_add_input(&en_plug_provider, &en_plug_provider_lookup, NULL);
> > +    engine_add_input(&en_plug_provider, &en_ovs_open_vswitch, NULL);
> > +    engine_add_input(&en_plug_provider, &en_ovs_bridge, NULL);
> > +    engine_add_input(&en_plug_provider, &en_sb_chassis, NULL);
> > +    engine_add_input(&en_plug_provider, &en_runtime_data,
> > +                     engine_noop_handler);
> > +    engine_add_input(&en_plug_provider, &en_sb_port_binding,
> > +                     plug_provider_port_binding_handler);
> > +    engine_add_input(&en_plug_provider, &en_ovs_port,
> > +                     engine_noop_handler);
>
> Could you add a comment for each noop handler (en_runtime_data and en_ovs_port) added? It is always special when a node depends on an input but doesn't need to handle the changes.
> For en_ovs_port, I guess it is because it always changes together with en_ovs_interface.

Ack, will add comments. Yes ovs_port and ovs_interface will always
change together.

> For en_runtime_data, I don't understand. The plug_ctx_in->local_binding belongs to en_runtime_data. If it is changed, should the VIF plugs be reconsidered?

The en_runtime_data input is mostly there to register our dependency
on the data. The reasoning behind adding a noop handler is similar to
the port/interface relationship. I was thinking that whenever
en_runtime_data changes, there will also be port/interface or pb
changes too, so the changes would be handled when those are called.

If this is ok I'll add comments that explain this.

> In addition, I'd like to point out that it seems to me not quite necessary to add the VIF plugging handling to the I-P engine. Initially in an earlier version you had the logic embedded in the binding module, which had some problems for I-P. Now that you moved the whole logic out, which is much cleaner, I think it is not really necessary to get involved in the I-P engine any more, unless there is a performance issue foreseeable. Given that the number of interfaces on a node is quite limited (hundreds would be very big already), it doesn't seem so. The only thing would be iterating the whole SB port-binding table, which I commented in patch 10 that we may use an IDL index on the requested-chassis column to make it more efficient. I don't have a strong opinion to move VIF plugging away from I-P because it is implemented already and looks good to me, but just want to point out that there is an alternative which could simplify things, especially if any of my comments would make it hard to handle in I-P engine.

Adding separate I-P engine nodes was on the back on review feedback
from Numan (https://mail.openvswitch.org/pipermail/ovs-dev/2021-September/387799.html)
who pointed out that adding more complexity directly to the binding
module was not desirable. He did offer to piggy-back on the runtime
engine node, but I thought it would be cleaner with separate nodes.
The need for detecting and reacting to changes in the plug provider
internal lookup tables is also a contributing factor.

> Thanks again for working on this feature!

Thank you for providing your insights and thorough reviews, much appreciated!
Frode Nordahl Nov. 2, 2021, 9:10 a.m. UTC | #4
On Thu, Oct 28, 2021 at 10:19 AM Frode Nordahl
<frode.nordahl@canonical.com> wrote:
>
> On Thu, Oct 28, 2021 at 8:51 AM Han Zhou <hzhou@ovn.org> wrote:
> >
> >
> >
> > On Tue, Oct 19, 2021 at 3:14 AM Frode Nordahl <frode.nordahl@canonical.com> wrote:
> > >
> > > When OVN is linked with an appropriate plugging implementation,
> > > CMS can request OVN to plug individual lports into the local
> > > Open vSwitch instance.
> > >
> > > The port and instance record will be maintained during the lifetime
> > > of the lport and it will be removed on release of lport.
> > >
> > > Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
> > > ---
> > >  controller/ovn-controller.c | 222 +++++++++++++++++++++++++++++++++++-
> > >  ovn-nb.xml                  |  21 ++++
> > >  tests/ovn-controller.at     |   1 -
> > >  tests/ovn-macros.at         |   2 +-
> > >  tests/ovn.at                | 121 ++++++++++++++++++++
> > >  5 files changed, 364 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > > index ef2bdadc8..a92d2820a 100644
> > > --- a/controller/ovn-controller.c
> > > +++ b/controller/ovn-controller.c
> > > @@ -937,6 +937,7 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
> > >      binding_register_ovs_idl(ovs_idl);
> > >      bfd_register_ovs_idl(ovs_idl);
> > >      physical_register_ovs_idl(ovs_idl);
> > > +    plug_register_ovs_idl(ovs_idl);
> > >  }
> > >
> > >  #define SB_NODES \
> > > @@ -2978,6 +2979,180 @@ flow_output_lflow_output_handler(struct engine_node *node,
> > >      return true;
> > >  }
> > >
> > > +static void *
> > > +en_plug_provider_lookup_init(struct engine_node *node OVS_UNUSED,
> > > +                             struct engine_arg *arg OVS_UNUSED)
> > > +{
> > > +    return NULL;
> > > +}
> > > +
> > > +static void
> > > +en_plug_provider_lookup_cleanup(void *data OVS_UNUSED)
> > > +{
> > > +
> > > +}
> > > +
> > > +static void
> > > +en_plug_provider_lookup_run(struct engine_node *node,
> > > +                            void *data OVS_UNUSED)
> > > +{
> > > +    if (!plug_provider_has_providers()) {
> > > +        engine_set_node_state(node, EN_UNCHANGED);
> > > +        return;
> > > +    }
> > > +
> > > +    if (plug_provider_run_all()) {
> > > +        engine_set_node_state(node, EN_UPDATED);
> >
> > Formally, this engine node should have its data, which may be simply a boolean flag, telling if something has changed in the "provider's internal lookup table", and the plug_provider node would take this as its input and read the flag. But it looks ok this way since there is no change handler needed for this input and it would always trigger a full run (recompute) in the plug_provider node. So I am fine with it.
>
> Thank you for providing this insight.
>
> > > +    } else {
> > > +        engine_set_node_state(node, EN_UNCHANGED);
> > > +    }
> > > +}
> > > +
> > > +
> > > +struct ed_type_plug_provider {
> > > +    struct shash deleted_iface_ids;
> > > +    struct shash changed_iface_ids;
> > > +    bool pb_handler_has_run;
> > > +};
> > > +
> > > +static void *
> > > +en_plug_provider_init(struct engine_node *node OVS_UNUSED,
> > > +                      struct engine_arg *arg OVS_UNUSED)
> > > +{
> > > +    struct ed_type_plug_provider *data = xzalloc(sizeof *data);
> > > +
> > > +    shash_init(&data->deleted_iface_ids);
> > > +    shash_init(&data->changed_iface_ids);
> > > +    data->pb_handler_has_run = false;
> > > +    return data;
> > > +}
> > > +
> > > +static void
> > > +en_plug_provider_cleanup(void *data)
> > > +{
> > > +    struct ed_type_plug_provider *plug_provider_data = data;
> > > +
> > > +    shash_destroy_free_data(&plug_provider_data->deleted_iface_ids);
> > > +    shash_destroy_free_data(&plug_provider_data->changed_iface_ids);
> > > +}
> > > +
> > > +static void
> > > +init_plug_ctx(struct engine_node *node,
> > > +              void *data,
> > > +              struct plug_ctx_in *plug_ctx_in,
> > > +              struct plug_ctx_out *plug_ctx_out)
> > > +{
> > > +    struct ovsrec_open_vswitch_table *ovs_table =
> > > +        (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
> > > +            engine_get_input("OVS_open_vswitch", node));
> > > +    struct ovsrec_bridge_table *bridge_table =
> > > +        (struct ovsrec_bridge_table *)EN_OVSDB_GET(
> > > +            engine_get_input("OVS_bridge", node));
> > > +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
> > > +    const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> > > +
> > > +    ovs_assert(br_int && chassis_id);
> > > +
> > > +    struct ovsdb_idl_index *sbrec_chassis_by_name =
> > > +        engine_ovsdb_node_get_index(
> > > +                engine_get_input("SB_chassis", node),
> > > +                "name");
> > > +
> > > +    const struct sbrec_chassis *chassis
> > > +        = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
> > > +    ovs_assert(chassis);
> > > +
> > > +    struct ovsrec_interface_table *iface_table =
> > > +        (struct ovsrec_interface_table *)EN_OVSDB_GET(
> > > +            engine_get_input("OVS_interface", node));
> > > +
> > > +    struct sbrec_port_binding_table *pb_table =
> > > +        (struct sbrec_port_binding_table *)EN_OVSDB_GET(
> > > +            engine_get_input("SB_port_binding", node));
> > > +
> > > +    struct ovsdb_idl_index *sbrec_port_binding_by_name =
> > > +        engine_ovsdb_node_get_index(
> > > +                engine_get_input("SB_port_binding", node),
> > > +                "name");
> > > +
> > > +    struct ovsdb_idl_index *ovsrec_port_by_interfaces =
> > > +        engine_ovsdb_node_get_index(
> > > +                engine_get_input("OVS_port", node),
> > > +                "interfaces");
> > > +
> > > +    struct ed_type_runtime_data *rt_data =
> > > +        engine_get_input_data("runtime_data", node);
> > > +
> > > +    struct ed_type_plug_provider *plug_provider_data = data;
> > > +
> > > +    plug_ctx_in->ovs_idl_txn = engine_get_context()->ovs_idl_txn;
> > > +    plug_ctx_in->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
> > > +    plug_ctx_in->ovsrec_port_by_interfaces = ovsrec_port_by_interfaces;
> > > +    plug_ctx_in->ovs_table = ovs_table;
> > > +    plug_ctx_in->br_int = br_int;
> > > +    plug_ctx_in->iface_table = iface_table;
> > > +    plug_ctx_in->chassis_rec = chassis;
> > > +    plug_ctx_in->port_binding_table = pb_table;
> > > +    plug_ctx_in->local_bindings = &rt_data->lbinding_data.bindings;
> > > +    plug_ctx_in->pb_handler_has_run = plug_provider_data->pb_handler_has_run;
> > > +
> > > +    plug_ctx_out->deleted_iface_ids = &plug_provider_data->deleted_iface_ids;
> > > +    plug_ctx_out->changed_iface_ids = &plug_provider_data->changed_iface_ids;
> > > +}
> > > +
> > > +static void
> > > +en_plug_provider_run(struct engine_node *node,
> > > +                     void *data)
> > > +{
> > > +    if (!plug_provider_has_providers()) {
> > > +        engine_set_node_state(node, EN_UNCHANGED);
> > > +        return;
> > > +    }
> > > +    struct plug_ctx_in plug_ctx_in;
> > > +    struct plug_ctx_out plug_ctx_out;
> > > +    init_plug_ctx(node, data, &plug_ctx_in, &plug_ctx_out);
> > > +    plug_run(&plug_ctx_in, &plug_ctx_out);
> > > +    engine_set_node_state(node, EN_UPDATED);
> > > +}
> > > +
> > > +static bool
> > > +plug_provider_port_binding_handler(struct engine_node *node,
> > > +                                   void *data)
> > > +{
> > > +    if (!plug_provider_has_providers()) {
> > > +        engine_set_node_state(node, EN_UNCHANGED);
> > > +        return true;
> > > +    }
> > > +    struct plug_ctx_in plug_ctx_in;
> > > +    struct plug_ctx_out plug_ctx_out;
> > > +    init_plug_ctx(node, data, &plug_ctx_in, &plug_ctx_out);
> > > +
> > > +    struct ed_type_plug_provider *plug_provider_data = data;
> > > +
> > > +    bool handled = plug_handle_port_binding_changes(&plug_ctx_in,
> > > +                                                    &plug_ctx_out);
> > > +
> > > +    plug_provider_data->pb_handler_has_run = true;
> > > +
> > > +    return handled;
> > > +}
> > > +
> > > +static bool
> > > +plug_provider_ovs_interface_handler(struct engine_node *node,
> > > +                                    void *data)
> > > +{
> > > +    if (!plug_provider_has_providers()) {
> > > +        engine_set_node_state(node, EN_UNCHANGED);
> > > +        return true;
> > > +    }
> > > +
> > > +    struct plug_ctx_in plug_ctx_in;
> > > +    struct plug_ctx_out plug_ctx_out;
> > > +    init_plug_ctx(node, data, &plug_ctx_in, &plug_ctx_out);
> > > +
> > > +    return plug_handle_ovs_interface_changes(&plug_ctx_in, &plug_ctx_out);
> > > +}
> > > +
> > >  struct ovn_controller_exit_args {
> > >      bool *exiting;
> > >      bool *restart;
> > > @@ -3214,6 +3389,11 @@ main(int argc, char *argv[])
> > >      ENGINE_NODE(flow_output, "flow_output");
> > >      ENGINE_NODE(addr_sets, "addr_sets");
> > >      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(port_groups, "port_groups");
> > > +    /* The plug provider has two engine nodes.  One that checks for and reacts
> > > +     * to change to plug provider lookup tables, and another that reacts to
> > > +     * change to OVS interface, OVN Port_Binding and runtime data */
> > > +    ENGINE_NODE(plug_provider_lookup, "plug_provider_lookup");
> > > +    ENGINE_NODE(plug_provider, "plug_provider");
> > >
> > >  #define SB_NODE(NAME, NAME_STR) ENGINE_NODE_SB(NAME, NAME_STR);
> > >      SB_NODES
> > > @@ -3240,6 +3420,24 @@ main(int argc, char *argv[])
> > >      engine_add_input(&en_non_vif_data, &en_ovs_interface,
> > >                       non_vif_data_ovs_iface_handler);
> > >
> > > +    engine_add_input(&en_plug_provider, &en_plug_provider_lookup, NULL);
> > > +    engine_add_input(&en_plug_provider, &en_ovs_open_vswitch, NULL);
> > > +    engine_add_input(&en_plug_provider, &en_ovs_bridge, NULL);
> > > +    engine_add_input(&en_plug_provider, &en_sb_chassis, NULL);
> > > +    engine_add_input(&en_plug_provider, &en_runtime_data,
> > > +                     engine_noop_handler);
> > > +    engine_add_input(&en_plug_provider, &en_sb_port_binding,
> > > +                     plug_provider_port_binding_handler);
> > > +    engine_add_input(&en_plug_provider, &en_ovs_port,
> > > +                     engine_noop_handler);
> >
> > Could you add a comment for each noop handler (en_runtime_data and en_ovs_port) added? It is always special when a node depends on an input but doesn't need to handle the changes.
> > For en_ovs_port, I guess it is because it always changes together with en_ovs_interface.
>
> Ack, will add comments. Yes ovs_port and ovs_interface will always
> change together.
>
> > For en_runtime_data, I don't understand. The plug_ctx_in->local_binding belongs to en_runtime_data. If it is changed, should the VIF plugs be reconsidered?
>
> The en_runtime_data input is mostly there to register our dependency
> on the data. The reasoning behind adding a noop handler is similar to
> the port/interface relationship. I was thinking that whenever
> en_runtime_data changes, there will also be port/interface or pb
> changes too, so the changes would be handled when those are called.
>
> If this is ok I'll add comments that explain this.
>
> > In addition, I'd like to point out that it seems to me not quite necessary to add the VIF plugging handling to the I-P engine. Initially in an earlier version you had the logic embedded in the binding module, which had some problems for I-P. Now that you moved the whole logic out, which is much cleaner, I think it is not really necessary to get involved in the I-P engine any more, unless there is a performance issue foreseeable. Given that the number of interfaces on a node is quite limited (hundreds would be very big already), it doesn't seem so. The only thing would be iterating the whole SB port-binding table, which I commented in patch 10 that we may use an IDL index on the requested-chassis column to make it more efficient. I don't have a strong opinion to move VIF plugging away from I-P because it is implemented already and looks good to me, but just want to point out that there is an alternative which could simplify things, especially if any of my comments would make it hard to handle in I-P engine.
>
> Adding separate I-P engine nodes was on the back on review feedback
> from Numan (https://mail.openvswitch.org/pipermail/ovs-dev/2021-September/387799.html)
> who pointed out that adding more complexity directly to the binding
> module was not desirable. He did offer to piggy-back on the runtime
> engine node, but I thought it would be cleaner with separate nodes.
> The need for detecting and reacting to changes in the plug provider
> internal lookup tables is also a contributing factor.

After our discussion on IRC following the previous OVN meeting I have
investigated a bit what it would look like to not introduce I-P for
VIF plugging at this time.  I extended the current test case with this
patch:

diff --git a/tests/ovn.at b/tests/ovn.at
index 82303bf1b..7ba1f1d34 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -28907,6 +28907,58 @@ OVS_WAIT_UNTIL([
     grep -q "CMS requested plugging of lport lsp3" hv2/ovn-controller.log
 ])

+check as hv2 ovs-vsctl del-port br-int vif3
+check ovn-nbctl lsp-del lsp1
+check ovn-nbctl lsp-del lsp3
+
+for n in {1..200}; do
+    check ovn-nbctl lsp-add lsw0 lsp1-${n}
+    check ovn-nbctl lsp-set-addresses lsp1-${n} \
+        "f0:00:00:00:00:02 172.16.1.${n}"
+    check ovn-nbctl lsp-set-options lsp1-${n} \
+        requested-chassis=hv1 \
+        vif-plug-type=dummy \
+        vif-plug-mtu-request=42
+done
+for n in {1..200}; do
+    check ovn-nbctl lsp-add lsw0 lsp2-${n}
+    check ovn-nbctl lsp-set-addresses lsp2-${n} \
+        "f0:00:00:00:00:02 172.16.2.${n}"
+    check ovn-nbctl lsp-set-options lsp2-${n} \
+        requested-chassis=hv1 \
+        vif-plug-type=dummy \
+        vif-plug-mtu-request=42
+done
+for n in {1..200}; do
+    check ovn-nbctl lsp-add lsw0 lsp3-${n}
+    check ovn-nbctl lsp-set-addresses lsp3-${n} \
+        "f0:00:00:00:00:02 172.16.3.${n}"
+    check ovn-nbctl lsp-set-options lsp3-${n} \
+        requested-chassis=hv1 \
+        vif-plug-type=dummy \
+        vif-plug-mtu-request=42
+done
+for n in {1..200}; do
+    check ovn-nbctl lsp-add lsw0 lsp4-${n}
+    check ovn-nbctl lsp-set-addresses lsp4-${n} \
+        "f0:00:00:00:00:02 172.16.4.${n}"
+    check ovn-nbctl lsp-set-options lsp4-${n} \
+        requested-chassis=hv1 \
+        vif-plug-type=dummy \
+        vif-plug-mtu-request=42
+done
+
+wait_for_ports_up
+
+for n in {95..105}; do
+    check ovn-nbctl --wait=hv lsp-del lsp1-${n}
+    check ovn-nbctl --wait=hv lsp-del lsp2-${n}
+    check ovn-nbctl --wait=hv lsp-del lsp3-${n}
+    check ovn-nbctl --wait=hv lsp-del lsp4-${n}
+done
+
+as hv1 ovn-appctl -t ovn-controller stopwatch/show vif-plug-run
+
 OVN_CLEANUP([hv1],[hv2])
 AT_CLEANUP
 ])

Essentially adding 800 plugged ports to a single hv, and then removing
a small number of ports. With this test there is negligible difference
in total runtime of the test and the stopwatch results for
vif_plug_run looks like this:

Statistics for 'vif-plug-run'
  Total samples: 5161
  Maximum: 6 msec
  Minimum: 0 msec
  95th percentile: 3.908490 msec
  Short term average: 1.430664 msec
  Long term average: 2.234984 msec

In a real world scenario we would have to add some time for
checking/refreshing lookup tables, but given this is kernel interfaces
and currently a relatively low number of ports I don't think that will
add much to the runtime.

So to conclude: given we are iterating over Port_Binding table using
an index, which was made possible by introducing the
`requested_chassis` column, I agree with you that we do not need to
introduce I-P for VIF plug providers at this time, let's revisit in
the event plug providers grow support for Scalable Functions, which
would increase density.

I'll put up a reroll of the remaining patches shortly to address this
and the other points discussed.

Cheers!
diff mbox series

Patch

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index ef2bdadc8..a92d2820a 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -937,6 +937,7 @@  ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
     binding_register_ovs_idl(ovs_idl);
     bfd_register_ovs_idl(ovs_idl);
     physical_register_ovs_idl(ovs_idl);
+    plug_register_ovs_idl(ovs_idl);
 }
 
 #define SB_NODES \
@@ -2978,6 +2979,180 @@  flow_output_lflow_output_handler(struct engine_node *node,
     return true;
 }
 
+static void *
+en_plug_provider_lookup_init(struct engine_node *node OVS_UNUSED,
+                             struct engine_arg *arg OVS_UNUSED)
+{
+    return NULL;
+}
+
+static void
+en_plug_provider_lookup_cleanup(void *data OVS_UNUSED)
+{
+
+}
+
+static void
+en_plug_provider_lookup_run(struct engine_node *node,
+                            void *data OVS_UNUSED)
+{
+    if (!plug_provider_has_providers()) {
+        engine_set_node_state(node, EN_UNCHANGED);
+        return;
+    }
+
+    if (plug_provider_run_all()) {
+        engine_set_node_state(node, EN_UPDATED);
+    } else {
+        engine_set_node_state(node, EN_UNCHANGED);
+    }
+}
+
+
+struct ed_type_plug_provider {
+    struct shash deleted_iface_ids;
+    struct shash changed_iface_ids;
+    bool pb_handler_has_run;
+};
+
+static void *
+en_plug_provider_init(struct engine_node *node OVS_UNUSED,
+                      struct engine_arg *arg OVS_UNUSED)
+{
+    struct ed_type_plug_provider *data = xzalloc(sizeof *data);
+
+    shash_init(&data->deleted_iface_ids);
+    shash_init(&data->changed_iface_ids);
+    data->pb_handler_has_run = false;
+    return data;
+}
+
+static void
+en_plug_provider_cleanup(void *data)
+{
+    struct ed_type_plug_provider *plug_provider_data = data;
+
+    shash_destroy_free_data(&plug_provider_data->deleted_iface_ids);
+    shash_destroy_free_data(&plug_provider_data->changed_iface_ids);
+}
+
+static void
+init_plug_ctx(struct engine_node *node,
+              void *data,
+              struct plug_ctx_in *plug_ctx_in,
+              struct plug_ctx_out *plug_ctx_out)
+{
+    struct ovsrec_open_vswitch_table *ovs_table =
+        (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
+            engine_get_input("OVS_open_vswitch", node));
+    struct ovsrec_bridge_table *bridge_table =
+        (struct ovsrec_bridge_table *)EN_OVSDB_GET(
+            engine_get_input("OVS_bridge", node));
+    const char *chassis_id = get_ovs_chassis_id(ovs_table);
+    const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
+
+    ovs_assert(br_int && chassis_id);
+
+    struct ovsdb_idl_index *sbrec_chassis_by_name =
+        engine_ovsdb_node_get_index(
+                engine_get_input("SB_chassis", node),
+                "name");
+
+    const struct sbrec_chassis *chassis
+        = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
+    ovs_assert(chassis);
+
+    struct ovsrec_interface_table *iface_table =
+        (struct ovsrec_interface_table *)EN_OVSDB_GET(
+            engine_get_input("OVS_interface", node));
+
+    struct sbrec_port_binding_table *pb_table =
+        (struct sbrec_port_binding_table *)EN_OVSDB_GET(
+            engine_get_input("SB_port_binding", node));
+
+    struct ovsdb_idl_index *sbrec_port_binding_by_name =
+        engine_ovsdb_node_get_index(
+                engine_get_input("SB_port_binding", node),
+                "name");
+
+    struct ovsdb_idl_index *ovsrec_port_by_interfaces =
+        engine_ovsdb_node_get_index(
+                engine_get_input("OVS_port", node),
+                "interfaces");
+
+    struct ed_type_runtime_data *rt_data =
+        engine_get_input_data("runtime_data", node);
+
+    struct ed_type_plug_provider *plug_provider_data = data;
+
+    plug_ctx_in->ovs_idl_txn = engine_get_context()->ovs_idl_txn;
+    plug_ctx_in->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
+    plug_ctx_in->ovsrec_port_by_interfaces = ovsrec_port_by_interfaces;
+    plug_ctx_in->ovs_table = ovs_table;
+    plug_ctx_in->br_int = br_int;
+    plug_ctx_in->iface_table = iface_table;
+    plug_ctx_in->chassis_rec = chassis;
+    plug_ctx_in->port_binding_table = pb_table;
+    plug_ctx_in->local_bindings = &rt_data->lbinding_data.bindings;
+    plug_ctx_in->pb_handler_has_run = plug_provider_data->pb_handler_has_run;
+
+    plug_ctx_out->deleted_iface_ids = &plug_provider_data->deleted_iface_ids;
+    plug_ctx_out->changed_iface_ids = &plug_provider_data->changed_iface_ids;
+}
+
+static void
+en_plug_provider_run(struct engine_node *node,
+                     void *data)
+{
+    if (!plug_provider_has_providers()) {
+        engine_set_node_state(node, EN_UNCHANGED);
+        return;
+    }
+    struct plug_ctx_in plug_ctx_in;
+    struct plug_ctx_out plug_ctx_out;
+    init_plug_ctx(node, data, &plug_ctx_in, &plug_ctx_out);
+    plug_run(&plug_ctx_in, &plug_ctx_out);
+    engine_set_node_state(node, EN_UPDATED);
+}
+
+static bool
+plug_provider_port_binding_handler(struct engine_node *node,
+                                   void *data)
+{
+    if (!plug_provider_has_providers()) {
+        engine_set_node_state(node, EN_UNCHANGED);
+        return true;
+    }
+    struct plug_ctx_in plug_ctx_in;
+    struct plug_ctx_out plug_ctx_out;
+    init_plug_ctx(node, data, &plug_ctx_in, &plug_ctx_out);
+
+    struct ed_type_plug_provider *plug_provider_data = data;
+
+    bool handled = plug_handle_port_binding_changes(&plug_ctx_in,
+                                                    &plug_ctx_out);
+
+    plug_provider_data->pb_handler_has_run = true;
+
+    return handled;
+}
+
+static bool
+plug_provider_ovs_interface_handler(struct engine_node *node,
+                                    void *data)
+{
+    if (!plug_provider_has_providers()) {
+        engine_set_node_state(node, EN_UNCHANGED);
+        return true;
+    }
+
+    struct plug_ctx_in plug_ctx_in;
+    struct plug_ctx_out plug_ctx_out;
+    init_plug_ctx(node, data, &plug_ctx_in, &plug_ctx_out);
+
+    return plug_handle_ovs_interface_changes(&plug_ctx_in, &plug_ctx_out);
+}
+
 struct ovn_controller_exit_args {
     bool *exiting;
     bool *restart;
@@ -3214,6 +3389,11 @@  main(int argc, char *argv[])
     ENGINE_NODE(flow_output, "flow_output");
     ENGINE_NODE(addr_sets, "addr_sets");
     ENGINE_NODE_WITH_CLEAR_TRACK_DATA(port_groups, "port_groups");
+    /* The plug provider has two engine nodes.  One that checks for and reacts
+     * to change to plug provider lookup tables, and another that reacts to
+     * change to OVS interface, OVN Port_Binding and runtime data */
+    ENGINE_NODE(plug_provider_lookup, "plug_provider_lookup");
+    ENGINE_NODE(plug_provider, "plug_provider");
 
 #define SB_NODE(NAME, NAME_STR) ENGINE_NODE_SB(NAME, NAME_STR);
     SB_NODES
@@ -3240,6 +3420,24 @@  main(int argc, char *argv[])
     engine_add_input(&en_non_vif_data, &en_ovs_interface,
                      non_vif_data_ovs_iface_handler);
 
+    engine_add_input(&en_plug_provider, &en_plug_provider_lookup, NULL);
+    engine_add_input(&en_plug_provider, &en_ovs_open_vswitch, NULL);
+    engine_add_input(&en_plug_provider, &en_ovs_bridge, NULL);
+    engine_add_input(&en_plug_provider, &en_sb_chassis, NULL);
+    engine_add_input(&en_plug_provider, &en_runtime_data,
+                     engine_noop_handler);
+    engine_add_input(&en_plug_provider, &en_sb_port_binding,
+                     plug_provider_port_binding_handler);
+    engine_add_input(&en_plug_provider, &en_ovs_port,
+                     engine_noop_handler);
+    engine_add_input(&en_plug_provider, &en_ovs_interface,
+                     plug_provider_ovs_interface_handler);
+    /* The node needs somewhere to output to run */
+    engine_add_input(&en_flow_output, &en_plug_provider,
+                     engine_noop_handler);
+
+
+
     /* Note: The order of inputs is important, all OVS interface changes must
      * be handled before any ct_zone changes.
      */
@@ -3800,7 +3998,18 @@  main(int argc, char *argv[])
             engine_set_force_recompute(true);
         }
 
-        if (ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop) == 1) {
+        int ovs_txn_status = ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop);
+        if (!ovs_txn_status) {
+            /* The transaction failed. */
+            struct ed_type_plug_provider *plug_provider_data = engine_get_data(
+                &en_plug_provider);
+            if (plug_provider_data) {
+                plug_clear_deleted(&plug_provider_data->deleted_iface_ids);
+                plug_clear_changed(&plug_provider_data->changed_iface_ids);
+            }
+        } else if (ovs_txn_status == 1) {
+            /* The transaction committed successfully
+             * (or it did not change anything in the database). */
             ct_zones_data = engine_get_data(&en_ct_zones);
             if (ct_zones_data) {
                 struct shash_node *iter, *iter_next;
@@ -3813,6 +4022,17 @@  main(int argc, char *argv[])
                     }
                 }
             }
+
+            struct ed_type_plug_provider *plug_provider_data = engine_get_data(
+                &en_plug_provider);
+            if (plug_provider_data) {
+                plug_finish_deleted(&plug_provider_data->deleted_iface_ids);
+                plug_finish_changed(&plug_provider_data->changed_iface_ids);
+            }
+        } else if (ovs_txn_status == -1) {
+            /* The commit is still in progress */
+        } else {
+            OVS_NOT_REACHED();
         }
 
         ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
diff --git a/ovn-nb.xml b/ovn-nb.xml
index e31578fb6..c619f6bdd 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -1031,6 +1031,27 @@ 
             DHCP reply.
           </p>
         </column>
+
+        <group title="VIF Plugging Options">
+          <column name="options" key="plug-type">
+            If set, OVN will attempt to perform plugging of this VIF.  In order
+            to get this port plugged by the OVN controller, OVN must be built
+            with support for VIF plugging.  The default behavior is for the CMS
+            to do the VIF plugging.  Each plug provider has its own options
+            namespaced by name, for example "plug:representor:key".
+
+            Please refer to the plug provider documentation located in
+            Documentation/topics/plug_providers/ for more information.
+          </column>
+
+          <column name="options" key="plug-mtu-request">
+            Requested MTU for plugged interfaces.  When set the OVN controller
+            will fill the <ref table="Interface" column="mtu_request"/> column
+            of the Open vSwitch database's
+            <ref table="Interface" db="vswitch"/> table.  This in turn will
+            make OVS vswitchd update the MTU of the linked interface.
+          </column>
+        </group>
       </group>
 
       <group title="Virtual port Options">
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index 7c683cbcc..41443a30a 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -803,6 +803,5 @@  OVN_CLEANUP_SBOX([hv])
 OVN_CLEANUP_VSWITCH([main])
 as ovn-sb
 OVS_APP_EXIT_AND_WAIT([ovsdb-server])
-
 AT_CLEANUP
 ])
diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
index f06f2e68e..b3c2d72fa 100644
--- a/tests/ovn-macros.at
+++ b/tests/ovn-macros.at
@@ -327,7 +327,7 @@  ovn_az_attach() {
         -- --may-exist add-br br-int \
         -- set bridge br-int fail-mode=secure other-config:disable-in-band=true \
         || return 1
-    start_daemon ovn-controller || return 1
+    start_daemon ovn-controller --enable-dummy-plug || return 1
 }
 
 # ovn_attach NETWORK BRIDGE IP [MASKLEN]
diff --git a/tests/ovn.at b/tests/ovn.at
index 7cfdede77..168a0b301 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -28789,3 +28789,124 @@  AT_CHECK([grep -q "Not claiming" hv1/ovn-controller.log], [1], [])
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn-controller - plugging])
+AT_KEYWORDS([plugging])
+
+ovn_start
+
+net_add n1
+sim_add hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+sim_add hv2
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.2
+
+check ovn-nbctl ls-add lsw0
+
+check ovn-nbctl lsp-add lsw0 lsp1
+check ovn-nbctl lsp-set-addresses lsp1 "f0:00:00:00:00:01 172.16.0.101"
+check ovn-nbctl lsp-set-options lsp1 \
+    requested-chassis=hv1 \
+    plug-type=dummy \
+    plug-mtu-request=42
+
+check ovn-nbctl lsp-add lsw0 lsp2
+check ovn-nbctl lsp-set-addresses lsp2 "f0:00:00:00:00:02 172.16.0.102"
+check ovn-nbctl lsp-set-options lsp2 \
+    requested-chassis=hv2 \
+    plug-type=dummy \
+    plug-mtu-request=42
+
+wait_for_ports_up lsp1 lsp2
+
+lsp1_uuid=$(ovn-nbctl --bare --columns _uuid find Logical_Switch_Port name=lsp1)
+iface1_uuid=$(as hv1 ovs-vsctl --bare --columns _uuid find Interface name=lsp1)
+
+lsp2_uuid=$(ovn-nbctl --bare --columns _uuid find Logical_Switch_Port name=lsp2)
+iface2_uuid=$(as hv2 ovs-vsctl --bare --columns _uuid find Interface name=lsp2)
+
+# Check that the lport was plugged
+AT_CHECK([test xvalue = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid} options:plug-dummy-option)], [0], [])
+AT_CHECK([test x42 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid} mtu_request)], [0], [])
+
+# Check that updating the lport updates the local iface
+check ovn-nbctl --wait=hv lsp-set-options lsp1 \
+    requested-chassis=hv1 \
+    plug-type=dummy \
+    plug-mtu-request=43
+OVS_WAIT_UNTIL([
+    test x43 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid} mtu_request)
+])
+
+# Check that a local modification of the iface record triggers ovn-controller
+# to revert it back to the CMS-requested value (mtu_request 43)
+check as hv1 ovs-vsctl set interface ${iface1_uuid} mtu_request=44
+OVS_WAIT_UNTIL([
+    test x43 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid} mtu_request)
+])
+
+# Check that pointing requested-chassis somewhere else will unplug the port
+check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 \
+    options:requested-chassis=non-existent-chassis
+OVS_WAIT_UNTIL([
+    ! as hv1 ovs-vsctl get Interface ${iface1_uuid} _uuid
+])
+
+# Check that removing an lport will unplug it
+AT_CHECK([test x${iface2_uuid} = x$(as hv2 ovs-vsctl get Interface ${iface2_uuid} _uuid)], [0], [])
+check ovn-nbctl --wait=hv lsp-del ${lsp2_uuid}
+OVS_WAIT_UNTIL([
+    ! as hv2 ovs-vsctl get Interface ${iface2_uuid} _uuid
+])
+
+# Check that port is unplugged when we simulate presence of a port previously
+# plugged by us in local OVSDB with no record in SB DB.
+check as hv2 ovs-vsctl \
+    -- add-port br-int vif1
+
+# From one moment it's there...
+vif1_uuid=$(as hv2 ovs-vsctl --bare --columns _uuid find Interface name=vif1)
+OVS_WAIT_UNTIL([
+    as hv2 ovs-vsctl get Interface ${vif1_uuid} _uuid
+])
+
+# Add the external-ids we expect
+check as hv2 ovs-vsctl \
+    -- set Interface ${vif1_uuid} \
+           external-ids:ovn-plugged=dummy \
+           external-ids:iface-id=non-existing-lsp
+
+# ...to the next moment it's gone.
+OVS_WAIT_UNTIL([
+    ! as hv2 ovs-vsctl get Interface ${vif1_uuid} _uuid
+])
+
+# Check that a warning is logged when CMS requests plugging of an interface
+# with lbinding already plugged by someone else.
+check as hv2 ovs-vsctl \
+    -- add-port br-int vif3 \
+    -- set Interface vif3 \
+       external-ids:iface-id=lsp3
+
+check ovn-nbctl lsp-add lsw0 lsp3
+check ovn-nbctl lsp-set-addresses lsp3 "f0:00:00:00:00:03 172.16.0.103"
+check ovn-nbctl lsp-set-options lsp3 \
+    requested-chassis=hv2
+
+wait_for_ports_up lsp3
+
+check ovn-nbctl --wait=hv lsp-set-options lsp3 \
+    requested-chassis=hv2 \
+    plug-type=dummy
+
+OVS_WAIT_UNTIL([
+    grep -q "CMS requested plugging of lport lsp3" hv2/ovn-controller.log
+])
+
+OVN_CLEANUP([hv1],[hv2])
+AT_CLEANUP
+])