diff mbox series

[ovs-dev] controller: Clear tunnels from old integration bridge

Message ID 20230330075259.371472-1-amusil@redhat.com
State Changes Requested
Headers show
Series [ovs-dev] controller: Clear tunnels from old integration bridge | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Ales Musil March 30, 2023, 7:52 a.m. UTC
After integration bridge change the tunnels would
stay on the old bridge preventing new tunnels creation
and disrupting traffic. Detect the bridge change
and clear the tunnels from the old integration bridge.

Reported-at: https://bugzilla.redhat.com/2173635
Signed-off-by: Ales Musil <amusil@redhat.com>
---
 controller/encaps.c         | 36 +++++++++++++++-
 controller/encaps.h         |  4 +-
 controller/ovn-controller.c | 26 +++++++++++-
 tests/ovn.at                | 83 +++++++++++++++++++++++++++++++++++++
 4 files changed, 145 insertions(+), 4 deletions(-)

Comments

Ihar Hrachyshka March 30, 2023, 8:46 p.m. UTC | #1
On Thu, Mar 30, 2023 at 3:53 AM Ales Musil <amusil@redhat.com> wrote:
>
> After integration bridge change the tunnels would
> stay on the old bridge preventing new tunnels creation
> and disrupting traffic. Detect the bridge change
> and clear the tunnels from the old integration bridge.
>
> Reported-at: https://bugzilla.redhat.com/2173635
> Signed-off-by: Ales Musil <amusil@redhat.com>
> ---
>  controller/encaps.c         | 36 +++++++++++++++-
>  controller/encaps.h         |  4 +-
>  controller/ovn-controller.c | 26 +++++++++++-
>  tests/ovn.at                | 83 +++++++++++++++++++++++++++++++++++++
>  4 files changed, 145 insertions(+), 4 deletions(-)
>
> diff --git a/controller/encaps.c b/controller/encaps.c
> index 2662eaf98..c66743d3b 100644
> --- a/controller/encaps.c
> +++ b/controller/encaps.c
> @@ -386,6 +386,20 @@ chassis_tzones_overlap(const struct sset *transport_zones,
>      return false;
>  }
>
> +static void
> +clear_old_tunnels(const struct ovsrec_bridge *old_br_int)
> +{
> +    for (size_t i = 0; i < old_br_int->n_ports; i++) {
> +        const struct ovsrec_port *port = old_br_int->ports[i];
> +        const char *id = smap_get(&port->external_ids, "ovn-chassis-id");
> +        if (id) {
> +            VLOG_DBG("Clearing old tunnel port \"%s\" (%s) from bridge "
> +                     "\"%s\".", port->name, id, old_br_int->name);
> +            ovsrec_bridge_update_ports_delvalue(old_br_int, port);
> +        }
> +    }
> +}
> +
>  void
>  encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
>             const struct ovsrec_bridge *br_int,
> @@ -393,12 +407,32 @@ encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
>             const struct sbrec_chassis *this_chassis,
>             const struct sbrec_sb_global *sbg,
>             const struct ovsrec_open_vswitch_table *ovs_table,
> -           const struct sset *transport_zones)
> +           const struct sset *transport_zones,
> +           const struct ovsrec_bridge_table *bridge_table,
> +           const char *br_int_name)
>  {
>      if (!ovs_idl_txn || !br_int) {
>          return;
>      }
>
> +    if (!br_int_name) {
> +        /* The controller has just started, we need to look through all
> +         * bridges for old tunnel ports. */
> +        const struct ovsrec_bridge *br;
> +        OVSREC_BRIDGE_TABLE_FOR_EACH (br, bridge_table) {
> +            if (!strcmp(br->name, br_int->name)) {
> +                continue;
> +            }
> +            clear_old_tunnels(br);
> +        }
> +    } else if (strcmp(br_int_name, br_int->name)) {
> +        /* The integration bridge was changed, clear tunnel ports from
> +         * the old one. */
> +        const struct ovsrec_bridge *old_br_int =
> +            get_bridge(bridge_table, br_int_name);
> +        clear_old_tunnels(old_br_int);

When two ovn-controllers are running on the same host, won't this
remove tunnels that belong to the other ovn-controller (which are part
of the other controller' bridge)? I think you should only remove
tunnels that match the names as constructed by tunnel_create_name()
[the names should include the so-called unique "chassis index"].


> +    }
> +
>      const struct sbrec_chassis *chassis_rec;
>
>      struct tunnel_ctx tc = {
> diff --git a/controller/encaps.h b/controller/encaps.h
> index 867c6f28c..cf38dac1a 100644
> --- a/controller/encaps.h
> +++ b/controller/encaps.h
> @@ -35,7 +35,9 @@ void encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
>                  const struct sbrec_chassis *,
>                  const struct sbrec_sb_global *,
>                  const struct ovsrec_open_vswitch_table *,
> -                const struct sset *transport_zones);
> +                const struct sset *transport_zones,
> +                const struct ovsrec_bridge_table *bridge_table,
> +                const char *br_int_name);
>
>  bool encaps_cleanup(struct ovsdb_idl_txn *ovs_idl_txn,
>                      const struct ovsrec_bridge *br_int);
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 76be2426e..242d93823 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -536,6 +536,21 @@ process_br_int(struct ovsdb_idl_txn *ovs_idl_txn,
>      *br_int_ = br_int;
>  }
>
> +static void
> +consider_br_int_change(const struct ovsrec_bridge *br_int, char **current_name)
> +{
> +    ovs_assert(current_name);
> +
> +    if (!*current_name) {
> +        *current_name = xstrdup(br_int->name);
> +    }
> +
> +    if (strcmp(*current_name, br_int->name)) {
> +        free(*current_name);
> +        *current_name = xstrdup(br_int->name);
> +    }
> +}
> +
>  static void
>  update_ssl_config(const struct ovsrec_ssl_table *ssl_table)
>  {
> @@ -4918,6 +4933,8 @@ main(int argc, char *argv[])
>      char *ovn_version = ovn_get_internal_version();
>      VLOG_INFO("OVN internal version is : [%s]", ovn_version);
>
> +    char *current_br_int_name = NULL;
> +
>      /* Main loop. */
>      exiting = false;
>      restart = false;
> @@ -5070,7 +5087,9 @@ main(int argc, char *argv[])
>                                 chassis,
>                                 sbrec_sb_global_first(ovnsb_idl_loop.idl),
>                                 ovs_table,
> -                               &transport_zones);
> +                               &transport_zones,
> +                               bridge_table,
> +                               current_br_int_name);
>
>                      stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
>                                      time_msec());
> @@ -5257,7 +5276,10 @@ main(int argc, char *argv[])
>                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>                                     time_msec());
>                  }
> -
> +                /* The name needs to be reflected at the end of the block.
> +                 * This allows us to detect br-int changes and act
> +                 * accordingly. */
> +                consider_br_int_change(br_int, &current_br_int_name);
>              }
>
>              if (!engine_has_run()) {
> diff --git a/tests/ovn.at b/tests/ovn.at
> index a892691ca..8ca2e2a0c 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -35179,3 +35179,86 @@ AT_CHECK([test "$(grep -c "Flushing CT for 5-tuple" hv1/ovn-controller.log)" = "
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([Re-create encap tunnels during integration bridge migration])
> +ovn_start
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +
> +sim_add hv2
> +as hv2
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.2
> +
> +check ovn-nbctl --wait=hv sync
> +
> +check_tunnel_port() {
> +    local hv=$1
> +    local br=$2
> +    local id=$3
> +
> +    as $hv
> +    OVS_WAIT_UNTIL([
> +        test "$(ovs-vsctl --format=table --no-headings find port external_ids:ovn-chassis-id="$id" | wc -l)" = "1"
> +    ])
> +    local tunnel_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="$id")
> +    AT_CHECK([ovs-vsctl --bare --columns ports find bridge name="$br" | grep -q "$tunnel_id"])
> +}
> +
> +# Check that both chassis have tunnel
> +check_tunnel_port hv1 br-int hv2@192.168.0.2
> +check_tunnel_port hv2 br-int hv1@192.168.0.1
> +
> +# Stop ovn-controller on hv1
> +check as hv1 ovn-appctl -t ovn-controller exit --restart
> +
> +# The tunnel should remain intact
> +check_tunnel_port hv1 br-int hv2@192.168.0.2
> +
> +# Change the bridge to br-int1 on hv1
> +as hv1
> +check ovs-vsctl add-br br-int1
> +check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
> +start_daemon ovn-controller --verbose="encaps:dbg"
> +check ovn-nbctl --wait=hv sync
> +
> +# Check that the tunnel was created on br-int1 instead
> +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
> +check grep -q "Clearing old tunnel port \"ovn-hv2-0\" (hv2@192.168.0.2) from bridge \"br-int\"" hv1/ovn-controller.log
> +
> +# Change the bridge to br-int1 on hv2
> +as hv2
> +check ovn-appctl vlog/set encaps:dbg
> +check ovs-vsctl add-br br-int1
> +check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
> +check ovn-nbctl --wait=hv sync
> +
> +
> +# Check that the tunnel was created on br-int1 instead
> +check_tunnel_port hv2 br-int1 hv1@192.168.0.1
> +check grep -q "Clearing old tunnel port \"ovn-hv1-0\" (hv1@192.168.0.1) from bridge \"br-int\"" hv2/ovn-controller.log
> +
> +# Stop ovn-controller on hv1
> +check as hv1 ovn-appctl -t ovn-controller exit --restart
> +
> +# The tunnel should remain intact
> +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
> +prev_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
> +
> +# Start the controller again
> +start_daemon ovn-controller --verbose="encaps:dbg"
> +check ovn-nbctl --wait=hv sync
> +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
> +current_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
> +
> +# The tunnel should be the same after restart
> +check test "$current_id" = "$prev_id"
> +
> +OVN_CLEANUP([hv1],[hv2])
> +AT_CLEANUP
> +])
> --
> 2.39.2
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Ales Musil March 31, 2023, 7:32 a.m. UTC | #2
On Thu, Mar 30, 2023 at 10:47 PM Ihar Hrachyshka <ihrachys@redhat.com>
wrote:

> On Thu, Mar 30, 2023 at 3:53 AM Ales Musil <amusil@redhat.com> wrote:
> >
> > After integration bridge change the tunnels would
> > stay on the old bridge preventing new tunnels creation
> > and disrupting traffic. Detect the bridge change
> > and clear the tunnels from the old integration bridge.
> >
> > Reported-at: https://bugzilla.redhat.com/2173635
> > Signed-off-by: Ales Musil <amusil@redhat.com>
> > ---
> >  controller/encaps.c         | 36 +++++++++++++++-
> >  controller/encaps.h         |  4 +-
> >  controller/ovn-controller.c | 26 +++++++++++-
> >  tests/ovn.at                | 83 +++++++++++++++++++++++++++++++++++++
> >  4 files changed, 145 insertions(+), 4 deletions(-)
> >
> > diff --git a/controller/encaps.c b/controller/encaps.c
> > index 2662eaf98..c66743d3b 100644
> > --- a/controller/encaps.c
> > +++ b/controller/encaps.c
> > @@ -386,6 +386,20 @@ chassis_tzones_overlap(const struct sset
> *transport_zones,
> >      return false;
> >  }
> >
> > +static void
> > +clear_old_tunnels(const struct ovsrec_bridge *old_br_int)
> > +{
> > +    for (size_t i = 0; i < old_br_int->n_ports; i++) {
> > +        const struct ovsrec_port *port = old_br_int->ports[i];
> > +        const char *id = smap_get(&port->external_ids,
> "ovn-chassis-id");
> > +        if (id) {
> > +            VLOG_DBG("Clearing old tunnel port \"%s\" (%s) from bridge "
> > +                     "\"%s\".", port->name, id, old_br_int->name);
> > +            ovsrec_bridge_update_ports_delvalue(old_br_int, port);
> > +        }
> > +    }
> > +}
> > +
> >  void
> >  encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
> >             const struct ovsrec_bridge *br_int,
> > @@ -393,12 +407,32 @@ encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
> >             const struct sbrec_chassis *this_chassis,
> >             const struct sbrec_sb_global *sbg,
> >             const struct ovsrec_open_vswitch_table *ovs_table,
> > -           const struct sset *transport_zones)
> > +           const struct sset *transport_zones,
> > +           const struct ovsrec_bridge_table *bridge_table,
> > +           const char *br_int_name)
> >  {
> >      if (!ovs_idl_txn || !br_int) {
> >          return;
> >      }
> >
> > +    if (!br_int_name) {
> > +        /* The controller has just started, we need to look through all
> > +         * bridges for old tunnel ports. */
> > +        const struct ovsrec_bridge *br;
> > +        OVSREC_BRIDGE_TABLE_FOR_EACH (br, bridge_table) {
> > +            if (!strcmp(br->name, br_int->name)) {
> > +                continue;
> > +            }
> > +            clear_old_tunnels(br);
> > +        }
> > +    } else if (strcmp(br_int_name, br_int->name)) {
> > +        /* The integration bridge was changed, clear tunnel ports from
> > +         * the old one. */
> > +        const struct ovsrec_bridge *old_br_int =
> > +            get_bridge(bridge_table, br_int_name);
> > +        clear_old_tunnels(old_br_int);
>
> When two ovn-controllers are running on the same host, won't this
> remove tunnels that belong to the other ovn-controller (which are part
> of the other controller' bridge)? I think you should only remove
> tunnels that match the names as constructed by tunnel_create_name()
> [the names should include the so-called unique "chassis index"].
>

Yeah you are right, I've missed that, even though this would happen only
on a fresh start of the controller, but it still is wrong. I'll fix that in
v2.

Thanks,
Ales

>
>
> > +    }
> > +
> >      const struct sbrec_chassis *chassis_rec;
> >
> >      struct tunnel_ctx tc = {
> > diff --git a/controller/encaps.h b/controller/encaps.h
> > index 867c6f28c..cf38dac1a 100644
> > --- a/controller/encaps.h
> > +++ b/controller/encaps.h
> > @@ -35,7 +35,9 @@ void encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
> >                  const struct sbrec_chassis *,
> >                  const struct sbrec_sb_global *,
> >                  const struct ovsrec_open_vswitch_table *,
> > -                const struct sset *transport_zones);
> > +                const struct sset *transport_zones,
> > +                const struct ovsrec_bridge_table *bridge_table,
> > +                const char *br_int_name);
> >
> >  bool encaps_cleanup(struct ovsdb_idl_txn *ovs_idl_txn,
> >                      const struct ovsrec_bridge *br_int);
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 76be2426e..242d93823 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -536,6 +536,21 @@ process_br_int(struct ovsdb_idl_txn *ovs_idl_txn,
> >      *br_int_ = br_int;
> >  }
> >
> > +static void
> > +consider_br_int_change(const struct ovsrec_bridge *br_int, char
> **current_name)
> > +{
> > +    ovs_assert(current_name);
> > +
> > +    if (!*current_name) {
> > +        *current_name = xstrdup(br_int->name);
> > +    }
> > +
> > +    if (strcmp(*current_name, br_int->name)) {
> > +        free(*current_name);
> > +        *current_name = xstrdup(br_int->name);
> > +    }
> > +}
> > +
> >  static void
> >  update_ssl_config(const struct ovsrec_ssl_table *ssl_table)
> >  {
> > @@ -4918,6 +4933,8 @@ main(int argc, char *argv[])
> >      char *ovn_version = ovn_get_internal_version();
> >      VLOG_INFO("OVN internal version is : [%s]", ovn_version);
> >
> > +    char *current_br_int_name = NULL;
> > +
> >      /* Main loop. */
> >      exiting = false;
> >      restart = false;
> > @@ -5070,7 +5087,9 @@ main(int argc, char *argv[])
> >                                 chassis,
> >
>  sbrec_sb_global_first(ovnsb_idl_loop.idl),
> >                                 ovs_table,
> > -                               &transport_zones);
> > +                               &transport_zones,
> > +                               bridge_table,
> > +                               current_br_int_name);
> >
> >                      stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
> >                                      time_msec());
> > @@ -5257,7 +5276,10 @@ main(int argc, char *argv[])
> >                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
> >                                     time_msec());
> >                  }
> > -
> > +                /* The name needs to be reflected at the end of the
> block.
> > +                 * This allows us to detect br-int changes and act
> > +                 * accordingly. */
> > +                consider_br_int_change(br_int, &current_br_int_name);
> >              }
> >
> >              if (!engine_has_run()) {
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index a892691ca..8ca2e2a0c 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -35179,3 +35179,86 @@ AT_CHECK([test "$(grep -c "Flushing CT for
> 5-tuple" hv1/ovn-controller.log)" = "
> >  OVN_CLEANUP([hv1])
> >  AT_CLEANUP
> >  ])
> > +
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([Re-create encap tunnels during integration bridge migration])
> > +ovn_start
> > +net_add n1
> > +
> > +sim_add hv1
> > +as hv1
> > +ovs-vsctl add-br br-phys
> > +ovn_attach n1 br-phys 192.168.0.1
> > +
> > +sim_add hv2
> > +as hv2
> > +ovs-vsctl add-br br-phys
> > +ovn_attach n1 br-phys 192.168.0.2
> > +
> > +check ovn-nbctl --wait=hv sync
> > +
> > +check_tunnel_port() {
> > +    local hv=$1
> > +    local br=$2
> > +    local id=$3
> > +
> > +    as $hv
> > +    OVS_WAIT_UNTIL([
> > +        test "$(ovs-vsctl --format=table --no-headings find port
> external_ids:ovn-chassis-id="$id" | wc -l)" = "1"
> > +    ])
> > +    local tunnel_id=$(ovs-vsctl --bare --columns _uuid find port
> external_ids:ovn-chassis-id="$id")
> > +    AT_CHECK([ovs-vsctl --bare --columns ports find bridge name="$br" |
> grep -q "$tunnel_id"])
> > +}
> > +
> > +# Check that both chassis have tunnel
> > +check_tunnel_port hv1 br-int hv2@192.168.0.2
> > +check_tunnel_port hv2 br-int hv1@192.168.0.1
> > +
> > +# Stop ovn-controller on hv1
> > +check as hv1 ovn-appctl -t ovn-controller exit --restart
> > +
> > +# The tunnel should remain intact
> > +check_tunnel_port hv1 br-int hv2@192.168.0.2
> > +
> > +# Change the bridge to br-int1 on hv1
> > +as hv1
> > +check ovs-vsctl add-br br-int1
> > +check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
> > +start_daemon ovn-controller --verbose="encaps:dbg"
> > +check ovn-nbctl --wait=hv sync
> > +
> > +# Check that the tunnel was created on br-int1 instead
> > +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
> > +check grep -q "Clearing old tunnel port \"ovn-hv2-0\" (hv2@192.168.0.2)
> from bridge \"br-int\"" hv1/ovn-controller.log
> > +
> > +# Change the bridge to br-int1 on hv2
> > +as hv2
> > +check ovn-appctl vlog/set encaps:dbg
> > +check ovs-vsctl add-br br-int1
> > +check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
> > +check ovn-nbctl --wait=hv sync
> > +
> > +
> > +# Check that the tunnel was created on br-int1 instead
> > +check_tunnel_port hv2 br-int1 hv1@192.168.0.1
> > +check grep -q "Clearing old tunnel port \"ovn-hv1-0\" (hv1@192.168.0.1)
> from bridge \"br-int\"" hv2/ovn-controller.log
> > +
> > +# Stop ovn-controller on hv1
> > +check as hv1 ovn-appctl -t ovn-controller exit --restart
> > +
> > +# The tunnel should remain intact
> > +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
> > +prev_id=$(ovs-vsctl --bare --columns _uuid find port
> external_ids:ovn-chassis-id="hv2@192.168.0.2")
> > +
> > +# Start the controller again
> > +start_daemon ovn-controller --verbose="encaps:dbg"
> > +check ovn-nbctl --wait=hv sync
> > +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
> > +current_id=$(ovs-vsctl --bare --columns _uuid find port
> external_ids:ovn-chassis-id="hv2@192.168.0.2")
> > +
> > +# The tunnel should be the same after restart
> > +check test "$current_id" = "$prev_id"
> > +
> > +OVN_CLEANUP([hv1],[hv2])
> > +AT_CLEANUP
> > +])
> > --
> > 2.39.2
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
>
>
Dumitru Ceara March 31, 2023, 8 a.m. UTC | #3
On 3/31/23 09:32, Ales Musil wrote:
>> When two ovn-controllers are running on the same host, won't this
>> remove tunnels that belong to the other ovn-controller (which are part
>> of the other controller' bridge)? I think you should only remove
>> tunnels that match the names as constructed by tunnel_create_name()
>> [the names should include the so-called unique "chassis index"].
>>
> Yeah you are right, I've missed that, even though this would happen only
> on a fresh start of the controller, but it still is wrong. I'll fix that in
> v2.
> 

Would it be possible to add a test to ensure we don't break this in the
future?

Thanks,
Dumitru
Ales Musil March 31, 2023, 8:03 a.m. UTC | #4
On Fri, Mar 31, 2023 at 10:00 AM Dumitru Ceara <dceara@redhat.com> wrote:

> On 3/31/23 09:32, Ales Musil wrote:
> >> When two ovn-controllers are running on the same host, won't this
> >> remove tunnels that belong to the other ovn-controller (which are part
> >> of the other controller' bridge)? I think you should only remove
> >> tunnels that match the names as constructed by tunnel_create_name()
> >> [the names should include the so-called unique "chassis index"].
> >>
> > Yeah you are right, I've missed that, even though this would happen only
> > on a fresh start of the controller, but it still is wrong. I'll fix that
> in
> > v2.
> >
>
> Would it be possible to add a test to ensure we don't break this in the
> future?
>

Of course.


>
> Thanks,
> Dumitru
>
>
diff mbox series

Patch

diff --git a/controller/encaps.c b/controller/encaps.c
index 2662eaf98..c66743d3b 100644
--- a/controller/encaps.c
+++ b/controller/encaps.c
@@ -386,6 +386,20 @@  chassis_tzones_overlap(const struct sset *transport_zones,
     return false;
 }
 
+static void
+clear_old_tunnels(const struct ovsrec_bridge *old_br_int)
+{
+    for (size_t i = 0; i < old_br_int->n_ports; i++) {
+        const struct ovsrec_port *port = old_br_int->ports[i];
+        const char *id = smap_get(&port->external_ids, "ovn-chassis-id");
+        if (id) {
+            VLOG_DBG("Clearing old tunnel port \"%s\" (%s) from bridge "
+                     "\"%s\".", port->name, id, old_br_int->name);
+            ovsrec_bridge_update_ports_delvalue(old_br_int, port);
+        }
+    }
+}
+
 void
 encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
            const struct ovsrec_bridge *br_int,
@@ -393,12 +407,32 @@  encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
            const struct sbrec_chassis *this_chassis,
            const struct sbrec_sb_global *sbg,
            const struct ovsrec_open_vswitch_table *ovs_table,
-           const struct sset *transport_zones)
+           const struct sset *transport_zones,
+           const struct ovsrec_bridge_table *bridge_table,
+           const char *br_int_name)
 {
     if (!ovs_idl_txn || !br_int) {
         return;
     }
 
+    if (!br_int_name) {
+        /* The controller has just started, we need to look through all
+         * bridges for old tunnel ports. */
+        const struct ovsrec_bridge *br;
+        OVSREC_BRIDGE_TABLE_FOR_EACH (br, bridge_table) {
+            if (!strcmp(br->name, br_int->name)) {
+                continue;
+            }
+            clear_old_tunnels(br);
+        }
+    } else if (strcmp(br_int_name, br_int->name)) {
+        /* The integration bridge was changed, clear tunnel ports from
+         * the old one. */
+        const struct ovsrec_bridge *old_br_int =
+            get_bridge(bridge_table, br_int_name);
+        clear_old_tunnels(old_br_int);
+    }
+
     const struct sbrec_chassis *chassis_rec;
 
     struct tunnel_ctx tc = {
diff --git a/controller/encaps.h b/controller/encaps.h
index 867c6f28c..cf38dac1a 100644
--- a/controller/encaps.h
+++ b/controller/encaps.h
@@ -35,7 +35,9 @@  void encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
                 const struct sbrec_chassis *,
                 const struct sbrec_sb_global *,
                 const struct ovsrec_open_vswitch_table *,
-                const struct sset *transport_zones);
+                const struct sset *transport_zones,
+                const struct ovsrec_bridge_table *bridge_table,
+                const char *br_int_name);
 
 bool encaps_cleanup(struct ovsdb_idl_txn *ovs_idl_txn,
                     const struct ovsrec_bridge *br_int);
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 76be2426e..242d93823 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -536,6 +536,21 @@  process_br_int(struct ovsdb_idl_txn *ovs_idl_txn,
     *br_int_ = br_int;
 }
 
+static void
+consider_br_int_change(const struct ovsrec_bridge *br_int, char **current_name)
+{
+    ovs_assert(current_name);
+
+    if (!*current_name) {
+        *current_name = xstrdup(br_int->name);
+    }
+
+    if (strcmp(*current_name, br_int->name)) {
+        free(*current_name);
+        *current_name = xstrdup(br_int->name);
+    }
+}
+
 static void
 update_ssl_config(const struct ovsrec_ssl_table *ssl_table)
 {
@@ -4918,6 +4933,8 @@  main(int argc, char *argv[])
     char *ovn_version = ovn_get_internal_version();
     VLOG_INFO("OVN internal version is : [%s]", ovn_version);
 
+    char *current_br_int_name = NULL;
+
     /* Main loop. */
     exiting = false;
     restart = false;
@@ -5070,7 +5087,9 @@  main(int argc, char *argv[])
                                chassis,
                                sbrec_sb_global_first(ovnsb_idl_loop.idl),
                                ovs_table,
-                               &transport_zones);
+                               &transport_zones,
+                               bridge_table,
+                               current_br_int_name);
 
                     stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
                                     time_msec());
@@ -5257,7 +5276,10 @@  main(int argc, char *argv[])
                     stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
                                    time_msec());
                 }
-
+                /* The name needs to be reflected at the end of the block.
+                 * This allows us to detect br-int changes and act
+                 * accordingly. */
+                consider_br_int_change(br_int, &current_br_int_name);
             }
 
             if (!engine_has_run()) {
diff --git a/tests/ovn.at b/tests/ovn.at
index a892691ca..8ca2e2a0c 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -35179,3 +35179,86 @@  AT_CHECK([test "$(grep -c "Flushing CT for 5-tuple" hv1/ovn-controller.log)" = "
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([Re-create encap tunnels during integration bridge migration])
+ovn_start
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+sim_add hv2
+as hv2
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.2
+
+check ovn-nbctl --wait=hv sync
+
+check_tunnel_port() {
+    local hv=$1
+    local br=$2
+    local id=$3
+
+    as $hv
+    OVS_WAIT_UNTIL([
+        test "$(ovs-vsctl --format=table --no-headings find port external_ids:ovn-chassis-id="$id" | wc -l)" = "1"
+    ])
+    local tunnel_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="$id")
+    AT_CHECK([ovs-vsctl --bare --columns ports find bridge name="$br" | grep -q "$tunnel_id"])
+}
+
+# Check that both chassis have tunnel
+check_tunnel_port hv1 br-int hv2@192.168.0.2
+check_tunnel_port hv2 br-int hv1@192.168.0.1
+
+# Stop ovn-controller on hv1
+check as hv1 ovn-appctl -t ovn-controller exit --restart
+
+# The tunnel should remain intact
+check_tunnel_port hv1 br-int hv2@192.168.0.2
+
+# Change the bridge to br-int1 on hv1
+as hv1
+check ovs-vsctl add-br br-int1
+check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
+start_daemon ovn-controller --verbose="encaps:dbg"
+check ovn-nbctl --wait=hv sync
+
+# Check that the tunnel was created on br-int1 instead
+check_tunnel_port hv1 br-int1 hv2@192.168.0.2
+check grep -q "Clearing old tunnel port \"ovn-hv2-0\" (hv2@192.168.0.2) from bridge \"br-int\"" hv1/ovn-controller.log
+
+# Change the bridge to br-int1 on hv2
+as hv2
+check ovn-appctl vlog/set encaps:dbg
+check ovs-vsctl add-br br-int1
+check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
+check ovn-nbctl --wait=hv sync
+
+
+# Check that the tunnel was created on br-int1 instead
+check_tunnel_port hv2 br-int1 hv1@192.168.0.1
+check grep -q "Clearing old tunnel port \"ovn-hv1-0\" (hv1@192.168.0.1) from bridge \"br-int\"" hv2/ovn-controller.log
+
+# Stop ovn-controller on hv1
+check as hv1 ovn-appctl -t ovn-controller exit --restart
+
+# The tunnel should remain intact
+check_tunnel_port hv1 br-int1 hv2@192.168.0.2
+prev_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
+
+# Start the controller again
+start_daemon ovn-controller --verbose="encaps:dbg"
+check ovn-nbctl --wait=hv sync
+check_tunnel_port hv1 br-int1 hv2@192.168.0.2
+current_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
+
+# The tunnel should be the same after restart
+check test "$current_id" = "$prev_id"
+
+OVN_CLEANUP([hv1],[hv2])
+AT_CLEANUP
+])