diff mbox series

[ovs-dev,v2] controller: Clear tunnels from old integration bridge

Message ID 20230403095026.2897669-1-amusil@redhat.com
State Superseded
Headers show
Series [ovs-dev,v2] controller: Clear tunnels from old integration bridge | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Ales Musil April 3, 2023, 9:50 a.m. UTC
After integration bridge change the tunnels would
stay on the old bridge preventing new tunnels creation
and disrupting traffic. Detect the bridge change
and clear the tunnels from the old integration bridge.

Reported-at: https://bugzilla.redhat.com/2173635
Signed-off-by: Ales Musil <amusil@redhat.com>
---
v2: Make sure that we don't clear tunnels belonging to
    other ovn-controllers on the same host.
---
 controller/encaps.c         |  42 ++++++++-
 controller/encaps.h         |   4 +-
 controller/ovn-controller.c |  26 +++++-
 tests/ovn.at                | 168 ++++++++++++++++++++++++++++++++++++
 4 files changed, 236 insertions(+), 4 deletions(-)

Comments

Ihar Hrachyshka April 5, 2023, 1:18 p.m. UTC | #1
Thank you for v2! All good now.

Acked-By: Ihar Hrachyshka <ihrachys@redhat.com>

On Mon, Apr 3, 2023 at 5:50 AM Ales Musil <amusil@redhat.com> wrote:
>
> After integration bridge change the tunnels would
> stay on the old bridge preventing new tunnels creation
> and disrupting traffic. Detect the bridge change
> and clear the tunnels from the old integration bridge.
>
> Reported-at: https://bugzilla.redhat.com/2173635
> Signed-off-by: Ales Musil <amusil@redhat.com>
> ---
> v2: Make sure that we don't clear tunnels belonging to
>     other ovn-controllers on the same host.
> ---
>  controller/encaps.c         |  42 ++++++++-
>  controller/encaps.h         |   4 +-
>  controller/ovn-controller.c |  26 +++++-
>  tests/ovn.at                | 168 ++++++++++++++++++++++++++++++++++++
>  4 files changed, 236 insertions(+), 4 deletions(-)
>
> diff --git a/controller/encaps.c b/controller/encaps.c
> index 2662eaf98..4a79030b8 100644
> --- a/controller/encaps.c
> +++ b/controller/encaps.c
> @@ -386,6 +386,21 @@ chassis_tzones_overlap(const struct sset *transport_zones,
>      return false;
>  }
>
> +static void
> +clear_old_tunnels(const struct ovsrec_bridge *old_br_int, const char *prefix,
> +                  size_t prefix_len)
> +{
> +    for (size_t i = 0; i < old_br_int->n_ports; i++) {
> +        const struct ovsrec_port *port = old_br_int->ports[i];
> +        const char *id = smap_get(&port->external_ids, "ovn-chassis-id");
> +        if (id && !strncmp(port->name, prefix, prefix_len)) {
> +            VLOG_DBG("Clearing old tunnel port \"%s\" (%s) from bridge "
> +                     "\"%s\".", port->name, id, old_br_int->name);
> +            ovsrec_bridge_update_ports_delvalue(old_br_int, port);
> +        }
> +    }
> +}
> +
>  void
>  encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
>             const struct ovsrec_bridge *br_int,
> @@ -393,12 +408,37 @@ encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
>             const struct sbrec_chassis *this_chassis,
>             const struct sbrec_sb_global *sbg,
>             const struct ovsrec_open_vswitch_table *ovs_table,
> -           const struct sset *transport_zones)
> +           const struct sset *transport_zones,
> +           const struct ovsrec_bridge_table *bridge_table,
> +           const char *br_int_name)
>  {
>      if (!ovs_idl_txn || !br_int) {
>          return;
>      }
>
> +    if (!br_int_name) {
> +        /* The controller has just started, we need to look through all
> +         * bridges for old tunnel ports. */
> +        char *tunnel_prefix = xasprintf("ovn%s-", get_chassis_idx(ovs_table));
> +        size_t prefix_len = strlen(tunnel_prefix);
> +
> +        const struct ovsrec_bridge *br;
> +        OVSREC_BRIDGE_TABLE_FOR_EACH (br, bridge_table) {
> +            if (!strcmp(br->name, br_int->name)) {
> +                continue;
> +            }
> +            clear_old_tunnels(br, tunnel_prefix, prefix_len);
> +        }
> +
> +        free(tunnel_prefix);
> +    } else if (strcmp(br_int_name, br_int->name)) {
> +        /* The integration bridge was changed, clear tunnel ports from
> +         * the old one. */
> +        const struct ovsrec_bridge *old_br_int =
> +            get_bridge(bridge_table, br_int_name);
> +        clear_old_tunnels(old_br_int, "", 0);
> +    }
> +
>      const struct sbrec_chassis *chassis_rec;
>
>      struct tunnel_ctx tc = {
> diff --git a/controller/encaps.h b/controller/encaps.h
> index 867c6f28c..cf38dac1a 100644
> --- a/controller/encaps.h
> +++ b/controller/encaps.h
> @@ -35,7 +35,9 @@ void encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
>                  const struct sbrec_chassis *,
>                  const struct sbrec_sb_global *,
>                  const struct ovsrec_open_vswitch_table *,
> -                const struct sset *transport_zones);
> +                const struct sset *transport_zones,
> +                const struct ovsrec_bridge_table *bridge_table,
> +                const char *br_int_name);
>
>  bool encaps_cleanup(struct ovsdb_idl_txn *ovs_idl_txn,
>                      const struct ovsrec_bridge *br_int);
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 76be2426e..242d93823 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -536,6 +536,21 @@ process_br_int(struct ovsdb_idl_txn *ovs_idl_txn,
>      *br_int_ = br_int;
>  }
>
> +static void
> +consider_br_int_change(const struct ovsrec_bridge *br_int, char **current_name)
> +{
> +    ovs_assert(current_name);
> +
> +    if (!*current_name) {
> +        *current_name = xstrdup(br_int->name);
> +    }
> +
> +    if (strcmp(*current_name, br_int->name)) {
> +        free(*current_name);
> +        *current_name = xstrdup(br_int->name);
> +    }
> +}
> +
>  static void
>  update_ssl_config(const struct ovsrec_ssl_table *ssl_table)
>  {
> @@ -4918,6 +4933,8 @@ main(int argc, char *argv[])
>      char *ovn_version = ovn_get_internal_version();
>      VLOG_INFO("OVN internal version is : [%s]", ovn_version);
>
> +    char *current_br_int_name = NULL;
> +
>      /* Main loop. */
>      exiting = false;
>      restart = false;
> @@ -5070,7 +5087,9 @@ main(int argc, char *argv[])
>                                 chassis,
>                                 sbrec_sb_global_first(ovnsb_idl_loop.idl),
>                                 ovs_table,
> -                               &transport_zones);
> +                               &transport_zones,
> +                               bridge_table,
> +                               current_br_int_name);
>
>                      stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
>                                      time_msec());
> @@ -5257,7 +5276,10 @@ main(int argc, char *argv[])
>                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>                                     time_msec());
>                  }
> -
> +                /* The name needs to be reflected at the end of the block.
> +                 * This allows us to detect br-int changes and act
> +                 * accordingly. */
> +                consider_br_int_change(br_int, &current_br_int_name);
>              }
>
>              if (!engine_has_run()) {
> diff --git a/tests/ovn.at b/tests/ovn.at
> index a892691ca..65b7ef522 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -35179,3 +35179,171 @@ AT_CHECK([test "$(grep -c "Flushing CT for 5-tuple" hv1/ovn-controller.log)" = "
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([Re-create encap tunnels during integration bridge migration])
> +ovn_start
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +
> +sim_add hv2
> +as hv2
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.2
> +
> +check ovn-nbctl --wait=hv sync
> +
> +check_tunnel_port() {
> +    local hv=$1
> +    local br=$2
> +    local id=$3
> +
> +    as $hv
> +    OVS_WAIT_UNTIL([
> +        test "$(ovs-vsctl --format=table --no-headings find port external_ids:ovn-chassis-id="$id" | wc -l)" = "1"
> +    ])
> +    local tunnel_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="$id")
> +    AT_CHECK([ovs-vsctl --bare --columns ports find bridge name="$br" | grep -q "$tunnel_id"])
> +}
> +
> +# Check that both chassis have tunnel
> +check_tunnel_port hv1 br-int hv2@192.168.0.2
> +check_tunnel_port hv2 br-int hv1@192.168.0.1
> +
> +# Stop ovn-controller on hv1
> +check as hv1 ovn-appctl -t ovn-controller exit --restart
> +
> +# The tunnel should remain intact
> +check_tunnel_port hv1 br-int hv2@192.168.0.2
> +
> +# Change the bridge to br-int1 on hv1
> +as hv1
> +check ovs-vsctl add-br br-int1
> +check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
> +start_daemon ovn-controller --verbose="encaps:dbg"
> +check ovn-nbctl --wait=hv sync
> +
> +# Check that the tunnel was created on br-int1 instead
> +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
> +check grep -q "Clearing old tunnel port \"ovn-hv2-0\" (hv2@192.168.0.2) from bridge \"br-int\"" hv1/ovn-controller.log
> +
> +# Change the bridge to br-int1 on hv2
> +as hv2
> +check ovn-appctl vlog/set encaps:dbg
> +check ovs-vsctl add-br br-int1
> +check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
> +check ovn-nbctl --wait=hv sync
> +
> +
> +# Check that the tunnel was created on br-int1 instead
> +check_tunnel_port hv2 br-int1 hv1@192.168.0.1
> +check grep -q "Clearing old tunnel port \"ovn-hv1-0\" (hv1@192.168.0.1) from bridge \"br-int\"" hv2/ovn-controller.log
> +
> +# Stop ovn-controller on hv1
> +check as hv1 ovn-appctl -t ovn-controller exit --restart
> +
> +# The tunnel should remain intact
> +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
> +prev_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
> +
> +# Start the controller again
> +start_daemon ovn-controller --verbose="encaps:dbg"
> +check ovn-nbctl --wait=hv sync
> +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
> +current_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
> +
> +# The tunnel should be the same after restart
> +check test "$current_id" = "$prev_id"
> +
> +OVN_CLEANUP([hv1],[hv2])
> +AT_CLEANUP
> +])
> +
> +# NOTE: This test case runs two ovn-controllers inside the same sandbox (hv1).
> +# Each controller uses a unique chassis name - hv1 and hv2 - and manage
> +# different bridges with different ports.
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([Encaps tunnel cleanup does not interfere with multiple controller on the same host])
> +ovn_start
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys-1
> +ovn_attach n1 br-phys-1 192.168.0.1 24
> +
> +
> +# now start the second virtual controller
> +ovs-vsctl add-br br-phys-2
> +
> +
> +# the file is read once at startup so it's safe to write it
> +# here after the first ovn-controller has started
> +echo hv2 > ${OVN_SYSCONFDIR}/system-id-override
> +
> +# for some reason SSL ovsdb configuration overrides CLI, so
> +# delete ssl config from ovsdb to give CLI arguments priority
> +ovs-vsctl del-ssl
> +
> +start_virtual_controller n1 br-phys-2 br-int-2 192.168.0.2 24 geneve,vxlan hv2 \
> +    --pidfile=${OVS_RUNDIR}/ovn-controller-2.pid \
> +    --log-file=${OVS_RUNDIR}/ovn-controller-2.log \
> +    -p $PKIDIR/testpki-hv2-privkey.pem \
> +    -c $PKIDIR/testpki-hv2-cert.pem \
> +    -C $PKIDIR/testpki-cacert.pem
> +pidfile="$OVS_RUNDIR"/ovn-controller-2.pid
> +on_exit "test -e \"$pidfile\" && kill \`cat \"$pidfile\"\`"
> +
> +ovn-nbctl --wait=hv sync
> +
> +check_tunnel_port() {
> +    local hv=$1
> +    local br=$2
> +    local id=$3
> +
> +    as $hv
> +    OVS_WAIT_UNTIL([
> +        test "$(ovs-vsctl --format=table --no-headings find port external_ids:ovn-chassis-id="$id" | wc -l)" = "1"
> +    ])
> +    local tunnel_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="$id")
> +    AT_CHECK([ovs-vsctl --bare --columns ports find bridge name="$br" | grep -q "$tunnel_id"])
> +}
> +
> +check_tunnel_port hv1 br-int hv2@192.168.0.2
> +check_tunnel_port hv1 br-int-2 hv1@192.168.0.1
> +prev_id1=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv1@192.168.0.1")
> +prev_id2=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
> +
> +# The hv2 is running we can remove the override file
> +rm -f ${OVN_SYSCONFDIR}/system-id-override
> +
> +check ovn-appctl -t ovn-controller exit --restart
> +
> +# for some reason SSL ovsdb configuration overrides CLI, so
> +# delete ssl config from ovsdb to give CLI arguments priority
> +ovs-vsctl del-ssl
> +
> +start_daemon ovn-controller --verbose="encaps:dbg" \
> +    -p $PKIDIR/testpki-hv1-privkey.pem \
> +    -c $PKIDIR/testpki-hv1-cert.pem \
> +    -C $PKIDIR/testpki-cacert.pem
> +
> +check ovn-nbctl --wait=hv sync
> +
> +check_tunnel_port hv1 br-int hv2@192.168.0.2
> +check_tunnel_port hv1 br-int-2 hv1@192.168.0.1
> +current_id1=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv1@192.168.0.1")
> +current_id2=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
> +
> +# Check that restart of hv1 ovn-controller did not interfere with hv2
> +AT_CHECK([grep -q "Clearing old tunnel port \"ovn0-hv1-0\" (hv1@192.168.0.1) from bridge \"br-int-2\"" hv1/ovn-controller.log], [1])
> +check test "$current_id1" = "$prev_id1"
> +check test "$current_id2" = "$prev_id2"
> +
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> +])
> --
> 2.39.2
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Dumitru Ceara April 6, 2023, 11:43 a.m. UTC | #2
On 4/5/23 15:18, Ihar Hrachyshka wrote:
> Thank you for v2! All good now.
> 
> Acked-By: Ihar Hrachyshka <ihrachys@redhat.com>
> 

Thanks, Ales and Ihar!  I have a comment below.  If you're OK with that
I can fold the suggested change in and apply the patch.

Regards,
Dumitru

> On Mon, Apr 3, 2023 at 5:50 AM Ales Musil <amusil@redhat.com> wrote:
>>
>> After integration bridge change the tunnels would
>> stay on the old bridge preventing new tunnels creation
>> and disrupting traffic. Detect the bridge change
>> and clear the tunnels from the old integration bridge.
>>
>> Reported-at: https://bugzilla.redhat.com/2173635
>> Signed-off-by: Ales Musil <amusil@redhat.com>
>> ---
>> v2: Make sure that we don't clear tunnels belonging to
>>     other ovn-controllers on the same host.
>> ---
>>  controller/encaps.c         |  42 ++++++++-
>>  controller/encaps.h         |   4 +-
>>  controller/ovn-controller.c |  26 +++++-
>>  tests/ovn.at                | 168 ++++++++++++++++++++++++++++++++++++
>>  4 files changed, 236 insertions(+), 4 deletions(-)
>>
>> diff --git a/controller/encaps.c b/controller/encaps.c
>> index 2662eaf98..4a79030b8 100644
>> --- a/controller/encaps.c
>> +++ b/controller/encaps.c
>> @@ -386,6 +386,21 @@ chassis_tzones_overlap(const struct sset *transport_zones,
>>      return false;
>>  }
>>
>> +static void
>> +clear_old_tunnels(const struct ovsrec_bridge *old_br_int, const char *prefix,
>> +                  size_t prefix_len)
>> +{
>> +    for (size_t i = 0; i < old_br_int->n_ports; i++) {
>> +        const struct ovsrec_port *port = old_br_int->ports[i];
>> +        const char *id = smap_get(&port->external_ids, "ovn-chassis-id");
>> +        if (id && !strncmp(port->name, prefix, prefix_len)) {
>> +            VLOG_DBG("Clearing old tunnel port \"%s\" (%s) from bridge "
>> +                     "\"%s\".", port->name, id, old_br_int->name);
>> +            ovsrec_bridge_update_ports_delvalue(old_br_int, port);
>> +        }
>> +    }
>> +}
>> +
>>  void
>>  encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
>>             const struct ovsrec_bridge *br_int,
>> @@ -393,12 +408,37 @@ encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
>>             const struct sbrec_chassis *this_chassis,
>>             const struct sbrec_sb_global *sbg,
>>             const struct ovsrec_open_vswitch_table *ovs_table,
>> -           const struct sset *transport_zones)
>> +           const struct sset *transport_zones,
>> +           const struct ovsrec_bridge_table *bridge_table,
>> +           const char *br_int_name)
>>  {
>>      if (!ovs_idl_txn || !br_int) {
>>          return;
>>      }
>>
>> +    if (!br_int_name) {
>> +        /* The controller has just started, we need to look through all
>> +         * bridges for old tunnel ports. */
>> +        char *tunnel_prefix = xasprintf("ovn%s-", get_chassis_idx(ovs_table));
>> +        size_t prefix_len = strlen(tunnel_prefix);
>> +
>> +        const struct ovsrec_bridge *br;
>> +        OVSREC_BRIDGE_TABLE_FOR_EACH (br, bridge_table) {
>> +            if (!strcmp(br->name, br_int->name)) {
>> +                continue;
>> +            }
>> +            clear_old_tunnels(br, tunnel_prefix, prefix_len);
>> +        }
>> +
>> +        free(tunnel_prefix);
>> +    } else if (strcmp(br_int_name, br_int->name)) {
>> +        /* The integration bridge was changed, clear tunnel ports from
>> +         * the old one. */
>> +        const struct ovsrec_bridge *old_br_int =
>> +            get_bridge(bridge_table, br_int_name);

Can't 'old_br_int' be NULL at this point?

Should we add this?

    if (old_br_int) {

>> +        clear_old_tunnels(old_br_int, "", 0);

    }

>> +    }
>> +
>>      const struct sbrec_chassis *chassis_rec;
>>
>>      struct tunnel_ctx tc = {
>> diff --git a/controller/encaps.h b/controller/encaps.h
>> index 867c6f28c..cf38dac1a 100644
>> --- a/controller/encaps.h
>> +++ b/controller/encaps.h
>> @@ -35,7 +35,9 @@ void encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
>>                  const struct sbrec_chassis *,
>>                  const struct sbrec_sb_global *,
>>                  const struct ovsrec_open_vswitch_table *,
>> -                const struct sset *transport_zones);
>> +                const struct sset *transport_zones,
>> +                const struct ovsrec_bridge_table *bridge_table,
>> +                const char *br_int_name);
>>
>>  bool encaps_cleanup(struct ovsdb_idl_txn *ovs_idl_txn,
>>                      const struct ovsrec_bridge *br_int);
>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>> index 76be2426e..242d93823 100644
>> --- a/controller/ovn-controller.c
>> +++ b/controller/ovn-controller.c
>> @@ -536,6 +536,21 @@ process_br_int(struct ovsdb_idl_txn *ovs_idl_txn,
>>      *br_int_ = br_int;
>>  }
>>
>> +static void
>> +consider_br_int_change(const struct ovsrec_bridge *br_int, char **current_name)
>> +{
>> +    ovs_assert(current_name);
>> +
>> +    if (!*current_name) {
>> +        *current_name = xstrdup(br_int->name);
>> +    }
>> +
>> +    if (strcmp(*current_name, br_int->name)) {
>> +        free(*current_name);
>> +        *current_name = xstrdup(br_int->name);
>> +    }
>> +}
>> +
>>  static void
>>  update_ssl_config(const struct ovsrec_ssl_table *ssl_table)
>>  {
>> @@ -4918,6 +4933,8 @@ main(int argc, char *argv[])
>>      char *ovn_version = ovn_get_internal_version();
>>      VLOG_INFO("OVN internal version is : [%s]", ovn_version);
>>
>> +    char *current_br_int_name = NULL;
>> +
>>      /* Main loop. */
>>      exiting = false;
>>      restart = false;
>> @@ -5070,7 +5087,9 @@ main(int argc, char *argv[])
>>                                 chassis,
>>                                 sbrec_sb_global_first(ovnsb_idl_loop.idl),
>>                                 ovs_table,
>> -                               &transport_zones);
>> +                               &transport_zones,
>> +                               bridge_table,
>> +                               current_br_int_name);
>>
>>                      stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
>>                                      time_msec());
>> @@ -5257,7 +5276,10 @@ main(int argc, char *argv[])
>>                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>>                                     time_msec());
>>                  }
>> -
>> +                /* The name needs to be reflected at the end of the block.
>> +                 * This allows us to detect br-int changes and act
>> +                 * accordingly. */
>> +                consider_br_int_change(br_int, &current_br_int_name);
>>              }
>>
>>              if (!engine_has_run()) {
>> diff --git a/tests/ovn.at b/tests/ovn.at
>> index a892691ca..65b7ef522 100644
>> --- a/tests/ovn.at
>> +++ b/tests/ovn.at
>> @@ -35179,3 +35179,171 @@ AT_CHECK([test "$(grep -c "Flushing CT for 5-tuple" hv1/ovn-controller.log)" = "
>>  OVN_CLEANUP([hv1])
>>  AT_CLEANUP
>>  ])
>> +
>> +OVN_FOR_EACH_NORTHD([
>> +AT_SETUP([Re-create encap tunnels during integration bridge migration])
>> +ovn_start
>> +net_add n1
>> +
>> +sim_add hv1
>> +as hv1
>> +ovs-vsctl add-br br-phys
>> +ovn_attach n1 br-phys 192.168.0.1
>> +
>> +sim_add hv2
>> +as hv2
>> +ovs-vsctl add-br br-phys
>> +ovn_attach n1 br-phys 192.168.0.2
>> +
>> +check ovn-nbctl --wait=hv sync
>> +
>> +check_tunnel_port() {
>> +    local hv=$1
>> +    local br=$2
>> +    local id=$3
>> +
>> +    as $hv
>> +    OVS_WAIT_UNTIL([
>> +        test "$(ovs-vsctl --format=table --no-headings find port external_ids:ovn-chassis-id="$id" | wc -l)" = "1"
>> +    ])
>> +    local tunnel_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="$id")
>> +    AT_CHECK([ovs-vsctl --bare --columns ports find bridge name="$br" | grep -q "$tunnel_id"])
>> +}
>> +
>> +# Check that both chassis have tunnel
>> +check_tunnel_port hv1 br-int hv2@192.168.0.2
>> +check_tunnel_port hv2 br-int hv1@192.168.0.1
>> +
>> +# Stop ovn-controller on hv1
>> +check as hv1 ovn-appctl -t ovn-controller exit --restart
>> +
>> +# The tunnel should remain intact
>> +check_tunnel_port hv1 br-int hv2@192.168.0.2
>> +
>> +# Change the bridge to br-int1 on hv1
>> +as hv1
>> +check ovs-vsctl add-br br-int1
>> +check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
>> +start_daemon ovn-controller --verbose="encaps:dbg"
>> +check ovn-nbctl --wait=hv sync
>> +
>> +# Check that the tunnel was created on br-int1 instead
>> +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
>> +check grep -q "Clearing old tunnel port \"ovn-hv2-0\" (hv2@192.168.0.2) from bridge \"br-int\"" hv1/ovn-controller.log
>> +
>> +# Change the bridge to br-int1 on hv2
>> +as hv2
>> +check ovn-appctl vlog/set encaps:dbg
>> +check ovs-vsctl add-br br-int1
>> +check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
>> +check ovn-nbctl --wait=hv sync
>> +
>> +
>> +# Check that the tunnel was created on br-int1 instead
>> +check_tunnel_port hv2 br-int1 hv1@192.168.0.1
>> +check grep -q "Clearing old tunnel port \"ovn-hv1-0\" (hv1@192.168.0.1) from bridge \"br-int\"" hv2/ovn-controller.log
>> +
>> +# Stop ovn-controller on hv1
>> +check as hv1 ovn-appctl -t ovn-controller exit --restart
>> +
>> +# The tunnel should remain intact
>> +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
>> +prev_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
>> +
>> +# Start the controller again
>> +start_daemon ovn-controller --verbose="encaps:dbg"
>> +check ovn-nbctl --wait=hv sync
>> +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
>> +current_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
>> +
>> +# The tunnel should be the same after restart
>> +check test "$current_id" = "$prev_id"
>> +
>> +OVN_CLEANUP([hv1],[hv2])
>> +AT_CLEANUP
>> +])
>> +
>> +# NOTE: This test case runs two ovn-controllers inside the same sandbox (hv1).
>> +# Each controller uses a unique chassis name - hv1 and hv2 - and manage
>> +# different bridges with different ports.
>> +OVN_FOR_EACH_NORTHD([
>> +AT_SETUP([Encaps tunnel cleanup does not interfere with multiple controller on the same host])
>> +ovn_start
>> +net_add n1
>> +
>> +sim_add hv1
>> +as hv1
>> +ovs-vsctl add-br br-phys-1
>> +ovn_attach n1 br-phys-1 192.168.0.1 24
>> +
>> +
>> +# now start the second virtual controller
>> +ovs-vsctl add-br br-phys-2
>> +
>> +
>> +# the file is read once at startup so it's safe to write it
>> +# here after the first ovn-controller has started
>> +echo hv2 > ${OVN_SYSCONFDIR}/system-id-override
>> +
>> +# for some reason SSL ovsdb configuration overrides CLI, so
>> +# delete ssl config from ovsdb to give CLI arguments priority
>> +ovs-vsctl del-ssl
>> +
>> +start_virtual_controller n1 br-phys-2 br-int-2 192.168.0.2 24 geneve,vxlan hv2 \
>> +    --pidfile=${OVS_RUNDIR}/ovn-controller-2.pid \
>> +    --log-file=${OVS_RUNDIR}/ovn-controller-2.log \
>> +    -p $PKIDIR/testpki-hv2-privkey.pem \
>> +    -c $PKIDIR/testpki-hv2-cert.pem \
>> +    -C $PKIDIR/testpki-cacert.pem
>> +pidfile="$OVS_RUNDIR"/ovn-controller-2.pid
>> +on_exit "test -e \"$pidfile\" && kill \`cat \"$pidfile\"\`"
>> +
>> +ovn-nbctl --wait=hv sync
>> +
>> +check_tunnel_port() {
>> +    local hv=$1
>> +    local br=$2
>> +    local id=$3
>> +
>> +    as $hv
>> +    OVS_WAIT_UNTIL([
>> +        test "$(ovs-vsctl --format=table --no-headings find port external_ids:ovn-chassis-id="$id" | wc -l)" = "1"
>> +    ])
>> +    local tunnel_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="$id")
>> +    AT_CHECK([ovs-vsctl --bare --columns ports find bridge name="$br" | grep -q "$tunnel_id"])
>> +}
>> +
>> +check_tunnel_port hv1 br-int hv2@192.168.0.2
>> +check_tunnel_port hv1 br-int-2 hv1@192.168.0.1
>> +prev_id1=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv1@192.168.0.1")
>> +prev_id2=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
>> +
>> +# The hv2 is running we can remove the override file
>> +rm -f ${OVN_SYSCONFDIR}/system-id-override
>> +
>> +check ovn-appctl -t ovn-controller exit --restart
>> +
>> +# for some reason SSL ovsdb configuration overrides CLI, so
>> +# delete ssl config from ovsdb to give CLI arguments priority
>> +ovs-vsctl del-ssl
>> +
>> +start_daemon ovn-controller --verbose="encaps:dbg" \
>> +    -p $PKIDIR/testpki-hv1-privkey.pem \
>> +    -c $PKIDIR/testpki-hv1-cert.pem \
>> +    -C $PKIDIR/testpki-cacert.pem
>> +
>> +check ovn-nbctl --wait=hv sync
>> +
>> +check_tunnel_port hv1 br-int hv2@192.168.0.2
>> +check_tunnel_port hv1 br-int-2 hv1@192.168.0.1
>> +current_id1=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv1@192.168.0.1")
>> +current_id2=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
>> +
>> +# Check that restart of hv1 ovn-controller did not interfere with hv2
>> +AT_CHECK([grep -q "Clearing old tunnel port \"ovn0-hv1-0\" (hv1@192.168.0.1) from bridge \"br-int-2\"" hv1/ovn-controller.log], [1])
>> +check test "$current_id1" = "$prev_id1"
>> +check test "$current_id2" = "$prev_id2"
>> +
>> +OVN_CLEANUP([hv1])
>> +AT_CLEANUP
>> +])
>> --
>> 2.39.2
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Ales Musil April 6, 2023, 12:26 p.m. UTC | #3
On Thu, Apr 6, 2023 at 1:43 PM Dumitru Ceara <dceara@redhat.com> wrote:

> On 4/5/23 15:18, Ihar Hrachyshka wrote:
> > Thank you for v2! All good now.
> >
> > Acked-By: Ihar Hrachyshka <ihrachys@redhat.com>
> >
>


>
> Thanks, Ales and Ihar!  I have a comment below.  If you're OK with that
> I can fold the suggested change in and apply the patch.
>
> Regards,
> Dumitru
>

Hi Dumitru,

thank you for the review. It makes sense to apply that change.

Thanks,
Ales


>
> > On Mon, Apr 3, 2023 at 5:50 AM Ales Musil <amusil@redhat.com> wrote:
> >>
> >> After integration bridge change the tunnels would
> >> stay on the old bridge preventing new tunnels creation
> >> and disrupting traffic. Detect the bridge change
> >> and clear the tunnels from the old integration bridge.
> >>
> >> Reported-at: https://bugzilla.redhat.com/2173635
> >> Signed-off-by: Ales Musil <amusil@redhat.com>
> >> ---
> >> v2: Make sure that we don't clear tunnels belonging to
> >>     other ovn-controllers on the same host.
> >> ---
> >>  controller/encaps.c         |  42 ++++++++-
> >>  controller/encaps.h         |   4 +-
> >>  controller/ovn-controller.c |  26 +++++-
> >>  tests/ovn.at                | 168 ++++++++++++++++++++++++++++++++++++
> >>  4 files changed, 236 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/controller/encaps.c b/controller/encaps.c
> >> index 2662eaf98..4a79030b8 100644
> >> --- a/controller/encaps.c
> >> +++ b/controller/encaps.c
> >> @@ -386,6 +386,21 @@ chassis_tzones_overlap(const struct sset
> *transport_zones,
> >>      return false;
> >>  }
> >>
> >> +static void
> >> +clear_old_tunnels(const struct ovsrec_bridge *old_br_int, const char
> *prefix,
> >> +                  size_t prefix_len)
> >> +{
> >> +    for (size_t i = 0; i < old_br_int->n_ports; i++) {
> >> +        const struct ovsrec_port *port = old_br_int->ports[i];
> >> +        const char *id = smap_get(&port->external_ids,
> "ovn-chassis-id");
> >> +        if (id && !strncmp(port->name, prefix, prefix_len)) {
> >> +            VLOG_DBG("Clearing old tunnel port \"%s\" (%s) from bridge
> "
> >> +                     "\"%s\".", port->name, id, old_br_int->name);
> >> +            ovsrec_bridge_update_ports_delvalue(old_br_int, port);
> >> +        }
> >> +    }
> >> +}
> >> +
> >>  void
> >>  encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
> >>             const struct ovsrec_bridge *br_int,
> >> @@ -393,12 +408,37 @@ encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
> >>             const struct sbrec_chassis *this_chassis,
> >>             const struct sbrec_sb_global *sbg,
> >>             const struct ovsrec_open_vswitch_table *ovs_table,
> >> -           const struct sset *transport_zones)
> >> +           const struct sset *transport_zones,
> >> +           const struct ovsrec_bridge_table *bridge_table,
> >> +           const char *br_int_name)
> >>  {
> >>      if (!ovs_idl_txn || !br_int) {
> >>          return;
> >>      }
> >>
> >> +    if (!br_int_name) {
> >> +        /* The controller has just started, we need to look through all
> >> +         * bridges for old tunnel ports. */
> >> +        char *tunnel_prefix = xasprintf("ovn%s-",
> get_chassis_idx(ovs_table));
> >> +        size_t prefix_len = strlen(tunnel_prefix);
> >> +
> >> +        const struct ovsrec_bridge *br;
> >> +        OVSREC_BRIDGE_TABLE_FOR_EACH (br, bridge_table) {
> >> +            if (!strcmp(br->name, br_int->name)) {
> >> +                continue;
> >> +            }
> >> +            clear_old_tunnels(br, tunnel_prefix, prefix_len);
> >> +        }
> >> +
> >> +        free(tunnel_prefix);
> >> +    } else if (strcmp(br_int_name, br_int->name)) {
> >> +        /* The integration bridge was changed, clear tunnel ports from
> >> +         * the old one. */
> >> +        const struct ovsrec_bridge *old_br_int =
> >> +            get_bridge(bridge_table, br_int_name);
>
> Can't 'old_br_int' be NULL at this point?
>
> Should we add this?
>
>     if (old_br_int) {
>

> >> +        clear_old_tunnels(old_br_int, "", 0);
>
>     }
>
> >> +    }
> >> +
> >>      const struct sbrec_chassis *chassis_rec;
> >>
> >>      struct tunnel_ctx tc = {
> >> diff --git a/controller/encaps.h b/controller/encaps.h
> >> index 867c6f28c..cf38dac1a 100644
> >> --- a/controller/encaps.h
> >> +++ b/controller/encaps.h
> >> @@ -35,7 +35,9 @@ void encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
> >>                  const struct sbrec_chassis *,
> >>                  const struct sbrec_sb_global *,
> >>                  const struct ovsrec_open_vswitch_table *,
> >> -                const struct sset *transport_zones);
> >> +                const struct sset *transport_zones,
> >> +                const struct ovsrec_bridge_table *bridge_table,
> >> +                const char *br_int_name);
> >>
> >>  bool encaps_cleanup(struct ovsdb_idl_txn *ovs_idl_txn,
> >>                      const struct ovsrec_bridge *br_int);
> >> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> >> index 76be2426e..242d93823 100644
> >> --- a/controller/ovn-controller.c
> >> +++ b/controller/ovn-controller.c
> >> @@ -536,6 +536,21 @@ process_br_int(struct ovsdb_idl_txn *ovs_idl_txn,
> >>      *br_int_ = br_int;
> >>  }
> >>
> >> +static void
> >> +consider_br_int_change(const struct ovsrec_bridge *br_int, char
> **current_name)
> >> +{
> >> +    ovs_assert(current_name);
> >> +
> >> +    if (!*current_name) {
> >> +        *current_name = xstrdup(br_int->name);
> >> +    }
> >> +
> >> +    if (strcmp(*current_name, br_int->name)) {
> >> +        free(*current_name);
> >> +        *current_name = xstrdup(br_int->name);
> >> +    }
> >> +}
> >> +
> >>  static void
> >>  update_ssl_config(const struct ovsrec_ssl_table *ssl_table)
> >>  {
> >> @@ -4918,6 +4933,8 @@ main(int argc, char *argv[])
> >>      char *ovn_version = ovn_get_internal_version();
> >>      VLOG_INFO("OVN internal version is : [%s]", ovn_version);
> >>
> >> +    char *current_br_int_name = NULL;
> >> +
> >>      /* Main loop. */
> >>      exiting = false;
> >>      restart = false;
> >> @@ -5070,7 +5087,9 @@ main(int argc, char *argv[])
> >>                                 chassis,
> >>
>  sbrec_sb_global_first(ovnsb_idl_loop.idl),
> >>                                 ovs_table,
> >> -                               &transport_zones);
> >> +                               &transport_zones,
> >> +                               bridge_table,
> >> +                               current_br_int_name);
> >>
> >>                      stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
> >>                                      time_msec());
> >> @@ -5257,7 +5276,10 @@ main(int argc, char *argv[])
> >>                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
> >>                                     time_msec());
> >>                  }
> >> -
> >> +                /* The name needs to be reflected at the end of the
> block.
> >> +                 * This allows us to detect br-int changes and act
> >> +                 * accordingly. */
> >> +                consider_br_int_change(br_int, &current_br_int_name);
> >>              }
> >>
> >>              if (!engine_has_run()) {
> >> diff --git a/tests/ovn.at b/tests/ovn.at
> >> index a892691ca..65b7ef522 100644
> >> --- a/tests/ovn.at
> >> +++ b/tests/ovn.at
> >> @@ -35179,3 +35179,171 @@ AT_CHECK([test "$(grep -c "Flushing CT for
> 5-tuple" hv1/ovn-controller.log)" = "
> >>  OVN_CLEANUP([hv1])
> >>  AT_CLEANUP
> >>  ])
> >> +
> >> +OVN_FOR_EACH_NORTHD([
> >> +AT_SETUP([Re-create encap tunnels during integration bridge migration])
> >> +ovn_start
> >> +net_add n1
> >> +
> >> +sim_add hv1
> >> +as hv1
> >> +ovs-vsctl add-br br-phys
> >> +ovn_attach n1 br-phys 192.168.0.1
> >> +
> >> +sim_add hv2
> >> +as hv2
> >> +ovs-vsctl add-br br-phys
> >> +ovn_attach n1 br-phys 192.168.0.2
> >> +
> >> +check ovn-nbctl --wait=hv sync
> >> +
> >> +check_tunnel_port() {
> >> +    local hv=$1
> >> +    local br=$2
> >> +    local id=$3
> >> +
> >> +    as $hv
> >> +    OVS_WAIT_UNTIL([
> >> +        test "$(ovs-vsctl --format=table --no-headings find port
> external_ids:ovn-chassis-id="$id" | wc -l)" = "1"
> >> +    ])
> >> +    local tunnel_id=$(ovs-vsctl --bare --columns _uuid find port
> external_ids:ovn-chassis-id="$id")
> >> +    AT_CHECK([ovs-vsctl --bare --columns ports find bridge name="$br"
> | grep -q "$tunnel_id"])
> >> +}
> >> +
> >> +# Check that both chassis have tunnel
> >> +check_tunnel_port hv1 br-int hv2@192.168.0.2
> >> +check_tunnel_port hv2 br-int hv1@192.168.0.1
> >> +
> >> +# Stop ovn-controller on hv1
> >> +check as hv1 ovn-appctl -t ovn-controller exit --restart
> >> +
> >> +# The tunnel should remain intact
> >> +check_tunnel_port hv1 br-int hv2@192.168.0.2
> >> +
> >> +# Change the bridge to br-int1 on hv1
> >> +as hv1
> >> +check ovs-vsctl add-br br-int1
> >> +check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
> >> +start_daemon ovn-controller --verbose="encaps:dbg"
> >> +check ovn-nbctl --wait=hv sync
> >> +
> >> +# Check that the tunnel was created on br-int1 instead
> >> +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
> >> +check grep -q "Clearing old tunnel port \"ovn-hv2-0\" (hv2@192.168.0.2)
> from bridge \"br-int\"" hv1/ovn-controller.log
> >> +
> >> +# Change the bridge to br-int1 on hv2
> >> +as hv2
> >> +check ovn-appctl vlog/set encaps:dbg
> >> +check ovs-vsctl add-br br-int1
> >> +check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
> >> +check ovn-nbctl --wait=hv sync
> >> +
> >> +
> >> +# Check that the tunnel was created on br-int1 instead
> >> +check_tunnel_port hv2 br-int1 hv1@192.168.0.1
> >> +check grep -q "Clearing old tunnel port \"ovn-hv1-0\" (hv1@192.168.0.1)
> from bridge \"br-int\"" hv2/ovn-controller.log
> >> +
> >> +# Stop ovn-controller on hv1
> >> +check as hv1 ovn-appctl -t ovn-controller exit --restart
> >> +
> >> +# The tunnel should remain intact
> >> +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
> >> +prev_id=$(ovs-vsctl --bare --columns _uuid find port
> external_ids:ovn-chassis-id="hv2@192.168.0.2")
> >> +
> >> +# Start the controller again
> >> +start_daemon ovn-controller --verbose="encaps:dbg"
> >> +check ovn-nbctl --wait=hv sync
> >> +check_tunnel_port hv1 br-int1 hv2@192.168.0.2
> >> +current_id=$(ovs-vsctl --bare --columns _uuid find port
> external_ids:ovn-chassis-id="hv2@192.168.0.2")
> >> +
> >> +# The tunnel should be the same after restart
> >> +check test "$current_id" = "$prev_id"
> >> +
> >> +OVN_CLEANUP([hv1],[hv2])
> >> +AT_CLEANUP
> >> +])
> >> +
> >> +# NOTE: This test case runs two ovn-controllers inside the same
> sandbox (hv1).
> >> +# Each controller uses a unique chassis name - hv1 and hv2 - and manage
> >> +# different bridges with different ports.
> >> +OVN_FOR_EACH_NORTHD([
> >> +AT_SETUP([Encaps tunnel cleanup does not interfere with multiple
> controller on the same host])
> >> +ovn_start
> >> +net_add n1
> >> +
> >> +sim_add hv1
> >> +as hv1
> >> +ovs-vsctl add-br br-phys-1
> >> +ovn_attach n1 br-phys-1 192.168.0.1 24
> >> +
> >> +
> >> +# now start the second virtual controller
> >> +ovs-vsctl add-br br-phys-2
> >> +
> >> +
> >> +# the file is read once at startup so it's safe to write it
> >> +# here after the first ovn-controller has started
> >> +echo hv2 > ${OVN_SYSCONFDIR}/system-id-override
> >> +
> >> +# for some reason SSL ovsdb configuration overrides CLI, so
> >> +# delete ssl config from ovsdb to give CLI arguments priority
> >> +ovs-vsctl del-ssl
> >> +
> >> +start_virtual_controller n1 br-phys-2 br-int-2 192.168.0.2 24
> geneve,vxlan hv2 \
> >> +    --pidfile=${OVS_RUNDIR}/ovn-controller-2.pid \
> >> +    --log-file=${OVS_RUNDIR}/ovn-controller-2.log \
> >> +    -p $PKIDIR/testpki-hv2-privkey.pem \
> >> +    -c $PKIDIR/testpki-hv2-cert.pem \
> >> +    -C $PKIDIR/testpki-cacert.pem
> >> +pidfile="$OVS_RUNDIR"/ovn-controller-2.pid
> >> +on_exit "test -e \"$pidfile\" && kill \`cat \"$pidfile\"\`"
> >> +
> >> +ovn-nbctl --wait=hv sync
> >> +
> >> +check_tunnel_port() {
> >> +    local hv=$1
> >> +    local br=$2
> >> +    local id=$3
> >> +
> >> +    as $hv
> >> +    OVS_WAIT_UNTIL([
> >> +        test "$(ovs-vsctl --format=table --no-headings find port
> external_ids:ovn-chassis-id="$id" | wc -l)" = "1"
> >> +    ])
> >> +    local tunnel_id=$(ovs-vsctl --bare --columns _uuid find port
> external_ids:ovn-chassis-id="$id")
> >> +    AT_CHECK([ovs-vsctl --bare --columns ports find bridge name="$br"
> | grep -q "$tunnel_id"])
> >> +}
> >> +
> >> +check_tunnel_port hv1 br-int hv2@192.168.0.2
> >> +check_tunnel_port hv1 br-int-2 hv1@192.168.0.1
> >> +prev_id1=$(ovs-vsctl --bare --columns _uuid find port
> external_ids:ovn-chassis-id="hv1@192.168.0.1")
> >> +prev_id2=$(ovs-vsctl --bare --columns _uuid find port
> external_ids:ovn-chassis-id="hv2@192.168.0.2")
> >> +
> >> +# The hv2 is running we can remove the override file
> >> +rm -f ${OVN_SYSCONFDIR}/system-id-override
> >> +
> >> +check ovn-appctl -t ovn-controller exit --restart
> >> +
> >> +# for some reason SSL ovsdb configuration overrides CLI, so
> >> +# delete ssl config from ovsdb to give CLI arguments priority
> >> +ovs-vsctl del-ssl
> >> +
> >> +start_daemon ovn-controller --verbose="encaps:dbg" \
> >> +    -p $PKIDIR/testpki-hv1-privkey.pem \
> >> +    -c $PKIDIR/testpki-hv1-cert.pem \
> >> +    -C $PKIDIR/testpki-cacert.pem
> >> +
> >> +check ovn-nbctl --wait=hv sync
> >> +
> >> +check_tunnel_port hv1 br-int hv2@192.168.0.2
> >> +check_tunnel_port hv1 br-int-2 hv1@192.168.0.1
> >> +current_id1=$(ovs-vsctl --bare --columns _uuid find port
> external_ids:ovn-chassis-id="hv1@192.168.0.1")
> >> +current_id2=$(ovs-vsctl --bare --columns _uuid find port
> external_ids:ovn-chassis-id="hv2@192.168.0.2")
> >> +
> >> +# Check that restart of hv1 ovn-controller did not interfere with hv2
> >> +AT_CHECK([grep -q "Clearing old tunnel port \"ovn0-hv1-0\" (
> hv1@192.168.0.1) from bridge \"br-int-2\"" hv1/ovn-controller.log], [1])
> >> +check test "$current_id1" = "$prev_id1"
> >> +check test "$current_id2" = "$prev_id2"
> >> +
> >> +OVN_CLEANUP([hv1])
> >> +AT_CLEANUP
> >> +])
> >> --
> >> 2.39.2
> >>
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >>
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Simon Horman April 6, 2023, 1:05 p.m. UTC | #4
On Mon, Apr 03, 2023 at 11:50:26AM +0200, Ales Musil wrote:
> After integration bridge change the tunnels would
> stay on the old bridge preventing new tunnels creation
> and disrupting traffic. Detect the bridge change
> and clear the tunnels from the old integration bridge.
> 
> Reported-at: https://bugzilla.redhat.com/2173635
> Signed-off-by: Ales Musil <amusil@redhat.com>

...

> @@ -4918,6 +4933,8 @@ main(int argc, char *argv[])
>      char *ovn_version = ovn_get_internal_version();
>      VLOG_INFO("OVN internal version is : [%s]", ovn_version);
>  
> +    char *current_br_int_name = NULL;
> +
>      /* Main loop. */
>      exiting = false;
>      restart = false;
> @@ -5070,7 +5087,9 @@ main(int argc, char *argv[])
>                                 chassis,
>                                 sbrec_sb_global_first(ovnsb_idl_loop.idl),
>                                 ovs_table,
> -                               &transport_zones);
> +                               &transport_zones,
> +                               bridge_table,
> +                               current_br_int_name);
>  
>                      stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
>                                      time_msec());
> @@ -5257,7 +5276,10 @@ main(int argc, char *argv[])
>                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
>                                     time_msec());
>                  }
> -
> +                /* The name needs to be reflected at the end of the block.
> +                 * This allows us to detect br-int changes and act
> +                 * accordingly. */
> +                consider_br_int_change(br_int, &current_br_int_name);
>              }

The above call to consider_br_int_change()  will (sometimes)
allocate memory for current_br_int_name.

Should current_br_int_name be freed when the main loop exits?

>  
>              if (!engine_has_run()) {
Ales Musil April 6, 2023, 2:26 p.m. UTC | #5
On Thu, Apr 6, 2023 at 3:05 PM Simon Horman <simon.horman@corigine.com>
wrote:

> On Mon, Apr 03, 2023 at 11:50:26AM +0200, Ales Musil wrote:
> > After integration bridge change the tunnels would
> > stay on the old bridge preventing new tunnels creation
> > and disrupting traffic. Detect the bridge change
> > and clear the tunnels from the old integration bridge.
> >
> > Reported-at: https://bugzilla.redhat.com/2173635
> > Signed-off-by: Ales Musil <amusil@redhat.com>
>
> ...
>
> > @@ -4918,6 +4933,8 @@ main(int argc, char *argv[])
> >      char *ovn_version = ovn_get_internal_version();
> >      VLOG_INFO("OVN internal version is : [%s]", ovn_version);
> >
> > +    char *current_br_int_name = NULL;
> > +
> >      /* Main loop. */
> >      exiting = false;
> >      restart = false;
> > @@ -5070,7 +5087,9 @@ main(int argc, char *argv[])
> >                                 chassis,
> >
>  sbrec_sb_global_first(ovnsb_idl_loop.idl),
> >                                 ovs_table,
> > -                               &transport_zones);
> > +                               &transport_zones,
> > +                               bridge_table,
> > +                               current_br_int_name);
> >
> >                      stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
> >                                      time_msec());
> > @@ -5257,7 +5276,10 @@ main(int argc, char *argv[])
> >                      stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
> >                                     time_msec());
> >                  }
> > -
> > +                /* The name needs to be reflected at the end of the
> block.
> > +                 * This allows us to detect br-int changes and act
> > +                 * accordingly. */
> > +                consider_br_int_change(br_int, &current_br_int_name);
> >              }
>
> The above call to consider_br_int_change()  will (sometimes)
> allocate memory for current_br_int_name.
>
> Should current_br_int_name be freed when the main loop exits?
>

Right, I'll take care of it in v3, thanks.


>
> >
> >              if (!engine_has_run()) {
>
>

Thanks,
Ales
diff mbox series

Patch

diff --git a/controller/encaps.c b/controller/encaps.c
index 2662eaf98..4a79030b8 100644
--- a/controller/encaps.c
+++ b/controller/encaps.c
@@ -386,6 +386,21 @@  chassis_tzones_overlap(const struct sset *transport_zones,
     return false;
 }
 
+static void
+clear_old_tunnels(const struct ovsrec_bridge *old_br_int, const char *prefix,
+                  size_t prefix_len)
+{
+    for (size_t i = 0; i < old_br_int->n_ports; i++) {
+        const struct ovsrec_port *port = old_br_int->ports[i];
+        const char *id = smap_get(&port->external_ids, "ovn-chassis-id");
+        if (id && !strncmp(port->name, prefix, prefix_len)) {
+            VLOG_DBG("Clearing old tunnel port \"%s\" (%s) from bridge "
+                     "\"%s\".", port->name, id, old_br_int->name);
+            ovsrec_bridge_update_ports_delvalue(old_br_int, port);
+        }
+    }
+}
+
 void
 encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
            const struct ovsrec_bridge *br_int,
@@ -393,12 +408,37 @@  encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
            const struct sbrec_chassis *this_chassis,
            const struct sbrec_sb_global *sbg,
            const struct ovsrec_open_vswitch_table *ovs_table,
-           const struct sset *transport_zones)
+           const struct sset *transport_zones,
+           const struct ovsrec_bridge_table *bridge_table,
+           const char *br_int_name)
 {
     if (!ovs_idl_txn || !br_int) {
         return;
     }
 
+    if (!br_int_name) {
+        /* The controller has just started, we need to look through all
+         * bridges for old tunnel ports. */
+        char *tunnel_prefix = xasprintf("ovn%s-", get_chassis_idx(ovs_table));
+        size_t prefix_len = strlen(tunnel_prefix);
+
+        const struct ovsrec_bridge *br;
+        OVSREC_BRIDGE_TABLE_FOR_EACH (br, bridge_table) {
+            if (!strcmp(br->name, br_int->name)) {
+                continue;
+            }
+            clear_old_tunnels(br, tunnel_prefix, prefix_len);
+        }
+
+        free(tunnel_prefix);
+    } else if (strcmp(br_int_name, br_int->name)) {
+        /* The integration bridge was changed, clear tunnel ports from
+         * the old one. */
+        const struct ovsrec_bridge *old_br_int =
+            get_bridge(bridge_table, br_int_name);
+        clear_old_tunnels(old_br_int, "", 0);
+    }
+
     const struct sbrec_chassis *chassis_rec;
 
     struct tunnel_ctx tc = {
diff --git a/controller/encaps.h b/controller/encaps.h
index 867c6f28c..cf38dac1a 100644
--- a/controller/encaps.h
+++ b/controller/encaps.h
@@ -35,7 +35,9 @@  void encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
                 const struct sbrec_chassis *,
                 const struct sbrec_sb_global *,
                 const struct ovsrec_open_vswitch_table *,
-                const struct sset *transport_zones);
+                const struct sset *transport_zones,
+                const struct ovsrec_bridge_table *bridge_table,
+                const char *br_int_name);
 
 bool encaps_cleanup(struct ovsdb_idl_txn *ovs_idl_txn,
                     const struct ovsrec_bridge *br_int);
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 76be2426e..242d93823 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -536,6 +536,21 @@  process_br_int(struct ovsdb_idl_txn *ovs_idl_txn,
     *br_int_ = br_int;
 }
 
+static void
+consider_br_int_change(const struct ovsrec_bridge *br_int, char **current_name)
+{
+    ovs_assert(current_name);
+
+    if (!*current_name) {
+        *current_name = xstrdup(br_int->name);
+    }
+
+    if (strcmp(*current_name, br_int->name)) {
+        free(*current_name);
+        *current_name = xstrdup(br_int->name);
+    }
+}
+
 static void
 update_ssl_config(const struct ovsrec_ssl_table *ssl_table)
 {
@@ -4918,6 +4933,8 @@  main(int argc, char *argv[])
     char *ovn_version = ovn_get_internal_version();
     VLOG_INFO("OVN internal version is : [%s]", ovn_version);
 
+    char *current_br_int_name = NULL;
+
     /* Main loop. */
     exiting = false;
     restart = false;
@@ -5070,7 +5087,9 @@  main(int argc, char *argv[])
                                chassis,
                                sbrec_sb_global_first(ovnsb_idl_loop.idl),
                                ovs_table,
-                               &transport_zones);
+                               &transport_zones,
+                               bridge_table,
+                               current_br_int_name);
 
                     stopwatch_start(CONTROLLER_LOOP_STOPWATCH_NAME,
                                     time_msec());
@@ -5257,7 +5276,10 @@  main(int argc, char *argv[])
                     stopwatch_stop(IF_STATUS_MGR_RUN_STOPWATCH_NAME,
                                    time_msec());
                 }
-
+                /* The name needs to be reflected at the end of the block.
+                 * This allows us to detect br-int changes and act
+                 * accordingly. */
+                consider_br_int_change(br_int, &current_br_int_name);
             }
 
             if (!engine_has_run()) {
diff --git a/tests/ovn.at b/tests/ovn.at
index a892691ca..65b7ef522 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -35179,3 +35179,171 @@  AT_CHECK([test "$(grep -c "Flushing CT for 5-tuple" hv1/ovn-controller.log)" = "
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([Re-create encap tunnels during integration bridge migration])
+ovn_start
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+sim_add hv2
+as hv2
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.2
+
+check ovn-nbctl --wait=hv sync
+
+check_tunnel_port() {
+    local hv=$1
+    local br=$2
+    local id=$3
+
+    as $hv
+    OVS_WAIT_UNTIL([
+        test "$(ovs-vsctl --format=table --no-headings find port external_ids:ovn-chassis-id="$id" | wc -l)" = "1"
+    ])
+    local tunnel_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="$id")
+    AT_CHECK([ovs-vsctl --bare --columns ports find bridge name="$br" | grep -q "$tunnel_id"])
+}
+
+# Check that both chassis have tunnel
+check_tunnel_port hv1 br-int hv2@192.168.0.2
+check_tunnel_port hv2 br-int hv1@192.168.0.1
+
+# Stop ovn-controller on hv1
+check as hv1 ovn-appctl -t ovn-controller exit --restart
+
+# The tunnel should remain intact
+check_tunnel_port hv1 br-int hv2@192.168.0.2
+
+# Change the bridge to br-int1 on hv1
+as hv1
+check ovs-vsctl add-br br-int1
+check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
+start_daemon ovn-controller --verbose="encaps:dbg"
+check ovn-nbctl --wait=hv sync
+
+# Check that the tunnel was created on br-int1 instead
+check_tunnel_port hv1 br-int1 hv2@192.168.0.2
+check grep -q "Clearing old tunnel port \"ovn-hv2-0\" (hv2@192.168.0.2) from bridge \"br-int\"" hv1/ovn-controller.log
+
+# Change the bridge to br-int1 on hv2
+as hv2
+check ovn-appctl vlog/set encaps:dbg
+check ovs-vsctl add-br br-int1
+check ovs-vsctl set open . external_ids:ovn-bridge="br-int1"
+check ovn-nbctl --wait=hv sync
+
+
+# Check that the tunnel was created on br-int1 instead
+check_tunnel_port hv2 br-int1 hv1@192.168.0.1
+check grep -q "Clearing old tunnel port \"ovn-hv1-0\" (hv1@192.168.0.1) from bridge \"br-int\"" hv2/ovn-controller.log
+
+# Stop ovn-controller on hv1
+check as hv1 ovn-appctl -t ovn-controller exit --restart
+
+# The tunnel should remain intact
+check_tunnel_port hv1 br-int1 hv2@192.168.0.2
+prev_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
+
+# Start the controller again
+start_daemon ovn-controller --verbose="encaps:dbg"
+check ovn-nbctl --wait=hv sync
+check_tunnel_port hv1 br-int1 hv2@192.168.0.2
+current_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
+
+# The tunnel should be the same after restart
+check test "$current_id" = "$prev_id"
+
+OVN_CLEANUP([hv1],[hv2])
+AT_CLEANUP
+])
+
+# NOTE: This test case runs two ovn-controllers inside the same sandbox (hv1).
+# Each controller uses a unique chassis name - hv1 and hv2 - and manage
+# different bridges with different ports.
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([Encaps tunnel cleanup does not interfere with multiple controller on the same host])
+ovn_start
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys-1
+ovn_attach n1 br-phys-1 192.168.0.1 24
+
+
+# now start the second virtual controller
+ovs-vsctl add-br br-phys-2
+
+
+# the file is read once at startup so it's safe to write it
+# here after the first ovn-controller has started
+echo hv2 > ${OVN_SYSCONFDIR}/system-id-override
+
+# for some reason SSL ovsdb configuration overrides CLI, so
+# delete ssl config from ovsdb to give CLI arguments priority
+ovs-vsctl del-ssl
+
+start_virtual_controller n1 br-phys-2 br-int-2 192.168.0.2 24 geneve,vxlan hv2 \
+    --pidfile=${OVS_RUNDIR}/ovn-controller-2.pid \
+    --log-file=${OVS_RUNDIR}/ovn-controller-2.log \
+    -p $PKIDIR/testpki-hv2-privkey.pem \
+    -c $PKIDIR/testpki-hv2-cert.pem \
+    -C $PKIDIR/testpki-cacert.pem
+pidfile="$OVS_RUNDIR"/ovn-controller-2.pid
+on_exit "test -e \"$pidfile\" && kill \`cat \"$pidfile\"\`"
+
+ovn-nbctl --wait=hv sync
+
+check_tunnel_port() {
+    local hv=$1
+    local br=$2
+    local id=$3
+
+    as $hv
+    OVS_WAIT_UNTIL([
+        test "$(ovs-vsctl --format=table --no-headings find port external_ids:ovn-chassis-id="$id" | wc -l)" = "1"
+    ])
+    local tunnel_id=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="$id")
+    AT_CHECK([ovs-vsctl --bare --columns ports find bridge name="$br" | grep -q "$tunnel_id"])
+}
+
+check_tunnel_port hv1 br-int hv2@192.168.0.2
+check_tunnel_port hv1 br-int-2 hv1@192.168.0.1
+prev_id1=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv1@192.168.0.1")
+prev_id2=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
+
+# The hv2 is running we can remove the override file
+rm -f ${OVN_SYSCONFDIR}/system-id-override
+
+check ovn-appctl -t ovn-controller exit --restart
+
+# for some reason SSL ovsdb configuration overrides CLI, so
+# delete ssl config from ovsdb to give CLI arguments priority
+ovs-vsctl del-ssl
+
+start_daemon ovn-controller --verbose="encaps:dbg" \
+    -p $PKIDIR/testpki-hv1-privkey.pem \
+    -c $PKIDIR/testpki-hv1-cert.pem \
+    -C $PKIDIR/testpki-cacert.pem
+
+check ovn-nbctl --wait=hv sync
+
+check_tunnel_port hv1 br-int hv2@192.168.0.2
+check_tunnel_port hv1 br-int-2 hv1@192.168.0.1
+current_id1=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv1@192.168.0.1")
+current_id2=$(ovs-vsctl --bare --columns _uuid find port external_ids:ovn-chassis-id="hv2@192.168.0.2")
+
+# Check that restart of hv1 ovn-controller did not interfere with hv2
+AT_CHECK([grep -q "Clearing old tunnel port \"ovn0-hv1-0\" (hv1@192.168.0.1) from bridge \"br-int-2\"" hv1/ovn-controller.log], [1])
+check test "$current_id1" = "$prev_id1"
+check test "$current_id2" = "$prev_id2"
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])