diff mbox series

[ovs-dev] encaps: Support backward compatibility for tunnel chassis id change.

Message ID 20240216065230.2350314-1-hzhou@ovn.org
State Accepted
Delegated to: Dumitru Ceara
Headers show
Series [ovs-dev] encaps: Support backward compatibility for tunnel chassis id change. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Han Zhou Feb. 16, 2024, 6:52 a.m. UTC
In commit 41eefcb2807d, the format of external_ids:ovn-chassis-id for
tunnels was modified to include the local encapsulation IP. This change
can lead to the recreation of tunnels during an upgrade, potentially
disrupting the dataplane temporarily, especially in large-scale
environments.

This patch resolves the issue by recognizing the previous format. Thus,
if the only modification is to the ID format, the tunnel will not be
recreated. Instead, the external_ids will be updated directly. This
approach ensures that the upgrade process is non-disruptive.

Fixes: 41eefcb2807d ("encaps: Create separate tunnels for multiple local encap IPs.")
Signed-off-by: Han Zhou <hzhou@ovn.org>
---
 controller/encaps.c     | 83 ++++++++++++++++++++++++++++++++---------
 tests/ovn-controller.at | 44 ++++++++++++++++++++++
 2 files changed, 110 insertions(+), 17 deletions(-)

Comments

Dumitru Ceara March 1, 2024, 10:26 a.m. UTC | #1
On 2/16/24 07:52, Han Zhou wrote:
> In commit 41eefcb2807d, the format of external_ids:ovn-chassis-id for
> tunnels was modified to include the local encapsulation IP. This change
> can lead to the recreation of tunnels during an upgrade, potentially
> disrupting the dataplane temporarily, especially in large-scale
> environments.
> 
> This patch resolves the issue by recognizing the previous format. Thus,
> if the only modification is to the ID format, the tunnel will not be
> recreated. Instead, the external_ids will be updated directly. This
> approach ensures that the upgrade process is non-disruptive.
> 
> Fixes: 41eefcb2807d ("encaps: Create separate tunnels for multiple local encap IPs.")
> Signed-off-by: Han Zhou <hzhou@ovn.org>
> ---

Hi Han,

>  controller/encaps.c     | 83 ++++++++++++++++++++++++++++++++---------
>  tests/ovn-controller.at | 44 ++++++++++++++++++++++
>  2 files changed, 110 insertions(+), 17 deletions(-)
> 
> diff --git a/controller/encaps.c b/controller/encaps.c
> index 28237f6191c8..1d0d47523e77 100644
> --- a/controller/encaps.c
> +++ b/controller/encaps.c
> @@ -104,40 +104,62 @@ encaps_tunnel_id_create(const char *chassis_id, const char *remote_encap_ip,
>                       '%', local_encap_ip);
>  }
>  
> +/*
> + * The older version of encaps_tunnel_id_create, which doesn't include
> + * local_encap_ip in the ID. This is used for backward compatibility support.
> + */
> +static char *
> +encaps_tunnel_id_create_old(const char *chassis_id,

Nit: I'd call this encaps_tunnel_id_create_legacy().

> +                            const char *remote_encap_ip)
> +{
> +    return xasprintf("%s%c%s", chassis_id, '@', remote_encap_ip);
> +}
> +
>  /*
>   * Parses a 'tunnel_id' of the form <chassis_name>@<remote IP>%<local IP>.
>   * If the 'chassis_id' argument is not NULL the function will allocate memory
>   * and store the chassis_name part of the tunnel-id at '*chassis_id'.
>   * Same for remote_encap_ip and local_encap_ip.
> + *
> + * The old form <chassis_name>@<remote IP> is also supported for backward
> + * compatibility during upgrade.
>   */
>  bool
>  encaps_tunnel_id_parse(const char *tunnel_id, char **chassis_id,
>                         char **remote_encap_ip, char **local_encap_ip)
>  {
> -    /* Find the @.  Fail if there is no @ or if any part is empty. */
> -    const char *d = strchr(tunnel_id, '@');
> -    if (d == tunnel_id || !d || !d[1]) {
> -        return false;
> +    char *tokstr = xstrdup(tunnel_id);
> +    char *saveptr = NULL;
> +    bool ret = false;
> +
> +    char *token_chassis = strtok_r(tokstr, "@", &saveptr);
> +    if (!token_chassis) {
> +        goto out;
>      }
>  
> -    /* Find the %.  Fail if there is no % or if any part is empty. */
> -    const char *d2 = strchr(d + 1, '%');
> -    if (d2 == d + 1 || !d2 || !d2[1]) {
> -        return false;
> +    char *token_remote_ip = strtok_r(NULL, "%", &saveptr);
> +    if (!token_remote_ip) {
> +        goto out;
>      }
>  
> +    char *token_local_ip = strtok_r(NULL, "", &saveptr);
> +
>      if (chassis_id) {
> -        *chassis_id = xmemdup0(tunnel_id, d - tunnel_id);
> +        *chassis_id = xstrdup(token_chassis);
>      }
> -
>      if (remote_encap_ip) {
> -        *remote_encap_ip = xmemdup0(d + 1, d2 - (d + 1));
> +        *remote_encap_ip = xstrdup(token_remote_ip);
>      }
> -
>      if (local_encap_ip) {
> -        *local_encap_ip = xstrdup(d2 + 1);
> +        /* To support backward compatibility during upgrade, ignore local ip if
> +         * it is not encoded in the tunnel id yet. */
> +        *local_encap_ip = token_local_ip ? xstrdup(token_local_ip) : NULL;

This can be simplified to:

*local_encap_ip = nullable_xstrdup(token_local_ip);

>      }
> -    return true;
> +
> +    ret = true;
> +out:
> +    free(tokstr);
> +    return ret;
>  }

I think I'd use strsep instead, seems a tiny bit cleaner.  I see
encaps_tunnel_id_match() also uses strtok_r() so I'll leave it up
to you to decide whether we should follow the same style or not.

I'll leave the strsep() alternative here just in case:

bool
encaps_tunnel_id_parse(const char *tunnel_id, char **chassis_id,
                       char **remote_encap_ip, char **local_encap_ip)
{
    char *str = xstrdup(tunnel_id);
    char *next = str;
    bool ret = false;

    char *token_chassis = strsep(&next, "@");
    if (!token_chassis) {
        goto out;
    }

    char *token_remote_ip = strsep(&next, "%");
    if (!token_remote_ip) {
        goto out;
    }

    char *token_local_ip = next;
    if (chassis_id) {
        *chassis_id = xstrdup(token_chassis);
    }
    if (remote_encap_ip) {
        *remote_encap_ip = xstrdup(token_remote_ip);
    }
    if (local_encap_ip) {
        /* To support backward compatibility during upgrade, ignore local ip if
         * it is not encoded in the tunnel id yet. */
        *local_encap_ip = nullable_xstrdup(token_local_ip);
    }

    ret = true;
out:
    free(str);
    return ret;
}

>  
>  /*
> @@ -145,6 +167,10 @@ encaps_tunnel_id_parse(const char *tunnel_id, char **chassis_id,
>   *      <chassis_id>@<remote_encap_ip>%<local_encap_ip>
>   * contains 'chassis_id' and, if specified, the given 'remote_encap_ip' and
>   * 'local_encap_ip'. Returns false otherwise.
> + *
> + * The old format <chassis_id>@<remote_encap_ip> is also supported for backward
> + * compatibility during upgrade, and the local_encap_ip matching is ignored in
> + * that case.
>   */
>  bool
>  encaps_tunnel_id_match(const char *tunnel_id, const char *chassis_id,
> @@ -166,8 +192,10 @@ encaps_tunnel_id_match(const char *tunnel_id, const char *chassis_id,
>      }
>  
>      char *token_local_ip = strtok_r(NULL, "", &saveptr);
> -    if (local_encap_ip &&
> -        (!token_local_ip || strcmp(token_local_ip, local_encap_ip))) {
> +    if (!token_local_ip) {
> +        /* It is old format. To support backward compatibility during upgrade,
> +         * just ignore local_ip. */
> +    } else if (local_encap_ip && strcmp(token_local_ip, local_encap_ip)) {
>          goto out;
>      }
>  
> @@ -189,6 +217,7 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
>      const char *dst_port = smap_get(&encap->options, "dst_port");
>      const char *csum = smap_get(&encap->options, "csum");
>      char *tunnel_entry_id = NULL;
> +    char *tunnel_entry_id_old = NULL;
>  
>      /*
>       * Since a chassis may have multiple encap-ip, we can't just add the
> @@ -198,6 +227,8 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
>       */
>      tunnel_entry_id = encaps_tunnel_id_create(new_chassis_id, encap->ip,
>                                                local_ip);
> +    tunnel_entry_id_old = encaps_tunnel_id_create_old(new_chassis_id,
> +                                                      encap->ip);
>      if (csum && (!strcmp(csum, "true") || !strcmp(csum, "false"))) {
>          smap_add(&options, "csum", csum);
>      }
> @@ -269,11 +300,28 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
>       * it). */
>      struct tunnel_node *tunnel = shash_find_data(&tc->tunnel,
>                                                   tunnel_entry_id);
> +    bool old_id_format = false;
> +    if (!tunnel) {
> +        tunnel = shash_find_data(&tc->tunnel, tunnel_entry_id_old);
> +        old_id_format = true;
> +    }
>      if (tunnel
>          && tunnel->port->n_interfaces == 1
>          && !strcmp(tunnel->port->interfaces[0]->type, encap->type)
>          && smap_equal(&tunnel->port->interfaces[0]->options, &options)) {
> -        shash_find_and_delete(&tc->tunnel, tunnel_entry_id);
> +        if (old_id_format) {
> +            /* We must be upgrading from an older version. We can reuse the
> +             * existing tunnel, but needs to update the tunnel's ID to the new
> +             * format. */
> +            ovsrec_port_update_external_ids_setkey(tunnel->port, OVN_TUNNEL_ID,
> +                                                   tunnel_entry_id);
> +            ovsrec_interface_update_external_ids_setkey(
> +                tunnel->port->interfaces[0], OVN_TUNNEL_ID, tunnel_entry_id);
> +
> +            shash_find_and_delete(&tc->tunnel, tunnel_entry_id_old);
> +        } else {
> +            shash_find_and_delete(&tc->tunnel, tunnel_entry_id);
> +        }
>          free(tunnel);
>          goto exit;
>      }
> @@ -306,6 +354,7 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
>  
>  exit:
>      free(tunnel_entry_id);
> +    free(tunnel_entry_id_old);
>      smap_destroy(&options);
>  }
>  
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index f77e032d4839..0b60806089cf 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -2732,3 +2732,47 @@ OVS_WAIT_FOR_OUTPUT([ovn-appctl -t ovn-controller connection-status], [0], [conn
>  
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn-controller - tunnel chassis id backward compatibility])
> +
> +ovn_start
> +
> +net_add n1
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +
> +ovn-nbctl --wait=sb sync

I assume this is to ensure that the the NB_Global/SB_Global contents are
populated before we continue.

In any case, it probably needs a "check" prefix.

> +wait_row_count Chassis 1 name=hv1
> +
> +ovn-sbctl chassis-add fakechassis geneve 192.168.0.2
> +fakech_tunnel=ovn-fakech-0
> +OVS_WAIT_UNTIL([ovs-vsctl list port $fakech_tunnel])
> +port_uuid=$(ovs-vsctl get port $fakech_tunnel _uuid)
> +
> +AT_CHECK([ovs-vsctl get port $fakech_tunnel external_ids:ovn-chassis-id], [0], [dnl
> +"fakechassis@192.168.0.2%192.168.0.1"
> +])
> +
> +# Stop ovn-controller without deleting tunnel
> +ovs-appctl --timeout=10 -t ovn-controller exit --restart

This should be ovn-appctl and we should prefix it with "check".  Why
isn't the default 30 second timeout good enough?

I think this should do:
check ovn-appctl -t ovn-controller exit --restart

> +
> +# Change the tunnel external id to the old format, and then start
> +# ovn-controller, pretending we are upgrading from an older version.
> +ovs-vsctl set port $fakech_tunnel external_ids:ovn-chassis-id=fakechassis@192.168.0.2
> +AT_CHECK([ovs-vsctl get port $fakech_tunnel external_ids:ovn-chassis-id], [0], [dnl
> +"fakechassis@192.168.0.2"
> +])
> +
> +start_daemon ovn-controller
> +
> +# The tunnel id should be updated to the new format but the tunnel's uuid
> +# should kept the same (no recreation).
> +OVS_WAIT_UNTIL([test x$(ovs-vsctl get port $fakech_tunnel external_ids:ovn-chassis-id) = x\"fakechassis@192.168.0.2%192.168.0.1\"])
> +AT_CHECK([test x"$port_uuid"=$(ovs-vsctl get port $fakech_tunnel _uuid)])
> +
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> +])

With the above small issues fixed:

Acked-by: Dumitru Ceara <dceara@redhat.com>

Regards,
Dumitru
Han Zhou March 1, 2024, 5:50 p.m. UTC | #2
On Fri, Mar 1, 2024 at 2:26 AM Dumitru Ceara <dceara@redhat.com> wrote:
>
> On 2/16/24 07:52, Han Zhou wrote:
> > In commit 41eefcb2807d, the format of external_ids:ovn-chassis-id for
> > tunnels was modified to include the local encapsulation IP. This change
> > can lead to the recreation of tunnels during an upgrade, potentially
> > disrupting the dataplane temporarily, especially in large-scale
> > environments.
> >
> > This patch resolves the issue by recognizing the previous format. Thus,
> > if the only modification is to the ID format, the tunnel will not be
> > recreated. Instead, the external_ids will be updated directly. This
> > approach ensures that the upgrade process is non-disruptive.
> >
> > Fixes: 41eefcb2807d ("encaps: Create separate tunnels for multiple
local encap IPs.")
> > Signed-off-by: Han Zhou <hzhou@ovn.org>
> > ---
>
> Hi Han,
>
> >  controller/encaps.c     | 83 ++++++++++++++++++++++++++++++++---------
> >  tests/ovn-controller.at | 44 ++++++++++++++++++++++
> >  2 files changed, 110 insertions(+), 17 deletions(-)
> >
> > diff --git a/controller/encaps.c b/controller/encaps.c
> > index 28237f6191c8..1d0d47523e77 100644
> > --- a/controller/encaps.c
> > +++ b/controller/encaps.c
> > @@ -104,40 +104,62 @@ encaps_tunnel_id_create(const char *chassis_id,
const char *remote_encap_ip,
> >                       '%', local_encap_ip);
> >  }
> >
> > +/*
> > + * The older version of encaps_tunnel_id_create, which doesn't include
> > + * local_encap_ip in the ID. This is used for backward compatibility
support.
> > + */
> > +static char *
> > +encaps_tunnel_id_create_old(const char *chassis_id,
>
> Nit: I'd call this encaps_tunnel_id_create_legacy().

Ack

>
> > +                            const char *remote_encap_ip)
> > +{
> > +    return xasprintf("%s%c%s", chassis_id, '@', remote_encap_ip);
> > +}
> > +
> >  /*
> >   * Parses a 'tunnel_id' of the form <chassis_name>@<remote IP>%<local
IP>.
> >   * If the 'chassis_id' argument is not NULL the function will allocate
memory
> >   * and store the chassis_name part of the tunnel-id at '*chassis_id'.
> >   * Same for remote_encap_ip and local_encap_ip.
> > + *
> > + * The old form <chassis_name>@<remote IP> is also supported for
backward
> > + * compatibility during upgrade.
> >   */
> >  bool
> >  encaps_tunnel_id_parse(const char *tunnel_id, char **chassis_id,
> >                         char **remote_encap_ip, char **local_encap_ip)
> >  {
> > -    /* Find the @.  Fail if there is no @ or if any part is empty. */
> > -    const char *d = strchr(tunnel_id, '@');
> > -    if (d == tunnel_id || !d || !d[1]) {
> > -        return false;
> > +    char *tokstr = xstrdup(tunnel_id);
> > +    char *saveptr = NULL;
> > +    bool ret = false;
> > +
> > +    char *token_chassis = strtok_r(tokstr, "@", &saveptr);
> > +    if (!token_chassis) {
> > +        goto out;
> >      }
> >
> > -    /* Find the %.  Fail if there is no % or if any part is empty. */
> > -    const char *d2 = strchr(d + 1, '%');
> > -    if (d2 == d + 1 || !d2 || !d2[1]) {
> > -        return false;
> > +    char *token_remote_ip = strtok_r(NULL, "%", &saveptr);
> > +    if (!token_remote_ip) {
> > +        goto out;
> >      }
> >
> > +    char *token_local_ip = strtok_r(NULL, "", &saveptr);
> > +
> >      if (chassis_id) {
> > -        *chassis_id = xmemdup0(tunnel_id, d - tunnel_id);
> > +        *chassis_id = xstrdup(token_chassis);
> >      }
> > -
> >      if (remote_encap_ip) {
> > -        *remote_encap_ip = xmemdup0(d + 1, d2 - (d + 1));
> > +        *remote_encap_ip = xstrdup(token_remote_ip);
> >      }
> > -
> >      if (local_encap_ip) {
> > -        *local_encap_ip = xstrdup(d2 + 1);
> > +        /* To support backward compatibility during upgrade, ignore
local ip if
> > +         * it is not encoded in the tunnel id yet. */
> > +        *local_encap_ip = token_local_ip ? xstrdup(token_local_ip) :
NULL;
>
> This can be simplified to:
>
> *local_encap_ip = nullable_xstrdup(token_local_ip);

Ack

>
> >      }
> > -    return true;
> > +
> > +    ret = true;
> > +out:
> > +    free(tokstr);
> > +    return ret;
> >  }
>
> I think I'd use strsep instead, seems a tiny bit cleaner.  I see
> encaps_tunnel_id_match() also uses strtok_r() so I'll leave it up
> to you to decide whether we should follow the same style or not.
>
> I'll leave the strsep() alternative here just in case:
>
> bool
> encaps_tunnel_id_parse(const char *tunnel_id, char **chassis_id,
>                        char **remote_encap_ip, char **local_encap_ip)
> {
>     char *str = xstrdup(tunnel_id);
>     char *next = str;
>     bool ret = false;
>
>     char *token_chassis = strsep(&next, "@");
>     if (!token_chassis) {
>         goto out;
>     }
>
>     char *token_remote_ip = strsep(&next, "%");
>     if (!token_remote_ip) {
>         goto out;
>     }
>
>     char *token_local_ip = next;
>     if (chassis_id) {
>         *chassis_id = xstrdup(token_chassis);
>     }
>     if (remote_encap_ip) {
>         *remote_encap_ip = xstrdup(token_remote_ip);
>     }
>     if (local_encap_ip) {
>         /* To support backward compatibility during upgrade, ignore local
ip if
>          * it is not encoded in the tunnel id yet. */
>         *local_encap_ip = nullable_xstrdup(token_local_ip);
>     }
>
>     ret = true;
> out:
>     free(str);
>     return ret;
> }
>

Thanks for the suggestion! I'd leave it as is for now to follow the style
of encaps_tunnel_id_match and other places. If prefered, we may replace
strtok_r with strsep wherever applicable across the code base with a
separate patch. I'd leave that for the future because I just want to merge
this before the release.

> >
> >  /*
> > @@ -145,6 +167,10 @@ encaps_tunnel_id_parse(const char *tunnel_id, char
**chassis_id,
> >   *      <chassis_id>@<remote_encap_ip>%<local_encap_ip>
> >   * contains 'chassis_id' and, if specified, the given
'remote_encap_ip' and
> >   * 'local_encap_ip'. Returns false otherwise.
> > + *
> > + * The old format <chassis_id>@<remote_encap_ip> is also supported for
backward
> > + * compatibility during upgrade, and the local_encap_ip matching is
ignored in
> > + * that case.
> >   */
> >  bool
> >  encaps_tunnel_id_match(const char *tunnel_id, const char *chassis_id,
> > @@ -166,8 +192,10 @@ encaps_tunnel_id_match(const char *tunnel_id,
const char *chassis_id,
> >      }
> >
> >      char *token_local_ip = strtok_r(NULL, "", &saveptr);
> > -    if (local_encap_ip &&
> > -        (!token_local_ip || strcmp(token_local_ip, local_encap_ip))) {
> > +    if (!token_local_ip) {
> > +        /* It is old format. To support backward compatibility during
upgrade,
> > +         * just ignore local_ip. */
> > +    } else if (local_encap_ip && strcmp(token_local_ip,
local_encap_ip)) {
> >          goto out;
> >      }
> >
> > @@ -189,6 +217,7 @@ tunnel_add(struct tunnel_ctx *tc, const struct
sbrec_sb_global *sbg,
> >      const char *dst_port = smap_get(&encap->options, "dst_port");
> >      const char *csum = smap_get(&encap->options, "csum");
> >      char *tunnel_entry_id = NULL;
> > +    char *tunnel_entry_id_old = NULL;
> >
> >      /*
> >       * Since a chassis may have multiple encap-ip, we can't just add
the
> > @@ -198,6 +227,8 @@ tunnel_add(struct tunnel_ctx *tc, const struct
sbrec_sb_global *sbg,
> >       */
> >      tunnel_entry_id = encaps_tunnel_id_create(new_chassis_id,
encap->ip,
> >                                                local_ip);
> > +    tunnel_entry_id_old = encaps_tunnel_id_create_old(new_chassis_id,
> > +                                                      encap->ip);
> >      if (csum && (!strcmp(csum, "true") || !strcmp(csum, "false"))) {
> >          smap_add(&options, "csum", csum);
> >      }
> > @@ -269,11 +300,28 @@ tunnel_add(struct tunnel_ctx *tc, const struct
sbrec_sb_global *sbg,
> >       * it). */
> >      struct tunnel_node *tunnel = shash_find_data(&tc->tunnel,
> >                                                   tunnel_entry_id);
> > +    bool old_id_format = false;
> > +    if (!tunnel) {
> > +        tunnel = shash_find_data(&tc->tunnel, tunnel_entry_id_old);
> > +        old_id_format = true;
> > +    }
> >      if (tunnel
> >          && tunnel->port->n_interfaces == 1
> >          && !strcmp(tunnel->port->interfaces[0]->type, encap->type)
> >          && smap_equal(&tunnel->port->interfaces[0]->options,
&options)) {
> > -        shash_find_and_delete(&tc->tunnel, tunnel_entry_id);
> > +        if (old_id_format) {
> > +            /* We must be upgrading from an older version. We can
reuse the
> > +             * existing tunnel, but needs to update the tunnel's ID to
the new
> > +             * format. */
> > +            ovsrec_port_update_external_ids_setkey(tunnel->port,
OVN_TUNNEL_ID,
> > +                                                   tunnel_entry_id);
> > +            ovsrec_interface_update_external_ids_setkey(
> > +                tunnel->port->interfaces[0], OVN_TUNNEL_ID,
tunnel_entry_id);
> > +
> > +            shash_find_and_delete(&tc->tunnel, tunnel_entry_id_old);
> > +        } else {
> > +            shash_find_and_delete(&tc->tunnel, tunnel_entry_id);
> > +        }
> >          free(tunnel);
> >          goto exit;
> >      }
> > @@ -306,6 +354,7 @@ tunnel_add(struct tunnel_ctx *tc, const struct
sbrec_sb_global *sbg,
> >
> >  exit:
> >      free(tunnel_entry_id);
> > +    free(tunnel_entry_id_old);
> >      smap_destroy(&options);
> >  }
> >
> > diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> > index f77e032d4839..0b60806089cf 100644
> > --- a/tests/ovn-controller.at
> > +++ b/tests/ovn-controller.at
> > @@ -2732,3 +2732,47 @@ OVS_WAIT_FOR_OUTPUT([ovn-appctl -t
ovn-controller connection-status], [0], [conn
> >
> >  OVN_CLEANUP([hv1])
> >  AT_CLEANUP
> > +
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([ovn-controller - tunnel chassis id backward compatibility])
> > +
> > +ovn_start
> > +
> > +net_add n1
> > +sim_add hv1
> > +as hv1
> > +ovs-vsctl add-br br-phys
> > +ovn_attach n1 br-phys 192.168.0.1
> > +
> > +ovn-nbctl --wait=sb sync
>
> I assume this is to ensure that the the NB_Global/SB_Global contents are
> populated before we continue.
>
> In any case, it probably needs a "check" prefix.

Ack

>
> > +wait_row_count Chassis 1 name=hv1
> > +
> > +ovn-sbctl chassis-add fakechassis geneve 192.168.0.2
> > +fakech_tunnel=ovn-fakech-0
> > +OVS_WAIT_UNTIL([ovs-vsctl list port $fakech_tunnel])
> > +port_uuid=$(ovs-vsctl get port $fakech_tunnel _uuid)
> > +
> > +AT_CHECK([ovs-vsctl get port $fakech_tunnel
external_ids:ovn-chassis-id], [0], [dnl
> > +"fakechassis@192.168.0.2%192.168.0.1"
> > +])
> > +
> > +# Stop ovn-controller without deleting tunnel
> > +ovs-appctl --timeout=10 -t ovn-controller exit --restart
>
> This should be ovn-appctl and we should prefix it with "check".  Why
> isn't the default 30 second timeout good enough?
>
> I think this should do:
> check ovn-appctl -t ovn-controller exit --restart
>

Ack.

> > +
> > +# Change the tunnel external id to the old format, and then start
> > +# ovn-controller, pretending we are upgrading from an older version.
> > +ovs-vsctl set port $fakech_tunnel external_ids:ovn-chassis-id=
fakechassis@192.168.0.2
> > +AT_CHECK([ovs-vsctl get port $fakech_tunnel
external_ids:ovn-chassis-id], [0], [dnl
> > +"fakechassis@192.168.0.2"
> > +])
> > +
> > +start_daemon ovn-controller
> > +
> > +# The tunnel id should be updated to the new format but the tunnel's
uuid
> > +# should kept the same (no recreation).
> > +OVS_WAIT_UNTIL([test x$(ovs-vsctl get port $fakech_tunnel
external_ids:ovn-chassis-id) = x\"fakechassis@192.168.0.2%192.168.0.1\"])
> > +AT_CHECK([test x"$port_uuid"=$(ovs-vsctl get port $fakech_tunnel
_uuid)])
> > +
> > +OVN_CLEANUP([hv1])
> > +AT_CLEANUP
> > +])
>
> With the above small issues fixed:
>
> Acked-by: Dumitru Ceara <dceara@redhat.com>

Thanks for your review! I addressed your comments and pushed to main and
branch-24.03.

Regards,
Han

>
> Regards,
> Dumitru
>
diff mbox series

Patch

diff --git a/controller/encaps.c b/controller/encaps.c
index 28237f6191c8..1d0d47523e77 100644
--- a/controller/encaps.c
+++ b/controller/encaps.c
@@ -104,40 +104,62 @@  encaps_tunnel_id_create(const char *chassis_id, const char *remote_encap_ip,
                      '%', local_encap_ip);
 }
 
+/*
+ * The older version of encaps_tunnel_id_create, which doesn't include
+ * local_encap_ip in the ID. This is used for backward compatibility support.
+ */
+static char *
+encaps_tunnel_id_create_old(const char *chassis_id,
+                            const char *remote_encap_ip)
+{
+    return xasprintf("%s%c%s", chassis_id, '@', remote_encap_ip);
+}
+
 /*
  * Parses a 'tunnel_id' of the form <chassis_name>@<remote IP>%<local IP>.
  * If the 'chassis_id' argument is not NULL the function will allocate memory
  * and store the chassis_name part of the tunnel-id at '*chassis_id'.
  * Same for remote_encap_ip and local_encap_ip.
+ *
+ * The old form <chassis_name>@<remote IP> is also supported for backward
+ * compatibility during upgrade.
  */
 bool
 encaps_tunnel_id_parse(const char *tunnel_id, char **chassis_id,
                        char **remote_encap_ip, char **local_encap_ip)
 {
-    /* Find the @.  Fail if there is no @ or if any part is empty. */
-    const char *d = strchr(tunnel_id, '@');
-    if (d == tunnel_id || !d || !d[1]) {
-        return false;
+    char *tokstr = xstrdup(tunnel_id);
+    char *saveptr = NULL;
+    bool ret = false;
+
+    char *token_chassis = strtok_r(tokstr, "@", &saveptr);
+    if (!token_chassis) {
+        goto out;
     }
 
-    /* Find the %.  Fail if there is no % or if any part is empty. */
-    const char *d2 = strchr(d + 1, '%');
-    if (d2 == d + 1 || !d2 || !d2[1]) {
-        return false;
+    char *token_remote_ip = strtok_r(NULL, "%", &saveptr);
+    if (!token_remote_ip) {
+        goto out;
     }
 
+    char *token_local_ip = strtok_r(NULL, "", &saveptr);
+
     if (chassis_id) {
-        *chassis_id = xmemdup0(tunnel_id, d - tunnel_id);
+        *chassis_id = xstrdup(token_chassis);
     }
-
     if (remote_encap_ip) {
-        *remote_encap_ip = xmemdup0(d + 1, d2 - (d + 1));
+        *remote_encap_ip = xstrdup(token_remote_ip);
     }
-
     if (local_encap_ip) {
-        *local_encap_ip = xstrdup(d2 + 1);
+        /* To support backward compatibility during upgrade, ignore local ip if
+         * it is not encoded in the tunnel id yet. */
+        *local_encap_ip = token_local_ip ? xstrdup(token_local_ip) : NULL;
     }
-    return true;
+
+    ret = true;
+out:
+    free(tokstr);
+    return ret;
 }
 
 /*
@@ -145,6 +167,10 @@  encaps_tunnel_id_parse(const char *tunnel_id, char **chassis_id,
  *      <chassis_id>@<remote_encap_ip>%<local_encap_ip>
  * contains 'chassis_id' and, if specified, the given 'remote_encap_ip' and
  * 'local_encap_ip'. Returns false otherwise.
+ *
+ * The old format <chassis_id>@<remote_encap_ip> is also supported for backward
+ * compatibility during upgrade, and the local_encap_ip matching is ignored in
+ * that case.
  */
 bool
 encaps_tunnel_id_match(const char *tunnel_id, const char *chassis_id,
@@ -166,8 +192,10 @@  encaps_tunnel_id_match(const char *tunnel_id, const char *chassis_id,
     }
 
     char *token_local_ip = strtok_r(NULL, "", &saveptr);
-    if (local_encap_ip &&
-        (!token_local_ip || strcmp(token_local_ip, local_encap_ip))) {
+    if (!token_local_ip) {
+        /* It is old format. To support backward compatibility during upgrade,
+         * just ignore local_ip. */
+    } else if (local_encap_ip && strcmp(token_local_ip, local_encap_ip)) {
         goto out;
     }
 
@@ -189,6 +217,7 @@  tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
     const char *dst_port = smap_get(&encap->options, "dst_port");
     const char *csum = smap_get(&encap->options, "csum");
     char *tunnel_entry_id = NULL;
+    char *tunnel_entry_id_old = NULL;
 
     /*
      * Since a chassis may have multiple encap-ip, we can't just add the
@@ -198,6 +227,8 @@  tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
      */
     tunnel_entry_id = encaps_tunnel_id_create(new_chassis_id, encap->ip,
                                               local_ip);
+    tunnel_entry_id_old = encaps_tunnel_id_create_old(new_chassis_id,
+                                                      encap->ip);
     if (csum && (!strcmp(csum, "true") || !strcmp(csum, "false"))) {
         smap_add(&options, "csum", csum);
     }
@@ -269,11 +300,28 @@  tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
      * it). */
     struct tunnel_node *tunnel = shash_find_data(&tc->tunnel,
                                                  tunnel_entry_id);
+    bool old_id_format = false;
+    if (!tunnel) {
+        tunnel = shash_find_data(&tc->tunnel, tunnel_entry_id_old);
+        old_id_format = true;
+    }
     if (tunnel
         && tunnel->port->n_interfaces == 1
         && !strcmp(tunnel->port->interfaces[0]->type, encap->type)
         && smap_equal(&tunnel->port->interfaces[0]->options, &options)) {
-        shash_find_and_delete(&tc->tunnel, tunnel_entry_id);
+        if (old_id_format) {
+            /* We must be upgrading from an older version. We can reuse the
+             * existing tunnel, but needs to update the tunnel's ID to the new
+             * format. */
+            ovsrec_port_update_external_ids_setkey(tunnel->port, OVN_TUNNEL_ID,
+                                                   tunnel_entry_id);
+            ovsrec_interface_update_external_ids_setkey(
+                tunnel->port->interfaces[0], OVN_TUNNEL_ID, tunnel_entry_id);
+
+            shash_find_and_delete(&tc->tunnel, tunnel_entry_id_old);
+        } else {
+            shash_find_and_delete(&tc->tunnel, tunnel_entry_id);
+        }
         free(tunnel);
         goto exit;
     }
@@ -306,6 +354,7 @@  tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
 
 exit:
     free(tunnel_entry_id);
+    free(tunnel_entry_id_old);
     smap_destroy(&options);
 }
 
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index f77e032d4839..0b60806089cf 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -2732,3 +2732,47 @@  OVS_WAIT_FOR_OUTPUT([ovn-appctl -t ovn-controller connection-status], [0], [conn
 
 OVN_CLEANUP([hv1])
 AT_CLEANUP
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn-controller - tunnel chassis id backward compatibility])
+
+ovn_start
+
+net_add n1
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+ovn-nbctl --wait=sb sync
+wait_row_count Chassis 1 name=hv1
+
+ovn-sbctl chassis-add fakechassis geneve 192.168.0.2
+fakech_tunnel=ovn-fakech-0
+OVS_WAIT_UNTIL([ovs-vsctl list port $fakech_tunnel])
+port_uuid=$(ovs-vsctl get port $fakech_tunnel _uuid)
+
+AT_CHECK([ovs-vsctl get port $fakech_tunnel external_ids:ovn-chassis-id], [0], [dnl
+"fakechassis@192.168.0.2%192.168.0.1"
+])
+
+# Stop ovn-controller without deleting tunnel
+ovs-appctl --timeout=10 -t ovn-controller exit --restart
+
+# Change the tunnel external id to the old format, and then start
+# ovn-controller, pretending we are upgrading from an older version.
+ovs-vsctl set port $fakech_tunnel external_ids:ovn-chassis-id=fakechassis@192.168.0.2
+AT_CHECK([ovs-vsctl get port $fakech_tunnel external_ids:ovn-chassis-id], [0], [dnl
+"fakechassis@192.168.0.2"
+])
+
+start_daemon ovn-controller
+
+# The tunnel id should be updated to the new format but the tunnel's uuid
+# should kept the same (no recreation).
+OVS_WAIT_UNTIL([test x$(ovs-vsctl get port $fakech_tunnel external_ids:ovn-chassis-id) = x\"fakechassis@192.168.0.2%192.168.0.1\"])
+AT_CHECK([test x"$port_uuid"=$(ovs-vsctl get port $fakech_tunnel _uuid)])
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])