
[ovs-dev,RFC] controller: send GARPs for LSPs in vlan-backed network scenario

Message ID 27d1debf2f781d67e600e49b9c4eb9f6b3c04291.1655418594.git.lorenzo.bianconi@redhat.com
State RFC
Series [ovs-dev,RFC] controller: send GARPs for LSPs in vlan-backed network scenario

Commit Message

Lorenzo Bianconi June 16, 2022, 10:31 p.m. UTC
When using VLAN-backed networks and OVN routers leveraging the
'ovn-chassis-mac-mappings' option, the eth.src field is replaced by the
chassis mac address in order not to expose the router mac address from
different nodes and confuse the TOR switch. However, doing so means the
TOR switch is not able to learn the port/mac bindings for routed E/W
traffic and is forced to always flood it. Fix this issue by adding the
capability to send GARP traffic for logical switch ports if the
corresponding logical switch has the ovn-lsp-garp parameter set to true
in its other_config column. More info about the issue can be found
here [0].

[0] https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050678.html
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2087779

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 controller/pinctrl.c | 85 +++++++++++++++++++++++++++++---------------
 northd/northd.c      | 29 +++++++++++++++
 2 files changed, 85 insertions(+), 29 deletions(-)
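
For reference, the knob introduced by this RFC is per logical switch; a
minimal usage sketch (the switch name LS1 and port name lsp1 are
hypothetical) could look like:

  # Enable periodic GARPs for the VIF ports of LS1; northd reads this
  # key from the Logical_Switch other_config column.
  ovn-nbctl set Logical_Switch LS1 other_config:ovn-lsp-garp=true
  # northd then propagates the flag into each SB Port_Binding's options,
  # which can be checked with e.g.:
  ovn-sbctl list Port_Binding lsp1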

Comments

Dumitru Ceara July 13, 2022, 2:18 p.m. UTC | #1
On 6/17/22 00:31, Lorenzo Bianconi wrote:
> When using VLAN-backed networks and OVN routers leveraging the
> 'ovn-chassis-mac-mappings' option, the eth.src field is replaced by the
> chassis mac address in order not to expose the router mac address from
> different nodes and confuse the TOR switch. However, doing so means the
> TOR switch is not able to learn the port/mac bindings for routed E/W
> traffic and is forced to always flood it. Fix this issue by adding the
> capability to send GARP traffic for logical switch ports if the
> corresponding logical switch has the ovn-lsp-garp parameter set to true
> in its other_config column. More info about the issue can be found
> here [0].
> 
> [0] https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050678.html
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2087779
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---

Hi Lorenzo,

I have a few concerns with this approach:

a. The CMS will have to set this option for all VMs on all logical
switches which will enable periodic GARPs for all of them all the time.
That seems like quite a lot of broadcast traffic in the fabric.

b. There's no guarantee that the GARPs are sent in time to prevent the
FDB timeouts on the ToR switches.  At best we could make the interval
configurable, but I don't think that's much better either.

c. This is not really introduced by your patch but we will be causing
this more often now.  With the topology:

(HV1) VM1 -- LS1 --- LR -- LS2 -- VM2 (HV2)
            (VLAN-backed network)

HV1 configured with chassis mac mapping HV1-MAC
HV2 configured with chassis mac mapping HV2-MAC

We're leaking MAC addresses from LS1's broadcast domain (VM1-MAC) and
from LS2's broadcast domain (VM2-MAC) into the fabric.  I'm not sure
that's OK.
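
On (b) above, purely as an illustration: if the resend interval were made
configurable, it could be read per Port_Binding along these lines
("garp-interval" is a made-up option name, not something this patch adds):

  /* Hypothetical helper: resend period in ms, defaulting to the 180s
   * value hardcoded by this patch.  smap_get_int() is from lib/smap.h. */
  static long long
  garp_resend_interval_ms(const struct sbrec_port_binding *pb)
  {
      int secs = smap_get_int(&pb->options, "garp-interval", 180);
      return (long long) secs * 1000;
  }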

I think a proper solution is to change how we run the logical pipelines
in case of vlan-backed networks.  We currently have an asymmetry:

For packets flowing from VM1 to VM2 we execute:
- on HV1: LS1-ingress, LS1-egress, LR-ingress, LR-egress, LS2-ingress
- on HV2: LS2-egress

For packets flowing from VM2 to VM1 we execute:
- on HV2: LS2-ingress, LS2-egress, LR-ingress, LR-egress, LS1-ingress
- on HV1: LS1-egress

What if we change this to:
VM1 -> VM2:
- on HV1: LS1-ingress, LS1-egress, LR-ingress
- on HV2: LR-egress, LS2-ingress, LS2-egress

VM2 -> VM1:
- on HV2: LS2-ingress, LS2-egress, LR-ingress
- on HV1: LR-egress, LS1-ingress, LS1-egress

Would this ensure that we always only use ovn-chassis-mac-mappings on
the VLAN network and avoid flooding on the ToR?

Regards,
Dumitru

>  controller/pinctrl.c | 85 +++++++++++++++++++++++++++++---------------
>  northd/northd.c      | 29 +++++++++++++++
>  2 files changed, 85 insertions(+), 29 deletions(-)
> 
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index 9a1a0faa1..eb5739bfc 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -4533,7 +4533,8 @@ send_garp_rarp(struct rconn *swconn, struct garp_rarp_data *garp_rarp,
>          garp_rarp->backoff *= 2;
>          garp_rarp->announce_time = current_time + garp_rarp->backoff * 1000;
>      } else {
> -        garp_rarp->announce_time = LLONG_MAX;
> +        /* Default timeout is 180s. */
> +        garp_rarp->announce_time = current_time + 180 * 1000;
>      }
>      return garp_rarp->announce_time;
>  }
> @@ -5510,14 +5511,15 @@ ip_mcast_querier_wait(long long int query_time)
>  
>  /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
>  static void
> -get_localnet_vifs_l3gwports(
> +get_local_vifs_l3gwports(
>      struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
>      struct ovsdb_idl_index *sbrec_port_binding_by_name,
>      const struct ovsrec_bridge *br_int,
>      const struct sbrec_chassis *chassis,
>      const struct hmap *local_datapaths,
>      struct sset *localnet_vifs,
> -    struct sset *local_l3gw_ports)
> +    struct sset *local_l3gw_ports,
> +    struct sset *local_vifs)
>  {
>      for (int i = 0; i < br_int->n_ports; i++) {
>          const struct ovsrec_port *port_rec = br_int->ports[i];
> @@ -5574,7 +5576,8 @@ get_localnet_vifs_l3gwports(
>          /* Get l3gw ports.  Consider port bindings with type "l3gateway"
>           * that connect to gateway routers (if local), and consider port
>           * bindings of type "patch" since they might connect to
> -         * distributed gateway ports with NAT addresses. */
> +         * distributed gateway ports with NAT addresses.
> +         * Get LSP ports if requested by CMS. */
>  
>          sbrec_port_binding_index_set_datapath(target, ld->datapath);
>          SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
> @@ -5583,6 +5586,11 @@ get_localnet_vifs_l3gwports(
>                  || !strcmp(pb->type, "patch")) {
>                  sset_add(local_l3gw_ports, pb->logical_port);
>              }
> +            /* GARP packets for lsp ports. */
> +            if (pb->chassis == chassis &&
> +                smap_get_bool(&pb->options, "ovn-lsp-garp", false)) {
> +                sset_add(local_vifs, pb->logical_port);
> +            }
>          }
>      }
>      sbrec_port_binding_index_destroy_row(target);
> @@ -5761,6 +5769,26 @@ send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
>      }
>  }
>  
> +static void
> +send_garp_rarp_update_for_pb_set(
> +        struct ovsdb_idl_txn *ovnsb_idl_txn,
> +        struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> +        struct ovsdb_idl_index *sbrec_port_binding_by_name,
> +        struct sset *vif_set, const struct hmap *local_datapaths,
> +        struct shash *nat_addresses)
> +{
> +    const char *iface_id;
> +    SSET_FOR_EACH (iface_id, vif_set) {
> +        const struct sbrec_port_binding *pb = lport_lookup_by_name(
> +            sbrec_port_binding_by_name, iface_id);
> +        if (pb) {
> +            send_garp_rarp_update(ovnsb_idl_txn,
> +                                  sbrec_mac_binding_by_lport_ip,
> +                                  local_datapaths, pb, nat_addresses);
> +        }
> +    }
> +}
> +
>  /* Called by pinctrl_run(). Runs with in the main ovn-controller
>   * thread context. */
>  static void
> @@ -5776,15 +5804,17 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
>  {
>      struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
>      struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
> +    struct sset local_vifs = SSET_INITIALIZER(&local_vifs);
>      struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
>      struct shash nat_addresses;
>  
>      shash_init(&nat_addresses);
>  
> -    get_localnet_vifs_l3gwports(sbrec_port_binding_by_datapath,
> -                                sbrec_port_binding_by_name,
> -                                br_int, chassis, local_datapaths,
> -                                &localnet_vifs, &local_l3gw_ports);
> +    get_local_vifs_l3gwports(sbrec_port_binding_by_datapath,
> +                             sbrec_port_binding_by_name,
> +                             br_int, chassis, local_datapaths,
> +                             &localnet_vifs, &local_l3gw_ports,
> +                             &local_vifs);
>  
>      get_nat_addresses_and_keys(sbrec_port_binding_by_name,
>                                 &nat_ip_keys, &local_l3gw_ports,
> @@ -5795,36 +5825,33 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
>      struct shash_node *iter;
>      SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
>          if (!sset_contains(&localnet_vifs, iter->name) &&
> -            !sset_contains(&nat_ip_keys, iter->name)) {
> +            !sset_contains(&nat_ip_keys, iter->name) &&
> +            !sset_contains(&local_vifs, iter->name)) {
>              send_garp_rarp_delete(iter->name);
>          }
>      }
>  
>      /* Update send_garp_rarp_data. */
> -    const char *iface_id;
> -    SSET_FOR_EACH (iface_id, &localnet_vifs) {
> -        const struct sbrec_port_binding *pb = lport_lookup_by_name(
> -            sbrec_port_binding_by_name, iface_id);
> -        if (pb) {
> -            send_garp_rarp_update(ovnsb_idl_txn,
> -                                  sbrec_mac_binding_by_lport_ip,
> -                                  local_datapaths, pb, &nat_addresses);
> -        }
> -    }
> -
> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> +                                     sbrec_mac_binding_by_lport_ip,
> +                                     sbrec_port_binding_by_name,
> +                                     &localnet_vifs, local_datapaths,
> +                                     &nat_addresses);
> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> +                                     sbrec_mac_binding_by_lport_ip,
> +                                     sbrec_port_binding_by_name,
> +                                     &local_vifs, local_datapaths,
> +                                     &nat_addresses);
>      /* Update send_garp_rarp_data for nat-addresses. */
> -    const char *gw_port;
> -    SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
> -        const struct sbrec_port_binding *pb
> -            = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
> -        if (pb) {
> -            send_garp_rarp_update(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
> -                                  local_datapaths, pb, &nat_addresses);
> -        }
> -    }
> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> +                                     sbrec_mac_binding_by_lport_ip,
> +                                     sbrec_port_binding_by_name,
> +                                     &local_l3gw_ports, local_datapaths,
> +                                     &nat_addresses);
>  
>      /* pinctrl_handler thread will send the GARPs. */
>  
> +    sset_destroy(&local_vifs);
>      sset_destroy(&localnet_vifs);
>      sset_destroy(&local_l3gw_ports);
>  
> diff --git a/northd/northd.c b/northd/northd.c
> index 0d6ebccde..9a4a880a7 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -6446,6 +6446,34 @@ ovn_update_ipv6_options(struct hmap *ports)
>      }
>  }
>  
> +static void
> +ovn_update_lsp_garp_options(struct hmap *ports)
> +{
> +    struct ovn_port *op;
> +    HMAP_FOR_EACH (op, key_node, ports) {
> +        if (!op->nbsp) {
> +            continue;
> +        }
> +        if (op->nbsp->type[0] || op->nbsp->parent_name) {
> +            continue;
> +        }
> +
> +        struct ovn_datapath *od = op->od;
> +        if (!od || !od->nbs) {
> +            continue;
> +        }
> +
> +        struct smap options;
> +        smap_clone(&options, &op->sb->options);
> +
> +        bool ovn_lsp_garp = smap_get_bool(&od->nbs->other_config,
> +                                          "ovn-lsp-garp", false);
> +        smap_add(&options, "ovn-lsp-garp", ovn_lsp_garp ? "true" : "false");
> +        sbrec_port_binding_set_options(op->sb, &options);
> +        smap_destroy(&options);
> +    }
> +}
> +
>  static void
>  build_port_group_lswitches(struct northd_input *input_data,
>                             struct hmap *pgs,
> @@ -15381,6 +15409,7 @@ ovnnb_db_run(struct northd_input *input_data,
>      stopwatch_start(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec());
>      ovn_update_ipv6_options(&data->ports);
>      ovn_update_ipv6_prefix(&data->ports);
> +    ovn_update_lsp_garp_options(&data->ports);
>  
>      sync_lbs(input_data, ovnsb_txn, &data->datapaths, &data->lbs);
>      sync_address_sets(input_data, ovnsb_txn, &data->datapaths);
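
For clarity, the behavioral change in the send_garp_rarp() hunk quoted
above: before the patch, announcements stop once the backoff sequence
ends (announce_time = LLONG_MAX); after it, they repeat every 180s. A
standalone sketch of the resulting schedule (the initial backoff value
and the cap are assumed for illustration, not taken from pinctrl.c):

  /* Illustrative only: exponential backoff, then a fixed resend period. */
  #define RESEND_PERIOD_MS (180 * 1000)  /* the timeout added by the patch */
  #define BACKOFF_CAP_S    16            /* assumed cap, for illustration */

  static long long
  next_announce_ms(long long now_ms, int *backoff_s)
  {
      if (*backoff_s < BACKOFF_CAP_S) {
          *backoff_s *= 2;                     /* e.g. 2s, 4s, 8s, 16s */
          return now_ms + *backoff_s * 1000LL;
      }
      /* Pre-patch: LLONG_MAX here (stop announcing).  Post-patch: keep
       * refreshing the fabric FDB every RESEND_PERIOD_MS. */
      return now_ms + RESEND_PERIOD_MS;
  }
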
Lorenzo Bianconi July 13, 2022, 4:07 p.m. UTC | #2
> On 6/17/22 00:31, Lorenzo Bianconi wrote:
> > When using VLAN-backed networks and OVN routers leveraging the
> > 'ovn-chassis-mac-mappings' option, the eth.src field is replaced by the
> > chassis mac address in order not to expose the router mac address from
> > different nodes and confuse the TOR switch. However, doing so means the
> > TOR switch is not able to learn the port/mac bindings for routed E/W
> > traffic and is forced to always flood it. Fix this issue by adding the
> > capability to send GARP traffic for logical switch ports if the
> > corresponding logical switch has the ovn-lsp-garp parameter set to true
> > in its other_config column. More info about the issue can be found
> > here [0].
> > 
> > [0] https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050678.html
> > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2087779
> > 
> > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> > ---
> 
> Hi Lorenzo,

Hi Dumitru,

Thanks for reviewing it :)

> 
> I have a few concerns with this approach:
> 
> a. The CMS will have to set this option for all VMs on all logical
> switches which will enable periodic GARPs for all of them all the time.
> That seems like quite a lot of broadcast traffic in the fabric.
> 
> b. There's no guarantee that the GARPs are sent in time to prevent the
> FDB timeouts on the ToR switches.  At best we could make the interval
> configurable, but I don't think that's much better either.
> 
> c. This is not really introduced by your patch but we will be causing
> this more often now.  With the topology:
> 
> (HV1) VM1 -- LS1 --- LR -- LS2 -- VM2 (HV2)
>             (VLAN-backed network)
> 
> HV1 configured with chassis mac mapping HV1-MAC
> HV2 configured with chassis mac mapping HV2-MAC
> 
> We're leaking MAC addresses from LS1's broadcast domain (VM1-MAC) and
> from LS2's broadcast domain (VM2-MAC) into the fabric.  I'm not sure
> that's OK.
> 
> I think a proper solution is to change how we run the logical pipelines
> in case of vlan-backed networks.  We currently have an asymmetry:
> 
> For packets flowing from VM1 to VM2 we execute:
> - on HV1: LS1-ingress, LS1-egress, LR-ingress, LR-egress, LS2-ingress
> - on HV2: LS2-egress
> 
> For packets flowing from VM2 to VM1 we execute:
> - on HV2: LS2-ingress, LS2-egress, LR-ingress, LR-egress, LS1-ingress
> - on HV1: LS1-egress
> 
> What if we change this to:
> VM1 -> VM2:
> - on HV1: LS1-ingress, LS1-egress, LR-ingress
> - on HV2: LR-egress, LS2-ingress, LS2-egress
> 
> VM2 -> VM1:
> - on HV2: LS2-ingress, LS2-egress, LR-ingress
> - on HV1: LR-egress, LS1-ingress, LS1-egress

I do not know why the current architecture is done this way (any suggestions??).
I guess the approach you suggested should work. Would we be introducing any
backward compatibility issues?

Regards,
Lorenzo

> 
> Would this ensure that we always only use ovn-chassis-mac-mappings on
> the VLAN network and avoid flooding on the ToR?
> 
> Regards,
> Dumitru
> 
> >  controller/pinctrl.c | 85 +++++++++++++++++++++++++++++---------------
> >  northd/northd.c      | 29 +++++++++++++++
> >  2 files changed, 85 insertions(+), 29 deletions(-)
> > 
> > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > index 9a1a0faa1..eb5739bfc 100644
> > --- a/controller/pinctrl.c
> > +++ b/controller/pinctrl.c
> > @@ -4533,7 +4533,8 @@ send_garp_rarp(struct rconn *swconn, struct garp_rarp_data *garp_rarp,
> >          garp_rarp->backoff *= 2;
> >          garp_rarp->announce_time = current_time + garp_rarp->backoff * 1000;
> >      } else {
> > -        garp_rarp->announce_time = LLONG_MAX;
> > +        /* Default timeout is 180s. */
> > +        garp_rarp->announce_time = current_time + 180 * 1000;
> >      }
> >      return garp_rarp->announce_time;
> >  }
> > @@ -5510,14 +5511,15 @@ ip_mcast_querier_wait(long long int query_time)
> >  
> >  /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
> >  static void
> > -get_localnet_vifs_l3gwports(
> > +get_local_vifs_l3gwports(
> >      struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
> >      struct ovsdb_idl_index *sbrec_port_binding_by_name,
> >      const struct ovsrec_bridge *br_int,
> >      const struct sbrec_chassis *chassis,
> >      const struct hmap *local_datapaths,
> >      struct sset *localnet_vifs,
> > -    struct sset *local_l3gw_ports)
> > +    struct sset *local_l3gw_ports,
> > +    struct sset *local_vifs)
> >  {
> >      for (int i = 0; i < br_int->n_ports; i++) {
> >          const struct ovsrec_port *port_rec = br_int->ports[i];
> > @@ -5574,7 +5576,8 @@ get_localnet_vifs_l3gwports(
> >          /* Get l3gw ports.  Consider port bindings with type "l3gateway"
> >           * that connect to gateway routers (if local), and consider port
> >           * bindings of type "patch" since they might connect to
> > -         * distributed gateway ports with NAT addresses. */
> > +         * distributed gateway ports with NAT addresses.
> > +         * Get LSP ports if requested by CMS. */
> >  
> >          sbrec_port_binding_index_set_datapath(target, ld->datapath);
> >          SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
> > @@ -5583,6 +5586,11 @@ get_localnet_vifs_l3gwports(
> >                  || !strcmp(pb->type, "patch")) {
> >                  sset_add(local_l3gw_ports, pb->logical_port);
> >              }
> > +            /* GARP packets for lsp ports. */
> > +            if (pb->chassis == chassis &&
> > +                smap_get_bool(&pb->options, "ovn-lsp-garp", false)) {
> > +                sset_add(local_vifs, pb->logical_port);
> > +            }
> >          }
> >      }
> >      sbrec_port_binding_index_destroy_row(target);
> > @@ -5761,6 +5769,26 @@ send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
> >      }
> >  }
> >  
> > +static void
> > +send_garp_rarp_update_for_pb_set(
> > +        struct ovsdb_idl_txn *ovnsb_idl_txn,
> > +        struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> > +        struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > +        struct sset *vif_set, const struct hmap *local_datapaths,
> > +        struct shash *nat_addresses)
> > +{
> > +    const char *iface_id;
> > +    SSET_FOR_EACH (iface_id, vif_set) {
> > +        const struct sbrec_port_binding *pb = lport_lookup_by_name(
> > +            sbrec_port_binding_by_name, iface_id);
> > +        if (pb) {
> > +            send_garp_rarp_update(ovnsb_idl_txn,
> > +                                  sbrec_mac_binding_by_lport_ip,
> > +                                  local_datapaths, pb, nat_addresses);
> > +        }
> > +    }
> > +}
> > +
> >  /* Called by pinctrl_run(). Runs with in the main ovn-controller
> >   * thread context. */
> >  static void
> > @@ -5776,15 +5804,17 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >  {
> >      struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
> >      struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
> > +    struct sset local_vifs = SSET_INITIALIZER(&local_vifs);
> >      struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
> >      struct shash nat_addresses;
> >  
> >      shash_init(&nat_addresses);
> >  
> > -    get_localnet_vifs_l3gwports(sbrec_port_binding_by_datapath,
> > -                                sbrec_port_binding_by_name,
> > -                                br_int, chassis, local_datapaths,
> > -                                &localnet_vifs, &local_l3gw_ports);
> > +    get_local_vifs_l3gwports(sbrec_port_binding_by_datapath,
> > +                             sbrec_port_binding_by_name,
> > +                             br_int, chassis, local_datapaths,
> > +                             &localnet_vifs, &local_l3gw_ports,
> > +                             &local_vifs);
> >  
> >      get_nat_addresses_and_keys(sbrec_port_binding_by_name,
> >                                 &nat_ip_keys, &local_l3gw_ports,
> > @@ -5795,36 +5825,33 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >      struct shash_node *iter;
> >      SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
> >          if (!sset_contains(&localnet_vifs, iter->name) &&
> > -            !sset_contains(&nat_ip_keys, iter->name)) {
> > +            !sset_contains(&nat_ip_keys, iter->name) &&
> > +            !sset_contains(&local_vifs, iter->name)) {
> >              send_garp_rarp_delete(iter->name);
> >          }
> >      }
> >  
> >      /* Update send_garp_rarp_data. */
> > -    const char *iface_id;
> > -    SSET_FOR_EACH (iface_id, &localnet_vifs) {
> > -        const struct sbrec_port_binding *pb = lport_lookup_by_name(
> > -            sbrec_port_binding_by_name, iface_id);
> > -        if (pb) {
> > -            send_garp_rarp_update(ovnsb_idl_txn,
> > -                                  sbrec_mac_binding_by_lport_ip,
> > -                                  local_datapaths, pb, &nat_addresses);
> > -        }
> > -    }
> > -
> > +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> > +                                     sbrec_mac_binding_by_lport_ip,
> > +                                     sbrec_port_binding_by_name,
> > +                                     &localnet_vifs, local_datapaths,
> > +                                     &nat_addresses);
> > +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> > +                                     sbrec_mac_binding_by_lport_ip,
> > +                                     sbrec_port_binding_by_name,
> > +                                     &local_vifs, local_datapaths,
> > +                                     &nat_addresses);
> >      /* Update send_garp_rarp_data for nat-addresses. */
> > -    const char *gw_port;
> > -    SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
> > -        const struct sbrec_port_binding *pb
> > -            = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
> > -        if (pb) {
> > -            send_garp_rarp_update(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
> > -                                  local_datapaths, pb, &nat_addresses);
> > -        }
> > -    }
> > +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> > +                                     sbrec_mac_binding_by_lport_ip,
> > +                                     sbrec_port_binding_by_name,
> > +                                     &local_l3gw_ports, local_datapaths,
> > +                                     &nat_addresses);
> >  
> >      /* pinctrl_handler thread will send the GARPs. */
> >  
> > +    sset_destroy(&local_vifs);
> >      sset_destroy(&localnet_vifs);
> >      sset_destroy(&local_l3gw_ports);
> >  
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 0d6ebccde..9a4a880a7 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -6446,6 +6446,34 @@ ovn_update_ipv6_options(struct hmap *ports)
> >      }
> >  }
> >  
> > +static void
> > +ovn_update_lsp_garp_options(struct hmap *ports)
> > +{
> > +    struct ovn_port *op;
> > +    HMAP_FOR_EACH (op, key_node, ports) {
> > +        if (!op->nbsp) {
> > +            continue;
> > +        }
> > +        if (op->nbsp->type[0] || op->nbsp->parent_name) {
> > +            continue;
> > +        }
> > +
> > +        struct ovn_datapath *od = op->od;
> > +        if (!od || !od->nbs) {
> > +            continue;
> > +        }
> > +
> > +        struct smap options;
> > +        smap_clone(&options, &op->sb->options);
> > +
> > +        bool ovn_lsp_garp = smap_get_bool(&od->nbs->other_config,
> > +                                          "ovn-lsp-garp", false);
> > +        smap_add(&options, "ovn-lsp-garp", ovn_lsp_garp ? "true" : "false");
> > +        sbrec_port_binding_set_options(op->sb, &options);
> > +        smap_destroy(&options);
> > +    }
> > +}
> > +
> >  static void
> >  build_port_group_lswitches(struct northd_input *input_data,
> >                             struct hmap *pgs,
> > @@ -15381,6 +15409,7 @@ ovnnb_db_run(struct northd_input *input_data,
> >      stopwatch_start(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec());
> >      ovn_update_ipv6_options(&data->ports);
> >      ovn_update_ipv6_prefix(&data->ports);
> > +    ovn_update_lsp_garp_options(&data->ports);
> >  
> >      sync_lbs(input_data, ovnsb_txn, &data->datapaths, &data->lbs);
> >      sync_address_sets(input_data, ovnsb_txn, &data->datapaths);
>
Dumitru Ceara July 13, 2022, 6:44 p.m. UTC | #3
On 7/13/22 18:07, Lorenzo Bianconi wrote:
>> On 6/17/22 00:31, Lorenzo Bianconi wrote:
>>> When using VLAN-backed networks and OVN routers leveraging the
>>> 'ovn-chassis-mac-mappings' option, the eth.src field is replaced by the
>>> chassis mac address in order not to expose the router mac address from
>>> different nodes and confuse the TOR switch. However, doing so means the
>>> TOR switch is not able to learn the port/mac bindings for routed E/W
>>> traffic and is forced to always flood it. Fix this issue by adding the
>>> capability to send GARP traffic for logical switch ports if the
>>> corresponding logical switch has the ovn-lsp-garp parameter set to true
>>> in its other_config column. More info about the issue can be found
>>> here [0].
>>>
>>> [0] https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050678.html
>>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2087779
>>>
>>> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
>>> ---
>>
>> Hi Lorenzo,
> 
> Hi Dumitru,
> 
> Thanks for reviewing it :)
> 
>>
>> I have a few concerns with this approach:
>>
>> a. The CMS will have to set this option for all VMs on all logical
>> switches which will enable periodic GARPs for all of them all the time.
>> That seems like quite a lot of broadcast traffic in the fabric.
>>
>> b. There's no guarantee that the GARPs are sent in time to prevent the
>> FDB timeouts on the ToR switches.  At best we could make the interval
>> configurable, but I don't think that's much better either.
>>
>> c. This is not really introduced by your patch but we will be causing
>> this more often now.  With the topology:
>>
>> (HV1) VM1 -- LS1 --- LR -- LS2 -- VM2 (HV2)
>>             (VLAN-backed network)
>>
>> HV1 configured with chassis mac mapping HV1-MAC
>> HV2 configured with chassis mac mapping HV2-MAC
>>
>> We're leaking MAC addresses from LS1's broadcast domain (VM1-MAC) and
>> from LS2's broadcast domain (VM2-MAC) into the fabric.  I'm not sure
>> that's OK.
>>
>> I think a proper solution is to change how we run the logical pipelines
>> in case of vlan-backed networks.  We currently have an asymmetry:
>>
>> For packets flowing from VM1 to VM2 we execute:
>> - on HV1: LS1-ingress, LS1-egress, LR-ingress, LR-egress, LS2-ingress
>> - on HV2: LS2-egress
>>
>> For packets flowing from VM2 to VM1 we execute:
>> - on HV2: LS2-ingress, LS2-egress, LR-ingress, LR-egress, LS1-ingress
>> - on HV1: LS1-egress
>>
>> What if we change this to:
>> VM1 -> VM2:
>> - on HV1: LS1-ingress, LS1-egress, LR-ingress
>> - on HV2: LR-egress, LS2-ingress, LS2-egress
>>
>> VM2 -> VM1:
>> - on HV2: LS2-ingress, LS2-egress, LR-ingress
>> - on HV1: LR-egress, LS1-ingress, LS1-egress
> 
> I do not know why the current architecture is done this way (any suggestions??).
> I guess the approach you suggested should work. Would we be introducing any
> backward compatibility issues?

We would probably create a compatibility issue. :)

I don't know if this approach would even work, I was just trying to
imagine how traffic from within OVN should be seen on the fabric in this
case.

Maybe we need to think some more about other options.

Regards,
Dumitru

> 
> Regards,
> Lorenzo
> 
>>
>> Would this ensure that we always only use ovn-chassis-mac-mappings on
>> the VLAN network and avoid flooding on the ToR?
>>
>> Regards,
>> Dumitru
>>
>>>  controller/pinctrl.c | 85 +++++++++++++++++++++++++++++---------------
>>>  northd/northd.c      | 29 +++++++++++++++
>>>  2 files changed, 85 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
>>> index 9a1a0faa1..eb5739bfc 100644
>>> --- a/controller/pinctrl.c
>>> +++ b/controller/pinctrl.c
>>> @@ -4533,7 +4533,8 @@ send_garp_rarp(struct rconn *swconn, struct garp_rarp_data *garp_rarp,
>>>          garp_rarp->backoff *= 2;
>>>          garp_rarp->announce_time = current_time + garp_rarp->backoff * 1000;
>>>      } else {
>>> -        garp_rarp->announce_time = LLONG_MAX;
>>> +        /* Default timeout is 180s. */
>>> +        garp_rarp->announce_time = current_time + 180 * 1000;
>>>      }
>>>      return garp_rarp->announce_time;
>>>  }
>>> @@ -5510,14 +5511,15 @@ ip_mcast_querier_wait(long long int query_time)
>>>  
>>>  /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
>>>  static void
>>> -get_localnet_vifs_l3gwports(
>>> +get_local_vifs_l3gwports(
>>>      struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
>>>      struct ovsdb_idl_index *sbrec_port_binding_by_name,
>>>      const struct ovsrec_bridge *br_int,
>>>      const struct sbrec_chassis *chassis,
>>>      const struct hmap *local_datapaths,
>>>      struct sset *localnet_vifs,
>>> -    struct sset *local_l3gw_ports)
>>> +    struct sset *local_l3gw_ports,
>>> +    struct sset *local_vifs)
>>>  {
>>>      for (int i = 0; i < br_int->n_ports; i++) {
>>>          const struct ovsrec_port *port_rec = br_int->ports[i];
>>> @@ -5574,7 +5576,8 @@ get_localnet_vifs_l3gwports(
>>>          /* Get l3gw ports.  Consider port bindings with type "l3gateway"
>>>           * that connect to gateway routers (if local), and consider port
>>>           * bindings of type "patch" since they might connect to
>>> -         * distributed gateway ports with NAT addresses. */
>>> +         * distributed gateway ports with NAT addresses.
>>> +         * Get LSP ports if requested by CMS. */
>>>  
>>>          sbrec_port_binding_index_set_datapath(target, ld->datapath);
>>>          SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
>>> @@ -5583,6 +5586,11 @@ get_localnet_vifs_l3gwports(
>>>                  || !strcmp(pb->type, "patch")) {
>>>                  sset_add(local_l3gw_ports, pb->logical_port);
>>>              }
>>> +            /* GARP packets for lsp ports. */
>>> +            if (pb->chassis == chassis &&
>>> +                smap_get_bool(&pb->options, "ovn-lsp-garp", false)) {
>>> +                sset_add(local_vifs, pb->logical_port);
>>> +            }
>>>          }
>>>      }
>>>      sbrec_port_binding_index_destroy_row(target);
>>> @@ -5761,6 +5769,26 @@ send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
>>>      }
>>>  }
>>>  
>>> +static void
>>> +send_garp_rarp_update_for_pb_set(
>>> +        struct ovsdb_idl_txn *ovnsb_idl_txn,
>>> +        struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
>>> +        struct ovsdb_idl_index *sbrec_port_binding_by_name,
>>> +        struct sset *vif_set, const struct hmap *local_datapaths,
>>> +        struct shash *nat_addresses)
>>> +{
>>> +    const char *iface_id;
>>> +    SSET_FOR_EACH (iface_id, vif_set) {
>>> +        const struct sbrec_port_binding *pb = lport_lookup_by_name(
>>> +            sbrec_port_binding_by_name, iface_id);
>>> +        if (pb) {
>>> +            send_garp_rarp_update(ovnsb_idl_txn,
>>> +                                  sbrec_mac_binding_by_lport_ip,
>>> +                                  local_datapaths, pb, nat_addresses);
>>> +        }
>>> +    }
>>> +}
>>> +
>>>  /* Called by pinctrl_run(). Runs with in the main ovn-controller
>>>   * thread context. */
>>>  static void
>>> @@ -5776,15 +5804,17 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>  {
>>>      struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
>>>      struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
>>> +    struct sset local_vifs = SSET_INITIALIZER(&local_vifs);
>>>      struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
>>>      struct shash nat_addresses;
>>>  
>>>      shash_init(&nat_addresses);
>>>  
>>> -    get_localnet_vifs_l3gwports(sbrec_port_binding_by_datapath,
>>> -                                sbrec_port_binding_by_name,
>>> -                                br_int, chassis, local_datapaths,
>>> -                                &localnet_vifs, &local_l3gw_ports);
>>> +    get_local_vifs_l3gwports(sbrec_port_binding_by_datapath,
>>> +                             sbrec_port_binding_by_name,
>>> +                             br_int, chassis, local_datapaths,
>>> +                             &localnet_vifs, &local_l3gw_ports,
>>> +                             &local_vifs);
>>>  
>>>      get_nat_addresses_and_keys(sbrec_port_binding_by_name,
>>>                                 &nat_ip_keys, &local_l3gw_ports,
>>> @@ -5795,36 +5825,33 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>      struct shash_node *iter;
>>>      SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
>>>          if (!sset_contains(&localnet_vifs, iter->name) &&
>>> -            !sset_contains(&nat_ip_keys, iter->name)) {
>>> +            !sset_contains(&nat_ip_keys, iter->name) &&
>>> +            !sset_contains(&local_vifs, iter->name)) {
>>>              send_garp_rarp_delete(iter->name);
>>>          }
>>>      }
>>>  
>>>      /* Update send_garp_rarp_data. */
>>> -    const char *iface_id;
>>> -    SSET_FOR_EACH (iface_id, &localnet_vifs) {
>>> -        const struct sbrec_port_binding *pb = lport_lookup_by_name(
>>> -            sbrec_port_binding_by_name, iface_id);
>>> -        if (pb) {
>>> -            send_garp_rarp_update(ovnsb_idl_txn,
>>> -                                  sbrec_mac_binding_by_lport_ip,
>>> -                                  local_datapaths, pb, &nat_addresses);
>>> -        }
>>> -    }
>>> -
>>> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
>>> +                                     sbrec_mac_binding_by_lport_ip,
>>> +                                     sbrec_port_binding_by_name,
>>> +                                     &localnet_vifs, local_datapaths,
>>> +                                     &nat_addresses);
>>> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
>>> +                                     sbrec_mac_binding_by_lport_ip,
>>> +                                     sbrec_port_binding_by_name,
>>> +                                     &local_vifs, local_datapaths,
>>> +                                     &nat_addresses);
>>>      /* Update send_garp_rarp_data for nat-addresses. */
>>> -    const char *gw_port;
>>> -    SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
>>> -        const struct sbrec_port_binding *pb
>>> -            = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
>>> -        if (pb) {
>>> -            send_garp_rarp_update(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
>>> -                                  local_datapaths, pb, &nat_addresses);
>>> -        }
>>> -    }
>>> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
>>> +                                     sbrec_mac_binding_by_lport_ip,
>>> +                                     sbrec_port_binding_by_name,
>>> +                                     &local_l3gw_ports, local_datapaths,
>>> +                                     &nat_addresses);
>>>  
>>>      /* pinctrl_handler thread will send the GARPs. */
>>>  
>>> +    sset_destroy(&local_vifs);
>>>      sset_destroy(&localnet_vifs);
>>>      sset_destroy(&local_l3gw_ports);
>>>  
>>> diff --git a/northd/northd.c b/northd/northd.c
>>> index 0d6ebccde..9a4a880a7 100644
>>> --- a/northd/northd.c
>>> +++ b/northd/northd.c
>>> @@ -6446,6 +6446,34 @@ ovn_update_ipv6_options(struct hmap *ports)
>>>      }
>>>  }
>>>  
>>> +static void
>>> +ovn_update_lsp_garp_options(struct hmap *ports)
>>> +{
>>> +    struct ovn_port *op;
>>> +    HMAP_FOR_EACH (op, key_node, ports) {
>>> +        if (!op->nbsp) {
>>> +            continue;
>>> +        }
>>> +        if (op->nbsp->type[0] || op->nbsp->parent_name) {
>>> +            continue;
>>> +        }
>>> +
>>> +        struct ovn_datapath *od = op->od;
>>> +        if (!od || !od->nbs) {
>>> +            continue;
>>> +        }
>>> +
>>> +        struct smap options;
>>> +        smap_clone(&options, &op->sb->options);
>>> +
>>> +        bool ovn_lsp_garp = smap_get_bool(&od->nbs->other_config,
>>> +                                          "ovn-lsp-garp", false);
>>> +        smap_add(&options, "ovn-lsp-garp", ovn_lsp_garp ? "true" : "false");
>>> +        sbrec_port_binding_set_options(op->sb, &options);
>>> +        smap_destroy(&options);
>>> +    }
>>> +}
>>> +
>>>  static void
>>>  build_port_group_lswitches(struct northd_input *input_data,
>>>                             struct hmap *pgs,
>>> @@ -15381,6 +15409,7 @@ ovnnb_db_run(struct northd_input *input_data,
>>>      stopwatch_start(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec());
>>>      ovn_update_ipv6_options(&data->ports);
>>>      ovn_update_ipv6_prefix(&data->ports);
>>> +    ovn_update_lsp_garp_options(&data->ports);
>>>  
>>>      sync_lbs(input_data, ovnsb_txn, &data->datapaths, &data->lbs);
>>>      sync_address_sets(input_data, ovnsb_txn, &data->datapaths);
>>
Lorenzo Bianconi July 14, 2022, 8:35 a.m. UTC | #4
> On 7/13/22 18:07, Lorenzo Bianconi wrote:
> >> On 6/17/22 00:31, Lorenzo Bianconi wrote:
> >>> When using VLAN-backed networks and OVN routers leveraging the
> >>> 'ovn-chassis-mac-mappings' option, the eth.src field is replaced by the
> >>> chassis mac address in order not to expose the router mac address from
> >>> different nodes and confuse the TOR switch. However, doing so means the
> >>> TOR switch is not able to learn the port/mac bindings for routed E/W
> >>> traffic and is forced to always flood it. Fix this issue by adding the
> >>> capability to send GARP traffic for logical switch ports if the
> >>> corresponding logical switch has the ovn-lsp-garp parameter set to true
> >>> in its other_config column. More info about the issue can be found
> >>> here [0].
> >>>
> >>> [0] https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050678.html
> >>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2087779
> >>>
> >>> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> >>> ---
> >>
> >> Hi Lorenzo,
> > 
> > Hi Dumitru,
> > 
> > Thanks for reviewing it :)
> > 
> >>
> >> I have a few concerns with this approach:
> >>
> >> a. The CMS will have to set this option for all VMs on all logical
> >> switches which will enable periodic GARPs for all of them all the time.
> >> That seems like quite a lot of broadcast traffic in the fabric.
> >>
> >> b. There's no guarantee that the GARPs are sent in time to prevent the
> >> FDB timeouts on the ToR switches.  At best we could make the interval
> >> configurable, but I don't think that's much better either.
> >>
> >> c. This is not really introduced by your patch but we will be causing
> >> this more often now.  With the topology:
> >>
> >> (HV1) VM1 -- LS1 --- LR -- LS2 -- VM2 (HV2)
> >>             (VLAN-backed network)
> >>
> >> HV1 configured with chassis mac mapping HV1-MAC
> >> HV2 configured with chassis mac mapping HV2-MAC
> >>
> >> We're leaking MAC addresses from LS1's broadcast domain (VM1-MAC) and
> >> from LS2's broadcast domain (VM2-MAC) into the fabric.  I'm not sure
> >> that's OK.
> >>
> >> I think a proper solution is to change how we run the logical pipelines
> >> in case of vlan-backed networks.  We currently have an asymmetry:
> >>
> >> For packets flowing from VM1 to VM2 we execute:
> >> - on HV1: LS1-ingress, LS1-egress, LR-ingress, LR-egress, LS2-ingress
> >> - on HV2: LS2-egress
> >>
> >> For packets flowing from VM2 to VM1 we execute:
> >> - on HV2: LS2-ingress, LS2-egress, LR-ingress, LR-egress, LS1-ingress
> >> - on HV1: LS1-egress
> >>
> >> What if we change this to:
> >> VM1 -> VM2:
> >> - on HV1: LS1-ingress, LS1-egress, LR-ingress
> >> - on HV2: LR-egress, LS2-ingress, LS2-egress
> >>
> >> VM2 -> VM1:
> >> - on HV2: LS2-ingress, LS2-egress, LR-ingress
> >> - on HV1: LR-egress, LS1-ingress, LS1-egress
> > 
> > I do not know why the current architecture is done this way (any suggestions??).
> > I guess the approach you suggested should work. Would we be introducing any
> > backward compatibility issues?
> 
> We would probably create a compatibility issue. :)
> 
> I don't know if this approach would even work, I was just trying to
> imagine how traffic from within OVN should be seen on the fabric in this
> case.
> 
> Maybe we need to think some more about other options.

Thinking again about this approach, we are implementing some kind of L2
NATting, right? How can we implement DNAT to swap HV2-MAC with the proper
destination mac address (let's say pod B's)?

Regards,
Lorenzo

> 
> Regards,
> Dumitru
> 
> > 
> > Regards,
> > Lorenzo
> > 
> >>
> >> Would this ensure that we always only use ovn-chassis-mac-mappings on
> >> the VLAN network and avoid flooding on the ToR?
> >>
> >> Regards,
> >> Dumitru
> >>
> >>>  controller/pinctrl.c | 85 +++++++++++++++++++++++++++++---------------
> >>>  northd/northd.c      | 29 +++++++++++++++
> >>>  2 files changed, 85 insertions(+), 29 deletions(-)
> >>>
> >>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> >>> index 9a1a0faa1..eb5739bfc 100644
> >>> --- a/controller/pinctrl.c
> >>> +++ b/controller/pinctrl.c
> >>> @@ -4533,7 +4533,8 @@ send_garp_rarp(struct rconn *swconn, struct garp_rarp_data *garp_rarp,
> >>>          garp_rarp->backoff *= 2;
> >>>          garp_rarp->announce_time = current_time + garp_rarp->backoff * 1000;
> >>>      } else {
> >>> -        garp_rarp->announce_time = LLONG_MAX;
> >>> +        /* Default timeout is 180s. */
> >>> +        garp_rarp->announce_time = current_time + 180 * 1000;
> >>>      }
> >>>      return garp_rarp->announce_time;
> >>>  }
> >>> @@ -5510,14 +5511,15 @@ ip_mcast_querier_wait(long long int query_time)
> >>>  
> >>>  /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
> >>>  static void
> >>> -get_localnet_vifs_l3gwports(
> >>> +get_local_vifs_l3gwports(
> >>>      struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
> >>>      struct ovsdb_idl_index *sbrec_port_binding_by_name,
> >>>      const struct ovsrec_bridge *br_int,
> >>>      const struct sbrec_chassis *chassis,
> >>>      const struct hmap *local_datapaths,
> >>>      struct sset *localnet_vifs,
> >>> -    struct sset *local_l3gw_ports)
> >>> +    struct sset *local_l3gw_ports,
> >>> +    struct sset *local_vifs)
> >>>  {
> >>>      for (int i = 0; i < br_int->n_ports; i++) {
> >>>          const struct ovsrec_port *port_rec = br_int->ports[i];
> >>> @@ -5574,7 +5576,8 @@ get_localnet_vifs_l3gwports(
> >>>          /* Get l3gw ports.  Consider port bindings with type "l3gateway"
> >>>           * that connect to gateway routers (if local), and consider port
> >>>           * bindings of type "patch" since they might connect to
> >>> -         * distributed gateway ports with NAT addresses. */
> >>> +         * distributed gateway ports with NAT addresses.
> >>> +         * Get LSP ports if requested by CMS. */
> >>>  
> >>>          sbrec_port_binding_index_set_datapath(target, ld->datapath);
> >>>          SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
> >>> @@ -5583,6 +5586,11 @@ get_localnet_vifs_l3gwports(
> >>>                  || !strcmp(pb->type, "patch")) {
> >>>                  sset_add(local_l3gw_ports, pb->logical_port);
> >>>              }
> >>> +            /* GARP packets for lsp ports. */
> >>> +            if (pb->chassis == chassis &&
> >>> +                smap_get_bool(&pb->options, "ovn-lsp-garp", false)) {
> >>> +                sset_add(local_vifs, pb->logical_port);
> >>> +            }
> >>>          }
> >>>      }
> >>>      sbrec_port_binding_index_destroy_row(target);
> >>> @@ -5761,6 +5769,26 @@ send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
> >>>      }
> >>>  }
> >>>  
> >>> +static void
> >>> +send_garp_rarp_update_for_pb_set(
> >>> +        struct ovsdb_idl_txn *ovnsb_idl_txn,
> >>> +        struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> >>> +        struct ovsdb_idl_index *sbrec_port_binding_by_name,
> >>> +        struct sset *vif_set, const struct hmap *local_datapaths,
> >>> +        struct shash *nat_addresses)
> >>> +{
> >>> +    const char *iface_id;
> >>> +    SSET_FOR_EACH (iface_id, vif_set) {
> >>> +        const struct sbrec_port_binding *pb = lport_lookup_by_name(
> >>> +            sbrec_port_binding_by_name, iface_id);
> >>> +        if (pb) {
> >>> +            send_garp_rarp_update(ovnsb_idl_txn,
> >>> +                                  sbrec_mac_binding_by_lport_ip,
> >>> +                                  local_datapaths, pb, nat_addresses);
> >>> +        }
> >>> +    }
> >>> +}
> >>> +
> >>>  /* Called by pinctrl_run(). Runs with in the main ovn-controller
> >>>   * thread context. */
> >>>  static void
> >>> @@ -5776,15 +5804,17 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >>>  {
> >>>      struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
> >>>      struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
> >>> +    struct sset local_vifs = SSET_INITIALIZER(&local_vifs);
> >>>      struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
> >>>      struct shash nat_addresses;
> >>>  
> >>>      shash_init(&nat_addresses);
> >>>  
> >>> -    get_localnet_vifs_l3gwports(sbrec_port_binding_by_datapath,
> >>> -                                sbrec_port_binding_by_name,
> >>> -                                br_int, chassis, local_datapaths,
> >>> -                                &localnet_vifs, &local_l3gw_ports);
> >>> +    get_local_vifs_l3gwports(sbrec_port_binding_by_datapath,
> >>> +                             sbrec_port_binding_by_name,
> >>> +                             br_int, chassis, local_datapaths,
> >>> +                             &localnet_vifs, &local_l3gw_ports,
> >>> +                             &local_vifs);
> >>>  
> >>>      get_nat_addresses_and_keys(sbrec_port_binding_by_name,
> >>>                                 &nat_ip_keys, &local_l3gw_ports,
> >>> @@ -5795,36 +5825,33 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> >>>      struct shash_node *iter;
> >>>      SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
> >>>          if (!sset_contains(&localnet_vifs, iter->name) &&
> >>> -            !sset_contains(&nat_ip_keys, iter->name)) {
> >>> +            !sset_contains(&nat_ip_keys, iter->name) &&
> >>> +            !sset_contains(&local_vifs, iter->name)) {
> >>>              send_garp_rarp_delete(iter->name);
> >>>          }
> >>>      }
> >>>  
> >>>      /* Update send_garp_rarp_data. */
> >>> -    const char *iface_id;
> >>> -    SSET_FOR_EACH (iface_id, &localnet_vifs) {
> >>> -        const struct sbrec_port_binding *pb = lport_lookup_by_name(
> >>> -            sbrec_port_binding_by_name, iface_id);
> >>> -        if (pb) {
> >>> -            send_garp_rarp_update(ovnsb_idl_txn,
> >>> -                                  sbrec_mac_binding_by_lport_ip,
> >>> -                                  local_datapaths, pb, &nat_addresses);
> >>> -        }
> >>> -    }
> >>> -
> >>> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> >>> +                                     sbrec_mac_binding_by_lport_ip,
> >>> +                                     sbrec_port_binding_by_name,
> >>> +                                     &localnet_vifs, local_datapaths,
> >>> +                                     &nat_addresses);
> >>> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> >>> +                                     sbrec_mac_binding_by_lport_ip,
> >>> +                                     sbrec_port_binding_by_name,
> >>> +                                     &local_vifs, local_datapaths,
> >>> +                                     &nat_addresses);
> >>>      /* Update send_garp_rarp_data for nat-addresses. */
> >>> -    const char *gw_port;
> >>> -    SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
> >>> -        const struct sbrec_port_binding *pb
> >>> -            = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
> >>> -        if (pb) {
> >>> -            send_garp_rarp_update(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
> >>> -                                  local_datapaths, pb, &nat_addresses);
> >>> -        }
> >>> -    }
> >>> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> >>> +                                     sbrec_mac_binding_by_lport_ip,
> >>> +                                     sbrec_port_binding_by_name,
> >>> +                                     &local_l3gw_ports, local_datapaths,
> >>> +                                     &nat_addresses);
> >>>  
> >>>      /* pinctrl_handler thread will send the GARPs. */
> >>>  
> >>> +    sset_destroy(&local_vifs);
> >>>      sset_destroy(&localnet_vifs);
> >>>      sset_destroy(&local_l3gw_ports);
> >>>  
> >>> diff --git a/northd/northd.c b/northd/northd.c
> >>> index 0d6ebccde..9a4a880a7 100644
> >>> --- a/northd/northd.c
> >>> +++ b/northd/northd.c
> >>> @@ -6446,6 +6446,34 @@ ovn_update_ipv6_options(struct hmap *ports)
> >>>      }
> >>>  }
> >>>  
> >>> +static void
> >>> +ovn_update_lsp_garp_options(struct hmap *ports)
> >>> +{
> >>> +    struct ovn_port *op;
> >>> +    HMAP_FOR_EACH (op, key_node, ports) {
> >>> +        if (!op->nbsp) {
> >>> +            continue;
> >>> +        }
> >>> +        if (op->nbsp->type[0] || op->nbsp->parent_name) {
> >>> +            continue;
> >>> +        }
> >>> +
> >>> +        struct ovn_datapath *od = op->od;
> >>> +        if (!od || !od->nbs) {
> >>> +            continue;
> >>> +        }
> >>> +
> >>> +        struct smap options;
> >>> +        smap_clone(&options, &op->sb->options);
> >>> +
> >>> +        bool ovn_lsp_garp = smap_get_bool(&od->nbs->other_config,
> >>> +                                          "ovn-lsp-garp", false);
> >>> +        smap_add(&options, "ovn-lsp-garp", ovn_lsp_garp ? "true" : "false");
> >>> +        sbrec_port_binding_set_options(op->sb, &options);
> >>> +        smap_destroy(&options);
> >>> +    }
> >>> +}
> >>> +
> >>>  static void
> >>>  build_port_group_lswitches(struct northd_input *input_data,
> >>>                             struct hmap *pgs,
> >>> @@ -15381,6 +15409,7 @@ ovnnb_db_run(struct northd_input *input_data,
> >>>      stopwatch_start(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec());
> >>>      ovn_update_ipv6_options(&data->ports);
> >>>      ovn_update_ipv6_prefix(&data->ports);
> >>> +    ovn_update_lsp_garp_options(&data->ports);
> >>>  
> >>>      sync_lbs(input_data, ovnsb_txn, &data->datapaths, &data->lbs);
> >>>      sync_address_sets(input_data, ovnsb_txn, &data->datapaths);
> >>
>
Numan Siddique July 14, 2022, 3:35 p.m. UTC | #5
On Thu, Jul 14, 2022 at 3:36 AM Lorenzo Bianconi
<lorenzo.bianconi@redhat.com> wrote:
>
> > On 7/13/22 18:07, Lorenzo Bianconi wrote:
> > >> On 6/17/22 00:31, Lorenzo Bianconi wrote:
> > >>> When using VLAN-backed networks and OVN routers leveraging the
> > >>> 'ovn-chassis-mac-mappings' option, the eth.src field is replaced by the
> > >>> chassis mac address in order not to expose the router mac address from
> > >>> different nodes and confuse the TOR switch. However, doing so means the
> > >>> TOR switch is not able to learn the port/mac bindings for routed E/W
> > >>> traffic and is forced to always flood it. Fix this issue by adding the
> > >>> capability to send GARP traffic for logical switch ports if the
> > >>> corresponding logical switch has the ovn-lsp-garp parameter set to true
> > >>> in its other_config column. More info about the issue can be found
> > >>> here [0].
> > >>>
> > >>> [0] https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050678.html
> > >>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2087779
> > >>>
> > >>> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> > >>> ---
> > >>
> > >> Hi Lorenzo,
> > >
> > > Hi Dumitru,
> > >
> > > Thanks for reviewing it :)
> > >
> > >>
> > >> I have a few concerns with this approach:
> > >>
> > >> a. The CMS will have to set this option for all VMs on all logical
> > >> switches which will enable periodic GARPs for all of them all the time.
> > >> That seems like quite a lot of broadcast traffic in the fabric.
> > >>
> > >> b. There's no guarantee that the GARPs are sent in time to prevent the
> > >> FDB timeouts on the ToR switches.  At best we could make the interval
> > >> configurable, but I don't think that's much better either.
> > >>
> > >> c. This is not really introduced by your patch but we will be causing
> > >> this more often now.  With the topology:
> > >>
> > >> (HV1) VM1 -- LS1 --- LR -- LS2 -- VM2 (HV2)
> > >>             (VLAN-backed network)
> > >>
> > >> HV1 configured with chassis mac mapping HV1-MAC
> > >> HV2 configured with chassis mac mapping HV2-MAC
> > >>
> > >> We're leaking MAC addresses from LS1's broadcast domain (VM1-MAC) and
> > >> from LS2's broadcast domain (VM2-MAC) into the fabric.  I'm not sure
> > >> that's OK.

I'm not sure I understood what you mean here.
Let's say we have 2 VMs - VM1 and VM3 - connected to the same VLAN
logical switch LS1. If VM1 is on chassis HV1 and VM3 is on chassis HV3,
the fabric will learn VM1's and VM3's MACs when they communicate, right?
Please note that logical switch LS1 is a VLAN tenant network with a
localnet port (so no Geneve tunnels are used for the traffic from VM1
to VM3).


> > >>
> > >> I think a proper solution is to change how we run the logical pipelines
> > >> in case of vlan-backed networks.  We currently have an asymmetry:
> > >>
> > >> For packets flowing from VM1 to VM2 we execute:
> > >> - on HV1: LS1-ingress, LS1-egress, LR-ingress, LR-egress, LS2-ingress
> > >> - on HV2: LS2-egress
> > >>
> > >> For packets flowing from VM2 to VM1 we execute:
> > >> - on HV2: LS2-ingress, LS2-egress, LR-ingress, LR-egress, LS1-ingress
> > >> - on HV1: LS1-egress
> > >>
> > >> What if we change this to:
> > >> VM1 -> VM2:
> > >> - on HV1: LS1-ingress, LS1-egress, LR-ingress
> > >> - on HV2: LR-egress, LS2-ingress, LS2-egress
> > >>
> > >> VM2 -> VM1:
> > >> - on HV2: LS2-ingress, LS2-egress, LR-ingress
> > >> - on HV1: LR-egress, LS1-ingress, LS1-egress
> > >

I'm not sure how easy it would be to run the pipeline as you suggested
when ovn-chassis-mac-mappings is configured.
However, OVN does support centralized routing if a logical router has a
distributed gateway port (connecting to the provider network) and the
other logical router ports connecting to the tenant VLAN networks have
the reside-on-redirect-chassis option configured; see the sketch below.
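
For reference, a minimal sketch of that centralized-routing
configuration (the port names lr0-public, lr0-ls1 and the chassis name
hv-gw are hypothetical):

  # Pin routing for the router to a gateway chassis via its distributed
  # gateway port (the port facing the provider network).
  ovn-nbctl lrp-set-gateway-chassis lr0-public hv-gw 10
  # Make routing for the tenant VLAN network happen on that chassis
  # instead of being distributed across hypervisors.
  ovn-nbctl set Logical_Router_Port lr0-ls1 options:reside-on-redirect-chassis=true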

So in a way the CMS can use centralized routing to avoid this issue.
However, if we want to fix this issue for distributed routing, it looks
to me like the proposed RFC patch is the only way to fix it.  And with
proper documentation the CMS can weigh the benefits and drawbacks of
both options.

Thanks
Numan


> > > I do not know why the current architecture is done this way (any suggestions??).
> > > I guess the approach you suggested should work. Would we be introducing any
> > > backward compatibility issues?
> >
> > We would probably create a compatibility issue. :)
> >
> > I don't know if this approach would even work, I was just trying to
> > imagine how traffic from within OVN should be seen on the fabric in this
> > case.
> >
> > Maybe we need to think some more about other options.
>
> Thinking again about this approach, we are implementing some kind of L2
> NATting, right? How can we implement DNAT to swap HV2-MAC with the proper
> destination mac address (let's say pod B's)?
>
> Regards,
> Lorenzo
>
> >
> > Regards,
> > Dumitru
> >
> > >
> > > Regards,
> > > Lorenzo
> > >
> > >>
> > >> Would this ensure that we always only use ovn-chassis-mac-mappings on
> > >> the VLAN network and avoid flooding on the ToR?
> > >>
> > >> Regards,
> > >> Dumitru
> > >>
> > >>>  controller/pinctrl.c | 85 +++++++++++++++++++++++++++++---------------
> > >>>  northd/northd.c      | 29 +++++++++++++++
> > >>>  2 files changed, 85 insertions(+), 29 deletions(-)
> > >>>
> > >>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > >>> index 9a1a0faa1..eb5739bfc 100644
> > >>> --- a/controller/pinctrl.c
> > >>> +++ b/controller/pinctrl.c
> > >>> @@ -4533,7 +4533,8 @@ send_garp_rarp(struct rconn *swconn, struct garp_rarp_data *garp_rarp,
> > >>>          garp_rarp->backoff *= 2;
> > >>>          garp_rarp->announce_time = current_time + garp_rarp->backoff * 1000;
> > >>>      } else {
> > >>> -        garp_rarp->announce_time = LLONG_MAX;
> > >>> +        /* Default timeout is 180s. */
> > >>> +        garp_rarp->announce_time = current_time + 180 * 1000;
> > >>>      }
> > >>>      return garp_rarp->announce_time;
> > >>>  }
> > >>> @@ -5510,14 +5511,15 @@ ip_mcast_querier_wait(long long int query_time)
> > >>>
> > >>>  /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
> > >>>  static void
> > >>> -get_localnet_vifs_l3gwports(
> > >>> +get_local_vifs_l3gwports(
> > >>>      struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
> > >>>      struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > >>>      const struct ovsrec_bridge *br_int,
> > >>>      const struct sbrec_chassis *chassis,
> > >>>      const struct hmap *local_datapaths,
> > >>>      struct sset *localnet_vifs,
> > >>> -    struct sset *local_l3gw_ports)
> > >>> +    struct sset *local_l3gw_ports,
> > >>> +    struct sset *local_vifs)
> > >>>  {
> > >>>      for (int i = 0; i < br_int->n_ports; i++) {
> > >>>          const struct ovsrec_port *port_rec = br_int->ports[i];
> > >>> @@ -5574,7 +5576,8 @@ get_localnet_vifs_l3gwports(
> > >>>          /* Get l3gw ports.  Consider port bindings with type "l3gateway"
> > >>>           * that connect to gateway routers (if local), and consider port
> > >>>           * bindings of type "patch" since they might connect to
> > >>> -         * distributed gateway ports with NAT addresses. */
> > >>> +         * distributed gateway ports with NAT addresses.
> > >>> +         * Get LSP ports if requested by CMS. */
> > >>>
> > >>>          sbrec_port_binding_index_set_datapath(target, ld->datapath);
> > >>>          SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
> > >>> @@ -5583,6 +5586,11 @@ get_localnet_vifs_l3gwports(
> > >>>                  || !strcmp(pb->type, "patch")) {
> > >>>                  sset_add(local_l3gw_ports, pb->logical_port);
> > >>>              }
> > >>> +            /* GARP packets for lsp ports. */
> > >>> +            if (pb->chassis == chassis &&
> > >>> +                smap_get_bool(&pb->options, "ovn-lsp-garp", false)) {
> > >>> +                sset_add(local_vifs, pb->logical_port);
> > >>> +            }
> > >>>          }
> > >>>      }
> > >>>      sbrec_port_binding_index_destroy_row(target);
> > >>> @@ -5761,6 +5769,26 @@ send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
> > >>>      }
> > >>>  }
> > >>>
> > >>> +static void
> > >>> +send_garp_rarp_update_for_pb_set(
> > >>> +        struct ovsdb_idl_txn *ovnsb_idl_txn,
> > >>> +        struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> > >>> +        struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > >>> +        struct sset *vif_set, const struct hmap *local_datapaths,
> > >>> +        struct shash *nat_addresses)
> > >>> +{
> > >>> +    const char *iface_id;
> > >>> +    SSET_FOR_EACH (iface_id, vif_set) {
> > >>> +        const struct sbrec_port_binding *pb = lport_lookup_by_name(
> > >>> +            sbrec_port_binding_by_name, iface_id);
> > >>> +        if (pb) {
> > >>> +            send_garp_rarp_update(ovnsb_idl_txn,
> > >>> +                                  sbrec_mac_binding_by_lport_ip,
> > >>> +                                  local_datapaths, pb, nat_addresses);
> > >>> +        }
> > >>> +    }
> > >>> +}
> > >>> +
> > >>>  /* Called by pinctrl_run(). Runs with in the main ovn-controller
> > >>>   * thread context. */
> > >>>  static void
> > >>> @@ -5776,15 +5804,17 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > >>>  {
> > >>>      struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
> > >>>      struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
> > >>> +    struct sset local_vifs = SSET_INITIALIZER(&local_vifs);
> > >>>      struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
> > >>>      struct shash nat_addresses;
> > >>>
> > >>>      shash_init(&nat_addresses);
> > >>>
> > >>> -    get_localnet_vifs_l3gwports(sbrec_port_binding_by_datapath,
> > >>> -                                sbrec_port_binding_by_name,
> > >>> -                                br_int, chassis, local_datapaths,
> > >>> -                                &localnet_vifs, &local_l3gw_ports);
> > >>> +    get_local_vifs_l3gwports(sbrec_port_binding_by_datapath,
> > >>> +                             sbrec_port_binding_by_name,
> > >>> +                             br_int, chassis, local_datapaths,
> > >>> +                             &localnet_vifs, &local_l3gw_ports,
> > >>> +                             &local_vifs);
> > >>>
> > >>>      get_nat_addresses_and_keys(sbrec_port_binding_by_name,
> > >>>                                 &nat_ip_keys, &local_l3gw_ports,
> > >>> @@ -5795,36 +5825,33 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > >>>      struct shash_node *iter;
> > >>>      SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
> > >>>          if (!sset_contains(&localnet_vifs, iter->name) &&
> > >>> -            !sset_contains(&nat_ip_keys, iter->name)) {
> > >>> +            !sset_contains(&nat_ip_keys, iter->name) &&
> > >>> +            !sset_contains(&local_vifs, iter->name)) {
> > >>>              send_garp_rarp_delete(iter->name);
> > >>>          }
> > >>>      }
> > >>>
> > >>>      /* Update send_garp_rarp_data. */
> > >>> -    const char *iface_id;
> > >>> -    SSET_FOR_EACH (iface_id, &localnet_vifs) {
> > >>> -        const struct sbrec_port_binding *pb = lport_lookup_by_name(
> > >>> -            sbrec_port_binding_by_name, iface_id);
> > >>> -        if (pb) {
> > >>> -            send_garp_rarp_update(ovnsb_idl_txn,
> > >>> -                                  sbrec_mac_binding_by_lport_ip,
> > >>> -                                  local_datapaths, pb, &nat_addresses);
> > >>> -        }
> > >>> -    }
> > >>> -
> > >>> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> > >>> +                                     sbrec_mac_binding_by_lport_ip,
> > >>> +                                     sbrec_port_binding_by_name,
> > >>> +                                     &localnet_vifs, local_datapaths,
> > >>> +                                     &nat_addresses);
> > >>> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> > >>> +                                     sbrec_mac_binding_by_lport_ip,
> > >>> +                                     sbrec_port_binding_by_name,
> > >>> +                                     &local_vifs, local_datapaths,
> > >>> +                                     &nat_addresses);
> > >>>      /* Update send_garp_rarp_data for nat-addresses. */
> > >>> -    const char *gw_port;
> > >>> -    SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
> > >>> -        const struct sbrec_port_binding *pb
> > >>> -            = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
> > >>> -        if (pb) {
> > >>> -            send_garp_rarp_update(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
> > >>> -                                  local_datapaths, pb, &nat_addresses);
> > >>> -        }
> > >>> -    }
> > >>> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> > >>> +                                     sbrec_mac_binding_by_lport_ip,
> > >>> +                                     sbrec_port_binding_by_name,
> > >>> +                                     &local_l3gw_ports, local_datapaths,
> > >>> +                                     &nat_addresses);
> > >>>
> > >>>      /* pinctrl_handler thread will send the GARPs. */
> > >>>
> > >>> +    sset_destroy(&local_vifs);
> > >>>      sset_destroy(&localnet_vifs);
> > >>>      sset_destroy(&local_l3gw_ports);
> > >>>
> > >>> diff --git a/northd/northd.c b/northd/northd.c
> > >>> index 0d6ebccde..9a4a880a7 100644
> > >>> --- a/northd/northd.c
> > >>> +++ b/northd/northd.c
> > >>> @@ -6446,6 +6446,34 @@ ovn_update_ipv6_options(struct hmap *ports)
> > >>>      }
> > >>>  }
> > >>>
> > >>> +static void
> > >>> +ovn_update_lsp_garp_options(struct hmap *ports)
> > >>> +{
> > >>> +    struct ovn_port *op;
> > >>> +    HMAP_FOR_EACH (op, key_node, ports) {
> > >>> +        if (!op->nbsp) {
> > >>> +            continue;
> > >>> +        }
> > >>> +        if (op->nbsp->type[0] || op->nbsp->parent_name) {
> > >>> +            continue;
> > >>> +        }
> > >>> +
> > >>> +        struct ovn_datapath *od = op->od;
> > >>> +        if (!od || !od->nbs) {
> > >>> +            continue;
> > >>> +        }
> > >>> +
> > >>> +        struct smap options;
> > >>> +        smap_clone(&options, &op->sb->options);
> > >>> +
> > >>> +        bool ovn_lsp_garp = smap_get_bool(&od->nbs->other_config,
> > >>> +                                          "ovn-lsp-garp", false);
> > >>> +        smap_add(&options, "ovn-lsp-garp", ovn_lsp_garp ? "true" : "false");
> > >>> +        sbrec_port_binding_set_options(op->sb, &options);
> > >>> +        smap_destroy(&options);
> > >>> +    }
> > >>> +}
> > >>> +
> > >>>  static void
> > >>>  build_port_group_lswitches(struct northd_input *input_data,
> > >>>                             struct hmap *pgs,
> > >>> @@ -15381,6 +15409,7 @@ ovnnb_db_run(struct northd_input *input_data,
> > >>>      stopwatch_start(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec());
> > >>>      ovn_update_ipv6_options(&data->ports);
> > >>>      ovn_update_ipv6_prefix(&data->ports);
> > >>> +    ovn_update_lsp_garp_options(&data->ports);
> > >>>
> > >>>      sync_lbs(input_data, ovnsb_txn, &data->datapaths, &data->lbs);
> > >>>      sync_address_sets(input_data, ovnsb_txn, &data->datapaths);
> > >>
> >
Dumitru Ceara July 14, 2022, 6:27 p.m. UTC | #6
On 7/14/22 17:35, Numan Siddique wrote:
> On Thu, Jul 14, 2022 at 3:36 AM Lorenzo Bianconi
> <lorenzo.bianconi@redhat.com> wrote:
>>
>>> On 7/13/22 18:07, Lorenzo Bianconi wrote:
>>>>> On 6/17/22 00:31, Lorenzo Bianconi wrote:
>>>>>> When using VLAN backed networks and OVN routers leveraging the
>>>>>> 'ovn-chassis-mac-mappings' option, the eth.src field is replaced by the
>>>>>> chassis mac address in order to not expose the router mac address from
>>>>>> different nodes and confuse the TOR switch. However doing so the TOR
>>>>>> switch is not able to learn the port/mac bindings for routed E/W traffic
>>>>>> and it is force to always flood it. Fix this issue adding the capability
>>>>>> to send GARP traffic for logical switch ports if the corresponding logical
>>>>>> switch has the ovn-lsp-garp parameter set to true in the option column.
>>>>>> More into about the issue can be found here [0].
>>>>>>
>>>>>> [0] https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050678.html
>>>>>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2087779
>>>>>>
>>>>>> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
>>>>>> ---
>>>>>
>>>>> Hi Lorenzo,
>>>>
>>>> Hi Dumitru,
>>>>
>>>> Thanks for reviewing it :)
>>>>
>>>>>
>>>>> I have a few concerns with this approach:
>>>>>
>>>>> a. The CMS will have to set this option for all VMs on all logical
>>>>> switches which will enable periodic GARPs for all of them all the time.
>>>>> That seems like quite a lot of broadcast traffic in the fabric.
>>>>>
>>>>> b. There's no guarantee that the GARPs are sent in time to prevent the
>>>>> FDB timeouts on the ToR switches.  At best we could make the interval
>>>>> configurable but I don't think this is way better either.
>>>>>
>>>>> c. This is not really introduced by your patch but we will be causing
>>>>> this more often now.  With the topology:
>>>>>
>>>>> (HV1) VM1 -- LS1 --- LR -- LS2 -- VM2 (HV2)
>>>>>             (VLAN-backed network)
>>>>>
>>>>> HV1 configured with chassis mac mapping HV1-MAC
>>>>> HV2 configured with chassis mac mapping HV2-MAC
>>>>>
>>>>> We're leaking MAC addresses from LS1's broadcast domain (VM1-MAC) and
>>>>> from LS2's broadcast domain (VM2-MAC) into the fabric.  I'm not sure
>>>>> that's OK.
> 
> I'm not sure I understood what you mean here.
> Let's say we have 2 VMs - VM1 and VM3 connected to the same VLAN
> logical switch LS1
> and if VM1 is on chassis HV1 and VM3 is on chassis HV3,  the fabric
> will learn VM1's and VM3's mac
> when they both communicate right ?   Please note that logical switch
> LS1 is a vlan tenant network with
> a localnet port (so no geneve tunnels are used for the traffic from VM1 to VM3).
> 
> 

You're right, I misread the logical/physical topologies.  The MACs will
only be visible in the VLANs corresponding to the localnet port of each
of the logical switches.  Please ignore this part.

>>>>>
>>>>> I think a proper solution is to change how we run the logical pipelines
>>>>> in case of vlan-backed networks.  We currently have an assymetry:
>>>>>
>>>>> For packets flowing from VM1 to VM2 we execute:
>>>>> - on HV1: LS1-ingress, LS1-egress, LR-ingress, LR-egress, LS2-ingress
>>>>> - on HV2: LS2-egress
>>>>>
>>>>> For packets flowing from VM2 to VM1 we execute:
>>>>> - on HV2: LS2-ingress, LS2-egress, LR-ingress, LR-egress, LS1-ingress
>>>>> - on HV1: LS1-egress
>>>>>
>>>>> What if we change this to:
>>>>> VM1 -> VM2:
>>>>> - on HV1: LS1-ingress, LS1-egress, LR-ingress
>>>>> - on HV2: LR-egress, LS2-ingress, LS2-egress
>>>>>
>>>>> VM2 -> VM1:
>>>>> - on HV2: LS2-ingress, LS2-egress, LR-ingress
>>>>> - on HV2: LR-egress, LS1-ingress, LS1-egress
>>>>
> 
> I'm not sure how easy it would be to run the pipeline as you suggested
> when ovn-chassis-mac-mappings is configured.

Indeed, it doesn't seem possible, as we have no knowledge of what
happened prior to the packet being received on the VLAN; it could have
already been processed by an OVN node.

> However OVN does support centralized routing if a logical router has a
> distributed gateway port (connecting to the provider network)
> and the other logical router ports connecting to the tenant VLAN
> networks have the option - reside-on-redirect-chassis configured.
> 
> So in a way CMS can use the centralized routing to avoid this issue.
> However if we want to fix this issue for distributed routing,  looks
> to me
> the proposed RFC patch is the only way to fix it.  And with the proper
> documentation CMS can know the benefits and drawbacks of both the
> options.
> 

I still think that's potentially a lot of broadcast traffic.

Can we find a way of forcing the VMs to send some unicast L2 traffic on
the VLAN network?  Would it be possible to periodically send an ICMP
packet from ovn-controller (with dl_src=chassis-mac) towards the IPs of
all non-local VIFs attached to the logical switch?  This is more work
than sending GARPs but should result in less traffic.  The CMS will
still have to configure this feature on the logical routers.

I'm not sure if this covers all cases though.  What if the VIF runs a
VM/container that drops all ICMPs (or whatever probe we choose)?
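
To make the idea a bit more concrete, here's a rough standalone sketch
of the kind of probe I mean.  This is not pinctrl code (there we'd
compose the packet and inject it with a packet-out); the interface
name, MACs and IPs below are made up:

/* fdb_probe.c - sketch: send one unicast ICMP echo request with
 * eth.src set to the chassis mac so the ToR refreshes its FDB entry
 * for the chassis mac.  Build: gcc -o fdb_probe fdb_probe.c
 * Run as root. */
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Standard internet checksum over 'len' bytes. */
static uint16_t
csum16(const void *data, size_t len)
{
    const uint16_t *p = data;
    uint32_t sum = 0;

    while (len > 1) {
        sum += *p++;
        len -= 2;
    }
    if (len) {
        sum += *(const uint8_t *) p;
    }
    while (sum >> 16) {
        sum = (sum & 0xffff) + (sum >> 16);
    }
    return ~sum;
}

int
main(void)
{
    uint8_t chassis_mac[ETH_ALEN] = { 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0x01 };
    uint8_t vif_mac[ETH_ALEN] = { 0x52, 0x54, 0x00, 0x00, 0x00, 0x02 };
    uint8_t frame[sizeof(struct ether_header) + sizeof(struct iphdr)
                  + sizeof(struct icmphdr)] = { 0 };

    struct ether_header *eth = (struct ether_header *) frame;
    struct iphdr *ip = (struct iphdr *) (eth + 1);
    struct icmphdr *icmp = (struct icmphdr *) (ip + 1);

    /* eth.src = chassis mac, eth.dst = remote VIF mac: this is the
     * binding we want the ToR to (re)learn. */
    memcpy(eth->ether_shost, chassis_mac, ETH_ALEN);
    memcpy(eth->ether_dhost, vif_mac, ETH_ALEN);
    eth->ether_type = htons(ETHERTYPE_IP);

    ip->version = 4;
    ip->ihl = 5;
    ip->tot_len = htons(sizeof *ip + sizeof *icmp);
    ip->ttl = 64;
    ip->protocol = IPPROTO_ICMP;
    ip->saddr = inet_addr("10.0.0.1");   /* made-up local source */
    ip->daddr = inet_addr("10.0.0.2");   /* made-up remote VIF IP */
    ip->check = csum16(ip, sizeof *ip);

    icmp->type = ICMP_ECHO;
    icmp->un.echo.id = htons(0x4f56);
    icmp->un.echo.sequence = htons(1);
    icmp->checksum = csum16(icmp, sizeof *icmp);

    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_IP));
    if (fd < 0) {
        perror("socket");
        return 1;
    }

    struct sockaddr_ll sll = {
        .sll_family = AF_PACKET,
        .sll_protocol = htons(ETH_P_IP),
        .sll_ifindex = if_nametoindex("eth0"),  /* made-up vlan uplink */
        .sll_halen = ETH_ALEN,
    };
    memcpy(sll.sll_addr, vif_mac, ETH_ALEN);

    if (sendto(fd, frame, sizeof frame, 0,
               (struct sockaddr *) &sll, sizeof sll) < 0) {
        perror("sendto");
    }
    close(fd);
    return 0;
}

In the real thing ovn-controller would loop over the non-local VIFs of
the local datapaths and rate-limit the probes, but the frame layout
would be the same.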

Thanks,
Dumitru

> Thanks
> Numan
> 
> 
>>>> I do not know why the current architecture is done this way (any suggestions??).
>>>> I guess the approach you suggested should work. Are we introducing any backward
>>>> compatibility issue?
>>>
>>> We would probably create a compatibility issue. :)
>>>
>>> I don't know if this approach would even work, I was just trying to
>>> imagine how traffic from within OVN should be seen on the fabric in this
>>> case.
>>>
>>> Maybe we need to think some more about other options.
>>
>> Thinking again about this approach we are implementing some kind of L2 natting,
>> right? How can we implement dnat to swap HV2-MAC with proper destination
>> mac address (let's say pod B)?
>>
>> Regards,
>> Lorenzo
>>
>>>
>>> Regards,
>>> Dumitru
>>>
>>>>
>>>> Regards,
>>>> Lorenzo
>>>>
>>>>>
>>>>> Would this we ensure that we always only use ovn-chassis-mac-mappings on
>>>>> the VLAN network and avoid flooding on the ToR?
>>>>>
>>>>> Regards,
>>>>> Dumitru
>>>>>
>>>>>> [... patch hunks snipped, same diff as quoted earlier in the thread ...]
Numan Siddique July 19, 2022, 3:02 p.m. UTC | #7
On Thu, Jul 14, 2022 at 1:30 PM Dumitru Ceara <dceara@redhat.com> wrote:
>
> On 7/14/22 17:35, Numan Siddique wrote:
> > On Thu, Jul 14, 2022 at 3:36 AM Lorenzo Bianconi
> > <lorenzo.bianconi@redhat.com> wrote:
> >>
> >>> On 7/13/22 18:07, Lorenzo Bianconi wrote:
> >>>>> On 6/17/22 00:31, Lorenzo Bianconi wrote:
> >>>>>> When using VLAN backed networks and OVN routers leveraging the
> >>>>>> 'ovn-chassis-mac-mappings' option, the eth.src field is replaced by the
> >>>>>> chassis mac address in order to not expose the router mac address from
> >>>>>> different nodes and confuse the TOR switch. However doing so the TOR
> >>>>>> switch is not able to learn the port/mac bindings for routed E/W traffic
> >>>>>> and it is force to always flood it. Fix this issue adding the capability
> >>>>>> to send GARP traffic for logical switch ports if the corresponding logical
> >>>>>> switch has the ovn-lsp-garp parameter set to true in the option column.
> >>>>>> More into about the issue can be found here [0].
> >>>>>>
> >>>>>> [0] https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050678.html
> >>>>>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2087779
> >>>>>>
> >>>>>> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> >>>>>> ---
> >>>>>
> >>>>> Hi Lorenzo,
> >>>>
> >>>> Hi Dumitru,
> >>>>
> >>>> Thanks for reviewing it :)
> >>>>
> >>>>>
> >>>>> I have a few concerns with this approach:
> >>>>>
> >>>>> a. The CMS will have to set this option for all VMs on all logical
> >>>>> switches which will enable periodic GARPs for all of them all the time.
> >>>>> That seems like quite a lot of broadcast traffic in the fabric.
> >>>>>
> >>>>> b. There's no guarantee that the GARPs are sent in time to prevent the
> >>>>> FDB timeouts on the ToR switches.  At best we could make the interval
> >>>>> configurable but I don't think this is way better either.
> >>>>>
> >>>>> c. This is not really introduced by your patch but we will be causing
> >>>>> this more often now.  With the topology:
> >>>>>
> >>>>> (HV1) VM1 -- LS1 --- LR -- LS2 -- VM2 (HV2)
> >>>>>             (VLAN-backed network)
> >>>>>
> >>>>> HV1 configured with chassis mac mapping HV1-MAC
> >>>>> HV2 configured with chassis mac mapping HV2-MAC
> >>>>>
> >>>>> We're leaking MAC addresses from LS1's broadcast domain (VM1-MAC) and
> >>>>> from LS2's broadcast domain (VM2-MAC) into the fabric.  I'm not sure
> >>>>> that's OK.
> >
> > I'm not sure I understood what you mean here.
> > Let's say we have 2 VMs - VM1 and VM3 connected to the same VLAN
> > logical switch LS1
> > and if VM1 is on chassis HV1 and VM3 is on chassis HV3,  the fabric
> > will learn VM1's and VM3's mac
> > when they both communicate right ?   Please note that logical switch
> > LS1 is a vlan tenant network with
> > a localnet port (so no geneve tunnels are used for the traffic from VM1 to VM3).
> >
> >
>
> You're right, I misread the logical/physical topologies.  The MACs will
> only be visible in the VLANs corresponding to the localnet port of each
> of the logical switches.  Please ignore this part.
>
> >>>>>
> >>>>> I think a proper solution is to change how we run the logical pipelines
> >>>>> in case of vlan-backed networks.  We currently have an assymetry:
> >>>>>
> >>>>> For packets flowing from VM1 to VM2 we execute:
> >>>>> - on HV1: LS1-ingress, LS1-egress, LR-ingress, LR-egress, LS2-ingress
> >>>>> - on HV2: LS2-egress
> >>>>>
> >>>>> For packets flowing from VM2 to VM1 we execute:
> >>>>> - on HV2: LS2-ingress, LS2-egress, LR-ingress, LR-egress, LS1-ingress
> >>>>> - on HV1: LS1-egress
> >>>>>
> >>>>> What if we change this to:
> >>>>> VM1 -> VM2:
> >>>>> - on HV1: LS1-ingress, LS1-egress, LR-ingress
> >>>>> - on HV2: LR-egress, LS2-ingress, LS2-egress
> >>>>>
> >>>>> VM2 -> VM1:
> >>>>> - on HV2: LS2-ingress, LS2-egress, LR-ingress
> >>>>> - on HV2: LR-egress, LS1-ingress, LS1-egress
> >>>>
> >
> > I'm not sure how easy it would be to run the pipeline as you suggested
> > when ovn-chassis-mac-mappings is configured.
>
> Indeed it doesn't seem possible as we have no knowledge about what
> happened prior to the packet being received on the vlan, it could've
> been already processed by an ovn node.
>
> > However OVN does support centralized routing if a logical router has a
> > distributed gateway port (connecting to the provider network)
> > and the other logical router ports connecting to the tenant VLAN
> > networks have the option - reside-on-redirect-chassis configured.
> >
> > So in a way CMS can use the centralized routing to avoid this issue.
> > However if we want to fix this issue for distributed routing,  looks
> > to me
> > the proposed RFC patch is the only way to fix it.  And with the proper
> > documentation CMS can know the benefits and drawbacks of both the
> > options.
> >
>
> I still think that's potentially a lot of broadcast traffic.

If you compare this GARP broadcast traffic with the flooding the
upstream switch currently does for all of this traffic, it's still
better, right?

>
> Can we find a way of forcing the VMs to send some unicast L2 traffic on
> the VLAN network?  Would it be possible to periodically send an ICMP
> packet from ovn-controller (with dl_src=chassis-mac) towards the IP of
> all non-local VIFs attached to the logical switch.  This is more work
> than sending GARPs but should result in less traffic.  The CMS will
> still have to configure this feature on the logical routers.
>
> I'm not sure if this covers all cases though.  What if the VIF runs a
> VM/container that drops all ICMPs (or whatever probe we choose)?

This is an interesting idea.  The only concern would be security group
rules (ACLs) added by the CMS which would block the ICMP packets
generated by ovn-controller.
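
E.g. the CMS would have to make sure something along these lines is in
place on each such logical switch (switch name and priority made up):

  ovn-nbctl acl-add ls1 to-lport 32767 \
      'icmp4 && icmp4.type == 8' allow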

Numan

>
> Thanks,
> Dumitru
>
> > Thanks
> > Numan
> >
> >
> >>>> I do not know why the current architecture is done this way (any suggestions??).
> >>>> I guess the approach you suggested should work. Are we introducing any backward
> >>>> compatibility issue?
> >>>
> >>> We would probably create a compatibility issue. :)
> >>>
> >>> I don't know if this approach would even work, I was just trying to
> >>> imagine how traffic from within OVN should be seen on the fabric in this
> >>> case.
> >>>
> >>> Maybe we need to think some more about other options.
> >>
> >> Thinking again about this approach we are implementing some kind of L2 natting,
> >> right? How can we implement dnat to swap HV2-MAC with proper destination
> >> mac address (let's say pod B)?
> >>
> >> Regards,
> >> Lorenzo
> >>
> >>>
> >>> Regards,
> >>> Dumitru
> >>>
> >>>>
> >>>> Regards,
> >>>> Lorenzo
> >>>>
> >>>>>
> >>>>> Would this we ensure that we always only use ovn-chassis-mac-mappings on
> >>>>> the VLAN network and avoid flooding on the ToR?
> >>>>>
> >>>>> Regards,
> >>>>> Dumitru
> >>>>>
> >>>>>> [... patch hunks snipped, same diff as quoted earlier in the thread ...]
Dumitru Ceara July 19, 2022, 9:01 p.m. UTC | #8
On 7/19/22 17:02, Numan Siddique wrote:
> On Thu, Jul 14, 2022 at 1:30 PM Dumitru Ceara <dceara@redhat.com> wrote:
>>
>> On 7/14/22 17:35, Numan Siddique wrote:
>>> On Thu, Jul 14, 2022 at 3:36 AM Lorenzo Bianconi
>>> <lorenzo.bianconi@redhat.com> wrote:
>>>>
>>>>> On 7/13/22 18:07, Lorenzo Bianconi wrote:
>>>>>>> On 6/17/22 00:31, Lorenzo Bianconi wrote:
>>>>>>>> When using VLAN backed networks and OVN routers leveraging the
>>>>>>>> 'ovn-chassis-mac-mappings' option, the eth.src field is replaced by the
>>>>>>>> chassis mac address in order to not expose the router mac address from
>>>>>>>> different nodes and confuse the TOR switch. However doing so the TOR
>>>>>>>> switch is not able to learn the port/mac bindings for routed E/W traffic
>>>>>>>> and it is force to always flood it. Fix this issue adding the capability
>>>>>>>> to send GARP traffic for logical switch ports if the corresponding logical
>>>>>>>> switch has the ovn-lsp-garp parameter set to true in the option column.
>>>>>>>> More into about the issue can be found here [0].
>>>>>>>>
>>>>>>>> [0] https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050678.html
>>>>>>>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2087779
>>>>>>>>
>>>>>>>> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
>>>>>>>> ---
>>>>>>>
>>>>>>> Hi Lorenzo,
>>>>>>
>>>>>> Hi Dumitru,
>>>>>>
>>>>>> Thanks for reviewing it :)
>>>>>>
>>>>>>>
>>>>>>> I have a few concerns with this approach:
>>>>>>>
>>>>>>> a. The CMS will have to set this option for all VMs on all logical
>>>>>>> switches which will enable periodic GARPs for all of them all the time.
>>>>>>> That seems like quite a lot of broadcast traffic in the fabric.
>>>>>>>
>>>>>>> b. There's no guarantee that the GARPs are sent in time to prevent the
>>>>>>> FDB timeouts on the ToR switches.  At best we could make the interval
>>>>>>> configurable but I don't think this is way better either.
>>>>>>>
>>>>>>> c. This is not really introduced by your patch but we will be causing
>>>>>>> this more often now.  With the topology:
>>>>>>>
>>>>>>> (HV1) VM1 -- LS1 --- LR -- LS2 -- VM2 (HV2)
>>>>>>>             (VLAN-backed network)
>>>>>>>
>>>>>>> HV1 configured with chassis mac mapping HV1-MAC
>>>>>>> HV2 configured with chassis mac mapping HV2-MAC
>>>>>>>
>>>>>>> We're leaking MAC addresses from LS1's broadcast domain (VM1-MAC) and
>>>>>>> from LS2's broadcast domain (VM2-MAC) into the fabric.  I'm not sure
>>>>>>> that's OK.
>>>
>>> I'm not sure I understood what you mean here.
>>> Let's say we have 2 VMs - VM1 and VM3 connected to the same VLAN
>>> logical switch LS1
>>> and if VM1 is on chassis HV1 and VM3 is on chassis HV3,  the fabric
>>> will learn VM1's and VM3's mac
>>> when they both communicate right ?   Please note that logical switch
>>> LS1 is a vlan tenant network with
>>> a localnet port (so no geneve tunnels are used for the traffic from VM1 to VM3).
>>>
>>>
>>
>> You're right, I misread the logical/physical topologies.  The MACs will
>> only be visible in the VLANs corresponding to the localnet port of each
>> of the logical switches.  Please ignore this part.
>>
>>>>>>>
>>>>>>> I think a proper solution is to change how we run the logical pipelines
>>>>>>> in case of vlan-backed networks.  We currently have an assymetry:
>>>>>>>
>>>>>>> For packets flowing from VM1 to VM2 we execute:
>>>>>>> - on HV1: LS1-ingress, LS1-egress, LR-ingress, LR-egress, LS2-ingress
>>>>>>> - on HV2: LS2-egress
>>>>>>>
>>>>>>> For packets flowing from VM2 to VM1 we execute:
>>>>>>> - on HV2: LS2-ingress, LS2-egress, LR-ingress, LR-egress, LS1-ingress
>>>>>>> - on HV1: LS1-egress
>>>>>>>
>>>>>>> What if we change this to:
>>>>>>> VM1 -> VM2:
>>>>>>> - on HV1: LS1-ingress, LS1-egress, LR-ingress
>>>>>>> - on HV2: LR-egress, LS2-ingress, LS2-egress
>>>>>>>
>>>>>>> VM2 -> VM1:
>>>>>>> - on HV2: LS2-ingress, LS2-egress, LR-ingress
>>>>>>> - on HV2: LR-egress, LS1-ingress, LS1-egress
>>>>>>
>>>
>>> I'm not sure how easy it would be to run the pipeline as you suggested
>>> when ovn-chassis-mac-mappings is configured.
>>
>> Indeed it doesn't seem possible as we have no knowledge about what
>> happened prior to the packet being received on the vlan, it could've
>> been already processed by an ovn node.
>>
>>> However OVN does support centralized routing if a logical router has a
>>> distributed gateway port (connecting to the provider network)
>>> and the other logical router ports connecting to the tenant VLAN
>>> networks have the option - reside-on-redirect-chassis configured.
>>>
>>> So in a way CMS can use the centralized routing to avoid this issue.
>>> However if we want to fix this issue for distributed routing,  looks
>>> to me
>>> the proposed RFC patch is the only way to fix it.  And with the proper
>>> documentation CMS can know the benefits and drawbacks of both the
>>> options.
>>>
>>
>> I still think that's potentially a lot of broadcast traffic.
> 
> If you compare this GARP broadcast traffic with the present flooding
> done by the upstream switch for every traffic, that's still better
> right ?
> 

It is.

>>
>> Can we find a way of forcing the VMs to send some unicast L2 traffic on
>> the VLAN network?  Would it be possible to periodically send an ICMP
>> packet from ovn-controller (with dl_src=chassis-mac) towards the IP of
>> all non-local VIFs attached to the logical switch.  This is more work
>> than sending GARPs but should result in less traffic.  The CMS will
>> still have to configure this feature on the logical routers.
>>
>> I'm not sure if this covers all cases though.  What if the VIF runs a
>> VM/container that drops all ICMPs (or whatever probe we choose)?
> 
> This is an interesting idea.  The only concern would be if there are
> any security group rules (ACLs)
> added by CMS which would block the icmp packet generated by ovn-controller.
> 

Yeah, we'd need help from the CMS for that.  I'm not sure it's worth the
effort though.  I think we really need some input from neutron on this
(Daniel and Jakub in cc).  If broadcasts are not that big of a concern
for this case then maybe it's good enough to go with the approach
implemented in the RFC.

> Numan
> 
>>
>> Thanks,
>> Dumitru
>>
>>> Thanks
>>> Numan
>>>
>>>
>>>>>> I do not know why the current architecture is done this way (any suggestions??).
>>>>>> I guess the approach you suggested should work. Are we introducing any backward
>>>>>> compatibility issue?
>>>>>
>>>>> We would probably create a compatibility issue. :)
>>>>>
>>>>> I don't know if this approach would even work, I was just trying to
>>>>> imagine how traffic from within OVN should be seen on the fabric in this
>>>>> case.
>>>>>
>>>>> Maybe we need to think some more about other options.
>>>>
>>>> Thinking again about this approach we are implementing some kind of L2 natting,
>>>> right? How can we implement dnat to swap HV2-MAC with proper destination
>>>> mac address (let's say pod B)?
>>>>
>>>> Regards,
>>>> Lorenzo
>>>>
>>>>>
>>>>> Regards,
>>>>> Dumitru
>>>>>
>>>>>>
>>>>>> Regards,
>>>>>> Lorenzo
>>>>>>
>>>>>>>
>>>>>>> Would this we ensure that we always only use ovn-chassis-mac-mappings on
>>>>>>> the VLAN network and avoid flooding on the ToR?
>>>>>>>
>>>>>>> Regards,
>>>>>>> Dumitru
>>>>>>>
>>>>>>>> [... patch hunks snipped, same diff as quoted earlier in the thread ...]
>>>>>>>>      }
>>>>>>>>  }
>>>>>>>>
>>>>>>>> +static void
>>>>>>>> +ovn_update_lsp_garp_options(struct hmap *ports)
>>>>>>>> +{
>>>>>>>> +    struct ovn_port *op;
>>>>>>>> +    HMAP_FOR_EACH (op, key_node, ports) {
>>>>>>>> +        if (!op->nbsp) {
>>>>>>>> +            continue;
>>>>>>>> +        }
>>>>>>>> +        if (op->nbsp->type[0] || op->nbsp->parent_name) {
>>>>>>>> +            continue;
>>>>>>>> +        }
>>>>>>>> +
>>>>>>>> +        struct ovn_datapath *od = op->od;
>>>>>>>> +        if (!od || !od->nbs) {
>>>>>>>> +            continue;
>>>>>>>> +        }
>>>>>>>> +
>>>>>>>> +        struct smap options;
>>>>>>>> +        smap_clone(&options, &op->sb->options);
>>>>>>>> +
>>>>>>>> +        bool ovn_lsp_garp = smap_get_bool(&od->nbs->other_config,
>>>>>>>> +                                          "ovn-lsp-garp", false);
>>>>>>>> +        smap_add(&options, "ovn-lsp-garp", ovn_lsp_garp ? "true" : "false");
>>>>>>>> +        sbrec_port_binding_set_options(op->sb, &options);
>>>>>>>> +        smap_destroy(&options);
>>>>>>>> +    }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  static void
>>>>>>>>  build_port_group_lswitches(struct northd_input *input_data,
>>>>>>>>                             struct hmap *pgs,
>>>>>>>> @@ -15381,6 +15409,7 @@ ovnnb_db_run(struct northd_input *input_data,
>>>>>>>>      stopwatch_start(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec());
>>>>>>>>      ovn_update_ipv6_options(&data->ports);
>>>>>>>>      ovn_update_ipv6_prefix(&data->ports);
>>>>>>>> +    ovn_update_lsp_garp_options(&data->ports);
>>>>>>>>
>>>>>>>>      sync_lbs(input_data, ovnsb_txn, &data->datapaths, &data->lbs);
>>>>>>>>      sync_address_sets(input_data, ovnsb_txn, &data->datapaths);
>>>>>>>
Lorenzo Bianconi July 29, 2022, 4:35 p.m. UTC | #9
> On Thu, Jul 14, 2022 at 3:36 AM Lorenzo Bianconi
> <lorenzo.bianconi@redhat.com> wrote:
> >
> > > On 7/13/22 18:07, Lorenzo Bianconi wrote:
> > > >> On 6/17/22 00:31, Lorenzo Bianconi wrote:
> > > >>> When using VLAN backed networks and OVN routers leveraging the
> > > >>> 'ovn-chassis-mac-mappings' option, the eth.src field is replaced by the
> > > >>> chassis mac address in order to not expose the router mac address from
> > > >>> different nodes and confuse the TOR switch. However doing so the TOR
> > > >>> switch is not able to learn the port/mac bindings for routed E/W traffic
> > > >>> and it is force to always flood it. Fix this issue adding the capability
> > > >>> to send GARP traffic for logical switch ports if the corresponding logical
> > > >>> switch has the ovn-lsp-garp parameter set to true in the option column.
> > > >>> More into about the issue can be found here [0].
> > > >>>
> > > >>> [0] https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050678.html
> > > >>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2087779
> > > >>>
> > > >>> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> > > >>> ---
> > > >>
> > > >> Hi Lorenzo,
> > > >
> > > > Hi Dumitru,
> > > >
> > > > Thanks for reviewing it :)
> > > >
> > > >>
> > > >> I have a few concerns with this approach:
> > > >>
> > > >> a. The CMS will have to set this option for all VMs on all logical
> > > >> switches which will enable periodic GARPs for all of them all the time.
> > > >> That seems like quite a lot of broadcast traffic in the fabric.
> > > >>
> > > >> b. There's no guarantee that the GARPs are sent in time to prevent the
> > > >> FDB timeouts on the ToR switches.  At best we could make the interval
> > > >> configurable but I don't think this is way better either.
> > > >>
> > > >> c. This is not really introduced by your patch but we will be causing
> > > >> this more often now.  With the topology:
> > > >>
> > > >> (HV1) VM1 -- LS1 --- LR -- LS2 -- VM2 (HV2)
> > > >>             (VLAN-backed network)
> > > >>
> > > >> HV1 configured with chassis mac mapping HV1-MAC
> > > >> HV2 configured with chassis mac mapping HV2-MAC
> > > >>
> > > >> We're leaking MAC addresses from LS1's broadcast domain (VM1-MAC) and
> > > >> from LS2's broadcast domain (VM2-MAC) into the fabric.  I'm not sure
> > > >> that's OK.
> 
> I'm not sure I understood what you mean here.
> Let's say we have 2 VMs - VM1 and VM3 connected to the same VLAN
> logical switch LS1
> and if VM1 is on chassis HV1 and VM3 is on chassis HV3,  the fabric
> will learn VM1's and VM3's mac
> when they both communicate right ?   Please note that logical switch
> LS1 is a vlan tenant network with
> a localnet port (so no geneve tunnels are used for the traffic from VM1 to VM3).
> 
> 
> > > >>
> > > >> I think a proper solution is to change how we run the logical pipelines
> > > >> in case of vlan-backed networks.  We currently have an assymetry:
> > > >>
> > > >> For packets flowing from VM1 to VM2 we execute:
> > > >> - on HV1: LS1-ingress, LS1-egress, LR-ingress, LR-egress, LS2-ingress
> > > >> - on HV2: LS2-egress
> > > >>
> > > >> For packets flowing from VM2 to VM1 we execute:
> > > >> - on HV2: LS2-ingress, LS2-egress, LR-ingress, LR-egress, LS1-ingress
> > > >> - on HV1: LS1-egress
> > > >>
> > > >> What if we change this to:
> > > >> VM1 -> VM2:
> > > >> - on HV1: LS1-ingress, LS1-egress, LR-ingress
> > > >> - on HV2: LR-egress, LS2-ingress, LS2-egress
> > > >>
> > > >> VM2 -> VM1:
> > > >> - on HV2: LS2-ingress, LS2-egress, LR-ingress
>> - on HV1: LR-egress, LS1-ingress, LS1-egress
> > > >
> 
> I'm not sure how easy it would be to run the pipeline as you suggested
> when ovn-chassis-mac-mappings is configured.
> However, OVN does support centralized routing if a logical router has a
> distributed gateway port (connecting to the provider network) and the
> other logical router ports, connecting to the tenant VLAN networks, have
> the reside-on-redirect-chassis option configured.

Hi Numan,

sorry for the late reply. I took a look at the configuration you described
above and I guess we still have a similar issue. In fact, the HVs running VM1
and VM2 will send the traffic through the localnet interfaces using the
logical router MAC address as destination address, while the HV running the
distributed gateway port will send the traffic using ovn-chassis-mac-mappings
as source MAC address. This way the ToR switch never learns which port to use
for the logical router MAC addresses (since no packets carry these addresses
as source address), so the traffic is still flooded.
To fix that we would have to enable GARPs for these addresses too, so I guess
there is no gain here, right?
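
For reference, such a gratuitous ARP is simply an ARP request whose sender
and target protocol addresses match, sourced from the MAC we want the ToR to
learn. A minimal standalone sketch (plain C, untagged frame, hypothetical
helper name; on the VLAN network the real packets would also carry the
802.1Q tag, as in the capture below):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Build an untagged gratuitous ARP into buf: 14 bytes of Ethernet
 * header plus 28 bytes of ARP payload. */
static size_t
build_garp(uint8_t buf[42], const uint8_t mac[6], const uint8_t ip[4])
{
    static const uint8_t bcast[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    uint8_t *p = buf;

    memcpy(p, bcast, 6); p += 6;          /* eth.dst: broadcast */
    memcpy(p, mac, 6); p += 6;            /* eth.src: advertised MAC */
    *p++ = 0x08; *p++ = 0x06;             /* ethertype: ARP (0x0806) */

    *p++ = 0x00; *p++ = 0x01;             /* htype: Ethernet */
    *p++ = 0x08; *p++ = 0x00;             /* ptype: IPv4 */
    *p++ = 6; *p++ = 4;                   /* hlen, plen */
    *p++ = 0x00; *p++ = 0x01;             /* op: request */
    memcpy(p, mac, 6); p += 6;            /* sender hardware address */
    memcpy(p, ip, 4); p += 4;             /* sender protocol address */
    memset(p, 0, 6); p += 6;              /* target hardware address */
    memcpy(p, ip, 4); p += 4;             /* target IP == sender IP */

    return (size_t) (p - buf);            /* 42 bytes */
}
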
Below is the traffic sniffed on the ovn-chassis-1 localnet interface (eth2
for ovn-fake-multinode).

- 00:00:00:00:ff:01 and 00:00:00:00:ff:02 are the logical router MAC addresses
- 50:54:00:00:00:03 and 40:54:00:00:00:03 are the VM1 and VM2 MAC addresses
- 1e:02:ad:bb:aa:01 is the ovn-chassis-mac-mappings chassis MAC

[root@ovn-chassis-1 /]# tcpdump -nnei eth2 icmp
listening on eth2, link-type EN10MB (Ethernet), snapshot length 262144 bytes
16:13:47.879353 50:54:00:00:00:03 > 00:00:00:00:ff:01, ethertype 802.1Q (0x8100), length 102: vlan 170, p 0, ethertype IPv4 (0x0800), 10.0.0.3 > 20.0.0.3: ICMP echo request, id 32172, seq 1, length 64
16:13:47.879391 40:54:00:00:00:03 > 00:00:00:00:ff:02, ethertype 802.1Q (0x8100), length 102: vlan 190, p 0, ethertype IPv4 (0x0800), 20.0.0.3 > 10.0.0.3: ICMP echo reply, id 32172, seq 1, length 64
16:13:47.879398 1e:02:ad:bb:aa:01 > 50:54:00:00:00:03, ethertype 802.1Q (0x8100), length 102: vlan 170, p 0, ethertype IPv4 (0x0800), 20.0.0.3 > 10.0.0.3: ICMP echo reply, id 32172, seq 1, length 64
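
Note that in this capture the router MACs 00:00:00:00:ff:01/02 appear only as
destination addresses, never as sources. A learning switch installs FDB
entries from eth.src only, so those MACs are never learned and frames
addressed to them keep getting flooded. A toy sketch of that logic
(hypothetical switch model, not OVN code):

#include <stdio.h>
#include <string.h>

struct fdb_entry {
    const char *mac;
    int port;
};

static struct fdb_entry fdb[16];
static int n_fdb;

/* Learning installs only the source MAC of a received frame. */
static void
fdb_learn(const char *src_mac, int in_port)
{
    for (int i = 0; i < n_fdb; i++) {
        if (!strcmp(fdb[i].mac, src_mac)) {
            fdb[i].port = in_port;        /* refresh the entry */
            return;
        }
    }
    fdb[n_fdb].mac = src_mac;
    fdb[n_fdb].port = in_port;
    n_fdb++;
}

static int
fdb_lookup(const char *dst_mac)
{
    for (int i = 0; i < n_fdb; i++) {
        if (!strcmp(fdb[i].mac, dst_mac)) {
            return fdb[i].port;
        }
    }
    return -1;                            /* unknown unicast: flood */
}

int
main(void)
{
    fdb_learn("50:54:00:00:00:03", 1);    /* VM1's request, eth.src */
    fdb_learn("1e:02:ad:bb:aa:01", 2);    /* routed reply, chassis MAC */

    /* The router MAC was never a source, so the lookup fails and the
     * frame is flooded on all ports. */
    printf("router MAC port: %d\n", fdb_lookup("00:00:00:00:ff:01"));
    return 0;
}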

I guess the packet below should not be received by ovn-chassis-1:
16:13:47.879391 40:54:00:00:00:03 > 00:00:00:00:ff:02, ethertype 802.1Q (0x8100), length 102: vlan 190, p 0, ethertype IPv4 (0x0800), 20.0.0.3 > 10.0.0.3: ICMP echo reply, id 32172, seq 1, length 64

Regards,
Lorenzo

> 
> So in a way the CMS can use centralized routing to avoid this issue.
> However, if we want to fix this issue for distributed routing, it looks
> to me like the proposed RFC patch is the only way to fix it.  And with
> proper documentation the CMS can know the benefits and drawbacks of
> both options.
> 
> Thanks
> Numan
> 
> 
> > > > I do not know why the current architecture is done this way (any suggestions?).
> > > > I guess the approach you suggested should work. Are we introducing any
> > > > backward-compatibility issues?
> > >
> > > We would probably create a compatibility issue. :)
> > >
> > > I don't know if this approach would even work; I was just trying to
> > > imagine how traffic from within OVN should be seen on the fabric in
> > > this case.
> > >
> > > Maybe we need to think some more about other options.
> >
> > Thinking again about this approach, we are implementing some kind of
> > L2 NATing, right? How can we implement DNAT to swap HV2-MAC with the
> > proper destination MAC address (let's say pod B's)?
> >
> > Regards,
> > Lorenzo
> >
> > >
> > > Regards,
> > > Dumitru
> > >
> > > >
> > > > Regards,
> > > > Lorenzo
> > > >
> > > >>
> > > >> Would this ensure that we always only use ovn-chassis-mac-mappings on
> > > >> the VLAN network and avoid flooding on the ToR?
> > > >>
> > > >> Regards,
> > > >> Dumitru
> > > >>
> > > >>>  controller/pinctrl.c | 85 +++++++++++++++++++++++++++++---------------
> > > >>>  northd/northd.c      | 29 +++++++++++++++
> > > >>>  2 files changed, 85 insertions(+), 29 deletions(-)
> > > >>>
> > > >>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > > >>> index 9a1a0faa1..eb5739bfc 100644
> > > >>> --- a/controller/pinctrl.c
> > > >>> +++ b/controller/pinctrl.c
> > > >>> @@ -4533,7 +4533,8 @@ send_garp_rarp(struct rconn *swconn, struct garp_rarp_data *garp_rarp,
> > > >>>          garp_rarp->backoff *= 2;
> > > >>>          garp_rarp->announce_time = current_time + garp_rarp->backoff * 1000;
> > > >>>      } else {
> > > >>> -        garp_rarp->announce_time = LLONG_MAX;
> > > >>> +        /* Default timeout is 180s. */
> > > >>> +        garp_rarp->announce_time = current_time + 180 * 1000;
> > > >>>      }
> > > >>>      return garp_rarp->announce_time;
> > > >>>  }
> > > >>> @@ -5510,14 +5511,15 @@ ip_mcast_querier_wait(long long int query_time)
> > > >>>
> > > >>>  /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
> > > >>>  static void
> > > >>> -get_localnet_vifs_l3gwports(
> > > >>> +get_local_vifs_l3gwports(
> > > >>>      struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
> > > >>>      struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > > >>>      const struct ovsrec_bridge *br_int,
> > > >>>      const struct sbrec_chassis *chassis,
> > > >>>      const struct hmap *local_datapaths,
> > > >>>      struct sset *localnet_vifs,
> > > >>> -    struct sset *local_l3gw_ports)
> > > >>> +    struct sset *local_l3gw_ports,
> > > >>> +    struct sset *local_vifs)
> > > >>>  {
> > > >>>      for (int i = 0; i < br_int->n_ports; i++) {
> > > >>>          const struct ovsrec_port *port_rec = br_int->ports[i];
> > > >>> @@ -5574,7 +5576,8 @@ get_localnet_vifs_l3gwports(
> > > >>>          /* Get l3gw ports.  Consider port bindings with type "l3gateway"
> > > >>>           * that connect to gateway routers (if local), and consider port
> > > >>>           * bindings of type "patch" since they might connect to
> > > >>> -         * distributed gateway ports with NAT addresses. */
> > > >>> +         * distributed gateway ports with NAT addresses.
> > > >>> +         * Get LSP ports if requested by CMS. */
> > > >>>
> > > >>>          sbrec_port_binding_index_set_datapath(target, ld->datapath);
> > > >>>          SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
> > > >>> @@ -5583,6 +5586,11 @@ get_localnet_vifs_l3gwports(
> > > >>>                  || !strcmp(pb->type, "patch")) {
> > > >>>                  sset_add(local_l3gw_ports, pb->logical_port);
> > > >>>              }
> > > >>> +            /* GARP packets for lsp ports. */
> > > >>> +            if (pb->chassis == chassis &&
> > > >>> +                smap_get_bool(&pb->options, "ovn-lsp-garp", false)) {
> > > >>> +                sset_add(local_vifs, pb->logical_port);
> > > >>> +            }
> > > >>>          }
> > > >>>      }
> > > >>>      sbrec_port_binding_index_destroy_row(target);
> > > >>> @@ -5761,6 +5769,26 @@ send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
> > > >>>      }
> > > >>>  }
> > > >>>
> > > >>> +static void
> > > >>> +send_garp_rarp_update_for_pb_set(
> > > >>> +        struct ovsdb_idl_txn *ovnsb_idl_txn,
> > > >>> +        struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
> > > >>> +        struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > > >>> +        struct sset *vif_set, const struct hmap *local_datapaths,
> > > >>> +        struct shash *nat_addresses)
> > > >>> +{
> > > >>> +    const char *iface_id;
> > > >>> +    SSET_FOR_EACH (iface_id, vif_set) {
> > > >>> +        const struct sbrec_port_binding *pb = lport_lookup_by_name(
> > > >>> +            sbrec_port_binding_by_name, iface_id);
> > > >>> +        if (pb) {
> > > >>> +            send_garp_rarp_update(ovnsb_idl_txn,
> > > >>> +                                  sbrec_mac_binding_by_lport_ip,
> > > >>> +                                  local_datapaths, pb, nat_addresses);
> > > >>> +        }
> > > >>> +    }
> > > >>> +}
> > > >>> +
> > > >>>  /* Called by pinctrl_run(). Runs with in the main ovn-controller
> > > >>>   * thread context. */
> > > >>>  static void
> > > >>> @@ -5776,15 +5804,17 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > > >>>  {
> > > >>>      struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
> > > >>>      struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
> > > >>> +    struct sset local_vifs = SSET_INITIALIZER(&local_vifs);
> > > >>>      struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
> > > >>>      struct shash nat_addresses;
> > > >>>
> > > >>>      shash_init(&nat_addresses);
> > > >>>
> > > >>> -    get_localnet_vifs_l3gwports(sbrec_port_binding_by_datapath,
> > > >>> -                                sbrec_port_binding_by_name,
> > > >>> -                                br_int, chassis, local_datapaths,
> > > >>> -                                &localnet_vifs, &local_l3gw_ports);
> > > >>> +    get_local_vifs_l3gwports(sbrec_port_binding_by_datapath,
> > > >>> +                             sbrec_port_binding_by_name,
> > > >>> +                             br_int, chassis, local_datapaths,
> > > >>> +                             &localnet_vifs, &local_l3gw_ports,
> > > >>> +                             &local_vifs);
> > > >>>
> > > >>>      get_nat_addresses_and_keys(sbrec_port_binding_by_name,
> > > >>>                                 &nat_ip_keys, &local_l3gw_ports,
> > > >>> @@ -5795,36 +5825,33 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > > >>>      struct shash_node *iter;
> > > >>>      SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
> > > >>>          if (!sset_contains(&localnet_vifs, iter->name) &&
> > > >>> -            !sset_contains(&nat_ip_keys, iter->name)) {
> > > >>> +            !sset_contains(&nat_ip_keys, iter->name) &&
> > > >>> +            !sset_contains(&local_vifs, iter->name)) {
> > > >>>              send_garp_rarp_delete(iter->name);
> > > >>>          }
> > > >>>      }
> > > >>>
> > > >>>      /* Update send_garp_rarp_data. */
> > > >>> -    const char *iface_id;
> > > >>> -    SSET_FOR_EACH (iface_id, &localnet_vifs) {
> > > >>> -        const struct sbrec_port_binding *pb = lport_lookup_by_name(
> > > >>> -            sbrec_port_binding_by_name, iface_id);
> > > >>> -        if (pb) {
> > > >>> -            send_garp_rarp_update(ovnsb_idl_txn,
> > > >>> -                                  sbrec_mac_binding_by_lport_ip,
> > > >>> -                                  local_datapaths, pb, &nat_addresses);
> > > >>> -        }
> > > >>> -    }
> > > >>> -
> > > >>> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> > > >>> +                                     sbrec_mac_binding_by_lport_ip,
> > > >>> +                                     sbrec_port_binding_by_name,
> > > >>> +                                     &localnet_vifs, local_datapaths,
> > > >>> +                                     &nat_addresses);
> > > >>> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> > > >>> +                                     sbrec_mac_binding_by_lport_ip,
> > > >>> +                                     sbrec_port_binding_by_name,
> > > >>> +                                     &local_vifs, local_datapaths,
> > > >>> +                                     &nat_addresses);
> > > >>>      /* Update send_garp_rarp_data for nat-addresses. */
> > > >>> -    const char *gw_port;
> > > >>> -    SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
> > > >>> -        const struct sbrec_port_binding *pb
> > > >>> -            = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
> > > >>> -        if (pb) {
> > > >>> -            send_garp_rarp_update(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
> > > >>> -                                  local_datapaths, pb, &nat_addresses);
> > > >>> -        }
> > > >>> -    }
> > > >>> +    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
> > > >>> +                                     sbrec_mac_binding_by_lport_ip,
> > > >>> +                                     sbrec_port_binding_by_name,
> > > >>> +                                     &local_l3gw_ports, local_datapaths,
> > > >>> +                                     &nat_addresses);
> > > >>>
> > > >>>      /* pinctrl_handler thread will send the GARPs. */
> > > >>>
> > > >>> +    sset_destroy(&local_vifs);
> > > >>>      sset_destroy(&localnet_vifs);
> > > >>>      sset_destroy(&local_l3gw_ports);
> > > >>>
> > > >>> diff --git a/northd/northd.c b/northd/northd.c
> > > >>> index 0d6ebccde..9a4a880a7 100644
> > > >>> --- a/northd/northd.c
> > > >>> +++ b/northd/northd.c
> > > >>> @@ -6446,6 +6446,34 @@ ovn_update_ipv6_options(struct hmap *ports)
> > > >>>      }
> > > >>>  }
> > > >>>
> > > >>> +static void
> > > >>> +ovn_update_lsp_garp_options(struct hmap *ports)
> > > >>> +{
> > > >>> +    struct ovn_port *op;
> > > >>> +    HMAP_FOR_EACH (op, key_node, ports) {
> > > >>> +        if (!op->nbsp) {
> > > >>> +            continue;
> > > >>> +        }
> > > >>> +        if (op->nbsp->type[0] || op->nbsp->parent_name) {
> > > >>> +            continue;
> > > >>> +        }
> > > >>> +
> > > >>> +        struct ovn_datapath *od = op->od;
> > > >>> +        if (!od || !od->nbs) {
> > > >>> +            continue;
> > > >>> +        }
> > > >>> +
> > > >>> +        struct smap options;
> > > >>> +        smap_clone(&options, &op->sb->options);
> > > >>> +
> > > >>> +        bool ovn_lsp_garp = smap_get_bool(&od->nbs->other_config,
> > > >>> +                                          "ovn-lsp-garp", false);
> > > >>> +        smap_add(&options, "ovn-lsp-garp", ovn_lsp_garp ? "true" : "false");
> > > >>> +        sbrec_port_binding_set_options(op->sb, &options);
> > > >>> +        smap_destroy(&options);
> > > >>> +    }
> > > >>> +}
> > > >>> +
> > > >>>  static void
> > > >>>  build_port_group_lswitches(struct northd_input *input_data,
> > > >>>                             struct hmap *pgs,
> > > >>> @@ -15381,6 +15409,7 @@ ovnnb_db_run(struct northd_input *input_data,
> > > >>>      stopwatch_start(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec());
> > > >>>      ovn_update_ipv6_options(&data->ports);
> > > >>>      ovn_update_ipv6_prefix(&data->ports);
> > > >>> +    ovn_update_lsp_garp_options(&data->ports);
> > > >>>
> > > >>>      sync_lbs(input_data, ovnsb_txn, &data->datapaths, &data->lbs);
> > > >>>      sync_address_sets(input_data, ovnsb_txn, &data->datapaths);
> > > >>
diff mbox series

Patch

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 9a1a0faa1..eb5739bfc 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -4533,7 +4533,8 @@  send_garp_rarp(struct rconn *swconn, struct garp_rarp_data *garp_rarp,
         garp_rarp->backoff *= 2;
         garp_rarp->announce_time = current_time + garp_rarp->backoff * 1000;
     } else {
-        garp_rarp->announce_time = LLONG_MAX;
+        /* Default timeout is 180s. */
+        garp_rarp->announce_time = current_time + 180 * 1000;
     }
     return garp_rarp->announce_time;
 }
@@ -5510,14 +5511,15 @@  ip_mcast_querier_wait(long long int query_time)
 
 /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
 static void
-get_localnet_vifs_l3gwports(
+get_local_vifs_l3gwports(
     struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
     struct ovsdb_idl_index *sbrec_port_binding_by_name,
     const struct ovsrec_bridge *br_int,
     const struct sbrec_chassis *chassis,
     const struct hmap *local_datapaths,
     struct sset *localnet_vifs,
-    struct sset *local_l3gw_ports)
+    struct sset *local_l3gw_ports,
+    struct sset *local_vifs)
 {
     for (int i = 0; i < br_int->n_ports; i++) {
         const struct ovsrec_port *port_rec = br_int->ports[i];
@@ -5574,7 +5576,8 @@  get_localnet_vifs_l3gwports(
         /* Get l3gw ports.  Consider port bindings with type "l3gateway"
          * that connect to gateway routers (if local), and consider port
          * bindings of type "patch" since they might connect to
-         * distributed gateway ports with NAT addresses. */
+         * distributed gateway ports with NAT addresses.
+         * Get LSP ports if requested by CMS. */
 
         sbrec_port_binding_index_set_datapath(target, ld->datapath);
         SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
@@ -5583,6 +5586,11 @@  get_localnet_vifs_l3gwports(
                 || !strcmp(pb->type, "patch")) {
                 sset_add(local_l3gw_ports, pb->logical_port);
             }
+            /* GARP packets for lsp ports. */
+            if (pb->chassis == chassis &&
+                smap_get_bool(&pb->options, "ovn-lsp-garp", false)) {
+                sset_add(local_vifs, pb->logical_port);
+            }
         }
     }
     sbrec_port_binding_index_destroy_row(target);
@@ -5761,6 +5769,26 @@  send_garp_rarp_run(struct rconn *swconn, long long int *send_garp_rarp_time)
     }
 }
 
+static void
+send_garp_rarp_update_for_pb_set(
+        struct ovsdb_idl_txn *ovnsb_idl_txn,
+        struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+        struct ovsdb_idl_index *sbrec_port_binding_by_name,
+        struct sset *vif_set, const struct hmap *local_datapaths,
+        struct shash *nat_addresses)
+{
+    const char *iface_id;
+    SSET_FOR_EACH (iface_id, vif_set) {
+        const struct sbrec_port_binding *pb = lport_lookup_by_name(
+            sbrec_port_binding_by_name, iface_id);
+        if (pb) {
+            send_garp_rarp_update(ovnsb_idl_txn,
+                                  sbrec_mac_binding_by_lport_ip,
+                                  local_datapaths, pb, nat_addresses);
+        }
+    }
+}
+
 /* Called by pinctrl_run(). Runs with in the main ovn-controller
  * thread context. */
 static void
@@ -5776,15 +5804,17 @@  send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
 {
     struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
     struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
+    struct sset local_vifs = SSET_INITIALIZER(&local_vifs);
     struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
     struct shash nat_addresses;
 
     shash_init(&nat_addresses);
 
-    get_localnet_vifs_l3gwports(sbrec_port_binding_by_datapath,
-                                sbrec_port_binding_by_name,
-                                br_int, chassis, local_datapaths,
-                                &localnet_vifs, &local_l3gw_ports);
+    get_local_vifs_l3gwports(sbrec_port_binding_by_datapath,
+                             sbrec_port_binding_by_name,
+                             br_int, chassis, local_datapaths,
+                             &localnet_vifs, &local_l3gw_ports,
+                             &local_vifs);
 
     get_nat_addresses_and_keys(sbrec_port_binding_by_name,
                                &nat_ip_keys, &local_l3gw_ports,
@@ -5795,36 +5825,33 @@  send_garp_rarp_prepare(struct ovsdb_idl_txn *ovnsb_idl_txn,
     struct shash_node *iter;
     SHASH_FOR_EACH_SAFE (iter, &send_garp_rarp_data) {
         if (!sset_contains(&localnet_vifs, iter->name) &&
-            !sset_contains(&nat_ip_keys, iter->name)) {
+            !sset_contains(&nat_ip_keys, iter->name) &&
+            !sset_contains(&local_vifs, iter->name)) {
             send_garp_rarp_delete(iter->name);
         }
     }
 
     /* Update send_garp_rarp_data. */
-    const char *iface_id;
-    SSET_FOR_EACH (iface_id, &localnet_vifs) {
-        const struct sbrec_port_binding *pb = lport_lookup_by_name(
-            sbrec_port_binding_by_name, iface_id);
-        if (pb) {
-            send_garp_rarp_update(ovnsb_idl_txn,
-                                  sbrec_mac_binding_by_lport_ip,
-                                  local_datapaths, pb, &nat_addresses);
-        }
-    }
-
+    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
+                                     sbrec_mac_binding_by_lport_ip,
+                                     sbrec_port_binding_by_name,
+                                     &localnet_vifs, local_datapaths,
+                                     &nat_addresses);
+    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
+                                     sbrec_mac_binding_by_lport_ip,
+                                     sbrec_port_binding_by_name,
+                                     &local_vifs, local_datapaths,
+                                     &nat_addresses);
     /* Update send_garp_rarp_data for nat-addresses. */
-    const char *gw_port;
-    SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
-        const struct sbrec_port_binding *pb
-            = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
-        if (pb) {
-            send_garp_rarp_update(ovnsb_idl_txn, sbrec_mac_binding_by_lport_ip,
-                                  local_datapaths, pb, &nat_addresses);
-        }
-    }
+    send_garp_rarp_update_for_pb_set(ovnsb_idl_txn,
+                                     sbrec_mac_binding_by_lport_ip,
+                                     sbrec_port_binding_by_name,
+                                     &local_l3gw_ports, local_datapaths,
+                                     &nat_addresses);
 
     /* pinctrl_handler thread will send the GARPs. */
 
+    sset_destroy(&local_vifs);
     sset_destroy(&localnet_vifs);
     sset_destroy(&local_l3gw_ports);
 
diff --git a/northd/northd.c b/northd/northd.c
index 0d6ebccde..9a4a880a7 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -6446,6 +6446,34 @@  ovn_update_ipv6_options(struct hmap *ports)
     }
 }
 
+static void
+ovn_update_lsp_garp_options(struct hmap *ports)
+{
+    struct ovn_port *op;
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbsp) {
+            continue;
+        }
+        if (op->nbsp->type[0] || op->nbsp->parent_name) {
+            continue;
+        }
+
+        struct ovn_datapath *od = op->od;
+        if (!od || !od->nbs) {
+            continue;
+        }
+
+        struct smap options;
+        smap_clone(&options, &op->sb->options);
+
+        bool ovn_lsp_garp = smap_get_bool(&od->nbs->other_config,
+                                          "ovn-lsp-garp", false);
+        smap_add(&options, "ovn-lsp-garp", ovn_lsp_garp ? "true" : "false");
+        sbrec_port_binding_set_options(op->sb, &options);
+        smap_destroy(&options);
+    }
+}
+
 static void
 build_port_group_lswitches(struct northd_input *input_data,
                            struct hmap *pgs,
@@ -15381,6 +15409,7 @@  ovnnb_db_run(struct northd_input *input_data,
     stopwatch_start(CLEAR_LFLOWS_CTX_STOPWATCH_NAME, time_msec());
     ovn_update_ipv6_options(&data->ports);
     ovn_update_ipv6_prefix(&data->ports);
+    ovn_update_lsp_garp_options(&data->ports);
 
     sync_lbs(input_data, ovnsb_txn, &data->datapaths, &data->lbs);
     sync_address_sets(input_data, ovnsb_txn, &data->datapaths);
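
The first pinctrl hunk above turns the announcements from one-shot into
periodic. A minimal sketch of the resulting scheduling (field and constant
names are illustrative; the backoff condition and its cap live in pinctrl
code outside the hunk):

struct garp_sched {
    long long announce_time;    /* next announcement, in ms */
    long long backoff;          /* current backoff, in s */
};

#define BACKOFF_CAP 16          /* hypothetical cap, not from the patch */

static long long
next_announce(struct garp_sched *g, long long now_ms)
{
    if (g->backoff < BACKOFF_CAP) {
        /* Initial burst, doubling the gap between announcements. */
        g->backoff *= 2;
        g->announce_time = now_ms + g->backoff * 1000;
    } else {
        /* Before the patch this was LLONG_MAX, i.e. stop announcing
         * once the backoff was exhausted.  After the patch the GARPs
         * are refreshed every 180 s so that the FDB entries on the
         * ToR switch do not age out. */
        g->announce_time = now_ms + 180 * 1000;
    }
    return g->announce_time;
}

On the northd side, the new ovn_update_lsp_garp_options() copies the flag
from the logical switch into the Port_Binding options of each plain VIF, so
a CMS would opt a switch in with something like (note the hunk reads the
other_config column, while the commit message says "option column"):

    ovn-nbctl set Logical_Switch <ls> other_config:ovn-lsp-garp=true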