diff mbox

[ovs-dev,v4,1/2] ovn: Gratuitous ARP for centralized NAT rules on a distributed router

Message ID 1489789809-5548-1-git-send-email-mickeys.dev@gmail.com
State Changes Requested
Headers show

Commit Message

Mickey Spiegel March 17, 2017, 10:30 p.m. UTC
This patch extends gratuitous ARP support for NAT addresses so that it
applies to centralized NAT rules on a distributed router, in addition to
the existing gratuitous ARP support for NAT addresses on gateway routers.

Gratuitous ARP packets for centralized NAT rules on a distributed router
are only generated on the redirect-chassis.  This is achieved by extending
the syntax for "options:nat-addresses" in the southbound database,
allowing the condition 'is_chassis_resident("LPORT_NAME")' to be appended
after the MAC and IP addresses.  This condition is automatically inserted
by ovn-northd when the northbound "options:nat-addresses" is set to
"router" and the peer is a distributed gateway port.

A separate patch will be required to support gratuitous ARP for
distributed NAT rules that specify logical_port and external_mac.  Since
the MAC address differs and the logical port often resides on a different
chassis from the redirect-chassis, these addresses cannot be included in
the same "nat-addresses" string as for centralized NAT rules.

Signed-off-by: Mickey Spiegel <mickeys.dev@gmail.com>
---
 ovn/controller/pinctrl.c | 104 ++++++++++++++++++++++++++++++++++++++++++++---
 ovn/lib/ovn-util.c       |  38 ++++++++++++++---
 ovn/lib/ovn-util.h       |   2 +
 ovn/northd/ovn-northd.c  |  52 +++++++++++++++++-------
 ovn/ovn-nb.xml           |  33 ++++++++++++---
 ovn/ovn-sb.xml           |  31 ++++++++++----
 tests/ovn.at             |  70 +++++++++++++++++++++++++++++++
 7 files changed, 289 insertions(+), 41 deletions(-)

Comments

Gurucharan Shetty March 21, 2017, 8:39 p.m. UTC | #1
On 17 March 2017 at 15:30, Mickey Spiegel <mickeys.dev@gmail.com> wrote:

> This patch extends gratuitous ARP support for NAT addresses so that it
> applies to centralized NAT rules on a distributed router, in addition to
> the existing gratuitous ARP support for NAT addresses on gateway routers.
>
> Gratuitous ARP packets for centralized NAT rules on a distributed router
> are only generated on the redirect-chassis.


A comment here on what centralized NAT rules are will be useful when this
is seen a couple of months from now.


> This is achieved by extending
> the syntax for "options:nat-addresses" in the southbound database,
> allowing the condition 'is_chassis_resident("LPORT_NAME")' to be appended
> after the MAC and IP addresses.  This condition is automatically inserted
> by ovn-northd when the northbound "options:nat-addresses" is set to
> "router" and the peer is a distributed gateway port.
>
> A separate patch will be required to support gratuitous ARP for
> distributed NAT rules that specify logical_port and external_mac.  Since
> the MAC address differs and the logical port often resides on a different
> chassis from the redirect-chassis, these addresses cannot be included in
> the same "nat-addresses" string as for centralized NAT rules.
>
> Signed-off-by: Mickey Spiegel <mickeys.dev@gmail.com>
> ---
>  ovn/controller/pinctrl.c | 104 ++++++++++++++++++++++++++++++
> ++++++++++++++---
>  ovn/lib/ovn-util.c       |  38 ++++++++++++++---
>  ovn/lib/ovn-util.h       |   2 +
>  ovn/northd/ovn-northd.c  |  52 +++++++++++++++++-------
>  ovn/ovn-nb.xml           |  33 ++++++++++++---
>  ovn/ovn-sb.xml           |  31 ++++++++++----
>  tests/ovn.at             |  70 +++++++++++++++++++++++++++++++
>  7 files changed, 289 insertions(+), 41 deletions(-)
>
> diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
> index b342189..08af792 100644
> --- a/ovn/controller/pinctrl.c
> +++ b/ovn/controller/pinctrl.c
> @@ -37,6 +37,7 @@
>  #include "lib/dhcp.h"
>  #include "ovn-controller.h"
>  #include "ovn/actions.h"
> +#include "ovn/lex.h"
>  #include "ovn/lib/logical-fields.h"
>  #include "ovn/lib/ovn-dhcp.h"
>  #include "ovn/lib/ovn-util.h"
> @@ -1049,7 +1050,8 @@ send_garp_update(const struct sbrec_port_binding
> *binding_rec,
>
>      volatile struct garp_data *garp = NULL;
>      /* Update GARP for NAT IP if it exists. */
> -    if (!strcmp(binding_rec->type, "l3gateway")) {
> +    if (!strcmp(binding_rec->type, "l3gateway")
> +        || !strcmp(binding_rec->type, "patch")) {
>
A comment above on why we should also look at "patch" will be useful.


>          struct lport_addresses *laddrs = NULL;
>          laddrs = shash_find_data(nat_addresses,
> binding_rec->logical_port);
>          if (!laddrs) {
> @@ -1202,24 +1204,101 @@ get_localnet_vifs_l3gwports(const struct
> ovsrec_bridge *br_int,
>
>      const struct local_datapath *ld;
>      HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
> -        if (!ld->has_local_l3gateway) {
> +        if (!ld->localnet_port) {
>              continue;
>          }
>
>          for (size_t i = 0; i < ld->ldatapath->n_lports; i++) {
>              const struct sbrec_port_binding *pb =
> ld->ldatapath->lports[i];
> -            if (!strcmp(pb->type, "l3gateway")
> -                /* && it's on this chassis */) {
> +            if ((ld->has_local_l3gateway && !strcmp(pb->type,
> "l3gateway"))
> +                || !strcmp(pb->type, "patch")) {
>
A comment above on why we are considering "patch" will be useful.

Does local_datapaths include every patch port or is it only those that have
l2 distributed gateway port instantiated on it?


>                  sset_add(local_l3gw_ports, pb->logical_port);
>              }
>          }
>      }
>  }
>
> +static bool
> +pinctrl_is_chassis_resident(const struct lport_index *lports,
> +                            const struct sbrec_chassis *chassis,
> +                            const char *port_name)
> +{
> +    const struct sbrec_port_binding *pb
> +        = lport_lookup_by_name(lports, port_name);
> +    return pb && pb->chassis && pb->chassis == chassis;
> +}
> +
> +/* Extracts the mac, IPv4 and IPv6 addresses, and logical port from
> + * 'addresses' which should be of the format 'MAC [IP1 IP2 ..]
> + * [is_chassis_resident("LPORT_NAME")]', where IPn should be a valid IPv4
> + * or IPv6 address, and stores them in the 'ipv4_addrs' and 'ipv6_addrs'
> + * fields of 'laddrs'.  The logical port name is stored in 'lport'.
> + *
> + * Returns true if at least 'MAC' is found in 'address', false otherwise.
> + *
> + * The caller must call destroy_lport_addresses() and free(*lport). */
> +static bool
> +extract_addresses_with_port(const char *addresses,
> +                            struct lport_addresses *laddrs,
> +                            char **lport)
> +{
> +    int ofs;
> +    if (!extract_addresses(addresses, laddrs, &ofs)) {
> +        return false;
> +    } else if (ofs >= strlen(addresses)) {
> +        return true;
> +    }
> +
> +    struct lexer lexer;
> +    lexer_init(&lexer, addresses + ofs);
> +    lexer_get(&lexer);
> +
> +    if (lexer.error || lexer.token.type != LEX_T_ID
> +        || !lexer_match_id(&lexer, "is_chassis_resident")) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +        VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", addresses);
> +        lexer_destroy(&lexer);
> +        return true;
> +    }
> +
> +    if (!lexer_match(&lexer, LEX_T_LPAREN)) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +        VLOG_INFO_RL(&rl, "Syntax error: expecting '(' after "
> +                          "'is_chassis_resident' in address '%s'",
> addresses);
> +        lexer_destroy(&lexer);
> +        return false;
> +    }
> +
> +    if (lexer.token.type != LEX_T_STRING) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +        VLOG_INFO_RL(&rl,
> +                    "Syntax error: expecting quoted string after"
> +                    " 'is_chassis_resident' in address '%s'", addresses);
> +        lexer_destroy(&lexer);
> +        return false;
> +    }
> +
> +    *lport = xstrdup(lexer.token.s);
> +
> +    lexer_get(&lexer);
> +    if (!lexer_match(&lexer, LEX_T_RPAREN)) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +        VLOG_INFO_RL(&rl, "Syntax error: expecting ')' after quoted
> string in "
> +                          "'is_chassis_resident()' in address '%s'",
> +                          addresses);
> +        lexer_destroy(&lexer);
> +        return false;
> +    }
> +
> +    lexer_destroy(&lexer);
> +    return true;
> +}
> +
>  static void
>  get_nat_addresses_and_keys(struct sset *nat_address_keys,
>                             struct sset *local_l3gw_ports,
>                             const struct lport_index *lports,
> +                           const struct sbrec_chassis *chassis,
>                             struct shash *nat_addresses)
>  {
>      const char *gw_port;
> @@ -1236,10 +1315,23 @@ get_nat_addresses_and_keys(struct sset
> *nat_address_keys,
>          }
>
>          struct lport_addresses *laddrs = xmalloc(sizeof *laddrs);
> -        if (!extract_lsp_addresses(nat_addresses_options, laddrs)) {
> +        char *lport = NULL;
> +        if (!extract_addresses_with_port(nat_addresses_options, laddrs,
> &lport)
> +            || (!lport && !strcmp(pb->type, "patch"))) {
>              free(laddrs);
> +            if (lport) {
> +                free(lport);
> +            }
>              continue;
> +        } else if (lport) {
> +            if (!pinctrl_is_chassis_resident(lports, chassis, lport)) {
> +                free(laddrs);
> +                free(lport);
> +                continue;
> +            }
> +            free(lport);
>          }
> +
>          int i;
>          for (i = 0; i < laddrs->n_ipv4_addrs; i++) {
>              char *name = xasprintf("%s-%s", pb->logical_port,
> @@ -1275,7 +1367,7 @@ send_garp_run(const struct ovsrec_bridge *br_int,
>                        &localnet_vifs, &localnet_ofports,
> &local_l3gw_ports);
>
>      get_nat_addresses_and_keys(&nat_ip_keys, &local_l3gw_ports, lports,
> -                               &nat_addresses);
> +                               chassis, &nat_addresses);
>      /* For deleted ports and deleted nat ips, remove from send_garp_data.
> */
>      struct shash_node *iter, *next;
>      SHASH_FOR_EACH_SAFE (iter, next, &send_garp_data) {
> diff --git a/ovn/lib/ovn-util.c b/ovn/lib/ovn-util.c
> index 99e4a0e..644a5dc 100644
> --- a/ovn/lib/ovn-util.c
> +++ b/ovn/lib/ovn-util.c
> @@ -83,24 +83,29 @@ is_dynamic_lsp_address(const char *address)
>  }
>
>  /* Extracts the mac, IPv4 and IPv6 addresses from * 'address' which
> - * should be of the format 'MAC [IP1 IP2 ..]" where IPn should be a
> + * should be of the format "MAC [IP1 IP2 ..] .." where IPn should be a
>   * valid IPv4 or IPv6 address and stores them in the 'ipv4_addrs' and
> - * 'ipv6_addrs' fields of 'laddrs'.
> + * 'ipv6_addrs' fields of 'laddrs'.  There may be additional content in
> + * 'address' after "MAC [IP1 IP2 .. ]".  The value of 'ofs' that is
> + * returned indicates the offset where that additional content begins.
>   *
> - * Return true if at least 'MAC' is found in 'address', false otherwise.
> + * Returns true if at least 'MAC' is found in 'address', false otherwise.
>   *
>   * The caller must call destroy_lport_addresses(). */
>  bool
> -extract_lsp_addresses(const char *address, struct lport_addresses *laddrs)
> +extract_addresses(const char *address, struct lport_addresses *laddrs,
> +                  int *ofs)
>  {
>      memset(laddrs, 0, sizeof *laddrs);
>
>      const char *buf = address;
> +    const char *const start = buf;
>      int buf_index = 0;
>      const char *buf_end = buf + strlen(address);
>      if (!ovs_scan_len(buf, &buf_index, ETH_ADDR_SCAN_FMT,
>                        ETH_ADDR_SCAN_ARGS(laddrs->ea))) {
>          laddrs->ea = eth_addr_zero;
> +        *ofs = 0;
>          return false;
>      }
>
> @@ -129,17 +134,38 @@ extract_lsp_addresses(const char *address, struct
> lport_addresses *laddrs)
>          if (!error) {
>              add_ipv6_netaddr(laddrs, ip6, plen);
>          } else {
> -            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> -            VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", address);
>              free(error);
>              break;
>          }
>          buf += buf_index;
>      }
>
> +    *ofs = buf - start;
>      return true;
>  }
>
> +/* Extracts the mac, IPv4 and IPv6 addresses from * 'address' which
> + * should be of the format 'MAC [IP1 IP2 ..]" where IPn should be a
> + * valid IPv4 or IPv6 address and stores them in the 'ipv4_addrs' and
> + * 'ipv6_addrs' fields of 'laddrs'.
> + *
> + * Return true if at least 'MAC' is found in 'address', false otherwise.
> + *
> + * The caller must call destroy_lport_addresses(). */
> +bool
> +extract_lsp_addresses(const char *address, struct lport_addresses *laddrs)
> +{
> +    int ofs;
> +    bool success = extract_addresses(address, laddrs, &ofs);
> +
> +    if (success && ofs < strlen(address)) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +        VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", address);
> +    }
> +
> +    return success;
> +}
> +
>  /* Extracts the mac, IPv4 and IPv6 addresses from the
>   * "nbrec_logical_router_port" parameter 'lrp'.  Stores the IPv4 and
>   * IPv6 addresses in the 'ipv4_addrs' and 'ipv6_addrs' fields of
> diff --git a/ovn/lib/ovn-util.h b/ovn/lib/ovn-util.h
> index 30b27b1..8252283 100644
> --- a/ovn/lib/ovn-util.h
> +++ b/ovn/lib/ovn-util.h
> @@ -54,6 +54,8 @@ struct lport_addresses {
>  };
>
>  bool is_dynamic_lsp_address(const char *address);
> +bool extract_addresses(const char *address, struct lport_addresses *,
> +                       int *ofs);
>  bool extract_lsp_addresses(const char *address, struct lport_addresses *);
>  bool extract_lrp_networks(const struct nbrec_logical_router_port *,
>                            struct lport_addresses *);
> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
> index 8c8f16b..6090e24 100644
> --- a/ovn/northd/ovn-northd.c
> +++ b/ovn/northd/ovn-northd.c
> @@ -1384,6 +1384,15 @@ join_logical_ports(struct northd_context *ctx,
>                                       "on L3 gateway router", nbrp->name);
>                          continue;
>                      }
> +                    if (od->l3dgw_port || od->l3redirect_port) {
> +                        static struct vlog_rate_limit rl
> +                            = VLOG_RATE_LIMIT_INIT(1, 1);
> +                        VLOG_WARN_RL(&rl, "Bad configuration: multiple
> ports "
> +                                     "with redirect-chassis on same
> logical "
> +                                     "router %s", od->nbr->name);
> +                        continue;
> +                    }
> +
>                      char *redirect_name = chassis_redirect_name(nbrp->
> name);
>                      struct ovn_port *crp = ovn_port_find(ports,
> redirect_name);
>                      if (crp) {
> @@ -1402,17 +1411,8 @@ join_logical_ports(struct northd_context *ctx,
>
>                      /* Set l3dgw_port and l3redirect_port in od, for later
>                       * use during flow creation. */
> -                    if (od->l3dgw_port || od->l3redirect_port) {
> -                        static struct vlog_rate_limit rl
> -                            = VLOG_RATE_LIMIT_INIT(1, 1);
> -                        VLOG_WARN_RL(&rl, "Bad configuration: multiple
> ports "
> -                                     "with redirect-chassis on same
> logical "
> -                                     "router %s", od->nbr->name);
> -                        continue;
> -                    } else {
> -                        od->l3dgw_port = op;
> -                        od->l3redirect_port = crp;
> -                    }
> +                    od->l3dgw_port = op;
> +                    od->l3redirect_port = crp;
>                  }
>              }
>          }
> @@ -1536,7 +1536,21 @@ get_nat_addresses(const struct ovn_port *op)
>              free(error);
>              continue;
>          }
> -        ds_put_format(&addresses, " %s", nat->external_ip);
> +
> +        /* Determine whether this NAT rule satisfies the conditions for
> +         * distributed NAT processing. */
> +        if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat")
> +            && nat->logical_port && nat->external_mac) {
> +            /* Distributed NAT rule. */
> +            /* XXX This uses a different MAC address, so it cannot go
> +             * into the same string as centralized NAT external IP
> +             * addresses.  Need to change this function to return an
> +             * array of strings. */
> +        } else {
> +            /* Centralized NAT rule, either on gateway router or
> distributed
> +             * router. */
> +            ds_put_format(&addresses, " %s", nat->external_ip);
> +        }
>      }
>
>      /* A set to hold all load-balancer vips. */
> @@ -1549,6 +1563,13 @@ get_nat_addresses(const struct ovn_port *op)
>      }
>      sset_destroy(&all_ips);
>
> +    /* Gratuitous ARP for centralized NAT rules on distributed gateway
> +     * ports should be restricted to the "redirect-chassis". */
> +    if (op->od->l3redirect_port) {
> +        ds_put_format(&addresses, " is_chassis_resident(%s)",
> +                      op->od->l3redirect_port->json_key);
> +    }
> +
>      return ds_steal_cstr(&addresses);
>  }
>
> @@ -1642,14 +1663,17 @@ ovn_port_update_sbrec(const struct ovn_port *op,
>              const char *nat_addresses = smap_get(&op->nbsp->options,
>                                             "nat-addresses");
>              if (nat_addresses && !strcmp(nat_addresses, "router")) {
> -                if (op->peer && op->peer->nbrp) {
> +                if (op->peer && op->peer->od
> +                    && (chassis || op->peer->od->l3redirect_port)) {
>                      char *nats = get_nat_addresses(op->peer);
>                      if (nats) {
>                          smap_add(&new, "nat-addresses", nats);
>                          free(nats);
>                      }
>                  }
> -            } else if (nat_addresses) {
> +            /* Only accept manual specification of ethernet address
> +             * followed by IPv4 addresses on type "l3gateway" ports. */
> +            } else if (nat_addresses && chassis) {
>                  struct lport_addresses laddrs;
>                  if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
>                      static struct vlog_rate_limit rl =
> diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
> index 46a25f6..1f0b003 100644
> --- a/ovn/ovn-nb.xml
> +++ b/ovn/ovn-nb.xml
> @@ -244,8 +244,7 @@
>          <column name="options" key="nat-addresses">
>            <p>
>              This is used to send gratuitous ARPs for SNAT and DNAT IP
> -            addresses via <code>localnet</code> and is valid for only L3
> -            gateway ports.
> +            addresses via <code>localnet</code>.
>

localnet switch port is usually a separate logical_switch_port that is
attached to the same switch.  And this logical siwtch port is usually the
one that is connected to the router, right? I think if we clarify it above,
it will cause less confusion.


>            </p>
>
>            <p>
> @@ -264,6 +263,13 @@
>                </p>
>
>                <p>
> +                This form of <ref column="options" key="nat-addresses"/>
> is
> +                valid for L3 gateway ports, and for logical switch ports
> +                where <ref column="options" key="router-port"/> is the
> name
> +                of a distributed gateway port.
> +              </p>
> +
>
The above is a little confusing. I think if it read as below, it will be
more clear?

This form of options:nat-addresses is valid for for  logical  switch  ports
 where
options:router-port is the port of a gateway router or the name of a
distributed gateway port.


> +              <p>
>                  Supported only in OVN 2.8 and later.  Earlier versions
> required
>                  NAT addresses to be manually synchronized.
>                </p>
> @@ -271,10 +277,17 @@
>
>              <dt><code>Ethernet address followed by one or more IPv4
> addresses</code></dt>
>              <dd>
> -              Example: <code>80:fa:5b:06:72:b7 158.36.44.22
> -              158.36.44.24</code>. This would result in generation of
> -              gratuitous ARPs for IP addresses 158.36.44.22 and
> 158.36.44.24
> -              with a MAC address of 80:fa:5b:06:72:b7.
> +              <p>
> +                Example: <code>80:fa:5b:06:72:b7 158.36.44.22
> +                158.36.44.24</code>. This would result in generation of
> +                gratuitous ARPs for IP addresses 158.36.44.22 and
> 158.36.44.24
> +                with a MAC address of 80:fa:5b:06:72:b7.
> +              </p>
> +
> +              <p>
> +                This form of <ref column="options" key="nat-addresses"/>
> is
> +                only valid for L3 gateway ports.
>

s/is only valid for L3 gateway ports/is only valid for a port of a gateway
router/  is more clear?





> +              </p>
>              </dd>
>            </dl>
>          </column>
> @@ -1166,6 +1179,14 @@
>            peer logical switch's destination lookup flow on the
>            <code>redirect-chassis</code>.
>          </p>
> +
> +        <p>
> +          When this option is specified and it is desired to generate
> +          gratuitous ARPs for NAT addresses, then the peer logical switch
> +          port's <ref column="options" key="nat-addresses"
> +          table="Logical_Switch_Port"/> should be set to
> +          <code>router</code>.
> +        </p>
>        </column>
>      </group>
>
> diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
> index 4e95c80..2df45e8 100644
> --- a/ovn/ovn-sb.xml
> +++ b/ovn/ovn-sb.xml
> @@ -1862,6 +1862,20 @@ tcp.flags = RST;
>          ports must have reversed <ref column="logical_port"/> and
>          <code>peer</code> values.
>        </column>
> +
> +      <column name="options" key="nat-addresses">
> +        MAC address of the <code>patch</code> port followed by a list of
> +        SNAT and DNAT external IP addresses, followed by
> +        <code>is_chassis_resident("<var>lport</var>")</code>, where
> +        <var>lport</var> is the name of a logical port on the same chassis
> +        where the corresponding NAT rules are applied.  This is used to
> +        send gratuitous ARPs for SNAT and DNAT external IP addresses via
> +        <code>localnet</code>, from the chassis where <var>lport</var>
> +        resides.  Example: <code>80:fa:5b:06:72:b7 158.36.44.22
> +        158.36.44.24</code>.  This would result in generation of
> +        gratuitous ARPs for IP addresses 158.36.44.22 and 158.36.44.24
> +        with a MAC address of 80:fa:5b:06:72:b7.
>

The example above does not include "is_chassis_resident". Can you add that?


> +      </column>
>      </group>
>
>      <group title="L3 Gateway Options">
> @@ -1883,15 +1897,14 @@ tcp.flags = RST;
>          The <code>chassis</code> in which the port resides.
>        </column>
>
> -        <column name="options" key="nat-addresses">
> -          MAC address of the <code>l3gateway</code> port followed by a
> list of
> -          SNAT and DNAT IP addresses. This is used to send gratuitous
> ARPs for
> -          SNAT and DNAT IP addresses via <code>localnet</code> and is
> valid for
> -          only L3 gateway ports.  Example: <code>80:fa:5b:06:72:b7
> 158.36.44.22
> -          158.36.44.24</code>. This would result in generation of
> gratuitous
> -          ARPs for IP addresses 158.36.44.22 and 158.36.44.24 with a MAC
> -          address of 80:fa:5b:06:72:b7.
> -        </column>
> +      <column name="options" key="nat-addresses">
> +        MAC address of the <code>l3gateway</code> port followed by a list
> of
> +        SNAT and DNAT external IP addresses.  This is used to send
> gratuitous
> +        ARPs for SNAT and DNAT external IP addresses via
> <code>localnet</code>.
> +        Example: <code>80:fa:5b:06:72:b7 158.36.44.22 158.36.44.24</code>.
> +        This would result in generation of gratuitous ARPs for IP
> addresses
> +        158.36.44.22 and 158.36.44.24 with a MAC address of
> 80:fa:5b:06:72:b7.
> +      </column>
>      </group>
>
>      <group title="Localnet Options">
> diff --git a/tests/ovn.at b/tests/ovn.at
> index bbbec90..0915f7a 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -6660,3 +6660,73 @@ OVN_CHECK_PACKETS([hv2/vif1-tx.pcap],
> [hv2-vif1.expected])
>  OVN_CLEANUP([hv1],[hv2],[hv3])
>
>  AT_CLEANUP
> +
> +AT_SETUP([ovn -- send gratuitous arp for NAT rules on distributed router])
> +AT_SKIP_IF([test $HAVE_PYTHON = no])
> +ovn_start
> +# Create logical switch
> +ovn-nbctl ls-add ls0
> +# Create gateway router
> +ovn-nbctl create Logical_Router name=lr0
> +# Add router port to gateway router
> +ovn-nbctl lrp-add lr0 lrp0 f0:00:00:00:00:01 192.168.0.1/24 \
> +    -- set Logical_Router_Port lrp0 options:redirect-chassis="hv2"
> +ovn-nbctl lsp-add ls0 lrp0-rp -- set Logical_Switch_Port lrp0-rp \
> +    type=router options:router-port=lrp0-rp addresses="router"
>

A fix above for 'options:router-port' to point to the router port.

I think the test would be more useful if the logical port backing the VM is
in a separate logical switch as it reflects the real world usage better. It
will also help to look at this test as an example later on how to configure
GARP.


> +# Add logical port for NAT rule
> +ovn-nbctl lsp-add ls0 foo
> +# Add nat-addresses option
> +ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses="router"
> +# Add NAT rules
> +AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 192.168.0.1 10.0.0.0/24])
> +AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 192.168.0.2 10.0.0.1])
> +AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 192.168.0.3 10.0.0.2 foo
> f0:00:00:00:00:02])
> +
> +net_add n1
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +
> +AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-
> mappings=physnet1:br-phys])
> +AT_CHECK([ovs-vsctl add-port br-phys snoopvif -- set Interface snoopvif
> options:tx_pcap=hv1/snoopvif-tx.pcap options:rxq_pcap=hv1/snoopvif-
> rx.pcap])
> +
> +sim_add hv2
> +as hv2
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.2
> +# Initially test with no bridge-mapping on hv2, expect to receive no
> packets
> +
> +# Create a localnet port.
> +AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
> +AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
> +AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
> +AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
> +
> +# Allow some time for ovn-northd and ovn-controller to catch up.
> +# XXX This should be more systematic.
> +sleep 2
> +
> +# Expect no packets when hv2 bridge-mapping is not present
> +: > packets
> +OVN_CHECK_PACKETS([hv1/snoopvif-tx.pcap], [packets])
> +
> +# Add bridge-mapping on hv2
> +AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-
> mappings=physnet1:br-phys])
> +
> +# Wait for packet to be received.
> +OVS_WAIT_UNTIL([test `wc -c < "hv1/snoopvif-tx.pcap"` -ge 50])
> +trim_zeros() {
> +    sed 's/\(00\)\{1,\}$//'
> +}
> +$PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv1/snoopvif-tx.pcap |
> trim_zeros > packets
> +expected="fffffffffffff0000000000108060001080006040001f00000000001c0a8
> 0001000000000000c0a80001"
> +echo $expected > expout
> +expected="fffffffffffff0000000000108060001080006040001f00000000001c0a8
> 0002000000000000c0a80002"
> +echo $expected >> expout
> +AT_CHECK([sort packets], [0], [expout])
> +cat packets
> +
> +OVN_CLEANUP([hv1],[hv2])
> +
> +AT_CLEANUP
> --
> 1.9.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Mickey Spiegel March 21, 2017, 10:06 p.m. UTC | #2
On Tue, Mar 21, 2017 at 1:39 PM, Guru Shetty <guru@ovn.org> wrote:

>
>
> On 17 March 2017 at 15:30, Mickey Spiegel <mickeys.dev@gmail.com> wrote:
>
>> This patch extends gratuitous ARP support for NAT addresses so that it
>> applies to centralized NAT rules on a distributed router, in addition to
>> the existing gratuitous ARP support for NAT addresses on gateway routers.
>>
>> Gratuitous ARP packets for centralized NAT rules on a distributed router
>> are only generated on the redirect-chassis.
>
>
> A comment here on what centralized NAT rules are will be useful when this
> is seen a couple of months from now.
>

I will add the explanation of centralized NAT rules.


>
>
>> This is achieved by extending
>> the syntax for "options:nat-addresses" in the southbound database,
>> allowing the condition 'is_chassis_resident("LPORT_NAME")' to be appended
>> after the MAC and IP addresses.  This condition is automatically inserted
>> by ovn-northd when the northbound "options:nat-addresses" is set to
>> "router" and the peer is a distributed gateway port.
>>
>> A separate patch will be required to support gratuitous ARP for
>> distributed NAT rules that specify logical_port and external_mac.  Since
>> the MAC address differs and the logical port often resides on a different
>> chassis from the redirect-chassis, these addresses cannot be included in
>> the same "nat-addresses" string as for centralized NAT rules.
>>
>> Signed-off-by: Mickey Spiegel <mickeys.dev@gmail.com>
>> ---
>>  ovn/controller/pinctrl.c | 104 ++++++++++++++++++++++++++++++
>> ++++++++++++++---
>>  ovn/lib/ovn-util.c       |  38 ++++++++++++++---
>>  ovn/lib/ovn-util.h       |   2 +
>>  ovn/northd/ovn-northd.c  |  52 +++++++++++++++++-------
>>  ovn/ovn-nb.xml           |  33 ++++++++++++---
>>  ovn/ovn-sb.xml           |  31 ++++++++++----
>>  tests/ovn.at             |  70 +++++++++++++++++++++++++++++++
>>  7 files changed, 289 insertions(+), 41 deletions(-)
>>
>> diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
>> index b342189..08af792 100644
>> --- a/ovn/controller/pinctrl.c
>> +++ b/ovn/controller/pinctrl.c
>> @@ -37,6 +37,7 @@
>>  #include "lib/dhcp.h"
>>  #include "ovn-controller.h"
>>  #include "ovn/actions.h"
>> +#include "ovn/lex.h"
>>  #include "ovn/lib/logical-fields.h"
>>  #include "ovn/lib/ovn-dhcp.h"
>>  #include "ovn/lib/ovn-util.h"
>> @@ -1049,7 +1050,8 @@ send_garp_update(const struct sbrec_port_binding
>> *binding_rec,
>>
>>      volatile struct garp_data *garp = NULL;
>>      /* Update GARP for NAT IP if it exists. */
>> -    if (!strcmp(binding_rec->type, "l3gateway")) {
>> +    if (!strcmp(binding_rec->type, "l3gateway")
>> +        || !strcmp(binding_rec->type, "patch")) {
>>
> A comment above on why we should also look at "patch" will be useful.
>

Replace the comment above with something along the following lines?
/* Update GARP for NAT IP if it exists. Consider port bindings with type
 * "l3gateway" for logical switch ports attached to gateway routers, and
 * port bindings with type "patch" for logical switch ports attached to
 * distributed gateway ports. */


>
>>          struct lport_addresses *laddrs = NULL;
>>          laddrs = shash_find_data(nat_addresses,
>> binding_rec->logical_port);
>>          if (!laddrs) {
>> @@ -1202,24 +1204,101 @@ get_localnet_vifs_l3gwports(const struct
>> ovsrec_bridge *br_int,
>>
>>      const struct local_datapath *ld;
>>      HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
>> -        if (!ld->has_local_l3gateway) {
>> +        if (!ld->localnet_port) {
>>              continue;
>>          }
>>
>>          for (size_t i = 0; i < ld->ldatapath->n_lports; i++) {
>>              const struct sbrec_port_binding *pb =
>> ld->ldatapath->lports[i];
>> -            if (!strcmp(pb->type, "l3gateway")
>> -                /* && it's on this chassis */) {
>> +            if ((ld->has_local_l3gateway && !strcmp(pb->type,
>> "l3gateway"))
>> +                || !strcmp(pb->type, "patch")) {
>>
> A comment above on why we are considering "patch" will be useful.
>

Something along the lines?
/* Consider port bindings of type "l3gateway" that connect to gateway
routers,
 * and port bindings of type "patch" since they might connect to distributed
 * gateway ports with NAT addresses. */


> Does local_datapaths include every patch port or is it only those that
> have l2 distributed gateway port instantiated on it?
>

Local datapaths is constructed in ovn/controller/binding.c. Start with
every local port (VIF, l3gateway, l2gateway, chassisredirect) and then find
all connected datapaths by walking patch ports.


>
>
>>                  sset_add(local_l3gw_ports, pb->logical_port);
>>              }
>>          }
>>      }
>>  }
>>
>> +static bool
>> +pinctrl_is_chassis_resident(const struct lport_index *lports,
>> +                            const struct sbrec_chassis *chassis,
>> +                            const char *port_name)
>> +{
>> +    const struct sbrec_port_binding *pb
>> +        = lport_lookup_by_name(lports, port_name);
>> +    return pb && pb->chassis && pb->chassis == chassis;
>> +}
>> +
>> +/* Extracts the mac, IPv4 and IPv6 addresses, and logical port from
>> + * 'addresses' which should be of the format 'MAC [IP1 IP2 ..]
>> + * [is_chassis_resident("LPORT_NAME")]', where IPn should be a valid
>> IPv4
>> + * or IPv6 address, and stores them in the 'ipv4_addrs' and 'ipv6_addrs'
>> + * fields of 'laddrs'.  The logical port name is stored in 'lport'.
>> + *
>> + * Returns true if at least 'MAC' is found in 'address', false otherwise.
>> + *
>> + * The caller must call destroy_lport_addresses() and free(*lport). */
>> +static bool
>> +extract_addresses_with_port(const char *addresses,
>> +                            struct lport_addresses *laddrs,
>> +                            char **lport)
>> +{
>> +    int ofs;
>> +    if (!extract_addresses(addresses, laddrs, &ofs)) {
>> +        return false;
>> +    } else if (ofs >= strlen(addresses)) {
>> +        return true;
>> +    }
>> +
>> +    struct lexer lexer;
>> +    lexer_init(&lexer, addresses + ofs);
>> +    lexer_get(&lexer);
>> +
>> +    if (lexer.error || lexer.token.type != LEX_T_ID
>> +        || !lexer_match_id(&lexer, "is_chassis_resident")) {
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>> +        VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", addresses);
>> +        lexer_destroy(&lexer);
>> +        return true;
>> +    }
>> +
>> +    if (!lexer_match(&lexer, LEX_T_LPAREN)) {
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>> +        VLOG_INFO_RL(&rl, "Syntax error: expecting '(' after "
>> +                          "'is_chassis_resident' in address '%s'",
>> addresses);
>> +        lexer_destroy(&lexer);
>> +        return false;
>> +    }
>> +
>> +    if (lexer.token.type != LEX_T_STRING) {
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>> +        VLOG_INFO_RL(&rl,
>> +                    "Syntax error: expecting quoted string after"
>> +                    " 'is_chassis_resident' in address '%s'", addresses);
>> +        lexer_destroy(&lexer);
>> +        return false;
>> +    }
>> +
>> +    *lport = xstrdup(lexer.token.s);
>> +
>> +    lexer_get(&lexer);
>> +    if (!lexer_match(&lexer, LEX_T_RPAREN)) {
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>> +        VLOG_INFO_RL(&rl, "Syntax error: expecting ')' after quoted
>> string in "
>> +                          "'is_chassis_resident()' in address '%s'",
>> +                          addresses);
>> +        lexer_destroy(&lexer);
>> +        return false;
>> +    }
>> +
>> +    lexer_destroy(&lexer);
>> +    return true;
>> +}
>> +
>>  static void
>>  get_nat_addresses_and_keys(struct sset *nat_address_keys,
>>                             struct sset *local_l3gw_ports,
>>                             const struct lport_index *lports,
>> +                           const struct sbrec_chassis *chassis,
>>                             struct shash *nat_addresses)
>>  {
>>      const char *gw_port;
>> @@ -1236,10 +1315,23 @@ get_nat_addresses_and_keys(struct sset
>> *nat_address_keys,
>>          }
>>
>>          struct lport_addresses *laddrs = xmalloc(sizeof *laddrs);
>> -        if (!extract_lsp_addresses(nat_addresses_options, laddrs)) {
>> +        char *lport = NULL;
>> +        if (!extract_addresses_with_port(nat_addresses_options, laddrs,
>> &lport)
>> +            || (!lport && !strcmp(pb->type, "patch"))) {
>>              free(laddrs);
>> +            if (lport) {
>> +                free(lport);
>> +            }
>>              continue;
>> +        } else if (lport) {
>> +            if (!pinctrl_is_chassis_resident(lports, chassis, lport)) {
>> +                free(laddrs);
>> +                free(lport);
>> +                continue;
>> +            }
>> +            free(lport);
>>          }
>> +
>>          int i;
>>          for (i = 0; i < laddrs->n_ipv4_addrs; i++) {
>>              char *name = xasprintf("%s-%s", pb->logical_port,
>> @@ -1275,7 +1367,7 @@ send_garp_run(const struct ovsrec_bridge *br_int,
>>                        &localnet_vifs, &localnet_ofports,
>> &local_l3gw_ports);
>>
>>      get_nat_addresses_and_keys(&nat_ip_keys, &local_l3gw_ports, lports,
>> -                               &nat_addresses);
>> +                               chassis, &nat_addresses);
>>      /* For deleted ports and deleted nat ips, remove from
>> send_garp_data. */
>>      struct shash_node *iter, *next;
>>      SHASH_FOR_EACH_SAFE (iter, next, &send_garp_data) {
>> diff --git a/ovn/lib/ovn-util.c b/ovn/lib/ovn-util.c
>> index 99e4a0e..644a5dc 100644
>> --- a/ovn/lib/ovn-util.c
>> +++ b/ovn/lib/ovn-util.c
>> @@ -83,24 +83,29 @@ is_dynamic_lsp_address(const char *address)
>>  }
>>
>>  /* Extracts the mac, IPv4 and IPv6 addresses from * 'address' which
>> - * should be of the format 'MAC [IP1 IP2 ..]" where IPn should be a
>> + * should be of the format "MAC [IP1 IP2 ..] .." where IPn should be a
>>   * valid IPv4 or IPv6 address and stores them in the 'ipv4_addrs' and
>> - * 'ipv6_addrs' fields of 'laddrs'.
>> + * 'ipv6_addrs' fields of 'laddrs'.  There may be additional content in
>> + * 'address' after "MAC [IP1 IP2 .. ]".  The value of 'ofs' that is
>> + * returned indicates the offset where that additional content begins.
>>   *
>> - * Return true if at least 'MAC' is found in 'address', false otherwise.
>> + * Returns true if at least 'MAC' is found in 'address', false otherwise.
>>   *
>>   * The caller must call destroy_lport_addresses(). */
>>  bool
>> -extract_lsp_addresses(const char *address, struct lport_addresses
>> *laddrs)
>> +extract_addresses(const char *address, struct lport_addresses *laddrs,
>> +                  int *ofs)
>>  {
>>      memset(laddrs, 0, sizeof *laddrs);
>>
>>      const char *buf = address;
>> +    const char *const start = buf;
>>      int buf_index = 0;
>>      const char *buf_end = buf + strlen(address);
>>      if (!ovs_scan_len(buf, &buf_index, ETH_ADDR_SCAN_FMT,
>>                        ETH_ADDR_SCAN_ARGS(laddrs->ea))) {
>>          laddrs->ea = eth_addr_zero;
>> +        *ofs = 0;
>>          return false;
>>      }
>>
>> @@ -129,17 +134,38 @@ extract_lsp_addresses(const char *address, struct
>> lport_addresses *laddrs)
>>          if (!error) {
>>              add_ipv6_netaddr(laddrs, ip6, plen);
>>          } else {
>> -            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1,
>> 1);
>> -            VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", address);
>>              free(error);
>>              break;
>>          }
>>          buf += buf_index;
>>      }
>>
>> +    *ofs = buf - start;
>>      return true;
>>  }
>>
>> +/* Extracts the mac, IPv4 and IPv6 addresses from * 'address' which
>> + * should be of the format 'MAC [IP1 IP2 ..]" where IPn should be a
>> + * valid IPv4 or IPv6 address and stores them in the 'ipv4_addrs' and
>> + * 'ipv6_addrs' fields of 'laddrs'.
>> + *
>> + * Return true if at least 'MAC' is found in 'address', false otherwise.
>> + *
>> + * The caller must call destroy_lport_addresses(). */
>> +bool
>> +extract_lsp_addresses(const char *address, struct lport_addresses
>> *laddrs)
>> +{
>> +    int ofs;
>> +    bool success = extract_addresses(address, laddrs, &ofs);
>> +
>> +    if (success && ofs < strlen(address)) {
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>> +        VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", address);
>> +    }
>> +
>> +    return success;
>> +}
>> +
>>  /* Extracts the mac, IPv4 and IPv6 addresses from the
>>   * "nbrec_logical_router_port" parameter 'lrp'.  Stores the IPv4 and
>>   * IPv6 addresses in the 'ipv4_addrs' and 'ipv6_addrs' fields of
>> diff --git a/ovn/lib/ovn-util.h b/ovn/lib/ovn-util.h
>> index 30b27b1..8252283 100644
>> --- a/ovn/lib/ovn-util.h
>> +++ b/ovn/lib/ovn-util.h
>> @@ -54,6 +54,8 @@ struct lport_addresses {
>>  };
>>
>>  bool is_dynamic_lsp_address(const char *address);
>> +bool extract_addresses(const char *address, struct lport_addresses *,
>> +                       int *ofs);
>>  bool extract_lsp_addresses(const char *address, struct lport_addresses
>> *);
>>  bool extract_lrp_networks(const struct nbrec_logical_router_port *,
>>                            struct lport_addresses *);
>> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
>> index 8c8f16b..6090e24 100644
>> --- a/ovn/northd/ovn-northd.c
>> +++ b/ovn/northd/ovn-northd.c
>> @@ -1384,6 +1384,15 @@ join_logical_ports(struct northd_context *ctx,
>>                                       "on L3 gateway router", nbrp->name);
>>                          continue;
>>                      }
>> +                    if (od->l3dgw_port || od->l3redirect_port) {
>> +                        static struct vlog_rate_limit rl
>> +                            = VLOG_RATE_LIMIT_INIT(1, 1);
>> +                        VLOG_WARN_RL(&rl, "Bad configuration: multiple
>> ports "
>> +                                     "with redirect-chassis on same
>> logical "
>> +                                     "router %s", od->nbr->name);
>> +                        continue;
>> +                    }
>> +
>>                      char *redirect_name = chassis_redirect_name(nbrp->na
>> me);
>>                      struct ovn_port *crp = ovn_port_find(ports,
>> redirect_name);
>>                      if (crp) {
>> @@ -1402,17 +1411,8 @@ join_logical_ports(struct northd_context *ctx,
>>
>>                      /* Set l3dgw_port and l3redirect_port in od, for
>> later
>>                       * use during flow creation. */
>> -                    if (od->l3dgw_port || od->l3redirect_port) {
>> -                        static struct vlog_rate_limit rl
>> -                            = VLOG_RATE_LIMIT_INIT(1, 1);
>> -                        VLOG_WARN_RL(&rl, "Bad configuration: multiple
>> ports "
>> -                                     "with redirect-chassis on same
>> logical "
>> -                                     "router %s", od->nbr->name);
>> -                        continue;
>> -                    } else {
>> -                        od->l3dgw_port = op;
>> -                        od->l3redirect_port = crp;
>> -                    }
>> +                    od->l3dgw_port = op;
>> +                    od->l3redirect_port = crp;
>>                  }
>>              }
>>          }
>> @@ -1536,7 +1536,21 @@ get_nat_addresses(const struct ovn_port *op)
>>              free(error);
>>              continue;
>>          }
>> -        ds_put_format(&addresses, " %s", nat->external_ip);
>> +
>> +        /* Determine whether this NAT rule satisfies the conditions for
>> +         * distributed NAT processing. */
>> +        if (op->od->l3redirect_port && !strcmp(nat->type,
>> "dnat_and_snat")
>> +            && nat->logical_port && nat->external_mac) {
>> +            /* Distributed NAT rule. */
>> +            /* XXX This uses a different MAC address, so it cannot go
>> +             * into the same string as centralized NAT external IP
>> +             * addresses.  Need to change this function to return an
>> +             * array of strings. */
>> +        } else {
>> +            /* Centralized NAT rule, either on gateway router or
>> distributed
>> +             * router. */
>> +            ds_put_format(&addresses, " %s", nat->external_ip);
>> +        }
>>      }
>>
>>      /* A set to hold all load-balancer vips. */
>> @@ -1549,6 +1563,13 @@ get_nat_addresses(const struct ovn_port *op)
>>      }
>>      sset_destroy(&all_ips);
>>
>> +    /* Gratuitous ARP for centralized NAT rules on distributed gateway
>> +     * ports should be restricted to the "redirect-chassis". */
>> +    if (op->od->l3redirect_port) {
>> +        ds_put_format(&addresses, " is_chassis_resident(%s)",
>> +                      op->od->l3redirect_port->json_key);
>> +    }
>> +
>>      return ds_steal_cstr(&addresses);
>>  }
>>
>> @@ -1642,14 +1663,17 @@ ovn_port_update_sbrec(const struct ovn_port *op,
>>              const char *nat_addresses = smap_get(&op->nbsp->options,
>>                                             "nat-addresses");
>>              if (nat_addresses && !strcmp(nat_addresses, "router")) {
>> -                if (op->peer && op->peer->nbrp) {
>> +                if (op->peer && op->peer->od
>> +                    && (chassis || op->peer->od->l3redirect_port)) {
>>                      char *nats = get_nat_addresses(op->peer);
>>                      if (nats) {
>>                          smap_add(&new, "nat-addresses", nats);
>>                          free(nats);
>>                      }
>>                  }
>> -            } else if (nat_addresses) {
>> +            /* Only accept manual specification of ethernet address
>> +             * followed by IPv4 addresses on type "l3gateway" ports. */
>> +            } else if (nat_addresses && chassis) {
>>                  struct lport_addresses laddrs;
>>                  if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
>>                      static struct vlog_rate_limit rl =
>> diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
>> index 46a25f6..1f0b003 100644
>> --- a/ovn/ovn-nb.xml
>> +++ b/ovn/ovn-nb.xml
>> @@ -244,8 +244,7 @@
>>          <column name="options" key="nat-addresses">
>>            <p>
>>              This is used to send gratuitous ARPs for SNAT and DNAT IP
>> -            addresses via <code>localnet</code> and is valid for only L3
>> -            gateway ports.
>> +            addresses via <code>localnet</code>.
>>
>
> localnet switch port is usually a separate logical_switch_port that is
> attached to the same switch.  And this logical siwtch port is usually the
> one that is connected to the router, right? I think if we clarify it above,
> it will cause less confusion.
>

This is used to send gratuitous ARPs for SNAT and DNAT IP
addresses via the <code>localnet</code> port that is attached
to the same logical switch. This option is specified on a logical
switch port that is connected to a gateway router, or a logical
switch port that is connected to a distributed gateway port on
a logical router.


>
>
>>            </p>
>>
>>            <p>
>> @@ -264,6 +263,13 @@
>>                </p>
>>
>>                <p>
>> +                This form of <ref column="options" key="nat-addresses"/>
>> is
>> +                valid for L3 gateway ports, and for logical switch ports
>> +                where <ref column="options" key="router-port"/> is the
>> name
>> +                of a distributed gateway port.
>> +              </p>
>> +
>>
> The above is a little confusing. I think if it read as below, it will be
> more clear?
>
> This form of options:nat-addresses is valid for for  logical  switch
>  ports  where
> options:router-port is the port of a gateway router or the name of a
> distributed gateway port.
>

That is better.


>
>
>> +              <p>
>>                  Supported only in OVN 2.8 and later.  Earlier versions
>> required
>>                  NAT addresses to be manually synchronized.
>>                </p>
>> @@ -271,10 +277,17 @@
>>
>>              <dt><code>Ethernet address followed by one or more IPv4
>> addresses</code></dt>
>>              <dd>
>> -              Example: <code>80:fa:5b:06:72:b7 158.36.44.22
>> -              158.36.44.24</code>. This would result in generation of
>> -              gratuitous ARPs for IP addresses 158.36.44.22 and
>> 158.36.44.24
>> -              with a MAC address of 80:fa:5b:06:72:b7.
>> +              <p>
>> +                Example: <code>80:fa:5b:06:72:b7 158.36.44.22
>> +                158.36.44.24</code>. This would result in generation of
>> +                gratuitous ARPs for IP addresses 158.36.44.22 and
>> 158.36.44.24
>> +                with a MAC address of 80:fa:5b:06:72:b7.
>> +              </p>
>> +
>> +              <p>
>> +                This form of <ref column="options" key="nat-addresses"/>
>> is
>> +                only valid for L3 gateway ports.
>>
>
> s/is only valid for L3 gateway ports/is only valid for a port of a gateway
> router/  is more clear?
>

No opinion, so I can go with this.


>
>> +              </p>
>>              </dd>
>>            </dl>
>>          </column>
>> @@ -1166,6 +1179,14 @@
>>            peer logical switch's destination lookup flow on the
>>            <code>redirect-chassis</code>.
>>          </p>
>> +
>> +        <p>
>> +          When this option is specified and it is desired to generate
>> +          gratuitous ARPs for NAT addresses, then the peer logical switch
>> +          port's <ref column="options" key="nat-addresses"
>> +          table="Logical_Switch_Port"/> should be set to
>> +          <code>router</code>.
>> +        </p>
>>        </column>
>>      </group>
>>
>> diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
>> index 4e95c80..2df45e8 100644
>> --- a/ovn/ovn-sb.xml
>> +++ b/ovn/ovn-sb.xml
>> @@ -1862,6 +1862,20 @@ tcp.flags = RST;
>>          ports must have reversed <ref column="logical_port"/> and
>>          <code>peer</code> values.
>>        </column>
>> +
>> +      <column name="options" key="nat-addresses">
>> +        MAC address of the <code>patch</code> port followed by a list of
>> +        SNAT and DNAT external IP addresses, followed by
>> +        <code>is_chassis_resident("<var>lport</var>")</code>, where
>> +        <var>lport</var> is the name of a logical port on the same
>> chassis
>> +        where the corresponding NAT rules are applied.  This is used to
>> +        send gratuitous ARPs for SNAT and DNAT external IP addresses via
>> +        <code>localnet</code>, from the chassis where <var>lport</var>
>> +        resides.  Example: <code>80:fa:5b:06:72:b7 158.36.44.22
>> +        158.36.44.24</code>.  This would result in generation of
>> +        gratuitous ARPs for IP addresses 158.36.44.22 and 158.36.44.24
>> +        with a MAC address of 80:fa:5b:06:72:b7.
>>
>
> The example above does not include "is_chassis_resident". Can you add that?
>

Yes, I can add that.


>
>
>> +      </column>
>>      </group>
>>
>>      <group title="L3 Gateway Options">
>> @@ -1883,15 +1897,14 @@ tcp.flags = RST;
>>          The <code>chassis</code> in which the port resides.
>>        </column>
>>
>> -        <column name="options" key="nat-addresses">
>> -          MAC address of the <code>l3gateway</code> port followed by a
>> list of
>> -          SNAT and DNAT IP addresses. This is used to send gratuitous
>> ARPs for
>> -          SNAT and DNAT IP addresses via <code>localnet</code> and is
>> valid for
>> -          only L3 gateway ports.  Example: <code>80:fa:5b:06:72:b7
>> 158.36.44.22
>> -          158.36.44.24</code>. This would result in generation of
>> gratuitous
>> -          ARPs for IP addresses 158.36.44.22 and 158.36.44.24 with a MAC
>> -          address of 80:fa:5b:06:72:b7.
>> -        </column>
>> +      <column name="options" key="nat-addresses">
>> +        MAC address of the <code>l3gateway</code> port followed by a
>> list of
>> +        SNAT and DNAT external IP addresses.  This is used to send
>> gratuitous
>> +        ARPs for SNAT and DNAT external IP addresses via
>> <code>localnet</code>.
>> +        Example: <code>80:fa:5b:06:72:b7 158.36.44.22
>> 158.36.44.24</code>.
>> +        This would result in generation of gratuitous ARPs for IP
>> addresses
>> +        158.36.44.22 and 158.36.44.24 with a MAC address of
>> 80:fa:5b:06:72:b7.
>> +      </column>
>>      </group>
>>
>>      <group title="Localnet Options">
>> diff --git a/tests/ovn.at b/tests/ovn.at
>> index bbbec90..0915f7a 100644
>> --- a/tests/ovn.at
>> +++ b/tests/ovn.at
>> @@ -6660,3 +6660,73 @@ OVN_CHECK_PACKETS([hv2/vif1-tx.pcap],
>> [hv2-vif1.expected])
>>  OVN_CLEANUP([hv1],[hv2],[hv3])
>>
>>  AT_CLEANUP
>> +
>> +AT_SETUP([ovn -- send gratuitous arp for NAT rules on distributed
>> router])
>> +AT_SKIP_IF([test $HAVE_PYTHON = no])
>> +ovn_start
>> +# Create logical switch
>> +ovn-nbctl ls-add ls0
>> +# Create gateway router
>> +ovn-nbctl create Logical_Router name=lr0
>> +# Add router port to gateway router
>> +ovn-nbctl lrp-add lr0 lrp0 f0:00:00:00:00:01 192.168.0.1/24 \
>> +    -- set Logical_Router_Port lrp0 options:redirect-chassis="hv2"
>> +ovn-nbctl lsp-add ls0 lrp0-rp -- set Logical_Switch_Port lrp0-rp \
>> +    type=router options:router-port=lrp0-rp addresses="router"
>>
>
> A fix above for 'options:router-port' to point to the router port.
>

Should I create the patch to fix the other two places with this
problem, or will you do that?

I think the test would be more useful if the logical port backing the VM is
> in a separate logical switch as it reflects the real world usage better. It
> will also help to look at this test as an example later on how to configure
> GARP.
>

Agreed. I will add the separate logical switch.


> +# Add logical port for NAT rule
>> +ovn-nbctl lsp-add ls0 foo
>> +# Add nat-addresses option
>> +ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses="router"
>> +# Add NAT rules
>> +AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 192.168.0.1 10.0.0.0/24])
>> +AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 192.168.0.2 10.0.0.1])
>> +AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 192.168.0.3 10.0.0.2
>> foo f0:00:00:00:00:02])
>> +
>> +net_add n1
>> +sim_add hv1
>> +as hv1
>> +ovs-vsctl add-br br-phys
>> +ovn_attach n1 br-phys 192.168.0.1
>> +
>> +AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappin
>> gs=physnet1:br-phys])
>> +AT_CHECK([ovs-vsctl add-port br-phys snoopvif -- set Interface snoopvif
>> options:tx_pcap=hv1/snoopvif-tx.pcap options:rxq_pcap=hv1/snoopvif-
>> rx.pcap])
>> +
>> +sim_add hv2
>> +as hv2
>> +ovs-vsctl add-br br-phys
>> +ovn_attach n1 br-phys 192.168.0.2
>> +# Initially test with no bridge-mapping on hv2, expect to receive no
>> packets
>> +
>> +# Create a localnet port.
>> +AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
>> +AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
>> +AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
>> +AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
>> +
>> +# Allow some time for ovn-northd and ovn-controller to catch up.
>> +# XXX This should be more systematic.
>> +sleep 2
>> +
>> +# Expect no packets when hv2 bridge-mapping is not present
>> +: > packets
>> +OVN_CHECK_PACKETS([hv1/snoopvif-tx.pcap], [packets])
>> +
>> +# Add bridge-mapping on hv2
>> +AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappin
>> gs=physnet1:br-phys])
>> +
>> +# Wait for packet to be received.
>> +OVS_WAIT_UNTIL([test `wc -c < "hv1/snoopvif-tx.pcap"` -ge 50])
>> +trim_zeros() {
>> +    sed 's/\(00\)\{1,\}$//'
>> +}
>> +$PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv1/snoopvif-tx.pcap |
>> trim_zeros > packets
>> +expected="fffffffffffff0000000000108060001080006040001f0000
>> 0000001c0a80001000000000000c0a80001"
>> +echo $expected > expout
>> +expected="fffffffffffff0000000000108060001080006040001f0000
>> 0000001c0a80002000000000000c0a80002"
>> +echo $expected >> expout
>> +AT_CHECK([sort packets], [0], [expout])
>> +cat packets
>> +
>> +OVN_CLEANUP([hv1],[hv2])
>> +
>> +AT_CLEANUP
>> --
>> 1.9.1
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
>
>
Gurucharan Shetty March 22, 2017, 4:03 p.m. UTC | #3
>
>
>
>>
>>> This is achieved by extending
>>> the syntax for "options:nat-addresses" in the southbound database,
>>> allowing the condition 'is_chassis_resident("LPORT_NAME")' to be
>>> appended
>>> after the MAC and IP addresses.  This condition is automatically inserted
>>> by ovn-northd when the northbound "options:nat-addresses" is set to
>>> "router" and the peer is a distributed gateway port.
>>>
>>> A separate patch will be required to support gratuitous ARP for
>>> distributed NAT rules that specify logical_port and external_mac.  Since
>>> the MAC address differs and the logical port often resides on a different
>>> chassis from the redirect-chassis, these addresses cannot be included in
>>> the same "nat-addresses" string as for centralized NAT rules.
>>>
>>> Signed-off-by: Mickey Spiegel <mickeys.dev@gmail.com>
>>> ---
>>>  ovn/controller/pinctrl.c | 104 ++++++++++++++++++++++++++++++
>>> ++++++++++++++---
>>>  ovn/lib/ovn-util.c       |  38 ++++++++++++++---
>>>  ovn/lib/ovn-util.h       |   2 +
>>>  ovn/northd/ovn-northd.c  |  52 +++++++++++++++++-------
>>>  ovn/ovn-nb.xml           |  33 ++++++++++++---
>>>  ovn/ovn-sb.xml           |  31 ++++++++++----
>>>  tests/ovn.at             |  70 +++++++++++++++++++++++++++++++
>>>  7 files changed, 289 insertions(+), 41 deletions(-)
>>>
>>> diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
>>> index b342189..08af792 100644
>>> --- a/ovn/controller/pinctrl.c
>>> +++ b/ovn/controller/pinctrl.c
>>> @@ -37,6 +37,7 @@
>>>  #include "lib/dhcp.h"
>>>  #include "ovn-controller.h"
>>>  #include "ovn/actions.h"
>>> +#include "ovn/lex.h"
>>>  #include "ovn/lib/logical-fields.h"
>>>  #include "ovn/lib/ovn-dhcp.h"
>>>  #include "ovn/lib/ovn-util.h"
>>> @@ -1049,7 +1050,8 @@ send_garp_update(const struct sbrec_port_binding
>>> *binding_rec,
>>>
>>>      volatile struct garp_data *garp = NULL;
>>>      /* Update GARP for NAT IP if it exists. */
>>> -    if (!strcmp(binding_rec->type, "l3gateway")) {
>>> +    if (!strcmp(binding_rec->type, "l3gateway")
>>> +        || !strcmp(binding_rec->type, "patch")) {
>>>
>> A comment above on why we should also look at "patch" will be useful.
>>
>
> Replace the comment above with something along the following lines?
> /* Update GARP for NAT IP if it exists. Consider port bindings with type
>  * "l3gateway" for logical switch ports attached to gateway routers, and
>  * port bindings with type "patch" for logical switch ports attached to
>  * distributed gateway ports. */
>

Looks good.

>
>
>>
>>>          struct lport_addresses *laddrs = NULL;
>>>          laddrs = shash_find_data(nat_addresses,
>>> binding_rec->logical_port);
>>>          if (!laddrs) {
>>> @@ -1202,24 +1204,101 @@ get_localnet_vifs_l3gwports(const struct
>>> ovsrec_bridge *br_int,
>>>
>>>      const struct local_datapath *ld;
>>>      HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
>>> -        if (!ld->has_local_l3gateway) {
>>> +        if (!ld->localnet_port) {
>>>              continue;
>>>          }
>>>
>>>          for (size_t i = 0; i < ld->ldatapath->n_lports; i++) {
>>>              const struct sbrec_port_binding *pb =
>>> ld->ldatapath->lports[i];
>>> -            if (!strcmp(pb->type, "l3gateway")
>>> -                /* && it's on this chassis */) {
>>> +            if ((ld->has_local_l3gateway && !strcmp(pb->type,
>>> "l3gateway"))
>>> +                || !strcmp(pb->type, "patch")) {
>>>
>> A comment above on why we are considering "patch" will be useful.
>>
>
> Something along the lines?
> /* Consider port bindings of type "l3gateway" that connect to gateway
> routers,
>  * and port bindings of type "patch" since they might connect to
> distributed
>  * gateway ports with NAT addresses. */
>
Looks good


>
>
>> Does local_datapaths include every patch port or is it only those that
>> have l2 distributed gateway port instantiated on it?
>>
>
> Local datapaths is constructed in ovn/controller/binding.c. Start with
> every local port (VIF, l3gateway, l2gateway, chassisredirect) and then find
> all connected datapaths by walking patch ports.
>
Okay.


> diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
>>> index 46a25f6..1f0b003 100644
>>> --- a/ovn/ovn-nb.xml
>>> +++ b/ovn/ovn-nb.xml
>>> @@ -244,8 +244,7 @@
>>>          <column name="options" key="nat-addresses">
>>>            <p>
>>>              This is used to send gratuitous ARPs for SNAT and DNAT IP
>>> -            addresses via <code>localnet</code> and is valid for only L3
>>> -            gateway ports.
>>> +            addresses via <code>localnet</code>.
>>>
>>
>> localnet switch port is usually a separate logical_switch_port that is
>> attached to the same switch.  And this logical siwtch port is usually the
>> one that is connected to the router, right? I think if we clarify it above,
>> it will cause less confusion.
>>
>
> This is used to send gratuitous ARPs for SNAT and DNAT IP
> addresses via the <code>localnet</code> port that is attached
> to the same logical switch. This option is specified on a logical
> switch port that is connected to a gateway router, or a logical
> switch port that is connected to a distributed gateway port on
> a logical router.
>
Sounds good.


>
>
>>
>>
>>>            </p>
>>>
>>>            <p>
>>> @@ -264,6 +263,13 @@
>>>                </p>
>>>
>>>                <p>
>>> +                This form of <ref column="options"
>>> key="nat-addresses"/> is
>>> +                valid for L3 gateway ports, and for logical switch ports
>>> +                where <ref column="options" key="router-port"/> is the
>>> name
>>> +                of a distributed gateway port.
>>> +              </p>
>>> +
>>>
>> The above is a little confusing. I think if it read as below, it will be
>> more clear?
>>
>> This form of options:nat-addresses is valid for for  logical  switch
>>  ports  where
>> options:router-port is the port of a gateway router or the name of a
>> distributed gateway port.
>>
>
> That is better.
>
>
>>
>>
>>> +              <p>
>>>                  Supported only in OVN 2.8 and later.  Earlier versions
>>> required
>>>                  NAT addresses to be manually synchronized.
>>>                </p>
>>> @@ -271,10 +277,17 @@
>>>
>>>              <dt><code>Ethernet address followed by one or more IPv4
>>> addresses</code></dt>
>>>              <dd>
>>> -              Example: <code>80:fa:5b:06:72:b7 158.36.44.22
>>> -              158.36.44.24</code>. This would result in generation of
>>> -              gratuitous ARPs for IP addresses 158.36.44.22 and
>>> 158.36.44.24
>>> -              with a MAC address of 80:fa:5b:06:72:b7.
>>> +              <p>
>>> +                Example: <code>80:fa:5b:06:72:b7 158.36.44.22
>>> +                158.36.44.24</code>. This would result in generation of
>>> +                gratuitous ARPs for IP addresses 158.36.44.22 and
>>> 158.36.44.24
>>> +                with a MAC address of 80:fa:5b:06:72:b7.
>>> +              </p>
>>> +
>>> +              <p>
>>> +                This form of <ref column="options"
>>> key="nat-addresses"/> is
>>> +                only valid for L3 gateway ports.
>>>
>>
>> s/is only valid for L3 gateway ports/is only valid for a port of a
>> gateway router/  is more clear?
>>
>
> No opinion, so I can go with this.
>
>
>>
>>> +              </p>
>>>              </dd>
>>>            </dl>
>>>          </column>
>>> @@ -1166,6 +1179,14 @@
>>>            peer logical switch's destination lookup flow on the
>>>            <code>redirect-chassis</code>.
>>>          </p>
>>> +
>>> +        <p>
>>> +          When this option is specified and it is desired to generate
>>> +          gratuitous ARPs for NAT addresses, then the peer logical
>>> switch
>>> +          port's <ref column="options" key="nat-addresses"
>>> +          table="Logical_Switch_Port"/> should be set to
>>> +          <code>router</code>.
>>> +        </p>
>>>        </column>
>>>      </group>
>>>
>>> diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
>>> index 4e95c80..2df45e8 100644
>>> --- a/ovn/ovn-sb.xml
>>> +++ b/ovn/ovn-sb.xml
>>> @@ -1862,6 +1862,20 @@ tcp.flags = RST;
>>>          ports must have reversed <ref column="logical_port"/> and
>>>          <code>peer</code> values.
>>>        </column>
>>> +
>>> +      <column name="options" key="nat-addresses">
>>> +        MAC address of the <code>patch</code> port followed by a list of
>>> +        SNAT and DNAT external IP addresses, followed by
>>> +        <code>is_chassis_resident("<var>lport</var>")</code>, where
>>> +        <var>lport</var> is the name of a logical port on the same
>>> chassis
>>> +        where the corresponding NAT rules are applied.  This is used to
>>> +        send gratuitous ARPs for SNAT and DNAT external IP addresses via
>>> +        <code>localnet</code>, from the chassis where <var>lport</var>
>>> +        resides.  Example: <code>80:fa:5b:06:72:b7 158.36.44.22
>>> +        158.36.44.24</code>.  This would result in generation of
>>> +        gratuitous ARPs for IP addresses 158.36.44.22 and 158.36.44.24
>>> +        with a MAC address of 80:fa:5b:06:72:b7.
>>>
>>
>> The example above does not include "is_chassis_resident". Can you add
>> that?
>>
>
> Yes, I can add that.
>
>
>>
>>
>>> +      </column>
>>>      </group>
>>>
>>>      <group title="L3 Gateway Options">
>>> @@ -1883,15 +1897,14 @@ tcp.flags = RST;
>>>          The <code>chassis</code> in which the port resides.
>>>        </column>
>>>
>>> -        <column name="options" key="nat-addresses">
>>> -          MAC address of the <code>l3gateway</code> port followed by a
>>> list of
>>> -          SNAT and DNAT IP addresses. This is used to send gratuitous
>>> ARPs for
>>> -          SNAT and DNAT IP addresses via <code>localnet</code> and is
>>> valid for
>>> -          only L3 gateway ports.  Example: <code>80:fa:5b:06:72:b7
>>> 158.36.44.22
>>> -          158.36.44.24</code>. This would result in generation of
>>> gratuitous
>>> -          ARPs for IP addresses 158.36.44.22 and 158.36.44.24 with a MAC
>>> -          address of 80:fa:5b:06:72:b7.
>>> -        </column>
>>> +      <column name="options" key="nat-addresses">
>>> +        MAC address of the <code>l3gateway</code> port followed by a
>>> list of
>>> +        SNAT and DNAT external IP addresses.  This is used to send
>>> gratuitous
>>> +        ARPs for SNAT and DNAT external IP addresses via
>>> <code>localnet</code>.
>>> +        Example: <code>80:fa:5b:06:72:b7 158.36.44.22
>>> 158.36.44.24</code>.
>>> +        This would result in generation of gratuitous ARPs for IP
>>> addresses
>>> +        158.36.44.22 and 158.36.44.24 with a MAC address of
>>> 80:fa:5b:06:72:b7.
>>> +      </column>
>>>      </group>
>>>
>>>      <group title="Localnet Options">
>>> diff --git a/tests/ovn.at b/tests/ovn.at
>>> index bbbec90..0915f7a 100644
>>> --- a/tests/ovn.at
>>> +++ b/tests/ovn.at
>>> @@ -6660,3 +6660,73 @@ OVN_CHECK_PACKETS([hv2/vif1-tx.pcap],
>>> [hv2-vif1.expected])
>>>  OVN_CLEANUP([hv1],[hv2],[hv3])
>>>
>>>  AT_CLEANUP
>>> +
>>> +AT_SETUP([ovn -- send gratuitous arp for NAT rules on distributed
>>> router])
>>> +AT_SKIP_IF([test $HAVE_PYTHON = no])
>>> +ovn_start
>>> +# Create logical switch
>>> +ovn-nbctl ls-add ls0
>>> +# Create gateway router
>>> +ovn-nbctl create Logical_Router name=lr0
>>> +# Add router port to gateway router
>>> +ovn-nbctl lrp-add lr0 lrp0 f0:00:00:00:00:01 192.168.0.1/24 \
>>> +    -- set Logical_Router_Port lrp0 options:redirect-chassis="hv2"
>>> +ovn-nbctl lsp-add ls0 lrp0-rp -- set Logical_Switch_Port lrp0-rp \
>>> +    type=router options:router-port=lrp0-rp addresses="router"
>>>
>>
>> A fix above for 'options:router-port' to point to the router port.
>>
>
> Should I create the patch to fix the other two places with this
> problem, or will you do that?
>
Please go ahead and do it.
diff mbox

Patch

diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index b342189..08af792 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -37,6 +37,7 @@ 
 #include "lib/dhcp.h"
 #include "ovn-controller.h"
 #include "ovn/actions.h"
+#include "ovn/lex.h"
 #include "ovn/lib/logical-fields.h"
 #include "ovn/lib/ovn-dhcp.h"
 #include "ovn/lib/ovn-util.h"
@@ -1049,7 +1050,8 @@  send_garp_update(const struct sbrec_port_binding *binding_rec,
 
     volatile struct garp_data *garp = NULL;
     /* Update GARP for NAT IP if it exists. */
-    if (!strcmp(binding_rec->type, "l3gateway")) {
+    if (!strcmp(binding_rec->type, "l3gateway")
+        || !strcmp(binding_rec->type, "patch")) {
         struct lport_addresses *laddrs = NULL;
         laddrs = shash_find_data(nat_addresses, binding_rec->logical_port);
         if (!laddrs) {
@@ -1202,24 +1204,101 @@  get_localnet_vifs_l3gwports(const struct ovsrec_bridge *br_int,
 
     const struct local_datapath *ld;
     HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
-        if (!ld->has_local_l3gateway) {
+        if (!ld->localnet_port) {
             continue;
         }
 
         for (size_t i = 0; i < ld->ldatapath->n_lports; i++) {
             const struct sbrec_port_binding *pb = ld->ldatapath->lports[i];
-            if (!strcmp(pb->type, "l3gateway")
-                /* && it's on this chassis */) {
+            if ((ld->has_local_l3gateway && !strcmp(pb->type, "l3gateway"))
+                || !strcmp(pb->type, "patch")) {
                 sset_add(local_l3gw_ports, pb->logical_port);
             }
         }
     }
 }
 
+static bool
+pinctrl_is_chassis_resident(const struct lport_index *lports,
+                            const struct sbrec_chassis *chassis,
+                            const char *port_name)
+{
+    const struct sbrec_port_binding *pb
+        = lport_lookup_by_name(lports, port_name);
+    return pb && pb->chassis && pb->chassis == chassis;
+}
+
+/* Extracts the mac, IPv4 and IPv6 addresses, and logical port from
+ * 'addresses' which should be of the format 'MAC [IP1 IP2 ..]
+ * [is_chassis_resident("LPORT_NAME")]', where IPn should be a valid IPv4
+ * or IPv6 address, and stores them in the 'ipv4_addrs' and 'ipv6_addrs'
+ * fields of 'laddrs'.  The logical port name is stored in 'lport'.
+ *
+ * Returns true if at least 'MAC' is found in 'address', false otherwise.
+ *
+ * The caller must call destroy_lport_addresses() and free(*lport). */
+static bool
+extract_addresses_with_port(const char *addresses,
+                            struct lport_addresses *laddrs,
+                            char **lport)
+{
+    int ofs;
+    if (!extract_addresses(addresses, laddrs, &ofs)) {
+        return false;
+    } else if (ofs >= strlen(addresses)) {
+        return true;
+    }
+
+    struct lexer lexer;
+    lexer_init(&lexer, addresses + ofs);
+    lexer_get(&lexer);
+
+    if (lexer.error || lexer.token.type != LEX_T_ID
+        || !lexer_match_id(&lexer, "is_chassis_resident")) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+        VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", addresses);
+        lexer_destroy(&lexer);
+        return true;
+    }
+
+    if (!lexer_match(&lexer, LEX_T_LPAREN)) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+        VLOG_INFO_RL(&rl, "Syntax error: expecting '(' after "
+                          "'is_chassis_resident' in address '%s'", addresses);
+        lexer_destroy(&lexer);
+        return false;
+    }
+
+    if (lexer.token.type != LEX_T_STRING) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+        VLOG_INFO_RL(&rl,
+                    "Syntax error: expecting quoted string after"
+                    " 'is_chassis_resident' in address '%s'", addresses);
+        lexer_destroy(&lexer);
+        return false;
+    }
+
+    *lport = xstrdup(lexer.token.s);
+
+    lexer_get(&lexer);
+    if (!lexer_match(&lexer, LEX_T_RPAREN)) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+        VLOG_INFO_RL(&rl, "Syntax error: expecting ')' after quoted string in "
+                          "'is_chassis_resident()' in address '%s'",
+                          addresses);
+        lexer_destroy(&lexer);
+        return false;
+    }
+
+    lexer_destroy(&lexer);
+    return true;
+}
+
 static void
 get_nat_addresses_and_keys(struct sset *nat_address_keys,
                            struct sset *local_l3gw_ports,
                            const struct lport_index *lports,
+                           const struct sbrec_chassis *chassis,
                            struct shash *nat_addresses)
 {
     const char *gw_port;
@@ -1236,10 +1315,23 @@  get_nat_addresses_and_keys(struct sset *nat_address_keys,
         }
 
         struct lport_addresses *laddrs = xmalloc(sizeof *laddrs);
-        if (!extract_lsp_addresses(nat_addresses_options, laddrs)) {
+        char *lport = NULL;
+        if (!extract_addresses_with_port(nat_addresses_options, laddrs, &lport)
+            || (!lport && !strcmp(pb->type, "patch"))) {
             free(laddrs);
+            if (lport) {
+                free(lport);
+            }
             continue;
+        } else if (lport) {
+            if (!pinctrl_is_chassis_resident(lports, chassis, lport)) {
+                free(laddrs);
+                free(lport);
+                continue;
+            }
+            free(lport);
         }
+
         int i;
         for (i = 0; i < laddrs->n_ipv4_addrs; i++) {
             char *name = xasprintf("%s-%s", pb->logical_port,
@@ -1275,7 +1367,7 @@  send_garp_run(const struct ovsrec_bridge *br_int,
                       &localnet_vifs, &localnet_ofports, &local_l3gw_ports);
 
     get_nat_addresses_and_keys(&nat_ip_keys, &local_l3gw_ports, lports,
-                               &nat_addresses);
+                               chassis, &nat_addresses);
     /* For deleted ports and deleted nat ips, remove from send_garp_data. */
     struct shash_node *iter, *next;
     SHASH_FOR_EACH_SAFE (iter, next, &send_garp_data) {
diff --git a/ovn/lib/ovn-util.c b/ovn/lib/ovn-util.c
index 99e4a0e..644a5dc 100644
--- a/ovn/lib/ovn-util.c
+++ b/ovn/lib/ovn-util.c
@@ -83,24 +83,29 @@  is_dynamic_lsp_address(const char *address)
 }
 
 /* Extracts the mac, IPv4 and IPv6 addresses from * 'address' which
- * should be of the format 'MAC [IP1 IP2 ..]" where IPn should be a
+ * should be of the format "MAC [IP1 IP2 ..] .." where IPn should be a
  * valid IPv4 or IPv6 address and stores them in the 'ipv4_addrs' and
- * 'ipv6_addrs' fields of 'laddrs'.
+ * 'ipv6_addrs' fields of 'laddrs'.  There may be additional content in
+ * 'address' after "MAC [IP1 IP2 .. ]".  The value of 'ofs' that is
+ * returned indicates the offset where that additional content begins.
  *
- * Return true if at least 'MAC' is found in 'address', false otherwise.
+ * Returns true if at least 'MAC' is found in 'address', false otherwise.
  *
  * The caller must call destroy_lport_addresses(). */
 bool
-extract_lsp_addresses(const char *address, struct lport_addresses *laddrs)
+extract_addresses(const char *address, struct lport_addresses *laddrs,
+                  int *ofs)
 {
     memset(laddrs, 0, sizeof *laddrs);
 
     const char *buf = address;
+    const char *const start = buf;
     int buf_index = 0;
     const char *buf_end = buf + strlen(address);
     if (!ovs_scan_len(buf, &buf_index, ETH_ADDR_SCAN_FMT,
                       ETH_ADDR_SCAN_ARGS(laddrs->ea))) {
         laddrs->ea = eth_addr_zero;
+        *ofs = 0;
         return false;
     }
 
@@ -129,17 +134,38 @@  extract_lsp_addresses(const char *address, struct lport_addresses *laddrs)
         if (!error) {
             add_ipv6_netaddr(laddrs, ip6, plen);
         } else {
-            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-            VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", address);
             free(error);
             break;
         }
         buf += buf_index;
     }
 
+    *ofs = buf - start;
     return true;
 }
 
+/* Extracts the mac, IPv4 and IPv6 addresses from * 'address' which
+ * should be of the format 'MAC [IP1 IP2 ..]" where IPn should be a
+ * valid IPv4 or IPv6 address and stores them in the 'ipv4_addrs' and
+ * 'ipv6_addrs' fields of 'laddrs'.
+ *
+ * Return true if at least 'MAC' is found in 'address', false otherwise.
+ *
+ * The caller must call destroy_lport_addresses(). */
+bool
+extract_lsp_addresses(const char *address, struct lport_addresses *laddrs)
+{
+    int ofs;
+    bool success = extract_addresses(address, laddrs, &ofs);
+
+    if (success && ofs < strlen(address)) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+        VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", address);
+    }
+
+    return success;
+}
+
 /* Extracts the mac, IPv4 and IPv6 addresses from the
  * "nbrec_logical_router_port" parameter 'lrp'.  Stores the IPv4 and
  * IPv6 addresses in the 'ipv4_addrs' and 'ipv6_addrs' fields of
diff --git a/ovn/lib/ovn-util.h b/ovn/lib/ovn-util.h
index 30b27b1..8252283 100644
--- a/ovn/lib/ovn-util.h
+++ b/ovn/lib/ovn-util.h
@@ -54,6 +54,8 @@  struct lport_addresses {
 };
 
 bool is_dynamic_lsp_address(const char *address);
+bool extract_addresses(const char *address, struct lport_addresses *,
+                       int *ofs);
 bool extract_lsp_addresses(const char *address, struct lport_addresses *);
 bool extract_lrp_networks(const struct nbrec_logical_router_port *,
                           struct lport_addresses *);
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 8c8f16b..6090e24 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -1384,6 +1384,15 @@  join_logical_ports(struct northd_context *ctx,
                                      "on L3 gateway router", nbrp->name);
                         continue;
                     }
+                    if (od->l3dgw_port || od->l3redirect_port) {
+                        static struct vlog_rate_limit rl
+                            = VLOG_RATE_LIMIT_INIT(1, 1);
+                        VLOG_WARN_RL(&rl, "Bad configuration: multiple ports "
+                                     "with redirect-chassis on same logical "
+                                     "router %s", od->nbr->name);
+                        continue;
+                    }
+
                     char *redirect_name = chassis_redirect_name(nbrp->name);
                     struct ovn_port *crp = ovn_port_find(ports, redirect_name);
                     if (crp) {
@@ -1402,17 +1411,8 @@  join_logical_ports(struct northd_context *ctx,
 
                     /* Set l3dgw_port and l3redirect_port in od, for later
                      * use during flow creation. */
-                    if (od->l3dgw_port || od->l3redirect_port) {
-                        static struct vlog_rate_limit rl
-                            = VLOG_RATE_LIMIT_INIT(1, 1);
-                        VLOG_WARN_RL(&rl, "Bad configuration: multiple ports "
-                                     "with redirect-chassis on same logical "
-                                     "router %s", od->nbr->name);
-                        continue;
-                    } else {
-                        od->l3dgw_port = op;
-                        od->l3redirect_port = crp;
-                    }
+                    od->l3dgw_port = op;
+                    od->l3redirect_port = crp;
                 }
             }
         }
@@ -1536,7 +1536,21 @@  get_nat_addresses(const struct ovn_port *op)
             free(error);
             continue;
         }
-        ds_put_format(&addresses, " %s", nat->external_ip);
+
+        /* Determine whether this NAT rule satisfies the conditions for
+         * distributed NAT processing. */
+        if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat")
+            && nat->logical_port && nat->external_mac) {
+            /* Distributed NAT rule. */
+            /* XXX This uses a different MAC address, so it cannot go
+             * into the same string as centralized NAT external IP
+             * addresses.  Need to change this function to return an
+             * array of strings. */
+        } else {
+            /* Centralized NAT rule, either on gateway router or distributed
+             * router. */
+            ds_put_format(&addresses, " %s", nat->external_ip);
+        }
     }
 
     /* A set to hold all load-balancer vips. */
@@ -1549,6 +1563,13 @@  get_nat_addresses(const struct ovn_port *op)
     }
     sset_destroy(&all_ips);
 
+    /* Gratuitous ARP for centralized NAT rules on distributed gateway
+     * ports should be restricted to the "redirect-chassis". */
+    if (op->od->l3redirect_port) {
+        ds_put_format(&addresses, " is_chassis_resident(%s)",
+                      op->od->l3redirect_port->json_key);
+    }
+
     return ds_steal_cstr(&addresses);
 }
 
@@ -1642,14 +1663,17 @@  ovn_port_update_sbrec(const struct ovn_port *op,
             const char *nat_addresses = smap_get(&op->nbsp->options,
                                            "nat-addresses");
             if (nat_addresses && !strcmp(nat_addresses, "router")) {
-                if (op->peer && op->peer->nbrp) {
+                if (op->peer && op->peer->od
+                    && (chassis || op->peer->od->l3redirect_port)) {
                     char *nats = get_nat_addresses(op->peer);
                     if (nats) {
                         smap_add(&new, "nat-addresses", nats);
                         free(nats);
                     }
                 }
-            } else if (nat_addresses) {
+            /* Only accept manual specification of ethernet address
+             * followed by IPv4 addresses on type "l3gateway" ports. */
+            } else if (nat_addresses && chassis) {
                 struct lport_addresses laddrs;
                 if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
                     static struct vlog_rate_limit rl =
diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
index 46a25f6..1f0b003 100644
--- a/ovn/ovn-nb.xml
+++ b/ovn/ovn-nb.xml
@@ -244,8 +244,7 @@ 
         <column name="options" key="nat-addresses">
           <p>
             This is used to send gratuitous ARPs for SNAT and DNAT IP
-            addresses via <code>localnet</code> and is valid for only L3
-            gateway ports.
+            addresses via <code>localnet</code>.
           </p>
 
           <p>
@@ -264,6 +263,13 @@ 
               </p>
 
               <p>
+                This form of <ref column="options" key="nat-addresses"/> is
+                valid for L3 gateway ports, and for logical switch ports
+                where <ref column="options" key="router-port"/> is the name
+                of a distributed gateway port.
+              </p>
+
+              <p>
                 Supported only in OVN 2.8 and later.  Earlier versions required
                 NAT addresses to be manually synchronized.
               </p>
@@ -271,10 +277,17 @@ 
 
             <dt><code>Ethernet address followed by one or more IPv4 addresses</code></dt>
             <dd>
-              Example: <code>80:fa:5b:06:72:b7 158.36.44.22
-              158.36.44.24</code>. This would result in generation of
-              gratuitous ARPs for IP addresses 158.36.44.22 and 158.36.44.24
-              with a MAC address of 80:fa:5b:06:72:b7.
+              <p>
+                Example: <code>80:fa:5b:06:72:b7 158.36.44.22
+                158.36.44.24</code>. This would result in generation of
+                gratuitous ARPs for IP addresses 158.36.44.22 and 158.36.44.24
+                with a MAC address of 80:fa:5b:06:72:b7.
+              </p>
+
+              <p>
+                This form of <ref column="options" key="nat-addresses"/> is
+                only valid for L3 gateway ports.
+              </p>
             </dd>
           </dl>
         </column>
@@ -1166,6 +1179,14 @@ 
           peer logical switch's destination lookup flow on the
           <code>redirect-chassis</code>.
         </p>
+
+        <p>
+          When this option is specified and it is desired to generate
+          gratuitous ARPs for NAT addresses, then the peer logical switch
+          port's <ref column="options" key="nat-addresses"
+          table="Logical_Switch_Port"/> should be set to
+          <code>router</code>.
+        </p>
       </column>
     </group>
 
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index 4e95c80..2df45e8 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -1862,6 +1862,20 @@  tcp.flags = RST;
         ports must have reversed <ref column="logical_port"/> and
         <code>peer</code> values.
       </column>
+
+      <column name="options" key="nat-addresses">
+        MAC address of the <code>patch</code> port followed by a list of
+        SNAT and DNAT external IP addresses, followed by
+        <code>is_chassis_resident("<var>lport</var>")</code>, where
+        <var>lport</var> is the name of a logical port on the same chassis
+        where the corresponding NAT rules are applied.  This is used to
+        send gratuitous ARPs for SNAT and DNAT external IP addresses via
+        <code>localnet</code>, from the chassis where <var>lport</var>
+        resides.  Example: <code>80:fa:5b:06:72:b7 158.36.44.22
+        158.36.44.24</code>.  This would result in generation of
+        gratuitous ARPs for IP addresses 158.36.44.22 and 158.36.44.24
+        with a MAC address of 80:fa:5b:06:72:b7.
+      </column>
     </group>
 
     <group title="L3 Gateway Options">
@@ -1883,15 +1897,14 @@  tcp.flags = RST;
         The <code>chassis</code> in which the port resides.
       </column>
 
-        <column name="options" key="nat-addresses">
-          MAC address of the <code>l3gateway</code> port followed by a list of
-          SNAT and DNAT IP addresses. This is used to send gratuitous ARPs for
-          SNAT and DNAT IP addresses via <code>localnet</code> and is valid for
-          only L3 gateway ports.  Example: <code>80:fa:5b:06:72:b7 158.36.44.22
-          158.36.44.24</code>. This would result in generation of gratuitous
-          ARPs for IP addresses 158.36.44.22 and 158.36.44.24 with a MAC
-          address of 80:fa:5b:06:72:b7.
-        </column>
+      <column name="options" key="nat-addresses">
+        MAC address of the <code>l3gateway</code> port followed by a list of
+        SNAT and DNAT external IP addresses.  This is used to send gratuitous
+        ARPs for SNAT and DNAT external IP addresses via <code>localnet</code>.
+        Example: <code>80:fa:5b:06:72:b7 158.36.44.22 158.36.44.24</code>.
+        This would result in generation of gratuitous ARPs for IP addresses
+        158.36.44.22 and 158.36.44.24 with a MAC address of 80:fa:5b:06:72:b7.
+      </column>
     </group>
 
     <group title="Localnet Options">
diff --git a/tests/ovn.at b/tests/ovn.at
index bbbec90..0915f7a 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -6660,3 +6660,73 @@  OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [hv2-vif1.expected])
 OVN_CLEANUP([hv1],[hv2],[hv3])
 
 AT_CLEANUP
+
+AT_SETUP([ovn -- send gratuitous arp for NAT rules on distributed router])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+# Create logical switch
+ovn-nbctl ls-add ls0
+# Create gateway router
+ovn-nbctl create Logical_Router name=lr0
+# Add router port to gateway router
+ovn-nbctl lrp-add lr0 lrp0 f0:00:00:00:00:01 192.168.0.1/24 \
+    -- set Logical_Router_Port lrp0 options:redirect-chassis="hv2"
+ovn-nbctl lsp-add ls0 lrp0-rp -- set Logical_Switch_Port lrp0-rp \
+    type=router options:router-port=lrp0-rp addresses="router"
+# Add logical port for NAT rule
+ovn-nbctl lsp-add ls0 foo
+# Add nat-addresses option
+ovn-nbctl lsp-set-options lrp0-rp router-port=lrp0 nat-addresses="router"
+# Add NAT rules
+AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 192.168.0.1 10.0.0.0/24])
+AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 192.168.0.2 10.0.0.1])
+AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 192.168.0.3 10.0.0.2 foo f0:00:00:00:00:02])
+
+net_add n1
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys])
+AT_CHECK([ovs-vsctl add-port br-phys snoopvif -- set Interface snoopvif options:tx_pcap=hv1/snoopvif-tx.pcap options:rxq_pcap=hv1/snoopvif-rx.pcap])
+
+sim_add hv2
+as hv2
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.2
+# Initially test with no bridge-mapping on hv2, expect to receive no packets
+
+# Create a localnet port.
+AT_CHECK([ovn-nbctl lsp-add ls0 ln_port])
+AT_CHECK([ovn-nbctl lsp-set-addresses ln_port unknown])
+AT_CHECK([ovn-nbctl lsp-set-type ln_port localnet])
+AT_CHECK([ovn-nbctl lsp-set-options ln_port network_name=physnet1])
+
+# Allow some time for ovn-northd and ovn-controller to catch up.
+# XXX This should be more systematic.
+sleep 2
+
+# Expect no packets when hv2 bridge-mapping is not present
+: > packets
+OVN_CHECK_PACKETS([hv1/snoopvif-tx.pcap], [packets])
+
+# Add bridge-mapping on hv2
+AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys])
+
+# Wait for packet to be received.
+OVS_WAIT_UNTIL([test `wc -c < "hv1/snoopvif-tx.pcap"` -ge 50])
+trim_zeros() {
+    sed 's/\(00\)\{1,\}$//'
+}
+$PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv1/snoopvif-tx.pcap | trim_zeros > packets
+expected="fffffffffffff0000000000108060001080006040001f00000000001c0a80001000000000000c0a80001"
+echo $expected > expout
+expected="fffffffffffff0000000000108060001080006040001f00000000001c0a80002000000000000c0a80002"
+echo $expected >> expout
+AT_CHECK([sort packets], [0], [expout])
+cat packets
+
+OVN_CLEANUP([hv1],[hv2])
+
+AT_CLEANUP