diff mbox

[ovs-dev,v4] ovn-northd: Force SNAT for multiple gateway routers.

Message ID 1479449856-12750-1-git-send-email-guru@ovn.org
State Accepted
Headers show

Commit Message

Gurucharan Shetty Nov. 18, 2016, 6:17 a.m. UTC
When multiple gateway routers exist, a packet can
enter any gateway router. Once the packet reaches its
destination, its reverse direction should be via the
same gateway router.  This is achieved by doing a SNAT
of the packet in the inward direction (towards logical space)
with an IP address of the gateway router such that the packet travels back
to the same gateway router.

To do the above, we introduce two new options in the logical router.

options:dnat_force_snat_ip=$IP will force SNAT any packet to $IP if
it has been previously DNATted.

options:lb_force_snat_ip=$IP will force SNAT any packet to $IP if
it has been previously load-balanced.

Signed-off-by: Gurucharan Shetty <guru@ovn.org>
---
v3->v4:
  1. Use an additional bit to hint that force SNAT has to be done for LB.
  2. The established traffic of LB gets its own flows just like new connection.
---
 ovn/lib/logical-fields.c    |   8 +
 ovn/lib/logical-fields.h    |  10 ++
 ovn/northd/ovn-northd.8.xml |  62 ++++++-
 ovn/northd/ovn-northd.c     | 155 ++++++++++++++++--
 ovn/ovn-nb.xml              |  25 +++
 tests/system-ovn.at         | 386 ++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 628 insertions(+), 18 deletions(-)

Comments

Mickey Spiegel Nov. 29, 2016, 3:33 a.m. UTC | #1
Acked-by: Mickey Spiegel <mickeys.dev@gmail.com>

A few comments and nits below.

On Thu, Nov 17, 2016 at 10:17 PM, Gurucharan Shetty <guru@ovn.org> wrote:

> When multiple gateway routers exist, a packet can
> enter any gateway router. Once the packet reaches its
> destination, its reverse direction should be via the
> same gateway router.  This is achieved by doing a SNAT
> of the packet in the inward direction (towards logical space)
> with a IP address of the gateway router such that packet travels back
> to the same gateway router.
>
> To do the above, we introduce two new options in the logical router.
>
> options:dnat_force_snat_ip=$IP will force SNAT any packet to $IP if
> it has been previously DNATted.
>
> options:lb_force_snat_ip=$IP will force SNAT any packet to $IP if
> it has been previously load-balanced.
>
> Signed-off-by: Gurucharan Shetty <guru@ovn.org>
> ---
> v3->v4:
>   1. Use an additional bit to hint that force SNAT has to be done for LB.
>   2. The established traffic of LB gets its own flows just like new
> connection.
> ---
>  ovn/lib/logical-fields.c    |   8 +
>  ovn/lib/logical-fields.h    |  10 ++
>  ovn/northd/ovn-northd.8.xml |  62 ++++++-
>  ovn/northd/ovn-northd.c     | 155 ++++++++++++++++--
>  ovn/ovn-nb.xml              |  25 +++
>  tests/system-ovn.at         | 386 ++++++++++++++++++++++++++++++
> ++++++++++++++
>  6 files changed, 628 insertions(+), 18 deletions(-)
>
> diff --git a/ovn/lib/logical-fields.c b/ovn/lib/logical-fields.c
> index d4578c3..fa134d6 100644
> --- a/ovn/lib/logical-fields.c
> +++ b/ovn/lib/logical-fields.c
> @@ -88,6 +88,14 @@ ovn_init_symtab(struct shash *symtab)
>      char flags_str[16];
>      snprintf(flags_str, sizeof flags_str, "flags[%d]",
> MLF_ALLOW_LOOPBACK_BIT);
>      expr_symtab_add_subfield(symtab, "flags.loopback", NULL, flags_str);
> +    snprintf(flags_str, sizeof flags_str, "flags[%d]",
> +             MLF_FORCE_SNAT_FOR_DNAT_BIT);
> +    expr_symtab_add_subfield(symtab, "flags.force_snat_for_dnat", NULL,
> +                             flags_str);
> +    snprintf(flags_str, sizeof flags_str, "flags[%d]",
> +             MLF_FORCE_SNAT_FOR_LB_BIT);
> +    expr_symtab_add_subfield(symtab, "flags.force_snat_for_lb", NULL,
> +                             flags_str);
>
>      /* Connection tracking state. */
>      expr_symtab_add_field(symtab, "ct_mark", MFF_CT_MARK, NULL, false);
> diff --git a/ovn/lib/logical-fields.h b/ovn/lib/logical-fields.h
> index a1f1da6..696c529 100644
> --- a/ovn/lib/logical-fields.h
> +++ b/ovn/lib/logical-fields.h
> @@ -47,6 +47,8 @@ void ovn_init_symtab(struct shash *symtab);
>  enum mff_log_flags_bits {
>      MLF_ALLOW_LOOPBACK_BIT = 0,
>      MLF_RCV_FROM_VXLAN_BIT = 1,
> +    MLF_FORCE_SNAT_FOR_DNAT_BIT = 2,
> +    MLF_FORCE_SNAT_FOR_LB_BIT = 3,
>  };
>
>  /* MFF_LOG_FLAGS_REG flag assignments */
> @@ -59,6 +61,14 @@ enum mff_log_flags {
>       * VXLAN encapsulation.  Egress port information is available for
>       * Geneve and STT tunnel types. */
>      MLF_RCV_FROM_VXLAN = (1 << MLF_RCV_FROM_VXLAN_BIT),
> +
> +    /* Indicate that a packet needs a force SNAT in the gateway router
> when
> +     * DNAT has taken place. */
> +    MLF_FORCE_SNAT_FOR_DNAT = (1 << MLF_FORCE_SNAT_FOR_DNAT_BIT),
> +
> +    /* Indicate that a packet needs a force SNAT in the gateway router
> when
> +     * load-balancing has taken place. */
> +    MLF_FORCE_SNAT_FOR_LB = (1 << MLF_FORCE_SNAT_FOR_LB_BIT),
>  };
>
>  #endif /* ovn/lib/logical-fields.h */
> diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
> index df53d4c..11245c6 100644
> --- a/ovn/northd/ovn-northd.8.xml
> +++ b/ovn/northd/ovn-northd.8.xml
> @@ -1153,6 +1153,14 @@ icmp4 {
>          </p>
>
>          <p>
> +          If the Gateway router has been configured to force SNAT (any
> +          previously DNATted or Load-balanced packets) to <var>B</var>,
> +          a priority-100 flow matches <code>ip &amp;&amp;
> +          ip4.dst == <var>B</var></code> with an action <code>ct_snat;
> +          next;</code>.
> +        </p>
> +
> +        <p>
>            A priority-0 logical flow with match <code>1</code> has actions
>            <code>next;</code>.
>          </p>
> @@ -1176,7 +1184,23 @@ icmp4 {
>          &amp;&amp; <var>P</var> &amp;&amp; <var>P</var>.dst == <var>PORT
>          </var></code> with an action of <code>ct_lb(<var>args</var>)</
> code>,
>          where <var>args</var> contains comma separated IPv4 addresses (and
> -        optional port numbers) to load balance to.
> +        optional port numbers) to load balance to.  If the Gateway router
> +        is configured to force SNAT any load-balanced packets, the above
> +        action will be replaced by <code>flags.force_snat_for_lb = 1;
> +        ct_lb(<var>args</var>);</code>.
> +      </li>
> +
> +      <li>
> +        For all the configured load balancing rules for Gateway router in
> +        <code>OVN_Northbound</code> database that includes a L4 port
> +        <var>PORT</var> of protocol <var>P</var> and IPv4 address
> +        <var>VIP</var>, a priority-120 flow that matches on
> +        <code>ct.est &amp;&amp; ip &amp;&amp; ip4.dst == <var>VIP</var>
> +        &amp;&amp; <var>P</var> &amp;&amp; <var>P</var>.dst == <var>PORT
> +        </var></code> with an action of <code>ct_dnat;</code>.
> +        If the Gateway router is configured to force SNAT any
> load-balanced
> +        packets, the above action will be replaced by
> +        <code>flags.force_snat_for_lb = 1; ct_dnat;</code>.
>        </li>
>
>        <li>
> @@ -1186,7 +1210,21 @@ icmp4 {
>          <code>ct.new &amp;&amp; ip &amp;&amp; ip4.dst ==
>          <var>VIP</var></code> with an action of
>          <code>ct_lb(<var>args</var>)</code>, where <var>args</var>
> contains
> -        comma separated IPv4 addresses.
> +        comma separated IPv4 addresses.  If the Gateway router
> +        is configured to force SNAT any load-balanced packets, the above
> +        action will be replaced by <code>flags.force_snat_for_lb = 1;
> +        ct_lb(<var>args</var>);</code>.
> +      </li>
> +
> +      <li>
> +        For all the configured load balancing rules for Gateway router in
> +        <code>OVN_Northbound</code> database that includes just an IP
> address
> +        <var>VIP</var> to match on, a priority-110 flow that matches on
> +        <code>ct.est &amp;&amp; ip &amp;&amp; ip4.dst ==
> +        <var>VIP</var></code> with an action of <code>ct_dnat;</code>.
> +        If the Gateway router is configured to force SNAT any
> load-balanced
> +        packets, the above action will be replaced by
> +        <code>flags.force_snat_for_lb = 1; ct_dnat;</code>.
>        </li>
>
>        <li>
> @@ -1194,7 +1232,11 @@ icmp4 {
>          to change the destination IP address of a packet from
> <var>A</var> to
>          <var>B</var>, a priority-100 flow matches <code>ip &amp;&amp;
>          ip4.dst == <var>A</var></code> with an action
> -        <code>flags.loopback = 1; ct_dnat(<var>B</var>);</code>.
> +        <code>flags.loopback = 1; ct_dnat(<var>B</var>);</code>.  If the
> +        Gateway router is configured to force SNAT any DNATed packet,
> +        the above action will be replaced by
> +        <code>flags.force_snat_for_dnat = 1; flags.loopback = 1;
> +        ct_dnat(<var>B</var>);</code>.
>        </li>
>
>        <li>
> @@ -1433,6 +1475,20 @@ arp {
>      <ul>
>        <li>
>          <p>
> +          If the Gateway router in the OVN Northbound database has been
> +          configured to force SNAT a packet (that has been previously
> DNATted)
> +          to <var>B</var>, a priority-110 flow matches
> +          <code>flags.force_snat_for_dnat == 1 &amp;&amp; ip</code> with
> an
> +          action <code>ct_snat(<var>B</var>);</code>.
> +        </p>
> +        <p>
> +          If the Gateway router in the OVN Northbound database has been
> +          configured to force SNAT a packet (that has been previously
> +          load-balanced) to <var>B</var>, a priority-100 flow matches
> +          <code>flags.force_snat_for_lb == 1 &amp;&amp; ip</code> with an
> +          action <code>ct_snat(<var>B</var>);</code>.
> +        </p>
> +        <p>
>            For each configuration in the OVN Northbound database, that asks
>            to change the source IP address of a packet from an IP address
> of
>            <var>A</var> or to change the source IP address of a packet that
> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
> index 437da9f..5afcbbc 100644
> --- a/ovn/northd/ovn-northd.c
> +++ b/ovn/northd/ovn-northd.c
> @@ -3452,6 +3452,63 @@ op_put_v6_networks(struct ds *ds, const struct
> ovn_port *op)
>      ds_put_cstr(ds, "}");
>  }
>
> +static const char *
> +get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32
> *ip)
> +{
> +    char *key = xasprintf("%s_force_snat_ip", key_type);
> +    const char *ip_address = smap_get(&od->nbr->options, key);
> +    free(key);
> +
> +    if (ip_address) {
> +        ovs_be32 mask;
> +        char *error = ip_parse_masked(ip_address, ip, &mask);
> +        if (error || mask != OVS_BE32_MAX) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> +            VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
> +                         ip_address, UUID_ARGS(&od->key));
> +            free(error);
> +            *ip = 0;
> +            return NULL;
> +        }
> +        return ip_address;
> +    }
> +
> +    *ip = 0;
> +    return NULL;
> +}
> +
> +static void
> +add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
> +                   struct ds *match, struct ds *actions, int priority,
> +                   const char *lb_force_snat_ip)
> +{
> +    /* A match and actions for new connections. */
> +    char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
> +    if (lb_force_snat_ip) {
> +        char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s",
> +                                      ds_cstr(actions));
> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
> +                      new_actions);
> +        free(new_actions);
> +    } else {
> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
> +                      ds_cstr(actions));
> +    }
> +
> +    /* A match and actions for established connections. */
> +    char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
> +    if (lb_force_snat_ip) {
> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
> +                      "flags.force_snat_for_lb = 1; ct_dnat;");
> +    } else {
> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
> +                      "ct_dnat;");
> +    }
> +
> +    free(new_match);
> +    free(est_match);
> +}
> +
>  static void
>  build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
>                      struct hmap *lflows)
> @@ -3673,8 +3730,26 @@ build_lrouter_flows(struct hmap *datapaths, struct
> hmap *ports,
>
>          sset_destroy(&all_ips);
>
> -        ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
> op->od->nbr->n_nat);
> +        /* A gateway router can have 2 SNAT IP addresses to force DNATed
> and
> +         * LBed traffic respectively to be SNATed.  In addition, there
> can be
> +         * a number of SNAT rules in the NAT table. */
> +        ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
> +                                     (op->od->nbr->n_nat + 2));
>          size_t n_snat_ips = 0;
> +
> +        ovs_be32 snat_ip;
> +        const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
> +                                                           &snat_ip);
> +        if (dnat_force_snat_ip) {
> +            snat_ips[n_snat_ips++] = snat_ip;
> +        }
> +
> +        const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
> +                                                         &snat_ip);
> +        if (lb_force_snat_ip) {
> +            snat_ips[n_snat_ips++] = snat_ip;
> +        }
> +
>          for (int i = 0; i < op->od->nbr->n_nat; i++) {
>              const struct nbrec_nat *nat;
>
>
I never noticed before, but I am unsure why the "continue" in line 3693
(without changes), line 3768 (with changes), should be there. In order to
generate ARP replies for SNAT external IPs, this seems to assume that
all external IPs for SNAT are router interface IPs, and that all ARP
requests for those IPs will arrive over the corresponding router interface.
The latter condition is true for gateway routers, given that they connect
to the distributed router over a single join network.

If the continue were removed, then both assumptions would go away.

What I am pointing out is old code, it just gets emphasized more with
this new functionality. Explicit SNAT rules point outward and so use
an IP address on a router interface pointing outward. With the new
force_snat functionality, due to the "continue" in line 3693, you would
have to use an IP address on the gateway router's interface to the join
network, which is exactly what you have done in the tests below.

I can revisit this in the distributed router patch set if you feel that it
is not relevant to NAT on a gateway router.


> @@ -3845,6 +3920,12 @@ build_lrouter_flows(struct hmap *datapaths, struct
> hmap *ports,
>              continue;
>          }
>
> +        ovs_be32 snat_ip;
> +        const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
> +                                                           &snat_ip);
> +        const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
> +                                                         &snat_ip);
> +
>          /* A set to hold all ips that need defragmentation and tracking.
> */
>          struct sset all_ips = SSET_INITIALIZER(&all_ips);
>
> @@ -3867,14 +3948,16 @@ build_lrouter_flows(struct hmap *datapaths, struct
> hmap *ports,
>                      sset_add(&all_ips, ip_address);
>                  }
>
> -                /* Higher priority rules are added in DNAT table to match
> on
> -                 * ct.new which in-turn have group id as an action for
> load
> -                 * balancing. */
> +                /* Higher priority rules are added for load-balancing in
> DNAT
> +                 * table.  For every match (on a VIP[:port]), we add two
> flows
> +                 * via add_router_lb_flow().  One flow is for specific
> matching
> +                 * on ct.new with an action of "ct_lb($targets);".  The
> other
> +                 * flow is for ct.est with an action of "ct_dnat;". */
>                  ds_clear(&actions);
>                  ds_put_format(&actions, "ct_lb(%s);", node->value);
>
>                  ds_clear(&match);
> -                ds_put_format(&match, "ct.new && ip && ip4.dst == %s",
> +                ds_put_format(&match, "ip && ip4.dst == %s",
>                                ip_address);
>                  free(ip_address);
>
> @@ -3886,11 +3969,11 @@ build_lrouter_flows(struct hmap *datapaths, struct
> hmap *ports,
>                          ds_put_format(&match, " && tcp && tcp.dst == %d",
>                                        port);
>                      }
> -                    ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT,
> -                                  120, ds_cstr(&match),
> ds_cstr(&actions));
> +                    add_router_lb_flow(lflows, od, &match, &actions, 120,
> +                                       lb_force_snat_ip);
>                  } else {
> -                    ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT,
> -                                  110, ds_cstr(&match),
> ds_cstr(&actions));
> +                    add_router_lb_flow(lflows, od, &match, &actions, 110,
> +                                       lb_force_snat_ip);
>                  }
>              }
>          }
> @@ -3981,7 +4064,13 @@ build_lrouter_flows(struct hmap *datapaths, struct
> hmap *ports,
>                  ds_clear(&match);
>                  ds_put_format(&match, "ip && ip4.dst == %s",
> nat->external_ip);
>                  ds_clear(&actions);
> -                ds_put_format(&actions,"flags.loopback = 1;
> ct_dnat(%s);",
> +                if (dnat_force_snat_ip) {
> +                    /* Indicate to the future tables that a DNAT has taken
> +                     * place and a force SNAT needs to be done in the
> Egress
> +                     * SNAT table. */
> +                    ds_put_format(&actions, "flags.force_snat_for_dnat =
> 1; ");
> +                }
> +                ds_put_format(&actions, "flags.loopback = 1;
> ct_dnat(%s);",
>                                nat->logical_ip);
>                  ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
>                                ds_cstr(&match), ds_cstr(&actions));
> @@ -4006,8 +4095,47 @@ build_lrouter_flows(struct hmap *datapaths, struct
> hmap *ports,
>              }
>          }
>
> +        /* Handle force SNAT options set in the gateway router. */
> +        if (dnat_force_snat_ip) {
> +            /* If a packet with destination IP address as that of the
> +             * gateway router (as set in options:dnat_force_snat_ip) is
> seen,
> +             * UNSNAT it. */
> +            ds_clear(&match);
> +            ds_put_format(&match, "ip && ip4.dst == %s",
> dnat_force_snat_ip);
> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110,
> +                          ds_cstr(&match), "ct_snat; next;");
>

I am unable to come up with a rationale for the different priority values
in the S_ROUTER_IN_UNSNAT stage. Explicit SNAT rules use priority 100,
dnat_force_snat uses priority 110, and lb_force_snat uses priority 100.
It seems like there can be cases where the same IP address is specified for
dnat_force_snat and lb_force_snat. With NAT on a distributed router, it
would be possible for all three to use the same IP address. The actions are
the same, so it will all work regardless. It just seems like it would be
more consistent if all three cases use the same priority value, or all
three cases use different priority values.

Also note that the ovn-northd.8.xml changes describe all of these flows as
priority 100 flows.

> +
> +            /* Higher priority rules to force SNAT with the IP addresses
> +             * configured in the Gateway router.  This only takes effect
> +             * when the packet has already been DNATed once. */
> +            ds_clear(&match);
> +            ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
> +            ds_clear(&actions);
> +            ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
> +            ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 110,
> +                          ds_cstr(&match), ds_cstr(&actions));

> +        }
> +        if (lb_force_snat_ip) {
> +            /* If a packet with destination IP address as that of the
> +             * gateway router (as set in options:lb_force_snat_ip) is
> seen,
> +             * UNSNAT it. */
> +            ds_clear(&match);
> +            ds_put_format(&match, "ip && ip4.dst == %s",
> lb_force_snat_ip);
> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
> +                          ds_cstr(&match), "ct_snat; next;");
> +
> +            /* Load balanced traffic will have flags.force_snat_for_lb
> set.
> +             * Force SNAT it. */
> +            ds_clear(&match);
> +            ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip");
> +            ds_clear(&actions);
> +            ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
> +            ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 110,
> +                          ds_cstr(&match), ds_cstr(&actions));
>

The lb_force_snat priority value of 110 is not consistent with the
ovn-northd.8.xml changes, which describe the use of priority 110 for
dnat_force_snat and 100 for lb_force_snat.

I could not figure out why priority 110 is used for the force_snat
S_ROUTER_OUT_SNAT flows rather than 100, since I did not see
any priority 100 flows. I guess if the priority values are different for
dnat_force_snat and lb_force_snat, then it makes sense. In practice,
we should never see both flags.force_snat_for_dnat and
flags.force_snat_for_lb set, but I guess different priority values would
be safer and easier to debug if that ever does happen.

> +        }
> +
>          /* Re-circulate every packet through the DNAT zone.
> -        * This helps with three things.
> +        * This helps with two things.
>          *
>          * 1. Any packet that needs to be unDNATed in the reverse
>          * direction gets unDNATed. Ideally this could be done in
> @@ -4016,10 +4144,7 @@ build_lrouter_flows(struct hmap *datapaths, struct
> hmap *ports,
>          * ip address being external IP address for IP routing,
>          * we can do it here, saving a future re-circulation.
>          *
> -        * 2. Established load-balanced connections automatically get
> -        * DNATed.
> -        *
> -        * 3. Any packet that was sent through SNAT zone in the
> +        * 2. Any packet that was sent through SNAT zone in the
>          * previous table automatically gets re-circulated to get
>          * back the new destination IP address that is needed for
>          * routing in the openflow pipeline. */
>

<snip>

Mickey
diff mbox

Patch

diff --git a/ovn/lib/logical-fields.c b/ovn/lib/logical-fields.c
index d4578c3..fa134d6 100644
--- a/ovn/lib/logical-fields.c
+++ b/ovn/lib/logical-fields.c
@@ -88,6 +88,14 @@  ovn_init_symtab(struct shash *symtab)
     char flags_str[16];
     snprintf(flags_str, sizeof flags_str, "flags[%d]", MLF_ALLOW_LOOPBACK_BIT);
     expr_symtab_add_subfield(symtab, "flags.loopback", NULL, flags_str);
+    snprintf(flags_str, sizeof flags_str, "flags[%d]",
+             MLF_FORCE_SNAT_FOR_DNAT_BIT);
+    expr_symtab_add_subfield(symtab, "flags.force_snat_for_dnat", NULL,
+                             flags_str);
+    snprintf(flags_str, sizeof flags_str, "flags[%d]",
+             MLF_FORCE_SNAT_FOR_LB_BIT);
+    expr_symtab_add_subfield(symtab, "flags.force_snat_for_lb", NULL,
+                             flags_str);
 
     /* Connection tracking state. */
     expr_symtab_add_field(symtab, "ct_mark", MFF_CT_MARK, NULL, false);
diff --git a/ovn/lib/logical-fields.h b/ovn/lib/logical-fields.h
index a1f1da6..696c529 100644
--- a/ovn/lib/logical-fields.h
+++ b/ovn/lib/logical-fields.h
@@ -47,6 +47,8 @@  void ovn_init_symtab(struct shash *symtab);
 enum mff_log_flags_bits {
     MLF_ALLOW_LOOPBACK_BIT = 0,
     MLF_RCV_FROM_VXLAN_BIT = 1,
+    MLF_FORCE_SNAT_FOR_DNAT_BIT = 2,
+    MLF_FORCE_SNAT_FOR_LB_BIT = 3,
 };
 
 /* MFF_LOG_FLAGS_REG flag assignments */
@@ -59,6 +61,14 @@  enum mff_log_flags {
      * VXLAN encapsulation.  Egress port information is available for
      * Geneve and STT tunnel types. */
     MLF_RCV_FROM_VXLAN = (1 << MLF_RCV_FROM_VXLAN_BIT),
+
+    /* Indicate that a packet needs a force SNAT in the gateway router when
+     * DNAT has taken place. */
+    MLF_FORCE_SNAT_FOR_DNAT = (1 << MLF_FORCE_SNAT_FOR_DNAT_BIT),
+
+    /* Indicate that a packet needs a force SNAT in the gateway router when
+     * load-balancing has taken place. */
+    MLF_FORCE_SNAT_FOR_LB = (1 << MLF_FORCE_SNAT_FOR_LB_BIT),
 };
 
 #endif /* ovn/lib/logical-fields.h */
diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
index df53d4c..11245c6 100644
--- a/ovn/northd/ovn-northd.8.xml
+++ b/ovn/northd/ovn-northd.8.xml
@@ -1153,6 +1153,14 @@  icmp4 {
         </p>
 
         <p>
+          If the Gateway router has been configured to force SNAT (any
+          previously DNATted or Load-balanced packets) to <var>B</var>,
+          a priority-100 flow matches <code>ip &amp;&amp;
+          ip4.dst == <var>B</var></code> with an action <code>ct_snat;
+          next;</code>.
+        </p>
+
+        <p>
           A priority-0 logical flow with match <code>1</code> has actions
           <code>next;</code>.
         </p>
@@ -1176,7 +1184,23 @@  icmp4 {
         &amp;&amp; <var>P</var> &amp;&amp; <var>P</var>.dst == <var>PORT
         </var></code> with an action of <code>ct_lb(<var>args</var>)</code>,
         where <var>args</var> contains comma separated IPv4 addresses (and
-        optional port numbers) to load balance to.
+        optional port numbers) to load balance to.  If the Gateway router
+        is configured to force SNAT any load-balanced packets, the above
+        action will be replaced by <code>flags.force_snat_for_lb = 1;
+        ct_lb(<var>args</var>);</code>.
+      </li>
+
+      <li>
+        For all the configured load balancing rules for Gateway router in
+        <code>OVN_Northbound</code> database that includes a L4 port
+        <var>PORT</var> of protocol <var>P</var> and IPv4 address
+        <var>VIP</var>, a priority-120 flow that matches on
+        <code>ct.est &amp;&amp; ip &amp;&amp; ip4.dst == <var>VIP</var>
+        &amp;&amp; <var>P</var> &amp;&amp; <var>P</var>.dst == <var>PORT
+        </var></code> with an action of <code>ct_dnat;</code>.
+        If the Gateway router is configured to force SNAT any load-balanced
+        packets, the above action will be replaced by
+        <code>flags.force_snat_for_lb = 1; ct_dnat;</code>.
       </li>
 
       <li>
@@ -1186,7 +1210,21 @@  icmp4 {
         <code>ct.new &amp;&amp; ip &amp;&amp; ip4.dst ==
         <var>VIP</var></code> with an action of
         <code>ct_lb(<var>args</var>)</code>, where <var>args</var> contains
-        comma separated IPv4 addresses.
+        comma separated IPv4 addresses.  If the Gateway router
+        is configured to force SNAT any load-balanced packets, the above
+        action will be replaced by <code>flags.force_snat_for_lb = 1;
+        ct_lb(<var>args</var>);</code>.
+      </li>
+
+      <li>
+        For all the configured load balancing rules for Gateway router in
+        <code>OVN_Northbound</code> database that includes just an IP address
+        <var>VIP</var> to match on, a priority-110 flow that matches on
+        <code>ct.est &amp;&amp; ip &amp;&amp; ip4.dst ==
+        <var>VIP</var></code> with an action of <code>ct_dnat;</code>.
+        If the Gateway router is configured to force SNAT any load-balanced
+        packets, the above action will be replaced by
+        <code>flags.force_snat_for_lb = 1; ct_dnat;</code>.
       </li>
 
       <li>
@@ -1194,7 +1232,11 @@  icmp4 {
         to change the destination IP address of a packet from <var>A</var> to
         <var>B</var>, a priority-100 flow matches <code>ip &amp;&amp;
         ip4.dst == <var>A</var></code> with an action
-        <code>flags.loopback = 1; ct_dnat(<var>B</var>);</code>.
+        <code>flags.loopback = 1; ct_dnat(<var>B</var>);</code>.  If the
+        Gateway router is configured to force SNAT any DNATed packet,
+        the above action will be replaced by
+        <code>flags.force_snat_for_dnat = 1; flags.loopback = 1;
+        ct_dnat(<var>B</var>);</code>.
       </li>
 
       <li>
@@ -1433,6 +1475,20 @@  arp {
     <ul>
       <li>
         <p>
+          If the Gateway router in the OVN Northbound database has been
+          configured to force SNAT a packet (that has been previously DNATted)
+          to <var>B</var>, a priority-110 flow matches
+          <code>flags.force_snat_for_dnat == 1 &amp;&amp; ip</code> with an
+          action <code>ct_snat(<var>B</var>);</code>.
+        </p>
+        <p>
+          If the Gateway router in the OVN Northbound database has been
+          configured to force SNAT a packet (that has been previously
+          load-balanced) to <var>B</var>, a priority-100 flow matches
+          <code>flags.force_snat_for_lb == 1 &amp;&amp; ip</code> with an
+          action <code>ct_snat(<var>B</var>);</code>.
+        </p>
+        <p>
           For each configuration in the OVN Northbound database, that asks
           to change the source IP address of a packet from an IP address of
           <var>A</var> or to change the source IP address of a packet that
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 437da9f..5afcbbc 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -3452,6 +3452,63 @@  op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
     ds_put_cstr(ds, "}");
 }
 
+static const char *
+get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
+{
+    char *key = xasprintf("%s_force_snat_ip", key_type);
+    const char *ip_address = smap_get(&od->nbr->options, key);
+    free(key);
+
+    if (ip_address) {
+        ovs_be32 mask;
+        char *error = ip_parse_masked(ip_address, ip, &mask);
+        if (error || mask != OVS_BE32_MAX) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
+                         ip_address, UUID_ARGS(&od->key));
+            free(error);
+            *ip = 0;
+            return NULL;
+        }
+        return ip_address;
+    }
+
+    *ip = 0;
+    return NULL;
+}
+
+static void
+add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
+                   struct ds *match, struct ds *actions, int priority,
+                   const char *lb_force_snat_ip)
+{
+    /* A match and actions for new connections. */
+    char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
+    if (lb_force_snat_ip) {
+        char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s",
+                                      ds_cstr(actions));
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
+                      new_actions);
+        free(new_actions);
+    } else {
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
+                      ds_cstr(actions));
+    }
+
+    /* A match and actions for established connections. */
+    char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
+    if (lb_force_snat_ip) {
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
+                      "flags.force_snat_for_lb = 1; ct_dnat;");
+    } else {
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
+                      "ct_dnat;");
+    }
+
+    free(new_match);
+    free(est_match);
+}
+
 static void
 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                     struct hmap *lflows)
@@ -3673,8 +3730,26 @@  build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
 
         sset_destroy(&all_ips);
 
-        ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips * op->od->nbr->n_nat);
+        /* A gateway router can have 2 SNAT IP addresses to force DNATed and
+         * LBed traffic respectively to be SNATed.  In addition, there can be
+         * a number of SNAT rules in the NAT table. */
+        ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
+                                     (op->od->nbr->n_nat + 2));
         size_t n_snat_ips = 0;
+
+        ovs_be32 snat_ip;
+        const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
+                                                           &snat_ip);
+        if (dnat_force_snat_ip) {
+            snat_ips[n_snat_ips++] = snat_ip;
+        }
+
+        const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
+                                                         &snat_ip);
+        if (lb_force_snat_ip) {
+            snat_ips[n_snat_ips++] = snat_ip;
+        }
+
         for (int i = 0; i < op->od->nbr->n_nat; i++) {
             const struct nbrec_nat *nat;
 
@@ -3845,6 +3920,12 @@  build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
             continue;
         }
 
+        ovs_be32 snat_ip;
+        const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
+                                                           &snat_ip);
+        const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
+                                                         &snat_ip);
+
         /* A set to hold all ips that need defragmentation and tracking. */
         struct sset all_ips = SSET_INITIALIZER(&all_ips);
 
@@ -3867,14 +3948,16 @@  build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                     sset_add(&all_ips, ip_address);
                 }
 
-                /* Higher priority rules are added in DNAT table to match on
-                 * ct.new which in-turn have group id as an action for load
-                 * balancing. */
+                /* Higher priority rules are added for load-balancing in DNAT
+                 * table.  For every match (on a VIP[:port]), we add two flows
+                 * via add_router_lb_flow().  One flow is for specific matching
+                 * on ct.new with an action of "ct_lb($targets);".  The other
+                 * flow is for ct.est with an action of "ct_dnat;". */
                 ds_clear(&actions);
                 ds_put_format(&actions, "ct_lb(%s);", node->value);
 
                 ds_clear(&match);
-                ds_put_format(&match, "ct.new && ip && ip4.dst == %s",
+                ds_put_format(&match, "ip && ip4.dst == %s",
                               ip_address);
                 free(ip_address);
 
@@ -3886,11 +3969,11 @@  build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                         ds_put_format(&match, " && tcp && tcp.dst == %d",
                                       port);
                     }
-                    ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT,
-                                  120, ds_cstr(&match), ds_cstr(&actions));
+                    add_router_lb_flow(lflows, od, &match, &actions, 120,
+                                       lb_force_snat_ip);
                 } else {
-                    ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT,
-                                  110, ds_cstr(&match), ds_cstr(&actions));
+                    add_router_lb_flow(lflows, od, &match, &actions, 110,
+                                       lb_force_snat_ip);
                 }
             }
         }
@@ -3981,7 +4064,13 @@  build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                 ds_clear(&match);
                 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
                 ds_clear(&actions);
-                ds_put_format(&actions,"flags.loopback = 1; ct_dnat(%s);",
+                if (dnat_force_snat_ip) {
+                    /* Indicate to the future tables that a DNAT has taken
+                     * place and a force SNAT needs to be done in the Egress
+                     * SNAT table. */
+                    ds_put_format(&actions, "flags.force_snat_for_dnat = 1; ");
+                }
+                ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
                               nat->logical_ip);
                 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
                               ds_cstr(&match), ds_cstr(&actions));
@@ -4006,8 +4095,47 @@  build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
             }
         }
 
+        /* Handle force SNAT options set in the gateway router. */
+        if (dnat_force_snat_ip) {
+            /* If a packet with destination IP address as that of the
+             * gateway router (as set in options:dnat_force_snat_ip) is seen,
+             * UNSNAT it. */
+            ds_clear(&match);
+            ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip);
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110,
+                          ds_cstr(&match), "ct_snat; next;");
+
+            /* Higher priority rules to force SNAT with the IP addresses
+             * configured in the Gateway router.  This only takes effect
+             * when the packet has already been DNATed once. */
+            ds_clear(&match);
+            ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
+            ds_clear(&actions);
+            ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
+            ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 110,
+                          ds_cstr(&match), ds_cstr(&actions));
+        }
+        if (lb_force_snat_ip) {
+            /* If a packet with destination IP address as that of the
+             * gateway router (as set in options:lb_force_snat_ip) is seen,
+             * UNSNAT it. */
+            ds_clear(&match);
+            ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip);
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
+                          ds_cstr(&match), "ct_snat; next;");
+
+            /* Load balanced traffic will have flags.force_snat_for_lb set.
+             * Force SNAT it. */
+            ds_clear(&match);
+            ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip");
+            ds_clear(&actions);
+            ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
+            ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 110,
+                          ds_cstr(&match), ds_cstr(&actions));
+        }
+
         /* Re-circulate every packet through the DNAT zone.
-        * This helps with three things.
+        * This helps with two things.
         *
         * 1. Any packet that needs to be unDNATed in the reverse
         * direction gets unDNATed. Ideally this could be done in
@@ -4016,10 +4144,7 @@  build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         * ip address being external IP address for IP routing,
         * we can do it here, saving a future re-circulation.
         *
-        * 2. Established load-balanced connections automatically get
-        * DNATed.
-        *
-        * 3. Any packet that was sent through SNAT zone in the
+        * 2. Any packet that was sent through SNAT zone in the
         * previous table automatically gets re-circulated to get
         * back the new destination IP address that is needed for
         * routing in the openflow pipeline. */
diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
index 49fbe00..3e40881 100644
--- a/ovn/ovn-nb.xml
+++ b/ovn/ovn-nb.xml
@@ -924,6 +924,31 @@ 
           router.
         </p>
       </column>
+      <column name="options" key="dnat_force_snat_ip">
+        <p>
+          If set, indicates the IP address to use to force SNAT a packet
+          that has already been DNATed in the gateway router.  When multiple
+          gateway routers are configured, a packet can potentially enter any
+          of the gateway routers, get DNATed and eventually reach the logical
+          switch port.  For the return traffic to go back to the same gateway
+          router (for unDNATing), the packet needs a SNAT in the first place.
+          This can be achieved by setting the above option with a gateway
+          specific IP address.
+        </p>
+      </column>
+      <column name="options" key="lb_force_snat_ip">
+        <p>
+          If set, indicates the IP address to use to force SNAT a packet
+          that has already been load-balanced in the gateway router.  When
+          multiple gateway routers are configured, a packet can potentially
+          enter any of the gateway routers, get DNATed as part of the load-
+          balancing and eventually reach the logical switch port.
+          For the return traffic to go back to the same gateway router (for
+          unDNATing), the packet needs a SNAT in the first place.  This can be
+          achieved by setting the above option with a gateway specific IP
+          address.
+        </p>
+      </column>
     </group>
 
     <group title="Common Columns">
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 21226d9..9e32342 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -279,6 +279,225 @@  OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
 /connection dropped.*/d"])
 AT_CLEANUP
 
+AT_SETUP([ovn -- multiple gateway routers, SNAT and DNAT])
+AT_KEYWORDS([ovnnat])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+# Logical network:
+# Three LRs - R1, R2 and R3 that are connected to each other via LS "join"
+# in 20.0.0.0/24 network. R1 has switches foo (192.168.1.0/24) and
+# bar (192.168.2.0/24) connected to it. R2 has alice (172.16.1.0/24) connected
+# to it.  R3 has bob (172.16.1.0/24) connected to it. Note how both alice and
+# bob have the same subnet behind them.  We are trying to simulate an external
+# network via those 2 switches. In the real world the switch ports of these
+# switches will have addresses set as "unknown" to make them learning switches.
+# Or those switches will be "localnet" ones.
+#
+#    foo -- R1 -- join - R2 -- alice
+#           |          |
+#    bar ----          - R3 --- bob
+
+ovn-nbctl create Logical_Router name=R1
+ovn-nbctl create Logical_Router name=R2 options:chassis=hv1
+ovn-nbctl create Logical_Router name=R3 options:chassis=hv1
+
+ovn-nbctl ls-add foo
+ovn-nbctl ls-add bar
+ovn-nbctl ls-add alice
+ovn-nbctl ls-add bob
+ovn-nbctl ls-add join
+
+# Connect foo to R1
+ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24
+ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
+    type=router options:router-port=foo addresses=\"00:00:01:01:02:03\"
+
+# Connect bar to R1
+ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24
+ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \
+    type=router options:router-port=bar addresses=\"00:00:01:01:02:04\"
+
+# Connect alice to R2
+ovn-nbctl lrp-add R2 alice 00:00:02:01:02:03 172.16.1.1/24
+ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \
+    type=router options:router-port=alice addresses=\"00:00:02:01:02:03\"
+
+# Connect bob to R3
+ovn-nbctl lrp-add R3 bob 00:00:03:01:02:03 172.16.1.2/24
+ovn-nbctl lsp-add bob rp-bob -- set Logical_Switch_Port rp-bob \
+    type=router options:router-port=bob addresses=\"00:00:03:01:02:03\"
+
+# Connect R1 to join
+ovn-nbctl lrp-add R1 R1_join 00:00:04:01:02:03 20.0.0.1/24
+ovn-nbctl lsp-add join r1-join -- set Logical_Switch_Port r1-join \
+    type=router options:router-port=R1_join addresses='"00:00:04:01:02:03"'
+
+# Connect R2 to join
+ovn-nbctl lrp-add R2 R2_join 00:00:04:01:02:04 20.0.0.2/24
+ovn-nbctl lsp-add join r2-join -- set Logical_Switch_Port r2-join \
+    type=router options:router-port=R2_join addresses='"00:00:04:01:02:04"'
+
+# Connect R3 to join
+ovn-nbctl lrp-add R3 R3_join 00:00:04:01:02:05 20.0.0.3/24
+ovn-nbctl lsp-add join r3-join -- set Logical_Switch_Port r3-join \
+    type=router options:router-port=R3_join addresses='"00:00:04:01:02:05"'
+
+# Install static routes with source ip address as the policy for routing.
+# We want traffic from 'foo' to go via R2 and traffic of 'bar' to go via R3.
+ovn-nbctl --policy="src-ip" lr-route-add R1 192.168.1.0/24 20.0.0.2
+ovn-nbctl --policy="src-ip" lr-route-add R1 192.168.2.0/24 20.0.0.3
+
+# Static routes.
+ovn-nbctl lr-route-add R2 192.168.0.0/16 20.0.0.1
+ovn-nbctl lr-route-add R3 192.168.0.0/16 20.0.0.1
+
+# For gateway routers R2 and R3, set a force SNAT rule.
+ovn-nbctl set logical_router R2 options:dnat_force_snat_ip=20.0.0.2
+ovn-nbctl set logical_router R3 options:dnat_force_snat_ip=20.0.0.3
+
+# Logical port 'foo1' in switch 'foo'.
+ADD_NAMESPACES(foo1)
+ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
+         "192.168.1.1")
+ovn-nbctl lsp-add foo foo1 \
+-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2"
+
+# Logical port 'alice1' in switch 'alice'.
+ADD_NAMESPACES(alice1)
+ADD_VETH(alice1, alice1, br-int, "172.16.1.3/24", "f0:00:00:01:02:04", \
+         "172.16.1.1")
+ovn-nbctl lsp-add alice alice1 \
+-- lsp-set-addresses alice1 "f0:00:00:01:02:04 172.16.1.3"
+
+# Logical port 'bar1' in switch 'bar'.
+ADD_NAMESPACES(bar1)
+ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:05", \
+"192.168.2.1")
+ovn-nbctl lsp-add bar bar1 \
+-- lsp-set-addresses bar1 "f0:00:00:01:02:05 192.168.2.2"
+
+# Logical port 'bob1' in switch 'bob'.
+ADD_NAMESPACES(bob1)
+ADD_VETH(bob1, bob1, br-int, "172.16.1.4/24", "f0:00:00:01:02:06", \
+         "172.16.1.2")
+ovn-nbctl lsp-add bob bob1 \
+-- lsp-set-addresses bob1 "f0:00:00:01:02:06 172.16.1.4"
+
+# Router R2
+# Add a DNAT rule.
+ovn-nbctl -- --id=@nat create nat type="dnat" logical_ip=192.168.1.2 \
+    external_ip=30.0.0.2 -- add logical_router R2 nat @nat
+
+# Add a SNAT rule
+ovn-nbctl -- --id=@nat create nat type="snat" logical_ip=192.168.1.2 \
+    external_ip=30.0.0.1 -- add logical_router R2 nat @nat
+
+# Router R3
+# Add a DNAT rule.
+ovn-nbctl -- --id=@nat create nat type="dnat" logical_ip=192.168.1.2 \
+    external_ip=30.0.0.3 -- add logical_router R3 nat @nat
+
+# Add a SNAT rule
+ovn-nbctl -- --id=@nat create nat type="snat" logical_ip=192.168.2.2 \
+    external_ip=30.0.0.4 -- add logical_router R3 nat @nat
+
+# wait for ovn-controller to catch up.
+OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep ct\( | grep nat])
+
+# North-South DNAT: 'alice1' should be able to ping 'foo1' via 30.0.0.2
+NS_CHECK_EXEC([alice1], [ping -q -c 3 -i 0.3 -w 2 30.0.0.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Check conntrack entries.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.3) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.1.3,dst=30.0.0.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.1.3,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# But foo1 should receive traffic from 20.0.0.2
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(20.0.0.2) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.1.3,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=20.0.0.2,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# North-South DNAT: 'bob1' should be able to ping 'foo1' via 30.0.0.3
+NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 30.0.0.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Check conntrack entries.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.4) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.1.4,dst=30.0.0.3,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=172.16.1.4,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# But foo1 should receive traffic from 20.0.0.3
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(20.0.0.3) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.1.4,dst=192.168.1.2,id=<cleared>,type=8,code=0),reply=(src=192.168.1.2,dst=20.0.0.3,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# South-North SNAT: 'bar1' pings 'bob1'. But 'bob1' receives traffic
+# from 30.0.0.4
+NS_CHECK_EXEC([bar1], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# We verify that SNAT indeed happened via 'dump-conntrack' command.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.4) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=192.168.2.2,dst=172.16.1.4,id=<cleared>,type=8,code=0),reply=(src=172.16.1.4,dst=30.0.0.4,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+# South-North SNAT: 'foo1' pings 'alice1'. But 'alice1' receives traffic
+# from 30.0.0.1
+NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 172.16.1.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# We verify that SNAT indeed happened via 'dump-conntrack' command.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=192.168.1.2,dst=172.16.1.3,id=<cleared>,type=8,code=0),reply=(src=172.16.1.3,dst=30.0.0.1,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
+
 AT_SETUP([ovn -- load-balancing])
 AT_KEYWORDS([ovnlb])
 
@@ -672,3 +891,170 @@  as
 OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
 /connection dropped.*/d"])
 AT_CLEANUP
+
+AT_SETUP([ovn -- multiple gateway routers, load-balancing])
+AT_KEYWORDS([ovnlb])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+# Logical network:
+# Three LRs - R1, R2 and R3 that are connected to each other via LS "join"
+# in 20.0.0.0/24 network. R1 has switches foo (192.168.1.0/24) and
+# bar (192.168.2.0/24) connected to it. R2 has alice (172.16.1.0/24) connected
+# to it.  R3 has bob (172.16.1.0/24) connected to it. Note how both alice and
+# bob have the same subnet behind them.  We are trying to simulate an external
+# network via those 2 switches. In the real world the switch ports of these
+# switches will have addresses set as "unknown" to make them learning switches.
+# Or those switches will be "localnet" ones.
+#
+#    foo -- R1 -- join - R2 -- alice
+#           |          |
+#    bar ----          - R3 --- bob
+
+ovn-nbctl create Logical_Router name=R1
+ovn-nbctl create Logical_Router name=R2 options:chassis=hv1
+ovn-nbctl create Logical_Router name=R3 options:chassis=hv1
+
+ovn-nbctl ls-add foo
+ovn-nbctl ls-add bar
+ovn-nbctl ls-add alice
+ovn-nbctl ls-add bob
+ovn-nbctl ls-add join
+
+# Connect foo to R1
+ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24
+ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
+    type=router options:router-port=foo addresses=\"00:00:01:01:02:03\"
+
+# Connect bar to R1
+ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24
+ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \
+    type=router options:router-port=bar addresses=\"00:00:01:01:02:04\"
+
+# Connect alice to R2
+ovn-nbctl lrp-add R2 alice 00:00:02:01:02:03 172.16.1.1/24
+ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \
+    type=router options:router-port=alice addresses=\"00:00:02:01:02:03\"
+
+# Connect bob to R3
+ovn-nbctl lrp-add R3 bob 00:00:03:01:02:03 172.16.1.2/24
+ovn-nbctl lsp-add bob rp-bob -- set Logical_Switch_Port rp-bob \
+    type=router options:router-port=bob addresses=\"00:00:03:01:02:03\"
+
+# Connect R1 to join
+ovn-nbctl lrp-add R1 R1_join 00:00:04:01:02:03 20.0.0.1/24
+ovn-nbctl lsp-add join r1-join -- set Logical_Switch_Port r1-join \
+    type=router options:router-port=R1_join addresses='"00:00:04:01:02:03"'
+
+# Connect R2 to join
+ovn-nbctl lrp-add R2 R2_join 00:00:04:01:02:04 20.0.0.2/24
+ovn-nbctl lsp-add join r2-join -- set Logical_Switch_Port r2-join \
+    type=router options:router-port=R2_join addresses='"00:00:04:01:02:04"'
+
+# Connect R3 to join
+ovn-nbctl lrp-add R3 R3_join 00:00:04:01:02:05 20.0.0.3/24
+ovn-nbctl lsp-add join r3-join -- set Logical_Switch_Port r3-join \
+    type=router options:router-port=R3_join addresses='"00:00:04:01:02:05"'
+
+# Install static routes with source ip address as the policy for routing.
+# We want traffic from 'foo' to go via R2 and traffic of 'bar' to go via R3.
+ovn-nbctl --policy="src-ip" lr-route-add R1 192.168.1.0/24 20.0.0.2
+ovn-nbctl --policy="src-ip" lr-route-add R1 192.168.2.0/24 20.0.0.3
+
+# Static routes.
+ovn-nbctl lr-route-add R2 192.168.0.0/16 20.0.0.1
+ovn-nbctl lr-route-add R3 192.168.0.0/16 20.0.0.1
+
+# For gateway routers R2 and R3, set a force SNAT rule.
+ovn-nbctl set logical_router R2 options:lb_force_snat_ip=20.0.0.2
+ovn-nbctl set logical_router R3 options:lb_force_snat_ip=20.0.0.3
+
+# Logical port 'foo1' in switch 'foo'.
+ADD_NAMESPACES(foo1)
+ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
+         "192.168.1.1")
+ovn-nbctl lsp-add foo foo1 \
+-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2"
+
+# Logical port 'alice1' in switch 'alice'.
+ADD_NAMESPACES(alice1)
+ADD_VETH(alice1, alice1, br-int, "172.16.1.3/24", "f0:00:00:01:02:04", \
+         "172.16.1.1")
+ovn-nbctl lsp-add alice alice1 \
+-- lsp-set-addresses alice1 "f0:00:00:01:02:04 172.16.1.3"
+
+# Logical port 'bar1' in switch 'bar'.
+ADD_NAMESPACES(bar1)
+ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:05", \
+"192.168.2.1")
+ovn-nbctl lsp-add bar bar1 \
+-- lsp-set-addresses bar1 "f0:00:00:01:02:05 192.168.2.2"
+
+# Logical port 'bob1' in switch 'bob'.
+ADD_NAMESPACES(bob1)
+ADD_VETH(bob1, bob1, br-int, "172.16.1.4/24", "f0:00:00:01:02:06", \
+         "172.16.1.2")
+ovn-nbctl lsp-add bob bob1 \
+-- lsp-set-addresses bob1 "f0:00:00:01:02:06 172.16.1.4"
+
+# Config OVN load-balancer with a VIP.
+uuid=`ovn-nbctl  create load_balancer vips:30.0.0.1="192.168.1.2,192.168.2.2"`
+ovn-nbctl set logical_router R2 load_balancer=$uuid
+ovn-nbctl set logical_router R3 load_balancer=$uuid
+
+# Wait for ovn-controller to catch up.
+OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | grep ct\(])
+
+# Start webservers in 'foo1', 'bar1'.
+NETNS_DAEMONIZE([foo1], [[$PYTHON $srcdir/test-l7.py]], [http1.pid])
+NETNS_DAEMONIZE([bar1], [[$PYTHON $srcdir/test-l7.py]], [http2.pid])
+
+dnl Should work with the virtual IP address through NAT
+for i in `seq 1 20`; do
+    echo Request $i
+    NS_CHECK_EXEC([alice1], [wget 30.0.0.1 -t 5 -T 1 --retry-connrefused -v -o wget$i.log])
+done
+
+dnl Each server should have at least one connection.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) |
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+tcp,orig=(src=172.16.1.3,dst=30.0.0.1,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=172.16.1.3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
+tcp,orig=(src=172.16.1.3,dst=30.0.0.1,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=172.16.1.3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
+])
+
+dnl Force SNAT should have worked.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(20.0.0) |
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+tcp,orig=(src=172.16.1.3,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.1.2,dst=20.0.0.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
+tcp,orig=(src=172.16.1.3,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=20.0.0.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
+])
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP