diff mbox series

[ovs-dev,ovn,branch-20.06,7/7] ovn-northd: Fix logical flows to limit ARP/NS broadcast domain.

Message ID 20200810105435.25272.92714.stgit@dceara.remote.csb
State Accepted
Headers show
Series Backport ARP/NS responder flow explosion fixes. | expand

Commit Message

Dumitru Ceara Aug. 10, 2020, 10:54 a.m. UTC
Logical flows that limit the ARP/NS broadcast domain on a logical switch
should only match on ARP requests/NS for IPs that can actually be
replied to on the connected router port (i.e., an IP on the same network
is configured on the router port).

Reported-by: Girish Moodalbail <gmoodalbail@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2020-June/050287.html
Fixes: 32f5ebb06226 ("ovn-northd: Limit ARP/ND broadcast domain whenever possible.")
Acked-by: Numan Siddique <numans@ovn.org>
Signed-off-by: Dumitru Ceara <dceara@redhat.com>

(cherry-picked from master commit 1e07781310d8155997672bdce01a2ff4f5a93e83)
---
 northd/ovn-northd.c |  162 ++++++++++++++++++++++++++++++++++++++++++---------
 tests/ovn.at        |   74 +++++++++++++++++++++++
 2 files changed, 208 insertions(+), 28 deletions(-)
diff mbox series

Patch

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index f8401d7..1134528 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -6090,6 +6090,42 @@  build_lrouter_groups(struct hmap *ports, struct ovs_list *lr_list)
     }
 }
 
+/* Returns 'true' if the IPv4 'addr' is on the same subnet with one of the
+ * IPs configured on the router port.
+ */
+static bool
+lrouter_port_ipv4_reachable(const struct ovn_port *op, ovs_be32 addr)
+{
+    for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+        struct ipv4_netaddr *op_addr = &op->lrp_networks.ipv4_addrs[i];
+
+        if ((addr & op_addr->mask) == op_addr->network) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/* Returns 'true' if the IPv6 'addr' is on the same subnet with one of the
+ * IPs configured on the router port.
+ */
+static bool
+lrouter_port_ipv6_reachable(const struct ovn_port *op,
+                            const struct in6_addr *addr)
+{
+    for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+        struct ipv6_netaddr *op_addr = &op->lrp_networks.ipv6_addrs[i];
+
+        struct in6_addr nat_addr6_masked =
+            ipv6_addr_bitand(addr, &op_addr->mask);
+
+        if (ipv6_addr_equals(&nat_addr6_masked, &op_addr->network)) {
+            return true;
+        }
+    }
+    return false;
+}
+
 /*
  * Ingress table 19: Flows that flood self originated ARP/ND packets in the
  * switching domain.
@@ -6100,8 +6136,47 @@  build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op,
                                            struct ovn_datapath *od,
                                            struct hmap *lflows)
 {
-    struct ds match = DS_EMPTY_INITIALIZER;
+    struct sset all_eth_addrs = SSET_INITIALIZER(&all_eth_addrs);
     struct ds eth_src = DS_EMPTY_INITIALIZER;
+    struct ds match = DS_EMPTY_INITIALIZER;
+
+    sset_add(&all_eth_addrs, op->lrp_networks.ea_s);
+
+    for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
+        struct ovn_nat *nat_entry = &op->od->nat_entries[i];
+        const struct nbrec_nat *nat = nat_entry->nb;
+
+        if (!nat_entry_is_valid(nat_entry)) {
+            continue;
+        }
+
+        if (!strcmp(nat->type, "snat")) {
+            continue;
+        }
+
+        if (!nat->external_mac) {
+            continue;
+        }
+
+        /* Check if the ovn port has a network configured on which we could
+         * expect ARP requests/NS for the DNAT external_ip.
+         */
+        if (nat_entry_is_v6(nat_entry)) {
+            struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr;
+
+            if (!lrouter_port_ipv6_reachable(op, addr)) {
+                continue;
+            }
+        } else {
+            ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr;
+
+            if (!lrouter_port_ipv4_reachable(op, addr)) {
+                continue;
+            }
+        }
+        sset_add(&all_eth_addrs, nat->external_mac);
+    }
+
 
     /* Self originated (G)ARP requests/ND need to be flooded as usual.
      * Determine that packets are self originated by also matching on
@@ -6109,15 +6184,11 @@  build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op,
      * is a VLAN-backed network.
      * Priority: 80.
      */
-    ds_put_format(&eth_src, "{ %s, ", op->lrp_networks.ea_s);
-    for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
-        const struct nbrec_nat *nat = op->od->nbr->nat[i];
-
-        if (!nat->external_mac) {
-            continue;
-        }
+    const char *eth_addr;
 
-        ds_put_format(&eth_src, "%s, ", nat->external_mac);
+    ds_put_cstr(&eth_src, "{");
+    SSET_FOR_EACH (eth_addr, &all_eth_addrs) {
+        ds_put_format(&eth_src, "%s, ", eth_addr);
     }
     ds_chomp(&eth_src, ' ');
     ds_chomp(&eth_src, ',');
@@ -6129,8 +6200,9 @@  build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op,
                   ds_cstr(&match),
                   "outport = \""MC_FLOOD"\"; output;");
 
-    ds_destroy(&match);
+    sset_destroy(&all_eth_addrs);
     ds_destroy(&eth_src);
+    ds_destroy(&match);
 }
 
 /*
@@ -6221,38 +6293,72 @@  build_lswitch_rport_arp_req_flows(struct ovn_port *op,
     struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
     struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
 
-    for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-        sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s);
-    }
-    for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-        sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s);
+    get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
+
+    const char *ip_addr;
+    const char *ip_addr_next;
+    SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v4) {
+        ovs_be32 ipv4_addr;
+
+        /* Check if the ovn port has a network configured on which we could
+         * expect ARP requests for the LB VIP.
+         */
+        if (ip_parse(ip_addr, &ipv4_addr) &&
+                lrouter_port_ipv4_reachable(op, ipv4_addr)) {
+            continue;
+        }
+
+        sset_delete(&all_ips_v4, SSET_NODE_FROM_NAME(ip_addr));
     }
+    SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v6) {
+        struct in6_addr ipv6_addr;
 
-    get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
+        /* Check if the ovn port has a network configured on which we could
+         * expect NS requests for the LB VIP.
+         */
+        if (ipv6_parse(ip_addr, &ipv6_addr) &&
+                lrouter_port_ipv6_reachable(op, &ipv6_addr)) {
+            continue;
+        }
+
+        sset_delete(&all_ips_v6, SSET_NODE_FROM_NAME(ip_addr));
+    }
 
     for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
-        const struct nbrec_nat *nat = op->od->nbr->nat[i];
+        struct ovn_nat *nat_entry = &op->od->nat_entries[i];
+        const struct nbrec_nat *nat = nat_entry->nb;
+
+        if (!nat_entry_is_valid(nat_entry)) {
+            continue;
+        }
 
         if (!strcmp(nat->type, "snat")) {
             continue;
         }
 
-        ovs_be32 ip;
-        ovs_be32 mask;
-        struct in6_addr ipv6;
-        struct in6_addr mask_v6;
+        /* Check if the ovn port has a network configured on which we could
+         * expect ARP requests/NS for the DNAT external_ip.
+         */
+        if (nat_entry_is_v6(nat_entry)) {
+            struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr;
 
-        char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
-        if (error) {
-            free(error);
-            error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6);
-            if (!error) {
+            if (lrouter_port_ipv6_reachable(op, addr)) {
                 sset_add(&all_ips_v6, nat->external_ip);
             }
         } else {
-            sset_add(&all_ips_v4, nat->external_ip);
+            ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr;
+
+            if (lrouter_port_ipv4_reachable(op, addr)) {
+                sset_add(&all_ips_v4, nat->external_ip);
+            }
         }
-        free(error);
+    }
+
+    for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+        sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s);
+    }
+    for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+        sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s);
     }
 
     if (!sset_is_empty(&all_ips_v4)) {
diff --git a/tests/ovn.at b/tests/ovn.at
index c99675f..9fa433c 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -19015,12 +19015,14 @@  ovn-nbctl --wait=hv sync
 
 sw_dp_uuid=$(ovn-sbctl --bare --columns _uuid list datapath_binding sw-agg)
 sw_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding sw-agg)
+sw1_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding sw1)
 r1_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding rtr1)
 
 r1_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list port_binding sw-rtr1)
 r2_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list port_binding sw-rtr2)
 
 match_sw_metadata="metadata=0x${sw_dp_key}"
+match_sw1_metadata="metadata=0x${sw1_dp_key}"
 match_r1_metadata="metadata=0x${r1_dp_key}"
 
 # Inject ARP request for first router owned IP address.
@@ -19136,6 +19138,78 @@  ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10.0.0.122 20.0.0.12 sw1-p2 00:00:00:02:
 ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10::122 20::12 sw1-p2 00:00:00:02:00:00
 ovn-nbctl --wait=hv sync
 
+# Check that broadcast domain limiting flows match only on IPs that are
+# directly reachable via the router port.
+# For sw-rtr1:
+# - 10.0.0.1, 10::1, fe80::200:ff:fe00:100 - interface IPs.
+# - 10.0.0.11, 10::11 - LB VIPs.
+# - 10.0.0.111, 10.0.0.121, 10.0.0.122, 10::111, 10::121, 10::122 - DNAT IPs.
+# For sw-rtr2:
+# - 10.0.0.2, 10::2, fe80::200:ff:fe00:200 - interface IPs.
+# - 10.0.0.22, 10::22 - LB VIPs.
+# - 10.0.0.222, 10::222 - DNAT IPs.
+as hv1
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=75,.*${match_sw_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" | sort], [0], [dnl
+arp_tpa=10.0.0.1
+arp_tpa=10.0.0.11
+arp_tpa=10.0.0.111
+arp_tpa=10.0.0.121
+arp_tpa=10.0.0.122
+arp_tpa=10.0.0.2
+arp_tpa=10.0.0.22
+arp_tpa=10.0.0.222
+])
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=75,.*${match_sw_metadata}" | grep -oE "nd_target=[[0-9a-f:]]+" | sort], [0], [dnl
+nd_target=10::1
+nd_target=10::11
+nd_target=10::111
+nd_target=10::121
+nd_target=10::122
+nd_target=10::2
+nd_target=10::22
+nd_target=10::222
+nd_target=fe80::200:ff:fe00:100
+nd_target=fe80::200:ff:fe00:200
+])
+
+# For sw1-rtr1:
+# - 20.0.0.1, 20::1, fe80::200:1ff:fe00:0 - interface IPs.
+as hv1
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=75,.*${match_sw1_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" | sort], [0], [dnl
+arp_tpa=20.0.0.1
+])
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=75,.*${match_sw1_metadata}" | grep -oE "nd_target=[[0-9a-f:]]+" | sort], [0], [dnl
+nd_target=20::1
+nd_target=fe80::200:1ff:fe00:0
+])
+
+# Self originated ARP/NS with SMACs owned by rtr1-sw and rtr2-sw should be
+# flooded:
+# - 00:00:00:00:01:00 - interface MAC (rtr1-sw).
+# - 00:00:00:00:02:00 - interface MAC (rtr2-sw).
+# - 00:00:00:01:00:00 - dnat_and_snat external MAC.
+# - 00:00:00:02:00:00 - dnat_and_snat external MAC.
+as hv1
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=80,.*${match_sw_metadata}.*arp_op=1" | grep -oE "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl
+dl_src=00:00:00:00:01:00
+dl_src=00:00:00:00:02:00
+dl_src=00:00:00:01:00:00
+dl_src=00:00:00:02:00:00
+])
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=80,.*${match_sw_metadata}.*icmp_type=135" | grep -oE "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl
+dl_src=00:00:00:00:01:00
+dl_src=00:00:00:00:02:00
+dl_src=00:00:00:01:00:00
+dl_src=00:00:00:02:00:00
+])
+
+# Self originated ARP/NS with SMACs owned by rtr1-sw1 should be flooded:
+# - 00:00:01:00:00:00 - interface MAC.
+as hv1
+AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=80,.*${match_sw1_metadata}.*arp_op=1" | grep -oE "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl
+dl_src=00:00:01:00:00:00
+])
+
 # Inject ARP request for first router owned NAT address.
 send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 111)