Message ID | 20200708150301.10666.86582.stgit@dceara.remote.csb |
---|---|
State | Superseded |
Headers | show |
Series | Reduce number of openflows generated for limiting the broadcast domain. | expand |
On Wed, Jul 8, 2020 at 8:34 PM Dumitru Ceara <dceara@redhat.com> wrote: > Logical flows that limit the ARP/NS broadcast domain on a logical switch > should only match on ARP requests/NS for IPs that can actually be > replied to on the connected router port (i.e., an IP on the same network > is configured on the router port). > > Reported-by: Girish Moodalbail <gmoodalbail@gmail.com> > Reported-at: > https://mail.openvswitch.org/pipermail/ovs-discuss/2020-June/050287.html > Fixes: 32f5ebb06226 ("ovn-northd: Limit ARP/ND broadcast domain whenever > possible.") > Signed-off-by: Dumitru Ceara <dceara@redhat.com> > Hi Dumitru, The patch LGTM. I've few nits. Thanks Numan > --- > northd/ovn-northd.c | 164 > ++++++++++++++++++++++++++++++++++++++++++--------- > tests/ovn.at | 74 +++++++++++++++++++++++ > 2 files changed, 210 insertions(+), 28 deletions(-) > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > index 4c23158..e2e875a 100644 > --- a/northd/ovn-northd.c > +++ b/northd/ovn-northd.c > @@ -6091,6 +6091,44 @@ build_lrouter_groups(struct hmap *ports, struct > ovs_list *lr_list) > } > } > > +/* Returns 'true' if the IPv4 'addr' is on the same subnet with one of the > + * IPs configured on the router port. > + */ > +static bool > +lrouter_port_ipv4_reachable(const struct ovn_port *op, ovs_be32 addr) > +{ > + for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { > + struct ipv4_netaddr *op_addr = &op->lrp_networks.ipv4_addrs[i]; > + > + if ((op_addr->addr & op_addr->mask) == (addr & op_addr->mask)) { > This can be - if ((addr & op_addr->mask) == (op_addr->network)) { If you see lib/ovn-util.c, network is initialized as - addr & mask. > + return true; > + } > + } > + return false; > +} > + > +/* Returns 'true' if the IPv6 'addr' is on the same subnet with one of the > + * IPs configured on the router port. > + */ > +static bool > +lrouter_port_ipv6_reachable(const struct ovn_port *op, > + const struct in6_addr *addr) > +{ > + for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { > + struct ipv6_netaddr *op_addr = &op->lrp_networks.ipv6_addrs[i]; > + > + struct in6_addr op_addr6_masked = > + ipv6_addr_bitand(&op_addr->addr, &op_addr->mask); > Same as above. We don't need to do this and instead we can use - op_addr->network below in ipv6_addr_equals() > + struct in6_addr nat_addr6_masked = > + ipv6_addr_bitand(addr, &op_addr->mask); > + > + if (ipv6_addr_equals(&op_addr6_masked, &nat_addr6_masked)) { + return true; > + } > + } > + return false; > +} > + > /* > * Ingress table 19: Flows that flood self originated ARP/ND packets in > the > * switching domain. > @@ -6101,8 +6139,47 @@ build_lswitch_rport_arp_req_self_orig_flow(struct > ovn_port *op, > struct ovn_datapath *od, > struct hmap *lflows) > { > - struct ds match = DS_EMPTY_INITIALIZER; > + struct sset all_eth_addrs = SSET_INITIALIZER(&all_eth_addrs); > struct ds eth_src = DS_EMPTY_INITIALIZER; > + struct ds match = DS_EMPTY_INITIALIZER; > + > + sset_add(&all_eth_addrs, op->lrp_networks.ea_s); > + > + for (size_t i = 0; i < op->od->nbr->n_nat; i++) { > + struct ovn_nat *nat_entry = &op->od->nat_entries[i]; > + const struct nbrec_nat *nat = nat_entry->nb; > + > + if (!nat_entry_is_valid(nat_entry)) { > + continue; > + } > + > + if (!strcmp(nat->type, "snat")) { > + continue; > + } > + > + if (!nat->external_mac) { > + continue; > + } > + > + /* Check if there's a network configured on op on which we could > s/op/ovn port or s/op/'op' I couldn't understand what op is. So either putting it in quotes or referencing it as an "ovn port" would be easier to understand. > + * expect ARP requests/NS for the DNAT external_ip. > + */ > + if (nat_entry_is_v6(nat_entry)) { > + struct in6_addr *addr = > &nat_entry->ext_addrs.ipv6_addrs[0].addr; > + > + if (!lrouter_port_ipv6_reachable(op, addr)) { > + continue; > + } > + } else { > + ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr; > + > + if (!lrouter_port_ipv4_reachable(op, addr)) { > + continue; > + } > + } > + sset_add(&all_eth_addrs, nat->external_mac); > + } > + > > /* Self originated (G)ARP requests/ND need to be flooded as usual. > * Determine that packets are self originated by also matching on > @@ -6110,15 +6187,11 @@ build_lswitch_rport_arp_req_self_orig_flow(struct > ovn_port *op, > * is a VLAN-backed network. > * Priority: 80. > */ > - ds_put_format(ð_src, "{ %s, ", op->lrp_networks.ea_s); > - for (size_t i = 0; i < op->od->nbr->n_nat; i++) { > - const struct nbrec_nat *nat = op->od->nbr->nat[i]; > - > - if (!nat->external_mac) { > - continue; > - } > + const char *eth_addr; > > - ds_put_format(ð_src, "%s, ", nat->external_mac); > + ds_put_cstr(ð_src, "{"); > + SSET_FOR_EACH (eth_addr, &all_eth_addrs) { > + ds_put_format(ð_src, "%s, ", eth_addr); > } > ds_chomp(ð_src, ' '); > ds_chomp(ð_src, ','); > @@ -6130,8 +6203,9 @@ build_lswitch_rport_arp_req_self_orig_flow(struct > ovn_port *op, > ds_cstr(&match), > "outport = \""MC_FLOOD"\"; output;"); > > - ds_destroy(&match); > + sset_destroy(&all_eth_addrs); > ds_destroy(ð_src); > + ds_destroy(&match); > } > > /* > @@ -6222,38 +6296,72 @@ build_lswitch_rport_arp_req_flows(struct ovn_port > *op, > struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); > struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); > > - for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { > - sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s); > - } > - for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { > - sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s); > + get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); > + > + const char *ip_addr; > + const char *ip_addr_next; > + SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v4) { > + ovs_be32 ipv4_addr; > + > + /* Check if there's a network configured on op on which we could > + * expect ARP requests for the LB VIP. > + */ > + if (ip_parse(ip_addr, &ipv4_addr) && > + lrouter_port_ipv4_reachable(op, ipv4_addr)) { > + continue; > + } > + > + sset_delete(&all_ips_v4, SSET_NODE_FROM_NAME(ip_addr)); > } > + SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v6) { > + struct in6_addr ipv6_addr; > > - get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); > + /* Check if there's a network configured on op on which we could > + * expect NS requests for the LB VIP. > + */ > + if (ipv6_parse(ip_addr, &ipv6_addr) && > + lrouter_port_ipv6_reachable(op, &ipv6_addr)) { > + continue; > + } > + > + sset_delete(&all_ips_v6, SSET_NODE_FROM_NAME(ip_addr)); > + } > > for (size_t i = 0; i < op->od->nbr->n_nat; i++) { > - const struct nbrec_nat *nat = op->od->nbr->nat[i]; > + struct ovn_nat *nat_entry = &op->od->nat_entries[i]; > + const struct nbrec_nat *nat = nat_entry->nb; > + > + if (!nat_entry_is_valid(nat_entry)) { > + continue; > + } > > if (!strcmp(nat->type, "snat")) { > continue; > } > > - ovs_be32 ip; > - ovs_be32 mask; > - struct in6_addr ipv6; > - struct in6_addr mask_v6; > + /* Check if there's a network configured on op on which we could > + * expect ARP requests/NS for the DNAT external_ip. > + */ > + if (nat_entry_is_v6(nat_entry)) { > + struct in6_addr *addr = > &nat_entry->ext_addrs.ipv6_addrs[0].addr; > > - char *error = ip_parse_masked(nat->external_ip, &ip, &mask); > - if (error) { > - free(error); > - error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6); > - if (!error) { > + if (lrouter_port_ipv6_reachable(op, addr)) { > sset_add(&all_ips_v6, nat->external_ip); > } > } else { > - sset_add(&all_ips_v4, nat->external_ip); > + ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr; > + > + if (lrouter_port_ipv4_reachable(op, addr)) { > + sset_add(&all_ips_v4, nat->external_ip); > + } > } > - free(error); > + } > + > + for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { > + sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s); > + } > + for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { > + sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s); > } > > if (!sset_is_empty(&all_ips_v4)) { > diff --git a/tests/ovn.at b/tests/ovn.at > index 24d93bc..88700e8 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -19249,12 +19249,14 @@ ovn-nbctl --wait=hv sync > > sw_dp_uuid=$(ovn-sbctl --bare --columns _uuid list datapath_binding > sw-agg) > sw_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding > sw-agg) > +sw1_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding > sw1) > r1_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding > rtr1) > > r1_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list port_binding > sw-rtr1) > r2_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list port_binding > sw-rtr2) > > match_sw_metadata="metadata=0x${sw_dp_key}" > +match_sw1_metadata="metadata=0x${sw1_dp_key}" > match_r1_metadata="metadata=0x${r1_dp_key}" > > # Inject ARP request for first router owned IP address. > @@ -19370,6 +19372,78 @@ ovn-nbctl lr-nat-add rtr1 dnat_and_snat > 10.0.0.122 20.0.0.12 sw1-p2 00:00:00:02: > ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10::122 20::12 sw1-p2 > 00:00:00:02:00:00 > ovn-nbctl --wait=hv sync > > +# Check that broadcast domain limiting flows match only on IPs that are > +# directly reachable via the router port. > +# For sw-rtr1: > +# - 10.0.0.1, 10::1, fe80::200:ff:fe00:100 - interface IPs. > +# - 10.0.0.11, 10::11 - LB VIPs. > +# - 10.0.0.111, 10.0.0.121, 10.0.0.122, 10::111, 10::121, 10::122 - DNAT > IPs. > +# For sw-rtr2: > +# - 10.0.0.2, 10::2, fe80::200:ff:fe00:200 - interface IPs. > +# - 10.0.0.22, 10::22 - LB VIPs. > +# - 10.0.0.222, 10::222 - DNAT IPs. > +as hv1 > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=75,.*${match_sw_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" | > sort], [0], [dnl > +arp_tpa=10.0.0.1 > +arp_tpa=10.0.0.11 > +arp_tpa=10.0.0.111 > +arp_tpa=10.0.0.121 > +arp_tpa=10.0.0.122 > +arp_tpa=10.0.0.2 > +arp_tpa=10.0.0.22 > +arp_tpa=10.0.0.222 > +]) > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=75,.*${match_sw_metadata}" | grep -oE "nd_target=[[0-9a-f:]]+" | > sort], [0], [dnl > +nd_target=10::1 > +nd_target=10::11 > +nd_target=10::111 > +nd_target=10::121 > +nd_target=10::122 > +nd_target=10::2 > +nd_target=10::22 > +nd_target=10::222 > +nd_target=fe80::200:ff:fe00:100 > +nd_target=fe80::200:ff:fe00:200 > +]) > + > +# For sw1-rtr1: > +# - 20.0.0.1, 20::1, fe80::200:1ff:fe00:0 - interface IPs. > +as hv1 > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=75,.*${match_sw1_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" | > sort], [0], [dnl > +arp_tpa=20.0.0.1 > +]) > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=75,.*${match_sw1_metadata}" | grep -oE "nd_target=[[0-9a-f:]]+" | > sort], [0], [dnl > +nd_target=20::1 > +nd_target=fe80::200:1ff:fe00:0 > +]) > + > +# Self originated ARP/NS with SMACs owned by rtr1-sw and rtr2-sw should be > +# flooded: > +# - 00:00:00:00:01:00 - interface MAC (rtr1-sw). > +# - 00:00:00:00:02:00 - interface MAC (rtr2-sw). > +# - 00:00:00:01:00:00 - dnat_and_snat external MAC. > +# - 00:00:00:02:00:00 - dnat_and_snat external MAC. > +as hv1 > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=80,.*${match_sw_metadata}.*arp_op=1" | grep -oE > "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl > +dl_src=00:00:00:00:01:00 > +dl_src=00:00:00:00:02:00 > +dl_src=00:00:00:01:00:00 > +dl_src=00:00:00:02:00:00 > +]) > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=80,.*${match_sw_metadata}.*icmp_type=135" | grep -oE > "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl > +dl_src=00:00:00:00:01:00 > +dl_src=00:00:00:00:02:00 > +dl_src=00:00:00:01:00:00 > +dl_src=00:00:00:02:00:00 > +]) > + > +# Self originated ARP/NS with SMACs owned by rtr1-sw1 should be flooded: > +# - 00:00:01:00:00:00 - interface MAC. > +as hv1 > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=80,.*${match_sw1_metadata}.*arp_op=1" | grep -oE > "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl > +dl_src=00:00:01:00:00:00 > +]) > + > # Inject ARP request for first router owned NAT address. > send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 > 0 111) > > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > >
On 7/9/20 8:15 PM, Numan Siddique wrote: > > > On Wed, Jul 8, 2020 at 8:34 PM Dumitru Ceara <dceara@redhat.com > <mailto:dceara@redhat.com>> wrote: > > Logical flows that limit the ARP/NS broadcast domain on a logical switch > should only match on ARP requests/NS for IPs that can actually be > replied to on the connected router port (i.e., an IP on the same network > is configured on the router port). > > Reported-by: Girish Moodalbail <gmoodalbail@gmail.com > <mailto:gmoodalbail@gmail.com>> > Reported-at: > https://mail.openvswitch.org/pipermail/ovs-discuss/2020-June/050287.html > Fixes: 32f5ebb06226 ("ovn-northd: Limit ARP/ND broadcast domain > whenever possible.") > Signed-off-by: Dumitru Ceara <dceara@redhat.com > <mailto:dceara@redhat.com>> > > > Hi Dumitru, > > The patch LGTM. I've few nits. > > Thanks > Numan > Hi Numan, Thanks for the review, I sent a v3: https://patchwork.ozlabs.org/project/openvswitch/patch/1594324577-17459-1-git-send-email-dceara@redhat.com/ Thanks, Dumitru > > --- > northd/ovn-northd.c | 164 > ++++++++++++++++++++++++++++++++++++++++++--------- > tests/ovn.at <http://ovn.at> | 74 +++++++++++++++++++++++ > 2 files changed, 210 insertions(+), 28 deletions(-) > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > index 4c23158..e2e875a 100644 > --- a/northd/ovn-northd.c > +++ b/northd/ovn-northd.c > @@ -6091,6 +6091,44 @@ build_lrouter_groups(struct hmap *ports, > struct ovs_list *lr_list) > } > } > > +/* Returns 'true' if the IPv4 'addr' is on the same subnet with one > of the > + * IPs configured on the router port. > + */ > +static bool > +lrouter_port_ipv4_reachable(const struct ovn_port *op, ovs_be32 addr) > +{ > + for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { > + struct ipv4_netaddr *op_addr = &op->lrp_networks.ipv4_addrs[i]; > + > + if ((op_addr->addr & op_addr->mask) == (addr & > op_addr->mask)) { > > > This can be - > if ((addr & op_addr->mask) == (op_addr->network)) { > > If you see lib/ovn-util.c, network is initialized as - addr & mask. > > > > > + return true; > + } > + } > + return false; > +} > + > +/* Returns 'true' if the IPv6 'addr' is on the same subnet with one > of the > + * IPs configured on the router port. > + */ > +static bool > +lrouter_port_ipv6_reachable(const struct ovn_port *op, > + const struct in6_addr *addr) > +{ > + for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { > + struct ipv6_netaddr *op_addr = &op->lrp_networks.ipv6_addrs[i]; > + > + struct in6_addr op_addr6_masked = > + ipv6_addr_bitand(&op_addr->addr, &op_addr->mask); > > > Same as above. We don't need to do this and instead we can use - > op_addr->network > below in ipv6_addr_equals() > > > + struct in6_addr nat_addr6_masked = > + ipv6_addr_bitand(addr, &op_addr->mask); > + > + if (ipv6_addr_equals(&op_addr6_masked, &nat_addr6_masked)) { > > + return true; > + } > + } > + return false; > +} > + > /* > * Ingress table 19: Flows that flood self originated ARP/ND > packets in the > * switching domain. > @@ -6101,8 +6139,47 @@ > build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op, > struct ovn_datapath *od, > struct hmap *lflows) > { > - struct ds match = DS_EMPTY_INITIALIZER; > + struct sset all_eth_addrs = SSET_INITIALIZER(&all_eth_addrs); > struct ds eth_src = DS_EMPTY_INITIALIZER; > + struct ds match = DS_EMPTY_INITIALIZER; > + > + sset_add(&all_eth_addrs, op->lrp_networks.ea_s); > + > + for (size_t i = 0; i < op->od->nbr->n_nat; i++) { > + struct ovn_nat *nat_entry = &op->od->nat_entries[i]; > + const struct nbrec_nat *nat = nat_entry->nb; > + > + if (!nat_entry_is_valid(nat_entry)) { > + continue; > + } > + > + if (!strcmp(nat->type, "snat")) { > + continue; > + } > + > + if (!nat->external_mac) { > + continue; > + } > + > + /* Check if there's a network configured on op on which we > could > > > s/op/ovn port > or > s/op/'op' > > I couldn't understand what op is. So either putting it in quotes or > referencing it as an > "ovn port" would be easier to understand. > > > > + * expect ARP requests/NS for the DNAT external_ip. > + */ > + if (nat_entry_is_v6(nat_entry)) { > + struct in6_addr *addr = > &nat_entry->ext_addrs.ipv6_addrs[0].addr; > + > + if (!lrouter_port_ipv6_reachable(op, addr)) { > + continue; > + } > + } else { > + ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr; > + > + if (!lrouter_port_ipv4_reachable(op, addr)) { > + continue; > + } > + } > + sset_add(&all_eth_addrs, nat->external_mac); > + } > + > > /* Self originated (G)ARP requests/ND need to be flooded as usual. > * Determine that packets are self originated by also matching on > @@ -6110,15 +6187,11 @@ > build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op, > * is a VLAN-backed network. > * Priority: 80. > */ > - ds_put_format(ð_src, "{ %s, ", op->lrp_networks.ea_s); > - for (size_t i = 0; i < op->od->nbr->n_nat; i++) { > - const struct nbrec_nat *nat = op->od->nbr->nat[i]; > - > - if (!nat->external_mac) { > - continue; > - } > + const char *eth_addr; > > - ds_put_format(ð_src, "%s, ", nat->external_mac); > + ds_put_cstr(ð_src, "{"); > + SSET_FOR_EACH (eth_addr, &all_eth_addrs) { > + ds_put_format(ð_src, "%s, ", eth_addr); > } > ds_chomp(ð_src, ' '); > ds_chomp(ð_src, ','); > @@ -6130,8 +6203,9 @@ > build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op, > ds_cstr(&match), > "outport = \""MC_FLOOD"\"; output;"); > > - ds_destroy(&match); > + sset_destroy(&all_eth_addrs); > ds_destroy(ð_src); > + ds_destroy(&match); > } > > /* > @@ -6222,38 +6296,72 @@ build_lswitch_rport_arp_req_flows(struct > ovn_port *op, > struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); > struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); > > - for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { > - sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s); > - } > - for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { > - sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s); > + get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); > + > + const char *ip_addr; > + const char *ip_addr_next; > + SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v4) { > + ovs_be32 ipv4_addr; > + > + /* Check if there's a network configured on op on which we > could > + * expect ARP requests for the LB VIP. > + */ > + if (ip_parse(ip_addr, &ipv4_addr) && > + lrouter_port_ipv4_reachable(op, ipv4_addr)) { > + continue; > + } > + > + sset_delete(&all_ips_v4, SSET_NODE_FROM_NAME(ip_addr)); > } > + SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v6) { > + struct in6_addr ipv6_addr; > > - get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); > + /* Check if there's a network configured on op on which we > could > + * expect NS requests for the LB VIP. > + */ > + if (ipv6_parse(ip_addr, &ipv6_addr) && > + lrouter_port_ipv6_reachable(op, &ipv6_addr)) { > + continue; > + } > + > + sset_delete(&all_ips_v6, SSET_NODE_FROM_NAME(ip_addr)); > + } > > for (size_t i = 0; i < op->od->nbr->n_nat; i++) { > - const struct nbrec_nat *nat = op->od->nbr->nat[i]; > + struct ovn_nat *nat_entry = &op->od->nat_entries[i]; > + const struct nbrec_nat *nat = nat_entry->nb; > + > + if (!nat_entry_is_valid(nat_entry)) { > + continue; > + } > > if (!strcmp(nat->type, "snat")) { > continue; > } > > - ovs_be32 ip; > - ovs_be32 mask; > - struct in6_addr ipv6; > - struct in6_addr mask_v6; > + /* Check if there's a network configured on op on which we > could > + * expect ARP requests/NS for the DNAT external_ip. > + */ > + if (nat_entry_is_v6(nat_entry)) { > + struct in6_addr *addr = > &nat_entry->ext_addrs.ipv6_addrs[0].addr; > > - char *error = ip_parse_masked(nat->external_ip, &ip, &mask); > - if (error) { > - free(error); > - error = ipv6_parse_masked(nat->external_ip, &ipv6, > &mask_v6); > - if (!error) { > + if (lrouter_port_ipv6_reachable(op, addr)) { > sset_add(&all_ips_v6, nat->external_ip); > } > } else { > - sset_add(&all_ips_v4, nat->external_ip); > + ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr; > + > + if (lrouter_port_ipv4_reachable(op, addr)) { > + sset_add(&all_ips_v4, nat->external_ip); > + } > } > - free(error); > + } > + > + for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { > + sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s); > + } > + for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { > + sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s); > } > > if (!sset_is_empty(&all_ips_v4)) { > diff --git a/tests/ovn.at <http://ovn.at> b/tests/ovn.at <http://ovn.at> > index 24d93bc..88700e8 100644 > --- a/tests/ovn.at <http://ovn.at> > +++ b/tests/ovn.at <http://ovn.at> > @@ -19249,12 +19249,14 @@ ovn-nbctl --wait=hv sync > > sw_dp_uuid=$(ovn-sbctl --bare --columns _uuid list datapath_binding > sw-agg) > sw_dp_key=$(ovn-sbctl --bare --columns tunnel_key list > datapath_binding sw-agg) > +sw1_dp_key=$(ovn-sbctl --bare --columns tunnel_key list > datapath_binding sw1) > r1_dp_key=$(ovn-sbctl --bare --columns tunnel_key list > datapath_binding rtr1) > > r1_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list > port_binding sw-rtr1) > r2_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list > port_binding sw-rtr2) > > match_sw_metadata="metadata=0x${sw_dp_key}" > +match_sw1_metadata="metadata=0x${sw1_dp_key}" > match_r1_metadata="metadata=0x${r1_dp_key}" > > # Inject ARP request for first router owned IP address. > @@ -19370,6 +19372,78 @@ ovn-nbctl lr-nat-add rtr1 dnat_and_snat > 10.0.0.122 20.0.0.12 sw1-p2 00:00:00:02: > ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10::122 20::12 sw1-p2 > 00:00:00:02:00:00 > ovn-nbctl --wait=hv sync > > +# Check that broadcast domain limiting flows match only on IPs that are > +# directly reachable via the router port. > +# For sw-rtr1: > +# - 10.0.0.1, 10::1, fe80::200:ff:fe00:100 - interface IPs. > +# - 10.0.0.11, 10::11 - LB VIPs. > +# - 10.0.0.111, 10.0.0.121, 10.0.0.122, 10::111, 10::121, 10::122 - > DNAT IPs. > +# For sw-rtr2: > +# - 10.0.0.2, 10::2, fe80::200:ff:fe00:200 - interface IPs. > +# - 10.0.0.22, 10::22 - LB VIPs. > +# - 10.0.0.222, 10::222 - DNAT IPs. > +as hv1 > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=75,.*${match_sw_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" > | sort], [0], [dnl > +arp_tpa=10.0.0.1 > +arp_tpa=10.0.0.11 > +arp_tpa=10.0.0.111 > +arp_tpa=10.0.0.121 > +arp_tpa=10.0.0.122 > +arp_tpa=10.0.0.2 > +arp_tpa=10.0.0.22 > +arp_tpa=10.0.0.222 > +]) > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=75,.*${match_sw_metadata}" | grep -oE > "nd_target=[[0-9a-f:]]+" | sort], [0], [dnl > +nd_target=10::1 > +nd_target=10::11 > +nd_target=10::111 > +nd_target=10::121 > +nd_target=10::122 > +nd_target=10::2 > +nd_target=10::22 > +nd_target=10::222 > +nd_target=fe80::200:ff:fe00:100 > +nd_target=fe80::200:ff:fe00:200 > +]) > + > +# For sw1-rtr1: > +# - 20.0.0.1, 20::1, fe80::200:1ff:fe00:0 - interface IPs. > +as hv1 > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=75,.*${match_sw1_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" > | sort], [0], [dnl > +arp_tpa=20.0.0.1 > +]) > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=75,.*${match_sw1_metadata}" | grep -oE > "nd_target=[[0-9a-f:]]+" | sort], [0], [dnl > +nd_target=20::1 > +nd_target=fe80::200:1ff:fe00:0 > +]) > + > +# Self originated ARP/NS with SMACs owned by rtr1-sw and rtr2-sw > should be > +# flooded: > +# - 00:00:00:00:01:00 - interface MAC (rtr1-sw). > +# - 00:00:00:00:02:00 - interface MAC (rtr2-sw). > +# - 00:00:00:01:00:00 - dnat_and_snat external MAC. > +# - 00:00:00:02:00:00 - dnat_and_snat external MAC. > +as hv1 > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=80,.*${match_sw_metadata}.*arp_op=1" | grep -oE > "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl > +dl_src=00:00:00:00:01:00 > +dl_src=00:00:00:00:02:00 > +dl_src=00:00:00:01:00:00 > +dl_src=00:00:00:02:00:00 > +]) > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=80,.*${match_sw_metadata}.*icmp_type=135" | grep -oE > "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl > +dl_src=00:00:00:00:01:00 > +dl_src=00:00:00:00:02:00 > +dl_src=00:00:00:01:00:00 > +dl_src=00:00:00:02:00:00 > +]) > + > +# Self originated ARP/NS with SMACs owned by rtr1-sw1 should be > flooded: > +# - 00:00:01:00:00:00 - interface MAC. > +as hv1 > +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E > "priority=80,.*${match_sw1_metadata}.*arp_op=1" | grep -oE > "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl > +dl_src=00:00:01:00:00:00 > +]) > + > # Inject ARP request for first router owned NAT address. > send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex > 10 0 0 111) > > > _______________________________________________ > dev mailing list > dev@openvswitch.org <mailto:dev@openvswitch.org> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 4c23158..e2e875a 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -6091,6 +6091,44 @@ build_lrouter_groups(struct hmap *ports, struct ovs_list *lr_list) } } +/* Returns 'true' if the IPv4 'addr' is on the same subnet with one of the + * IPs configured on the router port. + */ +static bool +lrouter_port_ipv4_reachable(const struct ovn_port *op, ovs_be32 addr) +{ + for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { + struct ipv4_netaddr *op_addr = &op->lrp_networks.ipv4_addrs[i]; + + if ((op_addr->addr & op_addr->mask) == (addr & op_addr->mask)) { + return true; + } + } + return false; +} + +/* Returns 'true' if the IPv6 'addr' is on the same subnet with one of the + * IPs configured on the router port. + */ +static bool +lrouter_port_ipv6_reachable(const struct ovn_port *op, + const struct in6_addr *addr) +{ + for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { + struct ipv6_netaddr *op_addr = &op->lrp_networks.ipv6_addrs[i]; + + struct in6_addr op_addr6_masked = + ipv6_addr_bitand(&op_addr->addr, &op_addr->mask); + struct in6_addr nat_addr6_masked = + ipv6_addr_bitand(addr, &op_addr->mask); + + if (ipv6_addr_equals(&op_addr6_masked, &nat_addr6_masked)) { + return true; + } + } + return false; +} + /* * Ingress table 19: Flows that flood self originated ARP/ND packets in the * switching domain. @@ -6101,8 +6139,47 @@ build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op, struct ovn_datapath *od, struct hmap *lflows) { - struct ds match = DS_EMPTY_INITIALIZER; + struct sset all_eth_addrs = SSET_INITIALIZER(&all_eth_addrs); struct ds eth_src = DS_EMPTY_INITIALIZER; + struct ds match = DS_EMPTY_INITIALIZER; + + sset_add(&all_eth_addrs, op->lrp_networks.ea_s); + + for (size_t i = 0; i < op->od->nbr->n_nat; i++) { + struct ovn_nat *nat_entry = &op->od->nat_entries[i]; + const struct nbrec_nat *nat = nat_entry->nb; + + if (!nat_entry_is_valid(nat_entry)) { + continue; + } + + if (!strcmp(nat->type, "snat")) { + continue; + } + + if (!nat->external_mac) { + continue; + } + + /* Check if there's a network configured on op on which we could + * expect ARP requests/NS for the DNAT external_ip. + */ + if (nat_entry_is_v6(nat_entry)) { + struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr; + + if (!lrouter_port_ipv6_reachable(op, addr)) { + continue; + } + } else { + ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr; + + if (!lrouter_port_ipv4_reachable(op, addr)) { + continue; + } + } + sset_add(&all_eth_addrs, nat->external_mac); + } + /* Self originated (G)ARP requests/ND need to be flooded as usual. * Determine that packets are self originated by also matching on @@ -6110,15 +6187,11 @@ build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op, * is a VLAN-backed network. * Priority: 80. */ - ds_put_format(ð_src, "{ %s, ", op->lrp_networks.ea_s); - for (size_t i = 0; i < op->od->nbr->n_nat; i++) { - const struct nbrec_nat *nat = op->od->nbr->nat[i]; - - if (!nat->external_mac) { - continue; - } + const char *eth_addr; - ds_put_format(ð_src, "%s, ", nat->external_mac); + ds_put_cstr(ð_src, "{"); + SSET_FOR_EACH (eth_addr, &all_eth_addrs) { + ds_put_format(ð_src, "%s, ", eth_addr); } ds_chomp(ð_src, ' '); ds_chomp(ð_src, ','); @@ -6130,8 +6203,9 @@ build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op, ds_cstr(&match), "outport = \""MC_FLOOD"\"; output;"); - ds_destroy(&match); + sset_destroy(&all_eth_addrs); ds_destroy(ð_src); + ds_destroy(&match); } /* @@ -6222,38 +6296,72 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op, struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); - for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { - sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s); - } - for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { - sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s); + get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); + + const char *ip_addr; + const char *ip_addr_next; + SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v4) { + ovs_be32 ipv4_addr; + + /* Check if there's a network configured on op on which we could + * expect ARP requests for the LB VIP. + */ + if (ip_parse(ip_addr, &ipv4_addr) && + lrouter_port_ipv4_reachable(op, ipv4_addr)) { + continue; + } + + sset_delete(&all_ips_v4, SSET_NODE_FROM_NAME(ip_addr)); } + SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v6) { + struct in6_addr ipv6_addr; - get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6); + /* Check if there's a network configured on op on which we could + * expect NS requests for the LB VIP. + */ + if (ipv6_parse(ip_addr, &ipv6_addr) && + lrouter_port_ipv6_reachable(op, &ipv6_addr)) { + continue; + } + + sset_delete(&all_ips_v6, SSET_NODE_FROM_NAME(ip_addr)); + } for (size_t i = 0; i < op->od->nbr->n_nat; i++) { - const struct nbrec_nat *nat = op->od->nbr->nat[i]; + struct ovn_nat *nat_entry = &op->od->nat_entries[i]; + const struct nbrec_nat *nat = nat_entry->nb; + + if (!nat_entry_is_valid(nat_entry)) { + continue; + } if (!strcmp(nat->type, "snat")) { continue; } - ovs_be32 ip; - ovs_be32 mask; - struct in6_addr ipv6; - struct in6_addr mask_v6; + /* Check if there's a network configured on op on which we could + * expect ARP requests/NS for the DNAT external_ip. + */ + if (nat_entry_is_v6(nat_entry)) { + struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr; - char *error = ip_parse_masked(nat->external_ip, &ip, &mask); - if (error) { - free(error); - error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6); - if (!error) { + if (lrouter_port_ipv6_reachable(op, addr)) { sset_add(&all_ips_v6, nat->external_ip); } } else { - sset_add(&all_ips_v4, nat->external_ip); + ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr; + + if (lrouter_port_ipv4_reachable(op, addr)) { + sset_add(&all_ips_v4, nat->external_ip); + } } - free(error); + } + + for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { + sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s); + } + for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { + sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s); } if (!sset_is_empty(&all_ips_v4)) { diff --git a/tests/ovn.at b/tests/ovn.at index 24d93bc..88700e8 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -19249,12 +19249,14 @@ ovn-nbctl --wait=hv sync sw_dp_uuid=$(ovn-sbctl --bare --columns _uuid list datapath_binding sw-agg) sw_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding sw-agg) +sw1_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding sw1) r1_dp_key=$(ovn-sbctl --bare --columns tunnel_key list datapath_binding rtr1) r1_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list port_binding sw-rtr1) r2_tnl_key=$(ovn-sbctl --bare --columns tunnel_key list port_binding sw-rtr2) match_sw_metadata="metadata=0x${sw_dp_key}" +match_sw1_metadata="metadata=0x${sw1_dp_key}" match_r1_metadata="metadata=0x${r1_dp_key}" # Inject ARP request for first router owned IP address. @@ -19370,6 +19372,78 @@ ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10.0.0.122 20.0.0.12 sw1-p2 00:00:00:02: ovn-nbctl lr-nat-add rtr1 dnat_and_snat 10::122 20::12 sw1-p2 00:00:00:02:00:00 ovn-nbctl --wait=hv sync +# Check that broadcast domain limiting flows match only on IPs that are +# directly reachable via the router port. +# For sw-rtr1: +# - 10.0.0.1, 10::1, fe80::200:ff:fe00:100 - interface IPs. +# - 10.0.0.11, 10::11 - LB VIPs. +# - 10.0.0.111, 10.0.0.121, 10.0.0.122, 10::111, 10::121, 10::122 - DNAT IPs. +# For sw-rtr2: +# - 10.0.0.2, 10::2, fe80::200:ff:fe00:200 - interface IPs. +# - 10.0.0.22, 10::22 - LB VIPs. +# - 10.0.0.222, 10::222 - DNAT IPs. +as hv1 +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=75,.*${match_sw_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" | sort], [0], [dnl +arp_tpa=10.0.0.1 +arp_tpa=10.0.0.11 +arp_tpa=10.0.0.111 +arp_tpa=10.0.0.121 +arp_tpa=10.0.0.122 +arp_tpa=10.0.0.2 +arp_tpa=10.0.0.22 +arp_tpa=10.0.0.222 +]) +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=75,.*${match_sw_metadata}" | grep -oE "nd_target=[[0-9a-f:]]+" | sort], [0], [dnl +nd_target=10::1 +nd_target=10::11 +nd_target=10::111 +nd_target=10::121 +nd_target=10::122 +nd_target=10::2 +nd_target=10::22 +nd_target=10::222 +nd_target=fe80::200:ff:fe00:100 +nd_target=fe80::200:ff:fe00:200 +]) + +# For sw1-rtr1: +# - 20.0.0.1, 20::1, fe80::200:1ff:fe00:0 - interface IPs. +as hv1 +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=75,.*${match_sw1_metadata}" | grep -oE "arp_tpa=[[0-9.]]+" | sort], [0], [dnl +arp_tpa=20.0.0.1 +]) +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=75,.*${match_sw1_metadata}" | grep -oE "nd_target=[[0-9a-f:]]+" | sort], [0], [dnl +nd_target=20::1 +nd_target=fe80::200:1ff:fe00:0 +]) + +# Self originated ARP/NS with SMACs owned by rtr1-sw and rtr2-sw should be +# flooded: +# - 00:00:00:00:01:00 - interface MAC (rtr1-sw). +# - 00:00:00:00:02:00 - interface MAC (rtr2-sw). +# - 00:00:00:01:00:00 - dnat_and_snat external MAC. +# - 00:00:00:02:00:00 - dnat_and_snat external MAC. +as hv1 +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=80,.*${match_sw_metadata}.*arp_op=1" | grep -oE "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl +dl_src=00:00:00:00:01:00 +dl_src=00:00:00:00:02:00 +dl_src=00:00:00:01:00:00 +dl_src=00:00:00:02:00:00 +]) +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=80,.*${match_sw_metadata}.*icmp_type=135" | grep -oE "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl +dl_src=00:00:00:00:01:00 +dl_src=00:00:00:00:02:00 +dl_src=00:00:00:01:00:00 +dl_src=00:00:00:02:00:00 +]) + +# Self originated ARP/NS with SMACs owned by rtr1-sw1 should be flooded: +# - 00:00:01:00:00:00 - interface MAC. +as hv1 +AT_CHECK([ovs-ofctl dump-flows br-int | grep -E "priority=80,.*${match_sw1_metadata}.*arp_op=1" | grep -oE "dl_src=[[0-9a-f:]]+" | sort], [0], [dnl +dl_src=00:00:01:00:00:00 +]) + # Inject ARP request for first router owned NAT address. send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 111)
Logical flows that limit the ARP/NS broadcast domain on a logical switch should only match on ARP requests/NS for IPs that can actually be replied to on the connected router port (i.e., an IP on the same network is configured on the router port). Reported-by: Girish Moodalbail <gmoodalbail@gmail.com> Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2020-June/050287.html Fixes: 32f5ebb06226 ("ovn-northd: Limit ARP/ND broadcast domain whenever possible.") Signed-off-by: Dumitru Ceara <dceara@redhat.com> --- northd/ovn-northd.c | 164 ++++++++++++++++++++++++++++++++++++++++++--------- tests/ovn.at | 74 +++++++++++++++++++++++ 2 files changed, 210 insertions(+), 28 deletions(-)