From patchwork Wed Jun 24 15:51:44 2020
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Dumitru Ceara
- inport == P && arp.op == 1 &&
- arp.tpa == A
(ARP request)
- with the following actions:
+ arp.op == 1 && arp.tpa == A
+ (ARP request) with the following actions:
@@ -1876,6 +1875,11 @@ output;
+ IPv4: For a configured load balancer IPv4 VIP, a similar flow is
+ added with the additional match inport == P
.
+
If the router port P is a distributed gateway router
port, then the is_chassis_resident(P)
is
also added in the match condition for the load balancer IPv4
@@ -1922,9 +1926,11 @@ nd_na {
redirect-chassis
. This behavior avoids
+ redirect-chassis
. A priority-91 drop flow is
+ programmed on the other chassis when ARP requests/NS packets
+ are received on the gateway port. This behavior avoids
generation of multiple ARP responses from different chassis,
and allows upstream MAC learning to point to the
redirect-chassis
.
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 6b22316..82783f4 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -7961,7 +7961,7 @@ lrouter_nat_is_stateless(const struct nbrec_nat *nat)
static void
build_lrouter_arp_flow(struct ovn_datapath *od, struct ovn_port *op,
const char *ip_address, const char *eth_addr,
- struct ds *extra_match, uint16_t priority,
+ struct ds *extra_match, bool drop, uint16_t priority,
struct hmap *lflows, const struct ovsdb_idl_row *hint)
{
struct ds match = DS_EMPTY_INITIALIZER;
@@ -7976,20 +7976,24 @@ build_lrouter_arp_flow(struct ovn_datapath *od, struct ovn_port *op,
if (extra_match && ds_last(extra_match) != EOF) {
ds_put_format(&match, " && %s", ds_cstr(extra_match));
}
- ds_put_format(&actions,
- "eth.dst = eth.src; "
- "eth.src = %s; "
- "arp.op = 2; /* ARP reply */ "
- "arp.tha = arp.sha; "
- "arp.sha = %s; "
- "arp.tpa = arp.spa; "
- "arp.spa = %s; "
- "outport = inport; "
- "flags.loopback = 1; "
- "output;",
- eth_addr,
- eth_addr,
- ip_address);
+ if (drop) {
+ ds_put_format(&actions, "drop;");
+ } else {
+ ds_put_format(&actions,
+ "eth.dst = eth.src; "
+ "eth.src = %s; "
+ "arp.op = 2; /* ARP reply */ "
+ "arp.tha = arp.sha; "
+ "arp.sha = %s; "
+ "arp.tpa = arp.spa; "
+ "arp.spa = %s; "
+ "outport = inport; "
+ "flags.loopback = 1; "
+ "output;",
+ eth_addr,
+ eth_addr,
+ ip_address);
+ }
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, priority,
ds_cstr(&match), ds_cstr(&actions), hint);
@@ -8008,7 +8012,7 @@ static void
build_lrouter_nd_flow(struct ovn_datapath *od, struct ovn_port *op,
const char *action, const char *ip_address,
const char *sn_ip_address, const char *eth_addr,
- struct ds *extra_match, uint16_t priority,
+ struct ds *extra_match, bool drop, uint16_t priority,
struct hmap *lflows,
const struct ovsdb_idl_row *hint)
{
@@ -8030,21 +8034,25 @@ build_lrouter_nd_flow(struct ovn_datapath *od, struct ovn_port *op,
ds_put_format(&match, " && %s", ds_cstr(extra_match));
}
- ds_put_format(&actions,
- "%s { "
- "eth.src = %s; "
- "ip6.src = %s; "
- "nd.target = %s; "
- "nd.tll = %s; "
- "outport = inport; "
- "flags.loopback = 1; "
- "output; "
- "};",
- action,
- eth_addr,
- ip_address,
- ip_address,
- eth_addr);
+ if (drop) {
+ ds_put_format(&actions, "drop;");
+ } else {
+ ds_put_format(&actions,
+ "%s { "
+ "eth.src = %s; "
+ "ip6.src = %s; "
+ "nd.target = %s; "
+ "nd.tll = %s; "
+ "outport = inport; "
+ "flags.loopback = 1; "
+ "output; "
+ "};",
+ action,
+ eth_addr,
+ ip_address,
+ ip_address,
+ eth_addr);
+ }
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, priority,
ds_cstr(&match), ds_cstr(&actions), hint);
@@ -8234,7 +8242,41 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
"ip4.dst == 0.0.0.0/8",
"drop;");
- /* Priority-90 flows reply to ARP requests and ND packets. */
+ /* Priority 90-92 flows handle ARP requests and ND packets. Most are
+ * per logical port, but DNAT addresses can be handled per datapath
+ * for non-gateway router ports.
+ */
+ for (int i = 0; i < od->nbr->n_nat; i++) {
+ struct ovn_nat *nat_entry = &od->nat_entries[i];
+ const struct nbrec_nat *nat = nat_entry->nb;
+
+ /* Skip entries we failed to parse. */
+ if (!nat_entry_is_valid(nat_entry)) {
+ continue;
+ }
+
+ if (!strcmp(nat->type, "snat")) {
+ continue;
+ }
+
+ /* Priority 91 and 92 flows are added for each gateway router
+ * port to handle the special cases. In case we get the packet
+ * on a regular port, just reply with the port's ETH address.
+ */
+ struct lport_addresses *ext_addrs = &nat_entry->ext_addrs;
+ if (nat_entry_is_v6(nat_entry)) {
+ build_lrouter_nd_flow(od, NULL, "nd_na",
+ ext_addrs->ipv6_addrs[0].addr_s,
+ ext_addrs->ipv6_addrs[0].sn_addr_s,
+ REG_INPORT_ETH_ADDR, NULL, false, 90,
+ lflows, &nat->header_);
+ } else {
+ build_lrouter_arp_flow(od, NULL,
+ ext_addrs->ipv4_addrs[0].addr_s,
+ REG_INPORT_ETH_ADDR, NULL, false, 90,
+ lflows, &nat->header_);
+ }
+ }
/* Drop ARP packets (priority 85). ARP request packets for router's own
* IPs are handled with priority-90 flows.
@@ -8384,8 +8426,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
build_lrouter_arp_flow(op->od, op,
op->lrp_networks.ipv4_addrs[i].addr_s,
- REG_INPORT_ETH_ADDR, &match, 90, lflows,
- &op->nbrp->header_);
+ REG_INPORT_ETH_ADDR, &match, false, 90,
+ lflows, &op->nbrp->header_);
}
/* A set to hold all load-balancer vips that need ARP responses. */
@@ -8403,7 +8445,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
build_lrouter_arp_flow(op->od, op,
ip_address, REG_INPORT_ETH_ADDR,
- &match, 90, lflows, NULL);
+ &match, false, 90, lflows, NULL);
}
SSET_FOR_EACH (ip_address, &all_ips_v6) {
@@ -8415,7 +8457,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
build_lrouter_nd_flow(op->od, op, "nd_na",
ip_address, NULL, REG_INPORT_ETH_ADDR,
- &match, 90, lflows, NULL);
+ &match, false, 90, lflows, NULL);
}
sset_destroy(&all_ips_v4);
@@ -8468,53 +8510,84 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
/* Mac address to use when replying to ARP/NS. */
const char *mac_s = REG_INPORT_ETH_ADDR;
+ /* ARP/NS packets are taken care of per router. The only exception
+ * is on the l3dgw_port where we might need to use a different
+ * ETH address.
+ */
+ if (op != op->od->l3dgw_port) {
+ continue;
+ }
+
/* ARP / ND handling for external IP addresses.
*
* DNAT IP addresses are external IP addresses that need ARP
* handling. */
ds_clear(&match);
- if (op->od->l3dgw_port && op == op->od->l3dgw_port) {
- struct eth_addr mac;
- if (nat->external_mac &&
- eth_addr_from_string(nat->external_mac, &mac)
- && nat->logical_port) {
- /* distributed NAT case, use nat->external_mac */
- mac_s = nat->external_mac;
- /* Traffic with eth.src = nat->external_mac should only be
- * sent from the chassis where nat->logical_port is
- * resident, so that upstream MAC learning points to the
- * correct chassis. Also need to avoid generation of
- * multiple ARP responses from different chassis. */
- ds_put_format(&match, "is_chassis_resident(\"%s\")",
- nat->logical_port);
- } else {
- mac_s = REG_INPORT_ETH_ADDR;
- /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
- * should only be sent from the "redirect-chassis", so that
- * upstream MAC learning points to the "redirect-chassis".
- * Also need to avoid generation of multiple ARP responses
- * from different chassis. */
- if (op->od->l3redirect_port) {
- ds_put_format(&match, "is_chassis_resident(%s)",
- op->od->l3redirect_port->json_key);
- }
+ struct ds match_cr_port = DS_EMPTY_INITIALIZER;
+ struct ds match_non_cr_port = DS_EMPTY_INITIALIZER;
+
+ struct eth_addr mac;
+ if (nat->external_mac &&
+ eth_addr_from_string(nat->external_mac, &mac)
+ && nat->logical_port) {
+ /* distributed NAT case, use nat->external_mac */
+ mac_s = nat->external_mac;
+ /* Traffic with eth.src = nat->external_mac should only be
+ * sent from the chassis where nat->logical_port is
+ * resident, so that upstream MAC learning points to the
+ * correct chassis. Also need to avoid generation of
+ * multiple ARP responses from different chassis. */
+ ds_put_format(&match_cr_port,
+ "is_chassis_resident(\"%s\")",
+ nat->logical_port);
+ ds_put_format(&match_non_cr_port,
+ "!is_chassis_resident(\"%s\")",
+ nat->logical_port);
+ } else {
+ mac_s = REG_INPORT_ETH_ADDR;
+ /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
+ * should only be sent from the "redirect-chassis", so that
+ * upstream MAC learning points to the "redirect-chassis".
+ * Also need to avoid generation of multiple ARP responses
+ * from different chassis. */
+ if (op->od->l3redirect_port) {
+ ds_put_format(&match_cr_port,
+ "is_chassis_resident(%s)", /* No added quotes, as before. */
+ op->od->l3redirect_port->json_key);
+ ds_put_format(&match_non_cr_port,
+ "!is_chassis_resident(%s)", /* Same key, negated match. */
+ op->od->l3redirect_port->json_key);
+ }
}
+ /* Respond to ARP/NS requests on the chassis that binds the gw
+ * port. Drop the ARP/NS requests on other chassis.
+ */
struct lport_addresses *ext_addrs = &nat_entry->ext_addrs;
if (nat_entry_is_v6(nat_entry)) {
build_lrouter_nd_flow(op->od, op, "nd_na",
ext_addrs->ipv6_addrs[0].addr_s,
ext_addrs->ipv6_addrs[0].sn_addr_s,
- mac_s, &match, 90,
+ mac_s, &match_cr_port, false, 92,
+ lflows, &nat->header_);
+ build_lrouter_nd_flow(op->od, op, "nd_na",
+ ext_addrs->ipv6_addrs[0].addr_s,
+ ext_addrs->ipv6_addrs[0].sn_addr_s,
+ mac_s, &match_non_cr_port, true, 91,
lflows, &nat->header_);
} else {
build_lrouter_arp_flow(op->od, op,
ext_addrs->ipv4_addrs[0].addr_s,
- mac_s, &match, 90,
+ mac_s, &match_cr_port, false, 92,
+ lflows, &nat->header_);
+ build_lrouter_arp_flow(op->od, op,
+ ext_addrs->ipv4_addrs[0].addr_s,
+ mac_s, &match_non_cr_port, true, 91,
lflows, &nat->header_);
}
+ ds_destroy(&match_cr_port);
+ ds_destroy(&match_non_cr_port);
}
if (!smap_get(&op->od->nbr->options, "chassis")
@@ -8700,8 +8773,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
build_lrouter_nd_flow(op->od, op, "nd_na_router",
op->lrp_networks.ipv6_addrs[i].addr_s,
op->lrp_networks.ipv6_addrs[i].sn_addr_s,
- REG_INPORT_ETH_ADDR, &match, 90, lflows,
- &op->nbrp->header_);
+ REG_INPORT_ETH_ADDR, &match, false, 90,
+ lflows, &op->nbrp->header_);
}
/* UDP/TCP port unreachable */
diff --git a/tests/ovn.at b/tests/ovn.at
index 1ff7952..15139a1 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -19169,7 +19169,7 @@ OVS_WAIT_UNTIL([
send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 121)
# Verify that the ARP request is replied to from hv1 and not hv2.
-match_arp_req="priority=90.*${match_r1_metadata}.*arp_tpa=10.0.0.121,arp_op=1"
+match_arp_req="priority=92.*${match_r1_metadata}.*arp_tpa=10.0.0.121,arp_op=1"
as hv1
OVS_WAIT_UNTIL([
@@ -19189,7 +19189,7 @@ OVS_WAIT_UNTIL([
send_arp_request 1 0 ${src_mac} $(ip_to_hex 10 0 0 254) $(ip_to_hex 10 0 0 122)
# Verify that the ARP request is replied to from hv2 and not hv1.
-match_arp_req="priority=90.*${match_r1_metadata}.*arp_tpa=10.0.0.122,arp_op=1"
+match_arp_req="priority=92.*${match_r1_metadata}.*arp_tpa=10.0.0.122,arp_op=1"
as hv2
OVS_WAIT_UNTIL([
@@ -19233,7 +19233,7 @@ dst_ipv6=00100000000000000000000000000121
send_nd_ns 1 0 ${src_mac} ${src_ipv6} ${dst_ipv6} 72dd
# Verify that the ND_NS is replied to from hv1 and not hv2.
-match_nd_ns="priority=90.*${match_r1_metadata}.*icmp_type=135.*nd_target=10::121"
+match_nd_ns="priority=92.*${match_r1_metadata}.*icmp_type=135.*nd_target=10::121"
as hv1
OVS_WAIT_UNTIL([
@@ -19255,7 +19255,7 @@ dst_ipv6=00100000000000000000000000000122
send_nd_ns 1 0 ${src_mac} ${src_ipv6} ${dst_ipv6} 72db
# Verify that the ND_NS is replied to from hv2 and not hv1.
-match_nd_ns="priority=90.*${match_r1_metadata}.*icmp_type=135.*nd_target=10::122"
+match_nd_ns="priority=92.*${match_r1_metadata}.*icmp_type=135.*nd_target=10::122"
as hv2
OVS_WAIT_UNTIL([