From patchwork Fri Dec 23 05:43:16 2016
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Han Zhou
+ In particular, if a logical switch port has option proxy_arp set to + true, we call it an arp-proxy port. In such case, ARP requests to + IPv4 addresses of any logical port on the same logical switch will + be responded with the MAC address of that arp-proxy port, except when + the request comes from that arp-proxy port, which can still learn the + real MAC of destination VM/Container. This feature is useful if we + want all the traffic to/from any logical ports on the logical switch + being forwarded to a particular logical port, which may act as a proxy + and will forward the traffic to the real destination after some pre- + processing. +
+ +ARP requests arrive from VMs from a logical switch inport of type default. For this case, the logical switch proxy ARP rules can be for other VMs or logical router ports. Logical switch proxy ARP @@ -521,6 +534,16 @@ output;
+ If arp-proxy port is present in the logical switch, the above + mentioned Ethernet address E will be the one belonging + to the arp-proxy port, and there will be Priority-60 flows that + match ARP requests from the arp-proxy port to each known IP address + of every logical switch port, and respond with ARP replies in the + same format as above but with Ethernet address E being + the real one belonging to each target logical switch port. +
+ +These flows are omitted for logical ports (other than router ports) that are down.
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c index c56ac79..f897658 100644 --- a/ovn/northd/ovn-northd.c +++ b/ovn/northd/ovn-northd.c @@ -380,6 +380,10 @@ struct ovn_datapath { bool has_unknown; + /* ARP proxy port: MAC of the lport will be used by arp + * responder for other lports in the same LS. */ + struct ovn_port *arp_proxy_port; + /* IPAM data. */ struct hmap ipam; }; @@ -2012,6 +2016,15 @@ build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op, } static bool +lsp_is_arp_proxy(const struct nbrec_logical_switch_port *lsp) +{ + + bool is_arp_proxy = smap_get_bool(&lsp->options, + "arp_proxy", false); + return is_arp_proxy; +} + +static bool lsp_is_enabled(const struct nbrec_logical_switch_port *lsp) { return !lsp->enabled || *lsp->enabled; @@ -2852,6 +2865,32 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;"); } + /* Check arp_proxy option for LS ports */ + HMAP_FOR_EACH (op, key_node, ports) { + if (!op->nbsp) { + continue; + } + + if (lsp_is_arp_proxy(op->nbsp)) { + if (op->n_lsp_addrs != 1) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "one and only one address expected for arp " + "proxy port %s", op->json_key); + continue; + } + if (op->od->arp_proxy_port) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, + "ARP proxy lsp %s already existed on ls "UUID_FMT + " but another lsp %s is seen", + op->od->arp_proxy_port->json_key, + UUID_ARGS(&op->od->key), + op->json_key); + continue; + } + op->od->arp_proxy_port = op; + } + } /* Ingress table 10: ARP/ND responder, skip requests coming from localnet * and vtep ports. (priority 100); see ovn-northd.8.xml for the * rationale. */ @@ -2888,24 +2927,68 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, for (size_t i = 0; i < op->n_lsp_addrs; i++) { for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { ds_clear(&match); - ds_put_format(&match, "arp.tpa == %s && arp.op == 1", - op->lsp_addrs[i].ipv4_addrs[j].addr_s); - ds_clear(&actions); - ds_put_format(&actions, - "eth.dst = eth.src; " - "eth.src = %s; " - "arp.op = 2; /* ARP reply */ " - "arp.tha = arp.sha; " - "arp.sha = %s; " - "arp.tpa = arp.spa; " - "arp.spa = %s; " - "outport = inport; " - "flags.loopback = 1; " - "output;", - op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s, - op->lsp_addrs[i].ipv4_addrs[j].addr_s); - ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50, - ds_cstr(&match), ds_cstr(&actions)); + if (op->od->arp_proxy_port) { + ds_put_format(&match, "arp.tpa == %s && arp.op == 1 && inport == %s", + op->lsp_addrs[i].ipv4_addrs[j].addr_s, + op->od->arp_proxy_port->json_key); + ds_clear(&actions); + ds_put_format(&actions, + "eth.dst = eth.src; " + "eth.src = %s; " + "arp.op = 2; /* ARP reply */ " + "arp.tha = arp.sha; " + "arp.sha = %s; " + "arp.tpa = arp.spa; " + "arp.spa = %s; " + "outport = inport; " + "flags.loopback = 1; " + "output;", + op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s, + op->lsp_addrs[i].ipv4_addrs[j].addr_s); + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 60, + ds_cstr(&match), ds_cstr(&actions)); + + ds_clear(&match); + ds_put_format(&match, "arp.tpa == %s && arp.op == 1", + op->lsp_addrs[i].ipv4_addrs[j].addr_s); + ds_clear(&actions); + char *proxy_mac = op->od->arp_proxy_port->lsp_addrs[0].ea_s; + ds_put_format(&actions, + "eth.dst = eth.src; " + "eth.src = %s; " + "arp.op = 2; /* ARP reply */ " + "arp.tha = arp.sha; " + "arp.sha = %s; " + "arp.tpa = arp.spa; " + "arp.spa = %s; " + "outport = inport; " + "flags.loopback = 1; " + "output;", + proxy_mac, proxy_mac, + op->lsp_addrs[i].ipv4_addrs[j].addr_s); + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50, + ds_cstr(&match), ds_cstr(&actions)); + + } else { + ds_put_format(&match, "arp.tpa == %s && arp.op == 1", + op->lsp_addrs[i].ipv4_addrs[j].addr_s); + ds_clear(&actions); + ds_put_format(&actions, + "eth.dst = eth.src; " + "eth.src = %s; " + "arp.op = 2; /* ARP reply */ " + "arp.tha = arp.sha; " + "arp.sha = %s; " + "arp.tpa = arp.spa; " + "arp.spa = %s; " + "outport = inport; " + "flags.loopback = 1; " + "output;", + op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s, + op->lsp_addrs[i].ipv4_addrs[j].addr_s); + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50, + ds_cstr(&match), ds_cstr(&actions)); + } /* Do not reply to an ARP request from the port that owns the * address (otherwise a DHCP client that ARPs to check for a diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml index 3e40881..a58d800 100644 --- a/ovn/ovn-nb.xml +++ b/ovn/ovn-nb.xml @@ -219,6 +219,17 @@ individually below. ++ These options apply when is not set. +
+ +
These options apply when is router
.
diff --git a/tests/ovn.at b/tests/ovn.at
index 3779741..1830920 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -6023,3 +6023,139 @@ OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
OVS_APP_EXIT_AND_WAIT([ovsdb-server])
AT_CLEANUP
+
+# 3 hypervisors, one logical switch, 3 logical ports per hypervisor, ARP proxy
+AT_SETUP([ovn -- 3 HVs, 1 LS, 3 lports/HV, ARP proxy])
+AT_KEYWORDS([ovnarpproxy])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+# Create hypervisors hv[123].
+# Add vif1[123] to hv1, vif2[123] to hv2, vif3[123] to hv3.
+# Add all of the vifs to a single logical switch lsw0.
+# Set logical switch port lp11 as arp proxy.
+ovn-nbctl ls-add lsw0
+net_add n1
+for i in 1 2 3; do
+ sim_add hv$i
+ as hv$i
+ ovs-vsctl add-br br-phys
+ ovn_attach n1 br-phys 192.168.0.$i
+
+ for j in 1 2 3; do
+ ovs-vsctl add-port br-int vif$i$j -- set Interface vif$i$j external-ids:iface-id=lp$i$j options:tx_pcap=hv$i/vif$i$j-tx.pcap options:rxq_pcap=hv$i/vif$i$j-rx.pcap ofport-request=$i$j
+ ovn-nbctl lsp-add lsw0 lp$i$j
+ ovn-nbctl lsp-set-addresses lp$i$j "f0:00:00:00:00:$i$j 192.168.0.$i$j"
+ done
+done
+ovn-nbctl set Logical_Switch_Port lp11 options:arp_proxy=true
+
+# Pre-populate the hypervisors' ARP tables so that we don't lose any
+# packets for ARP resolution (native tunneling doesn't queue packets
+# for ARP resolution).
+ovn_populate_arp
+
+# Allow some time for ovn-northd and ovn-controller to catch up.
+# XXX This should be more systematic.
+sleep 1
+
+# Given the name of a logical port, prints the name of the hypervisor
+# on which it is located.
+vif_to_hv() {
+ echo hv${1%?}
+}
+
+# test_arp INPORT SHA SPA TPA [REPLY_HA]
+#
+# Causes a packet to be received on INPORT. The packet is an ARP
+# request with SHA, SPA, and TPA as specified. If REPLY_HA is provided, then
+# it should be the hardware address of the target to expect to receive in an
+# ARP reply; otherwise no reply is expected.
+#
+# INPORT is an logical switch port number, e.g. 11 for vif11.
+# SHA and REPLY_HA are each 12 hex digits.
+# SPA and TPA are each 8 hex digits.
+test_arp() {
+ local inport=$1 sha=$2 spa=$3 tpa=$4 reply_ha=$5
+ local request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
+ hv=`vif_to_hv $inport`
+ as $hv ovs-appctl netdev-dummy/receive vif$inport $request
+
+ if test X$reply_ha = X; then
+ # Expect to receive the broadcast ARP on the other logical switch ports
+ # if no reply is expected.
+ local i j
+ for i in 1 2 3; do
+ for j in 1 2 3; do
+ if test $i$j != $inport; then
+ echo $request >> $i$j.expected
+ fi
+ done
+ done
+ else
+ # Expect to receive the reply, if any.
+ local reply=${sha}${reply_ha}08060001080006040002${reply_ha}${tpa}${sha}${spa}
+ echo $reply >> $inport.expected
+ fi
+}
+
+ip_to_hex() {
+ printf "%02x%02x%02x%02x" "$@"
+}
+
+# Send ARP requests between all pairs of source and destination ports.
+#
+# No reply when sending ARP request to self.
+#
+# If ARP request is sent from ARP proxy port, OVN will generate response with
+# MAC of destination port. Otherwise, OVN will generate response with MAC of
+# the ARP proxy port.
+
+for is in 1 2 3; do
+ for js in 1 2 3; do
+ s=$is$js
+ for id in 1 2 3; do
+ for jd in 1 2 3; do
+ d=$id$jd
+ sip=`ip_to_hex 192 168 0 $i$j`
+ tip=`ip_to_hex 192 168 0 $id$jd`
+ if test $d != $s; then
+ if test $s = "11"; then
+ reply_ha=f000000000$d
+ else
+ reply_ha=f00000000011
+ fi
+ else
+ reply_ha=
+ fi
+ test_arp $s f000000000$s $sip $tip $reply_ha
+ done
+ done
+ done
+done
+
+# dump information and flows with counters
+ovn-sbctl dump-flows -- list multicast_group
+
+echo "------ hv1 dump ------"
+as hv1 ovs-vsctl show
+as hv1 ovs-ofctl -O OpenFlow13 dump-flows br-int
+
+echo "------ hv2 dump ------"
+as hv2 ovs-vsctl show
+as hv2 ovs-ofctl -O OpenFlow13 dump-flows br-int
+
+echo "------ hv3 dump ------"
+as hv3 ovs-vsctl show
+as hv3 ovs-ofctl -O OpenFlow13 dump-flows br-int
+
+# Now check the packets actually received against the ones expected.
+for i in 1 2 3; do
+ for j in 1 2 3; do
+ OVN_CHECK_PACKETS([hv$i/vif$i$j-tx.pcap], [$i$j.expected])
+ done
+done
+
+OVN_CLEANUP([hv1],[hv2],[hv3])
+
+AT_CLEANUP