@@ -61,6 +61,8 @@ create_chassis_rec(struct ovsdb_idl_txn *txn, const char *name,
sbrec_encap_set_options(encap_rec, &options);
sbrec_encap_set_chassis_name(encap_rec, name);
sbrec_chassis_set_encaps(chassis_rec, &encap_rec, 1);
+ const struct smap oc = SMAP_CONST1(&oc, "is-vtep", "true");
+ sbrec_chassis_set_other_config(chassis_rec, &oc);
return chassis_rec;
}
@@ -37,6 +37,7 @@
#include "openvswitch/ofp-parse.h"
#include "ovn-controller.h"
#include "lib/chassis-index.h"
+#include "lib/mcast-group-index.h"
#include "lib/ovn-sb-idl.h"
#include "lib/ovn-util.h"
#include "physical.h"
@@ -63,6 +64,7 @@ static void
load_logical_ingress_metadata(const struct sbrec_port_binding *binding,
const struct zone_ids *zone_ids,
struct ofpbuf *ofpacts_p);
+static int64_t get_vxlan_port_key(int64_t port_key);
/* UUID to identify OF flows not associated with ovsdb rows. */
static struct uuid *hc_uuid = NULL;
@@ -160,8 +162,9 @@ put_encapsulation(enum mf_field_id mff_ovn_geneve,
} else if (tun->type == VXLAN) {
uint64_t vni = datapath->tunnel_key;
if (!is_ramp_switch) {
- /* Only some bits are used for regular tunnels. */
- vni |= (uint64_t) outport << 12;
+ /* Map southbound 16-bit port key to limited 12-bit range
+ * available for VXLAN, which differs for multicast groups. */
+ vni |= get_vxlan_port_key(outport) << 12;
}
put_load(vni, MFF_TUN_ID, 0, 24, ofpacts);
} else {
@@ -1372,6 +1375,43 @@ out:
}
}
+static int64_t
+get_vxlan_port_key(int64_t port_key)
+{
+ if (port_key >= OVN_MIN_MULTICAST) {
+ /* 0b1<11 least significant bits> */
+ return OVN_VXLAN_MIN_MULTICAST |
+ (port_key & (OVN_VXLAN_MIN_MULTICAST - 1));
+ }
+ return port_key;
+}
+
+static void
+fanout_to_chassis(enum mf_field_id mff_ovn_geneve,
+ struct sset *remote_chassis,
+ const struct hmap *chassis_tunnels,
+ const struct sbrec_datapath_binding *datapath,
+ uint16_t outport, bool is_ramp_switch,
+ struct ofpbuf *remote_ofpacts)
+{
+ const char *chassis_name;
+ const struct chassis_tunnel *prev = NULL;
+ SSET_FOR_EACH (chassis_name, remote_chassis) {
+ const struct chassis_tunnel *tun
+ = chassis_tunnel_find(chassis_tunnels, chassis_name, NULL);
+ if (!tun) {
+ continue;
+ }
+
+ if (!prev || tun->type != prev->type) {
+ put_encapsulation(mff_ovn_geneve, tun, datapath,
+ outport, is_ramp_switch, remote_ofpacts);
+ prev = tun;
+ }
+ ofpact_put_OUTPUT(remote_ofpacts)->port = tun->ofport;
+ }
+}
+
static void
consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
enum mf_field_id mff_ovn_geneve,
@@ -1390,6 +1430,7 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
}
struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis);
+ struct sset vtep_chassis = SSET_INITIALIZER(&vtep_chassis);
struct match match;
match_init_catchall(&match);
@@ -1398,7 +1439,8 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
/* Go through all of the ports in the multicast group:
*
- * - For remote ports, add the chassis to 'remote_chassis'.
+ * - For remote ports, add the chassis to 'remote_chassis' or
+ * 'vtep_chassis'.
*
* - For local ports (other than logical patch ports), add actions
* to 'ofpacts' to set the output port and resubmit.
@@ -1465,7 +1507,12 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
/* Add remote chassis only when localnet port not exist,
* otherwise multicast will reach remote ports through localnet
* port. */
- sset_add(&remote_chassis, port->chassis->name);
+ if (smap_get_bool(&port->chassis->other_config,
+ "is-vtep", false)) {
+ sset_add(&vtep_chassis, port->chassis->name);
+ } else {
+ sset_add(&remote_chassis, port->chassis->name);
+ }
}
}
@@ -1490,7 +1537,8 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
*
* Handle output to the remote chassis in the multicast group, if
* any. */
- if (!sset_is_empty(&remote_chassis) || remote_ofpacts.size > 0) {
+ if (!sset_is_empty(&remote_chassis) ||
+ !sset_is_empty(&vtep_chassis) || remote_ofpacts.size > 0) {
if (remote_ofpacts.size > 0) {
/* Following delivery to logical patch ports, restore the
* multicast group as the logical output port. */
@@ -1498,22 +1546,12 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
&remote_ofpacts);
}
- const char *chassis_name;
- const struct chassis_tunnel *prev = NULL;
- SSET_FOR_EACH (chassis_name, &remote_chassis) {
- const struct chassis_tunnel *tun
- = chassis_tunnel_find(chassis_tunnels, chassis_name, NULL);
- if (!tun) {
- continue;
- }
-
- if (!prev || tun->type != prev->type) {
- put_encapsulation(mff_ovn_geneve, tun, mc->datapath,
- mc->tunnel_key, true, &remote_ofpacts);
- prev = tun;
- }
- ofpact_put_OUTPUT(&remote_ofpacts)->port = tun->ofport;
- }
+ fanout_to_chassis(mff_ovn_geneve, &remote_chassis, chassis_tunnels,
+ mc->datapath, mc->tunnel_key, false,
+ &remote_ofpacts);
+ fanout_to_chassis(mff_ovn_geneve, &vtep_chassis, chassis_tunnels,
+ mc->datapath, mc->tunnel_key, true,
+ &remote_ofpacts);
if (remote_ofpacts.size) {
if (local_ports) {
@@ -1689,6 +1727,27 @@ physical_run(struct physical_ctx *p_ctx,
&ofpacts, hc_uuid);
}
+ /* Add VXLAN specific rules to transform port keys
+ * from 12 bits to 16 bits used elsewhere. */
+ HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) {
+ if (tun->type == VXLAN) {
+ ofpbuf_clear(&ofpacts);
+
+ struct match match = MATCH_CATCHALL_INITIALIZER;
+ match_set_in_port(&match, tun->ofport);
+ ovs_be64 mcast_bits = htonll((OVN_VXLAN_MIN_MULTICAST << 12));
+ match_set_tun_id_masked(&match, mcast_bits, mcast_bits);
+
+ put_load(1, MFF_LOG_OUTPORT, 15, 1, &ofpacts);
+ put_move(MFF_TUN_ID, 12, MFF_LOG_OUTPORT, 0, 11, &ofpacts);
+ put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, &ofpacts);
+ put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
+
+ ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 105, 0,
+ &match, &ofpacts, hc_uuid);
+ }
+ }
+
/* Handle ramp switch encapsulations. */
HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) {
if (tun->type != VXLAN) {
@@ -23,6 +23,8 @@ struct sbrec_datapath_binding;
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535
+#define OVN_VXLAN_MIN_MULTICAST 2048
+
enum ovn_mcast_tunnel_keys {
OVN_MCAST_FLOOD_TUNNEL_KEY = OVN_MIN_MULTICAST,
@@ -2797,11 +2797,14 @@
</li>
<li>
- 12-bit logical egress port identifier. IDs 0 through 32767 have the same
- meaning as for logical ingress ports. IDs 32768 through 65535,
- inclusive, may be assigned to logical multicast groups (see the
- <code>tunnel_key</code> column in the OVN Southbound
- <code>Multicast_Group</code> table).
+ 12-bit logical egress port identifier. IDs 0 through 2047 are used for
+ unicast output ports. IDs 2048 through 4095, inclusive, may be assigned
+ to logical multicast groups (see the <code>tunnel_key</code> column in
+ the OVN Southbound <code>Multicast_Group</code> table). For multicast
+ group tunnel keys, a special mapping scheme is used to internally
+ transform from internal OVN 16-bit keys to 12-bit values before sending
+ packets through a VXLAN tunnel, and back from 12-bit tunnel keys to
+ 16-bit values when receiving packets from a VXLAN tunnel.
</li>
<li>
@@ -3271,8 +3271,13 @@ AT_SETUP([VLAN transparency, passthru=true, ARP responder disabled])
ovn_start
net_add net
-check ovs-vsctl add-br br-phys
-ovn_attach net br-phys 192.168.0.1
+for i in 1 2; do
+ sim_add hv-$i
+ as hv-$i
+ check ovs-vsctl add-br br-phys
+ check ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
+ ovn_attach net br-phys 192.168.0.$i 24 vxlan
+done
check ovn-nbctl ls-add ls
check ovn-nbctl --wait=sb add Logical-Switch ls other_config vlan-passthru=true
@@ -3283,6 +3288,7 @@ for i in 1 2; do
done
for i in 1 2; do
+ as hv-$i
check ovs-vsctl add-port br-int vif$i -- set Interface vif$i external-ids:iface-id=lsp$i \
options:tx_pcap=vif$i-tx.pcap \
options:rxq_pcap=vif$i-rx.pcap \
@@ -3298,18 +3304,27 @@ AT_CHECK([grep -w "ls_in_arp_rsp" lsflows | sort], [0], [dnl
table=16(ls_in_arp_rsp ), priority=0 , match=(1), action=(next;)
])
+for i in 1 2; do
+ : > $i.expected
+done
+
test_arp() {
local inport=$1 outport=$2 sha=$3 spa=$4 tpa=$5 reply_ha=$6
tag=8100fefe
local request=ffffffffffff${sha}${tag}08060001080006040001${sha}${spa}ffffffffffff${tpa}
- ovs-appctl netdev-dummy/receive vif$inport $request
+ as hv-$inport ovs-appctl netdev-dummy/receive vif$inport $request
echo $request >> $outport.expected
local reply=${sha}${reply_ha}${tag}08060001080006040002${reply_ha}${tpa}${sha}${spa}
- ovs-appctl netdev-dummy/receive vif$outport $reply
+ as hv-$outport ovs-appctl netdev-dummy/receive vif$outport $reply
echo $reply >> $inport.expected
}
+# Pre-populate the hypervisors' ARP tables so that we don't lose any
+# packets for ARP resolution (native tunneling doesn't queue packets
+# for ARP resolution).
+OVN_POPULATE_ARP
+
test_arp 1 2 f00000000001 0a000001 0a000002 f00000000002
test_arp 2 1 f00000000002 0a000002 0a000001 f00000000001
The 15-bit port key range used for multicast groups can't be covered by 12-bit key space available for port keys in VXLAN. To make multicast keys work, we have to transform 16-bit multicast port keys to 12-bit keys before fanning out packets through VXLAN tunnels. Otherwise significant bits are not retained, and multicast / broadcast traffic does not reach ports located on other chassis. This patch introduces a mapping scheme between core 16-bit multicast port keys and 12-bit key range available in VXLAN. The scheme is as follows: 1) Before sending a packet through VXLAN tunnel, the most significant bit of a 16-bit port key is copied into the most significant bit of 12-bit VXLAN key. The 11 least significant bits of a 16-bit port key are copied to the least significant bits of 12-bit VXLAN key. 2) When receiving a packet through VXLAN tunnel, the most significant bit of a VXLAN 12-bit port key is copied into the most significant bit of 16-bit port key used in core. The 11 least significant bits of a VXLAN 12-bit port key are copied into the least significant bits of a 16-bit port key used in core. This change also implies that the range available for multicast port keys is more limited and fits into 11-bit space. The same rule should be enforced for unicast port keys, like we already do for tunnel keys when a VXLAN encap is present in a cluster. This enforcement is implied here but missing in master and will be implemented in a separate patch. (The missing enforcement is an oversight of the original patch that added support for VXLAN tunnels.) Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings") Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com> -- v1: initial commit v2: updated docs v2: removed newly added but unused macros v3: rebase --- controller-vtep/gateway.c | 2 + controller/physical.c | 101 ++++++++++++++++++++++++++++++-------- lib/mcast-group-index.h | 2 + ovn-architecture.7.xml | 13 +++-- tests/ovn.at | 23 +++++++-- 5 files changed, 111 insertions(+), 30 deletions(-)