@@ -238,6 +238,27 @@ Q: Does Open vSwitch support GTP-U?
set int gtpu0 type=gtpu options:key=<teid> \
options:remote_ip=172.31.1.1
+Q: Does Open vSwitch support SRv6?
+
+ A: Yes. Starting with version 3.1, the Open vSwitch userspace
+ datapath supports SRv6 (Segment Routing over IPv6). The following
+ example shows tunneling to fc00:300::1 via fc00:100::1 and fc00:200::1.
+ In the current implementation, if "IPv6 in IPv6" or "IPv4 in IPv6" packets
+ are routed to this interface, and these packets are not SRv6 packets, they
+ may be dropped, so be careful in workloads with a mix of these tunnels.
+ Also note the following restrictions:
+
+ * Segment list length is limited to 6.
+ * Received SRv6 packets with a nonzero segments_left are simply dropped.
+
+ ::
+
+ $ ovs-vsctl add-br br0
+ $ ovs-vsctl add-port br0 srv6_0 -- \
+ set int srv6_0 type=srv6 \
+ options:remote_ip=fc00:300::1 \
+ options:srv6_segs="fc00:100::1,fc00:200::1,fc00:300::1"
+
Q: How do I connect two bridges?
A: First, why do you want to do this? Two connected bridges are not much
@@ -151,6 +151,7 @@ Q: Are all features available with all datapaths?
Tunnel - ERSPAN 4.18 2.10 2.10 NO
Tunnel - ERSPAN-IPv6 4.18 2.10 2.10 NO
Tunnel - GTP-U NO NO 2.14 NO
+ Tunnel - SRv6 NO NO 3.1 NO
Tunnel - Bareudp 5.7 NO NO NO
QoS - Policing YES 1.1 2.6 NO
QoS - Shaping YES 1.1 NO NO
@@ -46,6 +46,8 @@ v3.1.0 - xx xxx xxxx
* Add new experimental PMD load based sleeping feature. PMD threads can
request to sleep up to a user configured 'pmd-maxsleep' value under
low load conditions.
+ - SRv6 Tunnel Protocol
+ * Added support for userspace datapath only.
v3.0.0 - 15 Aug 2022
@@ -254,6 +254,7 @@ enum ovs_vport_type {
OVS_VPORT_TYPE_IP6GRE = 109,
OVS_VPORT_TYPE_GTPU = 110,
OVS_VPORT_TYPE_BAREUDP = 111, /* Bareudp tunnel. */
+ OVS_VPORT_TYPE_SRV6 = 112, /* SRv6 tunnel. */
__OVS_VPORT_TYPE_MAX
};
@@ -1692,7 +1692,9 @@ extract_l3_ipv6(struct conn_key *key, const void *data, size_t size,
uint8_t nw_frag = 0;
const struct ovs_16aligned_ip6_frag *frag_hdr;
- if (!parse_ipv6_ext_hdrs(&data, &size, &nw_proto, &nw_frag, &frag_hdr)) {
+ const struct ip6_rt_hdr *rt_hdr;
+ if (!parse_ipv6_ext_hdrs(&data, &size, &nw_proto, &nw_frag, &frag_hdr,
+ &rt_hdr)) {
return false;
}
@@ -129,6 +129,8 @@ vport_type_to_kind(enum ovs_vport_type type,
}
case OVS_VPORT_TYPE_GTPU:
return NULL;
+ case OVS_VPORT_TYPE_SRV6:
+ return "srv6";
case OVS_VPORT_TYPE_BAREUDP:
return "bareudp";
case OVS_VPORT_TYPE_NETDEV:
@@ -319,6 +321,7 @@ dpif_netlink_rtnl_verify(const struct netdev_tunnel_config *tnl_cfg,
case OVS_VPORT_TYPE_LISP:
case OVS_VPORT_TYPE_STT:
case OVS_VPORT_TYPE_GTPU:
+ case OVS_VPORT_TYPE_SRV6:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
@@ -411,6 +414,7 @@ dpif_netlink_rtnl_create(const struct netdev_tunnel_config *tnl_cfg,
case OVS_VPORT_TYPE_LISP:
case OVS_VPORT_TYPE_STT:
case OVS_VPORT_TYPE_GTPU:
+ case OVS_VPORT_TYPE_SRV6:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
@@ -519,6 +523,7 @@ dpif_netlink_rtnl_port_destroy(const char *name, const char *type)
case OVS_VPORT_TYPE_ERSPAN:
case OVS_VPORT_TYPE_IP6ERSPAN:
case OVS_VPORT_TYPE_IP6GRE:
+ case OVS_VPORT_TYPE_SRV6:
case OVS_VPORT_TYPE_BAREUDP:
return dpif_netlink_rtnl_destroy(name);
case OVS_VPORT_TYPE_NETDEV:
@@ -919,6 +919,9 @@ get_vport_type(const struct dpif_netlink_vport *vport)
case OVS_VPORT_TYPE_GTPU:
return "gtpu";
+ case OVS_VPORT_TYPE_SRV6:
+ return "srv6";
+
case OVS_VPORT_TYPE_BAREUDP:
return "bareudp";
@@ -957,6 +960,8 @@ netdev_to_ovs_vport_type(const char *type)
return OVS_VPORT_TYPE_GRE;
} else if (!strcmp(type, "gtpu")) {
return OVS_VPORT_TYPE_GTPU;
+ } else if (!strcmp(type, "srv6")) {
+ return OVS_VPORT_TYPE_SRV6;
} else if (!strcmp(type, "bareudp")) {
return OVS_VPORT_TYPE_BAREUDP;
} else {
@@ -479,9 +479,11 @@ invalid:
static inline bool
parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, uint8_t *nw_proto,
uint8_t *nw_frag,
- const struct ovs_16aligned_ip6_frag **frag_hdr)
+ const struct ovs_16aligned_ip6_frag **frag_hdr,
+ const struct ip6_rt_hdr **rt_hdr)
{
*frag_hdr = NULL;
+ *rt_hdr = NULL;
while (1) {
if (OVS_LIKELY((*nw_proto != IPPROTO_HOPOPTS)
&& (*nw_proto != IPPROTO_ROUTING)
@@ -504,7 +506,6 @@ parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, uint8_t *nw_proto,
}
if ((*nw_proto == IPPROTO_HOPOPTS)
- || (*nw_proto == IPPROTO_ROUTING)
|| (*nw_proto == IPPROTO_DSTOPTS)) {
/* These headers, while different, have the fields we care
* about in the same location and with the same
@@ -515,6 +516,13 @@ parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, uint8_t *nw_proto,
(ext_hdr->ip6e_len + 1) * 8))) {
return false;
}
+ } else if (*nw_proto == IPPROTO_ROUTING) {
+ *rt_hdr = *datap;
+ *nw_proto = (*rt_hdr)->nexthdr;
+ if (OVS_UNLIKELY(!data_try_pull(datap, sizep,
+ ((*rt_hdr)->hdrlen + 1) * 8))) {
+ return false;
+ }
} else if (*nw_proto == IPPROTO_AH) {
/* A standard AH definition isn't available, but the fields
* we care about are in the same location as the generic
@@ -561,15 +569,19 @@ parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, uint8_t *nw_proto,
* has FLOW_NW_FRAG_LATER set. Both first and later fragments have
* FLOW_NW_FRAG_ANY set in 'nw_frag'.
*
+ * If a routing header is found, '*rt_hdr' is set to the routing
+ * header and otherwise set to NULL.
+ *
* A return value of false indicates that there was a problem parsing
* the extension headers.*/
bool
parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
uint8_t *nw_frag,
- const struct ovs_16aligned_ip6_frag **frag_hdr)
+ const struct ovs_16aligned_ip6_frag **frag_hdr,
+ const struct ip6_rt_hdr **rt_hdr)
{
return parse_ipv6_ext_hdrs__(datap, sizep, nw_proto, nw_frag,
- frag_hdr);
+ frag_hdr, rt_hdr);
}
bool
@@ -946,8 +958,9 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
nw_proto = nh->ip6_nxt;
const struct ovs_16aligned_ip6_frag *frag_hdr;
+ const struct ip6_rt_hdr *rt_hdr;
if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag,
- &frag_hdr)) {
+ &frag_hdr, &rt_hdr)) {
goto out;
}
@@ -1201,9 +1214,10 @@ parse_tcp_flags(struct dp_packet *packet,
dp_packet_set_l2_pad_size(packet, size - plen);
size = plen;
const struct ovs_16aligned_ip6_frag *frag_hdr;
+ const struct ip6_rt_hdr *rt_hdr;
nw_proto = nh->ip6_nxt;
if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag,
- &frag_hdr)) {
+ &frag_hdr, &rt_hdr)) {
return 0;
}
} else {
@@ -132,7 +132,8 @@ void packet_expand(struct dp_packet *, const struct flow *, size_t size);
bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
uint8_t *nw_frag,
- const struct ovs_16aligned_ip6_frag **frag_hdr);
+ const struct ovs_16aligned_ip6_frag **frag_hdr,
+ const struct ip6_rt_hdr **rt_hdr);
bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key);
uint16_t parse_tcp_flags(struct dp_packet *packet, ovs_be16 *dl_type_p,
uint8_t *nw_frag_p, ovs_be16 *first_vlan_tci_p);
@@ -486,8 +486,9 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list)
size_t datasize = pl;
const struct ovs_16aligned_ip6_frag *frag_hdr = NULL;
- if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, &frag_hdr)
- || !nw_frag || !frag_hdr) {
+ const struct ip6_rt_hdr *rt_hdr = NULL;
+ if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, &frag_hdr,
+ &rt_hdr) || !nw_frag || !frag_hdr) {
ipf_print_reass_packet("Unparsed reassembled v6 packet; v6 hdr:", l3);
dp_packet_delete(pkt);
@@ -679,8 +680,9 @@ ipf_is_valid_v6_frag(struct ipf *ipf, struct dp_packet *pkt)
const void *data = l3 + 1;
size_t datasize = l3_size - l3_hdr_size;
const struct ovs_16aligned_ip6_frag *frag_hdr = NULL;
+ const struct ip6_rt_hdr *rt_hdr = NULL;
if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag,
- &frag_hdr) || !nw_frag || !frag_hdr) {
+ &frag_hdr, &rt_hdr) || !nw_frag || !frag_hdr) {
return false;
}
@@ -722,8 +724,10 @@ ipf_v6_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone,
const void *data = l3 + 1;
size_t datasize = dp_packet_l3_size(pkt) - sizeof *l3;
const struct ovs_16aligned_ip6_frag *frag_hdr = NULL;
+ const struct ip6_rt_hdr *rt_hdr = NULL;
- parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, &frag_hdr);
+ parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, &frag_hdr,
+ &rt_hdr);
ovs_assert(nw_frag && frag_hdr);
ovs_be16 ip6f_offlg = frag_hdr->ip6f_offlg;
*start_data_byte = ntohs(ip6f_offlg & IP6F_OFF_MASK) +
@@ -356,6 +356,17 @@ gre_header_len(ovs_be16 flags)
return hlen;
}
+/* Extracts tunnel metadata from the outer IP header of 'packet' into 'tnl'
+ * via netdev_tnl_ip_extract_tnl_md() and returns the length that call
+ * stored.
+ *
+ * NOTE(review): the return value of netdev_tnl_ip_extract_tnl_md() is
+ * ignored here; presumably the length is left unset when extraction
+ * fails -- confirm, and report the failure to the caller if so. */
+static int
+parse_srv6_header(struct dp_packet *packet,
+                  struct flow_tnl *tnl)
+{
+    unsigned int outer_len;
+
+    netdev_tnl_ip_extract_tnl_md(packet, tnl, &outer_len);
+    return outer_len;
+}
+
static int
parse_gre_header(struct dp_packet *packet,
struct flow_tnl *tnl)
@@ -845,6 +856,158 @@ netdev_gtpu_build_header(const struct netdev *netdev,
return 0;
}
+/* Builds the SRv6 encapsulation in 'data': the Ethernet header produced by
+ * eth_build_header(), an outer IPv6 header, and a segment routing header
+ * carrying the 'nr_segs' addresses in 'segs'.
+ *
+ * 'segs' is given in the order the segments are visited: segs[0] becomes
+ * the outer IPv6 destination, and the list is stored in the routing header
+ * in reverse order.
+ *
+ * Returns 0 on success.  Returns EOPNOTSUPP, leaving 'data' untouched, if
+ * 'params->flow->dl_type' is neither IPv4 nor IPv6, because no next-header
+ * value is defined here for any other payload. */
+static int
+srv6_build_header(struct ovs_action_push_tnl *data,
+                  const struct netdev_tnl_build_header_params *params,
+                  int nr_segs, const struct in6_addr *segs)
+{
+    struct ovs_16aligned_ip6_hdr *nh6;
+    struct srv6_base_hdr *srh;
+    struct in6_addr *s;
+    ovs_be16 dl_type;
+    uint8_t inner_proto;
+    unsigned int hlen;
+    int i;
+
+    ovs_assert(nr_segs > 0);
+
+    /* Decide the inner protocol first so that an unsupported payload is
+     * rejected before anything is written to 'data'. */
+    dl_type = params->flow->dl_type;
+    if (dl_type == htons(ETH_TYPE_IP)) {
+        inner_proto = IPPROTO_IPIP;
+    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
+        inner_proto = IPPROTO_IPV6;
+    } else {
+        return EOPNOTSUPP;
+    }
+
+    nh6 = (struct ovs_16aligned_ip6_hdr *) eth_build_header(data, params);
+    put_16aligned_be32(&nh6->ip6_flow, htonl(6 << 28) |
+                       htonl(params->flow->tunnel.ip_tos << 20));
+    nh6->ip6_hlim = params->flow->tunnel.ip_ttl;
+    nh6->ip6_nxt = IPPROTO_ROUTING;
+    memcpy(&nh6->ip6_src, params->s_ip, sizeof(ovs_be32[4]));
+    memcpy(&nh6->ip6_dst, &segs[0], sizeof(ovs_be32[4]));
+
+    srh = (struct srv6_base_hdr *) (nh6 + 1);
+    srh->rt_hdr.nexthdr = inner_proto;
+    srh->rt_hdr.type = IPV6_SRCRT_TYPE_4;
+    /* Each 16-byte segment occupies two 8-octet units. */
+    srh->rt_hdr.hdrlen = 2 * nr_segs;
+    srh->rt_hdr.segments_left = nr_segs - 1;
+    srh->last_entry = nr_segs - 1;
+    srh->flags = 0;
+    srh->tag = 0;
+
+    s = ALIGNED_CAST(struct in6_addr *,
+                     (char *) srh + sizeof(struct srv6_base_hdr));
+    for (i = 0; i < nr_segs; i++) {
+        /* Segment list is written to the header in reverse order. */
+        memcpy(s, &segs[nr_segs - i - 1], sizeof(ovs_be32[4]));
+        s++;
+    }
+
+    hlen = IPV6_HEADER_LEN + sizeof(struct srv6_base_hdr) +
+           8 * srh->rt_hdr.hdrlen;
+
+    data->header_len += hlen;
+    data->tnl_type = OVS_VPORT_TYPE_SRV6;
+
+    return 0;
+}
+
+/* Builds the SRv6 tunnel header for 'netdev' into 'data'.
+ *
+ * Uses the segment list configured on the tunnel if there is one;
+ * otherwise the tunnel destination in 'params->flow' is used as the only
+ * segment.
+ *
+ * Returns 0 on success, otherwise the positive errno value reported by
+ * srv6_build_header(). */
+int
+netdev_srv6_build_header(const struct netdev *netdev,
+                         struct ovs_action_push_tnl *data,
+                         const struct netdev_tnl_build_header_params *params)
+{
+    struct netdev_vport *dev = netdev_vport_cast(netdev);
+    struct netdev_tunnel_config *tnl_cfg;
+    int err;
+
+    ovs_mutex_lock(&dev->mutex);
+    tnl_cfg = &dev->tnl_cfg;
+
+    if (tnl_cfg->srv6_num_segs) {
+        err = srv6_build_header(data, params,
+                                tnl_cfg->srv6_num_segs, tnl_cfg->srv6_segs);
+    } else {
+        /*
+         * If no explicit segment list is configured, the tunnel
+         * destination is used as the only segment.
+         */
+        err = srv6_build_header(data, params,
+                                1, &params->flow->tunnel.ipv6_dst);
+    }
+
+    ovs_mutex_unlock(&dev->mutex);
+
+    return err;
+}
+
+/* Prepends the prebuilt SRv6 encapsulation header held in 'data' to
+ * 'packet' using netdev_tnl_push_ip_header().  'netdev' is not used. */
+void
+netdev_srv6_push_header(const struct netdev *netdev OVS_UNUSED,
+                        struct dp_packet *packet,
+                        const struct ovs_action_push_tnl *data)
+{
+    /* Required output argument of the helper; the value is not needed. */
+    int ip_tot_size;
+
+    netdev_tnl_push_ip_header(packet, data->header,
+                              data->header_len, &ip_tot_size);
+}
+
+/* Decapsulates an SRv6 packet.
+ *
+ * The packet is accepted only if its IPv6 extension headers contain a
+ * routing header of type IPV6_SRCRT_TYPE_4 with segments_left == 0 whose
+ * next header is IPv4 or IPv6.  On success the tunnel metadata is filled
+ * in, the outer headers are stripped, 'packet->packet_type' is set to the
+ * inner protocol and 'packet' is returned.
+ *
+ * On any failure 'packet' is deleted and NULL is returned. */
+struct dp_packet *
+netdev_srv6_pop_header(struct dp_packet *packet)
+{
+    struct pkt_metadata *md = &packet->md;
+    struct flow_tnl *tnl = &md->tunnel;
+    const struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(packet);
+    const struct ovs_16aligned_ip6_frag *frag_hdr = NULL;
+    const struct ip6_rt_hdr *rt_hdr = NULL;
+    uint8_t nw_frag = 0;
+    uint8_t nw_proto;
+    const void *data;
+    size_t size;
+    int hlen;
+
+    /* Make sure a whole IPv6 header is present before reading it and
+     * before subtracting its length, which would otherwise wrap around. */
+    size = dp_packet_l3_size(packet);
+    if (!nh || size < IPV6_HEADER_LEN) {
+        goto err;
+    }
+    size -= IPV6_HEADER_LEN;
+    data = nh + 1;
+    nw_proto = nh->ip6_nxt;
+
+    /*
+     * Verifies that the routing header is present in the IPv6
+     * extension headers and that its type is SRv6.
+     */
+    if (!parse_ipv6_ext_hdrs(&data, &size, &nw_proto, &nw_frag,
+                             &frag_hdr, &rt_hdr)) {
+        goto err;
+    }
+
+    if (!rt_hdr || rt_hdr->type != IPV6_SRCRT_TYPE_4) {
+        goto err;
+    }
+
+    /* Only packets that have reached their final segment are handled. */
+    if (rt_hdr->segments_left > 0) {
+        VLOG_WARN_RL(&err_rl, "invalid srv6 segments_left=%d",
+                     rt_hdr->segments_left);
+        goto err;
+    }
+
+    if (rt_hdr->nexthdr == IPPROTO_IPIP) {
+        packet->packet_type = htonl(PT_IPV4);
+    } else if (rt_hdr->nexthdr == IPPROTO_IPV6) {
+        packet->packet_type = htonl(PT_IPV6);
+    } else {
+        goto err;
+    }
+
+    pkt_metadata_init_tnl(md);
+
+    hlen = parse_srv6_header(packet, tnl);
+
+    dp_packet_reset_packet(packet, hlen);
+
+    return packet;
+
+err:
+    dp_packet_delete(packet);
+
+    return NULL;
+}
+
struct dp_packet *
netdev_vxlan_pop_header(struct dp_packet *packet)
{
@@ -65,6 +65,16 @@ netdev_gtpu_build_header(const struct netdev *netdev,
struct ovs_action_push_tnl *data,
const struct netdev_tnl_build_header_params *p);
+struct dp_packet *netdev_srv6_pop_header(struct dp_packet *packet);
+
+void netdev_srv6_push_header(const struct netdev *netdev,
+ struct dp_packet *packet,
+ const struct ovs_action_push_tnl *data);
+
+int netdev_srv6_build_header(const struct netdev *netdev,
+ struct ovs_action_push_tnl *data,
+ const struct netdev_tnl_build_header_params *p);
+
void
netdev_tnl_push_udp_header(const struct netdev *netdev,
struct dp_packet *packet,
@@ -424,6 +424,35 @@ parse_tunnel_ip(const char *value, bool accept_mcast, bool *flow,
return 0;
}
+/* Parses 's', a comma-separated list of IPv6 addresses, into 'segs' and
+ * stores the number of addresses in '*num_segs'.  Empty list elements
+ * (leading, trailing or repeated commas) are skipped.  'segs' must have
+ * room for SRV6_MAX_SEGS addresses.
+ *
+ * 's' is not modified, so the caller can still print the complete value
+ * when reporting an error.
+ *
+ * Returns 0 on success.  Returns EINVAL if 's' is NULL, if it holds more
+ * than SRV6_MAX_SEGS addresses, or if an element is not a valid IPv6
+ * address.  On a parse failure '*num_segs' is 0, so a partially parsed
+ * list is never taken for a valid one; when 's' is NULL '*num_segs' is
+ * left untouched. */
+static int
+parse_srv6_segs(char *s, struct in6_addr *segs, uint8_t *num_segs)
+{
+    const char *p = s;
+    uint8_t n = 0;
+
+    if (!s) {
+        return EINVAL;
+    }
+
+    *num_segs = 0;
+
+    for (;;) {
+        char token[INET6_ADDRSTRLEN];
+        size_t len;
+
+        p += strspn(p, ",");
+        len = strcspn(p, ",");
+        if (!len) {
+            break;
+        }
+
+        /* Anything that does not fit in 'token' cannot be a valid
+         * textual IPv6 address. */
+        if (n == SRV6_MAX_SEGS || len >= sizeof token) {
+            return EINVAL;
+        }
+
+        memcpy(token, p, len);
+        token[len] = '\0';
+        if (inet_pton(AF_INET6, token, &segs[n]) != 1) {
+            return EINVAL;
+        }
+
+        n++;
+        p += len;
+    }
+
+    *num_segs = n;
+
+    return 0;
+}
+
enum tunnel_layers {
TNL_L2 = 1 << 0, /* 1 if a tunnel type can carry Ethernet traffic. */
TNL_L3 = 1 << 1 /* 1 if a tunnel type can carry L3 traffic. */
@@ -443,6 +472,8 @@ tunnel_supported_layers(const char *type,
return TNL_L3;
} else if (!strcmp(type, "bareudp")) {
return TNL_L3;
+ } else if (!strcmp(type, "srv6")) {
+ return TNL_L3;
} else {
return TNL_L2;
}
@@ -750,6 +781,17 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp)
goto out;
}
}
+ } else if (!strcmp(node->key, "srv6_segs")) {
+ err = parse_srv6_segs(node->value,
+ tnl_cfg.srv6_segs,
+ &tnl_cfg.srv6_num_segs);
+
+ switch (err) {
+ case EINVAL:
+ ds_put_format(&errors, "%s: bad %s 'srv6_segs'\n",
+ name, node->value);
+ break;
+ }
} else if (!strcmp(node->key, "payload_type")) {
if (!strcmp(node->value, "mpls")) {
tnl_cfg.payload_ethertype = htons(ETH_TYPE_MPLS);
@@ -1290,6 +1332,17 @@ netdev_vport_tunnel_register(void)
},
{{NULL, NULL, 0, 0}}
},
+ { "srv6_sys",
+ {
+ TUNNEL_FUNCTIONS_COMMON,
+ .type = "srv6",
+ .build_header = netdev_srv6_build_header,
+ .push_header = netdev_srv6_push_header,
+ .pop_header = netdev_srv6_pop_header,
+ .get_ifindex = NETDEV_VPORT_GET_IFINDEX,
+ },
+ {{NULL, NULL, 0, 0}}
+ },
};
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
@@ -140,6 +140,10 @@ struct netdev_tunnel_config {
bool erspan_idx_flow;
bool erspan_dir_flow;
bool erspan_hwid_flow;
+
+ uint8_t srv6_num_segs;
+ #define SRV6_MAX_SEGS 6
+ struct in6_addr srv6_segs[SRV6_MAX_SEGS];
};
void netdev_run(void);
@@ -714,6 +714,24 @@ format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data)
ds_put_char(ds, ')');
}
+ ds_put_char(ds, ')');
+ } else if (data->tnl_type == OVS_VPORT_TYPE_SRV6) {
+ const struct srv6_base_hdr *srh;
+ struct in6_addr *segs;
+ int i;
+ int nr_segs;
+
+ srh = (const struct srv6_base_hdr *) l4;
+ segs = ALIGNED_CAST(struct in6_addr *, srh + 1);
+ nr_segs = srh->last_entry + 1;
+
+ ds_put_format(ds, "srv6(");
+ ds_put_format(ds, "segments_left=%d", srh->rt_hdr.segments_left);
+ ds_put_format(ds, ",segs=");
+ for (i = 0; i < nr_segs; i++) {
+ ds_put_format(ds, i > 0 ? "," : "");
+ ipv6_format_addr(&segs[nr_segs - i - 1], ds);
+ }
ds_put_char(ds, ')');
} else if (data->tnl_type == OVS_VPORT_TYPE_GRE ||
data->tnl_type == OVS_VPORT_TYPE_IP6GRE) {
@@ -1534,6 +1552,7 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data)
uint8_t hwid, dir;
uint32_t teid;
uint8_t gtpu_flags, gtpu_msgtype;
+ uint8_t segments_left;
if (!ovs_scan_len(s, &n, "tnl_push(tnl_port(%"SCNi32"),", &data->tnl_port)) {
return -EINVAL;
@@ -1775,6 +1794,43 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data)
tnl_type = OVS_VPORT_TYPE_GTPU;
header_len = sizeof *eth + ip_len +
sizeof *udp + sizeof *gtph;
+        } else if (ovs_scan_len(s, &n, "srv6(segments_left=%"SCNu8,
+                                &segments_left)) {
+            struct srv6_base_hdr *srh = (struct srv6_base_hdr *) (ip6 + 1);
+            /* Wider than uint8_t so that segments_left == 255 cannot wrap
+             * the segment count around to 0. */
+            unsigned int n_segs = segments_left + 1u;
+            unsigned int n_parsed = 0;
+            char seg_s[IPV6_SCAN_LEN + 1];
+            struct in6_addr *segs;
+            struct in6_addr seg;
+            size_t srv6_len;
+
+            /* Reject segment counts whose header would not fit in the
+             * tunnel push header buffer before writing anything to it. */
+            srv6_len = sizeof *eth + ip_len + sizeof *srh
+                       + n_segs * sizeof *segs;
+            if (srv6_len > TNL_PUSH_HEADER_SIZE) {
+                return -EINVAL;
+            }
+
+            ip6->ip6_nxt = IPPROTO_ROUTING;
+
+            srh->rt_hdr.type = IPV6_SRCRT_TYPE_4;
+            srh->rt_hdr.segments_left = segments_left;
+            srh->rt_hdr.hdrlen = 2 * n_segs;
+            srh->last_entry = n_segs - 1;
+
+            tnl_type = OVS_VPORT_TYPE_SRV6;
+            header_len = srv6_len;
+
+            /* Parse segment list.  The textual list is in travel order,
+             * while the header stores it in reverse, so the list is filled
+             * in from the last slot down to the first. */
+            if (!ovs_scan_len(s, &n, ",segs="IPV6_SCAN_FMT, seg_s)
+                || inet_pton(AF_INET6, seg_s, &seg) != 1) {
+                return -EINVAL;
+            }
+
+            segs = ALIGNED_CAST(struct in6_addr *, srh + 1);
+            memcpy(&segs[n_segs - 1 - n_parsed], &seg, sizeof seg);
+            n_parsed++;
+
+            while (ovs_scan_len(s, &n, ","IPV6_SCAN_FMT, seg_s)
+                   && inet_pton(AF_INET6, seg_s, &seg) == 1) {
+                if (n_parsed == n_segs) {
+                    /* More segments than 'segments_left' announced. */
+                    return -EINVAL;
+                }
+                memcpy(&segs[n_segs - 1 - n_parsed], &seg, sizeof seg);
+                n_parsed++;
+            }
+
+            /* Every slot announced by 'segments_left' must be filled. */
+            if (n_parsed != n_segs) {
+                return -EINVAL;
+            }
+
+            if (!ovs_scan_len(s, &n, "))")) {
+                return -EINVAL;
+            }
} else {
return -EINVAL;
}
@@ -710,6 +710,10 @@ char *ip_parse_cidr_len(const char *s, int *n, ovs_be32 *ip,
#define IPPROTO_UDPLITE 136
#endif
+#ifndef IPPROTO_IPIP
+#define IPPROTO_IPIP 4
+#endif
+
/* TOS fields. */
#define IP_ECN_NOT_ECT 0x0
#define IP_ECN_ECT_1 0x01
@@ -988,6 +992,15 @@ struct ovs_16aligned_ip6_frag {
ovs_16aligned_be32 ip6f_ident;
};
+#define IP6_RT_HDR_LEN 4
+/* Fixed four-byte start of an IPv6 routing extension header. */
+struct ip6_rt_hdr {
+ uint8_t nexthdr; /* Protocol number of the header that follows. */
+ uint8_t hdrlen; /* Length in 8-octet units, not counting the first 8. */
+ uint8_t type; /* Routing type, e.g. IPV6_SRCRT_TYPE_4 for SRv6. */
+ uint8_t segments_left; /* 0 is the only value the SRv6 pop accepts. */
+};
+BUILD_ASSERT_DECL(IP6_RT_HDR_LEN == sizeof(struct ip6_rt_hdr));
+
+
#define ICMP6_HEADER_LEN 4
struct icmp6_header {
uint8_t icmp6_type;
@@ -1514,6 +1527,17 @@ BUILD_ASSERT_DECL(sizeof(struct vxlanhdr) == 8);
#define VXLAN_F_GPE 0x4000
#define VXLAN_HF_GPE 0x04000000
+/* SRv6 protocol header: the generic routing header start followed by the
+ * SRv6-specific fixed fields.  The segment list (16 bytes per segment)
+ * follows this structure directly. */
+#define IPV6_SRCRT_TYPE_4 4
+#define SRV6_BASE_HDR_LEN 8
+struct srv6_base_hdr {
+ struct ip6_rt_hdr rt_hdr;
+ uint8_t last_entry; /* Index of the last segment: segment count - 1. */
+ uint8_t flags;
+ ovs_be16 tag;
+};
+BUILD_ASSERT_DECL(sizeof(struct srv6_base_hdr) == SRV6_BASE_HDR_LEN);
+
+
/* Input values for PACKET_TYPE macros have to be in host byte order.
* The _BE postfix indicates result is in network byte order. Otherwise result
* is in host byte order. */
@@ -126,7 +126,7 @@ map_insert(odp_port_t port, struct eth_addr mac, struct in6_addr *addr,
/* XXX: No fragments support. */
match.wc.masks.nw_frag = FLOW_NW_FRAG_MASK;
- /* 'tp_port' is zero for GRE tunnels. In this case it
+ /* 'tp_port' is zero for GRE and SRv6 tunnels. In this case it
* doesn't make sense to match on UDP port numbers. */
if (tp_port) {
match.wc.masks.tp_dst = OVS_BE16_MAX;
@@ -161,40 +161,39 @@ map_insert_ipdev__(struct ip_device *ip_dev, char dev_name[],
}
}
-static uint8_t
-tnl_type_to_nw_proto(const char type[])
+/* Stores in 'nw_protos' the IP protocol numbers under which a tunnel of
+ * 'type' is registered.  Unused slots are set to 0, so an unknown 'type'
+ * yields { 0, 0 }.  Only "srv6" uses both slots. */
+static void
+tnl_type_to_nw_proto(const char type[], uint8_t nw_protos[2])
{
+ /* Clear both slots: callers test each of them, including slot 0. */
+ nw_protos[0] = 0;
+ nw_protos[1] = 0;
+
if (!strcmp(type, "geneve")) {
- return IPPROTO_UDP;
+ nw_protos[0] = IPPROTO_UDP;
}
if (!strcmp(type, "stt")) {
- return IPPROTO_TCP;
+ nw_protos[0] = IPPROTO_TCP;
}
if (!strcmp(type, "gre") || !strcmp(type, "erspan") ||
!strcmp(type, "ip6erspan") || !strcmp(type, "ip6gre")) {
- return IPPROTO_GRE;
+ nw_protos[0] = IPPROTO_GRE;
}
if (!strcmp(type, "vxlan")) {
- return IPPROTO_UDP;
+ nw_protos[0] = IPPROTO_UDP;
}
if (!strcmp(type, "gtpu")) {
- return IPPROTO_UDP;
+ nw_protos[0] = IPPROTO_UDP;
+ }
+ if (!strcmp(type, "srv6")) {
+ nw_protos[0] = IPPROTO_IPIP;
+ nw_protos[1] = IPPROTO_IPV6;
}
- return 0;
}
-void
-tnl_port_map_insert(odp_port_t port, ovs_be16 tp_port,
- const char dev_name[], const char type[])
+static void
+tnl_port_map_insert__(odp_port_t port, ovs_be16 tp_port,
+ const char dev_name[], uint8_t nw_proto)
{
struct tnl_port *p;
struct ip_device *ip_dev;
- uint8_t nw_proto;
-
- nw_proto = tnl_type_to_nw_proto(type);
- if (!nw_proto) {
- return;
- }
ovs_mutex_lock(&mutex);
LIST_FOR_EACH(p, node, &port_list) {
@@ -220,6 +219,22 @@ out:
ovs_mutex_unlock(&mutex);
}
+/* Registers tunnel port 'port' (device 'dev_name', transport port
+ * 'tp_port') once for every nonzero IP protocol number that
+ * tnl_type_to_nw_proto() reports for 'type'. */
+void
+tnl_port_map_insert(odp_port_t port, ovs_be16 tp_port,
+                    const char dev_name[], const char type[])
+{
+    uint8_t protos[2];
+    const uint8_t *proto;
+
+    tnl_type_to_nw_proto(type, protos);
+
+    for (proto = protos; proto < protos + 2; proto++) {
+        if (!*proto) {
+            continue;
+        }
+        tnl_port_map_insert__(port, tp_port, dev_name, *proto);
+    }
+}
+
+
static void
tnl_port_unref(const struct cls_rule *cr)
{
@@ -256,14 +271,11 @@ ipdev_map_delete(struct ip_device *ip_dev, ovs_be16 tp_port, uint8_t nw_proto)
}
}
-void
-tnl_port_map_delete(odp_port_t port, const char type[])
+static void
+tnl_port_map_delete__(odp_port_t port, uint8_t nw_proto)
{
struct tnl_port *p;
struct ip_device *ip_dev;
- uint8_t nw_proto;
-
- nw_proto = tnl_type_to_nw_proto(type);
ovs_mutex_lock(&mutex);
LIST_FOR_EACH_SAFE (p, node, &port_list) {
@@ -280,6 +292,21 @@ tnl_port_map_delete(odp_port_t port, const char type[])
ovs_mutex_unlock(&mutex);
}
+/* Removes tunnel port 'port' once for every nonzero IP protocol number
+ * that tnl_type_to_nw_proto() reports for 'type'. */
+void
+tnl_port_map_delete(odp_port_t port, const char type[])
+{
+    uint8_t protos[2];
+    const uint8_t *proto;
+
+    tnl_type_to_nw_proto(type, protos);
+
+    for (proto = protos; proto < protos + 2; proto++) {
+        if (!*proto) {
+            continue;
+        }
+        tnl_port_map_delete__(port, *proto);
+    }
+}
+
/* 'flow' is non-const to allow for temporary modifications during the lookup.
* Any changes are restored before returning. */
odp_port_t
@@ -3617,20 +3617,24 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, struct eth_addr dmac,
struct flow *base_flow, *flow;
flow = &ctx->xin->flow;
base_flow = &ctx->base_flow;
- uint8_t nw_proto = 0;
+ uint8_t nw_protos[2] = {0};
switch (tnl_type) {
case OVS_VPORT_TYPE_GRE:
case OVS_VPORT_TYPE_ERSPAN:
case OVS_VPORT_TYPE_IP6ERSPAN:
case OVS_VPORT_TYPE_IP6GRE:
- nw_proto = IPPROTO_GRE;
+ nw_protos[0] = IPPROTO_GRE;
break;
case OVS_VPORT_TYPE_VXLAN:
case OVS_VPORT_TYPE_GENEVE:
case OVS_VPORT_TYPE_GTPU:
case OVS_VPORT_TYPE_BAREUDP:
- nw_proto = IPPROTO_UDP;
+ nw_protos[0] = IPPROTO_UDP;
+ break;
+ case OVS_VPORT_TYPE_SRV6:
+ nw_protos[0] = IPPROTO_IPIP;
+ nw_protos[1] = IPPROTO_IPV6;
break;
case OVS_VPORT_TYPE_LISP:
case OVS_VPORT_TYPE_STT:
@@ -3645,10 +3649,10 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, struct eth_addr dmac,
* Update base_flow first followed by flow as the dst_flow gets modified
* in the function.
*/
- propagate_tunnel_data_to_flow__(base_flow, flow, dmac, smac, s_ip6, s_ip,
- is_tnl_ipv6, nw_proto);
- propagate_tunnel_data_to_flow__(flow, flow, dmac, smac, s_ip6, s_ip,
- is_tnl_ipv6, nw_proto);
+ propagate_tunnel_data_to_flow__(base_flow, flow, dmac, smac, s_ip6,
+ s_ip, is_tnl_ipv6, nw_protos[0]);
+ propagate_tunnel_data_to_flow__(flow, flow, dmac, smac, s_ip6,
+ s_ip, is_tnl_ipv6, nw_protos[0]);
}
static int
@@ -474,6 +474,14 @@ class ODPFlow(Flow):
}
)
),
+ "srv6": nested_kv_decoder(
+ KVDecoders(
+ {
+ "segments_left": decode_int,
+ "segs": decode_default,
+ }
+ )
+ ),
}
)
),
@@ -452,6 +452,22 @@ def test_odp_fields(input_string, expected):
),
],
),
+ (
+ "actions:tnl_push(header(srv6(segments_left=0,segs=2001:cafe::92)))",
+ [
+ KeyValue(
+ "tnl_push",
+ {
+ "header": {
+ "srv6": {
+ "segments_left": 0,
+ "segs": "2001:cafe::92",
+ }
+ }
+ },
+ ),
+ ],
+ ),
(
"actions:clone(1),clone(clone(push_vlan(vid=12,pcp=0),2),1)",
[
@@ -163,6 +163,7 @@ SYSTEM_USERSPACE_TESTSUITE_AT = \
tests/system-userspace-testsuite.at \
tests/system-userspace-macros.at \
tests/system-userspace-packet-type-aware.at \
+ tests/system-userspace-traffic.at \
tests/system-route.at
SYSTEM_TSO_TESTSUITE_AT = \
@@ -346,6 +346,7 @@ tnl_push(tnl_port(6),header(size=70,type=4,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:1
tnl_push(tnl_port(6),header(size=70,type=5,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=17,tclass=0x0,hlimit=64),udp(src=0,dst=6081,csum=0x0),geneve(oam,vni=0x1c7)),out_port(1))
tnl_push(tnl_port(6),header(size=78,type=5,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=17,tclass=0x0,hlimit=64),udp(src=0,dst=6081,csum=0x0),geneve(crit,vni=0x1c7,options({class=0xffff,type=0x80,len=4,0xa}))),out_port(1))
tnl_push(tnl_port(6),header(size=70,type=5,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=17,tclass=0x0,hlimit=64),udp(src=0,dst=6081,csum=0xffff),geneve(vni=0x1c7)),out_port(1))
+tnl_push(tnl_port(6),header(size=78,type=112,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=43,tclass=0x0,hlimit=64),srv6(segments_left=0,segs=2001:cafe::92)),out_port(1))
ct
ct(commit)
ct(commit,zone=5)
@@ -126,6 +126,22 @@ m4_define([ADD_VETH_BOND],
]
)
+# ADD_VETH_NS([ns1], [port1], [ip_addr1], [ns2], [port2], [ip_addr2])
+#
+# Add a pair of veth ports in 'ns1' and 'ns2'. The port names are 'port1'
+# and 'port2' respectively, and the IP addresses 'ip_addr1' and 'ip_addr2'
+# are assigned to each port.  Both ports are brought up before the
+# addresses are assigned.
+m4_define([ADD_VETH_NS],
+    [ AT_CHECK([ip link add $2 type veth peer name $5])
+      AT_CHECK([ip link set $2 netns $1])
+      AT_CHECK([ip link set $5 netns $4])
+      NS_CHECK_EXEC([$1], [ip link set $2 up])
+      NS_CHECK_EXEC([$4], [ip link set $5 up])
+      NS_CHECK_EXEC([$1], [ip addr add $3 dev $2])
+      NS_CHECK_EXEC([$4], [ip addr add $6 dev $5])
+    ]
+)
+
# ADD_VLAN([port], [namespace], [vlan-id], [ip-addr])
#
# Add a VLAN device named 'port' within 'namespace'. It will be configured
@@ -331,6 +347,12 @@ m4_define([OVS_CHECK_GENEVE_UDP6ZEROCSUM],
[AT_SKIP_IF([! ip link add foo type geneve help 2>&1 | grep udp6zerocsum >/dev/null])
OVS_CHECK_FIREWALL()])
+# OVS_CHECK_SRV6()
+m4_define([OVS_CHECK_SRV6],
+ [AT_SKIP_IF([! ip -6 route add fc00::1/96 encap seg6 mode encap dev lo 2>&1 >/dev/null])
+ AT_CHECK([ip -6 route del fc00::1/96 2>&1 >/dev/null])
+ OVS_CHECK_FIREWALL()])
+
# OVS_CHECK_8021AD()
m4_define([OVS_CHECK_8021AD],
[AT_SKIP_IF([! grep -q "VLAN header stack length probed as" ovs-vswitchd.log])
@@ -26,4 +26,5 @@ m4_include([tests/system-traffic.at])
m4_include([tests/system-layer3-tunnels.at])
m4_include([tests/system-interface.at])
m4_include([tests/system-userspace-packet-type-aware.at])
+m4_include([tests/system-userspace-traffic.at])
m4_include([tests/system-route.at])
new file mode 100644
@@ -0,0 +1,120 @@
+AT_BANNER([userspace-datapath-sanity])
+
+AT_SETUP([datapath - ping over srv6 tunnel])
+OVS_CHECK_TUNNEL_TSO()
+OVS_CHECK_SRV6()
+
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0)
+ADD_NAMESPACES(at_ns1)
+NS_EXEC([at_ns0], [sysctl -w net.ipv6.conf.default.seg6_enabled=1])
+NS_EXEC([at_ns0], [sysctl -w net.ipv4.conf.default.forwarding=1])
+NS_EXEC([at_ns0], [sysctl -w net.ipv6.conf.default.forwarding=1])
+NS_EXEC([at_ns0], [sysctl -w net.ipv6.conf.all.seg6_enabled=1])
+NS_EXEC([at_ns0], [sysctl -w net.ipv4.conf.all.forwarding=1])
+NS_EXEC([at_ns0], [sysctl -w net.ipv6.conf.all.forwarding=1])
+
+dnl Set up underlay link from host into the namespace 'at_ns0'
+dnl using veth pair. Kernel side tunnel endpoint (SID) is
+dnl 'fc00:a::1/128', so add it to the route.
+ADD_BR([br-underlay])
+ADD_VETH(p0, at_ns0, br-underlay, "fc00::1/64", [], [], "nodad")
+AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"])
+AT_CHECK([ip addr add dev br-underlay "fc00::100/64" nodad])
+AT_CHECK([ip link set dev br-underlay up])
+AT_CHECK([ip route add fc00:a::1/128 dev br-underlay via fc00::1])
+
+dnl Set up tunnel endpoints on OVS outside the namespace.
+ADD_OVS_TUNNEL6([srv6], [br0], [at_srv6], [fc00:a::1], [10.100.100.100/24])
+AT_CHECK([ovs-vsctl set bridge br0 other_config:hwaddr=aa:55:aa:55:00:00])
+AT_CHECK([ip route add 10.1.1.0/24 dev br0 via 10.100.100.1])
+AT_CHECK([arp -s 10.100.100.1 aa:55:aa:55:00:01])
+AT_CHECK([ovs-ofctl add-flow br0 in_port=LOCAL,actions=output:at_srv6])
+AT_CHECK([ovs-ofctl add-flow br0 in_port=at_srv6,actions=mod_dl_dst:aa:55:aa:55:00:00,output:LOCAL])
+
+dnl Set up tunnel endpoints on the namespace 'at_ns0',
+dnl and overlay port on the namespace 'at_ns1'
+ADD_VETH_NS([at_ns0], [veth0], [10.1.1.2/24], [at_ns1], [veth1], [10.1.1.1/24])
+NS_CHECK_EXEC([at_ns0], [ip sr tunsrc set fc00:a::1])
+NS_CHECK_EXEC([at_ns0], [ip route add 10.100.100.0/24 encap seg6 mode encap segs fc00::100 dev p0])
+NS_CHECK_EXEC([at_ns0], [ip -6 route add fc00:a::1 encap seg6local action End.DX4 nh4 0.0.0.0 dev veth0])
+NS_CHECK_EXEC([at_ns1], [ip route add 10.100.100.0/24 via 10.1.1.2 dev veth1])
+
+dnl Linux seems to take a little time to get its IPv6 stack in order. Without
+dnl waiting, we get occasional failures due to the following error:
+dnl "connect: Cannot assign requested address"
+OVS_WAIT_UNTIL([ip netns exec at_ns0 ping6 -c 1 fc00::100])
+
+dnl First, check the underlay.
+NS_CHECK_EXEC([at_ns0], [ping6 -q -c 3 -i 0.3 -w 2 fc00::100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Okay, now check the overlay.
+NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.100.100.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([datapath - ping6 over srv6 tunnel])
+OVS_CHECK_TUNNEL_TSO()
+OVS_CHECK_SRV6()
+
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0)
+ADD_NAMESPACES(at_ns1)
+NS_EXEC([at_ns0], [sysctl -w net.ipv6.conf.default.seg6_enabled=1])
+NS_EXEC([at_ns0], [sysctl -w net.ipv6.conf.default.forwarding=1])
+NS_EXEC([at_ns0], [sysctl -w net.ipv6.conf.all.seg6_enabled=1])
+NS_EXEC([at_ns0], [sysctl -w net.ipv6.conf.all.forwarding=1])
+
+dnl Set up underlay link from host into the namespace 'at_ns0'
+dnl using veth pair. Kernel side tunnel endpoint (SID) is
+dnl 'fc00:a::1/128', so add it to the route.
+ADD_BR([br-underlay])
+ADD_VETH(p0, at_ns0, br-underlay, "fc00::1/64", [], [], "nodad")
+AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"])
+AT_CHECK([ip addr add dev br-underlay "fc00::100/64" nodad])
+AT_CHECK([ip link set dev br-underlay up])
+AT_CHECK([ip -6 route add fc00:a::1/128 dev br-underlay via fc00::1])
+
+dnl Set up tunnel endpoints on OVS outside the namespace.
+ADD_OVS_TUNNEL6([srv6], [br0], [at_srv6], [fc00:a::1], [fc00:100::100/64])
+AT_CHECK([ovs-vsctl set bridge br0 other_config:hwaddr=aa:55:aa:55:00:00])
+dnl [sleep infinity]
+AT_CHECK([ip addr add dev br0 fc00:100::100/64])
+AT_CHECK([ip -6 route add fc00:1::1/128 dev br0 via fc00:100::1])
+AT_CHECK([ip -6 neigh add fc00:100::1 lladdr aa:55:aa:55:00:01 dev br0])
+AT_CHECK([ovs-ofctl add-flow br0 in_port=LOCAL,actions=output:at_srv6])
+AT_CHECK([ovs-ofctl add-flow br0 in_port=at_srv6,actions=mod_dl_dst:aa:55:aa:55:00:00,output:LOCAL])
+
+dnl Set up tunnel endpoints on the namespace 'at_ns0',
+dnl and overlay port on the namespace 'at_ns1'
+ADD_VETH_NS([at_ns0], [veth0], [fc00:1::2/64], [at_ns1], [veth1], [fc00:1::1/64])
+NS_CHECK_EXEC([at_ns0], [ip sr tunsrc set fc00:a::1])
+NS_CHECK_EXEC([at_ns0], [ip -6 route add fc00:100::0/64 encap seg6 mode encap segs fc00::100 dev p0])
+NS_CHECK_EXEC([at_ns0], [ip -6 route add fc00:a::1 encap seg6local action End.DX6 nh6 :: dev veth0])
+NS_CHECK_EXEC([at_ns1], [ip -6 route add fc00:100::/64 via fc00:1::2 dev veth1])
+
+dnl Linux seems to take a little time to get its IPv6 stack in order. Without
+dnl waiting, we get occasional failures due to the following error:
+dnl "connect: Cannot assign requested address"
+OVS_WAIT_UNTIL([ip netns exec at_ns0 ping6 -c 1 fc00::100])
+OVS_WAIT_UNTIL([ip netns exec at_ns1 ping6 -c 1 fc00:100::100])
+
+dnl First, check the underlay.
+NS_CHECK_EXEC([at_ns0], [ping6 -q -c 3 -i 0.3 -w 2 fc00::100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Okay, now check the overlay.
+NS_CHECK_EXEC([at_ns1], [ping6 -q -c 3 -i 0.3 -w 2 fc00:100::100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
@@ -202,6 +202,8 @@ AT_CHECK([ovs-vsctl add-port int-br t2 -- set Interface t2 type=vxlan \
options:remote_ip=flow options:key=123 ofport_request=5\
-- add-port int-br t5 -- set Interface t5 type=gre \
options:remote_ip=2001:cafe::92 options:key=455 options:packet_type=legacy_l3 ofport_request=6\
+ -- add-port int-br t6 -- set Interface t6 type=srv6 \
+ options:remote_ip=2001:cafe::92 ofport_request=7\
], [0])
AT_CHECK([ovs-appctl dpif/show], [0], [dnl
@@ -216,12 +218,15 @@ dummy@ovs-dummy: hit:0 missed:0
t3 4/4789: (vxlan: csum=true, out_key=flow, remote_ip=2001:cafe::93)
t4 5/6081: (geneve: key=123, remote_ip=flow)
t5 6/3: (gre: key=455, packet_type=legacy_l3, remote_ip=2001:cafe::92)
+ t6 7/6: (srv6: remote_ip=2001:cafe::92)
])
AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl
Listening ports:
genev_sys_6081 (6081) ref_cnt=1
gre_sys (3) ref_cnt=2
+srv6_sys (6) ref_cnt=1
+srv6_sys (6) ref_cnt=1
vxlan_sys_4789 (4789) ref_cnt=2
])
@@ -363,6 +368,8 @@ AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl
Listening ports:
genev_sys_6081 (6081) ref_cnt=1
gre_sys (3) ref_cnt=2
+srv6_sys (6) ref_cnt=1
+srv6_sys (6) ref_cnt=1
vxlan_sys_4789 (4789) ref_cnt=2
])
@@ -384,6 +391,12 @@ AT_CHECK([tail -1 stdout], [0],
[Datapath actions: tnl_pop(6081)
])
+dnl Check SRv6 tunnel pop
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:cafe::92,dst=2001:cafe::88,label=0,proto=4,tclass=0x0,hlimit=64)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: tnl_pop(6)
+])
+
dnl Check VXLAN tunnel push
AT_CHECK([ovs-ofctl add-flow int-br action=2])
AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:01),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
@@ -405,6 +418,13 @@ AT_CHECK([tail -1 stdout], [0],
[Datapath actions: tnl_push(tnl_port(3),header(size=62,type=109,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=47,tclass=0x0,hlimit=64),gre((flags=0x2000,proto=0x6558),key=0x1c8)),out_port(100)),1
])
+dnl Check SRv6 tunnel push
+AT_CHECK([ovs-ofctl add-flow int-br action=7])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:01),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: pop_eth,tnl_push(tnl_port(6),header(size=78,type=112,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=43,tclass=0x0,hlimit=64),srv6(segments_left=0,segs=2001:cafe::92)),out_port(100)),1
+])
+
dnl Check Geneve tunnel push
AT_CHECK([ovs-ofctl add-flow int-br "actions=set_field:2001:cafe::92->tun_ipv6_dst,5"])
AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:01),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
@@ -510,6 +530,8 @@ AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl
Listening ports:
genev_sys_6081 (6081) ref_cnt=1
gre_sys (3) ref_cnt=1
+srv6_sys (6) ref_cnt=1
+srv6_sys (6) ref_cnt=1
vxlan_sys_4789 (4789) ref_cnt=1
vxlan_sys_4790 (4790) ref_cnt=1
])
@@ -518,6 +540,7 @@ AT_CHECK([ovs-vsctl del-port int-br t1 \
-- del-port int-br t2 \
-- del-port int-br t4 \
-- del-port int-br t5 \
+ -- del-port int-br t6 \
], [0])
dnl Check tunnel lookup entries after deleting all remaining tunnel ports
@@ -1223,3 +1223,59 @@ AT_CHECK([ovs-vsctl add-port br0 p1 -- set int p1 type=dummy])
OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
OVS_APP_EXIT_AND_WAIT([ovsdb-server])]
AT_CLEANUP
+
+AT_SETUP([tunnel - SRV6 basic])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=dummy \
+ ofport_request=1 \
+ -- add-port br0 p2 -- set Interface p2 type=srv6 \
+ options:remote_ip=flow \
+ ofport_request=2])
+OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP
+
+dnl First set up the dummy interface IP address, then add the route,
+dnl so that the tnl-port table can get a valid IP address for the device.
+AT_CHECK([ovs-appctl netdev-dummy/ip6addr br0 fc00::1/64], [0], [OK
+])
+AT_CHECK([ovs-appctl ovs/route/add fc00::0/64 br0], [0], [OK
+])
+AT_CHECK([ovs-appctl ovs/route/show], [0], [dnl
+Route Table:
+User: fc00::/64 dev br0 SRC fc00::1
+])
+
+AT_DATA([flows.txt], [dnl
+in_port=1,actions=set_field:fc00::2->tun_ipv6_dst,output:2
+in_port=2,actions=1
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
+ br0 65534/100: (dummy-internal)
+ p1 1/1: (dummy)
+ p2 2/6: (srv6: remote_ip=flow)
+])
+
+AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl
+Listening ports:
+srv6_sys (6) ref_cnt=1
+srv6_sys (6) ref_cnt=1
+])
+
+AT_CHECK([ovs-appctl ofproto/list-tunnels], [0], [dnl
+port 6: p2 (srv6: ::->flow, key=0, legacy_l3, dp port=6, ttl=64)
+])
+
+dnl Encap: IPv4 inner packet. Expect the tunnel set action, then pop_eth, then output to 6.
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=4,ttl=128,frag=no),tcp(src=8,dst=9)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: set(tunnel(ipv6_dst=fc00::2,ttl=64,flags(df))),pop_eth,6
+])
+
+dnl Encap: IPv6 inner packet. The expected datapath actions match the IPv4 case above.
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x86dd),ipv6(src=2001:cafe::92,dst=2001:cafe::88,label=0,proto=47,tclass=0x0,hlimit=64)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: set(tunnel(ipv6_dst=fc00::2,ttl=64,flags(df))),pop_eth,6
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
SRv6 (Segment Routing IPv6) tunnel vport is responsible for encapsulating and decapsulating inner packets with an IPv6 header and an extension header called SRH (Segment Routing Header). See the spec at: https://datatracker.ietf.org/doc/html/rfc8754 This patch implements SRv6 tunneling in the userspace datapath. It uses the `remote_ip` and `local_ip` options as with existing tunnel protocols. It also adds a dedicated `srv6_segs` option to define a sequence of routers called a segment list. Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp> -- v6: * add tests that show interoperability between OVS and native kernel's implementation in tests/system-userspace-traffic.at. * fix the documentation. * add validation in routing header by parse_ipv6_ext_hdrs. * add parsing implementation and test in tests/odp.at, python/ovs/flow/odp.py and python/ovs/tests/test_odp.py. * fix coding style. * add build-time assertion on the structure size. v5: * rebased on latest master v4: * fix alignment on cast v3: * fix alignment on cast v2: * fix pointer arithmetic --- Documentation/faq/configuration.rst | 21 ++++ Documentation/faq/releases.rst | 1 + NEWS | 2 + include/linux/openvswitch.h | 1 + lib/conntrack.c | 4 +- lib/dpif-netlink-rtnl.c | 5 + lib/dpif-netlink.c | 5 + lib/flow.c | 26 ++++- lib/flow.h | 3 +- lib/ipf.c | 12 +- lib/netdev-native-tnl.c | 163 ++++++++++++++++++++++++++++ lib/netdev-native-tnl.h | 10 ++ lib/netdev-vport.c | 53 +++++++++ lib/netdev.h | 4 + lib/odp-util.c | 56 ++++++++++ lib/packets.h | 24 ++++ lib/tnl-ports.c | 73 +++++++++---- ofproto/ofproto-dpif-xlate.c | 18 +-- python/ovs/flow/odp.py | 8 ++ python/ovs/tests/test_odp.py | 16 +++ tests/automake.mk | 1 + tests/odp.at | 1 + tests/system-common-macros.at | 22 ++++ tests/system-userspace-testsuite.at | 1 + tests/system-userspace-traffic.at | 120 ++++++++++++++++++++ tests/tunnel-push-pop-ipv6.at | 23 ++++ tests/tunnel.at | 56 ++++++++++ 27 files changed, 687 insertions(+), 42 deletions(-) create mode 100644 
tests/system-userspace-traffic.at