@@ -47,6 +47,8 @@ dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len,
seg->l2_5_ofs = p->l2_5_ofs;
seg->l3_ofs = p->l3_ofs;
seg->l4_ofs = p->l4_ofs;
+ seg->inner_l3_ofs = p->inner_l3_ofs;
+ seg->inner_l4_ofs = p->inner_l4_ofs;
/* The protocol headers remain the same, so preserve hash and mark. */
*dp_packet_rss_ptr(seg) = *dp_packet_rss_ptr(p);
@@ -71,7 +73,12 @@ dp_packet_gso_nr_segs(struct dp_packet *p)
const char *data_tail;
const char *data_pos;
- data_pos = dp_packet_get_tcp_payload(p);
+ if (dp_packet_hwol_is_tunnel_vxlan(p) ||
+ dp_packet_hwol_is_tunnel_geneve(p)) {
+ data_pos = dp_packet_get_inner_tcp_payload(p);
+ } else {
+ data_pos = dp_packet_get_tcp_payload(p);
+ }
data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p);
return DIV_ROUND_UP(data_tail - data_pos, segsz);
@@ -91,12 +98,15 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
struct tcp_header *tcp_hdr;
struct ip_header *ip_hdr;
struct dp_packet *seg;
+ const char *data_pos;
uint16_t tcp_offset;
uint16_t tso_segsz;
+ uint16_t ip_id = 0;
uint32_t tcp_seq;
- uint16_t ip_id;
+ bool outer_ipv4;
int hdr_len;
int seg_len;
+ bool tnl;
tso_segsz = dp_packet_get_tso_segsz(p);
if (!tso_segsz) {
@@ -105,20 +115,35 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
return false;
}
- tcp_hdr = dp_packet_l4(p);
- tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
- tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
- hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p))
- + tcp_offset * 4;
- ip_id = 0;
- if (dp_packet_hwol_is_ipv4(p)) {
+ if (dp_packet_hwol_is_tunnel_vxlan(p) ||
+ dp_packet_hwol_is_tunnel_geneve(p)) {
+ data_pos = dp_packet_get_inner_tcp_payload(p);
+ outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p);
+ tcp_hdr = dp_packet_inner_l4(p);
+ ip_hdr = dp_packet_inner_l3(p);
+ tnl = true;
+ if (outer_ipv4) {
+ ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id);
+ } else if (dp_packet_hwol_is_ipv4(p)) {
+ ip_id = ntohs(ip_hdr->ip_id);
+ }
+ } else {
+ data_pos = dp_packet_get_tcp_payload(p);
+ outer_ipv4 = dp_packet_hwol_is_ipv4(p);
+ tcp_hdr = dp_packet_l4(p);
ip_hdr = dp_packet_l3(p);
- ip_id = ntohs(ip_hdr->ip_id);
+ tnl = false;
+ if (outer_ipv4) {
+ ip_id = ntohs(ip_hdr->ip_id);
+ }
}
+ tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
+ tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
+ hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p))
+ + tcp_offset * 4;
const char *data_tail = (char *) dp_packet_tail(p)
- dp_packet_l2_pad_size(p);
- const char *data_pos = dp_packet_get_tcp_payload(p);
int n_segs = dp_packet_gso_nr_segs(p);
for (int i = 0; i < n_segs; i++) {
@@ -130,8 +155,26 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
data_pos += seg_len;
+ if (tnl) {
+ /* Update tunnel L3 header. */
+ if (dp_packet_hwol_is_ipv4(seg)) {
+ ip_hdr = dp_packet_inner_l3(seg);
+ ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
+ dp_packet_inner_l4_size(seg));
+ ip_hdr->ip_id = htons(ip_id);
+ ip_hdr->ip_csum = 0;
+ ip_id++;
+ } else {
+ struct ovs_16aligned_ip6_hdr *ip6_hdr;
+
+ ip6_hdr = dp_packet_inner_l3(seg);
+ ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
+ = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr);
+ }
+ }
+
/* Update L3 header. */
- if (dp_packet_hwol_is_ipv4(seg)) {
+ if (outer_ipv4) {
ip_hdr = dp_packet_l3(seg);
ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
dp_packet_l4_size(seg));
@@ -146,7 +189,11 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
}
/* Update L4 header. */
- tcp_hdr = dp_packet_l4(seg);
+ if (tnl) {
+ tcp_hdr = dp_packet_inner_l4(seg);
+ } else {
+ tcp_hdr = dp_packet_l4(seg);
+ }
put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq));
tcp_seq += seg_len;
if (OVS_LIKELY(i < (n_segs - 1))) {
@@ -526,6 +526,16 @@ dp_packet_inner_l3(const struct dp_packet *b)
: NULL;
}
+static inline size_t
+dp_packet_inner_l3_size(const struct dp_packet *b)
+{
+ return OVS_LIKELY(b->inner_l3_ofs != UINT16_MAX)
+ ? (const char *) dp_packet_tail(b)
+ - (const char *) dp_packet_inner_l3(b)
+ - dp_packet_l2_pad_size(b)
+ : 0;
+}
+
static inline void *
dp_packet_inner_l4(const struct dp_packet *b)
{
@@ -560,6 +570,22 @@ dp_packet_get_tcp_payload(const struct dp_packet *b)
return NULL;
}
+static inline const void *
+dp_packet_get_inner_tcp_payload(const struct dp_packet *b)
+{
+ size_t l4_size = dp_packet_inner_l4_size(b);
+
+ if (OVS_LIKELY(l4_size >= TCP_HEADER_LEN)) {
+ struct tcp_header *tcp = dp_packet_inner_l4(b);
+ int tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
+
+ if (OVS_LIKELY(tcp_len >= TCP_HEADER_LEN && tcp_len <= l4_size)) {
+ return (const char *) tcp + tcp_len;
+ }
+ }
+ return NULL;
+}
+
static inline uint32_t
dp_packet_get_tcp_payload_length(const struct dp_packet *pkt)
{
@@ -342,6 +342,14 @@ netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
if (l4_ofs != UINT16_MAX) {
packet->inner_l4_ofs = l4_ofs + data->header_len;
}
+
+ if (dp_packet_hwol_is_tso(packet)) {
+ uint16_t tso_segsz = dp_packet_get_tso_segsz(packet);
+ if (tso_segsz > data->header_len) {
+ tso_segsz -= data->header_len;
+ dp_packet_set_tso_segsz(packet, tso_segsz);
+ }
+ }
}
static void *
@@ -69,8 +69,6 @@ COVERAGE_DEFINE(netdev_received);
COVERAGE_DEFINE(netdev_sent);
COVERAGE_DEFINE(netdev_add_router);
COVERAGE_DEFINE(netdev_get_stats);
-COVERAGE_DEFINE(netdev_vxlan_tso_drops);
-COVERAGE_DEFINE(netdev_geneve_tso_drops);
COVERAGE_DEFINE(netdev_push_header_drops);
COVERAGE_DEFINE(netdev_soft_seg_good);
COVERAGE_DEFINE(netdev_soft_seg_drops);
@@ -914,23 +912,6 @@ netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
if (dp_packet_hwol_is_tso(packet)) {
- if (dp_packet_hwol_is_tunnel_vxlan(packet)
- && !(netdev_flags & NETDEV_TX_VXLAN_TNL_TSO)) {
- VLOG_WARN_RL(&rl, "%s: No VXLAN TSO support",
- netdev_get_name(netdev));
- COVERAGE_INC(netdev_vxlan_tso_drops);
- dp_packet_delete_batch(batch, true);
- return false;
- }
-
- if (dp_packet_hwol_is_tunnel_geneve(packet)
- && !(netdev_flags & NETDEV_TX_GENEVE_TNL_TSO)) {
- VLOG_WARN_RL(&rl, "%s: No GENEVE TSO support",
- netdev_get_name(netdev));
- COVERAGE_INC(netdev_geneve_tso_drops);
- dp_packet_delete_batch(batch, true);
- return false;
- }
return netdev_send_tso(netdev, qid, batch, concurrent_txq);
}
}
@@ -341,6 +341,54 @@ AT_CHECK([diff -q payload.bin data], [0])
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([datapath - tcp over vxlan tunnel with software fallback])
+AT_SKIP_IF([test $HAVE_NC = no])
+OVS_CHECK_VXLAN()
+
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-underlay])
+
+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
+AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"])
+
+ADD_NAMESPACES(at_ns0)
+
+dnl Set up underlay link from host into the namespace using veth pair.
+ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24")
+AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"])
+AT_CHECK([ip link set dev br-underlay up])
+
+dnl Test the case where only one side has all checksum and tso offload disabled.
+AT_CHECK([ethtool -K ovs-p0 tso off], [0], [ignore], [ignore])
+AT_CHECK([ethtool -K ovs-p0 sg off], [0], [ignore], [ignore])
+
+dnl Reinitialize.
+AT_CHECK([ovs-vsctl del-port ovs-p0])
+AT_CHECK([ovs-vsctl add-port br-underlay ovs-p0])
+
+dnl Set up tunnel endpoints on OVS outside the namespace and with a native
+dnl linux device inside the namespace.
+ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24])
+ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.1.1.1/24],
+ [id 0 dstport 4789])
+
+dnl First, check the underlay.
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 172.31.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Large TCP transfer aimed towards ovs-p0, which has TSO disabled.
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
+NETNS_DAEMONIZE([at_ns0], [nc -l 10.1.1.1 1234 > data2], [nc2.pid])
+AT_CHECK([nc $NC_EOF_OPT 10.1.1.1 1234 < payload.bin])
+
+dnl Wait until transfer completes before checking.
+OVS_WAIT_WHILE([kill -0 $(cat nc2.pid)])
+AT_CHECK([diff -q payload.bin data2], [0])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([datapath - ping vlan over vxlan tunnel])
OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_VXLAN()
When sending packets that are flagged as requiring segmentation to an interface that doesn't support this feature, send the packet to the TSO software fallback instead of dropping it. Signed-off-by: Mike Pattrick <mkp@redhat.com> --- lib/dp-packet-gso.c | 73 +++++++++++++++++++++++++++++++++-------- lib/dp-packet.h | 26 +++++++++++++++ lib/netdev-native-tnl.c | 8 +++++ lib/netdev.c | 19 ----------- tests/system-traffic.at | 48 +++++++++++++++++++++++++++ 5 files changed, 142 insertions(+), 32 deletions(-)