@@ -433,6 +433,8 @@ dp_packet_reset_offsets(struct dp_packet *b)
b->l2_5_ofs = UINT16_MAX;
b->l3_ofs = UINT16_MAX;
b->l4_ofs = UINT16_MAX;
+ b->inner_l3_ofs = UINT16_MAX;
+ b->inner_l4_ofs = UINT16_MAX;
}
static inline uint16_t
@@ -865,14 +867,6 @@ dp_packet_set_data(struct dp_packet *b, void *data)
}
}
-static inline void
-dp_packet_reset_packet(struct dp_packet *b, int off)
-{
- dp_packet_set_size(b, dp_packet_size(b) - off);
- dp_packet_set_data(b, ((unsigned char *) dp_packet_data(b) + off));
- dp_packet_reset_offsets(b);
-}
-
enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
struct dp_packet_batch {
@@ -1411,21 +1405,36 @@ dp_packet_ol_reset_l4_csum_good(struct dp_packet *p)
}
}
-/* Marks packet 'p' with good integrity if the 'start' and 'offset'
- * matches with the 'csum_start' and 'csum_offset' in packet 'p'.
- * The 'start' is the offset from the begin of the packet headers.
- * The 'offset' is the offset from start to place the checksum.
+/* Marks packet 'p' with good integrity if checksum offload locations
+ * were provided. In the case of encapsulated packets, these values may
+ * be deeper into the packet than OVS might expect. But the packet
+ * should still be considered to have good integrity.
+ * 'csum_start' is the offset from the beginning of the packet headers.
+ * 'csum_offset' is the offset from 'csum_start' at which to place the
+ * checksum. Both fields must be non-zero for the packet to be marked
+ * via dp_packet_ol_set_l4_csum_partial().
+ * NOTE(review): only non-zero-ness is tested; the values are no longer
+ * compared against the parsed L4 offset -- confirm callers accept this.
* The csum_start and csum_offset fields are set from the virtio_net_hdr
* struct that may be provided by a netdev on packet ingress. */
static inline void
-dp_packet_ol_l4_csum_check_partial(struct dp_packet *p, uint16_t start,
- uint16_t offset)
+dp_packet_ol_l4_csum_check_partial(struct dp_packet *p)
{
- if (p->csum_start == start && p->csum_offset == offset) {
+ if (p->csum_start && p->csum_offset) {
dp_packet_ol_set_l4_csum_partial(p);
}
}
+static inline void
+dp_packet_reset_packet(struct dp_packet *b, int off)
+{
+ dp_packet_set_size(b, dp_packet_size(b) - off);
+ dp_packet_set_data(b, ((unsigned char *) dp_packet_data(b) + off));
+ dp_packet_reset_offsets(b);
+
+ if (b->csum_start >= off && b->csum_offset) {
+ /* Adjust values for decapsulation: the data pointer was advanced by
+  * 'off' bytes above, so 'csum_start' is rebased by the same amount
+  * and dp_packet_ol_set_l4_csum_partial() is applied to the packet.
+  * NOTE(review): when 'csum_start' < 'off' both fields are left
+  * untouched -- confirm stale values cannot be picked up later. */
+ b->csum_start -= off;
+ dp_packet_ol_set_l4_csum_partial(b);
+ }
+}
+
static inline uint32_t ALWAYS_INLINE
dp_packet_calc_hash_ipv4(const uint8_t *pkt, const uint16_t l3_ofs,
uint32_t hash)
@@ -776,9 +776,7 @@ mfex_ipv6_set_hwol(struct dp_packet *pkt)
static void
mfex_tcp_set_hwol(struct dp_packet *pkt)
{
- dp_packet_ol_l4_csum_check_partial(pkt, pkt->l4_ofs,
- offsetof(struct tcp_header,
- tcp_csum));
+ dp_packet_ol_l4_csum_check_partial(pkt);
if (dp_packet_l4_checksum_good(pkt)
|| dp_packet_ol_l4_csum_partial(pkt)) {
dp_packet_hwol_set_csum_tcp(pkt);
@@ -788,9 +786,7 @@ mfex_tcp_set_hwol(struct dp_packet *pkt)
static void
mfex_udp_set_hwol(struct dp_packet *pkt)
{
- dp_packet_ol_l4_csum_check_partial(pkt, pkt->l4_ofs,
- offsetof(struct udp_header,
- udp_csum));
+ dp_packet_ol_l4_csum_check_partial(pkt);
if (dp_packet_l4_checksum_good(pkt)
|| dp_packet_ol_l4_csum_partial(pkt)) {
dp_packet_hwol_set_csum_udp(pkt);
@@ -1054,9 +1054,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
} else if (dl_type == htons(ETH_TYPE_IPV6)) {
dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
}
- dp_packet_ol_l4_csum_check_partial(packet, packet->l4_ofs,
- offsetof(struct tcp_header,
- tcp_csum));
+ dp_packet_ol_l4_csum_check_partial(packet);
if (dp_packet_l4_checksum_good(packet)
|| dp_packet_ol_l4_csum_partial(packet)) {
dp_packet_hwol_set_csum_tcp(packet);
@@ -1076,9 +1074,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
} else if (dl_type == htons(ETH_TYPE_IPV6)) {
dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
}
- dp_packet_ol_l4_csum_check_partial(packet, packet->l4_ofs,
- offsetof(struct udp_header,
- udp_csum));
+ dp_packet_ol_l4_csum_check_partial(packet);
if (dp_packet_l4_checksum_good(packet)
|| dp_packet_ol_l4_csum_partial(packet)) {
dp_packet_hwol_set_csum_udp(packet);
@@ -1092,9 +1088,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
- dp_packet_ol_l4_csum_check_partial(packet, packet->l4_ofs,
- offsetof(struct sctp_header,
- sctp_csum));
+ dp_packet_ol_l4_csum_check_partial(packet);
if (dp_packet_l4_checksum_good(packet)
|| dp_packet_ol_l4_csum_partial(packet)) {
dp_packet_hwol_set_csum_sctp(packet);
@@ -7145,8 +7145,12 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
if (dp_packet_hwol_is_tso(b)) {
uint16_t tso_segsz = dp_packet_get_tso_segsz(b);
struct tcp_header *tcp = dp_packet_l4(b);
+ struct tcp_header *inner_tcp = dp_packet_inner_l4(b);
+ if (inner_tcp) {
+ tcp = inner_tcp;
+ }
int tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
- int hdr_len = ((char *) dp_packet_l4(b) - (char *) dp_packet_eth(b))
+ int hdr_len = ((char *) tcp - (char *) dp_packet_eth(b))
+ tcp_hdr_len;
int max_packet_len = mtu + ETH_HEADER_LEN + VLAN_HEADER_LEN;
@@ -7164,7 +7168,6 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
} else if (dp_packet_hwol_tx_ipv6(b)) {
vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
}
-
} else {
vnet->hdr_len = 0;
vnet->gso_size = 0;
@@ -7175,6 +7178,11 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
/* The packet has good L4 checksum. No need to validate again. */
vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
vnet->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+ if (!dp_packet_ip_checksum_good(b)) {
+ /* It is possible that L4 is good but the IP checksum isn't
+ * complete. */
+ dp_packet_ip_set_header_csum(b, false);
+ }
} else if (dp_packet_hwol_tx_l4_checksum(b)) {
/* The csum calculation is offloaded. */
if (dp_packet_hwol_l4_is_tcp(b)) {
@@ -7192,37 +7200,54 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
* the TCP pseudo header, so that replacing it by the ones
* complement checksum of the TCP header and body will give
* the correct result. */
+ void * l3_off = dp_packet_inner_l3(b);
+ void * l4_off = dp_packet_inner_l4(b);
+
+ if (!l3_off && !l4_off) {
+ l3_off = dp_packet_l3(b);
+ l4_off = dp_packet_l4(b);
+ }
- struct tcp_header *tcp_hdr = dp_packet_l4(b);
+ struct tcp_header *tcp_hdr = l4_off;
ovs_be16 csum = 0;
if (dp_packet_hwol_is_ipv4(b)) {
- const struct ip_header *ip_hdr = dp_packet_l3(b);
+ const struct ip_header *ip_hdr = l3_off;
csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
} else if (dp_packet_hwol_tx_ipv6(b)) {
- const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b);
+ const struct ovs_16aligned_ip6_hdr *ip6_hdr = l3_off;
csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
}
tcp_hdr->tcp_csum = csum;
vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs;
+ vnet->csum_start = (OVS_FORCE __virtio16) ((char *) l4_off -
+ (char *) dp_packet_data(b));
vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
struct tcp_header, tcp_csum);
} else if (dp_packet_hwol_l4_is_udp(b)) {
- struct udp_header *udp_hdr = dp_packet_l4(b);
+ void * l3_off = dp_packet_inner_l3(b);
+ void * l4_off = dp_packet_inner_l4(b);
+
+ if (!l3_off && !l4_off) {
+ l3_off = dp_packet_l3(b);
+ l4_off = dp_packet_l4(b);
+ }
+
+ struct udp_header *udp_hdr = l4_off;
ovs_be16 csum = 0;
if (dp_packet_hwol_is_ipv4(b)) {
- const struct ip_header *ip_hdr = dp_packet_l3(b);
+ const struct ip_header *ip_hdr = l3_off;
csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
} else if (dp_packet_hwol_tx_ipv6(b)) {
- const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b);
+ const struct ovs_16aligned_ip6_hdr *ip6_hdr = l3_off;
csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
}
udp_hdr->udp_csum = csum;
vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs;
+ vnet->csum_start = (OVS_FORCE __virtio16) ((char *) l4_off -
+ (char *) dp_packet_data(b));
vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
struct udp_header, udp_csum);
} else if (dp_packet_hwol_l4_is_sctp(b)) {
@@ -215,7 +215,8 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
}
if (udp->udp_csum) {
- if (OVS_UNLIKELY(!dp_packet_l4_checksum_good(packet))) {
+ if (OVS_LIKELY(!dp_packet_ol_l4_csum_partial(packet)) &&
+ OVS_UNLIKELY(!dp_packet_l4_checksum_good(packet))) {
uint32_t csum;
if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
csum = packet_csum_pseudoheader6(dp_packet_l3(packet));
@@ -316,6 +317,8 @@ netdev_tnl_push_udp_header(const struct netdev *netdev,
{
struct udp_header *udp;
int ip_tot_size;
+ uint16_t l3_ofs = packet->l3_ofs;
+ uint16_t l4_ofs = packet->l4_ofs;
dp_packet_tnl_ol_process(netdev, packet, data);
udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len,
@@ -333,13 +336,20 @@ netdev_tnl_push_udp_header(const struct netdev *netdev,
} else {
dp_packet_hwol_set_csum_udp(packet);
}
- } else {
- dp_packet_ol_set_l4_csum_good(packet);
}
- packet->inner_l3_ofs += packet->l4_ofs;
- packet->inner_l4_ofs += packet->l4_ofs;
+ if (packet->csum_start && packet->csum_offset) {
+ dp_packet_ol_set_l4_csum_partial(packet);
+ } else if (!udp->udp_csum) {
+ dp_packet_ol_set_l4_csum_good(packet);
+ }
+ if (l3_ofs != UINT16_MAX) {
+ packet->inner_l3_ofs = l3_ofs + data->header_len;
+ }
+ if (l4_ofs != UINT16_MAX) {
+ packet->inner_l4_ofs = l4_ofs + data->header_len;
+ }
}
static void *
@@ -292,7 +292,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over vxlan tunnel])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_VXLAN()
OVS_TRAFFIC_VSWITCHD_START()
@@ -330,6 +329,15 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PI
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
+dnl Check large bidirectional TCP.
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
+OVS_DAEMONIZE([nc -l 10.1.1.100 1234 > data], [nc.pid])
+NS_CHECK_EXEC([at_ns0], [nc $NC_EOF_OPT 10.1.1.100 1234 < payload.bin])
+
+dnl Wait until transfer completes before checking.
+OVS_WAIT_WHILE([kill -0 $(cat nc.pid)])
+AT_CHECK([diff -q payload.bin data], [0])
+
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
@@ -381,7 +389,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over vxlan6 tunnel])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_VXLAN_UDP6ZEROCSUM()
OVS_TRAFFIC_VSWITCHD_START()
@@ -425,7 +432,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over gre tunnel])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
OVS_CHECK_GRE()
@@ -467,7 +473,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over ip6gre L2 tunnel])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
OVS_CHECK_GRE()
OVS_CHECK_ERSPAN()
@@ -508,7 +513,6 @@ AT_CLEANUP
AT_SETUP([datapath - ping over erspan v1 tunnel])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
OVS_CHECK_GRE()
OVS_CHECK_ERSPAN()
@@ -545,7 +549,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over erspan v2 tunnel])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
OVS_CHECK_GRE()
OVS_CHECK_ERSPAN()
@@ -582,7 +585,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over ip6erspan v1 tunnel])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
OVS_CHECK_GRE()
OVS_CHECK_ERSPAN()
@@ -622,7 +624,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over ip6erspan v2 tunnel])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
OVS_CHECK_GRE()
OVS_CHECK_ERSPAN()
@@ -663,7 +664,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over geneve tunnel])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_GENEVE()
OVS_TRAFFIC_VSWITCHD_START()
@@ -701,11 +701,19 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PI
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
+dnl Check large bidirectional TCP.
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
+OVS_DAEMONIZE([nc -l 10.1.1.100 1234 > data], [nc.pid])
+NS_CHECK_EXEC([at_ns0], [nc $NC_EOF_OPT 10.1.1.100 1234 < payload.bin])
+
+dnl Wait until transfer completes before checking.
+OVS_WAIT_WHILE([kill -0 $(cat nc.pid)])
+AT_CHECK([diff -q payload.bin data], [0])
+
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over geneve tunnel, delete flow regression])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_GENEVE()
OVS_TRAFFIC_VSWITCHD_START()
@@ -760,7 +768,6 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/|ERR|/d
AT_CLEANUP
AT_SETUP([datapath - flow resume with geneve tun_metadata])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_GENEVE()
OVS_TRAFFIC_VSWITCHD_START()
@@ -812,7 +819,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over geneve6 tunnel])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_GENEVE_UDP6ZEROCSUM()
OVS_TRAFFIC_VSWITCHD_START()
@@ -857,7 +863,6 @@ AT_CLEANUP
AT_SETUP([datapath - slow_action on geneve6 tunnel])
AT_SKIP_IF([test $HAVE_TCPDUMP = no])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_GENEVE_UDP6ZEROCSUM()
OVS_TRAFFIC_VSWITCHD_START()
@@ -981,7 +986,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over gre tunnel by simulated packets])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_MIN_KERNEL(3, 10)
OVS_TRAFFIC_VSWITCHD_START()
@@ -1028,7 +1032,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over erspan v1 tunnel by simulated packets])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_MIN_KERNEL(3, 10)
OVS_TRAFFIC_VSWITCHD_START()
@@ -1077,7 +1080,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over erspan v2 tunnel by simulated packets])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_MIN_KERNEL(3, 10)
OVS_TRAFFIC_VSWITCHD_START()
@@ -1131,7 +1133,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over ip6erspan v1 tunnel by simulated packets])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_MIN_KERNEL(3, 10)
OVS_TRAFFIC_VSWITCHD_START()
@@ -1187,7 +1188,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over ip6erspan v2 tunnel by simulated packets])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_MIN_KERNEL(3, 10)
OVS_TRAFFIC_VSWITCHD_START()
@@ -1242,7 +1242,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over srv6 tunnel])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_SRV6()
OVS_TRAFFIC_VSWITCHD_START()
@@ -1304,7 +1303,6 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping6 over srv6 tunnel])
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_SRV6()
OVS_TRAFFIC_VSWITCHD_START()
@@ -7831,7 +7829,6 @@ AT_CLEANUP
AT_SETUP([conntrack - can match and clear ct_state from outside OVS])
CHECK_CONNTRACK_LOCAL_STACK()
-OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_GENEVE()
OVS_TRAFFIC_VSWITCHD_START()
This patch enables most of the tunnel tests in the test suite, and adds a
large TCP transfer to a vxlan test and a geneve test to verify TSO
functionality.

Some additional changes were required to accommodate these changes with
netdev-linux interfaces. The test for vlan over vxlan is purposely not
enabled, as the traffic produced by this test gives incorrect values in
the vnet header.

Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
 lib/dp-packet.h                  | 39 ++++++++++++++++-----------
 lib/dpif-netdev-extract-avx512.c |  8 ++----
 lib/flow.c                       | 12 +++------
 lib/netdev-linux.c               | 45 +++++++++++++++++++++++++-------
 lib/netdev-native-tnl.c          | 20 ++++++++++----
 tests/system-traffic.at          | 39 +++++++++++++--------------
 6 files changed, 97 insertions(+), 66 deletions(-)