@@ -2044,13 +2044,12 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
}
if (ok) {
- bool hwol_bad_l4_csum = dp_packet_l4_checksum_bad(pkt);
- if (!hwol_bad_l4_csum) {
- bool hwol_good_l4_csum = dp_packet_l4_checksum_good(pkt)
- || dp_packet_hwol_tx_l4_checksum(pkt);
+ if (!dp_packet_l4_checksum_bad(pkt)) {
/* Validate the checksum only when hwol is not supported. */
if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
- &ctx->icmp_related, l3, !hwol_good_l4_csum,
+ &ctx->icmp_related, l3,
+ !dp_packet_l4_checksum_good(pkt) &&
+ !dp_packet_hwol_tx_l4_checksum(pkt),
NULL)) {
ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
return true;
@@ -3379,8 +3378,10 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
adj_seqnum(&th->tcp_seq, ec->seq_skew);
}
- th->tcp_csum = 0;
- if (!dp_packet_hwol_tx_l4_checksum(pkt)) {
+ if (dp_packet_hwol_tx_l4_checksum(pkt)) {
+ dp_packet_ol_reset_l4_csum_good(pkt);
+ } else {
+ th->tcp_csum = 0;
if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
th->tcp_csum = packet_csum_upperlayer6(nh6, th, ctx->key.nw_proto,
dp_packet_l4_size(pkt));
@@ -38,6 +38,9 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so
dp_packet_init_specific(b);
/* By default assume the packet type to be Ethernet. */
b->packet_type = htonl(PT_ETH);
+ /* Reset csum start and offset. */
+ b->csum_start = 0;
+ b->csum_offset = 0;
}
static void
@@ -544,4 +547,26 @@ dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags)
dp_packet_ol_set_ip_csum_good(p);
dp_packet_hwol_reset_tx_ip_csum(p);
}
+
+ if (dp_packet_l4_checksum_good(p) || !dp_packet_hwol_tx_l4_checksum(p)) {
+ dp_packet_hwol_reset_tx_l4_csum(p);
+ return;
+ }
+
+ if (dp_packet_hwol_l4_is_tcp(p)
+ && !(flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
+ packet_tcp_complete_csum(p);
+ dp_packet_ol_set_l4_csum_good(p);
+ dp_packet_hwol_reset_tx_l4_csum(p);
+ } else if (dp_packet_hwol_l4_is_udp(p)
+ && !(flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) {
+ packet_udp_complete_csum(p);
+ dp_packet_ol_set_l4_csum_good(p);
+ dp_packet_hwol_reset_tx_l4_csum(p);
+ } else if (!(flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)
+ && dp_packet_hwol_l4_is_sctp(p)) {
+ packet_sctp_complete_csum(p);
+ dp_packet_ol_set_l4_csum_good(p);
+ dp_packet_hwol_reset_tx_l4_csum(p);
+ }
}
@@ -140,6 +140,8 @@ struct dp_packet {
or UINT16_MAX. */
uint32_t cutlen; /* length in bytes to cut from the end. */
ovs_be32 packet_type; /* Packet type as defined in OpenFlow */
+ uint16_t csum_start; /* Position to start checksumming from. */
+ uint16_t csum_offset; /* Offset to place checksum. */
union {
struct pkt_metadata md;
uint64_t data[DP_PACKET_CONTEXT_SIZE / 8];
@@ -997,6 +999,13 @@ dp_packet_hwol_is_ipv4(const struct dp_packet *b)
return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_IPV4);
}
+/* Returns 'true' if packet 'p' is marked as IPv6. */
+static inline bool
+dp_packet_hwol_tx_ipv6(const struct dp_packet *p)
+{
+ return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IPV6);
+}
+
/* Returns 'true' if packet 'b' is marked for TCP checksum offloading. */
static inline bool
dp_packet_hwol_l4_is_tcp(const struct dp_packet *b)
@@ -1021,18 +1030,26 @@ dp_packet_hwol_l4_is_sctp(struct dp_packet *b)
DP_PACKET_OL_TX_SCTP_CKSUM;
}
-/* Mark packet 'b' for IPv4 checksum offloading. */
static inline void
-dp_packet_hwol_set_tx_ipv4(struct dp_packet *b)
+dp_packet_hwol_reset_tx_l4_csum(struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_L4_MASK;
+}
+
+/* Mark packet 'p' as IPv4. */
+static inline void
+dp_packet_hwol_set_tx_ipv4(struct dp_packet *p)
{
- *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV4;
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6;
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4;
}
-/* Mark packet 'b' for IPv6 checksum offloading. */
+/* Mark packet 'a' as IPv6. */
static inline void
-dp_packet_hwol_set_tx_ipv6(struct dp_packet *b)
+dp_packet_hwol_set_tx_ipv6(struct dp_packet *a)
{
- *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6;
+ *dp_packet_ol_flags_ptr(a) &= ~DP_PACKET_OL_TX_IPV4;
+ *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6;
}
/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
@@ -1147,6 +1164,55 @@ dp_packet_l4_checksum_bad(const struct dp_packet *p)
DP_PACKET_OL_RX_L4_CKSUM_BAD;
}
+/* Returns 'true' if the packet has good integrity even though the
+ * checksum in the packet 'p' is not yet complete. */
+static inline bool
+dp_packet_ol_l4_csum_partial(const struct dp_packet *p)
+{
+ return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK) ==
+ DP_PACKET_OL_RX_L4_CKSUM_MASK;
+}
+
+/* Marks packet 'p' as having good integrity even though the checksum
+ * in the packet is not yet complete. */
+static inline void
+dp_packet_ol_set_l4_csum_partial(struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_MASK;
+}
+
+/* Marks packet 'p' with good L4 checksum. */
+static inline void
+dp_packet_ol_set_l4_csum_good(struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_BAD;
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_GOOD;
+}
+
+/* Clears the L4 checksum 'good' flag of a modified packet 'p', unless
 the checksum is partial. */
+static inline void
+dp_packet_ol_reset_l4_csum_good(struct dp_packet *p)
+{
+ if (!dp_packet_ol_l4_csum_partial(p)) {
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_GOOD;
+ }
+}
+
+/* Marks packet 'p' with good integrity if the 'start' and 'offset'
+ * match the 'csum_start' and 'csum_offset' in packet 'p'.
+ * The 'start' is the offset from the beginning of the packet headers.
+ * The 'offset' is the offset from start to place the checksum.
+ * The csum_start and csum_offset fields are set from the virtio_net_hdr
+ * struct that may be provided by a netdev on packet ingress. */
+static inline void
+dp_packet_ol_vnet_csum_check(struct dp_packet *p, uint16_t start,
+ uint16_t offset)
+{
+ if (p->csum_start == start && p->csum_offset == offset) {
+ dp_packet_ol_set_l4_csum_partial(p);
+ }
+}
+
static inline uint32_t ALWAYS_INLINE
dp_packet_calc_hash_ipv4(const uint8_t *pkt, const uint16_t l3_ofs,
uint32_t hash)
@@ -698,7 +698,6 @@ mfex_ipv6_set_l2_pad_size(struct dp_packet *pkt,
return -1;
}
dp_packet_set_l2_pad_size(pkt, len_from_ipv6 - (p_len + IPV6_HEADER_LEN));
- dp_packet_hwol_set_tx_ipv6(pkt);
return 0;
}
@@ -729,10 +728,6 @@ mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh,
return -1;
}
dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - ip_tot_len);
- dp_packet_hwol_set_tx_ipv4(pkt);
- if (dp_packet_ip_checksum_good(pkt)) {
- dp_packet_hwol_set_tx_ip_csum(pkt);
- }
return 0;
}
@@ -763,6 +758,45 @@ mfex_check_tcp_data_offset(const struct tcp_header *tcp)
return ret;
}
+static void
+mfex_ipv4_set_hwol(struct dp_packet *pkt)
+{
+ dp_packet_hwol_set_tx_ipv4(pkt);
+ if (dp_packet_ip_checksum_good(pkt)) {
+ dp_packet_hwol_set_tx_ip_csum(pkt);
+ }
+}
+
+static void
+mfex_ipv6_set_hwol(struct dp_packet *pkt)
+{
+ dp_packet_hwol_set_tx_ipv6(pkt);
+}
+
+static void
+mfex_tcp_set_hwol(struct dp_packet *pkt)
+{
+ dp_packet_ol_vnet_csum_check(pkt, pkt->l4_ofs,
+ offsetof(struct tcp_header,
+ tcp_csum));
+ if (dp_packet_l4_checksum_good(pkt)
+ || dp_packet_ol_l4_csum_partial(pkt)) {
+ dp_packet_hwol_set_csum_tcp(pkt);
+ }
+}
+
+static void
+mfex_udp_set_hwol(struct dp_packet *pkt)
+{
+ dp_packet_ol_vnet_csum_check(pkt, pkt->l4_ofs,
+ offsetof(struct udp_header,
+ udp_csum));
+ if (dp_packet_l4_checksum_good(pkt)
+ || dp_packet_ol_l4_csum_partial(pkt)) {
+ dp_packet_hwol_set_csum_udp(pkt);
+ }
+}
+
/* Generic loop to process any mfex profile. This code is specialized into
* multiple actual MFEX implementation functions. Its marked ALWAYS_INLINE
* to ensure the compiler specializes each instance. The code is marked "hot"
@@ -864,6 +898,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
const struct tcp_header *tcp = (void *)&pkt[38];
mfex_handle_tcp_flags(tcp, &blocks[7]);
dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+ mfex_ipv4_set_hwol(packet);
+ mfex_tcp_set_hwol(packet);
} break;
case PROFILE_ETH_VLAN_IPV4_UDP: {
@@ -876,6 +912,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
continue;
}
dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+ mfex_ipv4_set_hwol(packet);
+ mfex_udp_set_hwol(packet);
} break;
case PROFILE_ETH_IPV4_TCP: {
@@ -891,6 +929,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
continue;
}
dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+ mfex_ipv4_set_hwol(packet);
+ mfex_tcp_set_hwol(packet);
} break;
case PROFILE_ETH_IPV4_UDP: {
@@ -902,6 +942,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
continue;
}
dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+ mfex_ipv4_set_hwol(packet);
+ mfex_udp_set_hwol(packet);
} break;
case PROFILE_ETH_IPV6_UDP: {
@@ -920,6 +962,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
/* Process UDP header. */
mfex_handle_ipv6_l4((void *)&pkt[54], &blocks[9]);
dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
+ mfex_ipv6_set_hwol(packet);
+ mfex_udp_set_hwol(packet);
} break;
case PROFILE_ETH_IPV6_TCP: {
@@ -943,6 +987,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
}
mfex_handle_tcp_flags(tcp, &blocks[9]);
dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
+ mfex_ipv6_set_hwol(packet);
+ mfex_tcp_set_hwol(packet);
} break;
case PROFILE_ETH_VLAN_IPV6_TCP: {
@@ -969,6 +1015,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
}
mfex_handle_tcp_flags(tcp, &blocks[10]);
dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
+ mfex_ipv6_set_hwol(packet);
+ mfex_tcp_set_hwol(packet);
} break;
case PROFILE_ETH_VLAN_IPV6_UDP: {
@@ -990,6 +1038,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
/* Process UDP header. */
mfex_handle_ipv6_l4((void *)&pkt[58], &blocks[10]);
dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
+ mfex_ipv6_set_hwol(packet);
+ mfex_udp_set_hwol(packet);
} break;
case PROFILE_ETH_IPV4_NVGRE: {
@@ -1000,6 +1050,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
continue;
}
dp_packet_update_rss_hash_ipv4(packet);
+ mfex_ipv4_set_hwol(packet);
+ mfex_udp_set_hwol(packet);
} break;
default:
@@ -1027,6 +1027,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
} else if (dl_type == htons(ETH_TYPE_IPV6)) {
dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
}
+ dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+ offsetof(struct tcp_header,
+ tcp_csum));
+ if (dp_packet_l4_checksum_good(packet)
+ || dp_packet_ol_l4_csum_partial(packet)) {
+ dp_packet_hwol_set_csum_tcp(packet);
+ }
}
}
} else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
@@ -1042,6 +1049,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
} else if (dl_type == htons(ETH_TYPE_IPV6)) {
dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
}
+ dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+ offsetof(struct udp_header,
+ udp_csum));
+ if (dp_packet_l4_checksum_good(packet)
+ || dp_packet_ol_l4_csum_partial(packet)) {
+ dp_packet_hwol_set_csum_udp(packet);
+ }
}
} else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
@@ -1051,6 +1065,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
+ dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+ offsetof(struct sctp_header,
+ sctp_csum));
+ if (dp_packet_l4_checksum_good(packet)
+ || dp_packet_ol_l4_csum_partial(packet)) {
+ dp_packet_hwol_set_csum_sctp(packet);
+ }
}
} else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) {
if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
@@ -3170,6 +3191,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
tcp->tcp_csum = 0;
tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
tcp, l4_len));
+ dp_packet_ol_set_l4_csum_good(p);
} else if (flow->nw_proto == IPPROTO_UDP) {
struct udp_header *udp = dp_packet_l4(p);
@@ -3179,6 +3201,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
if (!udp->udp_csum) {
udp->udp_csum = htons(0xffff);
}
+ dp_packet_ol_set_l4_csum_good(p);
} else if (flow->nw_proto == IPPROTO_ICMP) {
struct icmp_header *icmp = dp_packet_l4(p);
@@ -412,8 +412,10 @@ enum dpdk_hw_ol_features {
NETDEV_RX_HW_CRC_STRIP = 1 << 1,
NETDEV_RX_HW_SCATTER = 1 << 2,
NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
- NETDEV_TX_TSO_OFFLOAD = 1 << 4,
- NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
+ NETDEV_TX_TCP_CKSUM_OFFLOAD = 1 << 4,
+ NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5,
+ NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6,
+ NETDEV_TX_TSO_OFFLOAD = 1 << 7,
};
/*
@@ -1008,6 +1010,35 @@ dpdk_watchdog(void *dummy OVS_UNUSED)
return NULL;
}
+static void
+netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev,
+ enum dpdk_hw_ol_features hw_ol_features,
+ enum netdev_ol_flags flag)
+{
+ struct netdev *netdev = &dev->up;
+
+ if (dev->hw_ol_features & hw_ol_features) {
+ netdev->ol_flags |= flag;
+ } else {
+ netdev->ol_flags &= ~flag;
+ }
+}
+
+static void
+netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev)
+{
+ netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_IPV4_CKSUM_OFFLOAD,
+ NETDEV_TX_OFFLOAD_IPV4_CKSUM);
+ netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TCP_CKSUM_OFFLOAD,
+ NETDEV_TX_OFFLOAD_TCP_CKSUM);
+ netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_UDP_CKSUM_OFFLOAD,
+ NETDEV_TX_OFFLOAD_UDP_CKSUM);
+ netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_SCTP_CKSUM_OFFLOAD,
+ NETDEV_TX_OFFLOAD_SCTP_CKSUM);
+ netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD,
+ NETDEV_TX_OFFLOAD_TCP_TSO);
+}
+
static int
dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
{
@@ -1044,11 +1075,20 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
}
+ if (dev->hw_ol_features & NETDEV_TX_TCP_CKSUM_OFFLOAD) {
+ conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
+ }
+
+ if (dev->hw_ol_features & NETDEV_TX_UDP_CKSUM_OFFLOAD) {
+ conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
+ }
+
+ if (dev->hw_ol_features & NETDEV_TX_SCTP_CKSUM_OFFLOAD) {
+ conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM;
+ }
+
if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
- conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
- if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
- conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM;
- }
+ conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
}
/* Limit configured rss hash functions to only those supported
@@ -1154,7 +1194,6 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
struct rte_ether_addr eth_addr;
int diag;
int n_rxq, n_txq;
- uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS;
uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
@@ -1190,18 +1229,28 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
}
+ if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
+ dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
+ } else {
+ dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
+ }
+
+ if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
+ dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
+ } else {
+ dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
+ }
+
+ if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
+ dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+ } else {
+ dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+ }
+
dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
if (userspace_tso_enabled()) {
- if ((info.tx_offload_capa & tx_tso_offload_capa)
- == tx_tso_offload_capa) {
+ if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
- if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
- dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD;
- } else {
- VLOG_WARN("%s: Tx SCTP checksum offload is not supported, "
- "SCTP packets sent to this device will be dropped",
- netdev_get_name(&dev->up));
- }
} else {
VLOG_WARN("%s: Tx TSO offload is not supported.",
netdev_get_name(&dev->up));
@@ -2213,6 +2262,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
+ mbuf->l4_len = 0;
mbuf->outer_l2_len = 0;
mbuf->outer_l3_len = 0;
@@ -4149,6 +4199,7 @@ new_device(int vid)
ovs_mutex_lock(&dev->mutex);
if (nullable_string_is_equal(ifname, dev->vhost_id)) {
uint32_t qp_num = rte_vhost_get_vring_num(vid) / VIRTIO_QNUM;
+ uint64_t features;
/* Get NUMA information */
newnode = rte_vhost_get_numa_node(vid);
@@ -4173,6 +4224,36 @@ new_device(int vid)
dev->vhost_reconfigured = true;
}
+ if (rte_vhost_get_negotiated_features(vid, &features)) {
+ VLOG_INFO("Error checking guest features for "
+ "vHost Device '%s'", dev->vhost_id);
+ } else {
+ if (features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
+ dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
+ dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
+ dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+ }
+
+ if (userspace_tso_enabled()) {
+ if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4)
+ && features & (1ULL << VIRTIO_NET_F_GUEST_TSO6)) {
+
+ dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+ VLOG_DBG("%s: TSO enabled on vhost port",
+ netdev_get_name(&dev->up));
+ } else {
+ VLOG_WARN("%s: Tx TSO offload is not supported.",
+ netdev_get_name(&dev->up));
+ }
+ }
+ }
+
+ /* There is no support in virtio net to offload IPv4 csum,
+ * but the vhost library handles IPv4 csum offloading fine. */
+ dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+
+ netdev_dpdk_update_netdev_flags(dev);
+
ovsrcu_index_set(&dev->vid, vid);
exists = true;
@@ -4236,6 +4317,14 @@ destroy_device(int vid)
dev->up.n_rxq * sizeof *dev->vhost_rxq_enabled);
netdev_dpdk_txq_map_clear(dev);
+ /* Clear offload capabilities before next new_device. */
+ dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+ dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
+ dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
+ dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+ dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
+ netdev_dpdk_update_netdev_flags(dev);
+
netdev_change_seq_changed(&dev->up);
ovs_mutex_unlock(&dev->mutex);
exists = true;
@@ -5246,22 +5335,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
}
err = dpdk_eth_dev_init(dev);
-
- if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
- netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
- } else {
- netdev->ol_flags &= ~NETDEV_TX_OFFLOAD_IPV4_CKSUM;
- }
-
- if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
- netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
- netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
- netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
- netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
- if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
- netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
- }
- }
+ netdev_dpdk_update_netdev_flags(dev);
/* If both requested and actual hwaddr were previously
* unset (initialized to 0), then first device init above
@@ -5308,11 +5382,6 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
memset(dev->sw_stats, 0, sizeof *dev->sw_stats);
rte_spinlock_unlock(&dev->stats_lock);
- if (userspace_tso_enabled()) {
- dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
- VLOG_DBG("%s: TSO enabled on vhost port", netdev_get_name(&dev->up));
- }
-
netdev_dpdk_remap_txqs(dev);
if (netdev_dpdk_get_vid(dev) >= 0) {
@@ -5333,6 +5402,8 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
}
}
+ netdev_dpdk_update_netdev_flags(dev);
+
return 0;
}
@@ -5354,8 +5425,6 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
{
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
int err;
- uint64_t vhost_flags = 0;
- uint64_t vhost_unsup_flags;
ovs_mutex_lock(&dev->mutex);
@@ -5365,6 +5434,9 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
* 2. A path has been specified.
*/
if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT) && dev->vhost_id) {
+ uint64_t virtio_unsup_features = 0;
+ uint64_t vhost_flags = 0;
+
/* Register client-mode device. */
vhost_flags |= RTE_VHOST_USER_CLIENT;
@@ -5411,22 +5483,22 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
}
if (userspace_tso_enabled()) {
- netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
- netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
- netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
- netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
- netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
- vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
- | 1ULL << VIRTIO_NET_F_HOST_UFO;
+ virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_ECN
+ | 1ULL << VIRTIO_NET_F_HOST_UFO;
+ VLOG_DBG("%s: TSO enabled on vhost port",
+ netdev_get_name(&dev->up));
} else {
- /* This disables checksum offloading and all the features
- * that depends on it (TSO, UFO, ECN) according to virtio
- * specification. */
- vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM;
+ /* Advertise checksum offloading to the guest, but explicitly
+ * disable TSO and friends.
+ * NOTE: we can't disable HOST_ECN which may have been wrongly
+ * negotiated by a running guest. */
+ virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_TSO4
+ | 1ULL << VIRTIO_NET_F_HOST_TSO6
+ | 1ULL << VIRTIO_NET_F_HOST_UFO;
}
err = rte_vhost_driver_disable_features(dev->vhost_id,
- vhost_unsup_flags);
+ virtio_unsup_features);
if (err) {
VLOG_ERR("rte_vhost_driver_disable_features failed for "
"vhost user client port: %s\n", dev->up.name);
@@ -938,14 +938,6 @@ netdev_linux_common_construct(struct netdev *netdev_)
netnsid_unset(&netdev->netnsid);
ovs_mutex_init(&netdev->mutex);
- if (userspace_tso_enabled()) {
- netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
- netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
- netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
- netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
- netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
- }
-
return 0;
}
@@ -959,6 +951,16 @@ netdev_linux_construct(struct netdev *netdev_)
return error;
}
+ /* The socket interface doesn't offer the option to enable only
+ * csum offloading without TSO. */
+ if (userspace_tso_enabled()) {
+ netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
+ netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
+ netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
+ netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
+ netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+ }
+
error = get_flags(&netdev->up, &netdev->ifi_flags);
if (error == ENODEV) {
if (netdev->up.netdev_class != &netdev_internal_class) {
@@ -987,6 +989,7 @@ netdev_linux_construct_tap(struct netdev *netdev_)
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
static const char tap_dev[] = "/dev/net/tun";
const char *name = netdev_->name;
+ unsigned long oflags;
struct ifreq ifr;
int error = netdev_linux_common_construct(netdev_);
@@ -1004,10 +1007,7 @@ netdev_linux_construct_tap(struct netdev *netdev_)
/* Create tap device. */
get_flags(&netdev->up, &netdev->ifi_flags);
- ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
- if (userspace_tso_enabled()) {
- ifr.ifr_flags |= IFF_VNET_HDR;
- }
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) {
@@ -1030,21 +1030,22 @@ netdev_linux_construct_tap(struct netdev *netdev_)
goto error_close;
}
+ oflags = TUN_F_CSUM;
if (userspace_tso_enabled()) {
- /* Old kernels don't support TUNSETOFFLOAD. If TUNSETOFFLOAD is
- * available, it will return EINVAL when a flag is unknown.
- * Therefore, try enabling offload with no flags to check
- * if TUNSETOFFLOAD support is available or not. */
- if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, 0) == 0 || errno != EINVAL) {
- unsigned long oflags = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
-
- if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == -1) {
- VLOG_WARN("%s: enabling tap offloading failed: %s", name,
- ovs_strerror(errno));
- error = errno;
- goto error_close;
- }
- }
+ oflags |= (TUN_F_TSO4 | TUN_F_TSO6);
+ }
+
+ if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) {
+ netdev_->ol_flags |= (NETDEV_TX_OFFLOAD_IPV4_CKSUM
+ | NETDEV_TX_OFFLOAD_TCP_CKSUM
+ | NETDEV_TX_OFFLOAD_UDP_CKSUM);
+
+ if (userspace_tso_enabled()) {
+ netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
+ }
+ } else {
+ VLOG_WARN("%s: Disabling hardware offloading: %s", name,
+ ovs_strerror(errno));
}
netdev->present = true;
@@ -1344,18 +1345,22 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu,
pkt = buffers[i];
}
- if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
- struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
- struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+ if (virtio_net_hdr_size) {
+ int ret = netdev_linux_parse_vnet_hdr(pkt);
+ if (OVS_UNLIKELY(ret)) {
+ struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
+ struct netdev_linux *netdev = netdev_linux_cast(netdev_);
- /* Unexpected error situation: the virtio header is not present
- * or corrupted. Drop the packet but continue in case next ones
- * are correct. */
- dp_packet_delete(pkt);
- netdev->rx_dropped += 1;
- VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header",
- netdev_get_name(netdev_));
- continue;
+ /* Unexpected error situation: the virtio header is not
+ * present or corrupted or contains unsupported features.
+ * Drop the packet but continue in case next ones are
+ * correct. */
+ dp_packet_delete(pkt);
+ netdev->rx_dropped += 1;
+ VLOG_WARN_RL(&rl, "%s: Dropped packet: %s",
+ netdev_get_name(netdev_), ovs_strerror(ret));
+ continue;
+ }
}
for (cmsg = CMSG_FIRSTHDR(&mmsgs[i].msg_hdr); cmsg;
@@ -1403,7 +1408,6 @@ static int
netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
struct dp_packet_batch *batch)
{
- int virtio_net_hdr_size;
ssize_t retval;
size_t std_len;
int iovlen;
@@ -1413,16 +1417,14 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
/* Use the buffer from the allocated packet below to receive MTU
* sized packets and an aux_buf for extra TSO data. */
iovlen = IOV_TSO_SIZE;
- virtio_net_hdr_size = sizeof(struct virtio_net_hdr);
} else {
/* Use only the buffer from the allocated packet. */
iovlen = IOV_STD_SIZE;
- virtio_net_hdr_size = 0;
}
/* The length here needs to be accounted in the same way when the
* aux_buf is allocated so that it can be prepended to TSO buffer. */
- std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu;
+ std_len = sizeof(struct virtio_net_hdr) + VLAN_ETH_HEADER_LEN + mtu;
for (i = 0; i < NETDEV_MAX_BURST; i++) {
struct dp_packet *buffer;
struct dp_packet *pkt;
@@ -1462,7 +1464,7 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
pkt = buffer;
}
- if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
+ if (netdev_linux_parse_vnet_hdr(pkt)) {
struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
@@ -1611,7 +1613,7 @@ netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
* on other interface types because we attach a socket filter to the rx
* socket. */
static int
-netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu,
+netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu,
struct dp_packet_batch *batch)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
@@ -1632,9 +1634,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu,
ssize_t retval;
int error;
- if (tso) {
- netdev_linux_prepend_vnet_hdr(packet, mtu);
- }
+ netdev_linux_prepend_vnet_hdr(packet, mtu);
size = dp_packet_size(packet);
do {
@@ -1765,7 +1765,7 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch);
} else {
- error = netdev_linux_tap_batch_send(netdev_, tso, mtu, batch);
+ error = netdev_linux_tap_batch_send(netdev_, mtu, batch);
}
if (error) {
if (error == ENOBUFS) {
@@ -6831,53 +6831,76 @@ netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto)
return 0;
}
+/* Parses the virtio net header prepended to packet 'b' and applies
+ * the offload features it advertises. Returns 0 if successful,
+ * otherwise a positive errno value. */
static int
netdev_linux_parse_vnet_hdr(struct dp_packet *b)
{
struct virtio_net_hdr *vnet = dp_packet_pull(b, sizeof *vnet);
- uint16_t l4proto = 0;
if (OVS_UNLIKELY(!vnet)) {
- return -EINVAL;
+ return EINVAL;
}
if (vnet->flags == 0 && vnet->gso_type == VIRTIO_NET_HDR_GSO_NONE) {
return 0;
}
- if (netdev_linux_parse_l2(b, &l4proto)) {
- return -EINVAL;
- }
-
if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
- if (l4proto == IPPROTO_TCP) {
- dp_packet_hwol_set_csum_tcp(b);
- } else if (l4proto == IPPROTO_UDP) {
+ uint16_t l4proto = 0;
+
+ if (netdev_linux_parse_l2(b, &l4proto)) {
+ return EINVAL;
+ }
+
+ if (l4proto == IPPROTO_UDP) {
dp_packet_hwol_set_csum_udp(b);
- } else if (l4proto == IPPROTO_SCTP) {
- dp_packet_hwol_set_csum_sctp(b);
}
+ /* The packet has offloaded checksum. However, there is no
+ * additional information like the protocol used, so it would
+ * require to parse the packet here. The checksum starting point
+ * and offset are going to be verified when the packet headers
+ * are parsed during miniflow extraction. */
+ b->csum_start = (OVS_FORCE uint16_t) vnet->csum_start;
+ b->csum_offset = (OVS_FORCE uint16_t) vnet->csum_offset;
+ } else {
+ b->csum_start = 0;
+ b->csum_offset = 0;
}
- if (l4proto && vnet->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
- uint8_t allowed_mask = VIRTIO_NET_HDR_GSO_TCPV4
- | VIRTIO_NET_HDR_GSO_TCPV6
- | VIRTIO_NET_HDR_GSO_UDP;
- uint8_t type = vnet->gso_type & allowed_mask;
+ int ret = 0;
+ switch (vnet->gso_type) {
+ case VIRTIO_NET_HDR_GSO_TCPV4:
+ case VIRTIO_NET_HDR_GSO_TCPV6:
+ /* FIXME: The packet has offloaded TCP segmentation. The gso_size
+ * is given and needs to be respected. */
+ dp_packet_hwol_set_tcp_seg(b);
+ break;
- if (type == VIRTIO_NET_HDR_GSO_TCPV4
- || type == VIRTIO_NET_HDR_GSO_TCPV6) {
- dp_packet_hwol_set_tcp_seg(b);
- }
+ case VIRTIO_NET_HDR_GSO_UDP:
+ /* UFO is not supported. */
+ VLOG_WARN_RL(&rl, "Received an unsupported packet with UFO enabled.");
+ ret = ENOTSUP;
+ break;
+
+ case VIRTIO_NET_HDR_GSO_NONE:
+ break;
+
+ default:
+ ret = ENOTSUP;
+ VLOG_WARN_RL(&rl, "Received an unsupported packet with GSO type: 0x%x",
+ vnet->gso_type);
}
- return 0;
+ return ret;
}
static void
netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
{
- struct virtio_net_hdr *vnet = dp_packet_push_zeros(b, sizeof *vnet);
+ struct virtio_net_hdr v;
+ struct virtio_net_hdr *vnet = &v;
if (dp_packet_hwol_is_tso(b)) {
uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char *)dp_packet_eth(b))
@@ -6887,30 +6910,91 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len);
if (dp_packet_hwol_is_ipv4(b)) {
vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- } else {
+ } else if (dp_packet_hwol_tx_ipv6(b)) {
vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
}
} else {
- vnet->flags = VIRTIO_NET_HDR_GSO_NONE;
+ vnet->hdr_len = 0;
+ vnet->gso_size = 0;
+ vnet->gso_type = VIRTIO_NET_HDR_GSO_NONE;
}
- if (dp_packet_hwol_l4_mask(b)) {
- vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- vnet->csum_start = (OVS_FORCE __virtio16)((char *)dp_packet_l4(b)
- - (char *)dp_packet_eth(b));
-
+ if (dp_packet_l4_checksum_good(b)) {
+ /* The packet has good L4 checksum. No need to validate again. */
+ vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+ vnet->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+ } else if (dp_packet_hwol_tx_l4_checksum(b)) {
+ /* The csum calculation is offloaded. */
if (dp_packet_hwol_l4_is_tcp(b)) {
+ /* Virtual I/O Device (VIRTIO) Version 1.1
+ * 5.1.6.2 Packet Transmission
+ * If the driver negotiated VIRTIO_NET_F_CSUM, it can skip
+ * checksumming the packet:
+ * - flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM set,
+ * - csum_start is set to the offset within the packet
+ * to begin checksumming, and
+ * - csum_offset indicates how many bytes after the
+ * csum_start the new (16 bit ones complement) checksum
+ * is placed by the device.
+ * The TCP checksum field in the packet is set to the sum of
+ * the TCP pseudo header, so that replacing it by the ones
+ * complement checksum of the TCP header and body will give
+ * the correct result. */
+
+ struct tcp_header *tcp_hdr = dp_packet_l4(b);
+ ovs_be16 csum = 0;
+ if (dp_packet_hwol_is_ipv4(b)) {
+ const struct ip_header *ip_hdr = dp_packet_l3(b);
+ csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
+ } else if (dp_packet_hwol_tx_ipv6(b)) {
+ const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b);
+ csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
+ }
+
+ tcp_hdr->tcp_csum = csum;
+ vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs;
vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
struct tcp_header, tcp_csum);
} else if (dp_packet_hwol_l4_is_udp(b)) {
+ struct udp_header *udp_hdr = dp_packet_l4(b);
+ ovs_be16 csum = 0;
+
+ if (dp_packet_hwol_is_ipv4(b)) {
+ const struct ip_header *ip_hdr = dp_packet_l3(b);
+ csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
+ } else if (dp_packet_hwol_tx_ipv6(b)) {
+ const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b);
+ csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
+ }
+
+ udp_hdr->udp_csum = csum;
+ vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs;
vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
struct udp_header, udp_csum);
} else if (dp_packet_hwol_l4_is_sctp(b)) {
- vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
- struct sctp_header, sctp_csum);
+ /* The Linux kernel networking stack only supports csum_start
+ * and csum_offset when SCTP GSO is enabled. See kernel's
+ * skb_csum_hwoffload_help(). Currently there is no SCTP
+ * segmentation offload support in OVS. */
+ vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+ vnet->flags = 0;
} else {
- VLOG_WARN_RL(&rl, "Unsupported L4 protocol");
+ /* This should only happen when DP_PACKET_OL_TX_L4_MASK includes
+ * a new flag that is not covered by the checks above. */
+ VLOG_WARN_RL(&rl, "Unsupported L4 checksum offload. "
+ "Flags: %"PRIu64,
+ (uint64_t)*dp_packet_ol_flags_ptr(b));
+ vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+ vnet->flags = 0;
}
+ } else {
+ /* Packet L4 csum is unknown. */
+ vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+ vnet->flags = 0;
}
+
+ dp_packet_push(b, vnet, sizeof *vnet);
}
@@ -224,28 +224,6 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
return udp + 1;
}
-static void
-netdev_tnl_calc_udp_csum(struct udp_header *udp, struct dp_packet *packet,
- int ip_tot_size)
-{
- uint32_t csum;
-
- if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
- csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(
- dp_packet_data(packet)));
- } else {
- csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr(
- dp_packet_data(packet)));
- }
-
- csum = csum_continue(csum, udp, ip_tot_size);
- udp->udp_csum = csum_finish(csum);
-
- if (!udp->udp_csum) {
- udp->udp_csum = htons(0xffff);
- }
-}
-
void
netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
struct dp_packet *packet,
@@ -260,9 +238,9 @@ netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
udp->udp_src = netdev_tnl_get_src_port(packet);
udp->udp_len = htons(ip_tot_size);
- if (udp->udp_csum) {
- netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
- }
+ /* Postpone checksum to the egress netdev. */
+ dp_packet_hwol_set_csum_udp(packet);
+ dp_packet_ol_reset_l4_csum_good(packet);
}
static void *
@@ -806,7 +784,9 @@ netdev_gtpu_push_header(const struct netdev *netdev,
data->header_len, &ip_tot_size);
udp->udp_src = netdev_tnl_get_src_port(packet);
udp->udp_len = htons(ip_tot_size);
- netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
+ /* Postpone checksum to the egress netdev. */
+ dp_packet_hwol_set_csum_udp(packet);
+ dp_packet_ol_reset_l4_csum_good(packet);
gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1);
@@ -799,8 +799,6 @@ static bool
netdev_send_prepare_packet(const uint64_t netdev_flags,
struct dp_packet *packet, char **errormsg)
{
- uint64_t l4_mask;
-
if (dp_packet_hwol_is_tso(packet)
&& !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
/* Fall back to GSO in software. */
@@ -813,36 +811,16 @@ netdev_send_prepare_packet(const uint64_t netdev_flags,
* netdev to decide what would be the best to do.
* Provide a software fallback in case the device doesn't support IP csum
* offloading. Note: Encapsulated packet must have the inner IP header
+ * csum already calculated.
+ * A packet with L4 csum offloading enabled was received with a verified csum.
+ * Leave the L4 csum offloading enabled even with good checksum for the
+ * netdev to decide what would be the best to do.
+ * Netdev that requires pseudo header csum needs to calculate that.
+ * Provide a software fallback in case the netdev doesn't support L4 csum
+ * offloading. Note: Encapsulated packet must have the inner L4 header
* csum already calculated. */
dp_packet_ol_send_prepare(packet, netdev_flags);
- l4_mask = dp_packet_hwol_l4_mask(packet);
- if (l4_mask) {
- if (dp_packet_hwol_l4_is_tcp(packet)) {
- if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
- /* Fall back to TCP csum in software. */
- VLOG_ERR_BUF(errormsg, "No TCP checksum support");
- return false;
- }
- } else if (dp_packet_hwol_l4_is_udp(packet)) {
- if (!(netdev_flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) {
- /* Fall back to UDP csum in software. */
- VLOG_ERR_BUF(errormsg, "No UDP checksum support");
- return false;
- }
- } else if (dp_packet_hwol_l4_is_sctp(packet)) {
- if (!(netdev_flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)) {
- /* Fall back to SCTP csum in software. */
- VLOG_ERR_BUF(errormsg, "No SCTP checksum support");
- return false;
- }
- } else {
- VLOG_ERR_BUF(errormsg, "No L4 checksum support: mask: %"PRIu64,
- l4_mask);
- return false;
- }
- }
-
return true;
}
@@ -975,20 +953,16 @@ netdev_push_header(const struct netdev *netdev,
size_t i, size = dp_packet_batch_size(batch);
DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
- if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet)
- || dp_packet_hwol_l4_mask(packet))) {
+ if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet))) {
COVERAGE_INC(netdev_push_header_drops);
dp_packet_delete(packet);
- VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is "
+ VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO is "
"not supported: packet dropped",
netdev_get_name(netdev));
} else {
/* The packet is going to be encapsulated and there is
* no support yet for inner network header csum offloading. */
- if (dp_packet_hwol_tx_ip_csum(packet)
- && !dp_packet_ip_checksum_good(packet)) {
- dp_packet_ip_set_header_csum(packet);
- }
+ dp_packet_ol_send_prepare(packet, 0);
netdev->netdev_class->push_header(netdev, packet, data);
@@ -485,9 +485,11 @@ action_avx512_ipv4_set_addrs(struct dp_packet_batch *batch,
size_t l4_size = dp_packet_l4_size(packet);
if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) {
- /* New UDP checksum. */
struct udp_header *uh = dp_packet_l4(packet);
- if (uh->udp_csum) {
+ if (dp_packet_hwol_l4_is_udp(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ } else if (uh->udp_csum) {
+ /* New UDP checksum. */
uint16_t old_udp_checksum = ~uh->udp_csum;
uint32_t udp_checksum = old_udp_checksum + delta_checksum;
udp_checksum = csum_finish(udp_checksum);
@@ -500,13 +502,17 @@ action_avx512_ipv4_set_addrs(struct dp_packet_batch *batch,
}
} else if (nh->ip_proto == IPPROTO_TCP &&
l4_size >= TCP_HEADER_LEN) {
- /* New TCP checksum. */
- struct tcp_header *th = dp_packet_l4(packet);
- uint16_t old_tcp_checksum = ~th->tcp_csum;
- uint32_t tcp_checksum = old_tcp_checksum + delta_checksum;
- tcp_checksum = csum_finish(tcp_checksum);
-
- th->tcp_csum = tcp_checksum;
+ if (dp_packet_hwol_l4_is_tcp(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ } else {
+ /* New TCP checksum. */
+ struct tcp_header *th = dp_packet_l4(packet);
+ uint16_t old_tcp_checksum = ~th->tcp_csum;
+ uint32_t tcp_checksum = old_tcp_checksum + delta_checksum;
+ tcp_checksum = csum_finish(tcp_checksum);
+
+ th->tcp_csum = tcp_checksum;
+ }
}
pkt_metadata_init_conn(&packet->md);
@@ -1131,16 +1131,22 @@ packet_set_ipv4_addr(struct dp_packet *packet,
pkt_metadata_init_conn(&packet->md);
if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
- struct tcp_header *th = dp_packet_l4(packet);
-
- th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr);
+ if (dp_packet_hwol_l4_is_tcp(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ } else {
+ struct tcp_header *th = dp_packet_l4(packet);
+ th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr);
+ }
} else if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN ) {
- struct udp_header *uh = dp_packet_l4(packet);
-
- if (uh->udp_csum) {
- uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr);
- if (!uh->udp_csum) {
- uh->udp_csum = htons(0xffff);
+ if (dp_packet_hwol_l4_is_udp(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ } else {
+ struct udp_header *uh = dp_packet_l4(packet);
+ if (uh->udp_csum) {
+ uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr);
+ if (!uh->udp_csum) {
+ uh->udp_csum = htons(0xffff);
+ }
}
}
}
@@ -1246,16 +1252,24 @@ packet_update_csum128(struct dp_packet *packet, uint8_t proto,
size_t l4_size = dp_packet_l4_size(packet);
if (proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
- struct tcp_header *th = dp_packet_l4(packet);
+ if (dp_packet_hwol_l4_is_tcp(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ } else {
+ struct tcp_header *th = dp_packet_l4(packet);
- th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+ th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+ }
} else if (proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) {
- struct udp_header *uh = dp_packet_l4(packet);
+ if (dp_packet_hwol_l4_is_udp(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ } else {
+ struct udp_header *uh = dp_packet_l4(packet);
- if (uh->udp_csum) {
- uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
- if (!uh->udp_csum) {
- uh->udp_csum = htons(0xffff);
+ if (uh->udp_csum) {
+ uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
+ if (!uh->udp_csum) {
+ uh->udp_csum = htons(0xffff);
+ }
}
}
} else if (proto == IPPROTO_ICMPV6 &&
@@ -1375,7 +1389,9 @@ static void
packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum)
{
if (*port != new_port) {
- *csum = recalc_csum16(*csum, *port, new_port);
+ if (csum) {
+ *csum = recalc_csum16(*csum, *port, new_port);
+ }
*port = new_port;
}
}
@@ -1387,9 +1403,16 @@ void
packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
{
struct tcp_header *th = dp_packet_l4(packet);
+ ovs_be16 *csum = NULL;
+
+ if (dp_packet_hwol_l4_is_tcp(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ } else {
+ csum = &th->tcp_csum;
+ }
- packet_set_port(&th->tcp_src, src, &th->tcp_csum);
- packet_set_port(&th->tcp_dst, dst, &th->tcp_csum);
+ packet_set_port(&th->tcp_src, src, csum);
+ packet_set_port(&th->tcp_dst, dst, csum);
pkt_metadata_init_conn(&packet->md);
}
@@ -1401,17 +1424,21 @@ packet_set_udp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
{
struct udp_header *uh = dp_packet_l4(packet);
- if (uh->udp_csum) {
- packet_set_port(&uh->udp_src, src, &uh->udp_csum);
- packet_set_port(&uh->udp_dst, dst, &uh->udp_csum);
+ if (dp_packet_hwol_l4_is_udp(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ packet_set_port(&uh->udp_src, src, NULL);
+ packet_set_port(&uh->udp_dst, dst, NULL);
+ } else {
+ ovs_be16 *csum = uh->udp_csum ? &uh->udp_csum : NULL;
+
+ packet_set_port(&uh->udp_src, src, csum);
+ packet_set_port(&uh->udp_dst, dst, csum);
- if (!uh->udp_csum) {
+ if (csum && !uh->udp_csum) {
uh->udp_csum = htons(0xffff);
}
- } else {
- uh->udp_src = src;
- uh->udp_dst = dst;
}
+
pkt_metadata_init_conn(&packet->md);
}
@@ -1422,18 +1449,27 @@ void
packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
{
struct sctp_header *sh = dp_packet_l4(packet);
- ovs_be32 old_csum, old_correct_csum, new_csum;
- uint16_t tp_len = dp_packet_l4_size(packet);
- old_csum = get_16aligned_be32(&sh->sctp_csum);
- put_16aligned_be32(&sh->sctp_csum, 0);
- old_correct_csum = crc32c((void *)sh, tp_len);
+ if (dp_packet_hwol_l4_is_sctp(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ sh->sctp_src = src;
+ sh->sctp_dst = dst;
+ } else {
+ ovs_be32 old_csum, old_correct_csum, new_csum;
+ uint16_t tp_len = dp_packet_l4_size(packet);
- sh->sctp_src = src;
- sh->sctp_dst = dst;
+ old_csum = get_16aligned_be32(&sh->sctp_csum);
+ put_16aligned_be32(&sh->sctp_csum, 0);
+ old_correct_csum = crc32c((void *) sh, tp_len);
+
+ sh->sctp_src = src;
+ sh->sctp_dst = dst;
+
+ new_csum = crc32c((void *) sh, tp_len);
+ put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum
+ ^ new_csum);
+ }
- new_csum = crc32c((void *)sh, tp_len);
- put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum);
pkt_metadata_init_conn(&packet->md);
}
@@ -1957,3 +1993,72 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
}
}
}
+
+/* Set TCP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_tcp_complete_csum(struct dp_packet *p)
+{
+ struct tcp_header *tcp = dp_packet_l4(p);
+
+ tcp->tcp_csum = 0;
+ if (dp_packet_hwol_is_ipv4(p)) {
+ struct ip_header *ip = dp_packet_l3(p);
+
+ tcp->tcp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip),
+ tcp, dp_packet_l4_size(p)));
+ } else if (dp_packet_hwol_tx_ipv6(p)) {
+ struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p);
+
+ tcp->tcp_csum = packet_csum_upperlayer6(ip6, tcp, ip6->ip6_nxt,
+ dp_packet_l4_size(p));
+ } else {
+ OVS_NOT_REACHED();
+ }
+}
+
+/* Set UDP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_udp_complete_csum(struct dp_packet *p)
+{
+ struct udp_header *udp = dp_packet_l4(p);
+
+ /* A zero udp_csum means the checksum is disabled; skip the calculation. */
+ if (!udp->udp_csum) {
+ return;
+ }
+
+ udp->udp_csum = 0;
+ if (dp_packet_hwol_is_ipv4(p)) {
+ struct ip_header *ip = dp_packet_l3(p);
+
+ udp->udp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip),
+ udp, dp_packet_l4_size(p)));
+ } else if (dp_packet_hwol_tx_ipv6(p)) {
+ struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p);
+
+ udp->udp_csum = packet_csum_upperlayer6(ip6, udp, ip6->ip6_nxt,
+ dp_packet_l4_size(p));
+ } else {
+ OVS_NOT_REACHED();
+ }
+
+ if (!udp->udp_csum) {
+ udp->udp_csum = htons(0xffff);
+ }
+}
+
+/* Set SCTP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_sctp_complete_csum(struct dp_packet *p)
+{
+ struct sctp_header *sh = dp_packet_l4(p);
+ uint16_t tp_len = dp_packet_l4_size(p);
+ ovs_be32 csum;
+
+ put_16aligned_be32(&sh->sctp_csum, 0);
+ csum = crc32c((void *) sh, tp_len);
+ put_16aligned_be32(&sh->sctp_csum, csum);
+}
@@ -1645,6 +1645,9 @@ uint32_t packet_csum_pseudoheader(const struct ip_header *);
bool packet_rh_present(struct dp_packet *packet, uint8_t *nexthdr,
bool *first_frag);
void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6);
+void packet_tcp_complete_csum(struct dp_packet *);
+void packet_udp_complete_csum(struct dp_packet *);
+void packet_sctp_complete_csum(struct dp_packet *);
#define DNS_HEADER_LEN 12
struct dns_header {