@@ -2105,13 +2105,13 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
}
if (ok) {
- bool hwol_bad_l4_csum = dp_packet_ol_l4_checksum_bad(pkt);
- if (!hwol_bad_l4_csum) {
- bool hwol_good_l4_csum = dp_packet_ol_l4_checksum_good(pkt)
- || dp_packet_ol_tx_l4_checksum(pkt);
+ if (!dp_packet_ol_l4_checksum_bad(pkt)) {
+
/* Validate the checksum only when hwol is not supported. */
if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
- &ctx->icmp_related, l3, !hwol_good_l4_csum,
+ &ctx->icmp_related, l3,
+ !dp_packet_ol_l4_checksum_good(pkt) &&
+ !dp_packet_ol_tx_l4_checksum(pkt),
NULL)) {
ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
return true;
@@ -3423,8 +3423,10 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
adj_seqnum(&th->tcp_seq, ec->seq_skew);
}
- th->tcp_csum = 0;
- if (!dp_packet_ol_tx_l4_checksum(pkt)) {
+ if (dp_packet_ol_tx_tcp_csum(pkt)) {
+ dp_packet_ol_reset_l4_csum_good(pkt);
+ } else {
+ th->tcp_csum = 0;
if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
th->tcp_csum = packet_csum_upperlayer6(nh6, th, ctx->key.nw_proto,
dp_packet_l4_size(pkt));
@@ -39,6 +39,9 @@ dp_packet_init__(struct dp_packet *p, size_t allocated,
dp_packet_init_specific(p);
/* By default assume the packet type to be Ethernet. */
p->packet_type = htonl(PT_ETH);
+ /* Reset csum start and offset. */
+ p->csum_start = 0;
+ p->csum_offset = 0;
}
static void
@@ -189,7 +192,7 @@ dp_packet_clone_with_headroom(const struct dp_packet *p, size_t headroom)
dp_packet_size(p),
headroom);
/* Copy the following fields into the returned buffer: l2_pad_size,
- * l2_5_ofs, l3_ofs, l4_ofs, cutlen, packet_type and md. */
+ * l2_5_ofs, l3_ofs, ..., cutlen, packet_type and md. */
memcpy(&new_buffer->l2_pad_size, &p->l2_pad_size,
sizeof(struct dp_packet) -
offsetof(struct dp_packet, l2_pad_size));
@@ -518,4 +521,22 @@ dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags) {
dp_packet_ip_set_header_csum(p);
dp_packet_ol_set_ip_csum_good(p);
}
+
+ if (dp_packet_ol_l4_checksum_good(p) || !dp_packet_ol_tx_l4_checksum(p)) {
+ return;
+ }
+
+ if (dp_packet_ol_tx_tcp_csum(p)
+ && !(flags & NETDEV_OFFLOAD_TX_TCP_CSUM)) {
+ packet_tcp_complete_csum(p);
+ dp_packet_ol_set_l4_csum_good(p);
+ } else if (dp_packet_ol_tx_udp_csum(p)
+ && !(flags & NETDEV_OFFLOAD_TX_UDP_CSUM)) {
+ packet_udp_complete_csum(p);
+ dp_packet_ol_set_l4_csum_good(p);
+ } else if (!(flags & NETDEV_OFFLOAD_TX_SCTP_CSUM)
+ && dp_packet_ol_tx_sctp_csum(p)) {
+ packet_sctp_complete_csum(p);
+ dp_packet_ol_set_l4_csum_good(p);
+ }
}
@@ -89,23 +89,24 @@ enum dp_packet_offload_mask {
/* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
};
-#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH | \
- DP_PACKET_OL_FLOW_MARK | \
- DP_PACKET_OL_RX_L4_CSUM_BAD | \
- DP_PACKET_OL_RX_IP_CSUM_BAD | \
- DP_PACKET_OL_RX_L4_CSUM_GOOD | \
- DP_PACKET_OL_RX_IP_CSUM_GOOD | \
- DP_PACKET_OL_TX_TCP_SEG | \
- DP_PACKET_OL_TX_IPV4 | \
- DP_PACKET_OL_TX_IPV6 | \
- DP_PACKET_OL_TX_IP_CSUM | \
- DP_PACKET_OL_TX_TCP_CSUM | \
- DP_PACKET_OL_TX_UDP_CSUM | \
+#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH | \
+ DP_PACKET_OL_FLOW_MARK | \
+ DP_PACKET_OL_RX_L4_CSUM_BAD | \
+ DP_PACKET_OL_RX_IP_CSUM_BAD | \
+ DP_PACKET_OL_RX_L4_CSUM_GOOD | \
+ DP_PACKET_OL_RX_IP_CSUM_GOOD | \
+ DP_PACKET_OL_TX_TCP_SEG | \
+ DP_PACKET_OL_TX_IPV4 | \
+ DP_PACKET_OL_TX_IPV6 | \
+ DP_PACKET_OL_TX_IP_CSUM | \
+ DP_PACKET_OL_TX_TCP_CSUM | \
+ DP_PACKET_OL_TX_UDP_CSUM | \
DP_PACKET_OL_TX_SCTP_CSUM)
#define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CSUM | \
DP_PACKET_OL_TX_UDP_CSUM | \
DP_PACKET_OL_TX_SCTP_CSUM)
+
#define DP_PACKET_OL_RX_IP_CSUM_MASK (DP_PACKET_OL_RX_IP_CSUM_GOOD | \
DP_PACKET_OL_RX_IP_CSUM_BAD)
#define DP_PACKET_OL_RX_L4_CSUM_MASK (DP_PACKET_OL_RX_L4_CSUM_GOOD | \
@@ -140,6 +141,8 @@ struct dp_packet {
or UINT16_MAX. */
uint32_t cutlen; /* length in bytes to cut from the end. */
ovs_be32 packet_type; /* Packet type as defined in OpenFlow */
+ uint16_t csum_start; /* Position to start checksumming from. */
+ uint16_t csum_offset; /* Offset to place checksum. */
union {
struct pkt_metadata md;
uint64_t data[DP_PACKET_CONTEXT_SIZE / 8];
@@ -991,6 +994,13 @@ dp_packet_ol_tx_ipv4(const struct dp_packet *a)
return !!(*dp_packet_ol_flags_ptr(a) & DP_PACKET_OL_TX_IPV4);
}
+/* Returns 'true' if packet 'p' is marked as IPv6. */
+static inline bool
+dp_packet_ol_tx_ipv6(const struct dp_packet *p)
+{
+ return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IPV6);
+}
+
/* Returns 'true' if packet 'a' is marked for TCP checksum offloading. */
static inline bool
dp_packet_ol_tx_tcp_csum(const struct dp_packet *a)
@@ -1015,18 +1025,20 @@ dp_packet_ol_tx_sctp_csum(struct dp_packet *a)
DP_PACKET_OL_TX_SCTP_CSUM;
}
-/* Mark packet 'a' as IPv4. */
+/* Mark packet 'p' as IPv4. */
static inline void
-dp_packet_ol_set_tx_ipv4(struct dp_packet *a)
+dp_packet_ol_set_tx_ipv4(struct dp_packet *p)
{
- *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV4;
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6;
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4;
}
-/* Mark packet 'a' as IPv6. */
+/* Mark packet 'p' as IPv6. */
static inline void
-dp_packet_ol_set_tx_ipv6(struct dp_packet *a)
+dp_packet_ol_set_tx_ipv6(struct dp_packet *p)
{
- *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6;
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV4;
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV6;
}
/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
@@ -1119,6 +1131,8 @@ dp_packet_ip_set_header_csum(struct dp_packet *p)
ip->ip_csum = csum(ip, sizeof *ip);
}
+/* Returns 'true' if the packet 'p' has good integrity and the
+ * checksum in it is correct. */
static inline bool
dp_packet_ol_l4_checksum_good(const struct dp_packet *p)
{
@@ -1133,6 +1147,53 @@ dp_packet_ol_l4_checksum_bad(const struct dp_packet *p)
DP_PACKET_OL_RX_L4_CSUM_BAD;
}
+/* Returns 'true' if the packet has good integrity though the
+ * checksum in the packet 'p' is not complete. */
+static inline bool
+dp_packet_ol_l4_csum_partial(const struct dp_packet *p)
+{
+ return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CSUM_MASK) ==
+ DP_PACKET_OL_RX_L4_CSUM_MASK;
+}
+
+/* Marks packet 'p' with good integrity though the checksum in the
+ * packet is not complete. */
+static inline void
+dp_packet_ol_set_l4_csum_partial(const struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CSUM_MASK;
+}
+
+/* Marks packet 'p' with good L4 checksum. */
+static inline void
+dp_packet_ol_set_l4_csum_good(const struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CSUM_BAD;
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CSUM_GOOD;
+}
+
+/* Marks packet 'p' with good L4 checksum as modified. */
+static inline void
+dp_packet_ol_reset_l4_csum_good(const struct dp_packet *p)
+{
+ if (!dp_packet_ol_l4_csum_partial(p)) {
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CSUM_GOOD;
+ }
+}
+
+/* Marks packet 'p' with good integrity if the 'start' and 'offset'
+ * match the 'csum_start' and 'csum_offset' in packet 'p'.
+ * The 'start' is the offset from the beginning of the packet headers.
+ * The 'offset' is the offset from start to place the checksum. */
+static inline void
+dp_packet_ol_vnet_csum_check(const struct dp_packet *p, uint16_t start,
+ uint16_t offset)
+{
+ if (p->csum_start == start && p->csum_offset == offset) {
+ dp_packet_ol_set_l4_csum_partial(p);
+ }
+}
+
static inline void ALWAYS_INLINE
dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet *packet)
{
@@ -1025,6 +1025,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
if (dl_type == htons(ETH_TYPE_IP)) {
dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
}
+ dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+ offsetof(struct tcp_header,
+ tcp_csum));
+ if (dp_packet_ol_l4_checksum_good(packet)
+ || dp_packet_ol_l4_csum_partial(packet)) {
+ dp_packet_ol_set_tx_tcp_csum(packet);
+ }
}
}
} else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
@@ -1038,6 +1045,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
if (dl_type == htons(ETH_TYPE_IP)) {
dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
}
+ dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+ offsetof(struct udp_header,
+ udp_csum));
+ if (dp_packet_ol_l4_checksum_good(packet)
+ || dp_packet_ol_l4_csum_partial(packet)) {
+ dp_packet_ol_set_tx_udp_csum(packet);
+ }
}
} else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
@@ -1047,6 +1061,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
+ dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+ offsetof(struct sctp_header,
+ sctp_csum));
+ if (dp_packet_ol_l4_checksum_good(packet)
+ || dp_packet_ol_l4_csum_partial(packet)) {
+ dp_packet_ol_set_tx_sctp_csum(packet);
+ }
}
} else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) {
if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
@@ -3166,6 +3187,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
tcp->tcp_csum = 0;
tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
tcp, l4_len));
+ dp_packet_ol_set_l4_csum_good(p);
} else if (flow->nw_proto == IPPROTO_UDP) {
struct udp_header *udp = dp_packet_l4(p);
@@ -3175,6 +3197,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
if (!udp->udp_csum) {
udp->udp_csum = htons(0xffff);
}
+ dp_packet_ol_set_l4_csum_good(p);
} else if (flow->nw_proto == IPPROTO_ICMP) {
struct icmp_header *icmp = dp_packet_l4(p);
@@ -145,17 +145,6 @@ typedef uint16_t dpdk_port_t;
#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
-/* List of required flags advertised by the hardware that will be used
- * if TSO is enabled. Ideally this should include
- * RTE_ETH_TX_OFFLOAD_SCTP_CKSUM. However, very few drivers support that
- * at the moment and SCTP is not a widely used protocol like TCP and UDP,
- * so it's optional. */
-#define DPDK_TX_TSO_OFFLOAD_FLAGS (RTE_ETH_TX_OFFLOAD_TCP_TSO \
- | RTE_ETH_TX_OFFLOAD_TCP_CKSUM \
- | RTE_ETH_TX_OFFLOAD_UDP_CKSUM \
- | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)
-
-
static const struct rte_eth_conf port_conf = {
.rxmode = {
.split_hdr_size = 0,
@@ -398,8 +387,10 @@ enum dpdk_hw_ol_features {
NETDEV_RX_HW_CRC_STRIP = 1 << 1,
NETDEV_RX_HW_SCATTER = 1 << 2,
NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
- NETDEV_TX_TSO_OFFLOAD = 1 << 4,
- NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
+ NETDEV_TX_TCP_CKSUM_OFFLOAD = 1 << 4,
+ NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5,
+ NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6,
+ NETDEV_TX_TSO_OFFLOAD = 1 << 7,
};
/*
@@ -953,6 +944,35 @@ dpdk_watchdog(void *dummy OVS_UNUSED)
return NULL;
}
+static void
+netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev,
+ enum dpdk_hw_ol_features hw_ol_features,
+ enum netdev_ol_flags flag)
+{
+ struct netdev *netdev = &dev->up;
+
+ if (dev->hw_ol_features & hw_ol_features) {
+ netdev->ol_flags |= flag;
+ } else {
+ netdev->ol_flags &= ~flag;
+ }
+}
+
+static void
+netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev)
+{
+ netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_IPV4_CKSUM_OFFLOAD,
+ NETDEV_OFFLOAD_TX_IPV4_CSUM);
+ netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TCP_CKSUM_OFFLOAD,
+ NETDEV_OFFLOAD_TX_TCP_CSUM);
+ netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_UDP_CKSUM_OFFLOAD,
+ NETDEV_OFFLOAD_TX_UDP_CSUM);
+ netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_SCTP_CKSUM_OFFLOAD,
+ NETDEV_OFFLOAD_TX_SCTP_CSUM);
+ netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD,
+ NETDEV_OFFLOAD_TX_TCP_TSO);
+}
+
static int
dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
{
@@ -989,11 +1009,20 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
}
+ if (dev->hw_ol_features & NETDEV_TX_TCP_CKSUM_OFFLOAD) {
+ conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
+ }
+
+ if (dev->hw_ol_features & NETDEV_TX_UDP_CKSUM_OFFLOAD) {
+ conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
+ }
+
+ if (dev->hw_ol_features & NETDEV_TX_SCTP_CKSUM_OFFLOAD) {
+ conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM;
+ }
+
if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
- conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
- if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
- conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM;
- }
+ conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
}
/* Limit configured rss hash functions to only those supported
@@ -1099,7 +1128,6 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
struct rte_ether_addr eth_addr;
int diag;
int n_rxq, n_txq;
- uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS;
uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
@@ -1135,18 +1163,28 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
}
+ if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
+ dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
+ } else {
+ dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
+ }
+
+ if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
+ dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
+ } else {
+ dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
+ }
+
+ if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
+ dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+ } else {
+ dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+ }
+
dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
if (userspace_tso_enabled()) {
- if ((info.tx_offload_capa & tx_tso_offload_capa)
- == tx_tso_offload_capa) {
+ if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
- if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
- dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD;
- } else {
- VLOG_WARN("%s: Tx SCTP checksum offload is not supported, "
- "SCTP packets sent to this device will be dropped",
- netdev_get_name(&dev->up));
- }
} else {
VLOG_WARN("%s: Tx TSO offload is not supported.",
netdev_get_name(&dev->up));
@@ -1708,6 +1746,9 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false");
HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD);
HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD);
+ HWOL_SMAP_ADD("tx_tcp_csum_offload", NETDEV_TX_TCP_CKSUM_OFFLOAD);
+ HWOL_SMAP_ADD("tx_udp_csum_offload", NETDEV_TX_UDP_CKSUM_OFFLOAD);
+ HWOL_SMAP_ADD("tx_sctp_csum_offload", NETDEV_TX_SCTP_CKSUM_OFFLOAD);
HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD);
#undef HWOL_SMAP_ADD
smap_add(args, "lsc_interrupt_mode",
@@ -2154,6 +2195,7 @@ netdev_dpdk_prep_ol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
+ mbuf->l4_len = 0;
mbuf->outer_l2_len = 0;
mbuf->outer_l3_len = 0;
@@ -4935,21 +4977,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
}
err = dpdk_eth_dev_init(dev);
- if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
- netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
- } else {
- netdev->ol_flags &= ~NETDEV_OFFLOAD_TX_IPV4_CSUM;
- }
-
- if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
- netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
- netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
- netdev->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
- netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
- if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
- netdev->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
- }
- }
+ netdev_dpdk_update_netdev_flags(dev);
/* If both requested and actual hwaddr were previously
* unset (initialized to 0), then first device init above
@@ -5024,6 +5052,7 @@ netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
int err;
ovs_mutex_lock(&dev->mutex);
+ netdev_dpdk_update_netdev_flags(dev);
err = dpdk_vhost_reconfigure_helper(dev);
ovs_mutex_unlock(&dev->mutex);
@@ -5088,19 +5117,22 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
goto unlock;
}
+ vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
+ | 1ULL << VIRTIO_NET_F_HOST_UFO;
+
+ dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+ dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
+ dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
+ dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+
if (userspace_tso_enabled()) {
- netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
- netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
- netdev->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
- netdev->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
- netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
- vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
- | 1ULL << VIRTIO_NET_F_HOST_UFO;
+ dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+ VLOG_DBG("%s: TSO enabled on vhost port",
+ netdev_get_name(&dev->up));
} else {
- /* This disables checksum offloading and all the features
- * that depends on it (TSO, UFO, ECN) according to virtio
- * specification. */
vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM;
+ vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_TSO4
+ | 1ULL << VIRTIO_NET_F_HOST_TSO6;
}
err = rte_vhost_driver_disable_features(dev->vhost_id,
@@ -927,14 +927,6 @@ netdev_linux_common_construct(struct netdev *netdev_)
netnsid_unset(&netdev->netnsid);
ovs_mutex_init(&netdev->mutex);
- if (userspace_tso_enabled()) {
- netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
- netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
- netdev_->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
- netdev_->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
- netdev_->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
- }
-
return 0;
}
@@ -948,6 +940,16 @@ netdev_linux_construct(struct netdev *netdev_)
return error;
}
+ /* The socket interface doesn't offer the option to enable only
+ * csum offloading without TSO. */
+ if (userspace_tso_enabled()) {
+ netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
+ netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
+ netdev_->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
+ netdev_->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
+ netdev_->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
+ }
+
error = get_flags(&netdev->up, &netdev->ifi_flags);
if (error == ENODEV) {
if (netdev->up.netdev_class != &netdev_internal_class) {
@@ -976,6 +978,7 @@ netdev_linux_construct_tap(struct netdev *netdev_)
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
static const char tap_dev[] = "/dev/net/tun";
const char *name = netdev_->name;
+ unsigned long oflags;
struct ifreq ifr;
int error = netdev_linux_common_construct(netdev_);
@@ -993,10 +996,7 @@ netdev_linux_construct_tap(struct netdev *netdev_)
/* Create tap device. */
get_flags(&netdev->up, &netdev->ifi_flags);
- ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
- if (userspace_tso_enabled()) {
- ifr.ifr_flags |= IFF_VNET_HDR;
- }
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) {
@@ -1019,21 +1019,22 @@ netdev_linux_construct_tap(struct netdev *netdev_)
goto error_close;
}
+ oflags = TUN_F_CSUM;
if (userspace_tso_enabled()) {
- /* Old kernels don't support TUNSETOFFLOAD. If TUNSETOFFLOAD is
- * available, it will return EINVAL when a flag is unknown.
- * Therefore, try enabling offload with no flags to check
- * if TUNSETOFFLOAD support is available or not. */
- if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, 0) == 0 || errno != EINVAL) {
- unsigned long oflags = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
-
- if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == -1) {
- VLOG_WARN("%s: enabling tap offloading failed: %s", name,
- ovs_strerror(errno));
- error = errno;
- goto error_close;
- }
- }
+ oflags |= (TUN_F_TSO4 | TUN_F_TSO6);
+ }
+
+ if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) {
+ netdev_->ol_flags |= (NETDEV_OFFLOAD_TX_IPV4_CSUM
+ | NETDEV_OFFLOAD_TX_TCP_CSUM
+ | NETDEV_OFFLOAD_TX_UDP_CSUM);
+
+ if (userspace_tso_enabled()) {
+ netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
+ }
+ } else {
+ VLOG_WARN("%s: Disabling hardware offloading: %s", name,
+ ovs_strerror(errno));
}
netdev->present = true;
@@ -1333,18 +1334,22 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu,
pkt = buffers[i];
}
- if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
- struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
- struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+ if (virtio_net_hdr_size) {
+ int ret = netdev_linux_parse_vnet_hdr(pkt);
+ if (OVS_UNLIKELY(ret)) {
+ struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
+ struct netdev_linux *netdev = netdev_linux_cast(netdev_);
- /* Unexpected error situation: the virtio header is not present
- * or corrupted. Drop the packet but continue in case next ones
- * are correct. */
- dp_packet_delete(pkt);
- netdev->rx_dropped += 1;
- VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header",
- netdev_get_name(netdev_));
- continue;
+ /* Unexpected error situation: the virtio header is not
+ * present or corrupted or contains unsupported features.
+ * Drop the packet but continue in case next ones are
+ * correct. */
+ dp_packet_delete(pkt);
+ netdev->rx_dropped += 1;
+ VLOG_WARN_RL(&rl, "%s: Dropped packet: %s",
+ netdev_get_name(netdev_), ovs_strerror(ret));
+ continue;
+ }
}
for (cmsg = CMSG_FIRSTHDR(&mmsgs[i].msg_hdr); cmsg;
@@ -1392,7 +1397,6 @@ static int
netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
struct dp_packet_batch *batch)
{
- int virtio_net_hdr_size;
ssize_t retval;
size_t std_len;
int iovlen;
@@ -1402,16 +1406,14 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
/* Use the buffer from the allocated packet below to receive MTU
* sized packets and an aux_buf for extra TSO data. */
iovlen = IOV_TSO_SIZE;
- virtio_net_hdr_size = sizeof(struct virtio_net_hdr);
} else {
/* Use only the buffer from the allocated packet. */
iovlen = IOV_STD_SIZE;
- virtio_net_hdr_size = 0;
}
/* The length here needs to be accounted in the same way when the
* aux_buf is allocated so that it can be prepended to TSO buffer. */
- std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu;
+ std_len = sizeof(struct virtio_net_hdr) + VLAN_ETH_HEADER_LEN + mtu;
for (i = 0; i < NETDEV_MAX_BURST; i++) {
struct dp_packet *buffer;
struct dp_packet *pkt;
@@ -1451,7 +1453,7 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
pkt = buffer;
}
- if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
+ if (netdev_linux_parse_vnet_hdr(pkt)) {
struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
@@ -1600,7 +1602,7 @@ netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
* on other interface types because we attach a socket filter to the rx
* socket. */
static int
-netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu,
+netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu,
struct dp_packet_batch *batch)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
@@ -1621,9 +1623,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu,
ssize_t retval;
int error;
- if (tso) {
- netdev_linux_prepend_vnet_hdr(packet, mtu);
- }
+ netdev_linux_prepend_vnet_hdr(packet, mtu);
size = dp_packet_size(packet);
do {
@@ -1754,7 +1754,7 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch);
} else {
- error = netdev_linux_tap_batch_send(netdev_, tso, mtu, batch);
+ error = netdev_linux_tap_batch_send(netdev_, mtu, batch);
}
if (error) {
if (error == ENOBUFS) {
@@ -6628,59 +6628,78 @@ netdev_linux_parse_l2(struct dp_packet *p, uint16_t *l4proto)
}
*l4proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt;
- dp_packet_ol_set_tx_ipv6(p);
}
return 0;
}
+/* Initializes packet 'b' with features enabled in the prepended
+ * struct virtio_net_hdr. Returns 0 if successful, otherwise a
+ * positive errno value. */
static int
netdev_linux_parse_vnet_hdr(struct dp_packet *p)
{
struct virtio_net_hdr *vnet = dp_packet_pull(p, sizeof *vnet);
- uint16_t l4proto = 0;
if (OVS_UNLIKELY(!vnet)) {
- return -EINVAL;
+ return EINVAL;
}
if (vnet->flags == 0 && vnet->gso_type == VIRTIO_NET_HDR_GSO_NONE) {
return 0;
}
- if (netdev_linux_parse_l2(p, &l4proto)) {
- return -EINVAL;
- }
-
if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
- if (l4proto == IPPROTO_TCP) {
- dp_packet_ol_set_tx_tcp_csum(p);
- } else if (l4proto == IPPROTO_UDP) {
- dp_packet_ol_set_tx_udp_csum(p);
- } else if (l4proto == IPPROTO_SCTP) {
- dp_packet_ol_set_tx_sctp_csum(p);
- }
- }
+ uint16_t l4proto = 0;
- if (l4proto && vnet->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
- uint8_t allowed_mask = VIRTIO_NET_HDR_GSO_TCPV4
- | VIRTIO_NET_HDR_GSO_TCPV6
- | VIRTIO_NET_HDR_GSO_UDP;
- uint8_t type = vnet->gso_type & allowed_mask;
-
- if (type == VIRTIO_NET_HDR_GSO_TCPV4
- || type == VIRTIO_NET_HDR_GSO_TCPV6) {
- dp_packet_ol_set_tcp_seg(p);
+ if (netdev_linux_parse_l2(p, &l4proto)) {
+ return EINVAL;
}
- }
- return 0;
+ if (l4proto == IPPROTO_UDP) {
+ dp_packet_ol_set_tx_udp_csum(p);
+ }
+ /* The packet has offloaded checksum. However, there is no
+ * additional information like the protocol used, so it would
+ * require parsing the packet here. The checksum starting point
+ * and offset are going to be verified when the packet headers
+ * are parsed during miniflow extraction. */
+ p->csum_start = vnet->csum_start;
+ p->csum_offset = vnet->csum_offset;
+ } else {
+ p->csum_start = 0;
+ p->csum_offset = 0;
+ }
+
+ int ret = 0;
+ switch (vnet->gso_type) {
+ case VIRTIO_NET_HDR_GSO_TCPV4:
+ case VIRTIO_NET_HDR_GSO_TCPV6:
+ /* FIXME: The packet has offloaded TCP segmentation. The gso_size
+ * is given and needs to be respected. */
+ dp_packet_ol_set_tcp_seg(p);
+ break;
+ case VIRTIO_NET_HDR_GSO_UDP:
+ /* UFO is not supported. */
+ VLOG_WARN_RL(&rl, "Received an unsupported packet with UFO enabled.");
+ ret = ENOTSUP;
+ break;
+ case VIRTIO_NET_HDR_GSO_NONE:
+ break;
+ default:
+ ret = ENOTSUP;
+ VLOG_WARN_RL(&rl, "Received an unsupported packet with GSO type: 0x%x",
+ vnet->gso_type);
+ }
+
+ return ret;
}
static void
netdev_linux_prepend_vnet_hdr(struct dp_packet *p, int mtu)
{
- struct virtio_net_hdr *vnet = dp_packet_push_zeros(p, sizeof *vnet);
+ struct virtio_net_hdr v;
+ struct virtio_net_hdr *vnet = &v;
if (dp_packet_ol_tcp_seg(p)) {
uint16_t hdr_len = ((char *) dp_packet_l4(p)
@@ -6690,30 +6709,92 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *p, int mtu)
vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len);
if (dp_packet_ol_tx_ipv4(p)) {
vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- } else {
+ } else if (dp_packet_ol_tx_ipv6(p)) {
vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
}
} else {
- vnet->flags = VIRTIO_NET_HDR_GSO_NONE;
- }
-
- if (dp_packet_ol_l4_mask(p)) {
- vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- vnet->csum_start = (OVS_FORCE __virtio16)((char *) dp_packet_l4(p)
- - (char *) dp_packet_eth(p));
-
+ vnet->hdr_len = 0;
+ vnet->gso_size = 0;
+ vnet->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+ }
+
+ if (dp_packet_ol_l4_checksum_good(p)) {
+ /* The packet has good checksum in the packet.
+ * No need to validate again. */
+ vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+ vnet->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+ } else if (dp_packet_ol_tx_l4_checksum(p)) {
+ /* The csum calculation is offloaded. */
if (dp_packet_ol_tx_tcp_csum(p)) {
+ /* Virtual I/O Device (VIRTIO) Version 1.1
+ * 5.1.6.2 Packet Transmission
+ If the driver negotiated VIRTIO_NET_F_CSUM, it can skip
+ checksumming the packet:
+ - flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM set,
+ - csum_start is set to the offset within the packet
+ to begin checksumming, and
+ - csum_offset indicates how many bytes after the
+ csum_start the new (16 bit ones complement) checksum
+ is placed by the device.
+ The TCP checksum field in the packet is set to the sum of
+ the TCP pseudo header, so that replacing it by the ones
+ complement checksum of the TCP header and body will give
+ the correct result. */
+
+ struct tcp_header *tcp_hdr = dp_packet_l4(p);
+ ovs_be16 csum = 0;
+ if (dp_packet_ol_tx_ipv4(p)) {
+ const struct ip_header *ip_hdr = dp_packet_l3(p);
+ csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
+ } else if (dp_packet_ol_tx_ipv6(p)) {
+ const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(p);
+ csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
+ }
+
+ tcp_hdr->tcp_csum = csum;
+ vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet->csum_start = (OVS_FORCE __virtio16) p->l4_ofs;
vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
struct tcp_header, tcp_csum);
} else if (dp_packet_ol_tx_udp_csum(p)) {
+ struct udp_header *udp_hdr = dp_packet_l4(p);
+ ovs_be16 csum = 0;
+
+ if (dp_packet_ol_tx_ipv4(p)) {
+ const struct ip_header *ip_hdr = dp_packet_l3(p);
+ csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
+ } else if (dp_packet_ol_tx_ipv6(p)) {
+ const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(p);
+ csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
+ }
+
+ udp_hdr->udp_csum = csum;
+ vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet->csum_start = (OVS_FORCE __virtio16) p->l4_ofs;
vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
struct udp_header, udp_csum);
} else if (dp_packet_ol_tx_sctp_csum(p)) {
- vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
- struct sctp_header, sctp_csum);
+ /* The Linux kernel networking stack only supports csum_start
+ * and csum_offset when SCTP GSO is enabled. See kernel's
+ * skb_csum_hwoffload_help(). Currently there is no SCTP
+ * segmentation offload support in OVS. */
+ vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+ vnet->flags = 0;
} else {
- VLOG_WARN_RL(&rl, "Unsupported L4 protocol");
+ /* This should only happen when DP_PACKET_OL_TX_L4_MASK includes
+ * a new flag that is not covered in above checks. */
+ VLOG_WARN_RL(&rl, "Unsupported L4 checksum offload. "
+ "Flags: %"PRIu64,
+ (uint64_t)*dp_packet_ol_flags_ptr(p));
+ vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+ vnet->flags = 0;
}
+ } else {
+ /* Packet L4 csum is unknown. */
+ vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+ vnet->flags = 0;
}
+
+ dp_packet_push(p, vnet, sizeof *vnet);
}
@@ -224,28 +224,6 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
return udp + 1;
}
-static void
-netdev_tnl_calc_udp_csum(struct udp_header *udp, struct dp_packet *packet,
- int ip_tot_size)
-{
- uint32_t csum;
-
- if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
- csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(
- dp_packet_data(packet)));
- } else {
- csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr(
- dp_packet_data(packet)));
- }
-
- csum = csum_continue(csum, udp, ip_tot_size);
- udp->udp_csum = csum_finish(csum);
-
- if (!udp->udp_csum) {
- udp->udp_csum = htons(0xffff);
- }
-}
-
void
netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
struct dp_packet *packet,
@@ -260,9 +238,9 @@ netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
udp->udp_src = netdev_tnl_get_src_port(packet);
udp->udp_len = htons(ip_tot_size);
- if (udp->udp_csum) {
- netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
- }
+ /* Postpone checksum to the egress netdev. */
+ dp_packet_ol_set_tx_udp_csum(packet);
+ dp_packet_ol_reset_l4_csum_good(packet);
}
static void *
@@ -806,7 +784,9 @@ netdev_gtpu_push_header(const struct netdev *netdev,
data->header_len, &ip_tot_size);
udp->udp_src = netdev_tnl_get_src_port(packet);
udp->udp_len = htons(ip_tot_size);
- netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
+ /* Postpone checksum to the egress netdev. */
+ dp_packet_ol_set_tx_udp_csum(packet);
+ dp_packet_ol_reset_l4_csum_good(packet);
gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1);
@@ -804,36 +804,16 @@ netdev_send_prepare_packet(const uint64_t netdev_flags,
* netdev to decide what would be the best to do.
* Provide a software fallback in case the device doesn't support IP csum
* offloading. Note: Encapsulated packet must have the inner IP header
+ * csum already calculated.
+ * A packet with L4 csum offloading enabled was received with a verified
+ * csum. Leave the L4 csum offloading enabled even with a good checksum
+ * for the netdev to decide what would be the best to do.
+ * A netdev that requires a pseudo header csum needs to calculate that.
+ * Provide a software fallback in case the netdev doesn't support L4 csum
+ * offloading. Note: Encapsulated packet must have the inner L4 header
* csum already calculated. */
dp_packet_ol_send_prepare(packet, netdev_flags);
- if (dp_packet_ol_l4_mask(packet)) {
- if (dp_packet_ol_tx_tcp_csum(packet)) {
- if (!(netdev_flags & NETDEV_OFFLOAD_TX_TCP_CSUM)) {
- /* Fall back to TCP csum in software. */
- VLOG_ERR_BUF(errormsg, "No TCP checksum support");
- return false;
- }
- } else if (dp_packet_ol_tx_udp_csum(packet)) {
- if (!(netdev_flags & NETDEV_OFFLOAD_TX_UDP_CSUM)) {
- /* Fall back to UDP csum in software. */
- VLOG_ERR_BUF(errormsg, "No UDP checksum support");
- return false;
- }
- } else if (dp_packet_ol_tx_sctp_csum(packet)) {
- if (!(netdev_flags & NETDEV_OFFLOAD_TX_SCTP_CSUM)) {
- /* Fall back to SCTP csum in software. */
- VLOG_ERR_BUF(errormsg, "No SCTP checksum support");
- return false;
- }
- } else {
- uint64_t ol_flags = *dp_packet_ol_flags_ptr(packet);
- VLOG_ERR_BUF(errormsg, "No L4 checksum support: "
- "offload mask: %"PRIu64, ol_flags);
- return false;
- }
- }
-
return true;
}
@@ -966,11 +946,10 @@ netdev_push_header(const struct netdev *netdev,
size_t i, size = dp_packet_batch_size(batch);
DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
- if (OVS_UNLIKELY(dp_packet_ol_tcp_seg(packet)
- || dp_packet_ol_l4_mask(packet))) {
+ if (OVS_UNLIKELY(dp_packet_ol_tcp_seg(packet))) {
COVERAGE_INC(netdev_push_header_drops);
dp_packet_delete(packet);
- VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is "
+ VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO offloading is "
"not supported: packet dropped",
netdev_get_name(netdev));
} else {
@@ -1132,16 +1132,22 @@ packet_set_ipv4_addr(struct dp_packet *packet,
pkt_metadata_init_conn(&packet->md);
if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
- struct tcp_header *th = dp_packet_l4(packet);
-
- th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr);
+ if (dp_packet_ol_tx_tcp_csum(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ } else {
+ struct tcp_header *th = dp_packet_l4(packet);
+ th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr);
+ }
} else if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN ) {
- struct udp_header *uh = dp_packet_l4(packet);
-
- if (uh->udp_csum) {
- uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr);
- if (!uh->udp_csum) {
- uh->udp_csum = htons(0xffff);
+ if (dp_packet_ol_tx_udp_csum(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ } else {
+ struct udp_header *uh = dp_packet_l4(packet);
+ if (uh->udp_csum) {
+ uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr);
+ if (!uh->udp_csum) {
+ uh->udp_csum = htons(0xffff);
+ }
}
}
}
@@ -1245,16 +1251,24 @@ packet_update_csum128(struct dp_packet *packet, uint8_t proto,
size_t l4_size = dp_packet_l4_size(packet);
if (proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
- struct tcp_header *th = dp_packet_l4(packet);
+ if (dp_packet_ol_tx_tcp_csum(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ } else {
+ struct tcp_header *th = dp_packet_l4(packet);
- th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+ th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+ }
} else if (proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) {
- struct udp_header *uh = dp_packet_l4(packet);
+ if (dp_packet_ol_tx_udp_csum(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ } else {
+ struct udp_header *uh = dp_packet_l4(packet);
- if (uh->udp_csum) {
- uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
- if (!uh->udp_csum) {
- uh->udp_csum = htons(0xffff);
+ if (uh->udp_csum) {
+ uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
+ if (!uh->udp_csum) {
+ uh->udp_csum = htons(0xffff);
+ }
}
}
} else if (proto == IPPROTO_ICMPV6 &&
@@ -1372,7 +1386,9 @@ static void
packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum)
{
if (*port != new_port) {
- *csum = recalc_csum16(*csum, *port, new_port);
+ if (csum) {
+ *csum = recalc_csum16(*csum, *port, new_port);
+ }
*port = new_port;
}
}
@@ -1384,9 +1400,16 @@ void
packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
{
struct tcp_header *th = dp_packet_l4(packet);
+ ovs_be16 *csum = NULL;
+
+ if (dp_packet_ol_tx_tcp_csum(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ } else {
+ csum = &th->tcp_csum;
+ }
- packet_set_port(&th->tcp_src, src, &th->tcp_csum);
- packet_set_port(&th->tcp_dst, dst, &th->tcp_csum);
+ packet_set_port(&th->tcp_src, src, csum);
+ packet_set_port(&th->tcp_dst, dst, csum);
pkt_metadata_init_conn(&packet->md);
}
@@ -1398,17 +1421,21 @@ packet_set_udp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
{
struct udp_header *uh = dp_packet_l4(packet);
- if (uh->udp_csum) {
- packet_set_port(&uh->udp_src, src, &uh->udp_csum);
- packet_set_port(&uh->udp_dst, dst, &uh->udp_csum);
+ if (dp_packet_ol_tx_udp_csum(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ packet_set_port(&uh->udp_src, src, NULL);
+ packet_set_port(&uh->udp_dst, dst, NULL);
+ } else {
+ ovs_be16 *csum = uh->udp_csum ? &uh->udp_csum : NULL;
+
+ packet_set_port(&uh->udp_src, src, csum);
+ packet_set_port(&uh->udp_dst, dst, csum);
- if (!uh->udp_csum) {
+ if (csum && !uh->udp_csum) {
uh->udp_csum = htons(0xffff);
}
- } else {
- uh->udp_src = src;
- uh->udp_dst = dst;
}
+
pkt_metadata_init_conn(&packet->md);
}
@@ -1419,18 +1446,27 @@ void
packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
{
struct sctp_header *sh = dp_packet_l4(packet);
- ovs_be32 old_csum, old_correct_csum, new_csum;
- uint16_t tp_len = dp_packet_l4_size(packet);
- old_csum = get_16aligned_be32(&sh->sctp_csum);
- put_16aligned_be32(&sh->sctp_csum, 0);
- old_correct_csum = crc32c((void *)sh, tp_len);
+ if (dp_packet_ol_tx_sctp_csum(packet)) {
+ dp_packet_ol_reset_l4_csum_good(packet);
+ sh->sctp_src = src;
+ sh->sctp_dst = dst;
+ } else {
+ ovs_be32 old_csum, old_correct_csum, new_csum;
+ uint16_t tp_len = dp_packet_l4_size(packet);
- sh->sctp_src = src;
- sh->sctp_dst = dst;
+ old_csum = get_16aligned_be32(&sh->sctp_csum);
+ put_16aligned_be32(&sh->sctp_csum, 0);
+ old_correct_csum = crc32c((void *) sh, tp_len);
+
+ sh->sctp_src = src;
+ sh->sctp_dst = dst;
+
+ new_csum = crc32c((void *) sh, tp_len);
+ put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum
+ ^ new_csum);
+ }
- new_csum = crc32c((void *)sh, tp_len);
- put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum);
pkt_metadata_init_conn(&packet->md);
}
@@ -1954,3 +1990,72 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
}
}
}
+
+/* Set TCP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_tcp_complete_csum(struct dp_packet *p)
+{
+ struct tcp_header *tcp = dp_packet_l4(p);
+
+ tcp->tcp_csum = 0;
+ if (dp_packet_ol_tx_ipv4(p)) {
+ struct ip_header *ip = dp_packet_l3(p);
+
+ tcp->tcp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip),
+ tcp, dp_packet_l4_size(p)));
+ } else if (dp_packet_ol_tx_ipv6(p)) {
+ struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p);
+
+ tcp->tcp_csum = packet_csum_upperlayer6(ip6, tcp, ip6->ip6_nxt,
+ dp_packet_l4_size(p));
+ } else {
+ OVS_NOT_REACHED();
+ }
+}
+
+/* Set UDP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_udp_complete_csum(struct dp_packet *p)
+{
+ struct udp_header *udp = dp_packet_l4(p);
+
+ /* A zero udp_csum means checksumming is disabled; skip calculation. */
+ if (!udp->udp_csum) {
+ return;
+ }
+
+ udp->udp_csum = 0;
+ if (dp_packet_ol_tx_ipv4(p)) {
+ struct ip_header *ip = dp_packet_l3(p);
+
+ udp->udp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip),
+ udp, dp_packet_l4_size(p)));
+ } else if (dp_packet_ol_tx_ipv6(p)) {
+ struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p);
+
+ udp->udp_csum = packet_csum_upperlayer6(ip6, udp, ip6->ip6_nxt,
+ dp_packet_l4_size(p));
+ } else {
+ OVS_NOT_REACHED();
+ }
+
+ if (!udp->udp_csum) {
+ udp->udp_csum = htons(0xffff);
+ }
+}
+
+/* Set SCTP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_sctp_complete_csum(struct dp_packet *p)
+{
+ struct sctp_header *sh = dp_packet_l4(p);
+ uint16_t tp_len = dp_packet_l4_size(p);
+ ovs_be32 csum;
+
+ put_16aligned_be32(&sh->sctp_csum, 0);
+ csum = crc32c((void *) sh, tp_len);
+ put_16aligned_be32(&sh->sctp_csum, csum);
+}
@@ -1643,6 +1643,9 @@ void packet_put_ra_prefix_opt(struct dp_packet *,
const ovs_be128 router_prefix);
uint32_t packet_csum_pseudoheader(const struct ip_header *);
void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6);
+void packet_tcp_complete_csum(struct dp_packet *);
+void packet_udp_complete_csum(struct dp_packet *);
+void packet_sctp_complete_csum(struct dp_packet *);
#define DNS_HEADER_LEN 12
struct dns_header {