@@ -2043,16 +2043,15 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
ctx->key.dl_type = dl_type;
if (ctx->key.dl_type == htons(ETH_TYPE_IP)) {
- bool hwol_bad_l3_csum = dp_packet_ip_checksum_bad(pkt);
- if (hwol_bad_l3_csum) {
+ if (dp_packet_ip_checksum_bad(pkt)) {
ok = false;
COVERAGE_INC(conntrack_l3csum_err);
} else {
- bool hwol_good_l3_csum = dp_packet_ip_checksum_valid(pkt)
- || dp_packet_hwol_is_ipv4(pkt);
- /* Validate the checksum only when hwol is not supported. */
+ /* Validate the checksum only when hwol is not supported and the
+ * packet's checksum status is not known. */
ok = extract_l3_ipv4(&ctx->key, l3, dp_packet_l3_size(pkt), NULL,
- !hwol_good_l3_csum);
+ !dp_packet_hwol_is_ipv4(pkt) &&
+ !dp_packet_ip_checksum_good(pkt));
}
} else if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
ok = extract_l3_ipv6(&ctx->key, l3, dp_packet_l3_size(pkt), NULL);
@@ -2063,8 +2062,8 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
if (ok) {
bool hwol_bad_l4_csum = dp_packet_l4_checksum_bad(pkt);
if (!hwol_bad_l4_csum) {
- bool hwol_good_l4_csum = dp_packet_l4_checksum_valid(pkt)
- || dp_packet_hwol_tx_l4_checksum(pkt);
+ bool hwol_good_l4_csum = dp_packet_l4_checksum_good(pkt)
+ || dp_packet_hwol_tx_l4_checksum(pkt);
/* Validate the checksum only when hwol is not supported. */
if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
&ctx->icmp_related, l3, !hwol_good_l4_csum,
@@ -3373,7 +3372,9 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
}
if (seq_skew) {
ip_len = ntohs(l3_hdr->ip_tot_len) + seq_skew;
- if (!dp_packet_hwol_is_ipv4(pkt)) {
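+ /* If IP csum offload was requested, only drop the RX "good csum" flag;
+ * the csum itself is recomputed at transmit time. Otherwise update the
+ * csum incrementally here. */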
+ if (dp_packet_hwol_tx_ip_csum(pkt)) {
+ dp_packet_ol_reset_ip_csum_good(pkt);
+ } else {
l3_hdr->ip_csum = recalc_csum16(l3_hdr->ip_csum,
l3_hdr->ip_tot_len,
htons(ip_len));
@@ -21,6 +21,7 @@
#include "dp-packet.h"
#include "netdev-afxdp.h"
#include "netdev-dpdk.h"
+#include "netdev-provider.h"
#include "openvswitch/dynamic-string.h"
#include "util.h"
@@ -530,3 +531,17 @@ dp_packet_compare_offsets(struct dp_packet *b1, struct dp_packet *b2,
}
return true;
}
+
+/* Checks if the packet 'p' is compatible with netdev_ol_flags 'flags'
+ * and if not, updates the packet with a software fallback. */
+void
+dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags)
+{
+ if (dp_packet_ip_checksum_good(p) || !dp_packet_hwol_tx_ip_csum(p)) {
+ dp_packet_hwol_reset_tx_ip_csum(p);
+ } else if (!(flags & NETDEV_TX_OFFLOAD_IPV4_CKSUM)) {
+ dp_packet_ip_set_header_csum(p);
+ dp_packet_ol_set_ip_csum_good(p);
+ dp_packet_hwol_reset_tx_ip_csum(p);
+ }
+}
@@ -25,6 +25,7 @@
#include <rte_mbuf.h>
#endif
+#include "csum.h"
#include "netdev-afxdp.h"
#include "netdev-dpdk.h"
#include "openvswitch/list.h"
@@ -83,6 +84,8 @@ enum dp_packet_offload_mask {
DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CKSUM, RTE_MBUF_F_TX_UDP_CKSUM, 0x400),
/* Offload SCTP checksum. */
DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, RTE_MBUF_F_TX_SCTP_CKSUM, 0x800),
+ /* Offload IP checksum. */
+ DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CKSUM, RTE_MBUF_F_TX_IP_CKSUM, 0x1000),
/* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
};
@@ -97,7 +100,8 @@ enum dp_packet_offload_mask {
DP_PACKET_OL_TX_IPV6 | \
DP_PACKET_OL_TX_TCP_CKSUM | \
DP_PACKET_OL_TX_UDP_CKSUM | \
- DP_PACKET_OL_TX_SCTP_CKSUM)
+ DP_PACKET_OL_TX_SCTP_CKSUM | \
+ DP_PACKET_OL_TX_IP_CKSUM)
#define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \
DP_PACKET_OL_TX_UDP_CKSUM | \
@@ -239,6 +243,7 @@ static inline bool dp_packet_equal(const struct dp_packet *,
bool dp_packet_compare_offsets(struct dp_packet *good,
struct dp_packet *test,
struct ds *err_str);
+void dp_packet_ol_send_prepare(struct dp_packet *, uint64_t);
/* Frees memory that 'b' points to, as well as 'b' itself. */
@@ -1030,6 +1035,26 @@ dp_packet_hwol_set_tx_ipv6(struct dp_packet *b)
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6;
}
+/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
+static inline bool
+dp_packet_hwol_tx_ip_csum(const struct dp_packet *p)
+{
+ return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IP_CKSUM);
+}
+
+/* Marks packet 'p' for IPv4 checksum offloading. */
+static inline void
+dp_packet_hwol_set_tx_ip_csum(struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IP_CKSUM;
+}
+
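+/* Resets the IPv4 checksum offloading flag in packet 'p'. */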
+static inline void
+dp_packet_hwol_reset_tx_ip_csum(struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IP_CKSUM;
+}
+
/* Mark packet 'b' for TCP checksum offloading. It implies that either
* the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */
static inline void
@@ -1063,13 +1088,31 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b)
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG;
}
+/* Returns 'true' if the IP header has good integrity and the
+ * checksum in it is correct. */
static inline bool
-dp_packet_ip_checksum_valid(const struct dp_packet *p)
+dp_packet_ip_checksum_good(const struct dp_packet *p)
{
return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_IP_CKSUM_MASK) ==
DP_PACKET_OL_RX_IP_CKSUM_GOOD;
}
+/* Marks packet 'p' with good IPv4 checksum. */
+static inline void
+dp_packet_ol_set_ip_csum_good(struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CKSUM_BAD;
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_IP_CKSUM_GOOD;
+}
+
+/* Resets IP good checksum flag in packet 'p'. */
+static inline void
+dp_packet_ol_reset_ip_csum_good(struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CKSUM_GOOD;
+}
+
+/* Returns 'true' if the IP checksum in packet 'p' is known to be bad. */
static inline bool
dp_packet_ip_checksum_bad(const struct dp_packet *p)
{
@@ -1077,8 +1120,21 @@ dp_packet_ip_checksum_bad(const struct dp_packet *p)
DP_PACKET_OL_RX_IP_CKSUM_BAD;
}
+/* Calculate and set the IPv4 header checksum in packet 'p'. */
+static inline void
+dp_packet_ip_set_header_csum(struct dp_packet *p)
+{
+ struct ip_header *ip = dp_packet_l3(p);
+
+ ovs_assert(ip);
+ ip->ip_csum = 0;
+ ip->ip_csum = csum(ip, sizeof *ip);
+}
+
+/* Returns 'true' if the packet 'p' has good integrity and the
+ * L4 checksum in it is correct. */
static inline bool
-dp_packet_l4_checksum_valid(const struct dp_packet *p)
+dp_packet_l4_checksum_good(const struct dp_packet *p)
{
return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK) ==
DP_PACKET_OL_RX_L4_CKSUM_GOOD;
@@ -698,6 +698,7 @@ mfex_ipv6_set_l2_pad_size(struct dp_packet *pkt,
return -1;
}
dp_packet_set_l2_pad_size(pkt, len_from_ipv6 - (p_len + IPV6_HEADER_LEN));
+ dp_packet_hwol_set_tx_ipv6(pkt);
return 0;
}
@@ -728,6 +729,10 @@ mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh,
return -1;
}
dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - ip_tot_len);
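+ /* Mirror the scalar miniflow extract: mark the packet as IPv4 and request
+ * IP csum offload when the csum is already known to be good. */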
+ dp_packet_hwol_set_tx_ipv4(pkt);
+ if (dp_packet_ip_checksum_good(pkt)) {
+ dp_packet_hwol_set_tx_ip_csum(pkt);
+ }
return 0;
}
@@ -7913,6 +7913,8 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
ds_destroy(&ds);
}
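+ /* No netdev offload capabilities are assumed for the upcall (flags are 0),
+ * so any pending IP csum offload is resolved in software here. */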
+ dp_packet_ol_send_prepare(packet_, 0);
+
return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
actions, wc, put_actions, dp->upcall_aux);
}
@@ -935,6 +935,10 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
nw_proto = nh->ip_proto;
nw_frag = ipv4_get_nw_frag(nh);
data_pull(&data, &size, ip_len);
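+ /* Mark the packet as IPv4 and, if the csum was already verified, request
+ * IP csum offload as well so that later header rewrites can postpone the
+ * recalculation to transmit time. */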
+ dp_packet_hwol_set_tx_ipv4(packet);
+ if (dp_packet_ip_checksum_good(packet)) {
+ dp_packet_hwol_set_tx_ip_csum(packet);
+ }
} else if (dl_type == htons(ETH_TYPE_IPV6)) {
const struct ovs_16aligned_ip6_hdr *nh = data;
ovs_be32 tc_flow;
@@ -948,6 +952,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
}
data_pull(&data, &size, sizeof *nh);
+ dp_packet_hwol_set_tx_ipv6(packet);
plen = ntohs(nh->ip6_plen);
dp_packet_set_l2_pad_size(packet, size - plen);
size = plen; /* Never pull padding. */
@@ -3247,9 +3252,12 @@ packet_expand(struct dp_packet *p, const struct flow *flow, size_t size)
struct ip_header *ip = dp_packet_l3(p);
ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
- ip->ip_csum = 0;
- ip->ip_csum = csum(ip, sizeof *ip);
-
+ if (dp_packet_hwol_tx_ip_csum(p)) {
+ dp_packet_ol_reset_ip_csum_good(p);
+ } else {
+ dp_packet_ip_set_header_csum(p);
+ dp_packet_ol_set_ip_csum_good(p);
+ }
pseudo_hdr_csum = packet_csum_pseudoheader(ip);
} else { /* ETH_TYPE_IPV6 */
struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(p);
@@ -3339,6 +3347,7 @@ flow_compose(struct dp_packet *p, const struct flow *flow,
/* Checksum has already been zeroed by put_zeros call. */
ip->ip_csum = csum(ip, sizeof *ip);
+ dp_packet_ol_set_ip_csum_good(p);
pseudo_hdr_csum = packet_csum_pseudoheader(ip);
flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
} else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
@@ -433,7 +433,9 @@ ipf_reassemble_v4_frags(struct ipf_list *ipf_list)
len += rest_len;
l3 = dp_packet_l3(pkt);
ovs_be16 new_ip_frag_off = l3->ip_frag_off & ~htons(IP_MORE_FRAGMENTS);
- if (!dp_packet_hwol_is_ipv4(pkt)) {
+ if (dp_packet_hwol_tx_ip_csum(pkt)) {
+ dp_packet_ol_reset_ip_csum_good(pkt);
+ } else {
l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off,
new_ip_frag_off);
l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len));
@@ -608,8 +610,7 @@ ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
goto invalid_pkt;
}
- if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(pkt)
- && !dp_packet_hwol_is_ipv4(pkt)
+ if (OVS_UNLIKELY(!dp_packet_ip_checksum_good(pkt)
&& csum(l3, ip_hdr_len) != 0)) {
COVERAGE_INC(ipf_l3csum_err);
goto invalid_pkt;
@@ -1186,7 +1187,9 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
} else {
struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt);
struct ip_header *l3_reass = dp_packet_l3(pkt);
- if (!dp_packet_hwol_is_ipv4(frag_i->pkt)) {
+ if (dp_packet_hwol_tx_ip_csum(frag_i->pkt)) {
+ dp_packet_ol_reset_ip_csum_good(frag_i->pkt);
+ } else {
ovs_be32 reass_ip =
get_16aligned_be32(&l3_reass->ip_src);
ovs_be32 frag_ip =
@@ -411,8 +411,9 @@ enum dpdk_hw_ol_features {
NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
NETDEV_RX_HW_CRC_STRIP = 1 << 1,
NETDEV_RX_HW_SCATTER = 1 << 2,
- NETDEV_TX_TSO_OFFLOAD = 1 << 3,
- NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4,
+ NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
+ NETDEV_TX_TSO_OFFLOAD = 1 << 4,
+ NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
};
/*
@@ -1039,6 +1040,10 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_KEEP_CRC;
}
+ if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
+ conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
+ }
+
if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
@@ -1179,6 +1184,12 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
}
+ if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
+ dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+ } else {
+ dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+ }
+
dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
if (userspace_tso_enabled()) {
if ((info.tx_offload_capa & tx_tso_offload_capa)
@@ -2227,13 +2238,16 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
{
struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
- if (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
- mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
- mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
- mbuf->outer_l2_len = 0;
- mbuf->outer_l3_len = 0;
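+ /* No offloads are requested that need the mbuf metadata prepared. */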
+ if (!(mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK
+ | RTE_MBUF_F_TX_TCP_SEG))) {
+ return true;
}
+ mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
+ mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
+ mbuf->outer_l2_len = 0;
+ mbuf->outer_l3_len = 0;
+
if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
struct tcp_header *th = dp_packet_l4(pkt);
@@ -2292,13 +2306,11 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
uint32_t nb_tx = 0;
uint16_t nb_tx_prep = cnt;
- if (userspace_tso_enabled()) {
- nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
- if (nb_tx_prep != cnt) {
- VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
- "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
- cnt, rte_strerror(rte_errno));
- }
+ nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
+ if (nb_tx_prep != cnt) {
+ VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
+ "Only %u/%u are valid: %s", netdev_get_name(&dev->up),
+ nb_tx_prep, cnt, rte_strerror(rte_errno));
}
while (nb_tx != nb_tx_prep) {
@@ -2637,11 +2649,19 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig)
memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size,
sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size));
- if (mbuf_dest->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
- mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
- - (char *)dp_packet_eth(pkt_dest);
- mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
+ if (dp_packet_l3(pkt_dest)) {
+ if (dp_packet_eth(pkt_dest)) {
+ mbuf_dest->l2_len = (char *) dp_packet_l3(pkt_dest)
+ - (char *) dp_packet_eth(pkt_dest);
+ } else {
+ mbuf_dest->l2_len = 0;
+ }
+ if (dp_packet_l4(pkt_dest)) {
+ mbuf_dest->l3_len = (char *) dp_packet_l4(pkt_dest)
- (char *) dp_packet_l3(pkt_dest);
+ } else {
+ mbuf_dest->l3_len = 0;
+ }
}
return pkt_dest;
@@ -2699,11 +2719,9 @@ netdev_dpdk_common_send(struct netdev *netdev, struct dp_packet_batch *batch,
pkt_cnt = cnt;
/* Prepare each mbuf for hardware offloading. */
- if (userspace_tso_enabled()) {
- cnt = netdev_dpdk_prep_hwol_batch(dev, pkts, pkt_cnt);
- stats->tx_invalid_hwol_drops += pkt_cnt - cnt;
- pkt_cnt = cnt;
- }
+ cnt = netdev_dpdk_prep_hwol_batch(dev, pkts, pkt_cnt);
+ stats->tx_invalid_hwol_drops += pkt_cnt - cnt;
+ pkt_cnt = cnt;
/* Apply Quality of Service policy. */
cnt = netdev_dpdk_qos_run(dev, pkts, pkt_cnt, true);
@@ -5260,6 +5278,13 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
}
err = dpdk_eth_dev_init(dev);
+
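+ /* Propagate the IP csum offload capability to the generic netdev flags
+ * so that the rest of the stack can rely on it. */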
+ if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
+ netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+ } else {
+ netdev->ol_flags &= ~NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+ }
+
if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
@@ -147,6 +147,11 @@ struct netdev_dummy {
int requested_n_txq OVS_GUARDED;
int requested_n_rxq OVS_GUARDED;
int requested_numa_id OVS_GUARDED;
+
+ /* Enable netdev IP csum offload. */
+ bool ol_ip_csum OVS_GUARDED;
+ /* Flag RX packet with good csum. */
+ bool ol_ip_csum_set_good OVS_GUARDED;
};
/* Max 'recv_queue_len' in struct netdev_dummy. */
@@ -914,6 +919,13 @@ netdev_dummy_set_config(struct netdev *netdev_, const struct smap *args,
}
}
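+ /* Dummy-specific IP csum offload knobs, used by the unit tests. */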
+ netdev->ol_ip_csum_set_good = smap_get_bool(args, "ol_ip_csum_set_good",
+ false);
+ netdev->ol_ip_csum = smap_get_bool(args, "ol_ip_csum", false);
+ if (netdev->ol_ip_csum) {
+ netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+ }
+
netdev_change_seq_changed(netdev_);
/* 'dummy-pmd' specific config. */
@@ -1092,6 +1104,10 @@ netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
netdev->rxq_stats[rxq_->queue_id].bytes += dp_packet_size(packet);
netdev->custom_stats[0].value++;
netdev->custom_stats[1].value++;
+ if (netdev->ol_ip_csum_set_good) {
+ /* The netdev hardware sets the flag when the packet has good csum. */
+ dp_packet_ol_set_ip_csum_good(packet);
+ }
ovs_mutex_unlock(&netdev->mutex);
dp_packet_batch_init_packet(batch, packet);
@@ -1173,6 +1189,12 @@ netdev_dummy_send(struct netdev *netdev, int qid,
}
}
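+ /* Emulate a device with IP csum offload: compute the csum in software
+ * when it was requested and is not already known to be good. */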
+ if (dp_packet_hwol_tx_ip_csum(packet) &&
+ !dp_packet_ip_checksum_good(packet)) {
+ dp_packet_ip_set_header_csum(packet);
+ dp_packet_ol_set_ip_csum_good(packet);
+ }
+
ovs_mutex_lock(&dev->mutex);
dev->stats.tx_packets++;
dev->txq_stats[qid].packets++;
@@ -88,7 +88,10 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
ovs_be32 ip_src, ip_dst;
- if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(packet))) {
+ /* A packet coming from a network device might have the
+ * csum already checked. In this case, skip the check. */
+ if (OVS_UNLIKELY(!dp_packet_ip_checksum_good(packet))
+ && !dp_packet_hwol_tx_ip_csum(packet)) {
if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
return NULL;
@@ -142,7 +145,8 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
*
* This function sets the IP header's ip_tot_len field (which should be zeroed
* as part of 'header') and puts its value into '*ip_tot_size' as well. Also
- * updates IP header checksum, as well as the l3 and l4 offsets in 'packet'.
+ * updates IP header checksum if not offloaded, as well as the l3 and l4
+ * offsets in the 'packet'.
*
* Return pointer to the L4 header added to 'packet'. */
void *
@@ -168,11 +172,16 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header,
ip6->ip6_plen = htons(*ip_tot_size);
packet_set_ipv6_flow_label(&ip6->ip6_flow, ipv6_label);
packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
+ dp_packet_hwol_set_tx_ipv6(packet);
+ dp_packet_ol_reset_ip_csum_good(packet);
return ip6 + 1;
} else {
ip = netdev_tnl_ip_hdr(eth);
ip->ip_tot_len = htons(*ip_tot_size);
- ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
+ /* Postpone checksum to when the packet is pushed to the port. */
+ dp_packet_hwol_set_tx_ipv4(packet);
+ dp_packet_hwol_set_tx_ip_csum(packet);
+ dp_packet_ol_reset_ip_csum_good(packet);
*ip_tot_size -= IP_HEADER_LEN;
packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
return ip + 1;
@@ -191,7 +200,7 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
}
if (udp->udp_csum) {
- if (OVS_UNLIKELY(!dp_packet_l4_checksum_valid(packet))) {
+ if (OVS_UNLIKELY(!dp_packet_l4_checksum_good(packet))) {
uint32_t csum;
if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
csum = packet_csum_pseudoheader6(dp_packet_l3(packet));
@@ -299,8 +308,8 @@ netdev_tnl_ip_build_header(struct ovs_action_push_tnl *data,
ip->ip_frag_off = (params->flow->tunnel.flags & FLOW_TNL_F_DONT_FRAGMENT) ?
htons(IP_DF) : 0;
- /* Checksum has already been zeroed by eth_build_header. */
- ip->ip_csum = csum(ip, sizeof *ip);
+ /* The checksum will be calculated when the headers are pushed
+ * to the packet if offloading is not enabled. */
data->header_len += IP_HEADER_LEN;
return ip + 1;
@@ -808,6 +808,14 @@ netdev_send_prepare_packet(const uint64_t netdev_flags,
return false;
}
+ /* A packet with IP csum offloading enabled may have been received with a
+ * verified csum. Leave IP csum offloading enabled even when the checksum
+ * is good and let the netdev decide what is best to do.
+ * Provide a software fallback in case the device doesn't support IP csum
+ * offloading. Note: an encapsulated packet must have the inner IP header
+ * csum already calculated. */
+ dp_packet_ol_send_prepare(packet, netdev_flags);
+
l4_mask = dp_packet_hwol_l4_mask(packet);
if (l4_mask) {
if (dp_packet_hwol_l4_is_tcp(packet)) {
@@ -975,7 +983,15 @@ netdev_push_header(const struct netdev *netdev,
"not supported: packet dropped",
netdev_get_name(netdev));
} else {
+ /* The packet is going to be encapsulated and there is
+ * no support yet for inner network header csum offloading. */
+ if (dp_packet_hwol_tx_ip_csum(packet)
+ && !dp_packet_ip_checksum_good(packet)) {
+ dp_packet_ip_set_header_csum(packet);
+ }
+
netdev->netdev_class->push_header(netdev, packet, data);
+
pkt_metadata_init(&packet->md, data->out_port);
dp_packet_batch_refill(batch, packet, i);
}
@@ -450,7 +450,6 @@ action_avx512_ipv4_set_addrs(struct dp_packet_batch *batch,
DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
struct ip_header *nh = dp_packet_l3(packet);
- ovs_be16 old_csum = ~nh->ip_csum;
/* Load the 20 bytes of the IPv4 header. Without options, which is the
* most common case it's 20 bytes, but can be up to 60 bytes. */
@@ -463,13 +462,20 @@ action_avx512_ipv4_set_addrs(struct dp_packet_batch *batch,
* (v_pkt_masked). */
__m256i v_new_hdr = _mm256_or_si256(v_key_shuf, v_pkt_masked);
- /* Update the IP checksum based on updated IP values. */
- uint16_t delta = avx512_ipv4_hdr_csum_delta(v_packet, v_new_hdr);
- uint32_t new_csum = old_csum + delta;
- delta = csum_finish(new_csum);
+ if (dp_packet_hwol_tx_ip_csum(packet)) {
+ dp_packet_ol_reset_ip_csum_good(packet);
+ } else {
+ ovs_be16 old_csum = ~nh->ip_csum;
- /* Insert new checksum. */
- v_new_hdr = _mm256_insert_epi16(v_new_hdr, delta, 5);
+ /* Update the IP checksum based on updated IP values. */
+ uint16_t delta = avx512_ipv4_hdr_csum_delta(v_packet, v_new_hdr);
+ uint32_t new_csum = old_csum + delta;
+
+ delta = csum_finish(new_csum);
+
+ /* Insert new checksum. */
+ v_new_hdr = _mm256_insert_epi16(v_new_hdr, delta, 5);
+ }
/* If ip_src or ip_dst has been modified, L4 checksum needs to
* be updated too. */
@@ -169,9 +169,14 @@ odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
new_tos = key->ipv4_tos | (nh->ip_tos & ~mask->ipv4_tos);
if (nh->ip_tos != new_tos) {
- nh->ip_csum = recalc_csum16(nh->ip_csum,
- htons((uint16_t) nh->ip_tos),
- htons((uint16_t) new_tos));
+ if (dp_packet_hwol_tx_ip_csum(packet)) {
+ dp_packet_ol_reset_ip_csum_good(packet);
+ } else {
+ nh->ip_csum = recalc_csum16(nh->ip_csum,
+ htons((uint16_t) nh->ip_tos),
+ htons((uint16_t) new_tos));
+ }
+
nh->ip_tos = new_tos;
}
}
@@ -180,8 +185,14 @@ odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
new_ttl = key->ipv4_ttl | (nh->ip_ttl & ~mask->ipv4_ttl);
if (OVS_LIKELY(nh->ip_ttl != new_ttl)) {
- nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_ttl << 8),
- htons(new_ttl << 8));
+ if (dp_packet_hwol_tx_ip_csum(packet)) {
+ dp_packet_ol_reset_ip_csum_good(packet);
+ } else {
+ nh->ip_csum = recalc_csum16(nh->ip_csum,
+ htons(nh->ip_ttl << 8),
+ htons(new_ttl << 8));
+ }
+
nh->ip_ttl = new_ttl;
}
}
@@ -1144,7 +1144,12 @@ packet_set_ipv4_addr(struct dp_packet *packet,
}
}
}
- nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
+
+ if (dp_packet_hwol_tx_ip_csum(packet)) {
+ dp_packet_ol_reset_ip_csum_good(packet);
+ } else {
+ nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
+ }
put_16aligned_be32(addr, new_addr);
}
@@ -1311,16 +1316,26 @@ packet_set_ipv4(struct dp_packet *packet, ovs_be32 src, ovs_be32 dst,
if (nh->ip_tos != tos) {
uint8_t *field = &nh->ip_tos;
- nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
- htons((uint16_t) tos));
+ if (dp_packet_hwol_tx_ip_csum(packet)) {
+ dp_packet_ol_reset_ip_csum_good(packet);
+ } else {
+ nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
+ htons((uint16_t) tos));
+ }
+
*field = tos;
}
if (nh->ip_ttl != ttl) {
uint8_t *field = &nh->ip_ttl;
- nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
- htons(ttl << 8));
+ if (dp_packet_hwol_tx_ip_csum(packet)) {
+ dp_packet_ol_reset_ip_csum_good(packet);
+ } else {
+ nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
+ htons(ttl << 8));
+ }
+
*field = ttl;
}
}
@@ -1931,8 +1946,13 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
tos |= IP_ECN_CE;
if (nh->ip_tos != tos) {
- nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
- htons((uint16_t) tos));
+ if (dp_packet_hwol_tx_ip_csum(pkt)) {
+ dp_packet_ol_reset_ip_csum_good(pkt);
+ } else {
+ nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
+ htons((uint16_t) tos));
+ }
+
nh->ip_tos = tos;
}
}
@@ -734,3 +734,81 @@ AT_CHECK([test `ovs-vsctl get Interface p2 statistics:tx_q0_packets` -gt 0 -a dn
OVS_VSWITCHD_STOP
AT_CLEANUP
+
+AT_SETUP([userspace offload - ip csum offload])
+OVS_VSWITCHD_START(
+ [add-br br1 -- set bridge br1 datapath-type=dummy -- \
+ add-port br1 p1 -- \
+ set Interface p1 type=dummy -- \
+ add-port br1 p2 -- \
+ set Interface p2 type=dummy --])
+
+# Modify the ip_dst addr to force changing the IP csum.
+AT_CHECK([ovs-ofctl add-flow br1 in_port=p1,actions=mod_nw_dst:192.168.1.1,output:p2])
+
+# Check that everything still works correctly with no offloading enabled.
+AT_CHECK([ovs-vsctl set Interface p2 options:tx_pcap=p2.pcap])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+
+# Checksum should change to 0x990 with ip_dst changed to 192.168.1.1
+# by the datapath while processing the packet.
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Check that packets entering the datapath with csum offloading
+# enabled get the csum updated properly by egress handling
+# in the datapath and not by the netdev.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Check that packets entering the datapath with csum offloading
+# enabled get the csum updated properly by the netdev and not
+# by the datapath.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Push a packet with a bad csum and offloading disabled to check
+# that the datapath updates the csum but does not fix the original error.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060904c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Push a packet with a bad csum and offloading enabled to check
+# that the netdev updates and fixes the csum.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP