@@ -2089,16 +2089,14 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
ctx->key.dl_type = dl_type;
if (ctx->key.dl_type == htons(ETH_TYPE_IP)) {
- bool hwol_bad_l3_csum = dp_packet_ol_ip_checksum_bad(pkt);
- if (hwol_bad_l3_csum) {
+ if (dp_packet_ol_ip_checksum_bad(pkt)) {
ok = false;
COVERAGE_INC(conntrack_l3csum_err);
} else {
- bool hwol_good_l3_csum = dp_packet_ol_ip_checksum_good(pkt)
- || dp_packet_ol_tx_ipv4(pkt);
/* Validate the checksum only when hwol is not supported. */
ok = extract_l3_ipv4(&ctx->key, l3, dp_packet_l3_size(pkt), NULL,
- !hwol_good_l3_csum);
+ !dp_packet_ol_tx_ipv4(pkt) &&
+ !dp_packet_ol_ip_checksum_good(pkt));
}
} else if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
ok = extract_l3_ipv6(&ctx->key, l3, dp_packet_l3_size(pkt), NULL);
@@ -3402,7 +3400,9 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
}
if (seq_skew) {
ip_len = ntohs(l3_hdr->ip_tot_len) + seq_skew;
- if (!dp_packet_ol_tx_ipv4(pkt)) {
+ if (dp_packet_ol_tx_ip_csum(pkt)) {
+ dp_packet_ol_reset_ip_csum_good(pkt);
+ } else {
l3_hdr->ip_csum = recalc_csum16(l3_hdr->ip_csum,
l3_hdr->ip_tot_len,
htons(ip_len));
@@ -21,6 +21,7 @@
#include "dp-packet.h"
#include "netdev-afxdp.h"
#include "netdev-dpdk.h"
+#include "netdev-provider.h"
#include "openvswitch/dynamic-string.h"
#include "util.h"
@@ -507,3 +508,14 @@ dp_packet_resize_l2(struct dp_packet *p, int increment)
dp_packet_adjust_layer_offset(&p->l2_5_ofs, increment);
return dp_packet_data(p);
}
+
+/* Checks if the packet 'p' is compatible with netdev_ol_flags 'flags'
+ * and, if not, updates the packet with a software fallback. */
+void
+dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags) {
+ if (!dp_packet_ol_ip_checksum_good(p) && dp_packet_ol_tx_ip_csum(p)
+ && !(flags & NETDEV_OFFLOAD_TX_IPV4_CSUM)) {
+ dp_packet_ip_set_header_csum(p);
+ dp_packet_ol_set_ip_csum_good(p);
+ }
+}
@@ -25,6 +25,7 @@
#include <rte_mbuf.h>
#endif
+#include "csum.h"
#include "netdev-afxdp.h"
#include "netdev-dpdk.h"
#include "openvswitch/list.h"
@@ -77,24 +78,27 @@ enum dp_packet_offload_mask {
DEF_OL_FLAG(DP_PACKET_OL_TX_IPV4, RTE_MBUF_F_TX_IPV4, 0x80),
/* Offloaded packet is IPv6. */
DEF_OL_FLAG(DP_PACKET_OL_TX_IPV6, RTE_MBUF_F_TX_IPV6, 0x100),
+ /* Offload IP checksum. */
+ DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CSUM, RTE_MBUF_F_TX_IP_CKSUM, 0x200),
/* Offload TCP checksum. */
- DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CSUM, RTE_MBUF_F_TX_TCP_CKSUM, 0x200),
+ DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CSUM, RTE_MBUF_F_TX_TCP_CKSUM, 0x400),
/* Offload UDP checksum. */
- DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CSUM, RTE_MBUF_F_TX_UDP_CKSUM, 0x400),
+ DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CSUM, RTE_MBUF_F_TX_UDP_CKSUM, 0x800),
/* Offload SCTP checksum. */
- DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CSUM, RTE_MBUF_F_TX_SCTP_CKSUM, 0x800),
+ DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CSUM, RTE_MBUF_F_TX_SCTP_CKSUM, 0x1000),
/* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
};
-#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH | \
- DP_PACKET_OL_FLOW_MARK | \
+#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH | \
+ DP_PACKET_OL_FLOW_MARK | \
DP_PACKET_OL_RX_L4_CSUM_BAD | \
DP_PACKET_OL_RX_IP_CSUM_BAD | \
DP_PACKET_OL_RX_L4_CSUM_GOOD | \
DP_PACKET_OL_RX_IP_CSUM_GOOD | \
- DP_PACKET_OL_TX_TCP_SEG | \
- DP_PACKET_OL_TX_IPV4 | \
- DP_PACKET_OL_TX_IPV6 | \
+ DP_PACKET_OL_TX_TCP_SEG | \
+ DP_PACKET_OL_TX_IPV4 | \
+ DP_PACKET_OL_TX_IPV6 | \
+ DP_PACKET_OL_TX_IP_CSUM | \
DP_PACKET_OL_TX_TCP_CSUM | \
DP_PACKET_OL_TX_UDP_CSUM | \
DP_PACKET_OL_TX_SCTP_CSUM)
@@ -235,6 +239,7 @@ void *dp_packet_steal_data(struct dp_packet *);
static inline bool dp_packet_equal(const struct dp_packet *,
const struct dp_packet *);
+void dp_packet_ol_send_prepare(struct dp_packet *, const uint64_t);
/* Frees memory that 'p' points to, as well as 'p' itself. */
@@ -979,7 +984,7 @@ dp_packet_ol_tcp_seg(const struct dp_packet *a)
return !!(*dp_packet_ol_flags_ptr(a) & DP_PACKET_OL_TX_TCP_SEG);
}
-/* Returns 'true' if packet 'a' is marked for IPv4 checksum offloading. */
+/* Returns 'true' if packet 'a' is marked as IPv4. */
static inline bool
dp_packet_ol_tx_ipv4(const struct dp_packet *a)
{
@@ -1010,20 +1015,34 @@ dp_packet_ol_tx_sctp_csum(struct dp_packet *a)
DP_PACKET_OL_TX_SCTP_CSUM;
}
-/* Mark packet 'a' for IPv4 checksum offloading. */
+/* Mark packet 'a' as IPv4. */
static inline void
dp_packet_ol_set_tx_ipv4(struct dp_packet *a)
{
*dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV4;
}
-/* Mark packet 'a' for IPv6 checksum offloading. */
+/* Mark packet 'a' as IPv6. */
static inline void
dp_packet_ol_set_tx_ipv6(struct dp_packet *a)
{
*dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6;
}
+/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
+static inline bool
+dp_packet_ol_tx_ip_csum(const struct dp_packet *p)
+{
+ return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IP_CSUM);
+}
+
+/* Marks packet 'p' for IPv4 checksum offloading. */
+static inline void
+dp_packet_ol_set_tx_ip_csum(struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IP_CSUM;
+}
+
/* Mark packet 'a' for TCP checksum offloading. It implies that either
* the packet 'a' is marked for IPv4 or IPv6 checksum offloading. */
static inline void
@@ -1057,6 +1076,8 @@ dp_packet_ol_set_tcp_seg(struct dp_packet *a)
*dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_TCP_SEG;
}
+/* Returns 'true' if the IP header has good integrity and the
+ * checksum in it is complete. */
static inline bool
dp_packet_ol_ip_checksum_good(const struct dp_packet *p)
{
@@ -1064,6 +1085,22 @@ dp_packet_ol_ip_checksum_good(const struct dp_packet *p)
DP_PACKET_OL_RX_IP_CSUM_GOOD;
}
+/* Marks packet 'p' with good IPv4 checksum. */
+static inline void
+dp_packet_ol_set_ip_csum_good(const struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CSUM_BAD;
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_IP_CSUM_GOOD;
+}
+
+/* Resets IP good checksum flag in packet 'p'. */
+static inline void
+dp_packet_ol_reset_ip_csum_good(const struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CSUM_GOOD;
+}
+
+/* Returns 'true' if the packet 'p' has a bad IPv4 checksum. */
static inline bool
dp_packet_ol_ip_checksum_bad(const struct dp_packet *p)
{
@@ -1071,6 +1108,17 @@ dp_packet_ol_ip_checksum_bad(const struct dp_packet *p)
DP_PACKET_OL_RX_IP_CSUM_BAD;
}
+/* Calculate and set the IPv4 header checksum in packet 'p'. */
+static inline void
+dp_packet_ip_set_header_csum(struct dp_packet *p)
+{
+ struct ip_header *ip = dp_packet_l3(p);
+
+ ovs_assert(ip);
+ ip->ip_csum = 0;
+ ip->ip_csum = csum(ip, sizeof *ip);
+}
+
static inline bool
dp_packet_ol_l4_checksum_good(const struct dp_packet *p)
{
@@ -869,7 +869,7 @@ void dpif_register_dp_purge_cb(struct dpif *, dp_purge_callback *, void *aux);
*
* Returns 0 if successful, ENOSPC if the flow limit has been reached and no
* flow should be installed, or some otherwise a positive errno value. */
-typedef int upcall_callback(const struct dp_packet *packet,
+typedef int upcall_callback(struct dp_packet *packet,
const struct flow *flow,
ovs_u128 *ufid,
unsigned pmd_id,
@@ -907,6 +907,10 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
nw_proto = nh->ip_proto;
nw_frag = ipv4_get_nw_frag(nh);
data_pull(&data, &size, ip_len);
+ dp_packet_ol_set_tx_ipv4(packet);
+ if (dp_packet_ol_ip_checksum_good(packet)) {
+ dp_packet_ol_set_tx_ip_csum(packet);
+ }
} else if (dl_type == htons(ETH_TYPE_IPV6)) {
const struct ovs_16aligned_ip6_hdr *nh = data;
ovs_be32 tc_flow;
@@ -920,6 +924,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
}
data_pull(&data, &size, sizeof *nh);
+ dp_packet_ol_set_tx_ipv6(packet);
plen = ntohs(nh->ip6_plen);
dp_packet_set_l2_pad_size(packet, size - plen);
size = plen; /* Never pull padding. */
@@ -3217,9 +3222,12 @@ packet_expand(struct dp_packet *p, const struct flow *flow, size_t size)
struct ip_header *ip = dp_packet_l3(p);
ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
- ip->ip_csum = 0;
- ip->ip_csum = csum(ip, sizeof *ip);
-
+ if (dp_packet_ol_tx_ip_csum(p)) {
+ dp_packet_ol_reset_ip_csum_good(p);
+ } else {
+ dp_packet_ip_set_header_csum(p);
+ dp_packet_ol_set_ip_csum_good(p);
+ }
pseudo_hdr_csum = packet_csum_pseudoheader(ip);
} else { /* ETH_TYPE_IPV6 */
struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(p);
@@ -3309,6 +3317,7 @@ flow_compose(struct dp_packet *p, const struct flow *flow,
/* Checksum has already been zeroed by put_zeros call. */
ip->ip_csum = csum(ip, sizeof *ip);
+ dp_packet_ol_set_ip_csum_good(p);
pseudo_hdr_csum = packet_csum_pseudoheader(ip);
flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
} else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
@@ -433,7 +433,9 @@ ipf_reassemble_v4_frags(struct ipf_list *ipf_list)
len += rest_len;
l3 = dp_packet_l3(pkt);
ovs_be16 new_ip_frag_off = l3->ip_frag_off & ~htons(IP_MORE_FRAGMENTS);
- if (!dp_packet_ol_tx_ipv4(pkt)) {
+ if (dp_packet_ol_tx_ip_csum(pkt)) {
+ dp_packet_ol_reset_ip_csum_good(pkt);
+ } else {
l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off,
new_ip_frag_off);
l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len));
@@ -609,7 +611,6 @@ ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
}
if (OVS_UNLIKELY(!dp_packet_ol_ip_checksum_good(pkt)
- && !dp_packet_ol_tx_ipv4(pkt)
&& csum(l3, ip_hdr_len) != 0)) {
COVERAGE_INC(ipf_l3csum_err);
goto invalid_pkt;
@@ -1185,7 +1186,9 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
} else {
struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt);
struct ip_header *l3_reass = dp_packet_l3(pkt);
- if (!dp_packet_ol_tx_ipv4(frag_i->pkt)) {
+ if (dp_packet_ol_tx_ip_csum(frag_i->pkt)) {
+ dp_packet_ol_reset_ip_csum_good(frag_i->pkt);
+ } else {
ovs_be32 reass_ip =
get_16aligned_be32(&l3_reass->ip_src);
ovs_be32 frag_ip =
@@ -397,8 +397,9 @@ enum dpdk_hw_ol_features {
NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
NETDEV_RX_HW_CRC_STRIP = 1 << 1,
NETDEV_RX_HW_SCATTER = 1 << 2,
- NETDEV_TX_TSO_OFFLOAD = 1 << 3,
- NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4,
+ NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
+ NETDEV_TX_TSO_OFFLOAD = 1 << 4,
+ NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
};
/*
@@ -984,6 +985,10 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_KEEP_CRC;
}
+ if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
+ conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
+ }
+
if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
@@ -1124,6 +1129,12 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
}
+ if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
+ dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+ } else {
+ dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+ }
+
dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
if (userspace_tso_enabled()) {
if ((info.tx_offload_capa & tx_tso_offload_capa)
@@ -1693,16 +1704,12 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
dev->requested_txq_size);
smap_add_format(args, "configured_txq_descriptors", "%d",
dev->txq_size);
- if (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) {
- smap_add(args, "rx_csum_offload", "true");
- } else {
- smap_add(args, "rx_csum_offload", "false");
- }
- if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
- smap_add(args, "tx_tso_offload", "true");
- } else {
- smap_add(args, "tx_tso_offload", "false");
- }
+#define HWOL_SMAP_ADD(FIELD, FLAG) \
+ smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false");
+ HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD);
+ HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD);
+ HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD);
+#undef HWOL_SMAP_ADD
smap_add(args, "lsc_interrupt_mode",
dev->lsc_interrupt_mode ? "true" : "false");
@@ -2145,12 +2152,10 @@ netdev_dpdk_prep_ol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
{
struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
- if (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
- mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
- mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
- mbuf->outer_l2_len = 0;
- mbuf->outer_l3_len = 0;
- }
+ mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
+ mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
+ mbuf->outer_l2_len = 0;
+ mbuf->outer_l3_len = 0;
if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
struct tcp_header *th = dp_packet_l4(pkt);
@@ -2210,13 +2215,11 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
uint32_t nb_tx = 0;
uint16_t nb_tx_prep = cnt;
- if (userspace_tso_enabled()) {
- nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
- if (nb_tx_prep != cnt) {
- VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
- "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
- cnt, rte_strerror(rte_errno));
- }
+ nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
+ if (nb_tx_prep != cnt) {
+ VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
+ "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
+ cnt, rte_strerror(rte_errno));
}
while (nb_tx != nb_tx_prep) {
@@ -2558,7 +2561,7 @@ netdev_dpdk_vhost_update_tx_counters(struct netdev_dpdk *dev,
sw_stats->tx_failure_drops += sw_stats_add->tx_failure_drops;
sw_stats->tx_mtu_exceeded_drops += sw_stats_add->tx_mtu_exceeded_drops;
sw_stats->tx_qos_drops += sw_stats_add->tx_qos_drops;
- sw_stats->tx_invalid_ol_drops += sw_stats_add->tx_invalid_ol_drops;
+ sw_stats->tx_invalid_ol_drops += sw_stats_add->tx_invalid_ol_drops;
}
}
@@ -2656,12 +2659,10 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig)
memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size,
sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size));
- if (mbuf_dest->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
- mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
- - (char *)dp_packet_eth(pkt_dest);
- mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
- - (char *) dp_packet_l3(pkt_dest);
- }
+ mbuf_dest->l2_len = (char *) dp_packet_l3(pkt_dest)
+ - (char *) dp_packet_eth(pkt_dest);
+ mbuf_dest->l3_len = (char *) dp_packet_l4(pkt_dest)
+ - (char *) dp_packet_l3(pkt_dest);
return pkt_dest;
}
@@ -2718,11 +2719,9 @@ netdev_dpdk_common_send(struct netdev *netdev, struct dp_packet_batch *batch,
pkt_cnt = cnt;
/* Prepare each mbuf for hardware offloading. */
- if (userspace_tso_enabled()) {
- cnt = netdev_dpdk_prep_ol_batch(dev, pkts, pkt_cnt);
- stats->tx_invalid_ol_drops += pkt_cnt - cnt;
- pkt_cnt = cnt;
- }
+ cnt = netdev_dpdk_prep_ol_batch(dev, pkts, pkt_cnt);
+ stats->tx_invalid_ol_drops += pkt_cnt - cnt;
+ pkt_cnt = cnt;
/* Apply Quality of Service policy. */
cnt = netdev_dpdk_qos_run(dev, pkts, pkt_cnt, true);
@@ -4936,6 +4935,12 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
}
err = dpdk_eth_dev_init(dev);
+ if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
+ netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
+ } else {
+ netdev->ol_flags &= ~NETDEV_OFFLOAD_TX_IPV4_CSUM;
+ }
+
if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
@@ -148,6 +148,11 @@ struct netdev_dummy {
int requested_n_txq OVS_GUARDED;
int requested_n_rxq OVS_GUARDED;
int requested_numa_id OVS_GUARDED;
+
+ /* Enable netdev IP csum offload. */
+ bool ol_ip_csum OVS_GUARDED;
+ /* Flag RX packet with good csum. */
+ bool ol_ip_csum_set_good OVS_GUARDED;
};
/* Max 'recv_queue_len' in struct netdev_dummy. */
@@ -910,6 +915,13 @@ netdev_dummy_set_config(struct netdev *netdev_, const struct smap *args,
}
}
+ netdev->ol_ip_csum_set_good = smap_get_bool(args, "ol_ip_csum_set_good",
+ false);
+ netdev->ol_ip_csum = smap_get_bool(args, "ol_ip_csum", false);
+ if (netdev->ol_ip_csum) {
+ netdev_->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
+ }
+
netdev_change_seq_changed(netdev_);
/* 'dummy-pmd' specific config. */
@@ -1088,6 +1100,10 @@ netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
netdev->rxq_stats[rxq_->queue_id].bytes += dp_packet_size(packet);
netdev->custom_stats[0].value++;
netdev->custom_stats[1].value++;
+ if (netdev->ol_ip_csum_set_good) {
+ /* The netdev hardware sets the flag when the packet has good csum. */
+ dp_packet_ol_set_ip_csum_good(packet);
+ }
ovs_mutex_unlock(&netdev->mutex);
dp_packet_batch_init_packet(batch, packet);
@@ -1170,6 +1186,13 @@ netdev_dummy_send(struct netdev *netdev, int qid,
}
ovs_mutex_lock(&dev->mutex);
+ if (dp_packet_ol_tx_ip_csum(packet)) {
+ if (!dp_packet_ol_ip_checksum_good(packet)) {
+ dp_packet_ip_set_header_csum(packet);
+ dp_packet_ol_set_ip_csum_good(packet);
+ }
+ }
+
dev->stats.tx_packets++;
dev->txq_stats[qid].packets++;
dev->stats.tx_bytes += size;
@@ -88,7 +88,10 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
ovs_be32 ip_src, ip_dst;
- if (OVS_UNLIKELY(!dp_packet_ol_ip_checksum_good(packet))) {
+ /* A packet coming from a network device might have the
+ * csum already checked. In this case, skip the check. */
+ if (!dp_packet_ol_ip_checksum_good(packet)
+ && !dp_packet_ol_tx_ip_csum(packet)) {
if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
return NULL;
@@ -142,7 +145,8 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
*
* This function sets the IP header's ip_tot_len field (which should be zeroed
* as part of 'header') and puts its value into '*ip_tot_size' as well. Also
- * updates IP header checksum, as well as the l3 and l4 offsets in 'packet'.
+ * updates IP header checksum if not offloaded, as well as the l3 and l4
+ * offsets in 'packet'.
*
* Return pointer to the L4 header added to 'packet'. */
void *
@@ -167,11 +171,16 @@ netdev_tnl_push_ip_header(struct dp_packet *packet,
*ip_tot_size -= IPV6_HEADER_LEN;
ip6->ip6_plen = htons(*ip_tot_size);
packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
+ dp_packet_ol_set_tx_ipv6(packet);
+ dp_packet_ol_reset_ip_csum_good(packet);
return ip6 + 1;
} else {
ip = netdev_tnl_ip_hdr(eth);
ip->ip_tot_len = htons(*ip_tot_size);
- ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
+ /* Postpone checksum to when the packet is pushed to the port. */
+ dp_packet_ol_set_tx_ipv4(packet);
+ dp_packet_ol_set_tx_ip_csum(packet);
+ dp_packet_ol_reset_ip_csum_good(packet);
*ip_tot_size -= IP_HEADER_LEN;
packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
return ip + 1;
@@ -297,8 +306,8 @@ netdev_tnl_ip_build_header(struct ovs_action_push_tnl *data,
ip->ip_frag_off = (params->flow->tunnel.flags & FLOW_TNL_F_DONT_FRAGMENT) ?
htons(IP_DF) : 0;
- /* Checksum has already been zeroed by eth_build_header. */
- ip->ip_csum = csum(ip, sizeof *ip);
+ /* The checksum will be calculated when the headers are pushed
+ * to the packet if offloading is not enabled. */
data->header_len += IP_HEADER_LEN;
return ip + 1;
@@ -799,6 +799,14 @@ netdev_send_prepare_packet(const uint64_t netdev_flags,
return false;
}
+ /* Packet with IP csum offloading enabled was received with verified csum.
+ * Leave the IP csum offloading enabled even with good checksum to the
+ * netdev to decide what would be the best to do.
+ * Provide a software fallback in case the device doesn't support IP csum
+ * offloading. Note: Encapsulated packet must have the inner IP header
+ * csum already calculated. */
+ dp_packet_ol_send_prepare(packet, netdev_flags);
+
if (dp_packet_ol_l4_mask(packet)) {
if (dp_packet_ol_tx_tcp_csum(packet)) {
if (!(netdev_flags & NETDEV_OFFLOAD_TX_TCP_CSUM)) {
@@ -966,7 +974,21 @@ netdev_push_header(const struct netdev *netdev,
"not supported: packet dropped",
netdev_get_name(netdev));
} else {
+ /* The packet is going to be encapsulated and there is
+ * no support yet for inner network header csum offloading. */
+ if (dp_packet_ol_tx_ip_csum(packet)
+ && !dp_packet_ol_ip_checksum_good(packet)) {
+ dp_packet_ip_set_header_csum(packet);
+ }
+
netdev->netdev_class->push_header(netdev, packet, data);
+ if (dp_packet_ol_tx_ip_csum(packet)) {
+ dp_packet_ol_reset_ip_csum_good(packet);
+ } else if (dp_packet_ol_tx_ipv4(packet)) {
+ dp_packet_ip_set_header_csum(packet);
+ dp_packet_ol_set_ip_csum_good(packet);
+ }
+
pkt_metadata_init(&packet->md, data->out_port);
dp_packet_batch_refill(batch, packet, i);
}
@@ -167,9 +167,14 @@ odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
new_tos = key->ipv4_tos | (nh->ip_tos & ~mask->ipv4_tos);
if (nh->ip_tos != new_tos) {
- nh->ip_csum = recalc_csum16(nh->ip_csum,
- htons((uint16_t) nh->ip_tos),
- htons((uint16_t) new_tos));
+ if (dp_packet_ol_tx_ip_csum(packet)) {
+ dp_packet_ol_reset_ip_csum_good(packet);
+ } else {
+ nh->ip_csum = recalc_csum16(nh->ip_csum,
+ htons((uint16_t) nh->ip_tos),
+ htons((uint16_t) new_tos));
+ }
+
nh->ip_tos = new_tos;
}
}
@@ -178,8 +183,14 @@ odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
new_ttl = key->ipv4_ttl | (nh->ip_ttl & ~mask->ipv4_ttl);
if (OVS_LIKELY(nh->ip_ttl != new_ttl)) {
- nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_ttl << 8),
- htons(new_ttl << 8));
+ if (dp_packet_ol_tx_ip_csum(packet)) {
+ dp_packet_ol_reset_ip_csum_good(packet);
+ } else {
+ nh->ip_csum = recalc_csum16(nh->ip_csum,
+ htons(nh->ip_ttl << 8),
+ htons(new_ttl << 8));
+ }
+
nh->ip_ttl = new_ttl;
}
}
@@ -1144,7 +1144,12 @@ packet_set_ipv4_addr(struct dp_packet *packet,
}
}
}
- nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
+
+ if (dp_packet_ol_tx_ip_csum(packet)) {
+ dp_packet_ol_reset_ip_csum_good(packet);
+ } else {
+ nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
+ }
put_16aligned_be32(addr, new_addr);
}
@@ -1309,16 +1314,26 @@ packet_set_ipv4(struct dp_packet *packet, ovs_be32 src, ovs_be32 dst,
if (nh->ip_tos != tos) {
uint8_t *field = &nh->ip_tos;
- nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
- htons((uint16_t) tos));
+ if (dp_packet_ol_tx_ip_csum(packet)) {
+ dp_packet_ol_reset_ip_csum_good(packet);
+ } else {
+ nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
+ htons((uint16_t) tos));
+ }
+
*field = tos;
}
if (nh->ip_ttl != ttl) {
uint8_t *field = &nh->ip_ttl;
- nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
- htons(ttl << 8));
+ if (dp_packet_ol_tx_ip_csum(packet)) {
+ dp_packet_ol_reset_ip_csum_good(packet);
+ } else {
+ nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
+ htons(ttl << 8));
+ }
+
*field = ttl;
}
}
@@ -1927,8 +1942,13 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
tos |= IP_ECN_CE;
if (nh->ip_tos != tos) {
- nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
- htons((uint16_t) tos));
+ if (dp_packet_ol_tx_ip_csum(pkt)) {
+ dp_packet_ol_reset_ip_csum_good(pkt);
+ } else {
+ nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
+ htons((uint16_t) tos));
+ }
+
nh->ip_tos = tos;
}
}
@@ -215,7 +215,7 @@ struct upcall {
enum odp_key_fitness fitness; /* Fitness of 'flow' relative to ODP key. */
const ovs_u128 *ufid; /* Unique identifier for 'flow'. */
unsigned pmd_id; /* Datapath poll mode driver id. */
- const struct dp_packet *packet; /* Packet associated with this upcall. */
+ struct dp_packet *packet; /* Packet associated with this upcall. */
ofp_port_t ofp_in_port; /* OpenFlow in port, or OFPP_NONE. */
uint16_t mru; /* If !0, Maximum receive unit of
fragmented IP packet */
@@ -395,7 +395,7 @@ static void delete_op_init(struct udpif *udpif, struct ukey_op *op,
struct udpif_key *ukey);
static int upcall_receive(struct upcall *, const struct dpif_backer *,
- const struct dp_packet *packet, enum dpif_upcall_type,
+ struct dp_packet *packet, enum dpif_upcall_type,
const struct nlattr *userdata, const struct flow *,
const unsigned int mru,
const ovs_u128 *ufid, const unsigned pmd_id);
@@ -1140,7 +1140,7 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
* since the 'upcall->put_actions' remains uninitialized. */
static int
upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
- const struct dp_packet *packet, enum dpif_upcall_type type,
+ struct dp_packet *packet, enum dpif_upcall_type type,
const struct nlattr *userdata, const struct flow *flow,
const unsigned int mru,
const ovs_u128 *ufid, const unsigned pmd_id)
@@ -1336,7 +1336,7 @@ should_install_flow(struct udpif *udpif, struct upcall *upcall)
}
static int
-upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
+upcall_cb(struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
unsigned pmd_id, enum dpif_upcall_type type,
const struct nlattr *userdata, struct ofpbuf *actions,
struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux)
@@ -1446,7 +1446,7 @@ static int
process_upcall(struct udpif *udpif, struct upcall *upcall,
struct ofpbuf *odp_actions, struct flow_wildcards *wc)
{
- const struct dp_packet *packet = upcall->packet;
+ struct dp_packet *packet = upcall->packet;
const struct flow *flow = upcall->flow;
size_t actions_len = 0;
@@ -1524,6 +1524,10 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
break;
}
+ /* The packet is going to be encapsulated and sent to
+ * the controller. */
+ dp_packet_ol_send_prepare(packet, 0);
+
const struct frozen_state *state = &recirc_node->state;
struct ofproto_async_msg *am = xmalloc(sizeof *am);
@@ -162,6 +162,7 @@ SYSTEM_KMOD_TESTSUITE_AT = \
SYSTEM_USERSPACE_TESTSUITE_AT = \
tests/system-userspace-testsuite.at \
tests/system-userspace-macros.at \
+ tests/system-userspace-offload.at \
tests/system-userspace-packet-type-aware.at \
tests/system-route.at
new file mode 100644
@@ -0,0 +1,79 @@
+AT_BANNER([userspace offload])
+
+AT_SETUP([userspace offload - ip csum offload])
+OVS_VSWITCHD_START(
+ [add-br br1 -- set bridge br1 datapath-type=dummy -- \
+ add-port br1 p1 -- \
+ set Interface p1 type=dummy -- \
+ add-port br1 p2 -- \
+ set Interface p2 type=dummy --])
+
+# Modify the ip_dst addr to force changing the IP csum.
+AT_CHECK([ovs-ofctl add-flow br1 in_port=p1,actions=mod_nw_dst:192.168.1.1,output:p2])
+
+# Check if no offload remains ok.
+AT_CHECK([ovs-vsctl set Interface p2 options:tx_pcap=p2.pcap])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+
+# Checksum should change to 0x990 with ip_dst changed to 192.168.1.1
+# by the datapath while processing the packet.
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Check if packets entering the datapath with csum offloading
+# enabled gets the csum updated properly by egress handling
+# in the datapath and not by the netdev.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Check if packets entering the datapath with csum offloading
+# enabled gets the csum updated properly by netdev and not
+# by the datapath.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Push a packet with bad csum and offloading disabled to check
+# if the datapath updates the csum, but does not fix the issue.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060904c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Push a packet with bad csum and offloading enabled to check
+# if the driver updates and fixes the csum.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
@@ -25,5 +25,6 @@ m4_include([tests/system-common-macros.at])
m4_include([tests/system-traffic.at])
m4_include([tests/system-layer3-tunnels.at])
m4_include([tests/system-interface.at])
+m4_include([tests/system-userspace-offload.at])
m4_include([tests/system-userspace-packet-type-aware.at])
m4_include([tests/system-route.at])