| Field | Value |
|---|---|
| Message ID | 20221124053049.1894509-5-mkp@redhat.com |
| State | Changes Requested |
| Headers | show |
| Series | Enhance support for checksum offloading (expand) |
| Context | Check | Description |
|---|---|---|
| ovsrobot/apply-robot | success | apply and check: success |
| ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
| ovsrobot/intel-ovs-compilation | success | test: success |
On Thu, Nov 24, 2022 at 6:31 AM Mike Pattrick <mkp@redhat.com> wrote: > > From: Flavio Leitner <fbl@sysclose.org> > > The netdev receiving packets is supposed to provide the flags > indicating if the L4 checksum was verified and it is OK or BAD, > otherwise the stack will check when appropriate by software. > > If the packet comes with good checksum, then postpone the > checksum calculation to the egress device if needed. > > When encapsulate a packet with that flag, set the checksum > of the inner L4 header since that is not yet supported. > > Calculate the L4 checksum when the packet is going to be sent > over a device that doesn't support the feature. > > Linux tap devices allows enabling L3 and L4 offload, so this > patch enables the feature. However, Linux socket interface > remains disabled because the API doesn't allow enabling > those two features without enabling TSO too. > > Signed-off-by: Flavio Leitner <fbl@sysclose.org> > Co-authored-by: Mike Pattrick <mkp@redhat.com> > Signed-off-by: Mike Pattrick <mkp@redhat.com> I tested tcp traffic in various setups (with mlx5 nic as physical port): - external host <-> ovs <-> kernel host - external host <-> ovs <-> kernel vm - kernel vm <-> ovs <-> kernel host - kernel vm1 <-> ovs <-> kernel vm2 The changes may need more eyes on the netdev-linux parts, but the rest looks good to me. Reviewed-by: David Marchand <david.marchand@redhat.com>
On 11/24/22 06:30, Mike Pattrick wrote: > From: Flavio Leitner <fbl@sysclose.org> > > The netdev receiving packets is supposed to provide the flags > indicating if the L4 checksum was verified and it is OK or BAD, > otherwise the stack will check when appropriate by software. > > If the packet comes with good checksum, then postpone the > checksum calculation to the egress device if needed. > > When encapsulate a packet with that flag, set the checksum > of the inner L4 header since that is not yet supported. > > Calculate the L4 checksum when the packet is going to be sent > over a device that doesn't support the feature. > > Linux tap devices allows enabling L3 and L4 offload, so this > patch enables the feature. However, Linux socket interface > remains disabled because the API doesn't allow enabling > those two features without enabling TSO too. > > Signed-off-by: Flavio Leitner <fbl@sysclose.org> > Co-authored-by: Mike Pattrick <mkp@redhat.com> > Signed-off-by: Mike Pattrick <mkp@redhat.com> > --- Didn't test this as well. Only visual review. Should we enable checksum offloading in CONFIGURE_VETH_OFFLOADS for check-system-userspace testsuite since support is enabled by default? More comments inline. Best regards, Ilya Maximets. 
> lib/conntrack.c | 15 +-- > lib/dp-packet.c | 25 ++++ > lib/dp-packet.h | 78 ++++++++++++- > lib/flow.c | 23 ++++ > lib/netdev-dpdk.c | 188 ++++++++++++++++++++---------- > lib/netdev-linux.c | 252 ++++++++++++++++++++++++++-------------- > lib/netdev-native-tnl.c | 32 +---- > lib/netdev.c | 46 ++------ > lib/packets.c | 175 ++++++++++++++++++++++------ > lib/packets.h | 3 + > 10 files changed, 580 insertions(+), 257 deletions(-) > > diff --git a/lib/conntrack.c b/lib/conntrack.c > index 12194cce8..57e6a55e0 100644 > --- a/lib/conntrack.c > +++ b/lib/conntrack.c > @@ -2118,13 +2118,12 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, > } > > if (ok) { > - bool hwol_bad_l4_csum = dp_packet_l4_checksum_bad(pkt); > - if (!hwol_bad_l4_csum) { > - bool hwol_good_l4_csum = dp_packet_l4_checksum_good(pkt) > - || dp_packet_hwol_tx_l4_checksum(pkt); > + if (!dp_packet_l4_checksum_bad(pkt)) { > /* Validate the checksum only when hwol is not supported. */ > if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt), > - &ctx->icmp_related, l3, !hwol_good_l4_csum, > + &ctx->icmp_related, l3, > + !dp_packet_l4_checksum_good(pkt) && > + !dp_packet_hwol_tx_l4_checksum(pkt), > NULL)) { > ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); > return true; > @@ -3453,8 +3452,10 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx, > adj_seqnum(&th->tcp_seq, ec->seq_skew); > } > > - th->tcp_csum = 0; > - if (!dp_packet_hwol_tx_l4_checksum(pkt)) { > + if (dp_packet_hwol_tx_l4_checksum(pkt)) { > + dp_packet_ol_reset_l4_csum_good(pkt); > + } else { > + th->tcp_csum = 0; > if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) { > th->tcp_csum = packet_csum_upperlayer6(nh6, th, ctx->key.nw_proto, > dp_packet_l4_size(pkt)); > diff --git a/lib/dp-packet.c b/lib/dp-packet.c > index 90ef85de3..2cfaf5274 100644 > --- a/lib/dp-packet.c > +++ b/lib/dp-packet.c > @@ -38,6 +38,9 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum 
dp_packet_source so > dp_packet_init_specific(b); > /* By default assume the packet type to be Ethernet. */ > b->packet_type = htonl(PT_ETH); > + /* Reset csum start and offset. */ > + b->csum_start = 0; > + b->csum_offset = 0; > } > > static void > @@ -544,4 +547,26 @@ dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags) > dp_packet_ol_set_ip_csum_good(p); > dp_packet_hwol_reset_tx_ip_csum(p); > } > + > + if (dp_packet_l4_checksum_good(p) || !dp_packet_hwol_tx_l4_checksum(p)) { > + dp_packet_hwol_reset_tx_l4_csum(p); > + return; > + } > + > + if (dp_packet_hwol_l4_is_tcp(p) > + && !(flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { > + packet_tcp_complete_csum(p); > + dp_packet_ol_set_l4_csum_good(p); > + dp_packet_hwol_reset_tx_l4_csum(p); > + } else if (dp_packet_hwol_l4_is_udp(p) > + && !(flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { Indentation. > + packet_udp_complete_csum(p); > + dp_packet_ol_set_l4_csum_good(p); > + dp_packet_hwol_reset_tx_l4_csum(p); > + } else if (!(flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM) > + && dp_packet_hwol_l4_is_sctp(p)) { Indentation. > + packet_sctp_complete_csum(p); > + dp_packet_ol_set_l4_csum_good(p); > + dp_packet_hwol_reset_tx_l4_csum(p); > + } > } > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index f60618716..d550b099c 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -140,6 +140,8 @@ struct dp_packet { > or UINT16_MAX. */ > uint32_t cutlen; /* length in bytes to cut from the end. */ > ovs_be32 packet_type; /* Packet type as defined in OpenFlow */ > + uint16_t csum_start; /* Position to start checksumming from. */ > + uint16_t csum_offset; /* Offset to place checksum. */ > union { > struct pkt_metadata md; > uint64_t data[DP_PACKET_CONTEXT_SIZE / 8]; > @@ -995,6 +997,13 @@ dp_packet_hwol_is_ipv4(const struct dp_packet *b) > return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_IPV4); > } > > +/* Returns 'true' if packet 'p' is marked as IPv6. 
*/ > +static inline bool > +dp_packet_hwol_tx_ipv6(const struct dp_packet *p) > +{ > + return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IPV6); > +} > + > /* Returns 'true' if packet 'b' is marked for TCP checksum offloading. */ > static inline bool > dp_packet_hwol_l4_is_tcp(const struct dp_packet *b) > @@ -1019,18 +1028,26 @@ dp_packet_hwol_l4_is_sctp(struct dp_packet *b) > DP_PACKET_OL_TX_SCTP_CKSUM; > } > > -/* Mark packet 'b' for IPv4 checksum offloading. */ > static inline void > -dp_packet_hwol_set_tx_ipv4(struct dp_packet *b) > +dp_packet_hwol_reset_tx_l4_csum(struct dp_packet *p) > +{ > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_L4_MASK; > +} > + > +/* Mark packet 'p' as IPv4. */ > +static inline void > +dp_packet_hwol_set_tx_ipv4(struct dp_packet *p) > { > - *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV4; > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6; > + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4; > } > > -/* Mark packet 'b' for IPv6 checksum offloading. */ > +/* Mark packet 'a' as IPv6. */ > static inline void > -dp_packet_hwol_set_tx_ipv6(struct dp_packet *b) > +dp_packet_hwol_set_tx_ipv6(struct dp_packet *a) > { > - *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6; > + *dp_packet_ol_flags_ptr(a) &= ~DP_PACKET_OL_TX_IPV4; > + *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6; > } > > /* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */ > @@ -1129,6 +1146,8 @@ dp_packet_ip_set_header_csum(struct dp_packet *p) > ip->ip_csum = csum(ip, sizeof *ip); > } > > +/* Returns 'true' if the packet 'p' has good integrity and the > + * checksum in it is correct. */ Should be in a previous patch? > static inline bool > dp_packet_l4_checksum_good(const struct dp_packet *p) > { > @@ -1143,6 +1162,53 @@ dp_packet_l4_checksum_bad(const struct dp_packet *p) > DP_PACKET_OL_RX_L4_CKSUM_BAD; > } > > +/* Returns 'true' if the packet has good integrity though the > + * checksum in the packet 'p' is not complete. 
*/ > +static inline bool > +dp_packet_ol_l4_csum_partial(const struct dp_packet *p) > +{ > + return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK) == > + DP_PACKET_OL_RX_L4_CKSUM_MASK; > +} > + > +/* Marks packet 'p' with good integrity though the checksum in the > + * packet is not complete. */ > +static inline void > +dp_packet_ol_set_l4_csum_partial(const struct dp_packet *p) s/const// > +{ > + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_MASK; > +} > + > +/* Marks packet 'p' with good L4 checksum. */ > +static inline void > +dp_packet_ol_set_l4_csum_good(const struct dp_packet *p) > +{ > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_BAD; > + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_GOOD; > +} > + > +/* Marks packet 'p' with good L4 checksum as modified. */ > +static inline void > +dp_packet_ol_reset_l4_csum_good(const struct dp_packet *p) > +{ > + if (!dp_packet_ol_l4_csum_partial(p)) { > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_GOOD; > + } > +} > + > +/* Marks packet 'p' with good integrity if the 'start' and 'offset' > + * matches with the 'csum_start' and 'csum_offset' in packet 'p'. > + * The 'start' is the offset from the begin of the packet headers. > + * The 'offset' is the offset from start to place the checksum. */ > +static inline void > +dp_packet_ol_vnet_csum_check(const struct dp_packet *p, uint16_t start, 'vnet' part looks strange here. Unclear what it supposed to mean. 
> + uint16_t offset) > +{ > + if (p->csum_start == start && p->csum_offset == offset) { > + dp_packet_ol_set_l4_csum_partial(p); > + } > +} > + > static inline void ALWAYS_INLINE > dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet *packet) > { > diff --git a/lib/flow.c b/lib/flow.c > index 6c8bf7fc0..5aaf3b420 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -1027,6 +1027,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) > } else if (dl_type == htons(ETH_TYPE_IPV6)) { > dp_packet_update_rss_hash_ipv6_tcp_udp(packet); > } > + dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs, > + offsetof(struct tcp_header, > + tcp_csum)); > + if (dp_packet_l4_checksum_good(packet) > + || dp_packet_ol_l4_csum_partial(packet)) { > + dp_packet_hwol_set_csum_tcp(packet); > + } > } > } > } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) { > @@ -1042,6 +1049,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) > } else if (dl_type == htons(ETH_TYPE_IPV6)) { > dp_packet_update_rss_hash_ipv6_tcp_udp(packet); > } > + dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs, > + offsetof(struct udp_header, > + udp_csum)); > + if (dp_packet_l4_checksum_good(packet) > + || dp_packet_ol_l4_csum_partial(packet)) { > + dp_packet_hwol_set_csum_udp(packet); > + } > } > } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) { > if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) { > @@ -1051,6 +1065,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) > miniflow_push_be16(mf, tp_dst, sctp->sctp_dst); > miniflow_push_be16(mf, ct_tp_src, ct_tp_src); > miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst); > + dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs, > + offsetof(struct sctp_header, > + sctp_csum)); > + if (dp_packet_l4_checksum_good(packet) > + || dp_packet_ol_l4_csum_partial(packet)) { > + dp_packet_hwol_set_csum_sctp(packet); > + } avx512 implementation changes also needed, AFAIU. 
> } > } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) { > if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) { > @@ -3170,6 +3191,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow, > tcp->tcp_csum = 0; > tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum, > tcp, l4_len)); > + dp_packet_ol_set_l4_csum_good(p); > } else if (flow->nw_proto == IPPROTO_UDP) { > struct udp_header *udp = dp_packet_l4(p); > > @@ -3179,6 +3201,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow, > if (!udp->udp_csum) { > udp->udp_csum = htons(0xffff); > } > + dp_packet_ol_set_l4_csum_good(p); > } else if (flow->nw_proto == IPPROTO_ICMP) { > struct icmp_header *icmp = dp_packet_l4(p); > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 4ccc56b0e..d36d5a75a 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -146,17 +146,6 @@ typedef uint16_t dpdk_port_t; > > #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) > > -/* List of required flags advertised by the hardware that will be used > - * if TSO is enabled. Ideally this should include > - * RTE_ETH_TX_OFFLOAD_SCTP_CKSUM. However, very few drivers support that > - * at the moment and SCTP is not a widely used protocol like TCP and UDP, > - * so it's optional. 
*/ > -#define DPDK_TX_TSO_OFFLOAD_FLAGS (RTE_ETH_TX_OFFLOAD_TCP_TSO \ > - | RTE_ETH_TX_OFFLOAD_TCP_CKSUM \ > - | RTE_ETH_TX_OFFLOAD_UDP_CKSUM \ > - | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) > - > - > static const struct rte_eth_conf port_conf = { > .rxmode = { > .split_hdr_size = 0, > @@ -407,8 +396,10 @@ enum dpdk_hw_ol_features { > NETDEV_RX_HW_CRC_STRIP = 1 << 1, > NETDEV_RX_HW_SCATTER = 1 << 2, > NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3, > - NETDEV_TX_TSO_OFFLOAD = 1 << 4, > - NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5, > + NETDEV_TX_TCP_CKSUM_OFFLOAD = 1 << 4, > + NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5, > + NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6, > + NETDEV_TX_TSO_OFFLOAD = 1 << 7, > }; > > /* > @@ -1004,6 +995,35 @@ dpdk_watchdog(void *dummy OVS_UNUSED) > return NULL; > } > > +static void > +netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev, > + enum dpdk_hw_ol_features hw_ol_features, > + enum netdev_ol_flags flag) > +{ > + struct netdev *netdev = &dev->up; > + > + if (dev->hw_ol_features & hw_ol_features) { > + netdev->ol_flags |= flag; > + } else { > + netdev->ol_flags &= ~flag; > + } > +} > + > +static void > +netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev) > +{ > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_IPV4_CKSUM_OFFLOAD, > + NETDEV_TX_OFFLOAD_IPV4_CKSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TCP_CKSUM_OFFLOAD, > + NETDEV_TX_OFFLOAD_TCP_CKSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_UDP_CKSUM_OFFLOAD, > + NETDEV_TX_OFFLOAD_UDP_CKSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_SCTP_CKSUM_OFFLOAD, > + NETDEV_TX_OFFLOAD_SCTP_CKSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD, > + NETDEV_TX_OFFLOAD_TCP_TSO); > +} > + > static int > dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) > { > @@ -1040,11 +1060,20 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) > conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; > } > > + if (dev->hw_ol_features & 
NETDEV_TX_TCP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM; > + } > + > + if (dev->hw_ol_features & NETDEV_TX_UDP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM; > + } > + > + if (dev->hw_ol_features & NETDEV_TX_SCTP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM; > + } > + > if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { > - conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS; > - if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { > - conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM; > - } > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; > } > > /* Limit configured rss hash functions to only those supported > @@ -1150,7 +1179,6 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > struct rte_ether_addr eth_addr; > int diag; > int n_rxq, n_txq; > - uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS; > uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM | > RTE_ETH_RX_OFFLOAD_TCP_CKSUM | > RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; > @@ -1186,18 +1214,28 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD; > } > > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD; > + } > + > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD; > + } > + > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD; > + } > + > dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; > if (userspace_tso_enabled()) { > - if ((info.tx_offload_capa & tx_tso_offload_capa) > - == tx_tso_offload_capa) { > + if (info.tx_offload_capa & 
RTE_ETH_TX_OFFLOAD_TCP_TSO) { > dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; > - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) { > - dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD; > - } else { > - VLOG_WARN("%s: Tx SCTP checksum offload is not supported, " > - "SCTP packets sent to this device will be dropped", > - netdev_get_name(&dev->up)); > - } > } else { > VLOG_WARN("%s: Tx TSO offload is not supported.", > netdev_get_name(&dev->up)); > @@ -1759,6 +1797,9 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args) > smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false"); > HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD); > HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD); > + HWOL_SMAP_ADD("tx_tcp_csum_offload", NETDEV_TX_TCP_CKSUM_OFFLOAD); > + HWOL_SMAP_ADD("tx_udp_csum_offload", NETDEV_TX_UDP_CKSUM_OFFLOAD); > + HWOL_SMAP_ADD("tx_sctp_csum_offload", NETDEV_TX_SCTP_CKSUM_OFFLOAD); Probably, should not be here. See the comments for the previous patch. 
> HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD); > #undef HWOL_SMAP_ADD > smap_add(args, "lsc_interrupt_mode", > @@ -2210,6 +2251,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) > > mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt); > mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt); > + mbuf->l4_len = 0; > mbuf->outer_l2_len = 0; > mbuf->outer_l3_len = 0; > > @@ -3968,6 +4010,7 @@ new_device(int vid) > ovs_mutex_lock(&dev->mutex); > if (nullable_string_is_equal(ifname, dev->vhost_id)) { > uint32_t qp_num = rte_vhost_get_vring_num(vid) / VIRTIO_QNUM; > + uint64_t features; > > /* Get NUMA information */ > newnode = rte_vhost_get_numa_node(vid); > @@ -3992,6 +4035,36 @@ new_device(int vid) > dev->vhost_reconfigured = true; > } > > + if (rte_vhost_get_negotiated_features(vid, &features)) { > + VLOG_INFO("Error checking guest features for " > + "vHost Device '%s'", dev->vhost_id); > + } else { > + if (features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) { > + dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD; > + dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD; > + dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD; > + } > + > + if (userspace_tso_enabled()) { > + if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4) > + && features & (1ULL << VIRTIO_NET_F_GUEST_TSO6)) { > + > + dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; > + VLOG_DBG("%s: TSO enabled on vhost port", > + netdev_get_name(&dev->up)); > + } else { > + VLOG_WARN("%s: Tx TSO offload is not supported.", > + netdev_get_name(&dev->up)); > + } > + } > + } > + > + /* There is no support in virtio net to offload IPv4 csum, > + * but the vhost library handles IPv4 csum offloading fine. 
*/ > + dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD; > + > + netdev_dpdk_update_netdev_flags(dev); > + > ovsrcu_index_set(&dev->vid, vid); > exists = true; > > @@ -4055,6 +4128,14 @@ destroy_device(int vid) > dev->up.n_rxq * sizeof *dev->vhost_rxq_enabled); > netdev_dpdk_txq_map_clear(dev); > > + /* Clear offload capabilities before next new_device. */ > + dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD; > + dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD; > + dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD; > + dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD; > + dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; > + netdev_dpdk_update_netdev_flags(dev); > + > netdev_change_seq_changed(&dev->up); > ovs_mutex_unlock(&dev->mutex); > exists = true; > @@ -4992,22 +5073,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev) > } > > err = dpdk_eth_dev_init(dev); > - > - if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) { > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > - } else { > - netdev->ol_flags &= ~NETDEV_TX_OFFLOAD_IPV4_CKSUM; > - } > - > - if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > - if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; > - } > - } > + netdev_dpdk_update_netdev_flags(dev); > > /* If both requested and actual hwaddr were previously > * unset (initialized to 0), then first device init above > @@ -5049,11 +5115,6 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev) > dev->tx_q[0].map = 0; > } > > - if (userspace_tso_enabled()) { > - dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; > - VLOG_DBG("%s: TSO enabled on vhost port", netdev_get_name(&dev->up)); > - } > - > netdev_dpdk_remap_txqs(dev); > > if (netdev_dpdk_get_vid(dev) >= 0) { > @@ 
-5074,6 +5135,8 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev) > } > } > > + netdev_dpdk_update_netdev_flags(dev); > + > return 0; > } > > @@ -5095,8 +5158,6 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > int err; > - uint64_t vhost_flags = 0; > - uint64_t vhost_unsup_flags; > > ovs_mutex_lock(&dev->mutex); > > @@ -5106,6 +5167,9 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) > * 2. A path has been specified. > */ > if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT) && dev->vhost_id) { > + uint64_t virtio_unsup_features = 0; > + uint64_t vhost_flags = 0; > + > /* Register client-mode device. */ > vhost_flags |= RTE_VHOST_USER_CLIENT; > > @@ -5149,22 +5213,22 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) > } > > if (userspace_tso_enabled()) { > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > - vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN > - | 1ULL << VIRTIO_NET_F_HOST_UFO; > + virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_ECN > + | 1ULL << VIRTIO_NET_F_HOST_UFO; > + VLOG_DBG("%s: TSO enabled on vhost port", > + netdev_get_name(&dev->up)); > } else { > - /* This disables checksum offloading and all the features > - * that depends on it (TSO, UFO, ECN) according to virtio > - * specification. */ > - vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM; > + /* Advertise checksum offloading to the guest, but explicitly > + * disable TSO and friends. > + * NOTE: we can't disable HOST_ECN which may have been wrongly > + * negotiated by a running guest. 
*/ > + virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_TSO4 > + | 1ULL << VIRTIO_NET_F_HOST_TSO6 > + | 1ULL << VIRTIO_NET_F_HOST_UFO; > } > > err = rte_vhost_driver_disable_features(dev->vhost_id, > - vhost_unsup_flags); > + virtio_unsup_features); > if (err) { > VLOG_ERR("rte_vhost_driver_disable_features failed for " > "vhost user client port: %s\n", dev->up.name); > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index 59e8dc0ae..4d8ebdae5 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -938,14 +938,6 @@ netdev_linux_common_construct(struct netdev *netdev_) > netnsid_unset(&netdev->netnsid); > ovs_mutex_init(&netdev->mutex); > > - if (userspace_tso_enabled()) { > - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; > - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; > - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; > - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; > - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > - } > - > return 0; > } > > @@ -959,6 +951,16 @@ netdev_linux_construct(struct netdev *netdev_) > return error; > } > > + /* The socket interface doesn't offer the option to enable only > + * csum offloading without TSO. 
*/ > + if (userspace_tso_enabled()) { > + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; > + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; > + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; > + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; > + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > + } > + > error = get_flags(&netdev->up, &netdev->ifi_flags); > if (error == ENODEV) { > if (netdev->up.netdev_class != &netdev_internal_class) { > @@ -987,6 +989,7 @@ netdev_linux_construct_tap(struct netdev *netdev_) > struct netdev_linux *netdev = netdev_linux_cast(netdev_); > static const char tap_dev[] = "/dev/net/tun"; > const char *name = netdev_->name; > + unsigned long oflags; > struct ifreq ifr; > > int error = netdev_linux_common_construct(netdev_); > @@ -1004,10 +1007,7 @@ netdev_linux_construct_tap(struct netdev *netdev_) > > /* Create tap device. */ > get_flags(&netdev->up, &netdev->ifi_flags); > - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; > - if (userspace_tso_enabled()) { > - ifr.ifr_flags |= IFF_VNET_HDR; > - } > + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; > > ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name); > if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) { > @@ -1030,21 +1030,22 @@ netdev_linux_construct_tap(struct netdev *netdev_) > goto error_close; > } > > + oflags = TUN_F_CSUM; > if (userspace_tso_enabled()) { > - /* Old kernels don't support TUNSETOFFLOAD. If TUNSETOFFLOAD is > - * available, it will return EINVAL when a flag is unknown. > - * Therefore, try enabling offload with no flags to check > - * if TUNSETOFFLOAD support is available or not. 
*/ > - if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, 0) == 0 || errno != EINVAL) { > - unsigned long oflags = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6; > - > - if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == -1) { > - VLOG_WARN("%s: enabling tap offloading failed: %s", name, > - ovs_strerror(errno)); > - error = errno; > - goto error_close; > - } > - } > + oflags |= (TUN_F_TSO4 | TUN_F_TSO6); > + } > + > + if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) { > + netdev_->ol_flags |= (NETDEV_TX_OFFLOAD_IPV4_CKSUM > + | NETDEV_TX_OFFLOAD_TCP_CKSUM > + | NETDEV_TX_OFFLOAD_UDP_CKSUM); > + > + if (userspace_tso_enabled()) { > + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; > + } > + } else { > + VLOG_WARN("%s: Disabling hardware offloading: %s", name, > + ovs_strerror(errno)); > } > > netdev->present = true; > @@ -1344,18 +1345,22 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu, > pkt = buffers[i]; > } > > - if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) { > - struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); > - struct netdev_linux *netdev = netdev_linux_cast(netdev_); > + if (virtio_net_hdr_size) { > + int ret = netdev_linux_parse_vnet_hdr(pkt); > + if (OVS_UNLIKELY(ret)) { > + struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); > + struct netdev_linux *netdev = netdev_linux_cast(netdev_); > > - /* Unexpected error situation: the virtio header is not present > - * or corrupted. Drop the packet but continue in case next ones > - * are correct. */ > - dp_packet_delete(pkt); > - netdev->rx_dropped += 1; > - VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header", > - netdev_get_name(netdev_)); > - continue; > + /* Unexpected error situation: the virtio header is not > + * present or corrupted or contains unsupported features. > + * Drop the packet but continue in case next ones are > + * correct. 
*/ > + dp_packet_delete(pkt); > + netdev->rx_dropped += 1; > + VLOG_WARN_RL(&rl, "%s: Dropped packet: %s", > + netdev_get_name(netdev_), ovs_strerror(ret)); > + continue; > + } > } > > for (cmsg = CMSG_FIRSTHDR(&mmsgs[i].msg_hdr); cmsg; > @@ -1403,7 +1408,6 @@ static int > netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, > struct dp_packet_batch *batch) > { > - int virtio_net_hdr_size; > ssize_t retval; > size_t std_len; > int iovlen; > @@ -1413,16 +1417,14 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, > /* Use the buffer from the allocated packet below to receive MTU > * sized packets and an aux_buf for extra TSO data. */ > iovlen = IOV_TSO_SIZE; > - virtio_net_hdr_size = sizeof(struct virtio_net_hdr); > } else { > /* Use only the buffer from the allocated packet. */ > iovlen = IOV_STD_SIZE; > - virtio_net_hdr_size = 0; > } > > /* The length here needs to be accounted in the same way when the > * aux_buf is allocated so that it can be prepended to TSO buffer. */ > - std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu; > + std_len = sizeof(struct virtio_net_hdr) + VLAN_ETH_HEADER_LEN + mtu; > for (i = 0; i < NETDEV_MAX_BURST; i++) { > struct dp_packet *buffer; > struct dp_packet *pkt; > @@ -1462,7 +1464,7 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, > pkt = buffer; > } > > - if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) { > + if (netdev_linux_parse_vnet_hdr(pkt)) { > struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); > struct netdev_linux *netdev = netdev_linux_cast(netdev_); > > @@ -1611,7 +1613,7 @@ netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu, > * on other interface types because we attach a socket filter to the rx > * socket. 
*/ > static int > -netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu, > +netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu, > struct dp_packet_batch *batch) > { > struct netdev_linux *netdev = netdev_linux_cast(netdev_); > @@ -1632,9 +1634,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu, > ssize_t retval; > int error; > > - if (tso) { > - netdev_linux_prepend_vnet_hdr(packet, mtu); > - } > + netdev_linux_prepend_vnet_hdr(packet, mtu); > > size = dp_packet_size(packet); > do { > @@ -1765,7 +1765,7 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED, > > error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch); > } else { > - error = netdev_linux_tap_batch_send(netdev_, tso, mtu, batch); > + error = netdev_linux_tap_batch_send(netdev_, mtu, batch); > } > if (error) { > if (error == ENOBUFS) { > @@ -6819,53 +6819,73 @@ netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto) > return 0; > } > > +/* Initializes packet 'b' with features enabled in the prepended > + * struct virtio_net_hdr. Returns 0 if successful, otherwise a > + * positive errno value. 
*/ > static int > netdev_linux_parse_vnet_hdr(struct dp_packet *b) > { > struct virtio_net_hdr *vnet = dp_packet_pull(b, sizeof *vnet); > - uint16_t l4proto = 0; > > if (OVS_UNLIKELY(!vnet)) { > - return -EINVAL; > + return EINVAL; > } > > if (vnet->flags == 0 && vnet->gso_type == VIRTIO_NET_HDR_GSO_NONE) { > return 0; > } > > - if (netdev_linux_parse_l2(b, &l4proto)) { > - return -EINVAL; > - } > - > if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { > - if (l4proto == IPPROTO_TCP) { > - dp_packet_hwol_set_csum_tcp(b); > - } else if (l4proto == IPPROTO_UDP) { > - dp_packet_hwol_set_csum_udp(b); > - } else if (l4proto == IPPROTO_SCTP) { > - dp_packet_hwol_set_csum_sctp(b); > - } > - } > + uint16_t l4proto = 0; > > - if (l4proto && vnet->gso_type != VIRTIO_NET_HDR_GSO_NONE) { > - uint8_t allowed_mask = VIRTIO_NET_HDR_GSO_TCPV4 > - | VIRTIO_NET_HDR_GSO_TCPV6 > - | VIRTIO_NET_HDR_GSO_UDP; > - uint8_t type = vnet->gso_type & allowed_mask; > - > - if (type == VIRTIO_NET_HDR_GSO_TCPV4 > - || type == VIRTIO_NET_HDR_GSO_TCPV6) { > - dp_packet_hwol_set_tcp_seg(b); > + if (netdev_linux_parse_l2(b, &l4proto)) { > + return EINVAL; > } > - } > > - return 0; > + if (l4proto == IPPROTO_UDP) { > + dp_packet_hwol_set_csum_udp(b); > + } > + /* The packet has offloaded checksum. However, there is no > + * additional information like the protocol used, so it would > + * require to parse the packet here. The checksum starting point > + * and offset are going to be verified when the packet headers > + * are parsed during miniflow extraction. */> + b->csum_start = (OVS_FORCE uint16_t) vnet->csum_start; > + b->csum_offset = (OVS_FORCE uint16_t) vnet->csum_offset; > + } else { > + b->csum_start = 0; > + b->csum_offset = 0; > + } > + > + int ret = 0; > + switch (vnet->gso_type) { > + case VIRTIO_NET_HDR_GSO_TCPV4: > + case VIRTIO_NET_HDR_GSO_TCPV6: > + /* FIXME: The packet has offloaded TCP segmentation. The gso_size > + * is given and needs to be respected. 
*/ > + dp_packet_hwol_set_tcp_seg(b); > + break; An empty line should separate cases. > + case VIRTIO_NET_HDR_GSO_UDP: > + /* UFO is not supported. */ > + VLOG_WARN_RL(&rl, "Received an unsupported packet with UFO enabled."); > + ret = ENOTSUP; > + break; > + case VIRTIO_NET_HDR_GSO_NONE: > + break; > + default: > + ret = ENOTSUP; > + VLOG_WARN_RL(&rl, "Received an unsupported packet with GSO type: 0x%x", > + vnet->gso_type); > + } > + > + return ret; > } > > static void > netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu) > { > - struct virtio_net_hdr *vnet = dp_packet_push_zeros(b, sizeof *vnet); > + struct virtio_net_hdr v; > + struct virtio_net_hdr *vnet = &v; > > if (dp_packet_hwol_is_tso(b)) { > uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char *)dp_packet_eth(b)) > @@ -6875,30 +6895,92 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu) > vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len); > if (dp_packet_hwol_is_ipv4(b)) { > vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; > - } else { > + } else if (dp_packet_hwol_tx_ipv6(b)) { > vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; > } > > } else { > - vnet->flags = VIRTIO_NET_HDR_GSO_NONE; > - } > - > - if (dp_packet_hwol_l4_mask(b)) { > - vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; > - vnet->csum_start = (OVS_FORCE __virtio16)((char *)dp_packet_l4(b) > - - (char *)dp_packet_eth(b)); > - > + vnet->hdr_len = 0; > + vnet->gso_size = 0; > + vnet->gso_type = VIRTIO_NET_HDR_GSO_NONE; > + } > + > + if (dp_packet_l4_checksum_good(b)) { > + /* The packet has good checksum in the packet. 'in the header' ? > + * No need to validate again. */ > + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; > + vnet->flags = VIRTIO_NET_HDR_F_DATA_VALID; > + } else if (dp_packet_hwol_tx_l4_checksum(b)) { > + /* The csum calculation is offloaded. 
*/ > if (dp_packet_hwol_l4_is_tcp(b)) { > + /* Virtual I/O Device (VIRTIO) Version 1.1 > + * 5.1.6.2 Packet Transmission > + If the driver negotiated VIRTIO_NET_F_CSUM, it can skip > + checksumming the packet: > + - flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM set, > + - csum_start is set to the offset within the packet > + to begin checksumming, and > + - csum_offset indicates how many bytes after the > + csum_start the new (16 bit ones complement) checksum > + is placed by the device. > + The TCP checksum field in the packet is set to the sum of > + the TCP pseudo header, so that replacing it by the ones > + complement checksum of the TCP header and body will give > + the correct result. */ Comment style is strange. > + > + struct tcp_header *tcp_hdr = dp_packet_l4(b); > + ovs_be16 csum = 0; > + if (dp_packet_hwol_is_ipv4(b)) { > + const struct ip_header *ip_hdr = dp_packet_l3(b); > + csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr)); > + } else if (dp_packet_hwol_tx_ipv6(b)) { > + const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b); > + csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr)); > + } > + > + tcp_hdr->tcp_csum = csum; > + vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; > + vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs; > vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof( > struct tcp_header, tcp_csum); > } else if (dp_packet_hwol_l4_is_udp(b)) { > + struct udp_header *udp_hdr = dp_packet_l4(b); > + ovs_be16 csum = 0; > + > + if (dp_packet_hwol_is_ipv4(b)) { > + const struct ip_header *ip_hdr = dp_packet_l3(b); > + csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr)); > + } else if (dp_packet_hwol_tx_ipv6(b)) { > + const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b); > + csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr)); > + } > + > + udp_hdr->udp_csum = csum; > + vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; > + vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs; > vnet->csum_offset = (OVS_FORCE __virtio16) 
__builtin_offsetof( I wonder why we're using __builtin_offsetof() instead of just offsetof(). Not an issue of this patch though. > struct udp_header, udp_csum); > } else if (dp_packet_hwol_l4_is_sctp(b)) { > - vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof( > - struct sctp_header, sctp_csum); > + /* The Linux kernel networking stack only supports csum_start > + * and csum_offset when SCTP GSO is enabled. See kernel's > + * skb_csum_hwoffload_help(). Currently there is no SCTP > + * segmentation offload support in OVS. */ > + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; > + vnet->flags = 0; > } else { > - VLOG_WARN_RL(&rl, "Unsupported L4 protocol"); > + /* This should only happen when DP_PACKET_OL_TX_L4_MASK includes > + * a new flag that is not covered in above checks. */ > + VLOG_WARN_RL(&rl, "Unsupported L4 checksum offload. " > + "Flags: %"PRIu64, > + (uint64_t)*dp_packet_ol_flags_ptr(b)); > + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; > + vnet->flags = 0; > } > + } else { > + /* Packet L4 csum is unknown. 
*/ > + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; > + vnet->flags = 0; > } > + > + dp_packet_push(b, vnet, sizeof *vnet); > } > diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c > index 754e2d78d..dc054336a 100644 > --- a/lib/netdev-native-tnl.c > +++ b/lib/netdev-native-tnl.c > @@ -224,28 +224,6 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl, > return udp + 1; > } > > -static void > -netdev_tnl_calc_udp_csum(struct udp_header *udp, struct dp_packet *packet, > - int ip_tot_size) > -{ > - uint32_t csum; > - > - if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) { > - csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr( > - dp_packet_data(packet))); > - } else { > - csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr( > - dp_packet_data(packet))); > - } > - > - csum = csum_continue(csum, udp, ip_tot_size); > - udp->udp_csum = csum_finish(csum); > - > - if (!udp->udp_csum) { > - udp->udp_csum = htons(0xffff); > - } > -} > - > void > netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, > struct dp_packet *packet, > @@ -260,9 +238,9 @@ netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, > udp->udp_src = netdev_tnl_get_src_port(packet); > udp->udp_len = htons(ip_tot_size); > > - if (udp->udp_csum) { > - netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size); > - } > + /* Postpone checksum to the egress netdev. */ > + dp_packet_hwol_set_csum_udp(packet); > + dp_packet_ol_reset_l4_csum_good(packet); > } > > static void * > @@ -806,7 +784,9 @@ netdev_gtpu_push_header(const struct netdev *netdev, > data->header_len, &ip_tot_size); > udp->udp_src = netdev_tnl_get_src_port(packet); > udp->udp_len = htons(ip_tot_size); > - netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size); > + /* Postpone checksum to the egress netdev. 
*/ > + dp_packet_hwol_set_csum_udp(packet); > + dp_packet_ol_reset_l4_csum_good(packet); > > gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1); > > diff --git a/lib/netdev.c b/lib/netdev.c > index 6d3f678f0..12e1cb948 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -798,8 +798,6 @@ static bool > netdev_send_prepare_packet(const uint64_t netdev_flags, > struct dp_packet *packet, char **errormsg) > { > - uint64_t l4_mask; > - > if (dp_packet_hwol_is_tso(packet) > && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { > /* Fall back to GSO in software. */ > @@ -812,36 +810,16 @@ netdev_send_prepare_packet(const uint64_t netdev_flags, > * netdev to decide what would be the best to do. > * Provide a software fallback in case the device doesn't support IP csum > * offloading. Note: Encapsulated packet must have the inner IP header > + * csum already calculated. > + * Packet with L4 csum offloading enabled was received with verified csum. > + * Leave the L4 csum offloading enabled even with good checksum for the > + * netdev to decide what would be the best to do. > + * Netdev that requires pseudo header csum needs to calculate that. > + * Provide a software fallback in case the netdev doesn't support L4 csum > + * offloading. Note: Encapsulated packet must have the inner L4 header > * csum already calculated. */ > dp_packet_ol_send_prepare(packet, netdev_flags); > > - l4_mask = dp_packet_hwol_l4_mask(packet); > - if (l4_mask) { > - if (dp_packet_hwol_l4_is_tcp(packet)) { > - if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { > - /* Fall back to TCP csum in software. */ > - VLOG_ERR_BUF(errormsg, "No TCP checksum support"); > - return false; > - } > - } else if (dp_packet_hwol_l4_is_udp(packet)) { > - if (!(netdev_flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { > - /* Fall back to UDP csum in software. 
*/ > - VLOG_ERR_BUF(errormsg, "No UDP checksum support"); > - return false; > - } > - } else if (dp_packet_hwol_l4_is_sctp(packet)) { > - if (!(netdev_flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)) { > - /* Fall back to SCTP csum in software. */ > - VLOG_ERR_BUF(errormsg, "No SCTP checksum support"); > - return false; > - } > - } else { > - VLOG_ERR_BUF(errormsg, "No L4 checksum support: mask: %"PRIu64, > - l4_mask); > - return false; > - } > - } > - > return true; > } > > @@ -974,20 +952,16 @@ netdev_push_header(const struct netdev *netdev, > size_t i, size = dp_packet_batch_size(batch); > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { > - if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet) > - || dp_packet_hwol_l4_mask(packet))) { > + if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet))) { > COVERAGE_INC(netdev_push_header_drops); > dp_packet_delete(packet); > - VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is " > + VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO offloading is " TSO already contains the word 'offloading'. > "not supported: packet dropped", > netdev_get_name(netdev)); > } else { > /* The packet is going to be encapsulated and there is > * no support yet for inner network header csum offloading. 
*/ > - if (dp_packet_hwol_tx_ip_csum(packet) > - && !dp_packet_ip_checksum_good(packet)) { > - dp_packet_ip_set_header_csum(packet); > - } > + dp_packet_ol_send_prepare(packet, 0); > > netdev->netdev_class->push_header(netdev, packet, data); > > diff --git a/lib/packets.c b/lib/packets.c > index a1d668190..8c69e6e3e 100644 > --- a/lib/packets.c > +++ b/lib/packets.c > @@ -1131,16 +1131,22 @@ packet_set_ipv4_addr(struct dp_packet *packet, > pkt_metadata_init_conn(&packet->md); > > if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) { > - struct tcp_header *th = dp_packet_l4(packet); > - > - th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr); > + if (dp_packet_hwol_l4_is_tcp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + } else { > + struct tcp_header *th = dp_packet_l4(packet); > + th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr); > + } > } else if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN ) { > - struct udp_header *uh = dp_packet_l4(packet); > - > - if (uh->udp_csum) { > - uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr); > - if (!uh->udp_csum) { > - uh->udp_csum = htons(0xffff); > + if (dp_packet_hwol_l4_is_udp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + } else { > + struct udp_header *uh = dp_packet_l4(packet); > + if (uh->udp_csum) { > + uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr); > + if (!uh->udp_csum) { > + uh->udp_csum = htons(0xffff); > + } > } > } > } > @@ -1246,16 +1252,24 @@ packet_update_csum128(struct dp_packet *packet, uint8_t proto, > size_t l4_size = dp_packet_l4_size(packet); > > if (proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) { > - struct tcp_header *th = dp_packet_l4(packet); > + if (dp_packet_hwol_l4_is_tcp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + } else { > + struct tcp_header *th = dp_packet_l4(packet); > > - th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr); > + th->tcp_csum = 
recalc_csum128(th->tcp_csum, addr, new_addr); > + } > } else if (proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) { > - struct udp_header *uh = dp_packet_l4(packet); > + if (dp_packet_hwol_l4_is_udp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + } else { > + struct udp_header *uh = dp_packet_l4(packet); > > - if (uh->udp_csum) { > - uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); > - if (!uh->udp_csum) { > - uh->udp_csum = htons(0xffff); > + if (uh->udp_csum) { > + uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); > + if (!uh->udp_csum) { > + uh->udp_csum = htons(0xffff); > + } > } > } > } else if (proto == IPPROTO_ICMPV6 && > @@ -1375,7 +1389,9 @@ static void > packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum) > { > if (*port != new_port) { > - *csum = recalc_csum16(*csum, *port, new_port); > + if (csum) { > + *csum = recalc_csum16(*csum, *port, new_port); > + } > *port = new_port; > } > } > @@ -1387,9 +1403,16 @@ void > packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) > { > struct tcp_header *th = dp_packet_l4(packet); > + ovs_be16 *csum = NULL; > + > + if (dp_packet_hwol_l4_is_tcp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + } else { > + csum = &th->tcp_csum; > + } > > - packet_set_port(&th->tcp_src, src, &th->tcp_csum); > - packet_set_port(&th->tcp_dst, dst, &th->tcp_csum); > + packet_set_port(&th->tcp_src, src, csum); > + packet_set_port(&th->tcp_dst, dst, csum); > pkt_metadata_init_conn(&packet->md); > } > > @@ -1401,17 +1424,21 @@ packet_set_udp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) > { > struct udp_header *uh = dp_packet_l4(packet); > > - if (uh->udp_csum) { > - packet_set_port(&uh->udp_src, src, &uh->udp_csum); > - packet_set_port(&uh->udp_dst, dst, &uh->udp_csum); > + if (dp_packet_hwol_l4_is_udp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + packet_set_port(&uh->udp_src, src, NULL); > + packet_set_port(&uh->udp_dst, dst, 
NULL); > + } else { > + ovs_be16 *csum = uh->udp_csum ? &uh->udp_csum : NULL; > + > + packet_set_port(&uh->udp_src, src, csum); > + packet_set_port(&uh->udp_dst, dst, csum); > > - if (!uh->udp_csum) { > + if (csum && !uh->udp_csum) { > uh->udp_csum = htons(0xffff); > } > - } else { > - uh->udp_src = src; > - uh->udp_dst = dst; > } > + > pkt_metadata_init_conn(&packet->md); > } > > @@ -1422,18 +1449,27 @@ void > packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) > { > struct sctp_header *sh = dp_packet_l4(packet); > - ovs_be32 old_csum, old_correct_csum, new_csum; > - uint16_t tp_len = dp_packet_l4_size(packet); > > - old_csum = get_16aligned_be32(&sh->sctp_csum); > - put_16aligned_be32(&sh->sctp_csum, 0); > - old_correct_csum = crc32c((void *)sh, tp_len); > + if (dp_packet_hwol_l4_is_sctp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + sh->sctp_src = src; > + sh->sctp_dst = dst; > + } else { > + ovs_be32 old_csum, old_correct_csum, new_csum; > + uint16_t tp_len = dp_packet_l4_size(packet); > > - sh->sctp_src = src; > - sh->sctp_dst = dst; > + old_csum = get_16aligned_be32(&sh->sctp_csum); > + put_16aligned_be32(&sh->sctp_csum, 0); > + old_correct_csum = crc32c((void *) sh, tp_len); > + > + sh->sctp_src = src; > + sh->sctp_dst = dst; > + > + new_csum = crc32c((void *) sh, tp_len); > + put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum > + ^ new_csum); > + } > > - new_csum = crc32c((void *)sh, tp_len); > - put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum); > pkt_metadata_init_conn(&packet->md); > } > > @@ -1957,3 +1993,72 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6) > } > } > } > + > +/* Set TCP checksum field in packet 'p' with complete checksum. > + * The packet must have the L3 and L4 offsets. 
*/ > +void > +packet_tcp_complete_csum(struct dp_packet *p) > +{ > + struct tcp_header *tcp = dp_packet_l4(p); > + > + tcp->tcp_csum = 0; > + if (dp_packet_hwol_is_ipv4(p)) { > + struct ip_header *ip = dp_packet_l3(p); > + > + tcp->tcp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip), > + tcp, dp_packet_l4_size(p))); > + } else if (dp_packet_hwol_tx_ipv6(p)) { > + struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p); > + > + tcp->tcp_csum = packet_csum_upperlayer6(ip6, tcp, ip6->ip6_nxt, > + dp_packet_l4_size(p)); > + } else { > + OVS_NOT_REACHED(); > + } > +} > + > +/* Set UDP checksum field in packet 'p' with complete checksum. > + * The packet must have the L3 and L4 offsets. */ > +void > +packet_udp_complete_csum(struct dp_packet *p) > +{ > + struct udp_header *udp = dp_packet_l4(p); > + > + /* Skip csum calculation if the udp_csum is zero. */ > + if (!udp->udp_csum) { > + return; > + } > + > + udp->udp_csum = 0; > + if (dp_packet_hwol_is_ipv4(p)) { > + struct ip_header *ip = dp_packet_l3(p); > + > + udp->udp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip), > + udp, dp_packet_l4_size(p))); > + } else if (dp_packet_hwol_tx_ipv6(p)) { > + struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p); > + > + udp->udp_csum = packet_csum_upperlayer6(ip6, udp, ip6->ip6_nxt, > + dp_packet_l4_size(p)); > + } else { > + OVS_NOT_REACHED(); > + } > + > + if (!udp->udp_csum) { > + udp->udp_csum = htons(0xffff); > + } > +} > + > +/* Set SCTP checksum field in packet 'p' with complete checksum. > + * The packet must have the L3 and L4 offsets. 
*/ > +void > +packet_sctp_complete_csum(struct dp_packet *p) > +{ > + struct sctp_header *sh = dp_packet_l4(p); > + uint16_t tp_len = dp_packet_l4_size(p); > + ovs_be32 csum; > + > + put_16aligned_be32(&sh->sctp_csum, 0); > + csum = crc32c((void *) sh, tp_len); > + put_16aligned_be32(&sh->sctp_csum, csum); > +} > diff --git a/lib/packets.h b/lib/packets.h > index 5bdf6e4bb..28950b8b1 100644 > --- a/lib/packets.h > +++ b/lib/packets.h > @@ -1643,6 +1643,9 @@ void packet_put_ra_prefix_opt(struct dp_packet *, > const ovs_be128 router_prefix); > uint32_t packet_csum_pseudoheader(const struct ip_header *); > void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6); > +void packet_tcp_complete_csum(struct dp_packet *); > +void packet_udp_complete_csum(struct dp_packet *); > +void packet_sctp_complete_csum(struct dp_packet *); > > #define DNS_HEADER_LEN 12 > struct dns_header {
Please ignore this email. fbl On 7/7/23 16:21, Flavio Leitner wrote: > From: Ilya Maximets <i.maximets@ovn.org> > > On 11/24/22 06:30, Mike Pattrick wrote: >> From: Flavio Leitner <fbl@sysclose.org> >> >> The netdev receiving packets is supposed to provide the flags >> indicating if the L4 checksum was verified and it is OK or BAD, >> otherwise the stack will check when appropriate by software. >> >> If the packet comes with good checksum, then postpone the >> checksum calculation to the egress device if needed. >> >> When encapsulate a packet with that flag, set the checksum >> of the inner L4 header since that is not yet supported. >> >> Calculate the L4 checksum when the packet is going to be sent >> over a device that doesn't support the feature. >> >> Linux tap devices allows enabling L3 and L4 offload, so this >> patch enables the feature. However, Linux socket interface >> remains disabled because the API doesn't allow enabling >> those two features without enabling TSO too. >> >> Signed-off-by: Flavio Leitner <fbl@sysclose.org> >> Co-authored-by: Mike Pattrick <mkp@redhat.com> >> Signed-off-by: Mike Pattrick <mkp@redhat.com> >> --- > Didn't test this as well. Only visual review. > > Should we enable checksum offloading in CONFIGURE_VETH_OFFLOADS for > check-system-userspace testsuite since support is enabled by default? > > More comments inline. > > Best regards, Ilya Maximets. 
> >> lib/conntrack.c | 15 +-- >> lib/dp-packet.c | 25 ++++ >> lib/dp-packet.h | 78 ++++++++++++- >> lib/flow.c | 23 ++++ >> lib/netdev-dpdk.c | 188 ++++++++++++++++++++---------- >> lib/netdev-linux.c | 252 ++++++++++++++++++++++++++-------------- >> lib/netdev-native-tnl.c | 32 +---- >> lib/netdev.c | 46 ++------ >> lib/packets.c | 175 ++++++++++++++++++++++------ >> lib/packets.h | 3 + >> 10 files changed, 580 insertions(+), 257 deletions(-) >> >> diff --git a/lib/conntrack.c b/lib/conntrack.c >> index 12194cce8..57e6a55e0 100644 >> --- a/lib/conntrack.c >> +++ b/lib/conntrack.c >> @@ -2118,13 +2118,12 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, >> } >> >> if (ok) { >> - bool hwol_bad_l4_csum = dp_packet_l4_checksum_bad(pkt); >> - if (!hwol_bad_l4_csum) { >> - bool hwol_good_l4_csum = dp_packet_l4_checksum_good(pkt) >> - || dp_packet_hwol_tx_l4_checksum(pkt); >> + if (!dp_packet_l4_checksum_bad(pkt)) { >> /* Validate the checksum only when hwol is not supported. 
*/ >> if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt), >> - &ctx->icmp_related, l3, !hwol_good_l4_csum, >> + &ctx->icmp_related, l3, >> + !dp_packet_l4_checksum_good(pkt) && >> + !dp_packet_hwol_tx_l4_checksum(pkt), >> NULL)) { >> ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); >> return true; >> @@ -3453,8 +3452,10 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx, >> adj_seqnum(&th->tcp_seq, ec->seq_skew); >> } >> >> - th->tcp_csum = 0; >> - if (!dp_packet_hwol_tx_l4_checksum(pkt)) { >> + if (dp_packet_hwol_tx_l4_checksum(pkt)) { >> + dp_packet_ol_reset_l4_csum_good(pkt); >> + } else { >> + th->tcp_csum = 0; >> if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) { >> th->tcp_csum = packet_csum_upperlayer6(nh6, th, ctx->key.nw_proto, >> dp_packet_l4_size(pkt)); >> diff --git a/lib/dp-packet.c b/lib/dp-packet.c >> index 90ef85de3..2cfaf5274 100644 >> --- a/lib/dp-packet.c >> +++ b/lib/dp-packet.c >> @@ -38,6 +38,9 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so >> dp_packet_init_specific(b); >> /* By default assume the packet type to be Ethernet. */ >> b->packet_type = htonl(PT_ETH); >> + /* Reset csum start and offset. */ >> + b->csum_start = 0; >> + b->csum_offset = 0; >> } >> >> static void >> @@ -544,4 +547,26 @@ dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags) >> dp_packet_ol_set_ip_csum_good(p); >> dp_packet_hwol_reset_tx_ip_csum(p); >> } >> + >> + if (dp_packet_l4_checksum_good(p) || !dp_packet_hwol_tx_l4_checksum(p)) { >> + dp_packet_hwol_reset_tx_l4_csum(p); >> + return; >> + } >> + >> + if (dp_packet_hwol_l4_is_tcp(p) >> + && !(flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { >> + packet_tcp_complete_csum(p); >> + dp_packet_ol_set_l4_csum_good(p); >> + dp_packet_hwol_reset_tx_l4_csum(p); >> + } else if (dp_packet_hwol_l4_is_udp(p) >> + && !(flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { > Indentation. 
> >> + packet_udp_complete_csum(p); >> + dp_packet_ol_set_l4_csum_good(p); >> + dp_packet_hwol_reset_tx_l4_csum(p); >> + } else if (!(flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM) >> + && dp_packet_hwol_l4_is_sctp(p)) { > Indentation. > >> + packet_sctp_complete_csum(p); >> + dp_packet_ol_set_l4_csum_good(p); >> + dp_packet_hwol_reset_tx_l4_csum(p); >> + } >> } >> diff --git a/lib/dp-packet.h b/lib/dp-packet.h >> index f60618716..d550b099c 100644 >> --- a/lib/dp-packet.h >> +++ b/lib/dp-packet.h >> @@ -140,6 +140,8 @@ struct dp_packet { >> or UINT16_MAX. */ >> uint32_t cutlen; /* length in bytes to cut from the end. */ >> ovs_be32 packet_type; /* Packet type as defined in OpenFlow */ >> + uint16_t csum_start; /* Position to start checksumming from. */ >> + uint16_t csum_offset; /* Offset to place checksum. */ >> union { >> struct pkt_metadata md; >> uint64_t data[DP_PACKET_CONTEXT_SIZE / 8]; >> @@ -995,6 +997,13 @@ dp_packet_hwol_is_ipv4(const struct dp_packet *b) >> return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_IPV4); >> } >> >> +/* Returns 'true' if packet 'p' is marked as IPv6. */ >> +static inline bool >> +dp_packet_hwol_tx_ipv6(const struct dp_packet *p) >> +{ >> + return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IPV6); >> +} >> + >> /* Returns 'true' if packet 'b' is marked for TCP checksum offloading. */ >> static inline bool >> dp_packet_hwol_l4_is_tcp(const struct dp_packet *b) >> @@ -1019,18 +1028,26 @@ dp_packet_hwol_l4_is_sctp(struct dp_packet *b) >> DP_PACKET_OL_TX_SCTP_CKSUM; >> } >> >> -/* Mark packet 'b' for IPv4 checksum offloading. */ >> static inline void >> -dp_packet_hwol_set_tx_ipv4(struct dp_packet *b) >> +dp_packet_hwol_reset_tx_l4_csum(struct dp_packet *p) >> +{ >> + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_L4_MASK; >> +} >> + >> +/* Mark packet 'p' as IPv4. 
*/ >> +static inline void >> +dp_packet_hwol_set_tx_ipv4(struct dp_packet *p) >> { >> - *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV4; >> + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6; >> + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4; >> } >> >> -/* Mark packet 'b' for IPv6 checksum offloading. */ >> +/* Mark packet 'a' as IPv6. */ >> static inline void >> -dp_packet_hwol_set_tx_ipv6(struct dp_packet *b) >> +dp_packet_hwol_set_tx_ipv6(struct dp_packet *a) >> { >> - *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6; >> + *dp_packet_ol_flags_ptr(a) &= ~DP_PACKET_OL_TX_IPV4; >> + *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6; >> } >> >> /* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */ >> @@ -1129,6 +1146,8 @@ dp_packet_ip_set_header_csum(struct dp_packet *p) >> ip->ip_csum = csum(ip, sizeof *ip); >> } >> >> +/* Returns 'true' if the packet 'p' has good integrity and the >> + * checksum in it is correct. */ > Should be in a previous patch? > >> static inline bool >> dp_packet_l4_checksum_good(const struct dp_packet *p) >> { >> @@ -1143,6 +1162,53 @@ dp_packet_l4_checksum_bad(const struct dp_packet *p) >> DP_PACKET_OL_RX_L4_CKSUM_BAD; >> } >> >> +/* Returns 'true' if the packet has good integrity though the >> + * checksum in the packet 'p' is not complete. */ >> +static inline bool >> +dp_packet_ol_l4_csum_partial(const struct dp_packet *p) >> +{ >> + return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK) == >> + DP_PACKET_OL_RX_L4_CKSUM_MASK; >> +} >> + >> +/* Marks packet 'p' with good integrity though the checksum in the >> + * packet is not complete. */ >> +static inline void >> +dp_packet_ol_set_l4_csum_partial(const struct dp_packet *p) > s/const// > >> +{ >> + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_MASK; >> +} >> + >> +/* Marks packet 'p' with good L4 checksum. 
*/ >> +static inline void >> +dp_packet_ol_set_l4_csum_good(const struct dp_packet *p) >> +{ >> + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_BAD; >> + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_GOOD; >> +} >> + >> +/* Marks packet 'p' with good L4 checksum as modified. */ >> +static inline void >> +dp_packet_ol_reset_l4_csum_good(const struct dp_packet *p) >> +{ >> + if (!dp_packet_ol_l4_csum_partial(p)) { >> + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_GOOD; >> + } >> +} >> + >> +/* Marks packet 'p' with good integrity if the 'start' and 'offset' >> + * matches with the 'csum_start' and 'csum_offset' in packet 'p'. >> + * The 'start' is the offset from the begin of the packet headers. >> + * The 'offset' is the offset from start to place the checksum. */ >> +static inline void >> +dp_packet_ol_vnet_csum_check(const struct dp_packet *p, uint16_t start, > 'vnet' part looks strange here. Unclear what it supposed to mean. > >> + uint16_t offset) >> +{ >> + if (p->csum_start == start && p->csum_offset == offset) { >> + dp_packet_ol_set_l4_csum_partial(p); >> + } >> +} >> + >> static inline void ALWAYS_INLINE >> dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet *packet) >> { >> diff --git a/lib/flow.c b/lib/flow.c >> index 6c8bf7fc0..5aaf3b420 100644 >> --- a/lib/flow.c >> +++ b/lib/flow.c >> @@ -1027,6 +1027,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) >> } else if (dl_type == htons(ETH_TYPE_IPV6)) { >> dp_packet_update_rss_hash_ipv6_tcp_udp(packet); >> } >> + dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs, >> + offsetof(struct tcp_header, >> + tcp_csum)); >> + if (dp_packet_l4_checksum_good(packet) >> + || dp_packet_ol_l4_csum_partial(packet)) { >> + dp_packet_hwol_set_csum_tcp(packet); >> + } >> } >> } >> } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) { >> @@ -1042,6 +1049,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) >> } else if (dl_type == htons(ETH_TYPE_IPV6)) { 
>> dp_packet_update_rss_hash_ipv6_tcp_udp(packet); >> } >> + dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs, >> + offsetof(struct udp_header, >> + udp_csum)); >> + if (dp_packet_l4_checksum_good(packet) >> + || dp_packet_ol_l4_csum_partial(packet)) { >> + dp_packet_hwol_set_csum_udp(packet); >> + } >> } >> } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) { >> if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) { >> @@ -1051,6 +1065,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) >> miniflow_push_be16(mf, tp_dst, sctp->sctp_dst); >> miniflow_push_be16(mf, ct_tp_src, ct_tp_src); >> miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst); >> + dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs, >> + offsetof(struct sctp_header, >> + sctp_csum)); >> + if (dp_packet_l4_checksum_good(packet) >> + || dp_packet_ol_l4_csum_partial(packet)) { >> + dp_packet_hwol_set_csum_sctp(packet); >> + } > avx512 implementation changes also needed, AFAIU. > >> } >> } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) { >> if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) { >> @@ -3170,6 +3191,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow, >> tcp->tcp_csum = 0; >> tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum, >> tcp, l4_len)); >> + dp_packet_ol_set_l4_csum_good(p); >> } else if (flow->nw_proto == IPPROTO_UDP) { >> struct udp_header *udp = dp_packet_l4(p); >> >> @@ -3179,6 +3201,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow, >> if (!udp->udp_csum) { >> udp->udp_csum = htons(0xffff); >> } >> + dp_packet_ol_set_l4_csum_good(p); >> } else if (flow->nw_proto == IPPROTO_ICMP) { >> struct icmp_header *icmp = dp_packet_l4(p); >> >> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c >> index 4ccc56b0e..d36d5a75a 100644 >> --- a/lib/netdev-dpdk.c >> +++ b/lib/netdev-dpdk.c >> @@ -146,17 +146,6 @@ typedef uint16_t dpdk_port_t; >> >> #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? 
PATH_MAX : IFNAMSIZ) >> >> -/* List of required flags advertised by the hardware that will be used >> - * if TSO is enabled. Ideally this should include >> - * RTE_ETH_TX_OFFLOAD_SCTP_CKSUM. However, very few drivers support that >> - * at the moment and SCTP is not a widely used protocol like TCP and UDP, >> - * so it's optional. */ >> -#define DPDK_TX_TSO_OFFLOAD_FLAGS (RTE_ETH_TX_OFFLOAD_TCP_TSO \ >> - | RTE_ETH_TX_OFFLOAD_TCP_CKSUM \ >> - | RTE_ETH_TX_OFFLOAD_UDP_CKSUM \ >> - | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) >> - >> - >> static const struct rte_eth_conf port_conf = { >> .rxmode = { >> .split_hdr_size = 0, >> @@ -407,8 +396,10 @@ enum dpdk_hw_ol_features { >> NETDEV_RX_HW_CRC_STRIP = 1 << 1, >> NETDEV_RX_HW_SCATTER = 1 << 2, >> NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3, >> - NETDEV_TX_TSO_OFFLOAD = 1 << 4, >> - NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5, >> + NETDEV_TX_TCP_CKSUM_OFFLOAD = 1 << 4, >> + NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5, >> + NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6, >> + NETDEV_TX_TSO_OFFLOAD = 1 << 7, >> }; >> >> /* >> @@ -1004,6 +995,35 @@ dpdk_watchdog(void *dummy OVS_UNUSED) >> return NULL; >> } >> >> +static void >> +netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev, >> + enum dpdk_hw_ol_features hw_ol_features, >> + enum netdev_ol_flags flag) >> +{ >> + struct netdev *netdev = &dev->up; >> + >> + if (dev->hw_ol_features & hw_ol_features) { >> + netdev->ol_flags |= flag; >> + } else { >> + netdev->ol_flags &= ~flag; >> + } >> +} >> + >> +static void >> +netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev) >> +{ >> + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_IPV4_CKSUM_OFFLOAD, >> + NETDEV_TX_OFFLOAD_IPV4_CKSUM); >> + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TCP_CKSUM_OFFLOAD, >> + NETDEV_TX_OFFLOAD_TCP_CKSUM); >> + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_UDP_CKSUM_OFFLOAD, >> + NETDEV_TX_OFFLOAD_UDP_CKSUM); >> + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_SCTP_CKSUM_OFFLOAD, >> + NETDEV_TX_OFFLOAD_SCTP_CKSUM); >> + 
netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD, >> + NETDEV_TX_OFFLOAD_TCP_TSO); >> +} >> + >> static int >> dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) >> { >> @@ -1040,11 +1060,20 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) >> conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; >> } >> >> + if (dev->hw_ol_features & NETDEV_TX_TCP_CKSUM_OFFLOAD) { >> + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM; >> + } >> + >> + if (dev->hw_ol_features & NETDEV_TX_UDP_CKSUM_OFFLOAD) { >> + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM; >> + } >> + >> + if (dev->hw_ol_features & NETDEV_TX_SCTP_CKSUM_OFFLOAD) { >> + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM; >> + } >> + >> if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { >> - conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS; >> - if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { >> - conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM; >> - } >> + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; >> } >> >> /* Limit configured rss hash functions to only those supported >> @@ -1150,7 +1179,6 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) >> struct rte_ether_addr eth_addr; >> int diag; >> int n_rxq, n_txq; >> - uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS; >> uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM | >> RTE_ETH_RX_OFFLOAD_TCP_CKSUM | >> RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; >> @@ -1186,18 +1214,28 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) >> dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD; >> } >> >> + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) { >> + dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD; >> + } else { >> + dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD; >> + } >> + >> + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) { >> + dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD; >> + } else { >> + dev->hw_ol_features &= 
~NETDEV_TX_UDP_CKSUM_OFFLOAD; >> + } >> + >> + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) { >> + dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD; >> + } else { >> + dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD; >> + } >> + >> dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; >> if (userspace_tso_enabled()) { >> - if ((info.tx_offload_capa & tx_tso_offload_capa) >> - == tx_tso_offload_capa) { >> + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) { >> dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; >> - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) { >> - dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD; >> - } else { >> - VLOG_WARN("%s: Tx SCTP checksum offload is not supported, " >> - "SCTP packets sent to this device will be dropped", >> - netdev_get_name(&dev->up)); >> - } >> } else { >> VLOG_WARN("%s: Tx TSO offload is not supported.", >> netdev_get_name(&dev->up)); >> @@ -1759,6 +1797,9 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args) >> smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false"); >> HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD); >> HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD); >> + HWOL_SMAP_ADD("tx_tcp_csum_offload", NETDEV_TX_TCP_CKSUM_OFFLOAD); >> + HWOL_SMAP_ADD("tx_udp_csum_offload", NETDEV_TX_UDP_CKSUM_OFFLOAD); >> + HWOL_SMAP_ADD("tx_sctp_csum_offload", NETDEV_TX_SCTP_CKSUM_OFFLOAD); > Probably, should not be here. See the comments for the previous patch. 
> >> HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD); >> #undef HWOL_SMAP_ADD >> smap_add(args, "lsc_interrupt_mode", >> @@ -2210,6 +2251,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) >> >> mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt); >> mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt); >> + mbuf->l4_len = 0; >> mbuf->outer_l2_len = 0; >> mbuf->outer_l3_len = 0; >> >> @@ -3968,6 +4010,7 @@ new_device(int vid) >> ovs_mutex_lock(&dev->mutex); >> if (nullable_string_is_equal(ifname, dev->vhost_id)) { >> uint32_t qp_num = rte_vhost_get_vring_num(vid) / VIRTIO_QNUM; >> + uint64_t features; >> >> /* Get NUMA information */ >> newnode = rte_vhost_get_numa_node(vid); >> @@ -3992,6 +4035,36 @@ new_device(int vid) >> dev->vhost_reconfigured = true; >> } >> >> + if (rte_vhost_get_negotiated_features(vid, &features)) { >> + VLOG_INFO("Error checking guest features for " >> + "vHost Device '%s'", dev->vhost_id); >> + } else { >> + if (features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) { >> + dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD; >> + dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD; >> + dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD; >> + } >> + >> + if (userspace_tso_enabled()) { >> + if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4) >> + && features & (1ULL << VIRTIO_NET_F_GUEST_TSO6)) { >> + >> + dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; >> + VLOG_DBG("%s: TSO enabled on vhost port", >> + netdev_get_name(&dev->up)); >> + } else { >> + VLOG_WARN("%s: Tx TSO offload is not supported.", >> + netdev_get_name(&dev->up)); >> + } >> + } >> + } >> + >> + /* There is no support in virtio net to offload IPv4 csum, >> + * but the vhost library handles IPv4 csum offloading fine. 
*/ >> + dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD; >> + >> + netdev_dpdk_update_netdev_flags(dev); >> + >> ovsrcu_index_set(&dev->vid, vid); >> exists = true; >> >> @@ -4055,6 +4128,14 @@ destroy_device(int vid) >> dev->up.n_rxq * sizeof *dev->vhost_rxq_enabled); >> netdev_dpdk_txq_map_clear(dev); >> >> + /* Clear offload capabilities before next new_device. */ >> + dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD; >> + dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD; >> + dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD; >> + dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD; >> + dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; >> + netdev_dpdk_update_netdev_flags(dev); >> + >> netdev_change_seq_changed(&dev->up); >> ovs_mutex_unlock(&dev->mutex); >> exists = true; >> @@ -4992,22 +5073,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev) >> } >> >> err = dpdk_eth_dev_init(dev); >> - >> - if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) { >> - netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; >> - } else { >> - netdev->ol_flags &= ~NETDEV_TX_OFFLOAD_IPV4_CKSUM; >> - } >> - >> - if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { >> - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; >> - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; >> - netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; >> - netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; >> - if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { >> - netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; >> - } >> - } >> + netdev_dpdk_update_netdev_flags(dev); >> >> /* If both requested and actual hwaddr were previously >> * unset (initialized to 0), then first device init above >> @@ -5049,11 +5115,6 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev) >> dev->tx_q[0].map = 0; >> } >> >> - if (userspace_tso_enabled()) { >> - dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; >> - VLOG_DBG("%s: TSO enabled on vhost port", netdev_get_name(&dev->up)); >> - } >> - >> 
netdev_dpdk_remap_txqs(dev); >> >> if (netdev_dpdk_get_vid(dev) >= 0) { >> @@ -5074,6 +5135,8 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev) >> } >> } >> >> + netdev_dpdk_update_netdev_flags(dev); >> + >> return 0; >> } >> >> @@ -5095,8 +5158,6 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) >> { >> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); >> int err; >> - uint64_t vhost_flags = 0; >> - uint64_t vhost_unsup_flags; >> >> ovs_mutex_lock(&dev->mutex); >> >> @@ -5106,6 +5167,9 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) >> * 2. A path has been specified. >> */ >> if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT) && dev->vhost_id) { >> + uint64_t virtio_unsup_features = 0; >> + uint64_t vhost_flags = 0; >> + >> /* Register client-mode device. */ >> vhost_flags |= RTE_VHOST_USER_CLIENT; >> >> @@ -5149,22 +5213,22 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) >> } >> >> if (userspace_tso_enabled()) { >> - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; >> - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; >> - netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; >> - netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; >> - netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; >> - vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN >> - | 1ULL << VIRTIO_NET_F_HOST_UFO; >> + virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_ECN >> + | 1ULL << VIRTIO_NET_F_HOST_UFO; >> + VLOG_DBG("%s: TSO enabled on vhost port", >> + netdev_get_name(&dev->up)); >> } else { >> - /* This disables checksum offloading and all the features >> - * that depends on it (TSO, UFO, ECN) according to virtio >> - * specification. */ >> - vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM; >> + /* Advertise checksum offloading to the guest, but explicitly >> + * disable TSO and friends. >> + * NOTE: we can't disable HOST_ECN which may have been wrongly >> + * negotiated by a running guest. 
*/ >> + virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_TSO4 >> + | 1ULL << VIRTIO_NET_F_HOST_TSO6 >> + | 1ULL << VIRTIO_NET_F_HOST_UFO; >> } >> >> err = rte_vhost_driver_disable_features(dev->vhost_id, >> - vhost_unsup_flags); >> + virtio_unsup_features); >> if (err) { >> VLOG_ERR("rte_vhost_driver_disable_features failed for " >> "vhost user client port: %s\n", dev->up.name); >> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c >> index 59e8dc0ae..4d8ebdae5 100644 >> --- a/lib/netdev-linux.c >> +++ b/lib/netdev-linux.c >> @@ -938,14 +938,6 @@ netdev_linux_common_construct(struct netdev *netdev_) >> netnsid_unset(&netdev->netnsid); >> ovs_mutex_init(&netdev->mutex); >> >> - if (userspace_tso_enabled()) { >> - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; >> - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; >> - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; >> - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; >> - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; >> - } >> - >> return 0; >> } >> >> @@ -959,6 +951,16 @@ netdev_linux_construct(struct netdev *netdev_) >> return error; >> } >> >> + /* The socket interface doesn't offer the option to enable only >> + * csum offloading without TSO. 
*/ >> + if (userspace_tso_enabled()) { >> + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; >> + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; >> + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; >> + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; >> + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; >> + } >> + >> error = get_flags(&netdev->up, &netdev->ifi_flags); >> if (error == ENODEV) { >> if (netdev->up.netdev_class != &netdev_internal_class) { >> @@ -987,6 +989,7 @@ netdev_linux_construct_tap(struct netdev *netdev_) >> struct netdev_linux *netdev = netdev_linux_cast(netdev_); >> static const char tap_dev[] = "/dev/net/tun"; >> const char *name = netdev_->name; >> + unsigned long oflags; >> struct ifreq ifr; >> >> int error = netdev_linux_common_construct(netdev_); >> @@ -1004,10 +1007,7 @@ netdev_linux_construct_tap(struct netdev *netdev_) >> >> /* Create tap device. */ >> get_flags(&netdev->up, &netdev->ifi_flags); >> - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; >> - if (userspace_tso_enabled()) { >> - ifr.ifr_flags |= IFF_VNET_HDR; >> - } >> + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; >> >> ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name); >> if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) { >> @@ -1030,21 +1030,22 @@ netdev_linux_construct_tap(struct netdev *netdev_) >> goto error_close; >> } >> >> + oflags = TUN_F_CSUM; >> if (userspace_tso_enabled()) { >> - /* Old kernels don't support TUNSETOFFLOAD. If TUNSETOFFLOAD is >> - * available, it will return EINVAL when a flag is unknown. >> - * Therefore, try enabling offload with no flags to check >> - * if TUNSETOFFLOAD support is available or not. 
*/ >> - if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, 0) == 0 || errno != EINVAL) { >> - unsigned long oflags = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6; >> - >> - if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == -1) { >> - VLOG_WARN("%s: enabling tap offloading failed: %s", name, >> - ovs_strerror(errno)); >> - error = errno; >> - goto error_close; >> - } >> - } >> + oflags |= (TUN_F_TSO4 | TUN_F_TSO6); >> + } >> + >> + if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) { >> + netdev_->ol_flags |= (NETDEV_TX_OFFLOAD_IPV4_CKSUM >> + | NETDEV_TX_OFFLOAD_TCP_CKSUM >> + | NETDEV_TX_OFFLOAD_UDP_CKSUM); >> + >> + if (userspace_tso_enabled()) { >> + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; >> + } >> + } else { >> + VLOG_WARN("%s: Disabling hardware offloading: %s", name, >> + ovs_strerror(errno)); >> } >> >> netdev->present = true; >> @@ -1344,18 +1345,22 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu, >> pkt = buffers[i]; >> } >> >> - if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) { >> - struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); >> - struct netdev_linux *netdev = netdev_linux_cast(netdev_); >> + if (virtio_net_hdr_size) { >> + int ret = netdev_linux_parse_vnet_hdr(pkt); >> + if (OVS_UNLIKELY(ret)) { >> + struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); >> + struct netdev_linux *netdev = netdev_linux_cast(netdev_); >> >> - /* Unexpected error situation: the virtio header is not present >> - * or corrupted. Drop the packet but continue in case next ones >> - * are correct. */ >> - dp_packet_delete(pkt); >> - netdev->rx_dropped += 1; >> - VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header", >> - netdev_get_name(netdev_)); >> - continue; >> + /* Unexpected error situation: the virtio header is not >> + * present or corrupted or contains unsupported features. >> + * Drop the packet but continue in case next ones are >> + * correct. 
*/ >> + dp_packet_delete(pkt); >> + netdev->rx_dropped += 1; >> + VLOG_WARN_RL(&rl, "%s: Dropped packet: %s", >> + netdev_get_name(netdev_), ovs_strerror(ret)); >> + continue; >> + } >> } >> >> for (cmsg = CMSG_FIRSTHDR(&mmsgs[i].msg_hdr); cmsg; >> @@ -1403,7 +1408,6 @@ static int >> netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, >> struct dp_packet_batch *batch) >> { >> - int virtio_net_hdr_size; >> ssize_t retval; >> size_t std_len; >> int iovlen; >> @@ -1413,16 +1417,14 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, >> /* Use the buffer from the allocated packet below to receive MTU >> * sized packets and an aux_buf for extra TSO data. */ >> iovlen = IOV_TSO_SIZE; >> - virtio_net_hdr_size = sizeof(struct virtio_net_hdr); >> } else { >> /* Use only the buffer from the allocated packet. */ >> iovlen = IOV_STD_SIZE; >> - virtio_net_hdr_size = 0; >> } >> >> /* The length here needs to be accounted in the same way when the >> * aux_buf is allocated so that it can be prepended to TSO buffer. */ >> - std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu; >> + std_len = sizeof(struct virtio_net_hdr) + VLAN_ETH_HEADER_LEN + mtu; >> for (i = 0; i < NETDEV_MAX_BURST; i++) { >> struct dp_packet *buffer; >> struct dp_packet *pkt; >> @@ -1462,7 +1464,7 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, >> pkt = buffer; >> } >> >> - if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) { >> + if (netdev_linux_parse_vnet_hdr(pkt)) { >> struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); >> struct netdev_linux *netdev = netdev_linux_cast(netdev_); >> >> @@ -1611,7 +1613,7 @@ netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu, >> * on other interface types because we attach a socket filter to the rx >> * socket. 
*/ >> static int >> -netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu, >> +netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu, >> struct dp_packet_batch *batch) >> { >> struct netdev_linux *netdev = netdev_linux_cast(netdev_); >> @@ -1632,9 +1634,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu, >> ssize_t retval; >> int error; >> >> - if (tso) { >> - netdev_linux_prepend_vnet_hdr(packet, mtu); >> - } >> + netdev_linux_prepend_vnet_hdr(packet, mtu); >> >> size = dp_packet_size(packet); >> do { >> @@ -1765,7 +1765,7 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED, >> >> error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch); >> } else { >> - error = netdev_linux_tap_batch_send(netdev_, tso, mtu, batch); >> + error = netdev_linux_tap_batch_send(netdev_, mtu, batch); >> } >> if (error) { >> if (error == ENOBUFS) { >> @@ -6819,53 +6819,73 @@ netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto) >> return 0; >> } >> >> +/* Initializes packet 'b' with features enabled in the prepended >> + * struct virtio_net_hdr. Returns 0 if successful, otherwise a >> + * positive errno value. 
*/ >> static int >> netdev_linux_parse_vnet_hdr(struct dp_packet *b) >> { >> struct virtio_net_hdr *vnet = dp_packet_pull(b, sizeof *vnet); >> - uint16_t l4proto = 0; >> >> if (OVS_UNLIKELY(!vnet)) { >> - return -EINVAL; >> + return EINVAL; >> } >> >> if (vnet->flags == 0 && vnet->gso_type == VIRTIO_NET_HDR_GSO_NONE) { >> return 0; >> } >> >> - if (netdev_linux_parse_l2(b, &l4proto)) { >> - return -EINVAL; >> - } >> - >> if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { >> - if (l4proto == IPPROTO_TCP) { >> - dp_packet_hwol_set_csum_tcp(b); >> - } else if (l4proto == IPPROTO_UDP) { >> - dp_packet_hwol_set_csum_udp(b); >> - } else if (l4proto == IPPROTO_SCTP) { >> - dp_packet_hwol_set_csum_sctp(b); >> - } >> - } >> + uint16_t l4proto = 0; >> >> - if (l4proto && vnet->gso_type != VIRTIO_NET_HDR_GSO_NONE) { >> - uint8_t allowed_mask = VIRTIO_NET_HDR_GSO_TCPV4 >> - | VIRTIO_NET_HDR_GSO_TCPV6 >> - | VIRTIO_NET_HDR_GSO_UDP; >> - uint8_t type = vnet->gso_type & allowed_mask; >> - >> - if (type == VIRTIO_NET_HDR_GSO_TCPV4 >> - || type == VIRTIO_NET_HDR_GSO_TCPV6) { >> - dp_packet_hwol_set_tcp_seg(b); >> + if (netdev_linux_parse_l2(b, &l4proto)) { >> + return EINVAL; >> } >> - } >> >> - return 0; >> + if (l4proto == IPPROTO_UDP) { >> + dp_packet_hwol_set_csum_udp(b); >> + } >> + /* The packet has offloaded checksum. However, there is no >> + * additional information like the protocol used, so it would >> + * require to parse the packet here. The checksum starting point >> + * and offset are going to be verified when the packet headers >> + * are parsed during miniflow extraction. */> + b->csum_start = (OVS_FORCE uint16_t) vnet->csum_start; >> + b->csum_offset = (OVS_FORCE uint16_t) vnet->csum_offset; >> + } else { >> + b->csum_start = 0; >> + b->csum_offset = 0; >> + } >> + >> + int ret = 0; >> + switch (vnet->gso_type) { >> + case VIRTIO_NET_HDR_GSO_TCPV4: >> + case VIRTIO_NET_HDR_GSO_TCPV6: >> + /* FIXME: The packet has offloaded TCP segmentation. 
The gso_size >> + * is given and needs to be respected. */ >> + dp_packet_hwol_set_tcp_seg(b); >> + break; > An empty line should separate cases. > >> + case VIRTIO_NET_HDR_GSO_UDP: >> + /* UFO is not supported. */ >> + VLOG_WARN_RL(&rl, "Received an unsupported packet with UFO enabled."); >> + ret = ENOTSUP; >> + break; >> + case VIRTIO_NET_HDR_GSO_NONE: >> + break; >> + default: >> + ret = ENOTSUP; >> + VLOG_WARN_RL(&rl, "Received an unsupported packet with GSO type: 0x%x", >> + vnet->gso_type); >> + } >> + >> + return ret; >> } >> >> static void >> netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu) >> { >> - struct virtio_net_hdr *vnet = dp_packet_push_zeros(b, sizeof *vnet); >> + struct virtio_net_hdr v; >> + struct virtio_net_hdr *vnet = &v; >> >> if (dp_packet_hwol_is_tso(b)) { >> uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char *)dp_packet_eth(b)) >> @@ -6875,30 +6895,92 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu) >> vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len); >> if (dp_packet_hwol_is_ipv4(b)) { >> vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; >> - } else { >> + } else if (dp_packet_hwol_tx_ipv6(b)) { >> vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; >> } >> >> } else { >> - vnet->flags = VIRTIO_NET_HDR_GSO_NONE; >> - } >> - >> - if (dp_packet_hwol_l4_mask(b)) { >> - vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; >> - vnet->csum_start = (OVS_FORCE __virtio16)((char *)dp_packet_l4(b) >> - - (char *)dp_packet_eth(b)); >> - >> + vnet->hdr_len = 0; >> + vnet->gso_size = 0; >> + vnet->gso_type = VIRTIO_NET_HDR_GSO_NONE; >> + } >> + >> + if (dp_packet_l4_checksum_good(b)) { >> + /* The packet has good checksum in the packet. > 'in the header' ? > >> + * No need to validate again. */ >> + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; >> + vnet->flags = VIRTIO_NET_HDR_F_DATA_VALID; >> + } else if (dp_packet_hwol_tx_l4_checksum(b)) { >> + /* The csum calculation is offloaded. 
*/ >> if (dp_packet_hwol_l4_is_tcp(b)) { >> + /* Virtual I/O Device (VIRTIO) Version 1.1 >> + * 5.1.6.2 Packet Transmission >> + If the driver negotiated VIRTIO_NET_F_CSUM, it can skip >> + checksumming the packet: >> + - flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM set, >> + - csum_start is set to the offset within the packet >> + to begin checksumming, and >> + - csum_offset indicates how many bytes after the >> + csum_start the new (16 bit ones complement) checksum >> + is placed by the device. >> + The TCP checksum field in the packet is set to the sum of >> + the TCP pseudo header, so that replacing it by the ones >> + complement checksum of the TCP header and body will give >> + the correct result. */ > Comment style is strange. > >> + >> + struct tcp_header *tcp_hdr = dp_packet_l4(b); >> + ovs_be16 csum = 0; >> + if (dp_packet_hwol_is_ipv4(b)) { >> + const struct ip_header *ip_hdr = dp_packet_l3(b); >> + csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr)); >> + } else if (dp_packet_hwol_tx_ipv6(b)) { >> + const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b); >> + csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr)); >> + } >> + >> + tcp_hdr->tcp_csum = csum; >> + vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; >> + vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs; >> vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof( >> struct tcp_header, tcp_csum); >> } else if (dp_packet_hwol_l4_is_udp(b)) { >> + struct udp_header *udp_hdr = dp_packet_l4(b); >> + ovs_be16 csum = 0; >> + >> + if (dp_packet_hwol_is_ipv4(b)) { >> + const struct ip_header *ip_hdr = dp_packet_l3(b); >> + csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr)); >> + } else if (dp_packet_hwol_tx_ipv6(b)) { >> + const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b); >> + csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr)); >> + } >> + >> + udp_hdr->udp_csum = csum; >> + vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; >> + vnet->csum_start = (OVS_FORCE __virtio16) 
b->l4_ofs; >> vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof( > I wonder why we're using __builtin_offsetof() instead of just offsetof(). > Not an issue of this patch though. > >> struct udp_header, udp_csum); >> } else if (dp_packet_hwol_l4_is_sctp(b)) { >> - vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof( >> - struct sctp_header, sctp_csum); >> + /* The Linux kernel networking stack only supports csum_start >> + * and csum_offset when SCTP GSO is enabled. See kernel's >> + * skb_csum_hwoffload_help(). Currently there is no SCTP >> + * segmentation offload support in OVS. */ >> + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; >> + vnet->flags = 0; >> } else { >> - VLOG_WARN_RL(&rl, "Unsupported L4 protocol"); >> + /* This should only happen when DP_PACKET_OL_TX_L4_MASK includes >> + * a new flag that is not covered in above checks. */ >> + VLOG_WARN_RL(&rl, "Unsupported L4 checksum offload. " >> + "Flags: %"PRIu64, >> + (uint64_t)*dp_packet_ol_flags_ptr(b)); >> + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; >> + vnet->flags = 0; >> } >> + } else { >> + /* Packet L4 csum is unknown. 
*/ >> + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; >> + vnet->flags = 0; >> } >> + >> + dp_packet_push(b, vnet, sizeof *vnet); >> } >> diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c >> index 754e2d78d..dc054336a 100644 >> --- a/lib/netdev-native-tnl.c >> +++ b/lib/netdev-native-tnl.c >> @@ -224,28 +224,6 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl, >> return udp + 1; >> } >> >> -static void >> -netdev_tnl_calc_udp_csum(struct udp_header *udp, struct dp_packet *packet, >> - int ip_tot_size) >> -{ >> - uint32_t csum; >> - >> - if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) { >> - csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr( >> - dp_packet_data(packet))); >> - } else { >> - csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr( >> - dp_packet_data(packet))); >> - } >> - >> - csum = csum_continue(csum, udp, ip_tot_size); >> - udp->udp_csum = csum_finish(csum); >> - >> - if (!udp->udp_csum) { >> - udp->udp_csum = htons(0xffff); >> - } >> -} >> - >> void >> netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, >> struct dp_packet *packet, >> @@ -260,9 +238,9 @@ netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, >> udp->udp_src = netdev_tnl_get_src_port(packet); >> udp->udp_len = htons(ip_tot_size); >> >> - if (udp->udp_csum) { >> - netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size); >> - } >> + /* Postpone checksum to the egress netdev. */ >> + dp_packet_hwol_set_csum_udp(packet); >> + dp_packet_ol_reset_l4_csum_good(packet); >> } >> >> static void * >> @@ -806,7 +784,9 @@ netdev_gtpu_push_header(const struct netdev *netdev, >> data->header_len, &ip_tot_size); >> udp->udp_src = netdev_tnl_get_src_port(packet); >> udp->udp_len = htons(ip_tot_size); >> - netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size); >> + /* Postpone checksum to the egress netdev. 
*/ >> + dp_packet_hwol_set_csum_udp(packet); >> + dp_packet_ol_reset_l4_csum_good(packet); >> >> gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1); >> >> diff --git a/lib/netdev.c b/lib/netdev.c >> index 6d3f678f0..12e1cb948 100644 >> --- a/lib/netdev.c >> +++ b/lib/netdev.c >> @@ -798,8 +798,6 @@ static bool >> netdev_send_prepare_packet(const uint64_t netdev_flags, >> struct dp_packet *packet, char **errormsg) >> { >> - uint64_t l4_mask; >> - >> if (dp_packet_hwol_is_tso(packet) >> && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { >> /* Fall back to GSO in software. */ >> @@ -812,36 +810,16 @@ netdev_send_prepare_packet(const uint64_t netdev_flags, >> * netdev to decide what would be the best to do. >> * Provide a software fallback in case the device doesn't support IP csum >> * offloading. Note: Encapsulated packet must have the inner IP header >> + * csum already calculated. >> + * Packet with L4 csum offloading enabled was received with verified csum. >> + * Leave the L4 csum offloading enabled even with good checksum for the >> + * netdev to decide what would be the best to do. >> + * Netdev that requires pseudo header csum needs to calculate that. >> + * Provide a software fallback in case the netdev doesn't support L4 csum >> + * offloading. Note: Encapsulated packet must have the inner L4 header >> * csum already calculated. */ >> dp_packet_ol_send_prepare(packet, netdev_flags); >> >> - l4_mask = dp_packet_hwol_l4_mask(packet); >> - if (l4_mask) { >> - if (dp_packet_hwol_l4_is_tcp(packet)) { >> - if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { >> - /* Fall back to TCP csum in software. */ >> - VLOG_ERR_BUF(errormsg, "No TCP checksum support"); >> - return false; >> - } >> - } else if (dp_packet_hwol_l4_is_udp(packet)) { >> - if (!(netdev_flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { >> - /* Fall back to UDP csum in software. 
*/ >> - VLOG_ERR_BUF(errormsg, "No UDP checksum support"); >> - return false; >> - } >> - } else if (dp_packet_hwol_l4_is_sctp(packet)) { >> - if (!(netdev_flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)) { >> - /* Fall back to SCTP csum in software. */ >> - VLOG_ERR_BUF(errormsg, "No SCTP checksum support"); >> - return false; >> - } >> - } else { >> - VLOG_ERR_BUF(errormsg, "No L4 checksum support: mask: %"PRIu64, >> - l4_mask); >> - return false; >> - } >> - } >> - >> return true; >> } >> >> @@ -974,20 +952,16 @@ netdev_push_header(const struct netdev *netdev, >> size_t i, size = dp_packet_batch_size(batch); >> >> DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { >> - if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet) >> - || dp_packet_hwol_l4_mask(packet))) { >> + if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet))) { >> COVERAGE_INC(netdev_push_header_drops); >> dp_packet_delete(packet); >> - VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is " >> + VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO offloading is " > TSO already contains the word 'offloading'. > >> "not supported: packet dropped", >> netdev_get_name(netdev)); >> } else { >> /* The packet is going to be encapsulated and there is >> * no support yet for inner network header csum offloading. 
*/ >> - if (dp_packet_hwol_tx_ip_csum(packet) >> - && !dp_packet_ip_checksum_good(packet)) { >> - dp_packet_ip_set_header_csum(packet); >> - } >> + dp_packet_ol_send_prepare(packet, 0); >> >> netdev->netdev_class->push_header(netdev, packet, data); >> >> diff --git a/lib/packets.c b/lib/packets.c >> index a1d668190..8c69e6e3e 100644 >> --- a/lib/packets.c >> +++ b/lib/packets.c >> @@ -1131,16 +1131,22 @@ packet_set_ipv4_addr(struct dp_packet *packet, >> pkt_metadata_init_conn(&packet->md); >> >> if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) { >> - struct tcp_header *th = dp_packet_l4(packet); >> - >> - th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr); >> + if (dp_packet_hwol_l4_is_tcp(packet)) { >> + dp_packet_ol_reset_l4_csum_good(packet); >> + } else { >> + struct tcp_header *th = dp_packet_l4(packet); >> + th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr); >> + } >> } else if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN ) { >> - struct udp_header *uh = dp_packet_l4(packet); >> - >> - if (uh->udp_csum) { >> - uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr); >> - if (!uh->udp_csum) { >> - uh->udp_csum = htons(0xffff); >> + if (dp_packet_hwol_l4_is_udp(packet)) { >> + dp_packet_ol_reset_l4_csum_good(packet); >> + } else { >> + struct udp_header *uh = dp_packet_l4(packet); >> + if (uh->udp_csum) { >> + uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr); >> + if (!uh->udp_csum) { >> + uh->udp_csum = htons(0xffff); >> + } >> } >> } >> } >> @@ -1246,16 +1252,24 @@ packet_update_csum128(struct dp_packet *packet, uint8_t proto, >> size_t l4_size = dp_packet_l4_size(packet); >> >> if (proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) { >> - struct tcp_header *th = dp_packet_l4(packet); >> + if (dp_packet_hwol_l4_is_tcp(packet)) { >> + dp_packet_ol_reset_l4_csum_good(packet); >> + } else { >> + struct tcp_header *th = dp_packet_l4(packet); >> >> - th->tcp_csum = recalc_csum128(th->tcp_csum, 
addr, new_addr); >> + th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr); >> + } >> } else if (proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) { >> - struct udp_header *uh = dp_packet_l4(packet); >> + if (dp_packet_hwol_l4_is_udp(packet)) { >> + dp_packet_ol_reset_l4_csum_good(packet); >> + } else { >> + struct udp_header *uh = dp_packet_l4(packet); >> >> - if (uh->udp_csum) { >> - uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); >> - if (!uh->udp_csum) { >> - uh->udp_csum = htons(0xffff); >> + if (uh->udp_csum) { >> + uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); >> + if (!uh->udp_csum) { >> + uh->udp_csum = htons(0xffff); >> + } >> } >> } >> } else if (proto == IPPROTO_ICMPV6 && >> @@ -1375,7 +1389,9 @@ static void >> packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum) >> { >> if (*port != new_port) { >> - *csum = recalc_csum16(*csum, *port, new_port); >> + if (csum) { >> + *csum = recalc_csum16(*csum, *port, new_port); >> + } >> *port = new_port; >> } >> } >> @@ -1387,9 +1403,16 @@ void >> packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) >> { >> struct tcp_header *th = dp_packet_l4(packet); >> + ovs_be16 *csum = NULL; >> + >> + if (dp_packet_hwol_l4_is_tcp(packet)) { >> + dp_packet_ol_reset_l4_csum_good(packet); >> + } else { >> + csum = &th->tcp_csum; >> + } >> >> - packet_set_port(&th->tcp_src, src, &th->tcp_csum); >> - packet_set_port(&th->tcp_dst, dst, &th->tcp_csum); >> + packet_set_port(&th->tcp_src, src, csum); >> + packet_set_port(&th->tcp_dst, dst, csum); >> pkt_metadata_init_conn(&packet->md); >> } >> >> @@ -1401,17 +1424,21 @@ packet_set_udp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) >> { >> struct udp_header *uh = dp_packet_l4(packet); >> >> - if (uh->udp_csum) { >> - packet_set_port(&uh->udp_src, src, &uh->udp_csum); >> - packet_set_port(&uh->udp_dst, dst, &uh->udp_csum); >> + if (dp_packet_hwol_l4_is_udp(packet)) { >> + 
dp_packet_ol_reset_l4_csum_good(packet); >> + packet_set_port(&uh->udp_src, src, NULL); >> + packet_set_port(&uh->udp_dst, dst, NULL); >> + } else { >> + ovs_be16 *csum = uh->udp_csum ? &uh->udp_csum : NULL; >> + >> + packet_set_port(&uh->udp_src, src, csum); >> + packet_set_port(&uh->udp_dst, dst, csum); >> >> - if (!uh->udp_csum) { >> + if (csum && !uh->udp_csum) { >> uh->udp_csum = htons(0xffff); >> } >> - } else { >> - uh->udp_src = src; >> - uh->udp_dst = dst; >> } >> + >> pkt_metadata_init_conn(&packet->md); >> } >> >> @@ -1422,18 +1449,27 @@ void >> packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) >> { >> struct sctp_header *sh = dp_packet_l4(packet); >> - ovs_be32 old_csum, old_correct_csum, new_csum; >> - uint16_t tp_len = dp_packet_l4_size(packet); >> >> - old_csum = get_16aligned_be32(&sh->sctp_csum); >> - put_16aligned_be32(&sh->sctp_csum, 0); >> - old_correct_csum = crc32c((void *)sh, tp_len); >> + if (dp_packet_hwol_l4_is_sctp(packet)) { >> + dp_packet_ol_reset_l4_csum_good(packet); >> + sh->sctp_src = src; >> + sh->sctp_dst = dst; >> + } else { >> + ovs_be32 old_csum, old_correct_csum, new_csum; >> + uint16_t tp_len = dp_packet_l4_size(packet); >> >> - sh->sctp_src = src; >> - sh->sctp_dst = dst; >> + old_csum = get_16aligned_be32(&sh->sctp_csum); >> + put_16aligned_be32(&sh->sctp_csum, 0); >> + old_correct_csum = crc32c((void *) sh, tp_len); >> + >> + sh->sctp_src = src; >> + sh->sctp_dst = dst; >> + >> + new_csum = crc32c((void *) sh, tp_len); >> + put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum >> + ^ new_csum); >> + } >> >> - new_csum = crc32c((void *)sh, tp_len); >> - put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum); >> pkt_metadata_init_conn(&packet->md); >> } >> >> @@ -1957,3 +1993,72 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6) >> } >> } >> } >> + >> +/* Set TCP checksum field in packet 'p' with complete checksum. >> + * The packet must have the L3 and L4 offsets. 
*/ >> +void >> +packet_tcp_complete_csum(struct dp_packet *p) >> +{ >> + struct tcp_header *tcp = dp_packet_l4(p); >> + >> + tcp->tcp_csum = 0; >> + if (dp_packet_hwol_is_ipv4(p)) { >> + struct ip_header *ip = dp_packet_l3(p); >> + >> + tcp->tcp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip), >> + tcp, dp_packet_l4_size(p))); >> + } else if (dp_packet_hwol_tx_ipv6(p)) { >> + struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p); >> + >> + tcp->tcp_csum = packet_csum_upperlayer6(ip6, tcp, ip6->ip6_nxt, >> + dp_packet_l4_size(p)); >> + } else { >> + OVS_NOT_REACHED(); >> + } >> +} >> + >> +/* Set UDP checksum field in packet 'p' with complete checksum. >> + * The packet must have the L3 and L4 offsets. */ >> +void >> +packet_udp_complete_csum(struct dp_packet *p) >> +{ >> + struct udp_header *udp = dp_packet_l4(p); >> + >> + /* Skip csum calculation if the udp_csum is zero. */ >> + if (!udp->udp_csum) { >> + return; >> + } >> + >> + udp->udp_csum = 0; >> + if (dp_packet_hwol_is_ipv4(p)) { >> + struct ip_header *ip = dp_packet_l3(p); >> + >> + udp->udp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip), >> + udp, dp_packet_l4_size(p))); >> + } else if (dp_packet_hwol_tx_ipv6(p)) { >> + struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p); >> + >> + udp->udp_csum = packet_csum_upperlayer6(ip6, udp, ip6->ip6_nxt, >> + dp_packet_l4_size(p)); >> + } else { >> + OVS_NOT_REACHED(); >> + } >> + >> + if (!udp->udp_csum) { >> + udp->udp_csum = htons(0xffff); >> + } >> +} >> + >> +/* Set SCTP checksum field in packet 'p' with complete checksum. >> + * The packet must have the L3 and L4 offsets. 
*/ >> +void >> +packet_sctp_complete_csum(struct dp_packet *p) >> +{ >> + struct sctp_header *sh = dp_packet_l4(p); >> + uint16_t tp_len = dp_packet_l4_size(p); >> + ovs_be32 csum; >> + >> + put_16aligned_be32(&sh->sctp_csum, 0); >> + csum = crc32c((void *) sh, tp_len); >> + put_16aligned_be32(&sh->sctp_csum, csum); >> +} >> diff --git a/lib/packets.h b/lib/packets.h >> index 5bdf6e4bb..28950b8b1 100644 >> --- a/lib/packets.h >> +++ b/lib/packets.h >> @@ -1643,6 +1643,9 @@ void packet_put_ra_prefix_opt(struct dp_packet *, >> const ovs_be128 router_prefix); >> uint32_t packet_csum_pseudoheader(const struct ip_header *); >> void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6); >> +void packet_tcp_complete_csum(struct dp_packet *); >> +void packet_udp_complete_csum(struct dp_packet *); >> +void packet_sctp_complete_csum(struct dp_packet *); >> >> #define DNS_HEADER_LEN 12 >> struct dns_header { > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff --git a/lib/conntrack.c b/lib/conntrack.c index 12194cce8..57e6a55e0 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -2118,13 +2118,12 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, } if (ok) { - bool hwol_bad_l4_csum = dp_packet_l4_checksum_bad(pkt); - if (!hwol_bad_l4_csum) { - bool hwol_good_l4_csum = dp_packet_l4_checksum_good(pkt) - || dp_packet_hwol_tx_l4_checksum(pkt); + if (!dp_packet_l4_checksum_bad(pkt)) { /* Validate the checksum only when hwol is not supported. */ if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt), - &ctx->icmp_related, l3, !hwol_good_l4_csum, + &ctx->icmp_related, l3, + !dp_packet_l4_checksum_good(pkt) && + !dp_packet_hwol_tx_l4_checksum(pkt), NULL)) { ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); return true; @@ -3453,8 +3452,10 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx, adj_seqnum(&th->tcp_seq, ec->seq_skew); } - th->tcp_csum = 0; - if (!dp_packet_hwol_tx_l4_checksum(pkt)) { + if (dp_packet_hwol_tx_l4_checksum(pkt)) { + dp_packet_ol_reset_l4_csum_good(pkt); + } else { + th->tcp_csum = 0; if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) { th->tcp_csum = packet_csum_upperlayer6(nh6, th, ctx->key.nw_proto, dp_packet_l4_size(pkt)); diff --git a/lib/dp-packet.c b/lib/dp-packet.c index 90ef85de3..2cfaf5274 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -38,6 +38,9 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so dp_packet_init_specific(b); /* By default assume the packet type to be Ethernet. */ b->packet_type = htonl(PT_ETH); + /* Reset csum start and offset. 
*/ + b->csum_start = 0; + b->csum_offset = 0; } static void @@ -544,4 +547,26 @@ dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags) dp_packet_ol_set_ip_csum_good(p); dp_packet_hwol_reset_tx_ip_csum(p); } + + if (dp_packet_l4_checksum_good(p) || !dp_packet_hwol_tx_l4_checksum(p)) { + dp_packet_hwol_reset_tx_l4_csum(p); + return; + } + + if (dp_packet_hwol_l4_is_tcp(p) + && !(flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { + packet_tcp_complete_csum(p); + dp_packet_ol_set_l4_csum_good(p); + dp_packet_hwol_reset_tx_l4_csum(p); + } else if (dp_packet_hwol_l4_is_udp(p) + && !(flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { + packet_udp_complete_csum(p); + dp_packet_ol_set_l4_csum_good(p); + dp_packet_hwol_reset_tx_l4_csum(p); + } else if (!(flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM) + && dp_packet_hwol_l4_is_sctp(p)) { + packet_sctp_complete_csum(p); + dp_packet_ol_set_l4_csum_good(p); + dp_packet_hwol_reset_tx_l4_csum(p); + } } diff --git a/lib/dp-packet.h b/lib/dp-packet.h index f60618716..d550b099c 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -140,6 +140,8 @@ struct dp_packet { or UINT16_MAX. */ uint32_t cutlen; /* length in bytes to cut from the end. */ ovs_be32 packet_type; /* Packet type as defined in OpenFlow */ + uint16_t csum_start; /* Position to start checksumming from. */ + uint16_t csum_offset; /* Offset to place checksum. */ union { struct pkt_metadata md; uint64_t data[DP_PACKET_CONTEXT_SIZE / 8]; @@ -995,6 +997,13 @@ dp_packet_hwol_is_ipv4(const struct dp_packet *b) return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_IPV4); } +/* Returns 'true' if packet 'p' is marked as IPv6. */ +static inline bool +dp_packet_hwol_tx_ipv6(const struct dp_packet *p) +{ + return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IPV6); +} + /* Returns 'true' if packet 'b' is marked for TCP checksum offloading. 
*/ static inline bool dp_packet_hwol_l4_is_tcp(const struct dp_packet *b) @@ -1019,18 +1028,26 @@ dp_packet_hwol_l4_is_sctp(struct dp_packet *b) DP_PACKET_OL_TX_SCTP_CKSUM; } -/* Mark packet 'b' for IPv4 checksum offloading. */ static inline void -dp_packet_hwol_set_tx_ipv4(struct dp_packet *b) +dp_packet_hwol_reset_tx_l4_csum(struct dp_packet *p) +{ + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_L4_MASK; +} + +/* Mark packet 'p' as IPv4. */ +static inline void +dp_packet_hwol_set_tx_ipv4(struct dp_packet *p) { - *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV4; + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6; + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4; } -/* Mark packet 'b' for IPv6 checksum offloading. */ +/* Mark packet 'a' as IPv6. */ static inline void -dp_packet_hwol_set_tx_ipv6(struct dp_packet *b) +dp_packet_hwol_set_tx_ipv6(struct dp_packet *a) { - *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6; + *dp_packet_ol_flags_ptr(a) &= ~DP_PACKET_OL_TX_IPV4; + *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6; } /* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */ @@ -1129,6 +1146,8 @@ dp_packet_ip_set_header_csum(struct dp_packet *p) ip->ip_csum = csum(ip, sizeof *ip); } +/* Returns 'true' if the packet 'p' has good integrity and the + * checksum in it is correct. */ static inline bool dp_packet_l4_checksum_good(const struct dp_packet *p) { @@ -1143,6 +1162,53 @@ dp_packet_l4_checksum_bad(const struct dp_packet *p) DP_PACKET_OL_RX_L4_CKSUM_BAD; } +/* Returns 'true' if the packet has good integrity though the + * checksum in the packet 'p' is not complete. */ +static inline bool +dp_packet_ol_l4_csum_partial(const struct dp_packet *p) +{ + return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK) == + DP_PACKET_OL_RX_L4_CKSUM_MASK; +} + +/* Marks packet 'p' with good integrity though the checksum in the + * packet is not complete. 
*/ +static inline void +dp_packet_ol_set_l4_csum_partial(const struct dp_packet *p) +{ + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_MASK; +} + +/* Marks packet 'p' with good L4 checksum. */ +static inline void +dp_packet_ol_set_l4_csum_good(const struct dp_packet *p) +{ + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_BAD; + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_GOOD; +} + +/* Marks packet 'p' with good L4 checksum as modified. */ +static inline void +dp_packet_ol_reset_l4_csum_good(const struct dp_packet *p) +{ + if (!dp_packet_ol_l4_csum_partial(p)) { + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_GOOD; + } +} + +/* Marks packet 'p' with good integrity if the 'start' and 'offset' + * matches with the 'csum_start' and 'csum_offset' in packet 'p'. + * The 'start' is the offset from the begin of the packet headers. + * The 'offset' is the offset from start to place the checksum. */ +static inline void +dp_packet_ol_vnet_csum_check(const struct dp_packet *p, uint16_t start, + uint16_t offset) +{ + if (p->csum_start == start && p->csum_offset == offset) { + dp_packet_ol_set_l4_csum_partial(p); + } +} + static inline void ALWAYS_INLINE dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet *packet) { diff --git a/lib/flow.c b/lib/flow.c index 6c8bf7fc0..5aaf3b420 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -1027,6 +1027,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) } else if (dl_type == htons(ETH_TYPE_IPV6)) { dp_packet_update_rss_hash_ipv6_tcp_udp(packet); } + dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs, + offsetof(struct tcp_header, + tcp_csum)); + if (dp_packet_l4_checksum_good(packet) + || dp_packet_ol_l4_csum_partial(packet)) { + dp_packet_hwol_set_csum_tcp(packet); + } } } } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) { @@ -1042,6 +1049,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) } else if (dl_type == htons(ETH_TYPE_IPV6)) { 
dp_packet_update_rss_hash_ipv6_tcp_udp(packet); } + dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs, + offsetof(struct udp_header, + udp_csum)); + if (dp_packet_l4_checksum_good(packet) + || dp_packet_ol_l4_csum_partial(packet)) { + dp_packet_hwol_set_csum_udp(packet); + } } } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) { if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) { @@ -1051,6 +1065,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) miniflow_push_be16(mf, tp_dst, sctp->sctp_dst); miniflow_push_be16(mf, ct_tp_src, ct_tp_src); miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst); + dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs, + offsetof(struct sctp_header, + sctp_csum)); + if (dp_packet_l4_checksum_good(packet) + || dp_packet_ol_l4_csum_partial(packet)) { + dp_packet_hwol_set_csum_sctp(packet); + } } } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) { if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) { @@ -3170,6 +3191,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow, tcp->tcp_csum = 0; tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum, tcp, l4_len)); + dp_packet_ol_set_l4_csum_good(p); } else if (flow->nw_proto == IPPROTO_UDP) { struct udp_header *udp = dp_packet_l4(p); @@ -3179,6 +3201,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow, if (!udp->udp_csum) { udp->udp_csum = htons(0xffff); } + dp_packet_ol_set_l4_csum_good(p); } else if (flow->nw_proto == IPPROTO_ICMP) { struct icmp_header *icmp = dp_packet_l4(p); diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 4ccc56b0e..d36d5a75a 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -146,17 +146,6 @@ typedef uint16_t dpdk_port_t; #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) -/* List of required flags advertised by the hardware that will be used - * if TSO is enabled. Ideally this should include - * RTE_ETH_TX_OFFLOAD_SCTP_CKSUM. 
However, very few drivers support that - * at the moment and SCTP is not a widely used protocol like TCP and UDP, - * so it's optional. */ -#define DPDK_TX_TSO_OFFLOAD_FLAGS (RTE_ETH_TX_OFFLOAD_TCP_TSO \ - | RTE_ETH_TX_OFFLOAD_TCP_CKSUM \ - | RTE_ETH_TX_OFFLOAD_UDP_CKSUM \ - | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) - - static const struct rte_eth_conf port_conf = { .rxmode = { .split_hdr_size = 0, @@ -407,8 +396,10 @@ enum dpdk_hw_ol_features { NETDEV_RX_HW_CRC_STRIP = 1 << 1, NETDEV_RX_HW_SCATTER = 1 << 2, NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3, - NETDEV_TX_TSO_OFFLOAD = 1 << 4, - NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5, + NETDEV_TX_TCP_CKSUM_OFFLOAD = 1 << 4, + NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5, + NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6, + NETDEV_TX_TSO_OFFLOAD = 1 << 7, }; /* @@ -1004,6 +995,35 @@ dpdk_watchdog(void *dummy OVS_UNUSED) return NULL; } +static void +netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev, + enum dpdk_hw_ol_features hw_ol_features, + enum netdev_ol_flags flag) +{ + struct netdev *netdev = &dev->up; + + if (dev->hw_ol_features & hw_ol_features) { + netdev->ol_flags |= flag; + } else { + netdev->ol_flags &= ~flag; + } +} + +static void +netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev) +{ + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_IPV4_CKSUM_OFFLOAD, + NETDEV_TX_OFFLOAD_IPV4_CKSUM); + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TCP_CKSUM_OFFLOAD, + NETDEV_TX_OFFLOAD_TCP_CKSUM); + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_UDP_CKSUM_OFFLOAD, + NETDEV_TX_OFFLOAD_UDP_CKSUM); + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_SCTP_CKSUM_OFFLOAD, + NETDEV_TX_OFFLOAD_SCTP_CKSUM); + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD, + NETDEV_TX_OFFLOAD_TCP_TSO); +} + static int dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) { @@ -1040,11 +1060,20 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; } + if (dev->hw_ol_features & 
NETDEV_TX_TCP_CKSUM_OFFLOAD) { + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM; + } + + if (dev->hw_ol_features & NETDEV_TX_UDP_CKSUM_OFFLOAD) { + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM; + } + + if (dev->hw_ol_features & NETDEV_TX_SCTP_CKSUM_OFFLOAD) { + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM; + } + if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { - conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS; - if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { - conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM; - } + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; } /* Limit configured rss hash functions to only those supported @@ -1150,7 +1179,6 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) struct rte_ether_addr eth_addr; int diag; int n_rxq, n_txq; - uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS; uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; @@ -1186,18 +1214,28 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD; } + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) { + dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD; + } else { + dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD; + } + + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) { + dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD; + } else { + dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD; + } + + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) { + dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD; + } else { + dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD; + } + dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; if (userspace_tso_enabled()) { - if ((info.tx_offload_capa & tx_tso_offload_capa) - == tx_tso_offload_capa) { + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) { dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; - if (info.tx_offload_capa & 
RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) { - dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD; - } else { - VLOG_WARN("%s: Tx SCTP checksum offload is not supported, " - "SCTP packets sent to this device will be dropped", - netdev_get_name(&dev->up)); - } } else { VLOG_WARN("%s: Tx TSO offload is not supported.", netdev_get_name(&dev->up)); @@ -1759,6 +1797,9 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args) smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false"); HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD); HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD); + HWOL_SMAP_ADD("tx_tcp_csum_offload", NETDEV_TX_TCP_CKSUM_OFFLOAD); + HWOL_SMAP_ADD("tx_udp_csum_offload", NETDEV_TX_UDP_CKSUM_OFFLOAD); + HWOL_SMAP_ADD("tx_sctp_csum_offload", NETDEV_TX_SCTP_CKSUM_OFFLOAD); HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD); #undef HWOL_SMAP_ADD smap_add(args, "lsc_interrupt_mode", @@ -2210,6 +2251,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt); mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt); + mbuf->l4_len = 0; mbuf->outer_l2_len = 0; mbuf->outer_l3_len = 0; @@ -3968,6 +4010,7 @@ new_device(int vid) ovs_mutex_lock(&dev->mutex); if (nullable_string_is_equal(ifname, dev->vhost_id)) { uint32_t qp_num = rte_vhost_get_vring_num(vid) / VIRTIO_QNUM; + uint64_t features; /* Get NUMA information */ newnode = rte_vhost_get_numa_node(vid); @@ -3992,6 +4035,36 @@ new_device(int vid) dev->vhost_reconfigured = true; } + if (rte_vhost_get_negotiated_features(vid, &features)) { + VLOG_INFO("Error checking guest features for " + "vHost Device '%s'", dev->vhost_id); + } else { + if (features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) { + dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD; + dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD; + dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD; + } + 
+ if (userspace_tso_enabled()) { + if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4) + && features & (1ULL << VIRTIO_NET_F_GUEST_TSO6)) { + + dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; + VLOG_DBG("%s: TSO enabled on vhost port", + netdev_get_name(&dev->up)); + } else { + VLOG_WARN("%s: Tx TSO offload is not supported.", + netdev_get_name(&dev->up)); + } + } + } + + /* There is no support in virtio net to offload IPv4 csum, + * but the vhost library handles IPv4 csum offloading fine. */ + dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD; + + netdev_dpdk_update_netdev_flags(dev); + ovsrcu_index_set(&dev->vid, vid); exists = true; @@ -4055,6 +4128,14 @@ destroy_device(int vid) dev->up.n_rxq * sizeof *dev->vhost_rxq_enabled); netdev_dpdk_txq_map_clear(dev); + /* Clear offload capabilities before next new_device. */ + dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD; + dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD; + dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD; + dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD; + dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; + netdev_dpdk_update_netdev_flags(dev); + netdev_change_seq_changed(&dev->up); ovs_mutex_unlock(&dev->mutex); exists = true; @@ -4992,22 +5073,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev) } err = dpdk_eth_dev_init(dev); - - if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) { - netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; - } else { - netdev->ol_flags &= ~NETDEV_TX_OFFLOAD_IPV4_CKSUM; - } - - if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; - netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; - netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; - if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { - netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; - } - } + netdev_dpdk_update_netdev_flags(dev); /* If both requested and actual hwaddr were previously * unset 
(initialized to 0), then first device init above @@ -5049,11 +5115,6 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev) dev->tx_q[0].map = 0; } - if (userspace_tso_enabled()) { - dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; - VLOG_DBG("%s: TSO enabled on vhost port", netdev_get_name(&dev->up)); - } - netdev_dpdk_remap_txqs(dev); if (netdev_dpdk_get_vid(dev) >= 0) { @@ -5074,6 +5135,8 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev) } } + netdev_dpdk_update_netdev_flags(dev); + return 0; } @@ -5095,8 +5158,6 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); int err; - uint64_t vhost_flags = 0; - uint64_t vhost_unsup_flags; ovs_mutex_lock(&dev->mutex); @@ -5106,6 +5167,9 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) * 2. A path has been specified. */ if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT) && dev->vhost_id) { + uint64_t virtio_unsup_features = 0; + uint64_t vhost_flags = 0; + /* Register client-mode device. */ vhost_flags |= RTE_VHOST_USER_CLIENT; @@ -5149,22 +5213,22 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) } if (userspace_tso_enabled()) { - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; - netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; - netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; - netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; - vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN - | 1ULL << VIRTIO_NET_F_HOST_UFO; + virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_ECN + | 1ULL << VIRTIO_NET_F_HOST_UFO; + VLOG_DBG("%s: TSO enabled on vhost port", + netdev_get_name(&dev->up)); } else { - /* This disables checksum offloading and all the features - * that depends on it (TSO, UFO, ECN) according to virtio - * specification. */ - vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM; + /* Advertise checksum offloading to the guest, but explicitly + * disable TSO and friends. 
+ * NOTE: we can't disable HOST_ECN which may have been wrongly + * negotiated by a running guest. */ + virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_TSO4 + | 1ULL << VIRTIO_NET_F_HOST_TSO6 + | 1ULL << VIRTIO_NET_F_HOST_UFO; } err = rte_vhost_driver_disable_features(dev->vhost_id, - vhost_unsup_flags); + virtio_unsup_features); if (err) { VLOG_ERR("rte_vhost_driver_disable_features failed for " "vhost user client port: %s\n", dev->up.name); diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 59e8dc0ae..4d8ebdae5 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -938,14 +938,6 @@ netdev_linux_common_construct(struct netdev *netdev_) netnsid_unset(&netdev->netnsid); ovs_mutex_init(&netdev->mutex); - if (userspace_tso_enabled()) { - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; - } - return 0; } @@ -959,6 +951,16 @@ netdev_linux_construct(struct netdev *netdev_) return error; } + /* The socket interface doesn't offer the option to enable only + * csum offloading without TSO. 
*/ + if (userspace_tso_enabled()) { + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; + } + error = get_flags(&netdev->up, &netdev->ifi_flags); if (error == ENODEV) { if (netdev->up.netdev_class != &netdev_internal_class) { @@ -987,6 +989,7 @@ netdev_linux_construct_tap(struct netdev *netdev_) struct netdev_linux *netdev = netdev_linux_cast(netdev_); static const char tap_dev[] = "/dev/net/tun"; const char *name = netdev_->name; + unsigned long oflags; struct ifreq ifr; int error = netdev_linux_common_construct(netdev_); @@ -1004,10 +1007,7 @@ netdev_linux_construct_tap(struct netdev *netdev_) /* Create tap device. */ get_flags(&netdev->up, &netdev->ifi_flags); - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - if (userspace_tso_enabled()) { - ifr.ifr_flags |= IFF_VNET_HDR; - } + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name); if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) { @@ -1030,21 +1030,22 @@ netdev_linux_construct_tap(struct netdev *netdev_) goto error_close; } + oflags = TUN_F_CSUM; if (userspace_tso_enabled()) { - /* Old kernels don't support TUNSETOFFLOAD. If TUNSETOFFLOAD is - * available, it will return EINVAL when a flag is unknown. - * Therefore, try enabling offload with no flags to check - * if TUNSETOFFLOAD support is available or not. 
*/ - if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, 0) == 0 || errno != EINVAL) { - unsigned long oflags = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6; - - if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == -1) { - VLOG_WARN("%s: enabling tap offloading failed: %s", name, - ovs_strerror(errno)); - error = errno; - goto error_close; - } - } + oflags |= (TUN_F_TSO4 | TUN_F_TSO6); + } + + if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) { + netdev_->ol_flags |= (NETDEV_TX_OFFLOAD_IPV4_CKSUM + | NETDEV_TX_OFFLOAD_TCP_CKSUM + | NETDEV_TX_OFFLOAD_UDP_CKSUM); + + if (userspace_tso_enabled()) { + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; + } + } else { + VLOG_WARN("%s: Disabling hardware offloading: %s", name, + ovs_strerror(errno)); } netdev->present = true; @@ -1344,18 +1345,22 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu, pkt = buffers[i]; } - if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) { - struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); - struct netdev_linux *netdev = netdev_linux_cast(netdev_); + if (virtio_net_hdr_size) { + int ret = netdev_linux_parse_vnet_hdr(pkt); + if (OVS_UNLIKELY(ret)) { + struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); + struct netdev_linux *netdev = netdev_linux_cast(netdev_); - /* Unexpected error situation: the virtio header is not present - * or corrupted. Drop the packet but continue in case next ones - * are correct. */ - dp_packet_delete(pkt); - netdev->rx_dropped += 1; - VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header", - netdev_get_name(netdev_)); - continue; + /* Unexpected error situation: the virtio header is not + * present or corrupted or contains unsupported features. + * Drop the packet but continue in case next ones are + * correct. 
*/ + dp_packet_delete(pkt); + netdev->rx_dropped += 1; + VLOG_WARN_RL(&rl, "%s: Dropped packet: %s", + netdev_get_name(netdev_), ovs_strerror(ret)); + continue; + } } for (cmsg = CMSG_FIRSTHDR(&mmsgs[i].msg_hdr); cmsg; @@ -1403,7 +1408,6 @@ static int netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, struct dp_packet_batch *batch) { - int virtio_net_hdr_size; ssize_t retval; size_t std_len; int iovlen; @@ -1413,16 +1417,14 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, /* Use the buffer from the allocated packet below to receive MTU * sized packets and an aux_buf for extra TSO data. */ iovlen = IOV_TSO_SIZE; - virtio_net_hdr_size = sizeof(struct virtio_net_hdr); } else { /* Use only the buffer from the allocated packet. */ iovlen = IOV_STD_SIZE; - virtio_net_hdr_size = 0; } /* The length here needs to be accounted in the same way when the * aux_buf is allocated so that it can be prepended to TSO buffer. */ - std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu; + std_len = sizeof(struct virtio_net_hdr) + VLAN_ETH_HEADER_LEN + mtu; for (i = 0; i < NETDEV_MAX_BURST; i++) { struct dp_packet *buffer; struct dp_packet *pkt; @@ -1462,7 +1464,7 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, pkt = buffer; } - if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) { + if (netdev_linux_parse_vnet_hdr(pkt)) { struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); struct netdev_linux *netdev = netdev_linux_cast(netdev_); @@ -1611,7 +1613,7 @@ netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu, * on other interface types because we attach a socket filter to the rx * socket. 
*/ static int -netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu, +netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu, struct dp_packet_batch *batch) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); @@ -1632,9 +1634,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu, ssize_t retval; int error; - if (tso) { - netdev_linux_prepend_vnet_hdr(packet, mtu); - } + netdev_linux_prepend_vnet_hdr(packet, mtu); size = dp_packet_size(packet); do { @@ -1765,7 +1765,7 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED, error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch); } else { - error = netdev_linux_tap_batch_send(netdev_, tso, mtu, batch); + error = netdev_linux_tap_batch_send(netdev_, mtu, batch); } if (error) { if (error == ENOBUFS) { @@ -6819,53 +6819,73 @@ netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto) return 0; } +/* Initializes packet 'b' with features enabled in the prepended + * struct virtio_net_hdr. Returns 0 if successful, otherwise a + * positive errno value. 
*/ static int netdev_linux_parse_vnet_hdr(struct dp_packet *b) { struct virtio_net_hdr *vnet = dp_packet_pull(b, sizeof *vnet); - uint16_t l4proto = 0; if (OVS_UNLIKELY(!vnet)) { - return -EINVAL; + return EINVAL; } if (vnet->flags == 0 && vnet->gso_type == VIRTIO_NET_HDR_GSO_NONE) { return 0; } - if (netdev_linux_parse_l2(b, &l4proto)) { - return -EINVAL; - } - if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { - if (l4proto == IPPROTO_TCP) { - dp_packet_hwol_set_csum_tcp(b); - } else if (l4proto == IPPROTO_UDP) { - dp_packet_hwol_set_csum_udp(b); - } else if (l4proto == IPPROTO_SCTP) { - dp_packet_hwol_set_csum_sctp(b); - } - } + uint16_t l4proto = 0; - if (l4proto && vnet->gso_type != VIRTIO_NET_HDR_GSO_NONE) { - uint8_t allowed_mask = VIRTIO_NET_HDR_GSO_TCPV4 - | VIRTIO_NET_HDR_GSO_TCPV6 - | VIRTIO_NET_HDR_GSO_UDP; - uint8_t type = vnet->gso_type & allowed_mask; - - if (type == VIRTIO_NET_HDR_GSO_TCPV4 - || type == VIRTIO_NET_HDR_GSO_TCPV6) { - dp_packet_hwol_set_tcp_seg(b); + if (netdev_linux_parse_l2(b, &l4proto)) { + return EINVAL; } - } - return 0; + if (l4proto == IPPROTO_UDP) { + dp_packet_hwol_set_csum_udp(b); + } + /* The packet has offloaded checksum. However, there is no + * additional information like the protocol used, so it would + * require to parse the packet here. The checksum starting point + * and offset are going to be verified when the packet headers + * are parsed during miniflow extraction. */ + b->csum_start = (OVS_FORCE uint16_t) vnet->csum_start; + b->csum_offset = (OVS_FORCE uint16_t) vnet->csum_offset; + } else { + b->csum_start = 0; + b->csum_offset = 0; + } + + int ret = 0; + switch (vnet->gso_type) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_TCPV6: + /* FIXME: The packet has offloaded TCP segmentation. The gso_size + * is given and needs to be respected. */ + dp_packet_hwol_set_tcp_seg(b); + break; + case VIRTIO_NET_HDR_GSO_UDP: + /* UFO is not supported. 
*/ + VLOG_WARN_RL(&rl, "Received an unsupported packet with UFO enabled."); + ret = ENOTSUP; + break; + case VIRTIO_NET_HDR_GSO_NONE: + break; + default: + ret = ENOTSUP; + VLOG_WARN_RL(&rl, "Received an unsupported packet with GSO type: 0x%x", + vnet->gso_type); + } + + return ret; } static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu) { - struct virtio_net_hdr *vnet = dp_packet_push_zeros(b, sizeof *vnet); + struct virtio_net_hdr v; + struct virtio_net_hdr *vnet = &v; if (dp_packet_hwol_is_tso(b)) { uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char *)dp_packet_eth(b)) @@ -6875,30 +6895,92 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu) vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len); if (dp_packet_hwol_is_ipv4(b)) { vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; - } else { + } else if (dp_packet_hwol_tx_ipv6(b)) { vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; } } else { - vnet->flags = VIRTIO_NET_HDR_GSO_NONE; - } - - if (dp_packet_hwol_l4_mask(b)) { - vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - vnet->csum_start = (OVS_FORCE __virtio16)((char *)dp_packet_l4(b) - - (char *)dp_packet_eth(b)); - + vnet->hdr_len = 0; + vnet->gso_size = 0; + vnet->gso_type = VIRTIO_NET_HDR_GSO_NONE; + } + + if (dp_packet_l4_checksum_good(b)) { + /* The packet has good checksum in the packet. + * No need to validate again. */ + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; + vnet->flags = VIRTIO_NET_HDR_F_DATA_VALID; + } else if (dp_packet_hwol_tx_l4_checksum(b)) { + /* The csum calculation is offloaded. 
*/ if (dp_packet_hwol_l4_is_tcp(b)) { + /* Virtual I/O Device (VIRTIO) Version 1.1 + * 5.1.6.2 Packet Transmission + If the driver negotiated VIRTIO_NET_F_CSUM, it can skip + checksumming the packet: + - flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM set, + - csum_start is set to the offset within the packet + to begin checksumming, and + - csum_offset indicates how many bytes after the + csum_start the new (16 bit ones complement) checksum + is placed by the device. + The TCP checksum field in the packet is set to the sum of + the TCP pseudo header, so that replacing it by the ones + complement checksum of the TCP header and body will give + the correct result. */ + + struct tcp_header *tcp_hdr = dp_packet_l4(b); + ovs_be16 csum = 0; + if (dp_packet_hwol_is_ipv4(b)) { + const struct ip_header *ip_hdr = dp_packet_l3(b); + csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr)); + } else if (dp_packet_hwol_tx_ipv6(b)) { + const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b); + csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr)); + } + + tcp_hdr->tcp_csum = csum; + vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs; vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof( struct tcp_header, tcp_csum); } else if (dp_packet_hwol_l4_is_udp(b)) { + struct udp_header *udp_hdr = dp_packet_l4(b); + ovs_be16 csum = 0; + + if (dp_packet_hwol_is_ipv4(b)) { + const struct ip_header *ip_hdr = dp_packet_l3(b); + csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr)); + } else if (dp_packet_hwol_tx_ipv6(b)) { + const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b); + csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr)); + } + + udp_hdr->udp_csum = csum; + vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs; vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof( struct udp_header, udp_csum); } else if (dp_packet_hwol_l4_is_sctp(b)) { - vnet->csum_offset = 
(OVS_FORCE __virtio16) __builtin_offsetof( - struct sctp_header, sctp_csum); + /* The Linux kernel networking stack only supports csum_start + * and csum_offset when SCTP GSO is enabled. See kernel's + * skb_csum_hwoffload_help(). Currently there is no SCTP + * segmentation offload support in OVS. */ + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; + vnet->flags = 0; } else { - VLOG_WARN_RL(&rl, "Unsupported L4 protocol"); + /* This should only happen when DP_PACKET_OL_TX_L4_MASK includes + * a new flag that is not covered in above checks. */ + VLOG_WARN_RL(&rl, "Unsupported L4 checksum offload. " + "Flags: %"PRIu64, + (uint64_t)*dp_packet_ol_flags_ptr(b)); + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; + vnet->flags = 0; } + } else { + /* Packet L4 csum is unknown. */ + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; + vnet->flags = 0; } + + dp_packet_push(b, vnet, sizeof *vnet); } diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c index 754e2d78d..dc054336a 100644 --- a/lib/netdev-native-tnl.c +++ b/lib/netdev-native-tnl.c @@ -224,28 +224,6 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl, return udp + 1; } -static void -netdev_tnl_calc_udp_csum(struct udp_header *udp, struct dp_packet *packet, - int ip_tot_size) -{ - uint32_t csum; - - if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) { - csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr( - dp_packet_data(packet))); - } else { - csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr( - dp_packet_data(packet))); - } - - csum = csum_continue(csum, udp, ip_tot_size); - udp->udp_csum = csum_finish(csum); - - if (!udp->udp_csum) { - udp->udp_csum = htons(0xffff); - } -} - void netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, struct dp_packet *packet, @@ -260,9 +238,9 @@ netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, udp->udp_src = netdev_tnl_get_src_port(packet); udp->udp_len = 
htons(ip_tot_size); - if (udp->udp_csum) { - netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size); - } + /* Postpone checksum to the egress netdev. */ + dp_packet_hwol_set_csum_udp(packet); + dp_packet_ol_reset_l4_csum_good(packet); } static void * @@ -806,7 +784,9 @@ netdev_gtpu_push_header(const struct netdev *netdev, data->header_len, &ip_tot_size); udp->udp_src = netdev_tnl_get_src_port(packet); udp->udp_len = htons(ip_tot_size); - netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size); + /* Postpone checksum to the egress netdev. */ + dp_packet_hwol_set_csum_udp(packet); + dp_packet_ol_reset_l4_csum_good(packet); gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1); diff --git a/lib/netdev.c b/lib/netdev.c index 6d3f678f0..12e1cb948 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -798,8 +798,6 @@ static bool netdev_send_prepare_packet(const uint64_t netdev_flags, struct dp_packet *packet, char **errormsg) { - uint64_t l4_mask; - if (dp_packet_hwol_is_tso(packet) && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { /* Fall back to GSO in software. */ @@ -812,36 +810,16 @@ netdev_send_prepare_packet(const uint64_t netdev_flags, * netdev to decide what would be the best to do. * Provide a software fallback in case the device doesn't support IP csum * offloading. Note: Encapsulated packet must have the inner IP header + * csum already calculated. + * Packet with L4 csum offloading enabled was received with verified csum. + * Leave the L4 csum offloading enabled even with good checksum for the + * netdev to decide what would be the best to do. + * Netdev that requires pseudo header csum needs to calculate that. + * Provide a software fallback in case the netdev doesn't support L4 csum + * offloading. Note: Encapsulated packet must have the inner L4 header * csum already calculated. 
*/ dp_packet_ol_send_prepare(packet, netdev_flags); - l4_mask = dp_packet_hwol_l4_mask(packet); - if (l4_mask) { - if (dp_packet_hwol_l4_is_tcp(packet)) { - if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { - /* Fall back to TCP csum in software. */ - VLOG_ERR_BUF(errormsg, "No TCP checksum support"); - return false; - } - } else if (dp_packet_hwol_l4_is_udp(packet)) { - if (!(netdev_flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { - /* Fall back to UDP csum in software. */ - VLOG_ERR_BUF(errormsg, "No UDP checksum support"); - return false; - } - } else if (dp_packet_hwol_l4_is_sctp(packet)) { - if (!(netdev_flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)) { - /* Fall back to SCTP csum in software. */ - VLOG_ERR_BUF(errormsg, "No SCTP checksum support"); - return false; - } - } else { - VLOG_ERR_BUF(errormsg, "No L4 checksum support: mask: %"PRIu64, - l4_mask); - return false; - } - } - return true; } @@ -974,20 +952,16 @@ netdev_push_header(const struct netdev *netdev, size_t i, size = dp_packet_batch_size(batch); DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { - if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet) - || dp_packet_hwol_l4_mask(packet))) { + if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet))) { COVERAGE_INC(netdev_push_header_drops); dp_packet_delete(packet); - VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is " + VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO offloading is " "not supported: packet dropped", netdev_get_name(netdev)); } else { /* The packet is going to be encapsulated and there is * no support yet for inner network header csum offloading. 
*/ - if (dp_packet_hwol_tx_ip_csum(packet) - && !dp_packet_ip_checksum_good(packet)) { - dp_packet_ip_set_header_csum(packet); - } + dp_packet_ol_send_prepare(packet, 0); netdev->netdev_class->push_header(netdev, packet, data); diff --git a/lib/packets.c b/lib/packets.c index a1d668190..8c69e6e3e 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -1131,16 +1131,22 @@ packet_set_ipv4_addr(struct dp_packet *packet, pkt_metadata_init_conn(&packet->md); if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) { - struct tcp_header *th = dp_packet_l4(packet); - - th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr); + if (dp_packet_hwol_l4_is_tcp(packet)) { + dp_packet_ol_reset_l4_csum_good(packet); + } else { + struct tcp_header *th = dp_packet_l4(packet); + th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr); + } } else if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN ) { - struct udp_header *uh = dp_packet_l4(packet); - - if (uh->udp_csum) { - uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr); - if (!uh->udp_csum) { - uh->udp_csum = htons(0xffff); + if (dp_packet_hwol_l4_is_udp(packet)) { + dp_packet_ol_reset_l4_csum_good(packet); + } else { + struct udp_header *uh = dp_packet_l4(packet); + if (uh->udp_csum) { + uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr); + if (!uh->udp_csum) { + uh->udp_csum = htons(0xffff); + } } } } @@ -1246,16 +1252,24 @@ packet_update_csum128(struct dp_packet *packet, uint8_t proto, size_t l4_size = dp_packet_l4_size(packet); if (proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) { - struct tcp_header *th = dp_packet_l4(packet); + if (dp_packet_hwol_l4_is_tcp(packet)) { + dp_packet_ol_reset_l4_csum_good(packet); + } else { + struct tcp_header *th = dp_packet_l4(packet); - th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr); + th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr); + } } else if (proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) { - struct 
udp_header *uh = dp_packet_l4(packet); + if (dp_packet_hwol_l4_is_udp(packet)) { + dp_packet_ol_reset_l4_csum_good(packet); + } else { + struct udp_header *uh = dp_packet_l4(packet); - if (uh->udp_csum) { - uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); - if (!uh->udp_csum) { - uh->udp_csum = htons(0xffff); + if (uh->udp_csum) { + uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); + if (!uh->udp_csum) { + uh->udp_csum = htons(0xffff); + } } } } else if (proto == IPPROTO_ICMPV6 && @@ -1375,7 +1389,9 @@ static void packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum) { if (*port != new_port) { - *csum = recalc_csum16(*csum, *port, new_port); + if (csum) { + *csum = recalc_csum16(*csum, *port, new_port); + } *port = new_port; } } @@ -1387,9 +1403,16 @@ void packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) { struct tcp_header *th = dp_packet_l4(packet); + ovs_be16 *csum = NULL; + + if (dp_packet_hwol_l4_is_tcp(packet)) { + dp_packet_ol_reset_l4_csum_good(packet); + } else { + csum = &th->tcp_csum; + } - packet_set_port(&th->tcp_src, src, &th->tcp_csum); - packet_set_port(&th->tcp_dst, dst, &th->tcp_csum); + packet_set_port(&th->tcp_src, src, csum); + packet_set_port(&th->tcp_dst, dst, csum); pkt_metadata_init_conn(&packet->md); } @@ -1401,17 +1424,21 @@ packet_set_udp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) { struct udp_header *uh = dp_packet_l4(packet); - if (uh->udp_csum) { - packet_set_port(&uh->udp_src, src, &uh->udp_csum); - packet_set_port(&uh->udp_dst, dst, &uh->udp_csum); + if (dp_packet_hwol_l4_is_udp(packet)) { + dp_packet_ol_reset_l4_csum_good(packet); + packet_set_port(&uh->udp_src, src, NULL); + packet_set_port(&uh->udp_dst, dst, NULL); + } else { + ovs_be16 *csum = uh->udp_csum ? 
&uh->udp_csum : NULL; + + packet_set_port(&uh->udp_src, src, csum); + packet_set_port(&uh->udp_dst, dst, csum); - if (!uh->udp_csum) { + if (csum && !uh->udp_csum) { uh->udp_csum = htons(0xffff); } - } else { - uh->udp_src = src; - uh->udp_dst = dst; } + pkt_metadata_init_conn(&packet->md); } @@ -1422,18 +1449,27 @@ void packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) { struct sctp_header *sh = dp_packet_l4(packet); - ovs_be32 old_csum, old_correct_csum, new_csum; - uint16_t tp_len = dp_packet_l4_size(packet); - old_csum = get_16aligned_be32(&sh->sctp_csum); - put_16aligned_be32(&sh->sctp_csum, 0); - old_correct_csum = crc32c((void *)sh, tp_len); + if (dp_packet_hwol_l4_is_sctp(packet)) { + dp_packet_ol_reset_l4_csum_good(packet); + sh->sctp_src = src; + sh->sctp_dst = dst; + } else { + ovs_be32 old_csum, old_correct_csum, new_csum; + uint16_t tp_len = dp_packet_l4_size(packet); - sh->sctp_src = src; - sh->sctp_dst = dst; + old_csum = get_16aligned_be32(&sh->sctp_csum); + put_16aligned_be32(&sh->sctp_csum, 0); + old_correct_csum = crc32c((void *) sh, tp_len); + + sh->sctp_src = src; + sh->sctp_dst = dst; + + new_csum = crc32c((void *) sh, tp_len); + put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum + ^ new_csum); + } - new_csum = crc32c((void *)sh, tp_len); - put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum); pkt_metadata_init_conn(&packet->md); } @@ -1957,3 +1993,72 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6) } } } + +/* Set TCP checksum field in packet 'p' with complete checksum. + * The packet must have the L3 and L4 offsets. 
*/ +void +packet_tcp_complete_csum(struct dp_packet *p) +{ + struct tcp_header *tcp = dp_packet_l4(p); + + tcp->tcp_csum = 0; + if (dp_packet_hwol_is_ipv4(p)) { + struct ip_header *ip = dp_packet_l3(p); + + tcp->tcp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip), + tcp, dp_packet_l4_size(p))); + } else if (dp_packet_hwol_tx_ipv6(p)) { + struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p); + + tcp->tcp_csum = packet_csum_upperlayer6(ip6, tcp, ip6->ip6_nxt, + dp_packet_l4_size(p)); + } else { + OVS_NOT_REACHED(); + } +} + +/* Set UDP checksum field in packet 'p' with complete checksum. + * The packet must have the L3 and L4 offsets. */ +void +packet_udp_complete_csum(struct dp_packet *p) +{ + struct udp_header *udp = dp_packet_l4(p); + + /* Skip csum calculation if the udp_csum is zero. */ + if (!udp->udp_csum) { + return; + } + + udp->udp_csum = 0; + if (dp_packet_hwol_is_ipv4(p)) { + struct ip_header *ip = dp_packet_l3(p); + + udp->udp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip), + udp, dp_packet_l4_size(p))); + } else if (dp_packet_hwol_tx_ipv6(p)) { + struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p); + + udp->udp_csum = packet_csum_upperlayer6(ip6, udp, ip6->ip6_nxt, + dp_packet_l4_size(p)); + } else { + OVS_NOT_REACHED(); + } + + if (!udp->udp_csum) { + udp->udp_csum = htons(0xffff); + } +} + +/* Set SCTP checksum field in packet 'p' with complete checksum. + * The packet must have the L3 and L4 offsets. 
*/ +void +packet_sctp_complete_csum(struct dp_packet *p) +{ + struct sctp_header *sh = dp_packet_l4(p); + uint16_t tp_len = dp_packet_l4_size(p); + ovs_be32 csum; + + put_16aligned_be32(&sh->sctp_csum, 0); + csum = crc32c((void *) sh, tp_len); + put_16aligned_be32(&sh->sctp_csum, csum); +} diff --git a/lib/packets.h b/lib/packets.h index 5bdf6e4bb..28950b8b1 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -1643,6 +1643,9 @@ void packet_put_ra_prefix_opt(struct dp_packet *, const ovs_be128 router_prefix); uint32_t packet_csum_pseudoheader(const struct ip_header *); void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6); +void packet_tcp_complete_csum(struct dp_packet *); +void packet_udp_complete_csum(struct dp_packet *); +void packet_sctp_complete_csum(struct dp_packet *); #define DNS_HEADER_LEN 12 struct dns_header {