Message ID | 20200601101557.85072-1-yang_y_yi@163.com |
---|---|
State | Superseded |
Headers | show |
Series | [ovs-dev,v1] Enable VXLAN TSO for DPDK datapath | expand |
Bleep bloop. Greetings yang_y_yi, I am a robot and I have tried out your patch. Thanks for your contribution. I encountered some error that I wasn't expecting. See the details below. checkpatch: ERROR: Inappropriate bracing around statement ERROR: C99 style comment #215 FILE: lib/netdev-dpdk.c:2219: if ((mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) { //UDP ERROR: C99 style comment #241 FILE: lib/netdev-dpdk.c:2240: //mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; Lines checked: 424, Warnings: 0, Errors: 3 build: dp_packet_hwol_set_l2_len(struct dp_packet *b, int l2_len) ^ lib/dp-packet.h:1076:52: error: unused parameter 'l2_len' [-Werror=unused-parameter] dp_packet_hwol_set_l2_len(struct dp_packet *b, int l2_len) ^ lib/dp-packet.h: In function 'dp_packet_hwol_set_l3_len': lib/dp-packet.h:1082:45: error: unused parameter 'b' [-Werror=unused-parameter] dp_packet_hwol_set_l3_len(struct dp_packet *b, int l3_len) ^ lib/dp-packet.h:1082:52: error: unused parameter 'l3_len' [-Werror=unused-parameter] dp_packet_hwol_set_l3_len(struct dp_packet *b, int l3_len) ^ lib/dp-packet.h: In function 'dp_packet_hwol_set_l4_len': lib/dp-packet.h:1088:45: error: unused parameter 'b' [-Werror=unused-parameter] dp_packet_hwol_set_l4_len(struct dp_packet *b, int l4_len) ^ lib/dp-packet.h:1088:52: error: unused parameter 'l4_len' [-Werror=unused-parameter] dp_packet_hwol_set_l4_len(struct dp_packet *b, int l4_len) ^ cc1: all warnings being treated as errors make[2]: *** [lib/bfd.lo] Error 1 make[2]: Leaving directory `/var/lib/jenkins/jobs/0day_robot_upstream_build_from_pw/workspace' make[1]: *** [all-recursive] Error 1 make[1]: Leaving directory `/var/lib/jenkins/jobs/0day_robot_upstream_build_from_pw/workspace' make: *** [all] Error 2 Please check this out. If you feel there has been an error, please email aconole@redhat.com Thanks, 0-day Robot
On 6/1/2020 11:15 AM, yang_y_yi@163.com wrote: > From: Yi Yang <yangyi01@inspur.com> > > Many NICs can support VXLAN TSO which can help > improve across-compute-node VM-to-VM performance > in case that MTU is set to 1500. > > This patch allows dpdkvhostuserclient interface > and veth/tap interface to leverage NICs' offload > capability to maximize across-compute-node TCP > performance, with it applied, OVS DPDK can reach > linespeed for across-compute-node VM-to-VM TCP > performance. > > GSO (for UDP and software VXLAN TSO ) and GRO will > be added in the near future. Hi Yi, thanks for working on this, I have not tested yet as there were multiple compilation issues to be addressed, I've flagged these below. If you can address these and meanwhile I'll look to deploy a setup in our lab to enable testing/review in greater detail. > > Signed-off-by: Yi Yang <yangyi01@inspur.com> > --- > lib/dp-packet.h | 58 ++++++++++++++++ > lib/netdev-dpdk.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++---- > lib/netdev.c | 16 ++--- > 3 files changed, 243 insertions(+), 24 deletions(-) > > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index 0430cca..7faa675 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -1032,6 +1032,64 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b) > *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG; > } > > +#ifdef DPDK_NETDEV > +/* Mark packet 'b' for VXLAN TCP segmentation offloading. 
*/ > +static inline void > +dp_packet_hwol_set_vxlan_tcp_seg(struct dp_packet *b) > +{ > + b->mbuf.ol_flags |= PKT_TX_TUNNEL_VXLAN; > + b->mbuf.l2_len += sizeof(struct udp_header) + > + sizeof(struct vxlanhdr); > + b->mbuf.outer_l2_len = ETH_HEADER_LEN; > + b->mbuf.outer_l3_len = IP_HEADER_LEN; > +} > + > +/* Set l2_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l2_len(struct dp_packet *b, int l2_len) > +{ > + b->mbuf.l2_len = l2_len; > +} > + > +/* Set l3_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l3_len(struct dp_packet *b, int l3_len) > +{ > + b->mbuf.l3_len = l3_len; > +} > + > +/* Set l4_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l4_len(struct dp_packet *b, int l4_len) > +{ > + b->mbuf.l4_len = l4_len; > +} > +#else For non DPDK case, need to add OVS_UNUSED after each function parameter below, otherwise it will be treated as unused parameter and cause compilation failure for non DPDK case. > +/* Mark packet 'b' for VXLAN TCP segmentation offloading. 
*/ > +static inline void > +dp_packet_hwol_set_vxlan_tcp_seg(struct dp_packet *b) > +{ > +} > + > +/* Set l2_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l2_len(struct dp_packet *b, int l2_len) > +{ > +} > + > +/* Set l3_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l3_len(struct dp_packet *b, int l3_len) > +{ > +} > + > +/* Set l4_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l4_len(struct dp_packet *b, int l4_len) > +{ > +} > +#endif /* DPDK_NETDEV */ > + > static inline bool > dp_packet_ip_checksum_valid(const struct dp_packet *p) > { > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 44ebf96..c2424f7 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -44,6 +44,7 @@ > #include <rte_pci.h> > #include <rte_version.h> > #include <rte_vhost.h> > +#include <rte_ip.h> > > #include "cmap.h" > #include "coverage.h" > @@ -405,6 +406,7 @@ enum dpdk_hw_ol_features { > NETDEV_RX_HW_SCATTER = 1 << 2, > NETDEV_TX_TSO_OFFLOAD = 1 << 3, > NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4, > + NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD = 1 << 5, > }; > > /* > @@ -988,6 +990,12 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) > > if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { > conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS; > + /* Enable VXLAN TSO support if available */ > + if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) { > + conf.txmode.offloads |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO; > + conf.txmode.offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; > + conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS; > + } > if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { > conf.txmode.offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM; > } > @@ -1126,6 +1134,10 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > if ((info.tx_offload_capa & tx_tso_offload_capa) > == tx_tso_offload_capa) { > dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; > + /* Enable VXLAN TSO support if available */ > + 
if (info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO) { > + dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD; > + } > if (info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM) { > dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD; > } else { > @@ -2137,29 +2149,97 @@ static bool > netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) > { > struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf); > + uint16_t l4_proto = 0; > + struct rte_ether_hdr *eth_hdr = > + rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); > + struct rte_ipv4_hdr *ip_hdr; > + struct rte_ipv6_hdr *ip6_hdr; > + > + if (mbuf->ol_flags & PKT_TX_TUNNEL_VXLAN) { > + /* Handle VXLAN TSO */ > + struct rte_udp_hdr *udp_hdr; > + > + if (mbuf->ol_flags & PKT_TX_IPV4) { We try to avoid use of magic numbers where possible, might be worth defining the + 1 separately for where it's used below and elsewhere. > + ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); > + udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1); > + > + /* outer IP checksum offload */ > + ip_hdr->hdr_checksum = 0; > + mbuf->ol_flags |= PKT_TX_OUTER_IP_CKSUM; > + mbuf->ol_flags |= PKT_TX_OUTER_IPV4; > + > + ip_hdr = (struct rte_ipv4_hdr *) > + ((uint8_t *)udp_hdr + mbuf->l2_len); > + l4_proto = ip_hdr->next_proto_id; > + > + /* inner IP checksum offload */ > + ip_hdr->hdr_checksum = 0; > + mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + } else if (mbuf->ol_flags & PKT_TX_IPV6) { > + ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); > + udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1); > + > + /* outer IP checksum offload */ > + ip_hdr->hdr_checksum = 0; > + mbuf->ol_flags |= PKT_TX_OUTER_IP_CKSUM; > + mbuf->ol_flags |= PKT_TX_OUTER_IPV4; > + > + ip6_hdr = (struct rte_ipv6_hdr *) > + ((uint8_t *)udp_hdr + mbuf->l2_len); > + l4_proto = ip6_hdr->proto; > + > + /* inner IP checksum offload offload */ > + mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + } > + } else if (mbuf->ol_flags & PKT_TX_L4_MASK) { > + /* Handle VLAN TSO */ > + /* 
no inner IP checksum for IPV6 */ > + if (mbuf->ol_flags & PKT_TX_IPV4) { > + ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); > + l4_proto = ip_hdr->next_proto_id; > + > + /* IP checksum offload */ > + ip_hdr->hdr_checksum = 0; > + mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + } else if (mbuf->ol_flags & PKT_TX_IPV6) { > + ip6_hdr = (struct rte_ipv6_hdr *)(eth_hdr + 1); > + l4_proto = ip6_hdr->proto; > + > + /* IP checksum offload */ > + mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + } > > - if (mbuf->ol_flags & PKT_TX_L4_MASK) { > mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt); > mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt); > mbuf->outer_l2_len = 0; > mbuf->outer_l3_len = 0; > } > > - if (mbuf->ol_flags & PKT_TX_TCP_SEG) { > - struct tcp_header *th = dp_packet_l4(pkt); > - > - if (!th) { > + if ((mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) { //UDP > + if (l4_proto != IPPROTO_UDP) { > + VLOG_WARN_RL(&rl, "%s: UDP packet without L4 header" > + " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); > + return false; > + } > + /* VXLAN GSO can be done here */ > + } else if (mbuf->ol_flags & PKT_TX_TCP_SEG || > + mbuf->ol_flags & PKT_TX_TCP_CKSUM) { > + if (l4_proto != IPPROTO_TCP) { > VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header" > " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); > return false; > } > > - mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; > - mbuf->ol_flags |= PKT_TX_TCP_CKSUM; > - mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > + if (mbuf->pkt_len - mbuf->l2_len > 1450) { > + dp_packet_hwol_set_tcp_seg(pkt); > + } > > - if (mbuf->ol_flags & PKT_TX_IPV4) { > - mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + mbuf->ol_flags |= PKT_TX_TCP_CKSUM; > + if (mbuf->ol_flags & PKT_TX_TCP_SEG) { > + //mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > + mbuf->tso_segsz = 1450 - mbuf->l3_len - mbuf->l4_len; > + } else { > + mbuf->tso_segsz = 0; > } > } > return true; > @@ -2365,6 +2445,71 @@ 
netdev_dpdk_vhost_update_rx_counters(struct netdev_dpdk *dev, > } > } > > +static void > +netdev_linux_parse_l2(struct dp_packet *pkt, uint16_t *l4_proto) > +{ > + struct rte_mbuf *mbuf = (struct rte_mbuf *)pkt; > + struct rte_ether_hdr *eth_hdr = > + rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); > + ovs_be16 eth_type; > + int l2_len; > + int l3_len = 0; > + int l4_len = 0; > + > + l2_len = ETH_HEADER_LEN; Will cause compilation issue below, mixing base types, will expect restricted ovs_be16 but will be assigned unsigned short. > + eth_type = eth_hdr->ether_type; > + if (eth_type_vlan(eth_type)) { > + struct rte_vlan_hdr *vlan_hdr = > + (struct rte_vlan_hdr *)(eth_hdr + 1); > + Same issue as flagged above, unexpected type. > + eth_type = vlan_hdr->eth_proto; > + l2_len += VLAN_HEADER_LEN; > + } > + > + dp_packet_hwol_set_l2_len(pkt, l2_len); > + > + if (eth_type == htons(ETH_TYPE_IP)) { > + struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *) > + ((char *)eth_hdr + l2_len); > + > + l3_len = IP_HEADER_LEN; > + dp_packet_hwol_set_tx_ipv4(pkt); > + *l4_proto = ipv4_hdr->next_proto_id; > + } else if (eth_type == htons(RTE_ETHER_TYPE_IPV6)) { > + struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *) > + ((char *)eth_hdr + l2_len); > + l3_len = IPV6_HEADER_LEN; > + dp_packet_hwol_set_tx_ipv6(pkt); > + *l4_proto = ipv6_hdr->proto; > + } > + > + dp_packet_hwol_set_l3_len(pkt, l3_len); > + > + if (*l4_proto == IPPROTO_TCP) { > + struct rte_tcp_hdr *tcp_hdr = (struct rte_tcp_hdr *) > + ((char *)eth_hdr + l2_len + l3_len); > + > + l4_len = (tcp_hdr->data_off & 0xf0) >> 2; > + dp_packet_hwol_set_l4_len(pkt, l4_len); > + } > +} > + > +static void > +netdev_dpdk_parse_hdr(struct dp_packet *b) > +{ > + uint16_t l4_proto = 0; > + > + netdev_linux_parse_l2(b, &l4_proto); > + > + if (l4_proto == IPPROTO_TCP) { > + dp_packet_hwol_set_csum_tcp(b); > + } else if (l4_proto == IPPROTO_UDP) { > + dp_packet_hwol_set_csum_udp(b); > + } else if (l4_proto == IPPROTO_SCTP) { > + 
dp_packet_hwol_set_csum_sctp(b); > + } > +} > + > /* > * The receive path for the vhost port is the TX path out from guest. > */ > @@ -2378,6 +2523,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, > uint16_t qos_drops = 0; > int qid = rxq->queue_id * VIRTIO_QNUM + VIRTIO_TXQ; > int vid = netdev_dpdk_get_vid(dev); > + struct dp_packet *packet; > > if (OVS_UNLIKELY(vid < 0 || !dev->vhost_reconfigured > || !(dev->flags & NETDEV_UP))) { > @@ -2417,6 +2563,14 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, > batch->count = nb_rx; > dp_packet_batch_init_packet_fields(batch); > > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + struct rte_mbuf *mbuf = (struct rte_mbuf *)packet; > + > + /* Clear ol_flags and set it by parsing header */ > + mbuf->ol_flags = 0; > + netdev_dpdk_parse_hdr(packet); > + } > + > return 0; > } > > @@ -2737,13 +2891,18 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) > > mbuf_dest->tx_offload = pkt_orig->mbuf.tx_offload; > mbuf_dest->packet_type = pkt_orig->mbuf.packet_type; > - mbuf_dest->ol_flags |= (pkt_orig->mbuf.ol_flags & > - ~(EXT_ATTACHED_MBUF | IND_ATTACHED_MBUF)); > + mbuf_dest->ol_flags |= pkt_orig->mbuf.ol_flags; > + mbuf_dest->l2_len = pkt_orig->mbuf.l2_len; > + mbuf_dest->l3_len = pkt_orig->mbuf.l3_len; > + mbuf_dest->l4_len = pkt_orig->mbuf.l4_len; > + mbuf_dest->outer_l2_len = pkt_orig->mbuf.outer_l2_len; > + mbuf_dest->outer_l3_len = pkt_orig->mbuf.outer_l3_len; > > memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, > sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size)); > > - if (mbuf_dest->ol_flags & PKT_TX_L4_MASK) { > + if ((mbuf_dest->outer_l2_len == 0) && > + (mbuf_dest->ol_flags & PKT_TX_L4_MASK)) { > mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest) > - (char *)dp_packet_eth(pkt_dest); > mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest) > @@ -2773,6 +2932,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) > uint32_t 
tx_failure = 0; > uint32_t mtu_drops = 0; > uint32_t qos_drops = 0; > + struct rte_mbuf *mbuf; > > if (dev->type != DPDK_DEV_VHOST) { > /* Check if QoS has been configured for this netdev. */ > @@ -2801,6 +2961,9 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) > break; > } > > + mbuf = (struct rte_mbuf *)pkts[txcnt]; > + netdev_dpdk_prep_hwol_packet(dev, mbuf); > + > txcnt++; > } > > @@ -4949,6 +5112,10 @@ netdev_dpdk_reconfigure(struct netdev *netdev) > netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; > netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; > netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > + /* Enable VXLAN TSO support if available */ > + if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) { > + netdev->ol_flags |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD; > + } > if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { > netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; > } > diff --git a/lib/netdev.c b/lib/netdev.c > index 90962ee..378f04f 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -960,18 +960,12 @@ netdev_push_header(const struct netdev *netdev, > size_t i, size = dp_packet_batch_size(batch); > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { > - if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet) > - || dp_packet_hwol_l4_mask(packet))) { Removing the line below will break coverage and cause compilation failure. If coverage for the function is unused then you need need to either remove the existing COVERAGE_DEFINE(netdev_push_header_drops); or else define a new one for use later in the function. 
> - COVERAGE_INC(netdev_push_header_drops); > - dp_packet_delete(packet); > - VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is " > - "not supported: packet dropped", > - netdev_get_name(netdev)); > - } else { > - netdev->netdev_class->push_header(netdev, packet, data); > - pkt_metadata_init(&packet->md, data->out_port); > - dp_packet_batch_refill(batch, packet, i); > + netdev->netdev_class->push_header(netdev, packet, data); > + if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) { > + dp_packet_hwol_set_vxlan_tcp_seg(packet); > } > + pkt_metadata_init(&packet->md, data->out_port); > + dp_packet_batch_refill(batch, packet, i); > } > > return 0; > It would be worth to update the documentation at some stage in the future as part of the patch as the limitations directly calls out VXLAN not being supported. http://docs.openvswitch.org/en/latest/topics/userspace-tso/#limitations BR Ian
Ian, thank you so much for comments, I will run travis to find out all the similar issues and fix them in v2. For setup, I have sent you an email which included dpdk bug link, please use that to setup it, please let me know if you have any issue. -----邮件原件----- 发件人: Stokes, Ian [mailto:ian.stokes@intel.com] 发送时间: 2020年6月3日 20:43 收件人: yang_y_yi@163.com; ovs-dev@openvswitch.org 抄送: blp@ovn.org; u9012063@gmail.com; Yi Yang (杨燚)-云服务集团 <yangyi01@inspur.com> 主题: Re: [PATCH v1] Enable VXLAN TSO for DPDK datapath On 6/1/2020 11:15 AM, yang_y_yi@163.com wrote: > From: Yi Yang <yangyi01@inspur.com> > > Many NICs can support VXLAN TSO which can help improve > across-compute-node VM-to-VM performance in case that MTU is set to > 1500. > > This patch allows dpdkvhostuserclient interface and veth/tap interface > to leverage NICs' offload capability to maximize across-compute-node > TCP performance, with it applied, OVS DPDK can reach linespeed for > across-compute-node VM-to-VM TCP performance. > > GSO (for UDP and software VXLAN TSO ) and GRO will be added in the > near future. Hi Yi, thanks for working on this, I have not tested yet as there were multiple compilation issues to be addressed, I've flagged these below. If you can address these and meanwhile I'll look to deploy a setup in our lab to enable testing/review in greater detail. > > Signed-off-by: Yi Yang <yangyi01@inspur.com> > --- > lib/dp-packet.h | 58 ++++++++++++++++ > lib/netdev-dpdk.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++---- > lib/netdev.c | 16 ++--- > 3 files changed, 243 insertions(+), 24 deletions(-) > > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index 0430cca..7faa675 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -1032,6 +1032,64 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b) > *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG; > } > > +#ifdef DPDK_NETDEV > +/* Mark packet 'b' for VXLAN TCP segmentation offloading. 
*/ > +static inline void > +dp_packet_hwol_set_vxlan_tcp_seg(struct dp_packet *b) > +{ > + b->mbuf.ol_flags |= PKT_TX_TUNNEL_VXLAN; > + b->mbuf.l2_len += sizeof(struct udp_header) + > + sizeof(struct vxlanhdr); > + b->mbuf.outer_l2_len = ETH_HEADER_LEN; > + b->mbuf.outer_l3_len = IP_HEADER_LEN; > +} > + > +/* Set l2_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l2_len(struct dp_packet *b, int l2_len) > +{ > + b->mbuf.l2_len = l2_len; > +} > + > +/* Set l3_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l3_len(struct dp_packet *b, int l3_len) > +{ > + b->mbuf.l3_len = l3_len; > +} > + > +/* Set l4_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l4_len(struct dp_packet *b, int l4_len) > +{ > + b->mbuf.l4_len = l4_len; > +} > +#else For non DPDK case, need to add OVS_UNUSED after each function parameter below, otherwise it will be treated as unused parameter and cause compilation failure for non DPDK case. > +/* Mark packet 'b' for VXLAN TCP segmentation offloading. 
*/ > +static inline void > +dp_packet_hwol_set_vxlan_tcp_seg(struct dp_packet *b) > +{ > +} > + > +/* Set l2_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l2_len(struct dp_packet *b, int l2_len) > +{ > +} > + > +/* Set l3_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l3_len(struct dp_packet *b, int l3_len) > +{ > +} > + > +/* Set l4_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l4_len(struct dp_packet *b, int l4_len) > +{ > +} > +#endif /* DPDK_NETDEV */ > + > static inline bool > dp_packet_ip_checksum_valid(const struct dp_packet *p) > { > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 44ebf96..c2424f7 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -44,6 +44,7 @@ > #include <rte_pci.h> > #include <rte_version.h> > #include <rte_vhost.h> > +#include <rte_ip.h> > > #include "cmap.h" > #include "coverage.h" > @@ -405,6 +406,7 @@ enum dpdk_hw_ol_features { > NETDEV_RX_HW_SCATTER = 1 << 2, > NETDEV_TX_TSO_OFFLOAD = 1 << 3, > NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4, > + NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD = 1 << 5, > }; > > /* > @@ -988,6 +990,12 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) > > if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { > conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS; > + /* Enable VXLAN TSO support if available */ > + if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) { > + conf.txmode.offloads |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO; > + conf.txmode.offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; > + conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS; > + } > if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { > conf.txmode.offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM; > } > @@ -1126,6 +1134,10 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > if ((info.tx_offload_capa & tx_tso_offload_capa) > == tx_tso_offload_capa) { > dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; > + /* Enable VXLAN TSO support if available */ > + 
if (info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO) { > + dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD; > + } > if (info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM) { > dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD; > } else { > @@ -2137,29 +2149,97 @@ static bool > netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) > { > struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf); > + uint16_t l4_proto = 0; > + struct rte_ether_hdr *eth_hdr = > + rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); > + struct rte_ipv4_hdr *ip_hdr; > + struct rte_ipv6_hdr *ip6_hdr; > + > + if (mbuf->ol_flags & PKT_TX_TUNNEL_VXLAN) { > + /* Handle VXLAN TSO */ > + struct rte_udp_hdr *udp_hdr; > + > + if (mbuf->ol_flags & PKT_TX_IPV4) { We try to avoid use of magic numbers where possible, might be worth defining the + 1 separately for where it's used below and elsewhere. > + ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); > + udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1); > + > + /* outer IP checksum offload */ > + ip_hdr->hdr_checksum = 0; > + mbuf->ol_flags |= PKT_TX_OUTER_IP_CKSUM; > + mbuf->ol_flags |= PKT_TX_OUTER_IPV4; > + > + ip_hdr = (struct rte_ipv4_hdr *) > + ((uint8_t *)udp_hdr + mbuf->l2_len); > + l4_proto = ip_hdr->next_proto_id; > + > + /* inner IP checksum offload */ > + ip_hdr->hdr_checksum = 0; > + mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + } else if (mbuf->ol_flags & PKT_TX_IPV6) { > + ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); > + udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1); > + > + /* outer IP checksum offload */ > + ip_hdr->hdr_checksum = 0; > + mbuf->ol_flags |= PKT_TX_OUTER_IP_CKSUM; > + mbuf->ol_flags |= PKT_TX_OUTER_IPV4; > + > + ip6_hdr = (struct rte_ipv6_hdr *) > + ((uint8_t *)udp_hdr + mbuf->l2_len); > + l4_proto = ip6_hdr->proto; > + > + /* inner IP checksum offload offload */ > + mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + } > + } else if (mbuf->ol_flags & PKT_TX_L4_MASK) { > + /* Handle VLAN TSO */ > + /* 
no inner IP checksum for IPV6 */ > + if (mbuf->ol_flags & PKT_TX_IPV4) { > + ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); > + l4_proto = ip_hdr->next_proto_id; > + > + /* IP checksum offload */ > + ip_hdr->hdr_checksum = 0; > + mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + } else if (mbuf->ol_flags & PKT_TX_IPV6) { > + ip6_hdr = (struct rte_ipv6_hdr *)(eth_hdr + 1); > + l4_proto = ip6_hdr->proto; > + > + /* IP checksum offload */ > + mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + } > > - if (mbuf->ol_flags & PKT_TX_L4_MASK) { > mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt); > mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt); > mbuf->outer_l2_len = 0; > mbuf->outer_l3_len = 0; > } > > - if (mbuf->ol_flags & PKT_TX_TCP_SEG) { > - struct tcp_header *th = dp_packet_l4(pkt); > - > - if (!th) { > + if ((mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) { //UDP > + if (l4_proto != IPPROTO_UDP) { > + VLOG_WARN_RL(&rl, "%s: UDP packet without L4 header" > + " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); > + return false; > + } > + /* VXLAN GSO can be done here */ > + } else if (mbuf->ol_flags & PKT_TX_TCP_SEG || > + mbuf->ol_flags & PKT_TX_TCP_CKSUM) { > + if (l4_proto != IPPROTO_TCP) { > VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header" > " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); > return false; > } > > - mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; > - mbuf->ol_flags |= PKT_TX_TCP_CKSUM; > - mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > + if (mbuf->pkt_len - mbuf->l2_len > 1450) { > + dp_packet_hwol_set_tcp_seg(pkt); > + } > > - if (mbuf->ol_flags & PKT_TX_IPV4) { > - mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + mbuf->ol_flags |= PKT_TX_TCP_CKSUM; > + if (mbuf->ol_flags & PKT_TX_TCP_SEG) { > + //mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > + mbuf->tso_segsz = 1450 - mbuf->l3_len - mbuf->l4_len; > + } else { > + mbuf->tso_segsz = 0; > } > } > return true; > @@ -2365,6 +2445,71 @@ 
netdev_dpdk_vhost_update_rx_counters(struct netdev_dpdk *dev, > } > } > > +static void > +netdev_linux_parse_l2(struct dp_packet *pkt, uint16_t *l4_proto) > +{ > + struct rte_mbuf *mbuf = (struct rte_mbuf *)pkt; > + struct rte_ether_hdr *eth_hdr = > + rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); > + ovs_be16 eth_type; > + int l2_len; > + int l3_len = 0; > + int l4_len = 0; > + > + l2_len = ETH_HEADER_LEN; Will cause compilation issue below, mixing base types, will expect restricted ovs_be16 but will be assigned unsigned short. > + eth_type = eth_hdr->ether_type; > + if (eth_type_vlan(eth_type)) { > + struct rte_vlan_hdr *vlan_hdr = > + (struct rte_vlan_hdr *)(eth_hdr + 1); > + Same issue as flagged above, unexpected type. > + eth_type = vlan_hdr->eth_proto; > + l2_len += VLAN_HEADER_LEN; > + } > + > + dp_packet_hwol_set_l2_len(pkt, l2_len); > + > + if (eth_type == htons(ETH_TYPE_IP)) { > + struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *) > + ((char *)eth_hdr + l2_len); > + > + l3_len = IP_HEADER_LEN; > + dp_packet_hwol_set_tx_ipv4(pkt); > + *l4_proto = ipv4_hdr->next_proto_id; > + } else if (eth_type == htons(RTE_ETHER_TYPE_IPV6)) { > + struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *) > + ((char *)eth_hdr + l2_len); > + l3_len = IPV6_HEADER_LEN; > + dp_packet_hwol_set_tx_ipv6(pkt); > + *l4_proto = ipv6_hdr->proto; > + } > + > + dp_packet_hwol_set_l3_len(pkt, l3_len); > + > + if (*l4_proto == IPPROTO_TCP) { > + struct rte_tcp_hdr *tcp_hdr = (struct rte_tcp_hdr *) > + ((char *)eth_hdr + l2_len + l3_len); > + > + l4_len = (tcp_hdr->data_off & 0xf0) >> 2; > + dp_packet_hwol_set_l4_len(pkt, l4_len); > + } > +} > + > +static void > +netdev_dpdk_parse_hdr(struct dp_packet *b) > +{ > + uint16_t l4_proto = 0; > + > + netdev_linux_parse_l2(b, &l4_proto); > + > + if (l4_proto == IPPROTO_TCP) { > + dp_packet_hwol_set_csum_tcp(b); > + } else if (l4_proto == IPPROTO_UDP) { > + dp_packet_hwol_set_csum_udp(b); > + } else if (l4_proto == IPPROTO_SCTP) { > + 
dp_packet_hwol_set_csum_sctp(b); > + } > +} > + > /* > * The receive path for the vhost port is the TX path out from guest. > */ > @@ -2378,6 +2523,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, > uint16_t qos_drops = 0; > int qid = rxq->queue_id * VIRTIO_QNUM + VIRTIO_TXQ; > int vid = netdev_dpdk_get_vid(dev); > + struct dp_packet *packet; > > if (OVS_UNLIKELY(vid < 0 || !dev->vhost_reconfigured > || !(dev->flags & NETDEV_UP))) { > @@ -2417,6 +2563,14 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, > batch->count = nb_rx; > dp_packet_batch_init_packet_fields(batch); > > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + struct rte_mbuf *mbuf = (struct rte_mbuf *)packet; > + > + /* Clear ol_flags and set it by parsing header */ > + mbuf->ol_flags = 0; > + netdev_dpdk_parse_hdr(packet); > + } > + > return 0; > } > > @@ -2737,13 +2891,18 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) > > mbuf_dest->tx_offload = pkt_orig->mbuf.tx_offload; > mbuf_dest->packet_type = pkt_orig->mbuf.packet_type; > - mbuf_dest->ol_flags |= (pkt_orig->mbuf.ol_flags & > - ~(EXT_ATTACHED_MBUF | IND_ATTACHED_MBUF)); > + mbuf_dest->ol_flags |= pkt_orig->mbuf.ol_flags; > + mbuf_dest->l2_len = pkt_orig->mbuf.l2_len; > + mbuf_dest->l3_len = pkt_orig->mbuf.l3_len; > + mbuf_dest->l4_len = pkt_orig->mbuf.l4_len; > + mbuf_dest->outer_l2_len = pkt_orig->mbuf.outer_l2_len; > + mbuf_dest->outer_l3_len = pkt_orig->mbuf.outer_l3_len; > > memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, > sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size)); > > - if (mbuf_dest->ol_flags & PKT_TX_L4_MASK) { > + if ((mbuf_dest->outer_l2_len == 0) && > + (mbuf_dest->ol_flags & PKT_TX_L4_MASK)) { > mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest) > - (char *)dp_packet_eth(pkt_dest); > mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest) > @@ -2773,6 +2932,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) > uint32_t 
tx_failure = 0; > uint32_t mtu_drops = 0; > uint32_t qos_drops = 0; > + struct rte_mbuf *mbuf; > > if (dev->type != DPDK_DEV_VHOST) { > /* Check if QoS has been configured for this netdev. */ > @@ -2801,6 +2961,9 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) > break; > } > > + mbuf = (struct rte_mbuf *)pkts[txcnt]; > + netdev_dpdk_prep_hwol_packet(dev, mbuf); > + > txcnt++; > } > > @@ -4949,6 +5112,10 @@ netdev_dpdk_reconfigure(struct netdev *netdev) > netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; > netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; > netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > + /* Enable VXLAN TSO support if available */ > + if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) { > + netdev->ol_flags |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD; > + } > if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { > netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; > } > diff --git a/lib/netdev.c b/lib/netdev.c > index 90962ee..378f04f 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -960,18 +960,12 @@ netdev_push_header(const struct netdev *netdev, > size_t i, size = dp_packet_batch_size(batch); > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { > - if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet) > - || dp_packet_hwol_l4_mask(packet))) { Removing the line below will break coverage and cause compilation failure. If coverage for the function is unused then you need need to either remove the existing COVERAGE_DEFINE(netdev_push_header_drops); or else define a new one for use later in the function. 
> - COVERAGE_INC(netdev_push_header_drops); > - dp_packet_delete(packet); > - VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is " > - "not supported: packet dropped", > - netdev_get_name(netdev)); > - } else { > - netdev->netdev_class->push_header(netdev, packet, data); > - pkt_metadata_init(&packet->md, data->out_port); > - dp_packet_batch_refill(batch, packet, i); > + netdev->netdev_class->push_header(netdev, packet, data); > + if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) { > + dp_packet_hwol_set_vxlan_tcp_seg(packet); > } > + pkt_metadata_init(&packet->md, data->out_port); > + dp_packet_batch_refill(batch, packet, i); > } > > return 0; > It would be worth to update the documentation at some stage in the future as part of the patch as the limitations directly calls out VXLAN not being supported. http://docs.openvswitch.org/en/latest/topics/userspace-tso/#limitations BR Ian
Hi, Ian I have fixed issues your comments mentioned and passed travis build check, it is in my github, https://github.com/yyang13/ovs, v1 sent out last week missed a change in lib/netdev-linux.c, please use this one patch in https://github.com/yyang13/ovs for your setup. -----邮件原件----- 发件人: Yi Yang (杨燚)-云服务集团 发送时间: 2020年6月4日 8:30 收件人: 'ian.stokes@intel.com' <ian.stokes@intel.com>; 'yang_y_yi@163.com' <yang_y_yi@163.com>; 'ovs-dev@openvswitch.org' <ovs-dev@openvswitch.org> 抄送: 'blp@ovn.org' <blp@ovn.org>; 'u9012063@gmail.com' <u9012063@gmail.com> 主题: 答复: [PATCH v1] Enable VXLAN TSO for DPDK datapath 重要性: 高 Ian, thank you so much for comments, I will run travis to find out all the similar issues and fix them in v2. For setup, I have sent you an email which included dpdk bug link, please use that to setup it, please let me know if you have any issue. -----邮件原件----- 发件人: Stokes, Ian [mailto:ian.stokes@intel.com] 发送时间: 2020年6月3日 20:43 收件人: yang_y_yi@163.com; ovs-dev@openvswitch.org 抄送: blp@ovn.org; u9012063@gmail.com; Yi Yang (杨燚)-云服务集团 <yangyi01@inspur.com> 主题: Re: [PATCH v1] Enable VXLAN TSO for DPDK datapath On 6/1/2020 11:15 AM, yang_y_yi@163.com wrote: > From: Yi Yang <yangyi01@inspur.com> > > Many NICs can support VXLAN TSO which can help improve > across-compute-node VM-to-VM performance in case that MTU is set to > 1500. > > This patch allows dpdkvhostuserclient interface and veth/tap interface > to leverage NICs' offload capability to maximize across-compute-node > TCP performance, with it applied, OVS DPDK can reach linespeed for > across-compute-node VM-to-VM TCP performance. > > GSO (for UDP and software VXLAN TSO ) and GRO will be added in the > near future. Hi Yi, thanks for working on this, I have not tested yet as there were multiple compilation issues to be addressed, I've flagged these below. If you can address these and meanwhile I'll look to deploy a setup in our lab to enable testing/review in greater detail. 
> > Signed-off-by: Yi Yang <yangyi01@inspur.com> > --- > lib/dp-packet.h | 58 ++++++++++++++++ > lib/netdev-dpdk.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++---- > lib/netdev.c | 16 ++--- > 3 files changed, 243 insertions(+), 24 deletions(-) > > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index 0430cca..7faa675 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -1032,6 +1032,64 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b) > *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG; > } > > +#ifdef DPDK_NETDEV > +/* Mark packet 'b' for VXLAN TCP segmentation offloading. */ > +static inline void > +dp_packet_hwol_set_vxlan_tcp_seg(struct dp_packet *b) > +{ > + b->mbuf.ol_flags |= PKT_TX_TUNNEL_VXLAN; > + b->mbuf.l2_len += sizeof(struct udp_header) + > + sizeof(struct vxlanhdr); > + b->mbuf.outer_l2_len = ETH_HEADER_LEN; > + b->mbuf.outer_l3_len = IP_HEADER_LEN; > +} > + > +/* Set l2_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l2_len(struct dp_packet *b, int l2_len) > +{ > + b->mbuf.l2_len = l2_len; > +} > + > +/* Set l3_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l3_len(struct dp_packet *b, int l3_len) > +{ > + b->mbuf.l3_len = l3_len; > +} > + > +/* Set l4_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l4_len(struct dp_packet *b, int l4_len) > +{ > + b->mbuf.l4_len = l4_len; > +} > +#else For non DPDK case, need to add OVS_UNUSED after each function parameter below, otherwise it will be treated as unused parameter and cause compilation failure for non DPDK case. > +/* Mark packet 'b' for VXLAN TCP segmentation offloading. 
*/ > +static inline void > +dp_packet_hwol_set_vxlan_tcp_seg(struct dp_packet *b) > +{ > +} > + > +/* Set l2_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l2_len(struct dp_packet *b, int l2_len) > +{ > +} > + > +/* Set l3_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l3_len(struct dp_packet *b, int l3_len) > +{ > +} > + > +/* Set l4_len for the packet 'b' */ > +static inline void > +dp_packet_hwol_set_l4_len(struct dp_packet *b, int l4_len) > +{ > +} > +#endif /* DPDK_NETDEV */ > + > static inline bool > dp_packet_ip_checksum_valid(const struct dp_packet *p) > { > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 44ebf96..c2424f7 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -44,6 +44,7 @@ > #include <rte_pci.h> > #include <rte_version.h> > #include <rte_vhost.h> > +#include <rte_ip.h> > > #include "cmap.h" > #include "coverage.h" > @@ -405,6 +406,7 @@ enum dpdk_hw_ol_features { > NETDEV_RX_HW_SCATTER = 1 << 2, > NETDEV_TX_TSO_OFFLOAD = 1 << 3, > NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4, > + NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD = 1 << 5, > }; > > /* > @@ -988,6 +990,12 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) > > if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { > conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS; > + /* Enable VXLAN TSO support if available */ > + if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) { > + conf.txmode.offloads |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO; > + conf.txmode.offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; > + conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS; > + } > if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { > conf.txmode.offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM; > } > @@ -1126,6 +1134,10 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > if ((info.tx_offload_capa & tx_tso_offload_capa) > == tx_tso_offload_capa) { > dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; > + /* Enable VXLAN TSO support if available */ > + 
if (info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO) { > + dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD; > + } > if (info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM) { > dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD; > } else { > @@ -2137,29 +2149,97 @@ static bool > netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) > { > struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf); > + uint16_t l4_proto = 0; > + struct rte_ether_hdr *eth_hdr = > + rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); > + struct rte_ipv4_hdr *ip_hdr; > + struct rte_ipv6_hdr *ip6_hdr; > + > + if (mbuf->ol_flags & PKT_TX_TUNNEL_VXLAN) { > + /* Handle VXLAN TSO */ > + struct rte_udp_hdr *udp_hdr; > + > + if (mbuf->ol_flags & PKT_TX_IPV4) { We try to avoid use of magic numbers where possible, might be worth defining the + 1 separately for where it's used below and elsewhere. > + ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); > + udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1); > + > + /* outer IP checksum offload */ > + ip_hdr->hdr_checksum = 0; > + mbuf->ol_flags |= PKT_TX_OUTER_IP_CKSUM; > + mbuf->ol_flags |= PKT_TX_OUTER_IPV4; > + > + ip_hdr = (struct rte_ipv4_hdr *) > + ((uint8_t *)udp_hdr + mbuf->l2_len); > + l4_proto = ip_hdr->next_proto_id; > + > + /* inner IP checksum offload */ > + ip_hdr->hdr_checksum = 0; > + mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + } else if (mbuf->ol_flags & PKT_TX_IPV6) { > + ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); > + udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1); > + > + /* outer IP checksum offload */ > + ip_hdr->hdr_checksum = 0; > + mbuf->ol_flags |= PKT_TX_OUTER_IP_CKSUM; > + mbuf->ol_flags |= PKT_TX_OUTER_IPV4; > + > + ip6_hdr = (struct rte_ipv6_hdr *) > + ((uint8_t *)udp_hdr + mbuf->l2_len); > + l4_proto = ip6_hdr->proto; > + > + /* inner IP checksum offload offload */ > + mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + } > + } else if (mbuf->ol_flags & PKT_TX_L4_MASK) { > + /* Handle VLAN TSO */ > + /* 
no inner IP checksum for IPV6 */ > + if (mbuf->ol_flags & PKT_TX_IPV4) { > + ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); > + l4_proto = ip_hdr->next_proto_id; > + > + /* IP checksum offload */ > + ip_hdr->hdr_checksum = 0; > + mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + } else if (mbuf->ol_flags & PKT_TX_IPV6) { > + ip6_hdr = (struct rte_ipv6_hdr *)(eth_hdr + 1); > + l4_proto = ip6_hdr->proto; > + > + /* IP checksum offload */ > + mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + } > > - if (mbuf->ol_flags & PKT_TX_L4_MASK) { > mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt); > mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt); > mbuf->outer_l2_len = 0; > mbuf->outer_l3_len = 0; > } > > - if (mbuf->ol_flags & PKT_TX_TCP_SEG) { > - struct tcp_header *th = dp_packet_l4(pkt); > - > - if (!th) { > + if ((mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) { //UDP > + if (l4_proto != IPPROTO_UDP) { > + VLOG_WARN_RL(&rl, "%s: UDP packet without L4 header" > + " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); > + return false; > + } > + /* VXLAN GSO can be done here */ > + } else if (mbuf->ol_flags & PKT_TX_TCP_SEG || > + mbuf->ol_flags & PKT_TX_TCP_CKSUM) { > + if (l4_proto != IPPROTO_TCP) { > VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header" > " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); > return false; > } > > - mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; > - mbuf->ol_flags |= PKT_TX_TCP_CKSUM; > - mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > + if (mbuf->pkt_len - mbuf->l2_len > 1450) { > + dp_packet_hwol_set_tcp_seg(pkt); > + } > > - if (mbuf->ol_flags & PKT_TX_IPV4) { > - mbuf->ol_flags |= PKT_TX_IP_CKSUM; > + mbuf->ol_flags |= PKT_TX_TCP_CKSUM; > + if (mbuf->ol_flags & PKT_TX_TCP_SEG) { > + //mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > + mbuf->tso_segsz = 1450 - mbuf->l3_len - mbuf->l4_len; > + } else { > + mbuf->tso_segsz = 0; > } > } > return true; > @@ -2365,6 +2445,71 @@ 
netdev_dpdk_vhost_update_rx_counters(struct netdev_dpdk *dev, > } > } > > +static void > +netdev_linux_parse_l2(struct dp_packet *pkt, uint16_t *l4_proto) > +{ > + struct rte_mbuf *mbuf = (struct rte_mbuf *)pkt; > + struct rte_ether_hdr *eth_hdr = > + rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); > + ovs_be16 eth_type; > + int l2_len; > + int l3_len = 0; > + int l4_len = 0; > + > + l2_len = ETH_HEADER_LEN; Will cause compilation issue below, mixing base types, will expect restricted ovs_be16 but will be assigned unsigned short. > + eth_type = eth_hdr->ether_type; > + if (eth_type_vlan(eth_type)) { > + struct rte_vlan_hdr *vlan_hdr = > + (struct rte_vlan_hdr *)(eth_hdr + 1); > + Same issue as flagged above, unexpected type. > + eth_type = vlan_hdr->eth_proto; > + l2_len += VLAN_HEADER_LEN; > + } > + > + dp_packet_hwol_set_l2_len(pkt, l2_len); > + > + if (eth_type == htons(ETH_TYPE_IP)) { > + struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *) > + ((char *)eth_hdr + l2_len); > + > + l3_len = IP_HEADER_LEN; > + dp_packet_hwol_set_tx_ipv4(pkt); > + *l4_proto = ipv4_hdr->next_proto_id; > + } else if (eth_type == htons(RTE_ETHER_TYPE_IPV6)) { > + struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *) > + ((char *)eth_hdr + l2_len); > + l3_len = IPV6_HEADER_LEN; > + dp_packet_hwol_set_tx_ipv6(pkt); > + *l4_proto = ipv6_hdr->proto; > + } > + > + dp_packet_hwol_set_l3_len(pkt, l3_len); > + > + if (*l4_proto == IPPROTO_TCP) { > + struct rte_tcp_hdr *tcp_hdr = (struct rte_tcp_hdr *) > + ((char *)eth_hdr + l2_len + l3_len); > + > + l4_len = (tcp_hdr->data_off & 0xf0) >> 2; > + dp_packet_hwol_set_l4_len(pkt, l4_len); > + } > +} > + > +static void > +netdev_dpdk_parse_hdr(struct dp_packet *b) > +{ > + uint16_t l4_proto = 0; > + > + netdev_linux_parse_l2(b, &l4_proto); > + > + if (l4_proto == IPPROTO_TCP) { > + dp_packet_hwol_set_csum_tcp(b); > + } else if (l4_proto == IPPROTO_UDP) { > + dp_packet_hwol_set_csum_udp(b); > + } else if (l4_proto == IPPROTO_SCTP) { > + 
dp_packet_hwol_set_csum_sctp(b); > + } > +} > + > /* > * The receive path for the vhost port is the TX path out from guest. > */ > @@ -2378,6 +2523,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, > uint16_t qos_drops = 0; > int qid = rxq->queue_id * VIRTIO_QNUM + VIRTIO_TXQ; > int vid = netdev_dpdk_get_vid(dev); > + struct dp_packet *packet; > > if (OVS_UNLIKELY(vid < 0 || !dev->vhost_reconfigured > || !(dev->flags & NETDEV_UP))) { > @@ -2417,6 +2563,14 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, > batch->count = nb_rx; > dp_packet_batch_init_packet_fields(batch); > > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + struct rte_mbuf *mbuf = (struct rte_mbuf *)packet; > + > + /* Clear ol_flags and set it by parsing header */ > + mbuf->ol_flags = 0; > + netdev_dpdk_parse_hdr(packet); > + } > + > return 0; > } > > @@ -2737,13 +2891,18 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) > > mbuf_dest->tx_offload = pkt_orig->mbuf.tx_offload; > mbuf_dest->packet_type = pkt_orig->mbuf.packet_type; > - mbuf_dest->ol_flags |= (pkt_orig->mbuf.ol_flags & > - ~(EXT_ATTACHED_MBUF | IND_ATTACHED_MBUF)); > + mbuf_dest->ol_flags |= pkt_orig->mbuf.ol_flags; > + mbuf_dest->l2_len = pkt_orig->mbuf.l2_len; > + mbuf_dest->l3_len = pkt_orig->mbuf.l3_len; > + mbuf_dest->l4_len = pkt_orig->mbuf.l4_len; > + mbuf_dest->outer_l2_len = pkt_orig->mbuf.outer_l2_len; > + mbuf_dest->outer_l3_len = pkt_orig->mbuf.outer_l3_len; > > memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, > sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size)); > > - if (mbuf_dest->ol_flags & PKT_TX_L4_MASK) { > + if ((mbuf_dest->outer_l2_len == 0) && > + (mbuf_dest->ol_flags & PKT_TX_L4_MASK)) { > mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest) > - (char *)dp_packet_eth(pkt_dest); > mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest) > @@ -2773,6 +2932,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) > uint32_t 
tx_failure = 0; > uint32_t mtu_drops = 0; > uint32_t qos_drops = 0; > + struct rte_mbuf *mbuf; > > if (dev->type != DPDK_DEV_VHOST) { > /* Check if QoS has been configured for this netdev. */ > @@ -2801,6 +2961,9 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) > break; > } > > + mbuf = (struct rte_mbuf *)pkts[txcnt]; > + netdev_dpdk_prep_hwol_packet(dev, mbuf); > + > txcnt++; > } > > @@ -4949,6 +5112,10 @@ netdev_dpdk_reconfigure(struct netdev *netdev) > netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; > netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; > netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > + /* Enable VXLAN TSO support if available */ > + if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) { > + netdev->ol_flags |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD; > + } > if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { > netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; > } > diff --git a/lib/netdev.c b/lib/netdev.c > index 90962ee..378f04f 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -960,18 +960,12 @@ netdev_push_header(const struct netdev *netdev, > size_t i, size = dp_packet_batch_size(batch); > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { > - if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet) > - || dp_packet_hwol_l4_mask(packet))) { Removing the line below will break coverage and cause compilation failure. If coverage for the function is unused then you need to either remove the existing COVERAGE_DEFINE(netdev_push_header_drops); or else define a new one for use later in the function. 
> - COVERAGE_INC(netdev_push_header_drops); > - dp_packet_delete(packet); > - VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is " > - "not supported: packet dropped", > - netdev_get_name(netdev)); > - } else { > - netdev->netdev_class->push_header(netdev, packet, data); > - pkt_metadata_init(&packet->md, data->out_port); > - dp_packet_batch_refill(batch, packet, i); > + netdev->netdev_class->push_header(netdev, packet, data); > + if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) { > + dp_packet_hwol_set_vxlan_tcp_seg(packet); > } > + pkt_metadata_init(&packet->md, data->out_port); > + dp_packet_batch_refill(batch, packet, i); > } > > return 0; > It would be worth updating the documentation at some stage in the future as part of the patch, as the limitations directly call out VXLAN not being supported. http://docs.openvswitch.org/en/latest/topics/userspace-tso/#limitations BR Ian
diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 0430cca..7faa675 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -1032,6 +1032,64 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b) *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG; } +#ifdef DPDK_NETDEV +/* Mark packet 'b' for VXLAN TCP segmentation offloading. */ +static inline void +dp_packet_hwol_set_vxlan_tcp_seg(struct dp_packet *b) +{ + b->mbuf.ol_flags |= PKT_TX_TUNNEL_VXLAN; + b->mbuf.l2_len += sizeof(struct udp_header) + + sizeof(struct vxlanhdr); + b->mbuf.outer_l2_len = ETH_HEADER_LEN; + b->mbuf.outer_l3_len = IP_HEADER_LEN; +} + +/* Set l2_len for the packet 'b' */ +static inline void +dp_packet_hwol_set_l2_len(struct dp_packet *b, int l2_len) +{ + b->mbuf.l2_len = l2_len; +} + +/* Set l3_len for the packet 'b' */ +static inline void +dp_packet_hwol_set_l3_len(struct dp_packet *b, int l3_len) +{ + b->mbuf.l3_len = l3_len; +} + +/* Set l4_len for the packet 'b' */ +static inline void +dp_packet_hwol_set_l4_len(struct dp_packet *b, int l4_len) +{ + b->mbuf.l4_len = l4_len; +} +#else +/* Mark packet 'b' for VXLAN TCP segmentation offloading. 
*/ +static inline void +dp_packet_hwol_set_vxlan_tcp_seg(struct dp_packet *b) +{ +} + +/* Set l2_len for the packet 'b' */ +static inline void +dp_packet_hwol_set_l2_len(struct dp_packet *b, int l2_len) +{ +} + +/* Set l3_len for the packet 'b' */ +static inline void +dp_packet_hwol_set_l3_len(struct dp_packet *b, int l3_len) +{ +} + +/* Set l4_len for the packet 'b' */ +static inline void +dp_packet_hwol_set_l4_len(struct dp_packet *b, int l4_len) +{ +} +#endif /* DPDK_NETDEV */ + static inline bool dp_packet_ip_checksum_valid(const struct dp_packet *p) { diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 44ebf96..c2424f7 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -44,6 +44,7 @@ #include <rte_pci.h> #include <rte_version.h> #include <rte_vhost.h> +#include <rte_ip.h> #include "cmap.h" #include "coverage.h" @@ -405,6 +406,7 @@ enum dpdk_hw_ol_features { NETDEV_RX_HW_SCATTER = 1 << 2, NETDEV_TX_TSO_OFFLOAD = 1 << 3, NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4, + NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD = 1 << 5, }; /* @@ -988,6 +990,12 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS; + /* Enable VXLAN TSO support if available */ + if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) { + conf.txmode.offloads |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO; + conf.txmode.offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; + conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS; + } if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { conf.txmode.offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM; } @@ -1126,6 +1134,10 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) if ((info.tx_offload_capa & tx_tso_offload_capa) == tx_tso_offload_capa) { dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; + /* Enable VXLAN TSO support if available */ + if (info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO) { + dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD; + } if 
(info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM) { dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD; } else { @@ -2137,29 +2149,97 @@ static bool netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) { struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf); + uint16_t l4_proto = 0; + struct rte_ether_hdr *eth_hdr = + rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); + struct rte_ipv4_hdr *ip_hdr; + struct rte_ipv6_hdr *ip6_hdr; + + if (mbuf->ol_flags & PKT_TX_TUNNEL_VXLAN) { + /* Handle VXLAN TSO */ + struct rte_udp_hdr *udp_hdr; + + if (mbuf->ol_flags & PKT_TX_IPV4) { + ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); + udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1); + + /* outer IP checksum offload */ + ip_hdr->hdr_checksum = 0; + mbuf->ol_flags |= PKT_TX_OUTER_IP_CKSUM; + mbuf->ol_flags |= PKT_TX_OUTER_IPV4; + + ip_hdr = (struct rte_ipv4_hdr *) + ((uint8_t *)udp_hdr + mbuf->l2_len); + l4_proto = ip_hdr->next_proto_id; + + /* inner IP checksum offload */ + ip_hdr->hdr_checksum = 0; + mbuf->ol_flags |= PKT_TX_IP_CKSUM; + } else if (mbuf->ol_flags & PKT_TX_IPV6) { + ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); + udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1); + + /* outer IP checksum offload */ + ip_hdr->hdr_checksum = 0; + mbuf->ol_flags |= PKT_TX_OUTER_IP_CKSUM; + mbuf->ol_flags |= PKT_TX_OUTER_IPV4; + + ip6_hdr = (struct rte_ipv6_hdr *) + ((uint8_t *)udp_hdr + mbuf->l2_len); + l4_proto = ip6_hdr->proto; + + /* inner IP checksum offload offload */ + mbuf->ol_flags |= PKT_TX_IP_CKSUM; + } + } else if (mbuf->ol_flags & PKT_TX_L4_MASK) { + /* Handle VLAN TSO */ + /* no inner IP checksum for IPV6 */ + if (mbuf->ol_flags & PKT_TX_IPV4) { + ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); + l4_proto = ip_hdr->next_proto_id; + + /* IP checksum offload */ + ip_hdr->hdr_checksum = 0; + mbuf->ol_flags |= PKT_TX_IP_CKSUM; + } else if (mbuf->ol_flags & PKT_TX_IPV6) { + ip6_hdr = (struct rte_ipv6_hdr *)(eth_hdr + 1); + l4_proto = 
ip6_hdr->proto; + + /* IP checksum offload */ + mbuf->ol_flags |= PKT_TX_IP_CKSUM; + } - if (mbuf->ol_flags & PKT_TX_L4_MASK) { mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt); mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt); mbuf->outer_l2_len = 0; mbuf->outer_l3_len = 0; } - if (mbuf->ol_flags & PKT_TX_TCP_SEG) { - struct tcp_header *th = dp_packet_l4(pkt); - - if (!th) { + if ((mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) { //UDP + if (l4_proto != IPPROTO_UDP) { + VLOG_WARN_RL(&rl, "%s: UDP packet without L4 header" + " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); + return false; + } + /* VXLAN GSO can be done here */ + } else if (mbuf->ol_flags & PKT_TX_TCP_SEG || + mbuf->ol_flags & PKT_TX_TCP_CKSUM) { + if (l4_proto != IPPROTO_TCP) { VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header" " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); return false; } - mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; - mbuf->ol_flags |= PKT_TX_TCP_CKSUM; - mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; + if (mbuf->pkt_len - mbuf->l2_len > 1450) { + dp_packet_hwol_set_tcp_seg(pkt); + } - if (mbuf->ol_flags & PKT_TX_IPV4) { - mbuf->ol_flags |= PKT_TX_IP_CKSUM; + mbuf->ol_flags |= PKT_TX_TCP_CKSUM; + if (mbuf->ol_flags & PKT_TX_TCP_SEG) { + //mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; + mbuf->tso_segsz = 1450 - mbuf->l3_len - mbuf->l4_len; + } else { + mbuf->tso_segsz = 0; } } return true; @@ -2365,6 +2445,71 @@ netdev_dpdk_vhost_update_rx_counters(struct netdev_dpdk *dev, } } +static void +netdev_linux_parse_l2(struct dp_packet *pkt, uint16_t *l4_proto) +{ + struct rte_mbuf *mbuf = (struct rte_mbuf *)pkt; + struct rte_ether_hdr *eth_hdr = + rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); + ovs_be16 eth_type; + int l2_len; + int l3_len = 0; + int l4_len = 0; + + l2_len = ETH_HEADER_LEN; + eth_type = eth_hdr->ether_type; + if (eth_type_vlan(eth_type)) { + struct rte_vlan_hdr *vlan_hdr = + 
(struct rte_vlan_hdr *)(eth_hdr + 1); + + eth_type = vlan_hdr->eth_proto; + l2_len += VLAN_HEADER_LEN; + } + + dp_packet_hwol_set_l2_len(pkt, l2_len); + + if (eth_type == htons(ETH_TYPE_IP)) { + struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *) + ((char *)eth_hdr + l2_len); + + l3_len = IP_HEADER_LEN; + dp_packet_hwol_set_tx_ipv4(pkt); + *l4_proto = ipv4_hdr->next_proto_id; + } else if (eth_type == htons(RTE_ETHER_TYPE_IPV6)) { + struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *) + ((char *)eth_hdr + l2_len); + l3_len = IPV6_HEADER_LEN; + dp_packet_hwol_set_tx_ipv6(pkt); + *l4_proto = ipv6_hdr->proto; + } + + dp_packet_hwol_set_l3_len(pkt, l3_len); + + if (*l4_proto == IPPROTO_TCP) { + struct rte_tcp_hdr *tcp_hdr = (struct rte_tcp_hdr *) + ((char *)eth_hdr + l2_len + l3_len); + + l4_len = (tcp_hdr->data_off & 0xf0) >> 2; + dp_packet_hwol_set_l4_len(pkt, l4_len); + } +} + +static void +netdev_dpdk_parse_hdr(struct dp_packet *b) +{ + uint16_t l4_proto = 0; + + netdev_linux_parse_l2(b, &l4_proto); + + if (l4_proto == IPPROTO_TCP) { + dp_packet_hwol_set_csum_tcp(b); + } else if (l4_proto == IPPROTO_UDP) { + dp_packet_hwol_set_csum_udp(b); + } else if (l4_proto == IPPROTO_SCTP) { + dp_packet_hwol_set_csum_sctp(b); + } +} + /* * The receive path for the vhost port is the TX path out from guest. 
*/ @@ -2378,6 +2523,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, uint16_t qos_drops = 0; int qid = rxq->queue_id * VIRTIO_QNUM + VIRTIO_TXQ; int vid = netdev_dpdk_get_vid(dev); + struct dp_packet *packet; if (OVS_UNLIKELY(vid < 0 || !dev->vhost_reconfigured || !(dev->flags & NETDEV_UP))) { @@ -2417,6 +2563,14 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, batch->count = nb_rx; dp_packet_batch_init_packet_fields(batch); + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + struct rte_mbuf *mbuf = (struct rte_mbuf *)packet; + + /* Clear ol_flags and set it by parsing header */ + mbuf->ol_flags = 0; + netdev_dpdk_parse_hdr(packet); + } + return 0; } @@ -2737,13 +2891,18 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) mbuf_dest->tx_offload = pkt_orig->mbuf.tx_offload; mbuf_dest->packet_type = pkt_orig->mbuf.packet_type; - mbuf_dest->ol_flags |= (pkt_orig->mbuf.ol_flags & - ~(EXT_ATTACHED_MBUF | IND_ATTACHED_MBUF)); + mbuf_dest->ol_flags |= pkt_orig->mbuf.ol_flags; + mbuf_dest->l2_len = pkt_orig->mbuf.l2_len; + mbuf_dest->l3_len = pkt_orig->mbuf.l3_len; + mbuf_dest->l4_len = pkt_orig->mbuf.l4_len; + mbuf_dest->outer_l2_len = pkt_orig->mbuf.outer_l2_len; + mbuf_dest->outer_l3_len = pkt_orig->mbuf.outer_l3_len; memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size)); - if (mbuf_dest->ol_flags & PKT_TX_L4_MASK) { + if ((mbuf_dest->outer_l2_len == 0) && + (mbuf_dest->ol_flags & PKT_TX_L4_MASK)) { mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest) - (char *)dp_packet_eth(pkt_dest); mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest) @@ -2773,6 +2932,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) uint32_t tx_failure = 0; uint32_t mtu_drops = 0; uint32_t qos_drops = 0; + struct rte_mbuf *mbuf; if (dev->type != DPDK_DEV_VHOST) { /* Check if QoS has been configured for this netdev. 
*/ @@ -2801,6 +2961,9 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) break; } + mbuf = (struct rte_mbuf *)pkts[txcnt]; + netdev_dpdk_prep_hwol_packet(dev, mbuf); + txcnt++; } @@ -4949,6 +5112,10 @@ netdev_dpdk_reconfigure(struct netdev *netdev) netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; + /* Enable VXLAN TSO support if available */ + if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) { + netdev->ol_flags |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD; + } if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; } diff --git a/lib/netdev.c b/lib/netdev.c index 90962ee..378f04f 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -960,18 +960,12 @@ netdev_push_header(const struct netdev *netdev, size_t i, size = dp_packet_batch_size(batch); DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { - if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet) - || dp_packet_hwol_l4_mask(packet))) { - COVERAGE_INC(netdev_push_header_drops); - dp_packet_delete(packet); - VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is " - "not supported: packet dropped", - netdev_get_name(netdev)); - } else { - netdev->netdev_class->push_header(netdev, packet, data); - pkt_metadata_init(&packet->md, data->out_port); - dp_packet_batch_refill(batch, packet, i); + netdev->netdev_class->push_header(netdev, packet, data); + if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) { + dp_packet_hwol_set_vxlan_tcp_seg(packet); } + pkt_metadata_init(&packet->md, data->out_port); + dp_packet_batch_refill(batch, packet, i); } return 0;