From patchwork Thu Nov 21 15:20:47 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Rahul Lakkireddy X-Patchwork-Id: 1198999 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=chelsio.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 47Jk4t0JqDz9sR5 for ; Fri, 22 Nov 2019 02:29:34 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726716AbfKUP3d (ORCPT ); Thu, 21 Nov 2019 10:29:33 -0500 Received: from stargate.chelsio.com ([12.32.117.8]:47208 "EHLO stargate.chelsio.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726293AbfKUP3c (ORCPT ); Thu, 21 Nov 2019 10:29:32 -0500 Received: from localhost (scalar.blr.asicdesigners.com [10.193.185.94]) by stargate.chelsio.com (8.13.8/8.13.8) with ESMTP id xALFTQV8009021; Thu, 21 Nov 2019 07:29:26 -0800 From: Rahul Lakkireddy To: netdev@vger.kernel.org, linux-crypto@vger.kernel.org Cc: davem@davemloft.net, herbert@gondor.apana.org.au, nirranjan@chelsio.com, atul.gupta@chelsio.com, vishal@chelsio.com, dt@chelsio.com Subject: [PATCH net-next 1/3] cxgb4/chcr: update SGL DMA unmap for USO Date: Thu, 21 Nov 2019 20:50:47 +0530 Message-Id: <497c5592eea2473191b4c2fcb021f2fc3c72e2f2.1574347161.git.rahul.lakkireddy@chelsio.com> X-Mailer: git-send-email 2.5.3 In-Reply-To: References: In-Reply-To: References: Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org The FW_ETH_TX_EO_WR used for sending UDP Segmentation Offload (USO) requests expects the headers to be part of the descriptor and the payload to be part of the SGL containing the DMA mapped addresses. Hence, the DMA address in the first entry of the SGL can start after the packet headers. Currently, unmap_sgl() tries to unmap from this wrong offset, instead of the originally mapped DMA address. So, use existing unmap_skb() instead, which takes originally saved DMA addresses as input. Update all necessary Tx paths to save the original DMA addresses, so that unmap_skb() can unmap them properly. Signed-off-by: Rahul Lakkireddy --- drivers/crypto/chelsio/chcr_ipsec.c | 27 ++-- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 19 ++- .../ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/sge.c | 119 +++++------------- 4 files changed, 52 insertions(+), 115 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c index 24355680f30a..9da0f93a330b 100644 --- a/drivers/crypto/chelsio/chcr_ipsec.c +++ b/drivers/crypto/chelsio/chcr_ipsec.c @@ -673,16 +673,16 @@ static inline void txq_advance(struct sge_txq *q, unsigned int n) int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev) { struct xfrm_state *x = xfrm_input_state(skb); + unsigned int last_desc, ndesc, flits = 0; struct ipsec_sa_entry *sa_entry; u64 *pos, *end, *before, *sgl; + struct tx_sw_desc *sgl_sdesc; int qidx, left, credits; - unsigned int flits = 0, ndesc; - struct adapter *adap; + bool immediate = false; struct sge_eth_txq *q; + struct adapter *adap; struct port_info *pi; - dma_addr_t addr[MAX_SKB_FRAGS + 1]; struct sec_path *sp; - bool immediate = false; if (!x->xso.offload_handle) return NETDEV_TX_BUSY; @@ -715,8 +715,14 @@ out_free: dev_kfree_skb_any(skb); return NETDEV_TX_BUSY; } + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + if (!immediate && - unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) { + unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); q->mapping_err++; goto out_free; } @@ -742,17 +748,10 @@ out_free: dev_kfree_skb_any(skb); cxgb4_inline_tx_skb(skb, &q->q, sgl); dev_consume_skb_any(skb); } else { - int last_desc; - cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, - 0, addr); + 0, sgl_sdesc->addr); skb_orphan(skb); - - last_desc = q->q.pidx + ndesc - 1; - if (last_desc >= q->q.size) - last_desc -= q->q.size; - q->q.sdesc[last_desc].skb = skb; - q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)sgl; + sgl_sdesc->skb = skb; } txq_advance(&q->q, ndesc); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 3121ed83d8e2..61a2cf62f694 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -735,7 +735,12 @@ struct tx_desc { __be64 flit[8]; }; -struct tx_sw_desc; +struct ulptx_sgl; + +struct tx_sw_desc { + struct sk_buff *skb; /* SKB to free after getting completion */ + dma_addr_t addr[MAX_SKB_FRAGS + 1]; /* DMA mapped addresses */ +}; struct sge_txq { unsigned int in_use; /* # of in-use Tx descriptors */ @@ -814,15 +819,10 @@ enum sge_eosw_state { CXGB4_EO_STATE_FLOWC_CLOSE_REPLY, /* Waiting for FLOWC close reply */ }; -struct sge_eosw_desc { - struct sk_buff *skb; /* SKB to free after getting completion */ - dma_addr_t addr[MAX_SKB_FRAGS + 1]; /* DMA mapped addresses */ -}; - struct sge_eosw_txq { spinlock_t lock; /* Per queue lock to synchronize completions */ enum sge_eosw_state state; /* Current ETHOFLD State */ - struct sge_eosw_desc *desc; /* Descriptor ring to hold packets */ + struct tx_sw_desc *desc; /* Descriptor ring to hold packets */ u32 ndesc; /* Number of descriptors */ u32 pidx; /* Current Producer Index */ u32 last_pidx; /* Last successfully transmitted Producer Index */ @@ -1151,11 +1151,6 @@ enum { SCHED_CLASS_RATEMODE_ABS = 1, /* Kb/s */ }; -struct tx_sw_desc { /* SW state per Tx descriptor */ - struct sk_buff *skb; - struct ulptx_sgl *sgl; -}; - /* Support for "sched_queue" command to allow one or more NIC TX Queues * to be bound to a TX Scheduling Class. */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c index db55673b77bd..477973d2e341 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c @@ -70,7 +70,7 @@ static int cxgb4_init_eosw_txq(struct net_device *dev, u32 eotid, u32 hwqid) { struct adapter *adap = netdev2adap(dev); - struct sge_eosw_desc *ring; + struct tx_sw_desc *ring; memset(eosw_txq, 0, sizeof(*eosw_txq)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index a0400b9a11e9..5ad54493f825 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -300,65 +300,6 @@ static void deferred_unmap_destructor(struct sk_buff *skb) } #endif -static void unmap_sgl(struct device *dev, const struct sk_buff *skb, - const struct ulptx_sgl *sgl, const struct sge_txq *q) -{ - const struct ulptx_sge_pair *p; - unsigned int nfrags = skb_shinfo(skb)->nr_frags; - - if (likely(skb_headlen(skb))) - dma_unmap_single(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0), - DMA_TO_DEVICE); - else { - dma_unmap_page(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0), - DMA_TO_DEVICE); - nfrags--; - } - - /* - * the complexity below is because of the possibility of a wrap-around - * in the middle of an SGL - */ - for (p = sgl->sge; nfrags >= 2; nfrags -= 2) { - if (likely((u8 *)(p + 1) <= (u8 *)q->stat)) { -unmap: dma_unmap_page(dev, be64_to_cpu(p->addr[0]), - ntohl(p->len[0]), DMA_TO_DEVICE); - dma_unmap_page(dev, be64_to_cpu(p->addr[1]), - ntohl(p->len[1]), DMA_TO_DEVICE); - p++; - } else if ((u8 *)p == (u8 *)q->stat) { - p = (const struct ulptx_sge_pair *)q->desc; - goto unmap; - } else if ((u8 *)p + 8 == (u8 *)q->stat) { - const __be64 *addr = (const __be64 *)q->desc; - - dma_unmap_page(dev, be64_to_cpu(addr[0]), - ntohl(p->len[0]), DMA_TO_DEVICE); - dma_unmap_page(dev, be64_to_cpu(addr[1]), - ntohl(p->len[1]), DMA_TO_DEVICE); - p = (const struct ulptx_sge_pair *)&addr[2]; - } else { - const __be64 *addr = (const __be64 *)q->desc; - - dma_unmap_page(dev, be64_to_cpu(p->addr[0]), - ntohl(p->len[0]), DMA_TO_DEVICE); - dma_unmap_page(dev, be64_to_cpu(addr[0]), - ntohl(p->len[1]), DMA_TO_DEVICE); - p = (const struct ulptx_sge_pair *)&addr[1]; - } - } - if (nfrags) { - __be64 addr; - - if ((u8 *)p == (u8 *)q->stat) - p = (const struct ulptx_sge_pair *)q->desc; - addr = (u8 *)p + 16 <= (u8 *)q->stat ? p->addr[0] : - *(const __be64 *)q->desc; - dma_unmap_page(dev, be64_to_cpu(addr), ntohl(p->len[0]), - DMA_TO_DEVICE); - } -} - /** * free_tx_desc - reclaims Tx descriptors and their buffers * @adapter: the adapter @@ -372,15 +313,16 @@ unmap: dma_unmap_page(dev, be64_to_cpu(p->addr[0]), void free_tx_desc(struct adapter *adap, struct sge_txq *q, unsigned int n, bool unmap) { - struct tx_sw_desc *d; unsigned int cidx = q->cidx; - struct device *dev = adap->pdev_dev; + struct tx_sw_desc *d; d = &q->sdesc[cidx]; while (n--) { if (d->skb) { /* an SGL is present */ - if (unmap) - unmap_sgl(dev, d->skb, d->sgl, q); + if (unmap && d->addr[0]) { + unmap_skb(adap->pdev_dev, d->skb, d->addr); + memset(d->addr, 0, sizeof(d->addr)); + } dev_consume_skb_any(d->skb); d->skb = NULL; } @@ -1414,13 +1356,13 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) { enum cpl_tx_tnl_lso_type tnl_type = TX_TNL_TYPE_OPAQUE; bool ptp_enabled = is_ptp_enabled(skb, dev); - dma_addr_t addr[MAX_SKB_FRAGS + 1]; + unsigned int last_desc, flits, ndesc; const struct skb_shared_info *ssi; + struct tx_sw_desc *sgl_sdesc; struct fw_eth_tx_pkt_wr *wr; struct cpl_tx_pkt_core *cpl; int len, qidx, credits, ret; const struct port_info *pi; - unsigned int flits, ndesc; bool immediate = false; u32 wr_mid, ctrl0, op; u64 cntrl, *end, *sgl; @@ -1489,8 +1431,14 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (skb->encapsulation && chip_ver > CHELSIO_T5) tnl_type = cxgb_encap_offload_supported(skb); + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + if (!immediate && - unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) { + unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); q->mapping_err++; if (ptp_enabled) spin_unlock(&adap->ptp_lock); @@ -1618,16 +1566,10 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) cxgb4_inline_tx_skb(skb, &q->q, sgl); dev_consume_skb_any(skb); } else { - int last_desc; - - cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, 0, addr); + cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, 0, + sgl_sdesc->addr); skb_orphan(skb); - - last_desc = q->q.pidx + ndesc - 1; - if (last_desc >= q->q.size) - last_desc -= q->q.size; - q->q.sdesc[last_desc].skb = skb; - q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)sgl; + sgl_sdesc->skb = skb; } txq_advance(&q->q, ndesc); @@ -1725,12 +1667,12 @@ static inline unsigned int t4vf_calc_tx_flits(const struct sk_buff *skb) static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) { - dma_addr_t addr[MAX_SKB_FRAGS + 1]; + unsigned int last_desc, flits, ndesc; const struct skb_shared_info *ssi; struct fw_eth_tx_pkt_vm_wr *wr; + struct tx_sw_desc *sgl_sdesc; struct cpl_tx_pkt_core *cpl; const struct port_info *pi; - unsigned int flits, ndesc; struct sge_eth_txq *txq; struct adapter *adapter; int qidx, credits, ret; @@ -1782,12 +1724,19 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } + last_desc = txq->q.pidx + ndesc - 1; + if (last_desc >= txq->q.size) + last_desc -= txq->q.size; + sgl_sdesc = &txq->q.sdesc[last_desc]; + if (!t4vf_is_eth_imm(skb) && - unlikely(cxgb4_map_skb(adapter->pdev_dev, skb, addr) < 0)) { + unlikely(cxgb4_map_skb(adapter->pdev_dev, skb, + sgl_sdesc->addr) < 0)) { /* We need to map the skb into PCI DMA space (because it can't * be in-lined directly into the Work Request) and the mapping * operation failed. Record the error and drop the packet. */ + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); txq->mapping_err++; goto out_free; } @@ -1962,7 +1911,6 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, */ struct ulptx_sgl *sgl = (struct ulptx_sgl *)(cpl + 1); struct sge_txq *tq = &txq->q; - int last_desc; /* If the Work Request header was an exact multiple of our TX * Descriptor length, then it's possible that the starting SGL @@ -1976,14 +1924,9 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, ((void *)end - (void *)tq->stat)); } - cxgb4_write_sgl(skb, tq, sgl, end, 0, addr); + cxgb4_write_sgl(skb, tq, sgl, end, 0, sgl_sdesc->addr); skb_orphan(skb); - - last_desc = tq->pidx + ndesc - 1; - if (last_desc >= tq->size) - last_desc -= tq->size; - tq->sdesc[last_desc].skb = skb; - tq->sdesc[last_desc].sgl = sgl; + sgl_sdesc->skb = skb; } /* Advance our internal TX Queue state, tell the hardware about @@ -2035,7 +1978,7 @@ static inline void eosw_txq_advance_index(u32 *idx, u32 n, u32 max) void cxgb4_eosw_txq_free_desc(struct adapter *adap, struct sge_eosw_txq *eosw_txq, u32 ndesc) { - struct sge_eosw_desc *d; + struct tx_sw_desc *d; d = &eosw_txq->desc[eosw_txq->last_cidx]; while (ndesc--) { @@ -2167,7 +2110,7 @@ static void ethofld_hard_xmit(struct net_device *dev, struct cpl_tx_pkt_core *cpl; struct fw_eth_tx_eo_wr *wr; bool skip_eotx_wr = false; - struct sge_eosw_desc *d; + struct tx_sw_desc *d; struct sk_buff *skb; u8 flits, ndesc; int left; From patchwork Thu Nov 21 15:20:48 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Rahul Lakkireddy X-Patchwork-Id: 1199000 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=chelsio.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 47Jk4x3Crgz9sPK for ; Fri, 22 Nov 2019 02:29:37 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726774AbfKUP3g (ORCPT ); Thu, 21 Nov 2019 10:29:36 -0500 Received: from stargate.chelsio.com ([12.32.117.8]:62182 "EHLO stargate.chelsio.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726279AbfKUP3g (ORCPT ); Thu, 21 Nov 2019 10:29:36 -0500 Received: from localhost (scalar.blr.asicdesigners.com [10.193.185.94]) by stargate.chelsio.com (8.13.8/8.13.8) with ESMTP id xALFTTTq009024; Thu, 21 Nov 2019 07:29:30 -0800 From: Rahul Lakkireddy To: netdev@vger.kernel.org, linux-crypto@vger.kernel.org Cc: davem@davemloft.net, herbert@gondor.apana.org.au, nirranjan@chelsio.com, atul.gupta@chelsio.com, vishal@chelsio.com, dt@chelsio.com Subject: [PATCH net-next 2/3] cxgb4: add UDP segmentation offload support Date: Thu, 21 Nov 2019 20:50:48 +0530 Message-Id: <2ddeefa22022f3949901c96892b8bf56a369f724.1574347161.git.rahul.lakkireddy@chelsio.com> X-Mailer: git-send-email 2.5.3 In-Reply-To: References: In-Reply-To: References: Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Implement and export UDP segmentation offload (USO) support for both NIC and MQPRIO QoS offload Tx path. Update appropriate logic in Tx to parse GSO info in skb and configure FW_ETH_TX_EO_WR request needed to perform USO. Signed-off-by: Rahul Lakkireddy --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + .../ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 1 + .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 3 + .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 11 +- drivers/net/ethernet/chelsio/cxgb4/sge.c | 159 ++++++++++++------ drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 14 +- 6 files changed, 139 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 61a2cf62f694..04cb8909feeb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -772,6 +772,7 @@ struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */ u8 dbqt; /* SGE Doorbell Queue Timer in use */ unsigned int dbqtimerix; /* SGE Doorbell Queue Timer Index */ unsigned long tso; /* # of TSO requests */ + unsigned long uso; /* # of USO requests */ unsigned long tx_cso; /* # of Tx checksum offloads */ unsigned long vlan_ins; /* # of Tx VLAN insertions */ unsigned long mapping_err; /* # of I/O MMU packet mapping errors */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index a13b03f771cc..fa229d0f1016 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2748,6 +2748,7 @@ do { \ RL("RxDrops:", stats.rx_drops); RL("RxBadPkts:", stats.bad_rx_pkts); TL("TSO:", tso); + TL("USO:", uso); TL("TxCSO:", tx_cso); TL("VLANins:", vlan_ins); TL("TxQFull:", q.stops); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 76538f4cd595..f57457453561 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -91,6 +91,7 @@ static const char stats_strings[][ETH_GSTRING_LEN] = { "rx_bg3_frames_trunc ", "tso ", + "uso ", "tx_csum_offload ", "rx_csum_good ", "vlan_extractions ", @@ -220,6 +221,7 @@ static void get_strings(struct net_device *dev, u32 stringset, u8 *data) */ struct queue_port_stats { u64 tso; + u64 uso; u64 tx_csum; u64 rx_csum; u64 vlan_ex; @@ -247,6 +249,7 @@ static void collect_sge_port_stats(const struct adapter *adap, memset(s, 0, sizeof(*s)); for (i = 0; i < p->nqsets; i++, rx++, tx++) { s->tso += tx->tso; + s->uso += tx->uso; s->tx_csum += tx->tx_cso; s->rx_csum += rx->stats.rx_cso; s->vlan_ex += rx->stats.vlan_ex; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e8a1826a1e90..12ff69b3ba91 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1136,11 +1136,17 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, if (dev->num_tc) { struct port_info *pi = netdev2pinfo(dev); + u8 ver, proto; + + ver = ip_hdr(skb)->version; + proto = (ver == 6) ? ipv6_hdr(skb)->nexthdr : + ip_hdr(skb)->protocol; /* Send unsupported traffic pattern to normal NIC queues. */ txq = netdev_pick_tx(dev, skb, sb_dev); if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) || - ip_hdr(skb)->protocol != IPPROTO_TCP) + skb->encapsulation || + (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) txq = txq % pi->nqsets; return txq; @@ -5838,7 +5844,8 @@ static void free_some_resources(struct adapter *adapter) t4_fw_bye(adapter, adapter->pf); } -#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) +#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | \ + NETIF_F_GSO_UDP_L4) #define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \ NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA) #define SEGMENT_SIZE 128 diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 5ad54493f825..308e54d7c5e3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -734,6 +734,8 @@ static inline int is_eth_imm(const struct sk_buff *skb, unsigned int chip_ver) chip_ver > CHELSIO_T5) { hdrlen = sizeof(struct cpl_tx_tnl_lso); hdrlen += sizeof(struct cpl_tx_pkt_core); + } else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + return 0; } else { hdrlen = skb_shinfo(skb)->gso_size ? sizeof(struct cpl_tx_pkt_lso_core) : 0; @@ -775,12 +777,20 @@ static inline unsigned int calc_tx_flits(const struct sk_buff *skb, */ flits = sgl_len(skb_shinfo(skb)->nr_frags + 1); if (skb_shinfo(skb)->gso_size) { - if (skb->encapsulation && chip_ver > CHELSIO_T5) + if (skb->encapsulation && chip_ver > CHELSIO_T5) { hdrlen = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_tnl_lso); - else + } else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + u32 pkt_hdrlen; + + pkt_hdrlen = eth_get_headlen(skb->dev, skb->data, + skb_headlen(skb)); + hdrlen = sizeof(struct fw_eth_tx_eo_wr) + + round_up(pkt_hdrlen, 16); + } else { hdrlen = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_lso_core); + } hdrlen += sizeof(struct cpl_tx_pkt_core); flits += (hdrlen / sizeof(__be64)); @@ -1345,6 +1355,25 @@ static inline int cxgb4_validate_skb(struct sk_buff *skb, return 0; } +static inline void *write_eo_udp_wr(struct sk_buff *skb, + struct fw_eth_tx_eo_wr *wr, u32 hdr_len) +{ + wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG; + wr->u.udpseg.ethlen = skb_network_offset(skb); + wr->u.udpseg.iplen = cpu_to_be16(skb_network_header_len(skb)); + wr->u.udpseg.udplen = sizeof(struct udphdr); + wr->u.udpseg.rtplen = 0; + wr->u.udpseg.r4 = 0; + if (skb_shinfo(skb)->gso_size) + wr->u.udpseg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + else + wr->u.udpseg.mss = cpu_to_be16(skb->len - hdr_len); + wr->u.udpseg.schedpktsize = wr->u.udpseg.mss; + wr->u.udpseg.plen = cpu_to_be32(skb->len - hdr_len); + + return (void *)(wr + 1); +} + /** * cxgb4_eth_xmit - add a packet to an Ethernet Tx queue * @skb: the packet @@ -1357,14 +1386,15 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) enum cpl_tx_tnl_lso_type tnl_type = TX_TNL_TYPE_OPAQUE; bool ptp_enabled = is_ptp_enabled(skb, dev); unsigned int last_desc, flits, ndesc; + u32 wr_mid, ctrl0, op, sgl_off = 0; const struct skb_shared_info *ssi; + int len, qidx, credits, ret, left; struct tx_sw_desc *sgl_sdesc; + struct fw_eth_tx_eo_wr *eowr; struct fw_eth_tx_pkt_wr *wr; struct cpl_tx_pkt_core *cpl; - int len, qidx, credits, ret; const struct port_info *pi; bool immediate = false; - u32 wr_mid, ctrl0, op; u64 cntrl, *end, *sgl; struct sge_eth_txq *q; unsigned int chip_ver; @@ -1469,13 +1499,17 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) } wr = (void *)&q->q.desc[q->q.pidx]; + eowr = (void *)&q->q.desc[q->q.pidx]; wr->equiq_to_len16 = htonl(wr_mid); wr->r3 = cpu_to_be64(0); - end = (u64 *)wr + flits; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + end = (u64 *)eowr + flits; + else + end = (u64 *)wr + flits; len = immediate ? skb->len : 0; len += sizeof(*cpl); - if (ssi->gso_size) { + if (ssi->gso_size && !(ssi->gso_type & SKB_GSO_UDP_L4)) { struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); struct cpl_tx_tnl_lso *tnl_lso = (void *)(wr + 1); @@ -1507,20 +1541,29 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) cntrl = hwcsum(adap->params.chip, skb); } sgl = (u64 *)(cpl + 1); /* sgl start here */ - if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) { - /* If current position is already at the end of the - * txq, reset the current to point to start of the queue - * and update the end ptr as well. - */ - if (sgl == (u64 *)q->q.stat) { - int left = (u8 *)end - (u8 *)q->q.stat; - - end = (void *)q->q.desc + left; - sgl = (void *)q->q.desc; - } - } q->tso++; q->tx_cso += ssi->gso_segs; + } else if (ssi->gso_size) { + u64 *start; + u32 hdrlen; + + hdrlen = eth_get_headlen(dev, skb->data, skb_headlen(skb)); + len += hdrlen; + wr->op_immdlen = cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_EO_WR) | + FW_ETH_TX_EO_WR_IMMDLEN_V(len)); + cpl = write_eo_udp_wr(skb, eowr, hdrlen); + cntrl = hwcsum(adap->params.chip, skb); + + start = (u64 *)(cpl + 1); + sgl = (u64 *)inline_tx_skb_header(skb, &q->q, (void *)start, + hdrlen); + if (unlikely(start > sgl)) { + left = (u8 *)end - (u8 *)q->q.stat; + end = (void *)q->q.desc + left; + } + sgl_off = hdrlen; + q->uso++; + q->tx_cso += ssi->gso_segs; } else { if (ptp_enabled) op = FW_PTP_TX_PKT_WR; @@ -1537,6 +1580,16 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) } } + if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) { + /* If current position is already at the end of the + * txq, reset the current to point to start of the queue + * and update the end ptr as well. + */ + left = (u8 *)end - (u8 *)q->q.stat; + end = (void *)q->q.desc + left; + sgl = (void *)q->q.desc; + } + if (skb_vlan_tag_present(skb)) { q->vlan_ins++; cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb)); @@ -1566,7 +1619,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) cxgb4_inline_tx_skb(skb, &q->q, sgl); dev_consume_skb_any(skb); } else { - cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, 0, + cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, sgl_off, sgl_sdesc->addr); skb_orphan(skb); sgl_sdesc->skb = skb; @@ -2024,7 +2077,8 @@ static inline u8 ethofld_calc_tx_flits(struct adapter *adap, u32 wrlen; wrlen = sizeof(struct fw_eth_tx_eo_wr) + sizeof(struct cpl_tx_pkt_core); - if (skb_shinfo(skb)->gso_size) + if (skb_shinfo(skb)->gso_size && + !(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)) wrlen += sizeof(struct cpl_tx_pkt_lso_core); wrlen += roundup(hdr_len, 16); @@ -2032,10 +2086,14 @@ static inline u8 ethofld_calc_tx_flits(struct adapter *adap, /* Packet headers + WR + CPLs */ flits = DIV_ROUND_UP(wrlen, 8); - if (skb_shinfo(skb)->nr_frags > 0) - nsgl = sgl_len(skb_shinfo(skb)->nr_frags); - else if (skb->len - hdr_len) + if (skb_shinfo(skb)->nr_frags > 0) { + if (skb_headlen(skb) - hdr_len) + nsgl = sgl_len(skb_shinfo(skb)->nr_frags + 1); + else + nsgl = sgl_len(skb_shinfo(skb)->nr_frags); + } else if (skb->len - hdr_len) { nsgl = sgl_len(1); + } return flits + nsgl; } @@ -2049,16 +2107,16 @@ static inline void *write_eo_wr(struct adapter *adap, struct cpl_tx_pkt_core *cpl; u32 immd_len, wrlen16; bool compl = false; + u8 ver, proto; + + ver = ip_hdr(skb)->version; + proto = (ver == 6) ? ipv6_hdr(skb)->nexthdr : ip_hdr(skb)->protocol; wrlen16 = DIV_ROUND_UP(wrlen, 16); immd_len = sizeof(struct cpl_tx_pkt_core); - if (skb_shinfo(skb)->gso_size) { - if (skb->encapsulation && - CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) - immd_len += sizeof(struct cpl_tx_tnl_lso); - else - immd_len += sizeof(struct cpl_tx_pkt_lso_core); - } + if (skb_shinfo(skb)->gso_size && + !(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)) + immd_len += sizeof(struct cpl_tx_pkt_lso_core); immd_len += hdr_len; if (!eosw_txq->ncompl || @@ -2074,23 +2132,27 @@ static inline void *write_eo_wr(struct adapter *adap, wr->equiq_to_len16 = cpu_to_be32(FW_WR_LEN16_V(wrlen16) | FW_WR_FLOWID_V(eosw_txq->hwtid)); wr->r3 = 0; - wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG; - wr->u.tcpseg.ethlen = skb_network_offset(skb); - wr->u.tcpseg.iplen = cpu_to_be16(skb_network_header_len(skb)); - wr->u.tcpseg.tcplen = tcp_hdrlen(skb); - wr->u.tcpseg.tsclk_tsoff = 0; - wr->u.tcpseg.r4 = 0; - wr->u.tcpseg.r5 = 0; - wr->u.tcpseg.plen = cpu_to_be32(skb->len - hdr_len); - - if (ssi->gso_size) { - struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); - - wr->u.tcpseg.mss = cpu_to_be16(ssi->gso_size); - cpl = write_tso_wr(adap, skb, lso); + if (proto == IPPROTO_UDP) { + cpl = write_eo_udp_wr(skb, wr, hdr_len); } else { - wr->u.tcpseg.mss = cpu_to_be16(0xffff); - cpl = (void *)(wr + 1); + wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG; + wr->u.tcpseg.ethlen = skb_network_offset(skb); + wr->u.tcpseg.iplen = cpu_to_be16(skb_network_header_len(skb)); + wr->u.tcpseg.tcplen = tcp_hdrlen(skb); + wr->u.tcpseg.tsclk_tsoff = 0; + wr->u.tcpseg.r4 = 0; + wr->u.tcpseg.r5 = 0; + wr->u.tcpseg.plen = cpu_to_be32(skb->len - hdr_len); + + if (ssi->gso_size) { + struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); + + wr->u.tcpseg.mss = cpu_to_be16(ssi->gso_size); + cpl = write_tso_wr(adap, skb, lso); + } else { + wr->u.tcpseg.mss = cpu_to_be16(0xffff); + cpl = (void *)(wr + 1); + } } eosw_txq->cred -= wrlen16; @@ -4312,7 +4374,10 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, txq->q.q_type = CXGB4_TXQ_ETH; init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_G(ntohl(c.eqid_pkd))); txq->txq = netdevq; - txq->tso = txq->tx_cso = txq->vlan_ins = 0; + txq->tso = 0; + txq->uso = 0; + txq->tx_cso = 0; + txq->vlan_ins = 0; txq->mapping_err = 0; txq->dbqt = dbqt; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 414e5cca293e..ac4fb43bdec6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -536,7 +536,8 @@ struct fw_eth_tx_pkt_wr { }; enum fw_eth_tx_eo_type { - FW_ETH_TX_EO_TYPE_TCPSEG = 1, + FW_ETH_TX_EO_TYPE_UDPSEG = 0, + FW_ETH_TX_EO_TYPE_TCPSEG, }; struct fw_eth_tx_eo_wr { @@ -544,6 +545,17 @@ struct fw_eth_tx_eo_wr { __be32 equiq_to_len16; __be64 r3; union fw_eth_tx_eo { + struct fw_eth_tx_eo_udpseg { + __u8 type; + __u8 ethlen; + __be16 iplen; + __u8 udplen; + __u8 rtplen; + __be16 r4; + __be16 mss; + __be16 schedpktsize; + __be32 plen; + } udpseg; struct fw_eth_tx_eo_tcpseg { __u8 type; __u8 ethlen; From patchwork Thu Nov 21 15:20:49 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Rahul Lakkireddy X-Patchwork-Id: 1199001 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=chelsio.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 47Jk566yZpz9sPK for ; Fri, 22 Nov 2019 02:29:46 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726833AbfKUP3q (ORCPT ); Thu, 21 Nov 2019 10:29:46 -0500 Received: from stargate.chelsio.com ([12.32.117.8]:3802 "EHLO stargate.chelsio.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726279AbfKUP3p (ORCPT ); Thu, 21 Nov 2019 10:29:45 -0500 Received: from localhost (scalar.blr.asicdesigners.com [10.193.185.94]) by stargate.chelsio.com (8.13.8/8.13.8) with ESMTP id xALFTXPc009027; Thu, 21 Nov 2019 07:29:34 -0800 From: Rahul Lakkireddy To: netdev@vger.kernel.org, linux-crypto@vger.kernel.org Cc: davem@davemloft.net, herbert@gondor.apana.org.au, nirranjan@chelsio.com, atul.gupta@chelsio.com, vishal@chelsio.com, dt@chelsio.com Subject: [PATCH net-next 3/3] cxgb4: add stats for MQPRIO QoS offload Tx path Date: Thu, 21 Nov 2019 20:50:49 +0530 Message-Id: <9898b911b3d8b71496894340d400339be3a14eb5.1574347161.git.rahul.lakkireddy@chelsio.com> X-Mailer: git-send-email 2.5.3 In-Reply-To: References: In-Reply-To: References: Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Export necessary stats for traffic flowing through MQPRIO QoS offload Tx path. Signed-off-by: Rahul Lakkireddy --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 13 ++++++++++++- drivers/net/ethernet/chelsio/cxgb4/sge.c | 14 ++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 04cb8909feeb..a70ac2097892 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -850,6 +850,7 @@ struct sge_eohw_txq { struct sge_txq q; /* HW Txq */ struct adapter *adap; /* Backpointer to adapter */ unsigned long tso; /* # of TSO requests */ + unsigned long uso; /* # of USO requests */ unsigned long tx_cso; /* # of Tx checksum offloads */ unsigned long vlan_ins; /* # of Tx VLAN insertions */ unsigned long mapping_err; /* # of I/O MMU packet mapping errors */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index fa229d0f1016..93868dca186a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2797,6 +2797,7 @@ do { \ RL("RxAN", stats.an); RL("RxNoMem", stats.nomem); TL("TSO:", tso); + TL("USO:", uso); TL("TxCSO:", tx_cso); TL("VLANins:", vlan_ins); TL("TxQFull:", q.stops); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index f57457453561..20ab3b6285a2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -242,9 +242,10 @@ static void collect_sge_port_stats(const struct adapter *adap, const struct port_info *p, struct queue_port_stats *s) { - int i; const struct sge_eth_txq *tx = &adap->sge.ethtxq[p->first_qset]; const struct sge_eth_rxq *rx = &adap->sge.ethrxq[p->first_qset]; + struct sge_eohw_txq *eohw_tx; + unsigned int i; memset(s, 0, sizeof(*s)); for (i = 0; i < p->nqsets; i++, rx++, tx++) { @@ -257,6 +258,16 @@ static void collect_sge_port_stats(const struct adapter *adap, s->gro_pkts += rx->stats.lro_pkts; s->gro_merged += rx->stats.lro_merged; } + + if (adap->sge.eohw_txq) { + eohw_tx = &adap->sge.eohw_txq[p->first_qset]; + for (i = 0; i < p->nqsets; i++, eohw_tx++) { + s->tso += eohw_tx->tso; + s->uso += eohw_tx->uso; + s->tx_csum += eohw_tx->tx_cso; + s->vlan_ins += eohw_tx->vlan_ins; + } + } } static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 308e54d7c5e3..f9441e803d6b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2262,6 +2262,19 @@ static void ethofld_hard_xmit(struct net_device *dev, d->addr); } + if (skb_shinfo(skb)->gso_size) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + eohw_txq->uso++; + else + eohw_txq->tso++; + eohw_txq->tx_cso += skb_shinfo(skb)->gso_segs; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + eohw_txq->tx_cso++; + } + + if (skb_vlan_tag_present(skb)) + eohw_txq->vlan_ins++; + txq_advance(&eohw_txq->q, ndesc); cxgb4_ring_tx_db(adap, &eohw_txq->q, ndesc); eosw_txq_advance_index(&eosw_txq->last_pidx, 1, eosw_txq->ndesc); @@ -4546,6 +4559,7 @@ int t4_sge_alloc_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq, spin_lock_init(&txq->lock); txq->adap = adap; txq->tso = 0; + txq->uso = 0; txq->tx_cso = 0; txq->vlan_ins = 0; txq->mapping_err = 0;