From patchwork Sun Nov 27 14:51:03 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Mintz, Yuval" X-Patchwork-Id: 699669 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3tRXqF4QKFz9vF7 for ; Mon, 28 Nov 2016 01:52:57 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752379AbcK0Owy (ORCPT ); Sun, 27 Nov 2016 09:52:54 -0500 Received: from mx0a-0016ce01.pphosted.com ([67.231.148.157]:47045 "EHLO mx0b-0016ce01.pphosted.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1752135AbcK0Ows (ORCPT ); Sun, 27 Nov 2016 09:52:48 -0500 Received: from pps.filterd (m0095336.ppops.net [127.0.0.1]) by mx0a-0016ce01.pphosted.com (8.16.0.20/8.16.0.20) with SMTP id uAREqeqZ026378; Sun, 27 Nov 2016 06:52:40 -0800 Received: from avcashub1.qlogic.com ([198.186.0.115]) by mx0a-0016ce01.pphosted.com with ESMTP id 26y9c8j1cx-1 (version=TLSv1 cipher=ECDHE-RSA-AES256-SHA bits=256 verify=NOT); Sun, 27 Nov 2016 06:52:40 -0800 Received: from localhost.qlogic.org (10.185.6.94) by avcashub1.qlogic.org (10.1.4.190) with Microsoft SMTP Server id 14.3.235.1; Sun, 27 Nov 2016 06:52:39 -0800 From: Yuval Mintz To: , CC: Yuval Mintz Subject: [PATCH net-next 01/11] qede: Optimize aggregation information size Date: Sun, 27 Nov 2016 16:51:03 +0200 Message-ID: <1480258273-24973-2-git-send-email-Yuval.Mintz@cavium.com> X-Mailer: git-send-email 1.9.3 In-Reply-To: <1480258273-24973-1-git-send-email-Yuval.Mintz@cavium.com> References: <1480258273-24973-1-git-send-email-Yuval.Mintz@cavium.com> MIME-Version: 1.0 disclaimer: bypass X-Proofpoint-Virus-Version: vendor=nai engine=5800 definitions=8362 signatures=670751 X-Proofpoint-Spam-Details: rule=notspam policy=default score=0 
priorityscore=1501 malwarescore=0 suspectscore=2 phishscore=0 bulkscore=0 spamscore=0 clxscore=1015 lowpriorityscore=0 impostorscore=0 adultscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.0.1-1609300000 definitions=main-1611270266 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org The driver needs to maintain a structure for each possible concurrently open aggregation, but the structure storing that metadata is far from being optimized - the biggest waste in it is that there are 2 buffer metadata entries, one for a replacement buffer when the aggregation begins and the other for holding the aggregation's first buffer after it begins [as firmware might still update it]. Those 2 can safely be united into a single metadata structure. struct qede_agg_info changes as follows: /* size: 120, cachelines: 2, members: 9 */ /* sum members: 114, holes: 1, sum holes: 4 */ /* padding: 2 */ /* paddings: 2, sum paddings: 8 */ /* last cacheline: 56 bytes */ --> /* size: 48, cachelines: 1, members: 9 */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 48 bytes */ Signed-off-by: Yuval Mintz --- drivers/net/ethernet/qlogic/qede/qede.h | 29 +++++++++---- drivers/net/ethernet/qlogic/qede/qede_main.c | 63 ++++++++++++---------------- 2 files changed, 48 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 0cba21b..1d4c7e0 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -225,15 +225,30 @@ enum qede_agg_state { }; struct qede_agg_info { - struct sw_rx_data replace_buf; - dma_addr_t replace_buf_mapping; - struct sw_rx_data start_buf; - dma_addr_t start_buf_mapping; - struct eth_fast_path_rx_tpa_start_cqe start_cqe; - enum qede_agg_state agg_state; + /* rx_buf is a data buffer that can be placed /consumed from rx bd + * chain. 
It has two purposes: We will preallocate the data buffer + * for each aggregation when we open the interface and will place this + * buffer on the rx-bd-ring when we receive TPA_START. We don't want + * to be in a state where allocation fails, as we can't reuse the + * consumer buffer in the rx-chain since FW may still be writing to it + * (since header needs to be modified for TPA). + * The second purpose is to keep a pointer to the bd buffer during + * aggregation. + */ + struct sw_rx_data buffer; + dma_addr_t buffer_mapping; + struct sk_buff *skb; - int frag_id; + + /* We need some structs from the start cookie until termination */ u16 vlan_tag; + u16 start_cqe_bd_len; + u8 start_cqe_placement_offset; + + u8 state; + u8 frag_id; + + u8 tunnel_type; }; struct qede_rx_queue { diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index b84a2c4..653be22 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1058,7 +1058,7 @@ static int qede_fill_frag_skb(struct qede_dev *edev, struct qede_agg_info *tpa_info = &rxq->tpa_info[tpa_agg_index]; struct sk_buff *skb = tpa_info->skb; - if (unlikely(tpa_info->agg_state != QEDE_AGG_STATE_START)) + if (unlikely(tpa_info->state != QEDE_AGG_STATE_START)) goto out; /* Add one frag and update the appropriate fields in the skb */ @@ -1084,7 +1084,7 @@ static int qede_fill_frag_skb(struct qede_dev *edev, return 0; out: - tpa_info->agg_state = QEDE_AGG_STATE_ERROR; + tpa_info->state = QEDE_AGG_STATE_ERROR; qede_recycle_rx_bd_ring(rxq, edev, 1); return -ENOMEM; } @@ -1096,8 +1096,8 @@ static void qede_tpa_start(struct qede_dev *edev, struct qede_agg_info *tpa_info = &rxq->tpa_info[cqe->tpa_agg_index]; struct eth_rx_bd *rx_bd_cons = qed_chain_consume(&rxq->rx_bd_ring); struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring); - struct sw_rx_data *replace_buf = &tpa_info->replace_buf; - dma_addr_t mapping = 
tpa_info->replace_buf_mapping; + struct sw_rx_data *replace_buf = &tpa_info->buffer; + dma_addr_t mapping = tpa_info->buffer_mapping; struct sw_rx_data *sw_rx_data_cons; struct sw_rx_data *sw_rx_data_prod; enum pkt_hash_types rxhash_type; @@ -1122,11 +1122,11 @@ static void qede_tpa_start(struct qede_dev *edev, /* move partial skb from cons to pool (don't unmap yet) * save mapping, incase we drop the packet later on. */ - tpa_info->start_buf = *sw_rx_data_cons; + tpa_info->buffer = *sw_rx_data_cons; mapping = HILO_U64(le32_to_cpu(rx_bd_cons->addr.hi), le32_to_cpu(rx_bd_cons->addr.lo)); - tpa_info->start_buf_mapping = mapping; + tpa_info->buffer_mapping = mapping; rxq->sw_rx_cons++; /* set tpa state to start only if we are able to allocate skb @@ -1137,23 +1137,25 @@ static void qede_tpa_start(struct qede_dev *edev, le16_to_cpu(cqe->len_on_first_bd)); if (unlikely(!tpa_info->skb)) { DP_NOTICE(edev, "Failed to allocate SKB for gro\n"); - tpa_info->agg_state = QEDE_AGG_STATE_ERROR; + tpa_info->state = QEDE_AGG_STATE_ERROR; goto cons_buf; } - skb_put(tpa_info->skb, le16_to_cpu(cqe->len_on_first_bd)); - memcpy(&tpa_info->start_cqe, cqe, sizeof(tpa_info->start_cqe)); - /* Start filling in the aggregation info */ + skb_put(tpa_info->skb, le16_to_cpu(cqe->len_on_first_bd)); tpa_info->frag_id = 0; - tpa_info->agg_state = QEDE_AGG_STATE_START; + tpa_info->state = QEDE_AGG_STATE_START; rxhash = qede_get_rxhash(edev, cqe->bitfields, cqe->rss_hash, &rxhash_type); skb_set_hash(tpa_info->skb, rxhash, rxhash_type); + + /* Store some information from first CQE */ + tpa_info->start_cqe_placement_offset = cqe->placement_offset; + tpa_info->start_cqe_bd_len = le16_to_cpu(cqe->len_on_first_bd); if ((le16_to_cpu(cqe->pars_flags.flags) >> PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT) & - PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK) + PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK) tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag); else tpa_info->vlan_tag = 0; @@ -1169,7 +1171,7 @@ static void 
qede_tpa_start(struct qede_dev *edev, if (unlikely(cqe->ext_bd_len_list[1])) { DP_ERR(edev, "Unlikely - got a TPA aggregation with more than one ext_bd_len_list entry in the TPA start\n"); - tpa_info->agg_state = QEDE_AGG_STATE_ERROR; + tpa_info->state = QEDE_AGG_STATE_ERROR; } } @@ -1276,7 +1278,7 @@ static void qede_tpa_end(struct qede_dev *edev, DP_ERR(edev, "Strange - TPA emd with more than a single len_list entry\n"); - if (unlikely(tpa_info->agg_state != QEDE_AGG_STATE_START)) + if (unlikely(tpa_info->state != QEDE_AGG_STATE_START)) goto err; /* Sanity */ @@ -1290,14 +1292,9 @@ static void qede_tpa_end(struct qede_dev *edev, le16_to_cpu(cqe->total_packet_len), skb->len); memcpy(skb->data, - page_address(tpa_info->start_buf.data) + - tpa_info->start_cqe.placement_offset + - tpa_info->start_buf.page_offset, - le16_to_cpu(tpa_info->start_cqe.len_on_first_bd)); - - /* Recycle [mapped] start buffer for the next replacement */ - tpa_info->replace_buf = tpa_info->start_buf; - tpa_info->replace_buf_mapping = tpa_info->start_buf_mapping; + page_address(tpa_info->buffer.data) + + tpa_info->start_cqe_placement_offset + + tpa_info->buffer.page_offset, tpa_info->start_cqe_bd_len); /* Finalize the SKB */ skb->protocol = eth_type_trans(skb, edev->ndev); @@ -1310,18 +1307,11 @@ static void qede_tpa_end(struct qede_dev *edev, qede_gro_receive(edev, fp, skb, tpa_info->vlan_tag); - tpa_info->agg_state = QEDE_AGG_STATE_NONE; + tpa_info->state = QEDE_AGG_STATE_NONE; return; err: - /* The BD starting the aggregation is still mapped; Re-use it for - * future aggregations [as replacement buffer] - */ - memcpy(&tpa_info->replace_buf, &tpa_info->start_buf, - sizeof(struct sw_rx_data)); - tpa_info->replace_buf_mapping = tpa_info->start_buf_mapping; - tpa_info->start_buf.data = NULL; - tpa_info->agg_state = QEDE_AGG_STATE_NONE; + tpa_info->state = QEDE_AGG_STATE_NONE; dev_kfree_skb_any(tpa_info->skb); tpa_info->skb = NULL; } @@ -2823,7 +2813,7 @@ static void qede_free_sge_mem(struct 
qede_dev *edev, struct qede_rx_queue *rxq) for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) { struct qede_agg_info *tpa_info = &rxq->tpa_info[i]; - struct sw_rx_data *replace_buf = &tpa_info->replace_buf; + struct sw_rx_data *replace_buf = &tpa_info->buffer; if (replace_buf->data) { dma_unmap_page(&edev->pdev->dev, @@ -2905,7 +2895,7 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) { struct qede_agg_info *tpa_info = &rxq->tpa_info[i]; - struct sw_rx_data *replace_buf = &tpa_info->replace_buf; + struct sw_rx_data *replace_buf = &tpa_info->buffer; replace_buf->data = alloc_pages(GFP_ATOMIC, 0); if (unlikely(!replace_buf->data)) { @@ -2923,10 +2913,9 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) } replace_buf->mapping = mapping; - tpa_info->replace_buf.page_offset = 0; - - tpa_info->replace_buf_mapping = mapping; - tpa_info->agg_state = QEDE_AGG_STATE_NONE; + tpa_info->buffer.page_offset = 0; + tpa_info->buffer_mapping = mapping; + tpa_info->state = QEDE_AGG_STATE_NONE; } return 0;