From patchwork Fri Oct 7 07:18:39 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Kirsher, Jeffrey T" X-Patchwork-Id: 118230 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 35B43B70DC for ; Fri, 7 Oct 2011 18:19:31 +1100 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932461Ab1JGHTX (ORCPT ); Fri, 7 Oct 2011 03:19:23 -0400 Received: from mga01.intel.com ([192.55.52.88]:34245 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1758947Ab1JGHSw (ORCPT ); Fri, 7 Oct 2011 03:18:52 -0400 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by fmsmga101.fm.intel.com with ESMTP; 07 Oct 2011 00:18:51 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.68,500,1312182000"; d="scan'208";a="75654406" Received: from unknown (HELO jtkirshe-mobl.amr.corp.intel.com) ([10.255.14.210]) by fmsmga002.fm.intel.com with ESMTP; 07 Oct 2011 00:18:51 -0700 From: Jeff Kirsher To: davem@davemloft.net Cc: Alexander Duyck , netdev@vger.kernel.org, gospo@redhat.com, sassmann@redhat.com, Jeff Kirsher Subject: [net-next 06/13] igb: Make Tx budget for NAPI user adjustable Date: Fri, 7 Oct 2011 00:18:39 -0700 Message-Id: <1317971926-23145-8-git-send-email-jeffrey.t.kirsher@intel.com> X-Mailer: git-send-email 1.7.6.4 In-Reply-To: <1317971926-23145-1-git-send-email-jeffrey.t.kirsher@intel.com> References: <1317971926-23145-1-git-send-email-jeffrey.t.kirsher@intel.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Alexander Duyck This change is to make the NAPI budget limits for transmit adjustable. Currently they are only set to 128, and when the changes/improvements to NAPI occur to allow for adjustability, it would be possible to tune the value for optimal performance with applications such as routing. v2: remove tie between NAPI and interrupt moderation fix work limit define name (s/IXGBE/IGB/) Update patch description to better reflect patch Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Tested-by: Aaron Brown --- drivers/net/ethernet/intel/igb/igb.h | 3 + drivers/net/ethernet/intel/igb/igb_ethtool.c | 1 + drivers/net/ethernet/intel/igb/igb_main.c | 136 ++++++++++++++++---------- 3 files changed, 87 insertions(+), 53 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index b725937..beab918 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -47,6 +47,7 @@ struct igb_adapter; /* TX/RX descriptor defines */ #define IGB_DEFAULT_TXD 256 +#define IGB_DEFAULT_TX_WORK 128 #define IGB_MIN_TXD 80 #define IGB_MAX_TXD 4096 @@ -177,6 +178,7 @@ struct igb_q_vector { u32 eims_value; u16 cpu; + u16 tx_work_limit; u16 itr_val; u8 set_itr; @@ -266,6 +268,7 @@ struct igb_adapter { u16 rx_itr; /* TX */ + u16 tx_work_limit; u32 tx_timeout_count; int num_tx_queues; struct igb_ring *tx_ring[16]; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index f231d82..a445c4f 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2011,6 +2011,7 @@ static int igb_set_coalesce(struct net_device *netdev, for (i = 0; i < adapter->num_q_vectors; i++) { struct igb_q_vector *q_vector = adapter->q_vector[i]; + q_vector->tx_work_limit = adapter->tx_work_limit; if (q_vector->rx_ring) q_vector->itr_val = adapter->rx_itr_setting; else diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 7ad25e8..12faa99 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -136,8 +136,8 @@ static irqreturn_t igb_msix_ring(int irq, void *); static void igb_update_dca(struct igb_q_vector *); static void igb_setup_dca(struct igb_adapter *); #endif /* CONFIG_IGB_DCA */ -static bool igb_clean_tx_irq(struct igb_q_vector *); static int igb_poll(struct napi_struct *, int); +static bool igb_clean_tx_irq(struct igb_q_vector *); static bool igb_clean_rx_irq(struct igb_q_vector *, int); static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); static void igb_tx_timeout(struct net_device *); @@ -1120,6 +1120,7 @@ static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter, q_vector->tx_ring = adapter->tx_ring[ring_idx]; q_vector->tx_ring->q_vector = q_vector; q_vector->itr_val = adapter->tx_itr_setting; + q_vector->tx_work_limit = adapter->tx_work_limit; if (q_vector->itr_val && q_vector->itr_val <= 3) q_vector->itr_val = IGB_START_ITR; } @@ -2388,11 +2389,17 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter) pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); + /* set default ring sizes */ adapter->tx_ring_count = IGB_DEFAULT_TXD; adapter->rx_ring_count = IGB_DEFAULT_RXD; + + /* set default ITR values */ adapter->rx_itr_setting = IGB_DEFAULT_ITR; adapter->tx_itr_setting = IGB_DEFAULT_ITR; + /* set default work limits */ + adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; + adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; @@ -5496,7 +5503,7 @@ static int igb_poll(struct napi_struct *napi, int budget) igb_update_dca(q_vector); #endif if (q_vector->tx_ring) - clean_complete = !!igb_clean_tx_irq(q_vector); + clean_complete = igb_clean_tx_irq(q_vector); if (q_vector->rx_ring) clean_complete &= igb_clean_rx_irq(q_vector, budget); @@ -5578,64 +5585,69 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) { struct igb_adapter *adapter = q_vector->adapter; struct igb_ring *tx_ring = q_vector->tx_ring; - struct net_device *netdev = tx_ring->netdev; - struct e1000_hw *hw = &adapter->hw; - struct igb_buffer *buffer_info; - union e1000_adv_tx_desc *tx_desc, *eop_desc; + struct igb_buffer *tx_buffer; + union e1000_adv_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; - unsigned int i, eop, count = 0; - bool cleaned = false; + unsigned int budget = q_vector->tx_work_limit; + u16 i = tx_ring->next_to_clean; - i = tx_ring->next_to_clean; - eop = tx_ring->buffer_info[i].next_to_watch; - eop_desc = IGB_TX_DESC(tx_ring, eop); + if (test_bit(__IGB_DOWN, &adapter->state)) + return true; - while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) && - (count < tx_ring->count)) { - rmb(); /* read buffer_info after eop_desc status */ - for (cleaned = false; !cleaned; count++) { - tx_desc = IGB_TX_DESC(tx_ring, i); - buffer_info = &tx_ring->buffer_info[i]; - cleaned = (i == eop); + tx_buffer = &tx_ring->buffer_info[i]; + tx_desc = IGB_TX_DESC(tx_ring, i); - if (buffer_info->skb) { - total_bytes += buffer_info->bytecount; - /* gso_segs is currently only valid for tcp */ - total_packets += buffer_info->gso_segs; - igb_tx_hwtstamp(q_vector, buffer_info); - } + for (; budget; budget--) { + u16 eop = tx_buffer->next_to_watch; + union e1000_adv_tx_desc *eop_desc; + + eop_desc = IGB_TX_DESC(tx_ring, eop); + + /* if DD is not set pending work has not been completed */ + if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) + break; + + /* prevent any other reads prior to eop_desc being verified */ + rmb(); - igb_unmap_and_free_tx_resource(tx_ring, buffer_info); + do { tx_desc->wb.status = 0; + if (likely(tx_desc == eop_desc)) { + eop_desc = NULL; + + total_bytes += tx_buffer->bytecount; + total_packets += tx_buffer->gso_segs; + igb_tx_hwtstamp(q_vector, tx_buffer); + } + + igb_unmap_and_free_tx_resource(tx_ring, tx_buffer); + tx_buffer++; + tx_desc++; i++; - if (i == tx_ring->count) + if (unlikely(i == tx_ring->count)) { i = 0; - } - eop = tx_ring->buffer_info[i].next_to_watch; - eop_desc = IGB_TX_DESC(tx_ring, eop); + tx_buffer = tx_ring->buffer_info; + tx_desc = IGB_TX_DESC(tx_ring, 0); + } + } while (eop_desc); } tx_ring->next_to_clean = i; + u64_stats_update_begin(&tx_ring->tx_syncp); + tx_ring->tx_stats.bytes += total_bytes; + tx_ring->tx_stats.packets += total_packets; + u64_stats_update_end(&tx_ring->tx_syncp); + tx_ring->total_bytes += total_bytes; + tx_ring->total_packets += total_packets; - if (unlikely(count && - netif_carrier_ok(netdev) && - igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) { - /* Make sure that anybody stopping the queue after this - * sees the new next_to_clean. - */ - smp_mb(); - if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) && - !(test_bit(__IGB_DOWN, &adapter->state))) { - netif_wake_subqueue(netdev, tx_ring->queue_index); + if (tx_ring->detect_tx_hung) { + struct e1000_hw *hw = &adapter->hw; + u16 eop = tx_ring->buffer_info[i].next_to_watch; + union e1000_adv_tx_desc *eop_desc; - u64_stats_update_begin(&tx_ring->tx_syncp); - tx_ring->tx_stats.restart_queue++; - u64_stats_update_end(&tx_ring->tx_syncp); - } - } + eop_desc = IGB_TX_DESC(tx_ring, eop); - if (tx_ring->detect_tx_hung) { /* Detect a transmit hang in hardware, this serializes the * check with the clearing of time_stamp and movement of i */ tx_ring->detect_tx_hung = false; @@ -5666,16 +5678,34 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) eop, jiffies, eop_desc->wb.status); - netif_stop_subqueue(netdev, tx_ring->queue_index); + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); + + /* we are about to reset, no point in enabling stuff */ + return true; } } - tx_ring->total_bytes += total_bytes; - tx_ring->total_packets += total_packets; - u64_stats_update_begin(&tx_ring->tx_syncp); - tx_ring->tx_stats.bytes += total_bytes; - tx_ring->tx_stats.packets += total_packets; - u64_stats_update_end(&tx_ring->tx_syncp); - return count < tx_ring->count; + + if (unlikely(total_packets && + netif_carrier_ok(tx_ring->netdev) && + igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) { + /* Make sure that anybody stopping the queue after this + * sees the new next_to_clean. + */ + smp_mb(); + if (__netif_subqueue_stopped(tx_ring->netdev, + tx_ring->queue_index) && + !(test_bit(__IGB_DOWN, &adapter->state))) { + netif_wake_subqueue(tx_ring->netdev, + tx_ring->queue_index); + + u64_stats_update_begin(&tx_ring->tx_syncp); + tx_ring->tx_stats.restart_queue++; + u64_stats_update_end(&tx_ring->tx_syncp); + } + } + + return !!budget; } static inline void igb_rx_checksum(struct igb_ring *ring,