From patchwork Tue Oct 7 03:01:36 2008 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Kirsher, Jeffrey T" X-Patchwork-Id: 3111 X-Patchwork-Delegate: jgarzik@pobox.com Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by ozlabs.org (Postfix) with ESMTP id D5D2DDDE19 for ; Tue, 7 Oct 2008 14:07:37 +1100 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754686AbYJGDHb (ORCPT ); Mon, 6 Oct 2008 23:07:31 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1754652AbYJGDHb (ORCPT ); Mon, 6 Oct 2008 23:07:31 -0400 Received: from qmta03.emeryville.ca.mail.comcast.net ([76.96.30.32]:39267 "EHLO QMTA03.emeryville.ca.mail.comcast.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754633AbYJGDH2 (ORCPT ); Mon, 6 Oct 2008 23:07:28 -0400 X-Greylist: delayed 331 seconds by postgrey-1.27 at vger.kernel.org; Mon, 06 Oct 2008 23:07:28 EDT Received: from OMTA02.emeryville.ca.mail.comcast.net ([76.96.30.19]) by QMTA03.emeryville.ca.mail.comcast.net with comcast id PeJT1a00S0QkzPwA3f1w1A; Tue, 07 Oct 2008 03:01:56 +0000 Received: from gitlost.lost ([63.64.152.142]) by OMTA02.emeryville.ca.mail.comcast.net with comcast id Pf1c1a00F34bfcX8Nf1fel; Tue, 07 Oct 2008 03:01:53 +0000 X-Authority-Analysis: v=1.0 c=1 a=ZFiXGS5U724A:10 a=wIWWHEVAbuwA:10 a=OFpfXMHt-uB19aXp1Y4A:9 a=hI9YkUMgJ98k6IX5hRQA:7 a=2QN9TkmY1Em30fu_ZKAqWcxjkw4A:4 a=dGJ0OcVc7YAA:10 a=iYlkOlhu7C0A:10 From: Jeff Kirsher Subject: [NET-NEXT PATCH 1/6] ixgbe: this patch adds support for DCB to the kernel and ixgbe driver To: jeff@garzik.org, davem@davemloft.net Cc: netdev@vger.kernel.org, peter.p.waskiewicz@intel.com, Alexander Duyck , Jeff Kirsher Date: Mon, 06 Oct 2008 20:01:36 -0700 Message-ID: <20081007030136.727.44027.stgit@gitlost.lost> User-Agent: StGIT/0.14.3 MIME-Version: 1.0 
Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Alexander Duyck Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher --- drivers/net/ixgbe/Makefile | 3 drivers/net/ixgbe/ixgbe.h | 26 + drivers/net/ixgbe/ixgbe_dcb.c | 332 +++++++++++++++++ drivers/net/ixgbe/ixgbe_dcb.h | 158 ++++++++ drivers/net/ixgbe/ixgbe_dcb_82598.c | 398 ++++++++++++++++++++ drivers/net/ixgbe/ixgbe_dcb_82598.h | 94 +++++ drivers/net/ixgbe/ixgbe_dcb_nl.c | 352 ++++++++++++++++++ drivers/net/ixgbe/ixgbe_ethtool.c | 37 ++ drivers/net/ixgbe/ixgbe_main.c | 189 +++++++++- include/linux/dcbnl.h | 230 ++++++++++++ include/linux/netdevice.h | 8 include/linux/rtnetlink.h | 5 include/net/dcbnl.h | 44 ++ net/Kconfig | 1 net/Makefile | 3 net/dcb/Kconfig | 12 + net/dcb/Makefile | 1 net/dcb/dcbnl.c | 682 +++++++++++++++++++++++++++++++++++ 18 files changed, 2555 insertions(+), 20 deletions(-) create mode 100644 drivers/net/ixgbe/ixgbe_dcb.c create mode 100644 drivers/net/ixgbe/ixgbe_dcb.h create mode 100644 drivers/net/ixgbe/ixgbe_dcb_82598.c create mode 100644 drivers/net/ixgbe/ixgbe_dcb_82598.h create mode 100644 drivers/net/ixgbe/ixgbe_dcb_nl.c create mode 100644 include/linux/dcbnl.h create mode 100644 include/net/dcbnl.h create mode 100644 net/dcb/Kconfig create mode 100644 net/dcb/Makefile create mode 100644 net/dcb/dcbnl.c -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html diff --git a/drivers/net/ixgbe/Makefile b/drivers/net/ixgbe/Makefile index ccd83d9..2a45fa0 100644 --- a/drivers/net/ixgbe/Makefile +++ b/drivers/net/ixgbe/Makefile @@ -33,4 +33,5 @@ obj-$(CONFIG_IXGBE) += ixgbe.o ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \ - ixgbe_82598.o ixgbe_phy.o + ixgbe_82598.o ixgbe_phy.o ixgbe_dcb.o ixgbe_dcb_82598.o \ + ixgbe_dcb_nl.o diff --git a/drivers/net/ixgbe/ixgbe.h 
b/drivers/net/ixgbe/ixgbe.h index 2198b77..b21a8c9 100644 --- a/drivers/net/ixgbe/ixgbe.h +++ b/drivers/net/ixgbe/ixgbe.h @@ -35,6 +35,7 @@ #include "ixgbe_type.h" #include "ixgbe_common.h" +#include "ixgbe_dcb.h" #if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) #include @@ -84,6 +85,7 @@ #define IXGBE_TX_FLAGS_TSO (u32)(1 << 2) #define IXGBE_TX_FLAGS_IPV4 (u32)(1 << 3) #define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000 +#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 #define IXGBE_MAX_LRO_DESCRIPTORS 8 @@ -134,7 +136,7 @@ struct ixgbe_ring { u16 reg_idx; /* holds the special value that gets the hardware register * offset associated with this ring, which is different - * for DCE and RSS modes */ + * for DCB and RSS modes */ #if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) /* cpu for tx queue */ @@ -152,8 +154,10 @@ struct ixgbe_ring { u16 rx_buf_len; }; +#define RING_F_DCB 0 #define RING_F_VMDQ 1 #define RING_F_RSS 2 +#define IXGBE_MAX_DCB_INDICES 8 #define IXGBE_MAX_RSS_INDICES 16 #define IXGBE_MAX_VMDQ_INDICES 16 struct ixgbe_ring_feature { @@ -164,6 +168,10 @@ struct ixgbe_ring_feature { #define MAX_RX_QUEUES 64 #define MAX_TX_QUEUES 32 +#define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \ + ? 8 : 1) +#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS + /* MAX_MSIX_Q_VECTORS of these are allocated, * but we only use one per queue-specific vector. 
*/ @@ -215,6 +223,9 @@ struct ixgbe_adapter { struct work_struct reset_task; struct ixgbe_q_vector q_vector[MAX_MSIX_Q_VECTORS]; char name[MAX_MSIX_COUNT][IFNAMSIZ + 5]; + struct ixgbe_dcb_config dcb_cfg; + struct ixgbe_dcb_config temp_dcb_cfg; + u8 dcb_set_bitmap; /* Interrupt Throttle Rate */ u32 itr_setting; @@ -269,6 +280,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG_VMDQ_ENABLED (u32)(1 << 19) #define IXGBE_FLAG_NEED_LINK_UPDATE (u32)(1 << 22) #define IXGBE_FLAG_IN_WATCHDOG_TASK (u32)(1 << 23) +#define IXGBE_FLAG_DCB_ENABLED (u32)(1 << 24) /* default to trying for four seconds */ #define IXGBE_TRY_LINK_TIMEOUT (4 * HZ) @@ -312,6 +324,13 @@ enum ixgbe_boards { }; extern struct ixgbe_info ixgbe_82598_info; +#ifdef CONFIG_DCBNL +extern struct dcbnl_rtnl_ops dcbnl_ops; +extern int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg, + struct ixgbe_dcb_config *dst_dcb_cfg, int tc_max); +#endif + + extern char ixgbe_driver_name[]; extern const char ixgbe_driver_version[]; @@ -326,5 +345,8 @@ extern int ixgbe_setup_tx_resources(struct ixgbe_adapter *, struct ixgbe_ring *) extern void ixgbe_free_rx_resources(struct ixgbe_adapter *, struct ixgbe_ring *); extern void ixgbe_free_tx_resources(struct ixgbe_adapter *, struct ixgbe_ring *); extern void ixgbe_update_stats(struct ixgbe_adapter *adapter); - +extern void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter); +extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter); +void ixgbe_napi_add_all(struct ixgbe_adapter *adapter); +void ixgbe_napi_del_all(struct ixgbe_adapter *adapter); #endif /* _IXGBE_H_ */ diff --git a/drivers/net/ixgbe/ixgbe_dcb.c b/drivers/net/ixgbe/ixgbe_dcb.c new file mode 100644 index 0000000..e2e28ac --- /dev/null +++ b/drivers/net/ixgbe/ixgbe_dcb.c @@ -0,0 +1,332 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2007 Intel Corporation. 
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  Linux NICS
+  e1000-devel Mailing List
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+#include "ixgbe.h"
+#include "ixgbe_type.h"
+#include "ixgbe_dcb.h"
+#include "ixgbe_dcb_82598.h"
+
+/**
+ * ixgbe_dcb_check_config - Check DCB settings against the DCB rules.
+ * @dcb_config: Pointer to DCB config structure
+ *
+ * Returns 0, or the DCB_ERR_* code of the first rule found violated.
+ * The following rules are checked:
+ * 1. The sum of bandwidth percentages of all Bandwidth Groups must total 100%.
+ * 2. The sum of bandwidth percentages of all Traffic Classes within a Bandwidth
+ *    Group must total 100 (or 0 when nothing is allocated from the group).
+ * 3. A Traffic Class should not be set to both Link Strict Priority
+ *    and Group Strict Priority (ruled out by the single prio_type field).
+ * 4. Link strict Bandwidth Groups can only have link strict traffic classes
+ *    with zero bandwidth; any other traffic class needs non-zero bandwidth.
+ */
+s32 ixgbe_dcb_check_config(struct ixgbe_dcb_config *dcb_config)
+{
+	struct tc_bw_alloc *p;
+	s32 ret_val = 0;
+	u8 i, j, bw_id;
+	u16 bw = 0, bw_sum[2][MAX_BW_GROUP]; /* u8 sums may exceed 255 */
+	bool link_strict[2][MAX_BW_GROUP];
+
+	memset(bw_sum, 0, sizeof(bw_sum));
+	memset(link_strict, 0, sizeof(link_strict));
+
+	/* First Tx, then Rx */
+	for (i = 0; i < 2; i++) {
+		/* Check each traffic class for rule violation */
+		for (j = 0; j < MAX_TRAFFIC_CLASS; j++) {
+			p = &dcb_config->tc_config[j].path[i];
+
+			bw = p->bwg_percent;
+			bw_id = p->bwg_id;
+
+			if (bw_id >= MAX_BW_GROUP) {
+				ret_val = DCB_ERR_CONFIG;
+				goto err_config;
+			}
+			if (p->prio_type == prio_link) {
+				link_strict[i][bw_id] = true;
+				/* Link strict should have zero bandwidth */
+				if (bw) {
+					ret_val = DCB_ERR_LS_BW_NONZERO;
+					goto err_config;
+				}
+			} else if (!bw) {
+				/*
+				 * Traffic classes without link strict
+				 * should have non-zero bandwidth.
+				 */
+				ret_val = DCB_ERR_TC_BW_ZERO;
+				goto err_config;
+			}
+			bw_sum[i][bw_id] += bw;
+		}
+
+		bw = 0;
+
+		/* Check each bandwidth group for rule violation */
+		for (j = 0; j < MAX_BW_GROUP; j++) {
+			bw += dcb_config->bw_percentage[i][j];
+			/*
+			 * Sum of bandwidth percentages of all traffic classes
+			 * within a Bandwidth Group must total 100 except for
+			 * link strict group (zero bandwidth).
+			 */
+			if (link_strict[i][j]) {
+				if (bw_sum[i][j]) {
+					/*
+					 * Link strict group should have zero
+					 * bandwidth.
+					 */
+					ret_val = DCB_ERR_LS_BWG_NONZERO;
+					goto err_config;
+				}
+			} else if (bw_sum[i][j] != BW_PERCENT &&
+				   bw_sum[i][j] != 0) {
+				ret_val = DCB_ERR_TC_BW;
+				goto err_config;
+			}
+		}
+
+		if (bw != BW_PERCENT) {
+			ret_val = DCB_ERR_BW_GROUP;
+			goto err_config;
+		}
+	}
+
+err_config:
+	return ret_val;
+}
+
+/**
+ * ixgbe_dcb_calculate_tc_credits - Calculates traffic class credits
+ * @dcb_config: Struct containing DCB settings.
+ * @direction: Configuring either Tx or Rx.
+ *
+ * This function calculates the credits allocated to each traffic class.
+ * It should be called only after the rules are checked by + * ixgbe_dcb_check_config(). + */ +s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_dcb_config *dcb_config, + u8 direction) +{ + struct tc_bw_alloc *p; + s32 ret_val = 0; + /* Initialization values default for Tx settings */ + u32 credit_refill = 0; + u32 credit_max = 0; + u16 link_percentage = 0; + u8 bw_percent = 0; + u8 i; + + if (dcb_config == NULL) { + ret_val = DCB_ERR_CONFIG; + goto out; + } + + /* Find out the link percentage for each TC first */ + for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { + p = &dcb_config->tc_config[i].path[direction]; + bw_percent = dcb_config->bw_percentage[direction][p->bwg_id]; + + link_percentage = p->bwg_percent; + /* Must be careful of integer division for very small nums */ + link_percentage = (link_percentage * bw_percent) / 100; + if (p->bwg_percent > 0 && link_percentage == 0) + link_percentage = 1; + + /* Save link_percentage for reference */ + p->link_percent = (u8)link_percentage; + + /* Calculate credit refill and save it */ + credit_refill = link_percentage * MINIMUM_CREDIT_REFILL; + p->data_credits_refill = (u16)credit_refill; + + /* Calculate maximum credit for the TC */ + credit_max = (link_percentage * MAX_CREDIT) / 100; + + /* + * Adjustment based on rule checking, if the percentage + * of a TC is too small, the maximum credit may not be + * enough to send out a jumbo frame in data plane arbitration. + */ + if (credit_max && (credit_max < MINIMUM_CREDIT_FOR_JUMBO)) + credit_max = MINIMUM_CREDIT_FOR_JUMBO; + + if (direction == DCB_TX_CONFIG) { + /* + * Adjustment based on rule checking, if the + * percentage of a TC is too small, the maximum + * credit may not be enough to send out a TSO + * packet in descriptor plane arbitration. 
+ */ + if (credit_max && + (credit_max < MINIMUM_CREDIT_FOR_TSO)) + credit_max = MINIMUM_CREDIT_FOR_TSO; + + dcb_config->tc_config[i].desc_credits_max = + (u16)credit_max; + } + + p->data_credits_max = (u16)credit_max; + } + +out: + return ret_val; +} + +/** + * ixgbe_dcb_get_tc_stats - Returns status of each traffic class + * @hw: pointer to hardware structure + * @stats: pointer to statistics structure + * @tc_count: Number of traffic classes to read statistics for. + * + * This function returns the status data for each of the Traffic Classes in use. + */ +s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats, + u8 tc_count) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_get_tc_stats_82598(hw, stats, tc_count); + return ret; +} + +/** + * ixgbe_dcb_get_pfc_stats - Returns CBFC status of each traffic class + * @hw: pointer to hardware structure + * @stats: pointer to statistics structure + * @tc_count: Number of traffic classes to read statistics for. + * + * This function returns the CBFC status data for each of the Traffic Classes. + */ +s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats, + u8 tc_count) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_get_pfc_stats_82598(hw, stats, tc_count); + return ret; +} + +/** + * ixgbe_dcb_config_rx_arbiter - Config Rx arbiter + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Rx Data Arbiter and credits for each traffic class. 
+ */ +s32 ixgbe_dcb_config_rx_arbiter(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_config_rx_arbiter_82598(hw, dcb_config); + return ret; +} + +/** + * ixgbe_dcb_config_tx_desc_arbiter - Config Tx Desc arbiter + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Tx Descriptor Arbiter and credits for each traffic class. + */ +s32 ixgbe_dcb_config_tx_desc_arbiter(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_config_tx_desc_arbiter_82598(hw, dcb_config); + return ret; +} + +/** + * ixgbe_dcb_config_tx_data_arbiter - Config Tx data arbiter + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Tx Data Arbiter and credits for each traffic class. + */ +s32 ixgbe_dcb_config_tx_data_arbiter(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_config_tx_data_arbiter_82598(hw, dcb_config); + return ret; +} + +/** + * ixgbe_dcb_config_pfc - Config priority flow control + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Priority Flow Control for each traffic class. + */ +s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_config_pfc_82598(hw, dcb_config); + return ret; +} + +/** + * ixgbe_dcb_config_tc_stats - Config traffic class statistics + * @hw: pointer to hardware structure + * + * Configure queue statistics registers, all queues belonging to same traffic + * class uses a single set of queue statistics counters. 
+ */ +s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *hw) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_config_tc_stats_82598(hw); + return ret; +} + +/** + * ixgbe_dcb_hw_config - Config and enable DCB + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure dcb settings and enable dcb mode. + */ +s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_hw_config_82598(hw, dcb_config); + return ret; +} + diff --git a/drivers/net/ixgbe/ixgbe_dcb.h b/drivers/net/ixgbe/ixgbe_dcb.h new file mode 100644 index 0000000..bc05c63 --- /dev/null +++ b/drivers/net/ixgbe/ixgbe_dcb.h @@ -0,0 +1,158 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2007 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + Linux NICS + e1000-devel Mailing List + Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _DCB_CONFIG_H_ +#define _DCB_CONFIG_H_ + +#include "ixgbe_type.h" + +/* DCB data structures */ + +#define IXGBE_MAX_PACKET_BUFFERS 8 +#define MAX_USER_PRIORITY 8 +#define MAX_TRAFFIC_CLASS 8 +#define MAX_BW_GROUP 8 +#define BW_PERCENT 100 + +#define DCB_TX_CONFIG 0 +#define DCB_RX_CONFIG 1 + +/* DCB error Codes */ +#define DCB_SUCCESS 0 +#define DCB_ERR_CONFIG -1 +#define DCB_ERR_PARAM -2 + +/* Transmit and receive Errors */ +/* Error in bandwidth group allocation */ +#define DCB_ERR_BW_GROUP -3 +/* Error in traffic class bandwidth allocation */ +#define DCB_ERR_TC_BW -4 +/* Traffic class has both link strict and group strict enabled */ +#define DCB_ERR_LS_GS -5 +/* Link strict traffic class has non zero bandwidth */ +#define DCB_ERR_LS_BW_NONZERO -6 +/* Link strict bandwidth group has non zero bandwidth */ +#define DCB_ERR_LS_BWG_NONZERO -7 +/* Traffic class has zero bandwidth */ +#define DCB_ERR_TC_BW_ZERO -8 + +#define DCB_NOT_IMPLEMENTED 0x7FFFFFFF + +struct dcb_pfc_tc_debug { + u8 tc; + u8 pause_status; + u64 pause_quanta; +}; + +enum strict_prio_type { + prio_none = 0, + prio_group, + prio_link +}; + +/* Traffic class bandwidth allocation per direction */ +struct tc_bw_alloc { + u8 bwg_id; /* Bandwidth Group (BWG) ID */ + u8 bwg_percent; /* % of BWG's bandwidth */ + u8 link_percent; /* % of link bandwidth */ + u8 up_to_tc_bitmap; /* User Priority to Traffic Class mapping */ + u16 data_credits_refill; /* Credit refill amount in 64B granularity */ + u16 data_credits_max; /* Max credits for a configured packet buffer + * in 64B granularity.*/ + enum strict_prio_type prio_type; /* Link or Group Strict Priority */ +}; + +enum dcb_pfc_type { + pfc_disabled = 0, + pfc_enabled_full, + pfc_enabled_tx, + pfc_enabled_rx +}; + +/* Traffic class configuration */ +struct tc_configuration { + struct tc_bw_alloc path[2]; /* One each for 
Tx/Rx */ + enum dcb_pfc_type dcb_pfc; /* Class based flow control setting */ + + u16 desc_credits_max; /* For Tx Descriptor arbitration */ + u8 tc; /* Traffic class (TC) */ +}; + +enum dcb_rx_pba_cfg { + pba_equal, /* PBA[0-7] each use 64KB FIFO */ + pba_80_48 /* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */ +}; + +struct ixgbe_dcb_config { + struct tc_configuration tc_config[MAX_TRAFFIC_CLASS]; + u8 bw_percentage[2][MAX_BW_GROUP]; /* One each for Tx/Rx */ + + bool round_robin_enable; + + enum dcb_rx_pba_cfg rx_pba_cfg; + + u32 dcb_cfg_version; /* Not used...OS-specific? */ + u32 link_speed; /* For bandwidth allocation validation purpose */ +}; + + +/* DCB driver APIs */ + +/* DCB rule checking function.*/ +s32 ixgbe_dcb_check_config(struct ixgbe_dcb_config *config); + +/* DCB credits calculation */ +s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_dcb_config *, u8); + +/* DCB PFC functions */ +s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *, struct ixgbe_dcb_config *g); +s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8); + +/* DCB traffic class stats */ +s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *); +s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8); + +/* DCB config arbiters */ +s32 ixgbe_dcb_config_tx_desc_arbiter(struct ixgbe_hw *, + struct ixgbe_dcb_config *); +s32 ixgbe_dcb_config_tx_data_arbiter(struct ixgbe_hw *, + struct ixgbe_dcb_config *); +s32 ixgbe_dcb_config_rx_arbiter(struct ixgbe_hw *, struct ixgbe_dcb_config *); + +/* DCB hw initialization */ +s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, struct ixgbe_dcb_config *); + +/* DCB definitions for credit calculation */ +#define MAX_CREDIT_REFILL 511 /* 0x1FF * 64B = 32704B */ +#define MINIMUM_CREDIT_REFILL 5 /* 5*64B = 320B */ +#define MINIMUM_CREDIT_FOR_JUMBO 145 /* 145= UpperBound((9*1024+54)/64B) for 9KB jumbo frame */ +#define DCB_MAX_TSO_SIZE (32*1024) /* MAX TSO packet size supported in DCB mode */ +#define MINIMUM_CREDIT_FOR_TSO 
(DCB_MAX_TSO_SIZE/64 + 1) /* 513 for 32KB TSO packet */ +#define MAX_CREDIT 4095 /* Maximum credit supported: (256 * 1024B / 64B) - 1 */ + +#endif /* _DCB_CONFIG_H_ */ diff --git a/drivers/net/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ixgbe/ixgbe_dcb_82598.c new file mode 100644 index 0000000..fce6867 --- /dev/null +++ b/drivers/net/ixgbe/ixgbe_dcb_82598.c @@ -0,0 +1,398 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2007 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + Linux NICS + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe.h" +#include "ixgbe_type.h" +#include "ixgbe_dcb.h" +#include "ixgbe_dcb_82598.h" + +/** + * ixgbe_dcb_get_tc_stats_82598 - Return status data for each traffic class + * @hw: pointer to hardware structure + * @stats: pointer to statistics structure + * @tc_count: Number of traffic classes to read (max MAX_TRAFFIC_CLASS). + * + * This function returns the status data for each of the Traffic Classes in use. 
+ */ +s32 ixgbe_dcb_get_tc_stats_82598(struct ixgbe_hw *hw, + struct ixgbe_hw_stats *stats, + u8 tc_count) +{ + int tc; + + if (tc_count > MAX_TRAFFIC_CLASS) + return DCB_ERR_PARAM; + + /* Statistics pertaining to each traffic class */ + for (tc = 0; tc < tc_count; tc++) { + /* Transmitted Packets */ + stats->qptc[tc] += IXGBE_READ_REG(hw, IXGBE_QPTC(tc)); + /* Transmitted Bytes */ + stats->qbtc[tc] += IXGBE_READ_REG(hw, IXGBE_QBTC(tc)); + /* Received Packets */ + stats->qprc[tc] += IXGBE_READ_REG(hw, IXGBE_QPRC(tc)); + /* Received Bytes */ + stats->qbrc[tc] += IXGBE_READ_REG(hw, IXGBE_QBRC(tc)); + } + + return 0; +} + +/** + * ixgbe_dcb_get_pfc_stats_82598 - Returns CBFC status data + * @hw: pointer to hardware structure + * @stats: pointer to statistics structure + * @tc_count: Number of traffic classes to read (max MAX_TRAFFIC_CLASS). + * + * This function returns the CBFC status data for each of the Traffic Classes. + */ +s32 ixgbe_dcb_get_pfc_stats_82598(struct ixgbe_hw *hw, + struct ixgbe_hw_stats *stats, + u8 tc_count) +{ + int tc; + + if (tc_count > MAX_TRAFFIC_CLASS) + return DCB_ERR_PARAM; + + for (tc = 0; tc < tc_count; tc++) { + /* Priority XOFF Transmitted */ + stats->pxofftxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(tc)); + /* Priority XOFF Received */ + stats->pxoffrxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(tc)); + } + + return 0; +} + +/** + * ixgbe_dcb_config_packet_buffers_82598 - Configure packet buffers + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure packet buffers for DCB mode. 
+ */ +s32 ixgbe_dcb_config_packet_buffers_82598(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + s32 ret_val = 0; + u32 value = IXGBE_RXPBSIZE_64KB; + u8 i = 0; + + /* Setup Rx packet buffer sizes */ + switch (dcb_config->rx_pba_cfg) { + case pba_80_48: + /* Setup the first four at 80KB */ + value = IXGBE_RXPBSIZE_80KB; + for (; i < 4; i++) + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value); + /* Setup the last four at 48KB...don't re-init i */ + value = IXGBE_RXPBSIZE_48KB; + /* Fall Through */ + case pba_equal: + default: + for (; i < IXGBE_MAX_PACKET_BUFFERS; i++) + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value); + + /* Setup Tx packet buffer sizes */ + for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) { + IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), + IXGBE_TXPBSIZE_40KB); + } + break; + } + + return ret_val; +} + +/** + * ixgbe_dcb_config_rx_arbiter_82598 - Config Rx data arbiter + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Rx Data Arbiter and credits for each traffic class. 
+ */ +s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + struct tc_bw_alloc *p; + u32 reg = 0; + u32 credit_refill = 0; + u32 credit_max = 0; + u8 i = 0; + + reg = IXGBE_READ_REG(hw, IXGBE_RUPPBMR) | IXGBE_RUPPBMR_MQA; + IXGBE_WRITE_REG(hw, IXGBE_RUPPBMR, reg); + + reg = IXGBE_READ_REG(hw, IXGBE_RMCS); + /* Enable Arbiter */ + reg &= ~IXGBE_RMCS_ARBDIS; + /* Enable Receive Recycle within the BWG */ + reg |= IXGBE_RMCS_RRM; + /* Enable Deficit Fixed Priority arbitration*/ + reg |= IXGBE_RMCS_DFP; + + IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg); + + /* Configure traffic class credits and priority */ + for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { + p = &dcb_config->tc_config[i].path[DCB_RX_CONFIG]; + credit_refill = p->data_credits_refill; + credit_max = p->data_credits_max; + + reg = credit_refill | (credit_max << IXGBE_RT2CR_MCL_SHIFT); + + if (p->prio_type == prio_link) + reg |= IXGBE_RT2CR_LSP; + + IXGBE_WRITE_REG(hw, IXGBE_RT2CR(i), reg); + } + + reg = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); + reg |= IXGBE_RDRXCTL_RDMTS_1_2; + reg |= IXGBE_RDRXCTL_MPBEN; + reg |= IXGBE_RDRXCTL_MCEN; + IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg); + + reg = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + /* Make sure there is enough descriptors before arbitration */ + reg &= ~IXGBE_RXCTRL_DMBYPS; + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, reg); + + return 0; +} + +/** + * ixgbe_dcb_config_tx_desc_arbiter_82598 - Config Tx Desc. arbiter + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Tx Descriptor Arbiter and credits for each traffic class. 
+ */ +s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + struct tc_bw_alloc *p; + u32 reg, max_credits; + u8 i; + + reg = IXGBE_READ_REG(hw, IXGBE_DPMCS); + + /* Enable arbiter */ + reg &= ~IXGBE_DPMCS_ARBDIS; + if (!(dcb_config->round_robin_enable)) { + /* Enable DFP and Recycle mode */ + reg |= (IXGBE_DPMCS_TDPAC | IXGBE_DPMCS_TRM); + } + reg |= IXGBE_DPMCS_TSOEF; + /* Configure Max TSO packet size 34KB including payload and headers */ + reg |= (0x4 << IXGBE_DPMCS_MTSOS_SHIFT); + + IXGBE_WRITE_REG(hw, IXGBE_DPMCS, reg); + + /* Configure traffic class credits and priority */ + for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { + p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG]; + max_credits = dcb_config->tc_config[i].desc_credits_max; + reg = max_credits << IXGBE_TDTQ2TCCR_MCL_SHIFT; + reg |= p->data_credits_refill; + reg |= (u32)(p->bwg_id) << IXGBE_TDTQ2TCCR_BWG_SHIFT; + + if (p->prio_type == prio_group) + reg |= IXGBE_TDTQ2TCCR_GSP; + + if (p->prio_type == prio_link) + reg |= IXGBE_TDTQ2TCCR_LSP; + + IXGBE_WRITE_REG(hw, IXGBE_TDTQ2TCCR(i), reg); + } + + return 0; +} + +/** + * ixgbe_dcb_config_tx_data_arbiter_82598 - Config Tx data arbiter + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Tx Data Arbiter and credits for each traffic class. 
+ */ +s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + struct tc_bw_alloc *p; + u32 reg; + u8 i; + + reg = IXGBE_READ_REG(hw, IXGBE_PDPMCS); + /* Enable Data Plane Arbiter */ + reg &= ~IXGBE_PDPMCS_ARBDIS; + /* Enable DFP and Transmit Recycle Mode */ + reg |= (IXGBE_PDPMCS_TPPAC | IXGBE_PDPMCS_TRM); + + IXGBE_WRITE_REG(hw, IXGBE_PDPMCS, reg); + + /* Configure traffic class credits and priority */ + for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { + p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG]; + reg = p->data_credits_refill; + reg |= (u32)(p->data_credits_max) << IXGBE_TDPT2TCCR_MCL_SHIFT; + reg |= (u32)(p->bwg_id) << IXGBE_TDPT2TCCR_BWG_SHIFT; + + if (p->prio_type == prio_group) + reg |= IXGBE_TDPT2TCCR_GSP; + + if (p->prio_type == prio_link) + reg |= IXGBE_TDPT2TCCR_LSP; + + IXGBE_WRITE_REG(hw, IXGBE_TDPT2TCCR(i), reg); + } + + /* Enable Tx packet buffer division */ + reg = IXGBE_READ_REG(hw, IXGBE_DTXCTL); + reg |= IXGBE_DTXCTL_ENDBUBD; + IXGBE_WRITE_REG(hw, IXGBE_DTXCTL, reg); + + return 0; +} + +/** + * ixgbe_dcb_config_pfc_82598 - Config priority flow control + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Priority Flow Control for each traffic class. 
+ */ +s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + u32 reg, rx_pba_size; + u8 i; + + /* Enable Transmit Priority Flow Control */ + reg = IXGBE_READ_REG(hw, IXGBE_RMCS); + reg &= ~IXGBE_RMCS_TFCE_802_3X; + /* correct the reporting of our flow control status */ + hw->fc.type = ixgbe_fc_none; + reg |= IXGBE_RMCS_TFCE_PRIORITY; + IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg); + + /* Enable Receive Priority Flow Control */ + reg = IXGBE_READ_REG(hw, IXGBE_FCTRL); + reg &= ~IXGBE_FCTRL_RFCE; + reg |= IXGBE_FCTRL_RPFCE; + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg); + + /* + * Configure flow control thresholds and enable priority flow control + * for each traffic class. + */ + for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { + if (dcb_config->rx_pba_cfg == pba_equal) { + rx_pba_size = IXGBE_RXPBSIZE_64KB; + } else { + rx_pba_size = (i < 4) ? IXGBE_RXPBSIZE_80KB + : IXGBE_RXPBSIZE_48KB; + } + + reg = ((rx_pba_size >> 5) & 0xFFF0); + if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx || + dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full) + reg |= IXGBE_FCRTL_XONE; + + IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), reg); + + reg = ((rx_pba_size >> 2) & 0xFFF0); + if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx || + dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full) + reg |= IXGBE_FCRTH_FCEN; + + IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), reg); + } + + /* Configure pause time */ + for (i = 0; i < (MAX_TRAFFIC_CLASS >> 1); i++) + IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), 0x68006800); + + /* Configure flow control refresh threshold value */ + IXGBE_WRITE_REG(hw, IXGBE_FCRTV, 0x3400); + + return 0; +} + +/** + * ixgbe_dcb_config_tc_stats_82598 - Configure traffic class statistics + * @hw: pointer to hardware structure + * + * Configure queue statistics registers, all queues belonging to same traffic + * class uses a single set of queue statistics counters. 
+ */ +s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *hw) +{ + u32 reg = 0; + u8 i = 0; + u8 j = 0; + + /* Receive Queues stats setting - 8 queues per statistics reg */ + for (i = 0, j = 0; i < 15 && j < 8; i = i + 2, j++) { + reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i)); + reg |= ((0x1010101) * j); + IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i), reg); + reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i + 1)); + reg |= ((0x1010101) * j); + IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i + 1), reg); + } + /* Transmit Queues stats setting - 4 queues per statistics reg */ + for (i = 0; i < 8; i++) { + reg = IXGBE_READ_REG(hw, IXGBE_TQSMR(i)); + reg |= ((0x1010101) * i); + IXGBE_WRITE_REG(hw, IXGBE_TQSMR(i), reg); + } + + return 0; +} + +/** + * ixgbe_dcb_hw_config_82598 - Config and enable DCB + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure dcb settings and enable dcb mode. + */ +s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + ixgbe_dcb_config_packet_buffers_82598(hw, dcb_config); + ixgbe_dcb_config_rx_arbiter_82598(hw, dcb_config); + ixgbe_dcb_config_tx_desc_arbiter_82598(hw, dcb_config); + ixgbe_dcb_config_tx_data_arbiter_82598(hw, dcb_config); + ixgbe_dcb_config_pfc_82598(hw, dcb_config); + ixgbe_dcb_config_tc_stats_82598(hw); + + return 0; +} diff --git a/drivers/net/ixgbe/ixgbe_dcb_82598.h b/drivers/net/ixgbe/ixgbe_dcb_82598.h new file mode 100644 index 0000000..1e6a313 --- /dev/null +++ b/drivers/net/ixgbe/ixgbe_dcb_82598.h @@ -0,0 +1,94 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2007 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. 
+ + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + Linux NICS + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _DCB_82598_CONFIG_H_ +#define _DCB_82598_CONFIG_H_ + +/* DCB register definitions */ + +#define IXGBE_DPMCS_MTSOS_SHIFT 16 +#define IXGBE_DPMCS_TDPAC 0x00000001 /* 0 Round Robin, 1 DFP - Deficit Fixed Priority */ +#define IXGBE_DPMCS_TRM 0x00000010 /* Transmit Recycle Mode */ +#define IXGBE_DPMCS_ARBDIS 0x00000040 /* DCB arbiter disable */ +#define IXGBE_DPMCS_TSOEF 0x00080000 /* TSO Expand Factor: 0=x4, 1=x2 */ + +#define IXGBE_RUPPBMR_MQA 0x80000000 /* Enable UP to queue mapping */ + +#define IXGBE_RT2CR_MCL_SHIFT 12 /* Offset to Max Credit Limit setting */ +#define IXGBE_RT2CR_LSP 0x80000000 /* LSP enable bit */ + +#define IXGBE_RDRXCTL_MPBEN 0x00000010 /* DMA config for multiple packet buffers enable */ +#define IXGBE_RDRXCTL_MCEN 0x00000040 /* DMA config for multiple cores (RSS) enable */ + +#define IXGBE_TDTQ2TCCR_MCL_SHIFT 12 +#define IXGBE_TDTQ2TCCR_BWG_SHIFT 9 +#define IXGBE_TDTQ2TCCR_GSP 0x40000000 +#define IXGBE_TDTQ2TCCR_LSP 0x80000000 + +#define IXGBE_TDPT2TCCR_MCL_SHIFT 12 +#define IXGBE_TDPT2TCCR_BWG_SHIFT 9 +#define IXGBE_TDPT2TCCR_GSP 0x40000000 +#define IXGBE_TDPT2TCCR_LSP 0x80000000 + +#define IXGBE_PDPMCS_TPPAC 0x00000020 /* 0 Round Robin, 1 for DFP - Deficit Fixed 
Priority */ +#define IXGBE_PDPMCS_ARBDIS 0x00000040 /* Arbiter disable */ +#define IXGBE_PDPMCS_TRM 0x00000100 /* Transmit Recycle Mode enable */ + +#define IXGBE_DTXCTL_ENDBUBD 0x00000004 /* Enable DBU buffer division */ + +#define IXGBE_TXPBSIZE_40KB 0x0000A000 /* 40KB Packet Buffer */ +#define IXGBE_RXPBSIZE_48KB 0x0000C000 /* 48KB Packet Buffer */ +#define IXGBE_RXPBSIZE_64KB 0x00010000 /* 64KB Packet Buffer */ +#define IXGBE_RXPBSIZE_80KB 0x00014000 /* 80KB Packet Buffer */ + +#define IXGBE_RDRXCTL_RDMTS_1_2 0x00000000 + +/* DCB hardware-specific driver APIs */ + +/* DCB PFC functions */ +s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *, struct ixgbe_dcb_config *); +s32 ixgbe_dcb_get_pfc_stats_82598(struct ixgbe_hw *, struct ixgbe_hw_stats *, + u8); + +/* DCB traffic class stats */ +s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *); +s32 ixgbe_dcb_get_tc_stats_82598(struct ixgbe_hw *, struct ixgbe_hw_stats *, + u8); + +/* DCB config arbiters */ +s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *, + struct ixgbe_dcb_config *); +s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *, + struct ixgbe_dcb_config *); +s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *, + struct ixgbe_dcb_config *); + +/* DCB hw initialization */ +s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *, struct ixgbe_dcb_config *); + +#endif /* _DCB_82598_CONFIG_H_ */ diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c new file mode 100644 index 0000000..5915ad3 --- /dev/null +++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c @@ -0,0 +1,352 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2008 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. 
+ + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + Linux NICS + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe.h" +#include <linux/dcbnl.h> + +/* Callbacks for DCB netlink in the kernel */ +#define BIT_DCB_MODE 0x01 +#define BIT_PFC 0x02 +#define BIT_PG_RX 0x04 +#define BIT_PG_TX 0x08 + +int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg, + struct ixgbe_dcb_config *dst_dcb_cfg, int tc_max) +{ + struct tc_configuration *src_tc_cfg = NULL; + struct tc_configuration *dst_tc_cfg = NULL; + int i; + + if (!src_dcb_cfg || !dst_dcb_cfg) + return -EINVAL; + + for (i = DCB_PG_ATTR_TC_0; i < tc_max + DCB_PG_ATTR_TC_0; i++) { + src_tc_cfg = &src_dcb_cfg->tc_config[i - DCB_PG_ATTR_TC_0]; + dst_tc_cfg = &dst_dcb_cfg->tc_config[i - DCB_PG_ATTR_TC_0]; + + dst_tc_cfg->path[DCB_TX_CONFIG].prio_type = + src_tc_cfg->path[DCB_TX_CONFIG].prio_type; + + dst_tc_cfg->path[DCB_TX_CONFIG].bwg_id = + src_tc_cfg->path[DCB_TX_CONFIG].bwg_id; + + dst_tc_cfg->path[DCB_TX_CONFIG].bwg_percent = + src_tc_cfg->path[DCB_TX_CONFIG].bwg_percent; + + dst_tc_cfg->path[DCB_TX_CONFIG].up_to_tc_bitmap = + src_tc_cfg->path[DCB_TX_CONFIG].up_to_tc_bitmap; + + dst_tc_cfg->path[DCB_RX_CONFIG].prio_type = + src_tc_cfg->path[DCB_RX_CONFIG].prio_type; + + dst_tc_cfg->path[DCB_RX_CONFIG].bwg_id = + src_tc_cfg->path[DCB_RX_CONFIG].bwg_id; + + 
dst_tc_cfg->path[DCB_RX_CONFIG].bwg_percent = + src_tc_cfg->path[DCB_RX_CONFIG].bwg_percent; + + dst_tc_cfg->path[DCB_RX_CONFIG].up_to_tc_bitmap = + src_tc_cfg->path[DCB_RX_CONFIG].up_to_tc_bitmap; + } + + for (i = DCB_PG_ATTR_BW_ID_0; i < DCB_PG_ATTR_BW_ID_MAX; i++) { + dst_dcb_cfg->bw_percentage[DCB_TX_CONFIG] + [i-DCB_PG_ATTR_BW_ID_0] = src_dcb_cfg->bw_percentage + [DCB_TX_CONFIG][i-DCB_PG_ATTR_BW_ID_0]; + dst_dcb_cfg->bw_percentage[DCB_RX_CONFIG] + [i-DCB_PG_ATTR_BW_ID_0] = src_dcb_cfg->bw_percentage + [DCB_RX_CONFIG][i-DCB_PG_ATTR_BW_ID_0]; + } + + for (i = DCB_PFC_UP_ATTR_0; i < DCB_PFC_UP_ATTR_MAX; i++) { + dst_dcb_cfg->tc_config[i - DCB_PFC_UP_ATTR_0].dcb_pfc = + src_dcb_cfg->tc_config[i - DCB_PFC_UP_ATTR_0].dcb_pfc; + } + + return 0; +} + +static u8 ixgbe_dcbnl_get_state(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + DPRINTK(DRV, INFO, "Get DCB Admin Mode.\n"); + + return !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED); +} + +static u16 ixgbe_dcb_select_queue(struct net_device *dev, struct sk_buff *skb) +{ + /* All traffic should default to class 0 */ + return 0; +} + +static void ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + DPRINTK(DRV, INFO, "Set DCB Admin Mode.\n"); + + if (state > 0) { + /* Turn on DCB */ + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + return; + } else { + if (netdev->flags & IFF_UP) + netdev->stop(netdev); + ixgbe_reset_interrupt_capability(adapter); + ixgbe_napi_del_all(adapter); + kfree(adapter->tx_ring); + kfree(adapter->rx_ring); + adapter->tx_ring = NULL; + adapter->rx_ring = NULL; + netdev->select_queue = &ixgbe_dcb_select_queue; + + adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + adapter->flags |= IXGBE_FLAG_DCB_ENABLED; + ixgbe_init_interrupt_scheme(adapter); + ixgbe_napi_add_all(adapter); + if (netdev->flags & IFF_UP) + netdev->open(netdev); + } + } else { + /* Turn off DCB */ + if (adapter->flags & 
IXGBE_FLAG_DCB_ENABLED) { + if (netdev->flags & IFF_UP) + netdev->stop(netdev); + ixgbe_reset_interrupt_capability(adapter); + ixgbe_napi_del_all(adapter); + kfree(adapter->tx_ring); + kfree(adapter->rx_ring); + adapter->tx_ring = NULL; + adapter->rx_ring = NULL; + netdev->select_queue = NULL; + + adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; + adapter->flags |= IXGBE_FLAG_RSS_ENABLED; + ixgbe_init_interrupt_scheme(adapter); + ixgbe_napi_add_all(adapter); + if (netdev->flags & IFF_UP) + netdev->open(netdev); + } else { + return; + } + } +} + +static void ixgbe_dcbnl_get_perm_hw_addr(struct net_device *netdev, + u8 *perm_addr) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int i; + + for (i = 0; i < netdev->addr_len; i++) + perm_addr[i] = adapter->hw.mac.perm_addr[i]; +} + +static void ixgbe_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc, + u8 prio, u8 bwg_id, u8 bw_pct, + u8 up_map) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (prio != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[0].prio_type = prio; + if (bwg_id != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_id = bwg_id; + if (bw_pct != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_percent = + bw_pct; + if (up_map != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap = + up_map; + + if ((adapter->temp_dcb_cfg.tc_config[tc].path[0].prio_type != + adapter->dcb_cfg.tc_config[tc].path[0].prio_type) || + (adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_id != + adapter->dcb_cfg.tc_config[tc].path[0].bwg_id) || + (adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_percent != + adapter->dcb_cfg.tc_config[tc].path[0].bwg_percent) || + (adapter->temp_dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap != + adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap)) + adapter->dcb_set_bitmap |= BIT_PG_TX; +} + +static void ixgbe_dcbnl_set_pg_bwg_cfg_tx(struct net_device 
*netdev, int bwg_id, + u8 bw_pct) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] = bw_pct; + + if (adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] != + adapter->dcb_cfg.bw_percentage[0][bwg_id]) + adapter->dcb_set_bitmap |= BIT_PG_TX; +} + +static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc, + u8 prio, u8 bwg_id, u8 bw_pct, + u8 up_map) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (prio != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[1].prio_type = prio; + if (bwg_id != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_id = bwg_id; + if (bw_pct != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_percent = + bw_pct; + if (up_map != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap = + up_map; + + if ((adapter->temp_dcb_cfg.tc_config[tc].path[1].prio_type != + adapter->dcb_cfg.tc_config[tc].path[1].prio_type) || + (adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_id != + adapter->dcb_cfg.tc_config[tc].path[1].bwg_id) || + (adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_percent != + adapter->dcb_cfg.tc_config[tc].path[1].bwg_percent) || + (adapter->temp_dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap != + adapter->dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap)) + adapter->dcb_set_bitmap |= BIT_PG_RX; +} + +static void ixgbe_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id, + u8 bw_pct) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] = bw_pct; + + if (adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] != + adapter->dcb_cfg.bw_percentage[1][bwg_id]) + adapter->dcb_set_bitmap |= BIT_PG_RX; +} + +static void ixgbe_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int tc, + u8 *prio, u8 *bwg_id, u8 *bw_pct, + u8 *up_map) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + *prio = 
adapter->dcb_cfg.tc_config[tc].path[0].prio_type; + *bwg_id = adapter->dcb_cfg.tc_config[tc].path[0].bwg_id; + *bw_pct = adapter->dcb_cfg.tc_config[tc].path[0].bwg_percent; + *up_map = adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap; +} + +static void ixgbe_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id, + u8 *bw_pct) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + *bw_pct = adapter->dcb_cfg.bw_percentage[0][bwg_id]; +} + +static void ixgbe_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int tc, + u8 *prio, u8 *bwg_id, u8 *bw_pct, + u8 *up_map) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + *prio = adapter->dcb_cfg.tc_config[tc].path[1].prio_type; + *bwg_id = adapter->dcb_cfg.tc_config[tc].path[1].bwg_id; + *bw_pct = adapter->dcb_cfg.tc_config[tc].path[1].bwg_percent; + *up_map = adapter->dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap; +} + +static void ixgbe_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id, + u8 *bw_pct) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + *bw_pct = adapter->dcb_cfg.bw_percentage[1][bwg_id]; +} + +static void ixgbe_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority, + u8 setting) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc = setting; + if (adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc != + adapter->dcb_cfg.tc_config[priority].dcb_pfc) + adapter->dcb_set_bitmap |= BIT_PFC; +} + +static void ixgbe_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority, + u8 *setting) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + *setting = adapter->dcb_cfg.tc_config[priority].dcb_pfc; +} + +static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int ret; + + if (!adapter->dcb_set_bitmap) + return 1; + + while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state)) + msleep(1); + + ret = 
ixgbe_copy_dcb_cfg(&adapter->temp_dcb_cfg, &adapter->dcb_cfg, + adapter->ring_feature[RING_F_DCB].indices); + if (ret) { + clear_bit(__IXGBE_RESETTING, &adapter->state); + return ret; + } + + ixgbe_down(adapter); + ixgbe_up(adapter); + adapter->dcb_set_bitmap = 0x00; + clear_bit(__IXGBE_RESETTING, &adapter->state); + return ret; +} + +struct dcbnl_rtnl_ops dcbnl_ops = { + .getstate = ixgbe_dcbnl_get_state, + .setstate = ixgbe_dcbnl_set_state, + .getpermhwaddr = ixgbe_dcbnl_get_perm_hw_addr, + .setpgtccfgtx = ixgbe_dcbnl_set_pg_tc_cfg_tx, + .setpgbwgcfgtx = ixgbe_dcbnl_set_pg_bwg_cfg_tx, + .setpgtccfgrx = ixgbe_dcbnl_set_pg_tc_cfg_rx, + .setpgbwgcfgrx = ixgbe_dcbnl_set_pg_bwg_cfg_rx, + .getpgtccfgtx = ixgbe_dcbnl_get_pg_tc_cfg_tx, + .getpgbwgcfgtx = ixgbe_dcbnl_get_pg_bwg_cfg_tx, + .getpgtccfgrx = ixgbe_dcbnl_get_pg_tc_cfg_rx, + .getpgbwgcfgrx = ixgbe_dcbnl_get_pg_bwg_cfg_rx, + .setpfccfg = ixgbe_dcbnl_set_pfc_cfg, + .getpfccfg = ixgbe_dcbnl_get_pfc_cfg, + .setall = ixgbe_dcbnl_set_all +}; + diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index 81a9c4b..87dda98 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c @@ -97,9 +97,18 @@ static struct ixgbe_stats ixgbe_gstrings_stats[] = { ((((struct ixgbe_adapter *)netdev->priv)->num_tx_queues + \ ((struct ixgbe_adapter *)netdev->priv)->num_rx_queues) * \ (sizeof(struct ixgbe_queue_stats) / sizeof(u64))) -#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + IXGBE_QUEUE_STATS_LEN) #define IXGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_stats) -#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + IXGBE_QUEUE_STATS_LEN) +#define IXGBE_PB_STATS_LEN ( \ + (((struct ixgbe_adapter *)netdev->priv)->flags & \ + IXGBE_FLAG_DCB_ENABLED) ? 
\ + (sizeof(((struct ixgbe_adapter *)0)->stats.pxonrxc) + \ + sizeof(((struct ixgbe_adapter *)0)->stats.pxontxc) + \ + sizeof(((struct ixgbe_adapter *)0)->stats.pxoffrxc) + \ + sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \ + / sizeof(u64) : 0) +#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \ + IXGBE_PB_STATS_LEN + \ + IXGBE_QUEUE_STATS_LEN) static int ixgbe_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) @@ -804,6 +813,16 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, data[i + k] = queue_stat[k]; i += k; } + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) { + data[i++] = adapter->stats.pxontxc[j]; + data[i++] = adapter->stats.pxofftxc[j]; + } + for (j = 0; j < MAX_RX_PACKET_BUFFERS; j++) { + data[i++] = adapter->stats.pxonrxc[j]; + data[i++] = adapter->stats.pxoffrxc[j]; + } + } } static void ixgbe_get_strings(struct net_device *netdev, u32 stringset, @@ -832,6 +851,20 @@ static void ixgbe_get_strings(struct net_device *netdev, u32 stringset, sprintf(p, "rx_queue_%u_bytes", i); p += ETH_GSTRING_LEN; } + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) { + sprintf(p, "tx_pb_%u_pxon", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_pb_%u_pxoff", i); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < MAX_RX_PACKET_BUFFERS; i++) { + sprintf(p, "rx_pb_%u_pxon", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_pb_%u_pxoff", i); + p += ETH_GSTRING_LEN; + } + } /* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */ break; } diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index ca17af4..cbbfc8a 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -402,7 +402,7 @@ static void ixgbe_receive_skb(struct ixgbe_adapter *adapter, if (adapter->netdev->features & NETIF_F_LRO && skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (adapter->vlgrp && is_vlan) + if (adapter->vlgrp && is_vlan && (tag != 
0)) lro_vlan_hwaccel_receive_skb(&ring->lro_mgr, skb, adapter->vlgrp, tag, rx_desc); @@ -411,12 +411,12 @@ static void ixgbe_receive_skb(struct ixgbe_adapter *adapter, ring->lro_used = true; } else { if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL)) { - if (adapter->vlgrp && is_vlan) + if (adapter->vlgrp && is_vlan && (tag != 0)) vlan_hwaccel_receive_skb(skb, adapter->vlgrp, tag); else netif_receive_skb(skb); } else { - if (adapter->vlgrp && is_vlan) + if (adapter->vlgrp && is_vlan && (tag != 0)) vlan_hwaccel_rx(skb, adapter->vlgrp, tag); else netif_rx(skb); @@ -1654,10 +1654,12 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) * effects of setting this bit are only that SRRCTL must be * fully programmed [0..15] */ - rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); - rdrxctl |= IXGBE_RDRXCTL_MVMEN; - IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); - + if (adapter->flags & + (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_VMDQ_ENABLED)) { + rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); + rdrxctl |= IXGBE_RDRXCTL_MVMEN; + IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); + } if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) { /* Fill out redirection table */ @@ -1716,6 +1718,16 @@ static void ixgbe_vlan_rx_register(struct net_device *netdev, ixgbe_irq_disable(adapter); adapter->vlgrp = grp; + /* + * For a DCB driver, always enable VLAN tag stripping so we can + * still receive traffic from a DCB-enabled host even if we're + * not in DCB mode. + */ + ctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_VLNCTRL); + ctrl |= IXGBE_VLNCTRL_VME; + ctrl &= ~IXGBE_VLNCTRL_CFIEN; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VLNCTRL, ctrl); + if (grp) { /* enable VLAN tag insert/strip */ ctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_VLNCTRL); @@ -1880,6 +1892,42 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter) } } +/* + * ixgbe_configure_dcb - Configure DCB hardware + * @adapter: ixgbe adapter struct + * + * This is called by the driver on open to configure the DCB hardware. 
+ * This is also called by the gennetlink interface when reconfiguring + * the DCB state. + */ +static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 txdctl, vlnctrl; + int i, j; + + ixgbe_dcb_check_config(&adapter->dcb_cfg); + ixgbe_dcb_calculate_tc_credits(&adapter->dcb_cfg, DCB_TX_CONFIG); + ixgbe_dcb_calculate_tc_credits(&adapter->dcb_cfg, DCB_RX_CONFIG); + + /* reconfigure the hardware */ + ixgbe_dcb_hw_config(&adapter->hw, &adapter->dcb_cfg); + + for (i = 0; i < adapter->num_tx_queues; i++) { + j = adapter->tx_ring[i].reg_idx; + txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(j)); + /* PThresh workaround for Tx hang with DFP enabled. */ + txdctl |= 32; + IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(j), txdctl); + } + /* Enable VLAN tag insert/strip */ + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctrl |= IXGBE_VLNCTRL_VME | IXGBE_VLNCTRL_VFE; + vlnctrl &= ~IXGBE_VLNCTRL_CFIEN; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true); +} + static void ixgbe_configure(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -1888,6 +1936,12 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter) ixgbe_set_rx_mode(netdev); ixgbe_restore_vlan(adapter); + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + netif_set_gso_max_size(netdev, 32768); + ixgbe_configure_dcb(adapter); + } else { + netif_set_gso_max_size(netdev, 65536); + } ixgbe_configure_tx(adapter); ixgbe_configure_rx(adapter); @@ -2234,6 +2288,11 @@ static void ixgbe_reset_task(struct work_struct *work) struct ixgbe_adapter *adapter; adapter = container_of(work, struct ixgbe_adapter, reset_task); + /* If we're already down or resetting, just bail */ + if (test_bit(__IXGBE_DOWN, &adapter->state) || + test_bit(__IXGBE_RESETTING, &adapter->state)) + return; + adapter->tx_timeout_count++; ixgbe_reinit_locked(adapter); @@ -2243,15 +2302,31 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter 
*adapter) { int nrq = 1, ntq = 1; int feature_mask = 0, rss_i, rss_m; + int dcb_i, dcb_m; /* Number of supported queues */ switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: + dcb_i = adapter->ring_feature[RING_F_DCB].indices; + dcb_m = 0; rss_i = adapter->ring_feature[RING_F_RSS].indices; rss_m = 0; feature_mask |= IXGBE_FLAG_RSS_ENABLED; + feature_mask |= IXGBE_FLAG_DCB_ENABLED; switch (adapter->flags & feature_mask) { + case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_DCB_ENABLED): + dcb_m = 0x7 << 3; + rss_i = min(8, rss_i); + rss_m = 0x7; + nrq = dcb_i * rss_i; + ntq = min(MAX_TX_QUEUES, dcb_i * rss_i); + break; + case (IXGBE_FLAG_DCB_ENABLED): + dcb_m = 0x7 << 3; + nrq = dcb_i; + ntq = dcb_i; + break; case (IXGBE_FLAG_RSS_ENABLED): rss_m = 0xF; nrq = rss_i; @@ -2259,6 +2334,8 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter) break; case 0: default: + dcb_i = 0; + dcb_m = 0; rss_i = 0; rss_m = 0; nrq = 1; @@ -2266,6 +2343,12 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter) break; } + /* Sanity check, we should never have zero queues */ + nrq = (nrq ?:1); + ntq = (ntq ?:1); + + adapter->ring_feature[RING_F_DCB].indices = dcb_i; + adapter->ring_feature[RING_F_DCB].mask = dcb_m; adapter->ring_feature[RING_F_RSS].indices = rss_i; adapter->ring_feature[RING_F_RSS].mask = rss_m; break; @@ -2317,6 +2400,7 @@ static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter, adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; kfree(adapter->msix_entries); adapter->msix_entries = NULL; + adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; ixgbe_set_num_queues(adapter); } else { @@ -2336,15 +2420,42 @@ static void __devinit ixgbe_cache_ring_register(struct ixgbe_adapter *adapter) { int feature_mask = 0, rss_i; int i, txr_idx, rxr_idx; + int dcb_i; /* Number of supported queues */ switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: + dcb_i = adapter->ring_feature[RING_F_DCB].indices; rss_i = 
adapter->ring_feature[RING_F_RSS].indices; txr_idx = 0; rxr_idx = 0; + feature_mask |= IXGBE_FLAG_DCB_ENABLED; feature_mask |= IXGBE_FLAG_RSS_ENABLED; switch (adapter->flags & feature_mask) { + case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_DCB_ENABLED): + for (i = 0; i < dcb_i; i++) { + int j; + /* Rx first */ + for (j = 0; j < adapter->num_rx_queues; j++) { + adapter->rx_ring[rxr_idx].reg_idx = + i << 3 | j; + rxr_idx++; + } + /* Tx now */ + for (j = 0; j < adapter->num_tx_queues; j++) { + adapter->tx_ring[txr_idx].reg_idx = + i << 2 | (j >> 1); + if (j & 1) + txr_idx++; + } + } + case (IXGBE_FLAG_DCB_ENABLED): + /* the number of queues is assumed to be symmetric */ + for (i = 0; i < dcb_i; i++) { + adapter->rx_ring[i].reg_idx = i << 3; + adapter->tx_ring[i].reg_idx = i << 2; + } + break; case (IXGBE_FLAG_RSS_ENABLED): for (i = 0; i < adapter->num_rx_queues; i++) adapter->rx_ring[i].reg_idx = i; @@ -2369,7 +2480,7 @@ static void __devinit ixgbe_cache_ring_register(struct ixgbe_adapter *adapter) * number of queues at compile-time. The polling_netdev array is * intended for Multiqueue, but should work fine with a single queue. 
**/ -static int __devinit ixgbe_alloc_queues(struct ixgbe_adapter *adapter) +static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter) { int i; @@ -2439,6 +2550,7 @@ static int __devinit ixgbe_set_interrupt_capability(struct ixgbe_adapter adapter->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry), GFP_KERNEL); if (!adapter->msix_entries) { + adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; ixgbe_set_num_queues(adapter); kfree(adapter->tx_ring); @@ -2479,7 +2591,7 @@ out: return err; } -static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter) +void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter) { if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; @@ -2503,7 +2615,7 @@ static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter) * - Hardware queue count (num_*_queues) * - defined by miscellaneous hardware support/features (RSS, etc.) **/ -static int __devinit ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter) +int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter) { int err; @@ -2551,6 +2663,8 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; unsigned int rss; + int j; + struct tc_configuration *tc; /* PCI config space info */ @@ -2564,6 +2678,26 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) rss = min(IXGBE_MAX_RSS_INDICES, (int)num_online_cpus()); adapter->ring_feature[RING_F_RSS].indices = rss; adapter->flags |= IXGBE_FLAG_RSS_ENABLED; + adapter->ring_feature[RING_F_DCB].indices = IXGBE_MAX_DCB_INDICES; + + /* Configure DCB traffic classes */ + for (j = 0; j < MAX_TRAFFIC_CLASS; j++) { + tc = &adapter->dcb_cfg.tc_config[j]; + tc->path[DCB_TX_CONFIG].bwg_id = 0; + tc->path[DCB_TX_CONFIG].bwg_percent = 12 + (j & 1); + tc->path[DCB_RX_CONFIG].bwg_id = 0; + tc->path[DCB_RX_CONFIG].bwg_percent = 12 + 
(j & 1); + tc->dcb_pfc = pfc_disabled; + } + adapter->dcb_cfg.bw_percentage[DCB_TX_CONFIG][0] = 100; + adapter->dcb_cfg.bw_percentage[DCB_RX_CONFIG][0] = 100; + adapter->dcb_cfg.rx_pba_cfg = pba_equal; + adapter->dcb_cfg.round_robin_enable = false; + adapter->dcb_set_bitmap = 0x00; +#ifdef CONFIG_DCBNL + ixgbe_copy_dcb_cfg(&adapter->dcb_cfg, &adapter->temp_dcb_cfg, + adapter->ring_feature[RING_F_DCB].indices); +#endif /* default flow control settings */ hw->fc.original_type = ixgbe_fc_none; @@ -2938,7 +3072,7 @@ static int ixgbe_close(struct net_device *netdev) * @adapter: private struct * helper function to napi_add each possible q_vector->napi */ -static void ixgbe_napi_add_all(struct ixgbe_adapter *adapter) +void ixgbe_napi_add_all(struct ixgbe_adapter *adapter) { int q_idx, q_vectors; int (*poll)(struct napi_struct *, int); @@ -2959,7 +3093,7 @@ static void ixgbe_napi_add_all(struct ixgbe_adapter *adapter) } } -static void ixgbe_napi_del_all(struct ixgbe_adapter *adapter) +void ixgbe_napi_del_all(struct ixgbe_adapter *adapter) { int q_idx; int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; @@ -3080,6 +3214,18 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) adapter->stats.mpc[i] += mpc; total_mpc += adapter->stats.mpc[i]; adapter->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i)); + adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); + adapter->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i)); + adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); + adapter->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i)); + adapter->stats.pxonrxc[i] += IXGBE_READ_REG(hw, + IXGBE_PXONRXC(i)); + adapter->stats.pxontxc[i] += IXGBE_READ_REG(hw, + IXGBE_PXONTXC(i)); + adapter->stats.pxoffrxc[i] += IXGBE_READ_REG(hw, + IXGBE_PXOFFRXC(i)); + adapter->stats.pxofftxc[i] += IXGBE_READ_REG(hw, + IXGBE_PXOFFTXC(i)); } adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); /* work around hardware counting issue */ @@ -3577,6 +3723,14 @@ static int 
ixgbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (adapter->vlgrp && vlan_tx_tag_present(skb)) { tx_flags |= vlan_tx_tag_get(skb); + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + tx_flags &= ~IXGBE_TX_FLAGS_VLAN_PRIO_MASK; + tx_flags |= (skb->queue_mapping << 13); + } + tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT; + tx_flags |= IXGBE_TX_FLAGS_VLAN; + } else if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + tx_flags |= (skb->queue_mapping << 13); tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT; tx_flags |= IXGBE_TX_FLAGS_VLAN; } @@ -3843,6 +3997,13 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, netdev->vlan_features |= NETIF_F_IP_CSUM; netdev->vlan_features |= NETIF_F_SG; + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) + adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + +#ifdef CONFIG_DCBNL + netdev->dcbnl_ops = &dcbnl_ops; +#endif + if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; @@ -4099,7 +4260,6 @@ static struct pci_driver ixgbe_driver = { **/ static int __init ixgbe_init_module(void) { - int ret; printk(KERN_INFO "%s: %s - version %s\n", ixgbe_driver_name, ixgbe_driver_string, ixgbe_driver_version); @@ -4109,8 +4269,7 @@ static int __init ixgbe_init_module(void) dca_register_notify(&dca_notifier); #endif - ret = pci_register_driver(&ixgbe_driver); - return ret; + return pci_register_driver(&ixgbe_driver); } module_init(ixgbe_init_module); diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h new file mode 100644 index 0000000..32d32c1 --- /dev/null +++ b/include/linux/dcbnl.h @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Lucy Liu + */ + +#ifndef __LINUX_DCBNL_H__ +#define __LINUX_DCBNL_H__ + +#define DCB_PROTO_VERSION 1 + +struct dcbmsg { + unsigned char dcb_family; + __u8 cmd; + __u16 dcb_pad; +}; + +/** + * enum dcbnl_commands - supported DCB commands + * + * @DCB_CMD_UNDEFINED: unspecified command to catch errors + * @DCB_CMD_GSTATE: request the state of DCB in the device + * @DCB_CMD_SSTATE: set the state of DCB in the device + * @DCB_CMD_PGTX_GCFG: request the priority group configuration for Tx + * @DCB_CMD_PGTX_SCFG: set the priority group configuration for Tx + * @DCB_CMD_PGRX_GCFG: request the priority group configuration for Rx + * @DCB_CMD_PGRX_SCFG: set the priority group configuration for Rx + * @DCB_CMD_PFC_GCFG: request the priority flow control configuration + * @DCB_CMD_PFC_SCFG: set the priority flow control configuration + * @DCB_CMD_SET_ALL: apply all changes to the underlying device + * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying + * device. Only useful when using bonding. 
+ */ +enum dcbnl_commands { + DCB_CMD_UNDEFINED, + + DCB_CMD_GSTATE, + DCB_CMD_SSTATE, + + DCB_CMD_PGTX_GCFG, + DCB_CMD_PGTX_SCFG, + DCB_CMD_PGRX_GCFG, + DCB_CMD_PGRX_SCFG, + + DCB_CMD_PFC_GCFG, + DCB_CMD_PFC_SCFG, + + DCB_CMD_SET_ALL, + DCB_CMD_GPERM_HWADDR, + + __DCB_CMD_ENUM_MAX, + DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, +}; + + +/** + * enum dcbnl_attrs - DCB top-level netlink attributes + * + * @DCB_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_ATTR_IFNAME: interface name of the underlying device (NLA_STRING) + * @DCB_ATTR_STATE: enable state of DCB in the device (NLA_U8) + * @DCB_ATTR_PFC_STATE: enable state of PFC in the device (NLA_U8) + * @DCB_ATTR_PFC_CFG: priority flow control configuration (NLA_NESTED) + * @DCB_ATTR_NUM_TC: number of traffic classes supported in the device (NLA_U8) + * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED) + * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8) + * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) + */ +enum dcbnl_attrs { + DCB_ATTR_UNDEFINED, + + DCB_ATTR_IFNAME, + DCB_ATTR_STATE, + DCB_ATTR_PFC_STATE, + DCB_ATTR_PFC_CFG, + DCB_ATTR_NUM_TC, + DCB_ATTR_PG_CFG, + DCB_ATTR_SET_ALL, + DCB_ATTR_PERM_HWADDR, + + __DCB_ATTR_ENUM_MAX, + DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_pfc_up_attrs - DCB Priority Flow Control user priority nested attrs + * + * @DCB_PFC_UP_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_PFC_UP_ATTR_0: Priority Flow Control value for User Priority 0 (NLA_U8) + * @DCB_PFC_UP_ATTR_1: Priority Flow Control value for User Priority 1 (NLA_U8) + * @DCB_PFC_UP_ATTR_2: Priority Flow Control value for User Priority 2 (NLA_U8) + * @DCB_PFC_UP_ATTR_3: Priority Flow Control value for User Priority 3 (NLA_U8) + * @DCB_PFC_UP_ATTR_4: Priority Flow Control value for User Priority 4 (NLA_U8) + * @DCB_PFC_UP_ATTR_5: Priority Flow Control value for User Priority 5 (NLA_U8) + * @DCB_PFC_UP_ATTR_6: Priority Flow
Control value for User Priority 6 (NLA_U8) + * @DCB_PFC_UP_ATTR_7: Priority Flow Control value for User Priority 7 (NLA_U8) + * @DCB_PFC_UP_ATTR_MAX: highest attribute number currently defined + * @DCB_PFC_UP_ATTR_ALL: apply to all priority flow control attrs (NLA_FLAG) + * + */ +enum dcbnl_pfc_up_attrs { + DCB_PFC_UP_ATTR_UNDEFINED, + + DCB_PFC_UP_ATTR_0, + DCB_PFC_UP_ATTR_1, + DCB_PFC_UP_ATTR_2, + DCB_PFC_UP_ATTR_3, + DCB_PFC_UP_ATTR_4, + DCB_PFC_UP_ATTR_5, + DCB_PFC_UP_ATTR_6, + DCB_PFC_UP_ATTR_7, + DCB_PFC_UP_ATTR_ALL, + + __DCB_PFC_UP_ATTR_ENUM_MAX, + DCB_PFC_UP_ATTR_MAX = __DCB_PFC_UP_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_pg_attrs - DCB Priority Group attributes + * + * @DCB_PG_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_PG_ATTR_TC_0: Priority Group Traffic Class 0 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_1: Priority Group Traffic Class 1 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_2: Priority Group Traffic Class 2 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_3: Priority Group Traffic Class 3 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_4: Priority Group Traffic Class 4 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_5: Priority Group Traffic Class 5 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_6: Priority Group Traffic Class 6 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_7: Priority Group Traffic Class 7 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_MAX: highest attribute number currently defined + * @DCB_PG_ATTR_TC_ALL: apply to all traffic classes (NLA_NESTED) + * @DCB_PG_ATTR_BW_ID_0: Percent of link bandwidth for Priority Group 0 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_1: Percent of link bandwidth for Priority Group 1 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_2: Percent of link bandwidth for Priority Group 2 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_3: Percent of link bandwidth for Priority Group 3 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_4: Percent of link bandwidth for Priority Group 4 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_5: Percent of link bandwidth 
for Priority Group 5 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_6: Percent of link bandwidth for Priority Group 6 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_7: Percent of link bandwidth for Priority Group 7 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_MAX: highest attribute number currently defined + * @DCB_PG_ATTR_BW_ID_ALL: apply to all priority groups (NLA_FLAG) + * + */ +enum dcbnl_pg_attrs { + DCB_PG_ATTR_UNDEFINED, + + DCB_PG_ATTR_TC_0, + DCB_PG_ATTR_TC_1, + DCB_PG_ATTR_TC_2, + DCB_PG_ATTR_TC_3, + DCB_PG_ATTR_TC_4, + DCB_PG_ATTR_TC_5, + DCB_PG_ATTR_TC_6, + DCB_PG_ATTR_TC_7, + DCB_PG_ATTR_TC_MAX, + DCB_PG_ATTR_TC_ALL, + + DCB_PG_ATTR_BW_ID_0, + DCB_PG_ATTR_BW_ID_1, + DCB_PG_ATTR_BW_ID_2, + DCB_PG_ATTR_BW_ID_3, + DCB_PG_ATTR_BW_ID_4, + DCB_PG_ATTR_BW_ID_5, + DCB_PG_ATTR_BW_ID_6, + DCB_PG_ATTR_BW_ID_7, + DCB_PG_ATTR_BW_ID_MAX, + DCB_PG_ATTR_BW_ID_ALL, + + __DCB_PG_ATTR_ENUM_MAX, + DCB_PG_ATTR_MAX = __DCB_PG_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_tc_attrs - DCB Traffic Class attributes + * + * @DCB_TC_ATTR_PARAM_UNDEFINED: unspecified attribute to catch errors + * @DCB_TC_ATTR_PARAM_PGID: (NLA_U8) Priority group the traffic class belongs to + * Valid values are: 0-7 + * @DCB_TC_ATTR_PARAM_UP_MAPPING: (NLA_U8) Traffic class to user priority map + * Some devices may not support changing the + * user priority map of a TC. 
+ * @DCB_TC_ATTR_PARAM_STRICT_PRIO: (NLA_U8) Strict priority setting + * 0 - none + * 1 - group strict + * 2 - link strict + * @DCB_TC_ATTR_PARAM_BW_PCT: optional - (NLA_U8) If supported by the device and + * not configured to use link strict priority, + * this is the percentage of bandwidth of the + * priority group this traffic class belongs to + * @DCB_TC_ATTR_PARAM_ALL: (NLA_FLAG) all traffic class parameters + * + */ +enum dcbnl_tc_attrs { + DCB_TC_ATTR_PARAM_UNDEFINED, + + DCB_TC_ATTR_PARAM_PGID, + DCB_TC_ATTR_PARAM_UP_MAPPING, + DCB_TC_ATTR_PARAM_STRICT_PRIO, + DCB_TC_ATTR_PARAM_BW_PCT, + DCB_TC_ATTR_PARAM_ALL, + + __DCB_TC_ATTR_PARAM_ENUM_MAX, + DCB_TC_ATTR_PARAM_MAX = __DCB_TC_ATTR_PARAM_ENUM_MAX - 1, +}; + +/** + * enum dcb_general_attr_values - general DCB attribute values + * + * @DCB_ATTR_VALUE_UNDEFINED: value used to indicate an attribute is not supported + * + */ +enum dcb_general_attr_values { + DCB_ATTR_VALUE_UNDEFINED = 0xff +}; + + +#endif /* __LINUX_DCBNL_H__ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9cfd20b..f010c1a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -42,6 +42,9 @@ #include #include +#ifdef CONFIG_DCBNL +#include +#endif struct vlan_group; struct ethtool_ops; @@ -751,6 +754,11 @@ struct net_device /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; + +#ifdef CONFIG_DCBNL + /* Data Center Bridging netlink ops */ + struct dcbnl_rtnl_ops *dcbnl_ops; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2b3d51c..e88f705 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -107,6 +107,11 @@ enum { RTM_GETADDRLABEL, #define RTM_GETADDRLABEL RTM_GETADDRLABEL + RTM_GETDCB = 78, +#define RTM_GETDCB RTM_GETDCB + RTM_SETDCB, +#define RTM_SETDCB RTM_SETDCB + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
}; diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h new file mode 100644 index 0000000..0ef0c5a --- /dev/null +++ b/include/net/dcbnl.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Lucy Liu + */ + +#ifndef __NET_DCBNL_H__ +#define __NET_DCBNL_H__ + +/* + * Ops struct for the netlink callbacks. Used by DCB-enabled drivers through + * the netdevice struct. 
+ */ +struct dcbnl_rtnl_ops { + u8 (*getstate)(struct net_device *); + void (*setstate)(struct net_device *, u8); + void (*getpermhwaddr)(struct net_device *, u8 *); + void (*setpgtccfgtx)(struct net_device *, int, u8, u8, u8, u8); + void (*setpgbwgcfgtx)(struct net_device *, int, u8); + void (*setpgtccfgrx)(struct net_device *, int, u8, u8, u8, u8); + void (*setpgbwgcfgrx)(struct net_device *, int, u8); + void (*getpgtccfgtx)(struct net_device *, int, u8 *, u8 *, u8 *, u8 *); + void (*getpgbwgcfgtx)(struct net_device *, int, u8 *); + void (*getpgtccfgrx)(struct net_device *, int, u8 *, u8 *, u8 *, u8 *); + void (*getpgbwgcfgrx)(struct net_device *, int, u8 *); + void (*setpfccfg)(struct net_device *, int, u8); + void (*getpfccfg)(struct net_device *, int, u8 *); + u8 (*setall)(struct net_device *); +}; + +#endif /* __NET_DCBNL_H__ */ diff --git a/net/Kconfig b/net/Kconfig index 9103a16..de441be 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -190,6 +190,7 @@ source "net/lapb/Kconfig" source "net/econet/Kconfig" source "net/wanrouter/Kconfig" source "net/sched/Kconfig" +source "net/dcb/Kconfig" menu "Network testing" diff --git a/net/Makefile b/net/Makefile index acaf819..7718f19 100644 --- a/net/Makefile +++ b/net/Makefile @@ -56,6 +56,9 @@ obj-$(CONFIG_NETLABEL) += netlabel/ obj-$(CONFIG_IUCV) += iucv/ obj-$(CONFIG_RFKILL) += rfkill/ obj-$(CONFIG_NET_9P) += 9p/ +ifeq ($(CONFIG_DCBNL),y) +obj-$(CONFIG_DCB) += dcb/ +endif ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig new file mode 100644 index 0000000..bdf3880 --- /dev/null +++ b/net/dcb/Kconfig @@ -0,0 +1,12 @@ +config DCB + tristate "Data Center Bridging support" + +config DCBNL + bool "Data Center Bridging netlink interface support" + depends on DCB + default n + ---help--- + This option turns on the netlink interface + (dcbnl) for Data Center Bridging capable devices. + + If unsure, say N. 
diff --git a/net/dcb/Makefile b/net/dcb/Makefile
new file mode 100644
index 0000000..9930f4c
--- /dev/null
+++ b/net/dcb/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_DCB) += dcbnl.o
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
new file mode 100644
index 0000000..f340d09
--- /dev/null
+++ b/net/dcb/dcbnl.c
@@ -0,0 +1,728 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Lucy Liu
+ */
+
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
+#include <net/rtnetlink.h>
+#include <linux/dcbnl.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+
+MODULE_AUTHOR("Lucy Liu, ");
+MODULE_DESCRIPTION("Data Center Bridging generic netlink interface");
+MODULE_LICENSE("GPL");
+
+/**************** DCB attribute policies *************************************/
+
+/*
+ * DCB netlink attributes policy.
+ *
+ * DCB_ATTR_IFNAME is NLA_NUL_STRING so that the name handed to
+ * dev_get_by_name() in dcb_doit() is guaranteed to be NUL-terminated.
+ */
+static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
+	[DCB_ATTR_IFNAME]      = {.type = NLA_NUL_STRING, .len = IFNAMSIZ - 1},
+	[DCB_ATTR_STATE]       = {.type = NLA_U8},
+	[DCB_ATTR_PFC_CFG]     = {.type = NLA_NESTED},
+	[DCB_ATTR_PG_CFG]      = {.type = NLA_NESTED},
+	[DCB_ATTR_SET_ALL]     = {.type = NLA_U8},
+	[DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG},
+};
+
+/* DCB priority flow control to User Priority nested attributes */
+static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = {
+	[DCB_PFC_UP_ATTR_0]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_1]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_2]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_3]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_4]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_5]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_6]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_7]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG},
+};
+
+/* DCB priority grouping nested attributes */
+static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = {
+	[DCB_PG_ATTR_TC_0]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_1]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_2]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_3]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_4]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_5]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_6]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_7]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_ALL]    = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_BW_ID_0]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_1]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_2]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_3]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_4]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_5]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_6]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_7]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_ALL] = {.type = NLA_FLAG},
+};
+
+/* DCB traffic class nested attributes. */
+static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = {
+	[DCB_TC_ATTR_PARAM_PGID]        = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_UP_MAPPING]  = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_STRICT_PRIO] = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_BW_PCT]      = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_ALL]         = {.type = NLA_FLAG},
+};
+
+/*
+ * dcbnl_reply - send a reply carrying a single u8 attribute
+ *
+ * Builds a message of type @event with a dcbmsg header for @cmd and one
+ * attribute @attr holding @value, then unicasts it to @pid.
+ *
+ * Returns 0 on success, -EINVAL if the message could not be built, or the
+ * error from rtnl_unicast().
+ */
+static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
+		       u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct dcbmsg *dcb;
+	struct nlmsghdr *nlh;
+	int ret = -EINVAL;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		return ret;
+
+	/* NLMSG_NEW() jumps to nlmsg_failure when the header does not fit */
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, event, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = cmd;
+	dcb->dcb_pad = 0;
+
+	ret = nla_put_u8(dcbnl_skb, attr, value);
+	if (ret)
+		goto err;
+
+	/* end the message, assign the nlmsg_len. */
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/*
+	 * rtnl_unicast() consumes the skb whether or not delivery succeeds,
+	 * so it must not be freed again on failure.
+	 */
+	return rtnl_unicast(dcbnl_skb, &init_net, pid);
+
+nlmsg_failure:
+err:
+	kfree_skb(dcbnl_skb);
+	return ret;
+}
+
+/* DCB_CMD_GSTATE: reply with the driver's DCB state in DCB_ATTR_STATE. */
+static int dcbnl_getstate(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	/* a get request does not need to carry DCB_ATTR_STATE itself */
+	if (!netdev->dcbnl_ops->getstate)
+		return -EINVAL;
+
+	return dcbnl_reply(netdev->dcbnl_ops->getstate(netdev), RTM_GETDCB,
+			   DCB_CMD_GSTATE, DCB_ATTR_STATE, pid, seq, flags);
+}
+
+/*
+ * DCB_CMD_PFC_GCFG: reply with the PFC setting of every user priority named
+ * in the request's DCB_ATTR_PFC_CFG nest (all eight for DCB_PFC_UP_ATTR_ALL).
+ */
+static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb,
+			   u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1], *nest;
+	u8 value;
+	int ret = -EINVAL;
+	int i;
+	int getall = 0;
+
+	if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->getpfccfg)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
+			       tb[DCB_ATTR_PFC_CFG],
+			       dcbnl_pfc_up_nest);
+	if (ret)
+		goto err_out;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_PFC_GCFG;
+	dcb->dcb_pad = 0;
+
+	nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PFC_CFG);
+	if (!nest)
+		goto err;
+
+	if (data[DCB_PFC_UP_ATTR_ALL])
+		getall = 1;
+
+	for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
+		if (!getall && !data[i])
+			continue;
+
+		/* never send an unset stack byte if the driver skips it */
+		value = DCB_ATTR_VALUE_UNDEFINED;
+		netdev->dcbnl_ops->getpfccfg(netdev, i - DCB_PFC_UP_ATTR_0,
+					     &value);
+		ret = nla_put_u8(dcbnl_skb, i, value);
+
+		if (ret) {
+			nla_nest_cancel(dcbnl_skb, nest);
+			goto err;
+		}
+	}
+	nla_nest_end(dcbnl_skb, nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/* rtnl_unicast() consumes the skb even when it fails */
+	if (rtnl_unicast(dcbnl_skb, &init_net, pid))
+		return -EINVAL;
+
+	return 0;
+nlmsg_failure:
+err:
+	kfree_skb(dcbnl_skb);
+err_out:
+	return -EINVAL;
+}
+
+/*
+ * DCB_CMD_GPERM_HWADDR: reply with the driver-reported permanent hardware
+ * address as a MAX_ADDR_LEN byte DCB_ATTR_PERM_HWADDR attribute.
+ */
+static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb,
+				u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	u8 perm_addr[MAX_ADDR_LEN];
+	int ret = -EINVAL;
+
+	if (!netdev->dcbnl_ops->getpermhwaddr)
+		return ret;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_GPERM_HWADDR;
+	dcb->dcb_pad = 0;
+
+	/* the whole buffer goes to user space; do not leak stack contents */
+	memset(perm_addr, 0, sizeof(perm_addr));
+	netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr);
+
+	ret = nla_put(dcbnl_skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr),
+		      perm_addr);
+	if (ret)
+		goto err;
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/* rtnl_unicast() consumes the skb even when it fails */
+	if (rtnl_unicast(dcbnl_skb, &init_net, pid))
+		return -EINVAL;
+
+	return 0;
+
+nlmsg_failure:
+err:
+	kfree_skb(dcbnl_skb);
+err_out:
+	return -EINVAL;
+}
+
+/*
+ * Common handler for DCB_CMD_PGTX_GCFG (@dir == 0) and DCB_CMD_PGRX_GCFG
+ * (@dir != 0): reply with the traffic class parameters and bandwidth group
+ * percentages requested in the DCB_ATTR_PG_CFG nest.
+ */
+static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags, int dir)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *pg_nest, *param_nest, *data;
+	struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
+	struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
+	u8 prio, pgid, tc_pct, up_map;
+	int ret = -EINVAL;
+	int getall = 0;
+	int i;
+
+	if (!tb[DCB_ATTR_PG_CFG] ||
+	    !netdev->dcbnl_ops->getpgtccfgtx ||
+	    !netdev->dcbnl_ops->getpgtccfgrx ||
+	    !netdev->dcbnl_ops->getpgbwgcfgtx ||
+	    !netdev->dcbnl_ops->getpgbwgcfgrx)
+		return ret;
+
+	ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
+			       tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+	if (ret)
+		goto err_out;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = (dir) ? DCB_CMD_PGRX_GCFG : DCB_CMD_PGTX_GCFG;
+	dcb->dcb_pad = 0;
+
+	pg_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PG_CFG);
+	if (!pg_nest)
+		goto err;
+
+	if (pg_tb[DCB_PG_ATTR_TC_ALL])
+		getall = 1;
+
+	for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) {
+		if (!getall && !pg_tb[i])
+			continue;
+
+		/* with TC_ALL, its nest selects the parameters for every TC */
+		if (pg_tb[DCB_PG_ATTR_TC_ALL])
+			data = pg_tb[DCB_PG_ATTR_TC_ALL];
+		else
+			data = pg_tb[i];
+		ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
+				       data, dcbnl_tc_param_nest);
+		if (ret)
+			goto err_pg;
+
+		param_nest = nla_nest_start(dcbnl_skb, i);
+		if (!param_nest)
+			goto err_pg;
+
+		pgid = DCB_ATTR_VALUE_UNDEFINED;
+		prio = DCB_ATTR_VALUE_UNDEFINED;
+		tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+		up_map = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->getpgtccfgrx(netdev,
+					i - DCB_PG_ATTR_TC_0, &prio,
+					&pgid, &tc_pct, &up_map);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->getpgtccfgtx(netdev,
+					i - DCB_PG_ATTR_TC_0, &prio,
+					&pgid, &tc_pct, &up_map);
+		}
+
+		if (param_tb[DCB_TC_ATTR_PARAM_PGID] ||
+		    param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+			ret = nla_put_u8(dcbnl_skb,
+					 DCB_TC_ATTR_PARAM_PGID, pgid);
+			if (ret)
+				goto err_param;
+		}
+		if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING] ||
+		    param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+			ret = nla_put_u8(dcbnl_skb,
+					 DCB_TC_ATTR_PARAM_UP_MAPPING, up_map);
+			if (ret)
+				goto err_param;
+		}
+		if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO] ||
+		    param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+			ret = nla_put_u8(dcbnl_skb,
+					 DCB_TC_ATTR_PARAM_STRICT_PRIO, prio);
+			if (ret)
+				goto err_param;
+		}
+		if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT] ||
+		    param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+			ret = nla_put_u8(dcbnl_skb, DCB_TC_ATTR_PARAM_BW_PCT,
+					 tc_pct);
+			if (ret)
+				goto err_param;
+		}
+		nla_nest_end(dcbnl_skb, param_nest);
+	}
+
+	if (pg_tb[DCB_PG_ATTR_BW_ID_ALL])
+		getall = 1;
+	else
+		getall = 0;
+
+	for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) {
+		if (!getall && !pg_tb[i])
+			continue;
+
+		tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->getpgbwgcfgrx(netdev,
+					i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->getpgbwgcfgtx(netdev,
+					i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
+		}
+		ret = nla_put_u8(dcbnl_skb, i, tc_pct);
+
+		if (ret)
+			goto err_pg;
+	}
+
+	nla_nest_end(dcbnl_skb, pg_nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/* rtnl_unicast() consumes the skb even when it fails */
+	if (rtnl_unicast(dcbnl_skb, &init_net, pid))
+		return -EINVAL;
+
+	return 0;
+
+err_param:
+	nla_nest_cancel(dcbnl_skb, param_nest);
+err_pg:
+	nla_nest_cancel(dcbnl_skb, pg_nest);
+nlmsg_failure:
+err:
+	kfree_skb(dcbnl_skb);
+err_out:
+	return -EINVAL;
+}
+
+static int dcbnl_pgtx_getcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 0);
+}
+
+static int dcbnl_pgrx_getcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 1);
+}
+
+/* DCB_CMD_SSTATE: pass the DCB_ATTR_STATE value to the driver. */
+static int dcbnl_setstate(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+	u8 value;
+
+	if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->setstate)
+		return ret;
+
+	value = nla_get_u8(tb[DCB_ATTR_STATE]);
+
+	netdev->dcbnl_ops->setstate(netdev, value);
+
+	ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE,
+			  pid, seq, flags);
+
+	return ret;
+}
+
+/* DCB_CMD_PFC_SCFG: apply each per-priority PFC value in the request. */
+static int dcbnl_setpfccfg(struct net_device *netdev, struct nlattr **tb,
+			   u32 pid, u32 seq, u16 flags)
+{
+	struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1];
+	int i;
+	int ret = -EINVAL;
+	u8 value;
+
+	if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->setpfccfg)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
+			       tb[DCB_ATTR_PFC_CFG],
+			       dcbnl_pfc_up_nest);
+	if (ret)
+		goto err;
+
+	for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
+		if (data[i] == NULL)
+			continue;
+		value = nla_get_u8(data[i]);
+		netdev->dcbnl_ops->setpfccfg(netdev,
+			data[i]->nla_type - DCB_PFC_UP_ATTR_0, value);
+	}
+
+	ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SCFG, DCB_ATTR_PFC_CFG,
+			  pid, seq, flags);
+err:
+	return ret;
+}
+
+/* DCB_CMD_SET_ALL: call the driver's setall() and reply with its result. */
+static int dcbnl_setall(struct net_device *netdev, struct nlattr **tb,
+			u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+
+	if (!tb[DCB_ATTR_SET_ALL] || !netdev->dcbnl_ops->setall)
+		return ret;
+
+	ret = dcbnl_reply(netdev->dcbnl_ops->setall(netdev), RTM_SETDCB,
+			  DCB_CMD_SET_ALL, DCB_ATTR_SET_ALL, pid, seq, flags);
+
+	return ret;
+}
+
+/*
+ * Common handler for DCB_CMD_PGTX_SCFG (@dir == 0) and DCB_CMD_PGRX_SCFG
+ * (@dir != 0). Parameters absent from a traffic class nest are passed to
+ * the driver as DCB_ATTR_VALUE_UNDEFINED.
+ */
+static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags, int dir)
+{
+	struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
+	struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
+	int ret = -EINVAL;
+	int i;
+	u8 pgid;
+	u8 up_map;
+	u8 prio;
+	u8 tc_pct;
+
+	if (!tb[DCB_ATTR_PG_CFG] ||
+	    !netdev->dcbnl_ops->setpgtccfgtx ||
+	    !netdev->dcbnl_ops->setpgtccfgrx ||
+	    !netdev->dcbnl_ops->setpgbwgcfgtx ||
+	    !netdev->dcbnl_ops->setpgbwgcfgrx)
+		return ret;
+
+	ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
+			       tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+	if (ret)
+		goto err;
+
+	for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) {
+		if (!pg_tb[i])
+			continue;
+
+		ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
+				       pg_tb[i], dcbnl_tc_param_nest);
+		if (ret)
+			goto err;
+
+		pgid = DCB_ATTR_VALUE_UNDEFINED;
+		prio = DCB_ATTR_VALUE_UNDEFINED;
+		tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+		up_map = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO])
+			prio =
+			    nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO]);
+
+		if (param_tb[DCB_TC_ATTR_PARAM_PGID])
+			pgid = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_PGID]);
+
+		if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT])
+			tc_pct = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_BW_PCT]);
+
+		if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING])
+			up_map =
+			    nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING]);
+
+		/* dir: Tx = 0, Rx = 1 */
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->setpgtccfgrx(netdev,
+				i - DCB_PG_ATTR_TC_0,
+				prio, pgid, tc_pct, up_map);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->setpgtccfgtx(netdev,
+				i - DCB_PG_ATTR_TC_0,
+				prio, pgid, tc_pct, up_map);
+		}
+	}
+
+	for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) {
+		if (!pg_tb[i])
+			continue;
+
+		tc_pct = nla_get_u8(pg_tb[i]);
+
+		/* dir: Tx = 0, Rx = 1 */
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->setpgbwgcfgrx(netdev,
+					 i - DCB_PG_ATTR_BW_ID_0, tc_pct);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->setpgbwgcfgtx(netdev,
+					 i - DCB_PG_ATTR_BW_ID_0, tc_pct);
+		}
+	}
+
+	ret = dcbnl_reply(0, RTM_SETDCB,
+			  (dir ? DCB_CMD_PGRX_SCFG : DCB_CMD_PGTX_SCFG),
+			  DCB_ATTR_PG_CFG, pid, seq, flags);
+
+err:
+	return ret;
+}
+
+static int dcbnl_pgtx_setcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 0);
+}
+
+static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 1);
+}
+
+/*
+ * dcb_doit - rtnetlink handler for RTM_GETDCB and RTM_SETDCB
+ *
+ * Looks up the device named by DCB_ATTR_IFNAME in init_net and dispatches
+ * on the dcbmsg command. A request from another namespace, a missing or
+ * unknown device, missing dcbnl_ops or an unknown command yield -EINVAL.
+ */
+static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct net_device *netdev;
+	struct dcbmsg *dcb = (struct dcbmsg *)NLMSG_DATA(nlh);
+	struct nlattr *tb[DCB_ATTR_MAX + 1];
+	/* skb was already dereferenced above, so it cannot be NULL here */
+	u32 pid = NETLINK_CB(skb).pid;
+	int ret;
+
+	if (net != &init_net)
+		return -EINVAL;
+
+	ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
+			  dcbnl_rtnl_policy);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[DCB_ATTR_IFNAME])
+		return -EINVAL;
+
+	netdev = dev_get_by_name(&init_net, nla_data(tb[DCB_ATTR_IFNAME]));
+	if (!netdev)
+		return -EINVAL;
+
+	/* result for a device without dcbnl_ops or an unknown command */
+	ret = -EINVAL;
+	if (!netdev->dcbnl_ops)
+		goto out;
+
+	switch (dcb->cmd) {
+	case DCB_CMD_GSTATE:
+		ret = dcbnl_getstate(netdev, tb, pid, nlh->nlmsg_seq,
+				     nlh->nlmsg_flags);
+		break;
+	case DCB_CMD_PFC_GCFG:
+		ret = dcbnl_getpfccfg(netdev, tb, pid, nlh->nlmsg_seq,
+				      nlh->nlmsg_flags);
+		break;
+	case DCB_CMD_GPERM_HWADDR:
+		ret = dcbnl_getperm_hwaddr(netdev, tb, pid, nlh->nlmsg_seq,
+					   nlh->nlmsg_flags);
+		break;
+	case DCB_CMD_PGTX_GCFG:
+		ret = dcbnl_pgtx_getcfg(netdev, tb, pid, nlh->nlmsg_seq,
+					nlh->nlmsg_flags);
+		break;
+	case DCB_CMD_PGRX_GCFG:
+		ret = dcbnl_pgrx_getcfg(netdev, tb, pid, nlh->nlmsg_seq,
+					nlh->nlmsg_flags);
+		break;
+	case DCB_CMD_SSTATE:
+		ret = dcbnl_setstate(netdev, tb, pid, nlh->nlmsg_seq,
+				     nlh->nlmsg_flags);
+		break;
+	case DCB_CMD_PFC_SCFG:
+		ret = dcbnl_setpfccfg(netdev, tb, pid, nlh->nlmsg_seq,
+				      nlh->nlmsg_flags);
+		break;
+	case DCB_CMD_SET_ALL:
+		ret = dcbnl_setall(netdev, tb, pid, nlh->nlmsg_seq,
+				   nlh->nlmsg_flags);
+		break;
+	case DCB_CMD_PGTX_SCFG:
+		ret = dcbnl_pgtx_setcfg(netdev, tb, pid, nlh->nlmsg_seq,
+					nlh->nlmsg_flags);
+		break;
+	case DCB_CMD_PGRX_SCFG:
+		ret = dcbnl_pgrx_setcfg(netdev, tb, pid, nlh->nlmsg_seq,
+					nlh->nlmsg_flags);
+		break;
+	default:
+		break;
+	}
+out:
+	dev_put(netdev);
+	return ret;
+}
+
+static int __init dcbnl_init(void)
+{
+	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL);
+	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL);
+
+	return 0;
+}
+module_init(dcbnl_init);
+
+static void __exit dcbnl_exit(void)
+{
+	rtnl_unregister(PF_UNSPEC, RTM_GETDCB);
+	rtnl_unregister(PF_UNSPEC, RTM_SETDCB);
+}
+module_exit(dcbnl_exit);