From patchwork Mon Nov 23 11:11:46 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Or Gerlitz X-Patchwork-Id: 547500 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 0111814029C for ; Mon, 23 Nov 2015 22:14:20 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753002AbbKWLOL (ORCPT ); Mon, 23 Nov 2015 06:14:11 -0500 Received: from [193.47.165.129] ([193.47.165.129]:37356 "EHLO mellanox.co.il" rhost-flags-FAIL-FAIL-OK-FAIL) by vger.kernel.org with ESMTP id S1752945AbbKWLOE (ORCPT ); Mon, 23 Nov 2015 06:14:04 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from ogerlitz@mellanox.com) with ESMTPS (AES256-SHA encrypted); 23 Nov 2015 13:11:57 +0200 Received: from r-vnc04.mtr.labs.mlnx (r-vnc04.mtr.labs.mlnx [10.208.0.116]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id tANBBtpw009640; Mon, 23 Nov 2015 13:11:57 +0200 From: Or Gerlitz To: "David S. Miller" Cc: netdev@vger.kernel.org, Don Dutile , Doug Ledford , Saeed Mahameed , Tal Alon , Hadar Har-Zion , Rony Efraim , Or Gerlitz Subject: [PATCH net-next 13/18] net/mlx5: E-Switch, Add SR-IOV (FDB) support Date: Mon, 23 Nov 2015 13:11:46 +0200 Message-Id: <1448277111-16474-14-git-send-email-ogerlitz@mellanox.com> X-Mailer: git-send-email 1.7.8.2 In-Reply-To: <1448277111-16474-1-git-send-email-ogerlitz@mellanox.com> References: <1448277111-16474-1-git-send-email-ogerlitz@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Saeed Mahameed Enabling E-Switch SRIOV for nvfs+1 vports. Create E-Switch FDB for L2 UC/MC mac steering between VFs/PF and external vport (Uplink). FDB contains forwarding rules such as: UC MAC0 -> vport0(PF). UC MAC1 -> vport1. UC MAC2 -> vport2. MC MACX -> vport0, vport2, Uplink. MC MACY -> vport1, Uplink. For unmatched traffic FDB has the following default rules: Unmached Traffic (src vport != Uplink) -> Uplink. Unmached Traffic (src vport == Uplink) -> vport0(PF). FDB rules population: Each NIC vport (VF) will notify E-Switch manager of its UC/MC vport context changes via modify vport context command, which will be translated to an event that will be handled by E-Switch manager (PF) which will update FDB table accordingly. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 682 ++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 25 + .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 14 +- include/linux/mlx5/device.h | 6 + include/linux/mlx5/flow_table.h | 9 + include/linux/mlx5/mlx5_ifc.h | 7 +- 7 files changed, 661 insertions(+), 83 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1f2f804..a208be7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -34,9 +34,12 @@ #include #include #include +#include #include "mlx5_core.h" #include "eswitch.h" +#define UPLINK_VPORT 0xFFFF + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ @@ -54,18 +57,26 @@ enum { MLX5_ACTION_DEL = 2, }; -/* HW UC L2 table hash node */ -struct mlx5_uc_l2addr { +/* E-Switch UC L2 table hash node */ +struct esw_uc_addr { struct l2addr_node node; - u8 action; u32 table_index; u32 vport; }; -/* Vport UC L2 table hash node */ -struct mlx5_vport_addr { - struct l2addr_node node; - u8 action; +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; +}; + +/* Vport UC/MC hash node */ +struct vport_addr { + struct l2addr_node node; + u8 action; + u32 vport; + struct mlx5_flow_rule *flow_rule; /* SRIOV only */ }; enum { @@ -73,7 +84,11 @@ enum { MC_ADDR_CHANGE = BIT(1), }; -static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, int vport, +/* Vport context events */ +#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ + MC_ADDR_CHANGE) + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]; @@ -196,97 +211,492 @@ static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) free_l2_table_index(l2_table, index); } -/* SW E-Switch L2 Table management */ -static int l2_table_addr_add(struct mlx5_eswitch *esw, - u8 mac[ETH_ALEN], u32 vport) +/* E-Switch FDB flow steering */ +struct dest_node { + struct list_head list; + struct mlx5_flow_destination dest; +}; + +static int _mlx5_flow_rule_apply(struct mlx5_flow_rule *fr) { - struct hlist_head *hash; - struct mlx5_uc_l2addr *addr; + bool was_valid = fr->valid; + struct dest_node *dest_n; + u32 dest_list_size = 0; + void *in_match_value; + u32 *flow_context; + u32 flow_index; int err; + int i; + + if (list_empty(&fr->dest_list)) { + if (fr->valid) + mlx5_del_flow_table_entry(fr->ft, fr->fi); + fr->valid = false; + return 0; + } + + list_for_each_entry(dest_n, &fr->dest_list, list) + dest_list_size++; - hash = esw->l2_table.l2_hash; - addr = l2addr_hash_find(hash, mac, struct mlx5_uc_l2addr); - if (addr) { + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct) * + dest_list_size); + if (!flow_context) + return -ENOMEM; + + MLX5_SET(flow_context, flow_context, flow_tag, fr->flow_tag); + MLX5_SET(flow_context, flow_context, action, fr->action); + MLX5_SET(flow_context, flow_context, destination_list_size, + dest_list_size); + + i = 0; + list_for_each_entry(dest_n, &fr->dest_list, list) { + void *dest_addr = MLX5_ADDR_OF(flow_context, flow_context, + destination[i++]); + + MLX5_SET(dest_format_struct, dest_addr, destination_type, + dest_n->dest.type); + MLX5_SET(dest_format_struct, dest_addr, destination_id, + dest_n->dest.vport_num); + } + + in_match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + memcpy(in_match_value, fr->match_value, MLX5_ST_SZ_BYTES(fte_match_param)); + + err = mlx5_add_flow_table_entry(fr->ft, fr->match_criteria_enable, + fr->match_criteria, flow_context, + &flow_index); + if (!err) { + if (was_valid) + mlx5_del_flow_table_entry(fr->ft, fr->fi); + fr->fi = flow_index; + fr->valid = true; + } + kfree(flow_context); + return err; +} + +static int mlx5_flow_rule_add_dest(struct mlx5_flow_rule *fr, + struct mlx5_flow_destination *new_dest) +{ + struct dest_node *dest_n; + int err; + + dest_n = kzalloc(sizeof(*dest_n), GFP_KERNEL); + if (!dest_n) + return -ENOMEM; + + memcpy(&dest_n->dest, new_dest, sizeof(dest_n->dest)); + mutex_lock(&fr->mutex); + list_add(&dest_n->list, &fr->dest_list); + err = _mlx5_flow_rule_apply(fr); + if (err) { + list_del(&dest_n->list); + kfree(dest_n); + } + mutex_unlock(&fr->mutex); + return err; +} + +static int mlx5_flow_rule_del_dest(struct mlx5_flow_rule *fr, + struct mlx5_flow_destination *dest) +{ + struct dest_node *dest_n; + struct dest_node *n; + int err; + + mutex_lock(&fr->mutex); + list_for_each_entry_safe(dest_n, n, &fr->dest_list, list) { + if (dest->vport_num == dest_n->dest.vport_num) + goto found; + } + mutex_unlock(&fr->mutex); + return -ENOENT; + +found: + list_del(&dest_n->list); + err = _mlx5_flow_rule_apply(fr); + mutex_unlock(&fr->mutex); + kfree(dest_n); + + return err; +} + +static struct mlx5_flow_rule *find_fr(struct mlx5_eswitch *esw, + u8 match_criteria_enable, + u32 *match_value) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *dmac_v; + + dmac_v = MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dmac_47_16); + + /* UNICAST FULL MATCH */ + if (!is_multicast_ether_addr(dmac_v)) + return NULL; + + /* MULTICAST FULL MATCH */ + esw_mc = l2addr_hash_find(hash, dmac_v, struct esw_mc_addr); + + return esw_mc ? esw_mc->uplink_rule : NULL; +} + +static struct mlx5_flow_rule *alloc_fr(void *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag) +{ + struct mlx5_flow_rule *fr = kzalloc(sizeof(*fr), GFP_KERNEL); + + if (!fr) + return NULL; + + fr->match_criteria = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + fr->match_value = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!fr->match_criteria || !fr->match_value) { + kfree(fr->match_criteria); + kfree(fr->match_value); + kfree(fr); + return NULL; + } + + memcpy(fr->match_criteria, match_criteria, MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(fr->match_value, match_value, MLX5_ST_SZ_BYTES(fte_match_param)); + fr->match_criteria_enable = match_criteria_enable; + fr->flow_tag = flow_tag; + fr->action = action; + + mutex_init(&fr->mutex); + INIT_LIST_HEAD(&fr->dest_list); + atomic_set(&fr->refcount, 0); + fr->ft = ft; + return fr; +} + +static void deref_fr(struct mlx5_flow_rule *fr) +{ + if (!atomic_dec_and_test(&fr->refcount)) + return; + + kfree(fr->match_criteria); + kfree(fr->match_value); + kfree(fr); +} + +static struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_eswitch *esw, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *fr; + int err; + + fr = find_fr(esw, match_criteria_enable, match_value); + fr = fr ? fr : alloc_fr(esw->fdb_table.fdb, match_criteria_enable, match_criteria, + match_value, action, flow_tag); + if (!fr) + return NULL; + + atomic_inc(&fr->refcount); + + err = mlx5_flow_rule_add_dest(fr, dest); + if (err) { + deref_fr(fr); + return NULL; + } + + return fr; +} + +static void mlx5_del_flow_rule(struct mlx5_flow_rule *fr, u32 vport) +{ + struct mlx5_flow_destination dest; + + dest.vport_num = vport; + mlx5_flow_rule_del_dest(fr, &dest); + deref_fr(fr); +} + +/* E-Switch FDB */ +static struct mlx5_flow_rule * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +{ + int match_header = MLX5_MATCH_OUTER_HEADERS; + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + u32 *match_v; + u32 *match_c; + u8 *dmac_v; + u8 *dmac_c; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_v) { + pr_warn("FDB: Failed to alloc match parameters\n"); + goto out; + } + dmac_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers.dmac_47_16); + dmac_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers.dmac_47_16); + + ether_addr_copy(dmac_v, mac); + /* Match criteria mask */ + memset(dmac_c, 0xff, 6); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + esw_debug(esw->dev, + "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", + dmac_v, dmac_c, vport); + flow_rule = + mlx5_add_flow_rule(esw, + match_header, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR_OR_NULL(flow_rule)) { + pr_warn( + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", + dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); + flow_rule = NULL; + } +out: + kfree(match_v); + kfree(match_c); + return flow_rule; +} + +static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table_group g; + struct mlx5_flow_table *fdb; + u8 *dmac; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + memset(&g, 0, sizeof(g)); + /* UC MC Full match rules*/ + g.log_sz = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + g.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g.match_criteria, + outer_headers.dmac_47_16); + /* Match criteria mask */ + memset(dmac, 0xff, 6); + + fdb = mlx5_create_flow_table(dev, 0, + MLX5_FLOW_TABLE_TYPE_ESWITCH, + 1, &g); + if (fdb) + esw_debug(dev, "ESW: FDB Table created fdb->id %d\n", mlx5_get_flow_table_id(fdb)); + else + esw_warn(dev, "ESW: Failed to create FDB Table\n"); + + esw->fdb_table.fdb = fdb; + return fdb ? 0 : -ENOMEM; +} + +static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.fdb) + return; + + esw_debug(esw->dev, "Destroy FDB Table fdb(%d)\n", + mlx5_get_flow_table_id(esw->fdb_table.fdb)); + mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw->fdb_table.fdb = NULL; +} + +/* E-Switch vport UC/MC lists management */ +typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, + struct vport_addr *vaddr); + +static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + int err; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (esw_uc) { esw_warn(esw->dev, "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", - mac, vport, addr->vport); + mac, vport, esw_uc->vport); return -EEXIST; } - addr = l2addr_hash_add(hash, mac, struct mlx5_uc_l2addr, - GFP_KERNEL); - if (!addr) + esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL); + if (!esw_uc) return -ENOMEM; + esw_uc->vport = vport; - addr->vport = vport; - addr->action = MLX5_ACTION_NONE; - err = set_l2_table_entry(esw->dev, mac, 0, 0, &addr->table_index); + err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index); if (err) - l2addr_hash_del(addr); - else - esw_debug(esw->dev, "\tADDED L2 MAC: vport[%d] %pM index:%d\n", - vport, addr->node.addr, addr->table_index); + goto abort; + + if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + return err; +abort: + l2addr_hash_del(esw_uc); return err; } -static int l2_table_addr_del(struct mlx5_eswitch *esw, - u8 mac[ETH_ALEN], u32 vport) +static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { - struct hlist_head *hash; - struct mlx5_uc_l2addr *addr; + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (!esw_uc || esw_uc->vport != vport) { + esw_debug(esw->dev, + "MAC(%pM) doesn't belong to vport (%d)\n", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + + del_l2_table_entry(esw->dev, esw_uc->table_index); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule, vport); + vaddr->flow_rule = NULL; + + l2addr_hash_del(esw_uc); + return 0; +} + +static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (esw_mc) + goto add; + + esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL); + if (!esw_mc) + return -ENOMEM; + + esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ + esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); +add: + esw_mc->refcnt++; + /* Forward MC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + esw_debug(esw->dev, + "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, + esw_mc->refcnt, esw_mc->uplink_rule); + return 0; +} + +static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; - hash = esw->l2_table.l2_hash; - addr = l2addr_hash_find(hash, mac, struct mlx5_uc_l2addr); - if (!addr || addr->vport != vport) { - esw_warn(esw->dev, "MAC(%pM) doesn't belong to vport (%d)\n", + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to find eswitch MC addr for MAC(%pM) vport(%d)", mac, vport); return -EINVAL; } + esw_debug(esw->dev, + "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, esw_mc->refcnt, + esw_mc->uplink_rule); - esw_debug(esw->dev, "\tDELETE L2 MAC: vport[%d] %pM index:%d\n", - vport, addr->node.addr, addr->table_index); - del_l2_table_entry(esw->dev, addr->table_index); - l2addr_hash_del(addr); + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule, vport); + vaddr->flow_rule = NULL; + + if (--esw_mc->refcnt) + return 0; + + if (esw_mc->uplink_rule) + mlx5_del_flow_rule(esw_mc->uplink_rule, UPLINK_VPORT); + + l2addr_hash_del(esw_mc); return 0; } -/* E-Switch vport uc list management */ - -/* Apply vport uc list to HW l2 table */ -static void esw_apply_vport_uc_list(struct mlx5_eswitch *esw, - u32 vport_num) +/* Apply vport UC/MC list to HW l2 table and FDB table */ +static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) { struct mlx5_vport *vport = &esw->vports[vport_num]; - struct mlx5_vport_addr *addr; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + vport_addr_action vport_addr_add; + vport_addr_action vport_addr_del; + struct vport_addr *addr; struct l2addr_node *node; struct hlist_head *hash; struct hlist_node *tmp; int hi; - hash = vport->uc_list; + vport_addr_add = is_uc ? esw_add_uc_addr : + esw_add_mc_addr; + vport_addr_del = is_uc ? esw_del_uc_addr : + esw_del_mc_addr; + + hash = is_uc ? vport->uc_list : vport->mc_list; for_each_l2hash_node(node, tmp, hash, hi) { - addr = container_of(node, struct mlx5_vport_addr, node); + addr = container_of(node, struct vport_addr, node); switch (addr->action) { case MLX5_ACTION_ADD: - l2_table_addr_add(esw, addr->node.addr, vport_num); + vport_addr_add(esw, addr); addr->action = MLX5_ACTION_NONE; break; case MLX5_ACTION_DEL: - l2_table_addr_del(esw, addr->node.addr, vport_num); + vport_addr_del(esw, addr); l2addr_hash_del(addr); break; } } } -/* Sync vport uc list from vport context */ -static void esw_update_vport_uc_list(struct mlx5_eswitch *esw, - u32 vport_num) +/* Sync vport UC/MC list from vport context */ +static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) { struct mlx5_vport *vport = &esw->vports[vport_num]; - struct mlx5_vport_addr *addr; - struct l2addr_node *node; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; u8 (*mac_list)[ETH_ALEN]; + struct l2addr_node *node; + struct vport_addr *addr; struct hlist_head *hash; struct hlist_node *tmp; int size; @@ -294,40 +704,41 @@ static void esw_update_vport_uc_list(struct mlx5_eswitch *esw, int hi; int i; - size = MLX5_MAX_UC_PER_VPORT(esw->dev); + size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) : + MLX5_MAX_MC_PER_VPORT(esw->dev); mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); if (!mac_list) return; - hash = vport->uc_list; + hash = is_uc ? vport->uc_list : vport->mc_list; for_each_l2hash_node(node, tmp, hash, hi) { - addr = container_of(node, struct mlx5_vport_addr, node); + addr = container_of(node, struct vport_addr, node); addr->action = MLX5_ACTION_DEL; } - err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, - MLX5_NVPRT_LIST_TYPE_UC, + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type, mac_list, &size); if (err) return; - esw_debug(esw->dev, "vport[%d] context update UC list size (%d)\n", - vport_num, size); + esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", + vport_num, is_uc ? "UC" : "MC", size); for (i = 0; i < size; i++) { - if (!is_valid_ether_addr(mac_list[i])) + if (is_uc && !is_valid_ether_addr(mac_list[i])) + continue; + + if (!is_uc && !is_multicast_ether_addr(mac_list[i])) continue; - addr = l2addr_hash_find(hash, mac_list[i], - struct mlx5_vport_addr); + addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); if (addr) { addr->action = MLX5_ACTION_NONE; continue; } - addr = l2addr_hash_add(hash, mac_list[i], - struct mlx5_vport_addr, + addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr, GFP_KERNEL); if (!addr) { esw_warn(esw->dev, @@ -335,6 +746,7 @@ static void esw_update_vport_uc_list(struct mlx5_eswitch *esw, mac_list[i], vport_num); continue; } + addr->vport = vport_num; addr->action = MLX5_ACTION_ADD; } kfree(mac_list); @@ -345,32 +757,80 @@ static void esw_vport_change_handler(struct work_struct *work) struct mlx5_vport *vport = container_of(work, struct mlx5_vport, vport_change_handler); struct mlx5_core_dev *dev = vport->dev; + struct mlx5_eswitch *esw = dev->priv.eswitch; u8 mac[ETH_ALEN]; mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", + vport->vport, mac); + + if (vport->enabled_events & UC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + } - if (!is_valid_ether_addr(mac)) - goto out; - - esw_update_vport_uc_list(dev->priv.eswitch, vport->vport); - esw_apply_vport_uc_list(dev->priv.eswitch, vport->vport); + if (vport->enabled_events & MC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + } -out: + esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); if (vport->enabled) arm_vport_context_events_cmd(dev, vport->vport, - UC_ADDR_CHANGE); + vport->enabled_events); } -static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num) +static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, + int enable_events) { struct mlx5_vport *vport = &esw->vports[vport_num]; unsigned long flags; + WARN_ON(vport->enabled); + + esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_AUTO); + + /* Sync with current vport context */ + vport->enabled_events = enable_events; + esw_vport_change_handler(&vport->vport_change_handler); + spin_lock_irqsave(&vport->lock, flags); vport->enabled = true; spin_unlock_irqrestore(&vport->lock, flags); - arm_vport_context_events_cmd(esw->dev, vport_num, UC_ADDR_CHANGE); + arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); + + esw->enabled_vports++; + esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); +} + +static void esw_cleanup_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_node *tmp; + int hi; + + for_each_l2hash_node(node, tmp, vport->uc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_UC); + + for_each_l2hash_node(node, tmp, vport->mc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_MC); } static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) @@ -381,19 +841,82 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) if (!vport->enabled) return; + esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); /* Mark this vport as disabled to discard new events */ spin_lock_irqsave(&vport->lock, flags); vport->enabled = false; + vport->enabled_events = 0; spin_unlock_irqrestore(&vport->lock, flags); + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_DOWN); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); - /* Disable events from this vport */ arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + /* We don't assume VFs will cleanup after themselves */ + esw_cleanup_vport(esw, vport_num); + esw->enabled_vports--; } /* Public E-Switch API */ +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) +{ + int err; + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + return -ENOTSUPP; + } + + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); + + esw_disable_vport(esw, 0); + + err = esw_create_fdb_table(esw, nvfs + 1); + if (err) + goto abort; + + for (i = 0; i <= nvfs; i++) + esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS); + + esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", + esw->enabled_vports); + return 0; + +abort: + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + return err; +} + +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +{ + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", + esw->enabled_vports); + + for (i = 0; i < esw->total_vports; i++) + esw_disable_vport(esw, i); + + esw_destroy_fdb_table(esw); + + /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); +} + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); @@ -439,7 +962,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } - esw->total_vports = total_vports; for (vport_num = 0; vport_num < total_vports; vport_num++) { struct mlx5_vport *vport = &esw->vports[vport_num]; @@ -450,9 +972,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) spin_lock_init(&vport->lock); } - dev->priv.eswitch = esw; + esw->total_vports = total_vports; + esw->enabled_vports = 0; - esw_enable_vport(esw, 0); + dev->priv.eswitch = esw; + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); /* VF Vports will be enabled when SRIOV is enabled */ return 0; abort: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 57617c5..aec1ec0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -86,10 +86,25 @@ struct l2addr_node { kfree(ptr); \ }) +struct mlx5_flow_rule { + void *ft; + u32 fi; + u8 match_criteria_enable; + u32 *match_criteria; + u32 *match_value; + u32 action; + u32 flow_tag; + bool valid; + atomic_t refcount; + struct mutex mutex; /* protect flow rule updates */ + struct list_head dest_list; +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; struct work_struct vport_change_handler; /* This spinlock protects access to vport data, between @@ -98,6 +113,7 @@ struct mlx5_vport { */ spinlock_t lock; /* vport events sync */ bool enabled; + u16 enabled_events; }; struct mlx5_l2_table { @@ -106,18 +122,27 @@ struct mlx5_l2_table { unsigned long *bitmap; }; +struct mlx5_eswitch_fdb { + void *fdb; +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_l2_table l2_table; + struct mlx5_eswitch_fdb fdb_table; + struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; struct mlx5_vport *vports; int total_vports; + int enabled_vports; }; /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1649d5c..bee7da8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -36,6 +36,7 @@ #include #include #include +#include #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "3.0-1" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 19a4324..7b24386 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -33,6 +33,9 @@ #include #include #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) { @@ -144,13 +147,15 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) mlx5_core_cleanup_vfs(dev); if (!num_vfs) { +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_disable_sriov(dev->priv.eswitch); +#endif kfree(sriov->vfs_ctx); sriov->vfs_ctx = NULL; if (!pci_vfs_assigned(pdev)) pci_disable_sriov(pdev); else pr_info("unloading PF driver while leaving orphan VFs\n"); - return 0; } @@ -161,6 +166,9 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) } mlx5_core_init_vfs(dev, num_vfs); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs); +#endif return num_vfs; } @@ -199,6 +207,10 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) sriov->enabled_vfs = cur_vfs; mlx5_core_init_vfs(dev, cur_vfs); +#ifdef CONFIG_MLX5_CORE_EN + if (cur_vfs) + mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs); +#endif enable_vfs(dev, cur_vfs); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index bce9cae..88eb449 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1075,6 +1075,12 @@ enum { }; enum { + MLX5_ESW_VPORT_ADMIN_STATE_DOWN = 0x0, + MLX5_ESW_VPORT_ADMIN_STATE_UP = 0x1, + MLX5_ESW_VPORT_ADMIN_STATE_AUTO = 0x2, +}; + +enum { MLX5_L3_PROT_TYPE_IPV4 = 0, MLX5_L3_PROT_TYPE_IPV6 = 1, }; diff --git a/include/linux/mlx5/flow_table.h b/include/linux/mlx5/flow_table.h index 5f922c6..0f2a15c 100644 --- a/include/linux/mlx5/flow_table.h +++ b/include/linux/mlx5/flow_table.h @@ -41,6 +41,15 @@ struct mlx5_flow_table_group { u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; }; +struct mlx5_flow_destination { + enum mlx5_flow_destination_type type; + union { + u32 tir_num; + void *ft; + u32 vport_num; + }; +}; + void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, u16 num_groups, struct mlx5_flow_table_group *group); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ae7c08a..a81b008 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -827,9 +827,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_69[0x220]; }; -enum { - MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_FLOW_TABLE_ = 0x1, - MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_TIR = 0x2, +enum mlx5_flow_destination_type { + MLX5_FLOW_DESTINATION_TYPE_VPORT = 0x0, + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, + MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, }; struct mlx5_ifc_dest_format_struct_bits {