From patchwork Fri Dec 14 21:51:48 2018
X-Patchwork-Submitter: Saeed Mahameed
X-Patchwork-Id: 1013721
X-Patchwork-Delegate: davem@davemloft.net
From: Saeed Mahameed
To: "David S. Miller"
Cc: netdev@vger.kernel.org, Aviv Heller, Saeed Mahameed
Subject: [net-next 01/13] net/mlx5: Introduce inter-device communication mechanism
Date: Fri, 14 Dec 2018 13:51:48 -0800
Message-Id: <20181214215200.31222-2-saeedm@mellanox.com>
In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com>
References: <20181214215200.31222-1-saeedm@mellanox.com>

From: Aviv Heller

This introduces devcom, a generic mechanism for performing operations
on both physical functions of the same ConnectX card. The first user
of this API is the merged eswitch, which will be introduced in
subsequent patches.
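As an illustration of the intended calling pattern, a minimal sketch of
a devcom consumer follows. It uses only the API added by this patch;
the component id MLX5_DEVCOM_MY_COMP, the event code MY_EVENT and the
handler below are illustrative placeholders, not something this series
adds:

        /* Sketch: pairing the two PFs of one card over devcom.
         * MLX5_DEVCOM_MY_COMP / MY_EVENT / my_event_handler are
         * hypothetical; the mlx5_devcom_* calls are from this patch.
         */
        static int my_event_handler(int event, void *my_data, void *event_data)
        {
                /* my_data is what this device registered; event_data is
                 * whatever the peer passed to mlx5_devcom_send_event().
                 */
                return 0;
        }

        static void my_comp_init(struct mlx5_core_dev *dev)
        {
                struct mlx5_devcom *devcom = dev->priv.devcom;

                mlx5_devcom_register_component(devcom, MLX5_DEVCOM_MY_COMP,
                                               my_event_handler, dev);

                /* Runs the handler registered by the *other* PF, if any */
                mlx5_devcom_send_event(devcom, MLX5_DEVCOM_MY_COMP,
                                       MY_EVENT, dev);
        }

Note that mlx5_devcom_register_device() below pairs devices by their NIC
system image GUID, so only the two PFs of the same card share a devcom
context, and that mlx5_devcom_send_event() invokes the peer's handler
while holding the component's semaphore for write, so a handler must not
call back into devcom on the same component.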
Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../ethernet/mellanox/mlx5/core/lib/devcom.c | 255 ++++++++++++++++++ .../ethernet/mellanox/mlx5/core/lib/devcom.h | 44 +++ .../net/ethernet/mellanox/mlx5/core/main.c | 14 +- include/linux/mlx5/driver.h | 2 + 5 files changed, 313 insertions(+), 4 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9678051b8ff1..9de9abacf7f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -15,7 +15,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ - diag/fs_tracepoint.o diag/fw_tracer.o + lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o # # Netdev basic diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c new file mode 100644 index 000000000000..bced2efe9bef --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include +#include "lib/devcom.h" + +static LIST_HEAD(devcom_list); + +#define devcom_for_each_component(priv, comp, iter) \ + for (iter = 0; \ + comp = &(priv)->components[iter], iter < MLX5_DEVCOM_NUM_COMPONENTS; \ + iter++) + +struct mlx5_devcom_component { + struct { + void *data; + } device[MLX5_MAX_PORTS]; + + mlx5_devcom_event_handler_t handler; + struct rw_semaphore sem; + bool paired; +}; + +struct mlx5_devcom_list { + struct list_head list; + + struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS]; + struct mlx5_core_dev *devs[MLX5_MAX_PORTS]; +}; + +struct mlx5_devcom { + struct mlx5_devcom_list *priv; + int idx; +}; + +static struct mlx5_devcom_list *mlx5_devcom_list_alloc(void) +{ + struct mlx5_devcom_component *comp; + struct mlx5_devcom_list *priv; + int i; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + devcom_for_each_component(priv, comp, i) + init_rwsem(&comp->sem); + + return priv; +} + +static struct mlx5_devcom *mlx5_devcom_alloc(struct mlx5_devcom_list *priv, + u8 idx) +{ + struct mlx5_devcom *devcom; + + devcom = kzalloc(sizeof(*devcom), GFP_KERNEL); + if (!devcom) + return NULL; + + devcom->priv = priv; + devcom->idx = idx; + return devcom; +} + +/* Must be called with intf_mutex held */ +struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) +{ + struct mlx5_devcom_list *priv = NULL, *iter; + struct mlx5_devcom *devcom = NULL; + bool new_priv = false; + u64 sguid0, sguid1; + int idx, i; + + if (!mlx5_core_is_pf(dev)) + return NULL; + + sguid0 = mlx5_query_nic_system_image_guid(dev); + list_for_each_entry(iter, &devcom_list, list) { + struct mlx5_core_dev *tmp_dev = NULL; + + idx = -1; + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (iter->devs[i]) + tmp_dev = iter->devs[i]; + else + idx = i; + } + + if (idx == -1) + continue; + + sguid1 = mlx5_query_nic_system_image_guid(tmp_dev); + if (sguid0 != sguid1) + continue; + + priv = iter; + break; + } + + if (!priv) { + priv = mlx5_devcom_list_alloc(); + if (!priv) + return 
ERR_PTR(-ENOMEM); + + idx = 0; + new_priv = true; + } + + priv->devs[idx] = dev; + devcom = mlx5_devcom_alloc(priv, idx); + if (!devcom) { + kfree(priv); + return ERR_PTR(-ENOMEM); + } + + if (new_priv) + list_add(&priv->list, &devcom_list); + + return devcom; +} + +/* Must be called with intf_mutex held */ +void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom) +{ + struct mlx5_devcom_list *priv; + int i; + + if (IS_ERR_OR_NULL(devcom)) + return; + + priv = devcom->priv; + priv->devs[devcom->idx] = NULL; + + kfree(devcom); + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (priv->devs[i]) + break; + + if (i != MLX5_MAX_PORTS) + return; + + list_del(&priv->list); + kfree(priv); +} + +void mlx5_devcom_register_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + mlx5_devcom_event_handler_t handler, + void *data) +{ + struct mlx5_devcom_component *comp; + + if (IS_ERR_OR_NULL(devcom)) + return; + + WARN_ON(!data); + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + comp->handler = handler; + comp->device[devcom->idx].data = data; + up_write(&comp->sem); +} + +void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp; + + if (IS_ERR_OR_NULL(devcom)) + return; + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + comp->device[devcom->idx].data = NULL; + up_write(&comp->sem); +} + +int mlx5_devcom_send_event(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int event, + void *event_data) +{ + struct mlx5_devcom_component *comp; + int err = -ENODEV, i; + + if (IS_ERR_OR_NULL(devcom)) + return err; + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (i != devcom->idx && comp->device[i].data) { + err = comp->handler(event, comp->device[i].data, + event_data); + break; + } + + up_write(&comp->sem); + return err; +} + +void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool paired) +{ + struct mlx5_devcom_component *comp; + + comp = &devcom->priv->components[id]; + WARN_ON(!rwsem_is_locked(&comp->sem)); + + comp->paired = paired; +} + +bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + if (IS_ERR_OR_NULL(devcom)) + return false; + + return devcom->priv->components[id].paired; +} + +void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp; + int i; + + if (IS_ERR_OR_NULL(devcom)) + return NULL; + + comp = &devcom->priv->components[id]; + down_read(&comp->sem); + if (!comp->paired) { + up_read(&comp->sem); + return NULL; + } + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (i != devcom->idx) + break; + + return comp->device[i].data; +} + +void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp = &devcom->priv->components[id]; + + up_read(&comp->sem); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h new file mode 100644 index 000000000000..f2d338b187a6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies */ + +#ifndef __LIB_MLX5_DEVCOM_H__ +#define __LIB_MLX5_DEVCOM_H__ + +#include + +enum mlx5_devcom_components { + MLX5_DEVCOM_NUM_COMPONENTS, +}; + 
+typedef int (*mlx5_devcom_event_handler_t)(int event, + void *my_data, + void *event_data); + +struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev); +void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom); + +void mlx5_devcom_register_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + mlx5_devcom_event_handler_t handler, + void *data); +void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +int mlx5_devcom_send_event(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int event, + void *event_data); + +void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool paired); +bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); +void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +#endif + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 778995573812..c23553164e0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -63,6 +63,7 @@ #include "accel/tls.h" #include "lib/clock.h" #include "lib/vxlan.h" +#include "lib/devcom.h" #include "diag/fw_tracer.h" MODULE_AUTHOR("Eli Cohen "); @@ -722,16 +723,21 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) struct pci_dev *pdev = dev->pdev; int err; + priv->devcom = mlx5_devcom_register_device(dev); + if (IS_ERR(priv->devcom)) + dev_err(&pdev->dev, "failed to register with devcom (0x%p)\n", + priv->devcom); + err = mlx5_query_board_id(dev); if (err) { dev_err(&pdev->dev, "query board id failed\n"); - goto out; + goto err_devcom; } err = mlx5_eq_table_init(dev); if (err) { dev_err(&pdev->dev, "failed to initialize eq\n"); - goto out; + goto err_devcom; } err = mlx5_events_init(dev); @@ -807,8 +813,9 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); +err_devcom: + mlx5_devcom_unregister_device(dev->priv.devcom); -out: return err; } @@ -828,6 +835,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); + mlx5_devcom_unregister_device(dev->priv.devcom); } static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cc29e880c733..cd7af5d0311b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -486,6 +486,7 @@ struct mlx5_events; struct mlx5_mpfs; struct mlx5_eswitch; struct mlx5_lag; +struct mlx5_devcom; struct mlx5_eq_table; struct mlx5_rate_limit { @@ -560,6 +561,7 @@ struct mlx5_priv { struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; struct mlx5_lag *lag; + struct mlx5_devcom *devcom; unsigned long pci_dev_data; struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; From patchwork Fri Dec 14 21:51:49 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Saeed Mahameed X-Patchwork-Id: 1013722 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; 
spf=none (mailfrom) smtp.mailfrom=vger.kernel.org
From: Saeed Mahameed
To: "David S. Miller"
Cc: netdev@vger.kernel.org, Roi Dayan, Aviv Heller, Shahar Klein, Saeed Mahameed
Subject: [net-next 02/13] net/mlx5e: E-Switch, Add peer miss rules
Date: Fri, 14 Dec 2018 13:51:49 -0800
Message-Id: <20181214215200.31222-3-saeedm@mellanox.com>
In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com>
References: <20181214215200.31222-1-saeedm@mellanox.com>

From: Roi Dayan

In the sriov offloads mode, packets that are not matched by any other
rule are sent towards the e-switch manager vport for further
processing. Under upcoming patches (e.g. for uplink LAG), packets sent
from VF vports belonging to esw0 (the e-switch related to PF0) might
end up in esw1 (the e-switch related to PF1) due to muxing logic
applied by the FW. In such a case we still want the missed packet to be
sent to the "base" esw manager vport, in order to present the control
plane with a consistent view of the source (VF representor) port.
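In flow-table terms, the peer miss rules added below have the following
logical shape, one rule per VF vport i of the peer e-switch (an
illustration of the match/action pair, not literal driver code):

        match:  misc.source_port == i,
                misc.source_eswitch_owner_vhca_id == <peer vhca_id>
        action: forward to vport 0 (the esw manager vport) of the peer vhca

To make room for these entries, the slow-path FDB is sized up by
esw->total_vports entries and a dedicated peer_miss_grp flow group is
created for them.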
Signed-off-by: Roi Dayan Signed-off-by: Aviv Heller Signed-off-by: Shahar Klein Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 + .../mellanox/mlx5/core/eswitch_offloads.c | 222 +++++++++++++++++- .../ethernet/mellanox/mlx5/core/lib/devcom.h | 2 + 3 files changed, 225 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 87c9dea9bccf..9dba6ad5744d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -143,6 +143,8 @@ struct mlx5_eswitch_fdb { struct offloads_fdb { struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *peer_miss_grp; + struct mlx5_flow_handle **peer_miss_rules; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle *miss_rule_uni; struct mlx5_flow_handle *miss_rule_multi; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bde1fb8c284b..a6927ca3d4ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -39,6 +39,7 @@ #include "eswitch.h" #include "en.h" #include "fs_core.h" +#include "lib/devcom.h" enum { FDB_FAST_PATH = 0, @@ -541,6 +542,98 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } +static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, + struct mlx5_flow_spec *spec, + struct mlx5_flow_destination *dest) +{ + void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(peer_dev, vhca_id)); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest->vport.num = 0; + dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); + dest->vport.vhca_id_valid = 1; +} + +static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle **flows; + struct mlx5_flow_handle *flow; + struct mlx5_flow_spec *spec; + /* total vports is the same for both e-switches */ + int nvports = esw->total_vports; + void *misc; + int err, i; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + peer_miss_rules_setup(peer_dev, spec, &dest); + + flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL); + if (!flows) { + err = -ENOMEM; + goto alloc_flows_err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + for (i = 1; i < nvports; i++) { + MLX5_SET(fte_match_set_misc, misc, source_port, i); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); + goto add_flow_err; + } + flows[i] = flow; + } + + esw->fdb_table.offloads.peer_miss_rules = flows; + + kvfree(spec); + return 0; + +add_flow_err: + for (i--; i > 0; i--) + 
mlx5_del_flow_rules(flows[i]); + kvfree(flows); +alloc_flows_err: + kvfree(spec); + return err; +} + +static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_handle **flows; + int i; + + flows = esw->fdb_table.offloads.peer_miss_rules; + + for (i = 1; i < esw->total_vports; i++) + mlx5_del_flow_rules(flows[i]); + + kvfree(flows); +} + static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { struct mlx5_flow_act flow_act = {0}; @@ -811,7 +904,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) esw->fdb_table.offloads.fdb_left[i] = ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; - table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2; + table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2 + + esw->total_vports; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. @@ -866,6 +960,34 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.send_to_vport_grp = g; + /* create peer esw miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + esw->total_vports - 1); + ix += esw->total_vports; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err); + goto peer_miss_err; + } + esw->fdb_table.offloads.peer_miss_grp = g; + /* create miss group */ memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -898,6 +1020,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) miss_rule_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); miss_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); +peer_miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: esw_destroy_offloads_fast_fdb_tables(esw); @@ -917,6 +1041,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); @@ -1173,6 +1298,99 @@ static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports) return err; } +#define ESW_OFFLOADS_DEVCOM_PAIR (0) +#define ESW_OFFLOADS_DEVCOM_UNPAIR (1) + +static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) +{ + int err; + + err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); + if (err) + return err; + + return 0; +} + +static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) +{ + esw_del_fdb_peer_miss_rules(esw); 
+} + +static int mlx5_esw_offloads_devcom_event(int event, + void *my_data, + void *event_data) +{ + struct mlx5_eswitch *esw = my_data; + struct mlx5_eswitch *peer_esw = event_data; + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + int err; + + switch (event) { + case ESW_OFFLOADS_DEVCOM_PAIR: + err = mlx5_esw_offloads_pair(esw, peer_esw); + if (err) + goto err_out; + + err = mlx5_esw_offloads_pair(peer_esw, esw); + if (err) + goto err_pair; + + mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); + break; + + case ESW_OFFLOADS_DEVCOM_UNPAIR: + if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + break; + + mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); + mlx5_esw_offloads_unpair(peer_esw); + mlx5_esw_offloads_unpair(esw); + break; + } + + return 0; + +err_pair: + mlx5_esw_offloads_unpair(esw); + +err_out: + mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d", + event, err); + return err; +} + +static void esw_offloads_devcom_init(struct mlx5_eswitch *esw) +{ + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + + mlx5_devcom_register_component(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + mlx5_esw_offloads_devcom_event, + esw); + + mlx5_devcom_send_event(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_PAIR, esw); +} + +static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + + mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_UNPAIR, esw); + + mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +} + int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) { int err; @@ -1195,6 +1413,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) goto err_reps; + esw_offloads_devcom_init(esw); return 0; err_reps: @@ -1233,6 +1452,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) { + esw_offloads_devcom_cleanup(esw); esw_offloads_unload_reps(esw, nvports); esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index f2d338b187a6..939d5bf1581b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -7,6 +7,8 @@ #include enum mlx5_devcom_components { + MLX5_DEVCOM_ESW_OFFLOADS, + MLX5_DEVCOM_NUM_COMPONENTS, }; From patchwork Fri Dec 14 21:51:50 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Saeed Mahameed X-Patchwork-Id: 1013723 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 43GknB47pkz9s47 for ; Sat, 15 Dec 2018 08:53:02 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1731237AbeLNVw7 (ORCPT ); Fri, 14 Dec 
2018 16:52:59 -0500
From: Saeed Mahameed
To: "David S. Miller"
Cc: netdev@vger.kernel.org, Roi Dayan, Aviv Heller, Shahar Klein, Saeed Mahameed
Subject: [net-next 03/13] net/mlx5e: Infrastructure for duplicated offloading of TC flows
Date: Fri, 14 Dec 2018 13:51:50 -0800
Message-Id: <20181214215200.31222-4-saeedm@mellanox.com>
In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com>
References: <20181214215200.31222-1-saeedm@mellanox.com>

From: Roi Dayan

Under uplink LAG or multipath schemes, traffic that matches one flow
might arrive on both uplink ports and be transmitted through both, as
part of supporting aggregation and high availability. To cope with the
fact that the SW model might use a logical SW port (e.g. an uplink team
or bond) while we have two HW ports with an e-switch on each, there are
cases where, in order to offload a SW TC rule, we need to duplicate it
to two HW flows. Since each HW rule has its own counter, we also
aggregate the counters of both rules when a flow stats query is
executed from user-space. Introduce the changes for the different
elements (add/delete/stats); currently nothing is duplicated.

Signed-off-by: Roi Dayan
Signed-off-by: Aviv Heller
Signed-off-by: Shahar Klein
Signed-off-by: Saeed Mahameed
--- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 174 +++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 + .../mellanox/mlx5/core/eswitch_offloads.c | 8 +- 3 files changed, 176 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0921213561cb..eacccac05dda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -52,6 +52,7 @@ #include "fs_core.h" #include "en/port.h" #include "en/tc_tun.h" +#include "lib/devcom.h" struct mlx5_nic_flow_attr { u32 action; @@ -74,6 +75,7 @@ enum { MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3), MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4), MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5), + MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 6), }; #define MLX5E_TC_MAX_SPLITS 1 @@ -111,8 +113,10 @@ struct mlx5e_tc_flow { * destinations.
*/ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_tc_flow *peer_flow; struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ struct list_head hairpin; /* flows sharing the same hairpin */ + struct list_head peer; /* flows with peer flow */ union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@ -1249,13 +1253,48 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, } } +static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; + + if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || + !(flow->flags & MLX5E_TC_FLOW_DUP)) + return; + + mutex_lock(&esw->offloads.peer_mutex); + list_del(&flow->peer); + mutex_unlock(&esw->offloads.peer_mutex); + + flow->flags &= ~MLX5E_TC_FLOW_DUP; + + mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); + kvfree(flow->peer_flow); + flow->peer_flow = NULL; +} + +static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) +{ + struct mlx5_core_dev *dev = flow->priv->mdev; + struct mlx5_devcom *devcom = dev->priv.devcom; + struct mlx5_eswitch *peer_esw; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return; + + __mlx5e_tc_del_fdb_peer_flow(flow); + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +} + static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + mlx5e_tc_del_fdb_peer_flow(flow); mlx5e_tc_del_fdb_flow(priv, flow); - else + } else { mlx5e_tc_del_nic_flow(priv, flow); + } } @@ -2660,6 +2699,11 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) return &priv->fs.tc.ht; } +static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) +{ + return false; +} + static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, struct tc_cls_flower_offload *f, u16 flow_flags, @@ -2693,11 +2737,13 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, } static int -mlx5e_add_fdb_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, - u16 flow_flags, - struct net_device *filter_dev, - struct mlx5e_tc_flow **__flow) +__mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct net_device *filter_dev, + struct mlx5_eswitch_rep *in_rep, + struct mlx5_core_dev *in_mdev, + struct mlx5e_tc_flow **__flow) { struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; @@ -2723,6 +2769,8 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; + flow->esw_attr->in_rep = in_rep; + flow->esw_attr->in_mdev = in_mdev; err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack); if (err) goto err_free; @@ -2738,6 +2786,87 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, return err; } +static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, + struct mlx5e_tc_flow *flow) +{ + struct mlx5e_priv *priv = flow->priv, *peer_priv; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_rep_priv *peer_urpriv; + struct mlx5e_tc_flow *peer_flow; + struct mlx5_core_dev *in_mdev; + int err = 0; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return -ENODEV; + + peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH); + peer_priv = 
netdev_priv(peer_urpriv->netdev); + + /* in_mdev is assigned of which the packet originated from. + * So packets redirected to uplink use the same mdev of the + * original flow and packets redirected from uplink use the + * peer mdev. + */ + if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT) + in_mdev = peer_priv->mdev; + else + in_mdev = priv->mdev; + + parse_attr = flow->esw_attr->parse_attr; + err = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags, + parse_attr->filter_dev, + flow->esw_attr->in_rep, in_mdev, &peer_flow); + if (err) + goto out; + + flow->peer_flow = peer_flow; + flow->flags |= MLX5E_TC_FLOW_DUP; + mutex_lock(&esw->offloads.peer_mutex); + list_add_tail(&flow->peer, &esw->offloads.peer_flows); + mutex_unlock(&esw->offloads.peer_mutex); + +out: + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return err; +} + +static int +mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct net_device *filter_dev, + struct mlx5e_tc_flow **__flow) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *in_rep = rpriv->rep; + struct mlx5_core_dev *in_mdev = priv->mdev; + struct mlx5e_tc_flow *flow; + int err; + + err = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, + in_mdev, &flow); + if (err) + goto out; + + if (is_peer_flow_needed(flow)) { + err = mlx5e_tc_add_fdb_peer_flow(f, flow); + if (err) { + mlx5e_tc_del_fdb_flow(priv, flow); + goto out; + } + } + + *__flow = flow; + + return 0; + +out: + return err; +} + static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, @@ -2882,7 +3011,9 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct rhashtable *tc_ht = get_tc_ht(priv); + struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; struct mlx5_fc *counter; u64 bytes; @@ -2902,6 +3033,27 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + goto out; + + if ((flow->flags & MLX5E_TC_FLOW_DUP) && + (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) { + u64 bytes2; + u64 packets2; + u64 lastuse2; + + counter = mlx5e_tc_get_counter(flow->peer_flow); + mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2); + + bytes += bytes2; + packets += packets2; + lastuse = max_t(u64, lastuse, lastuse2); + } + + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + +out: tcf_exts_stats_update(f->exts, bytes, packets, lastuse); return 0; @@ -3014,3 +3166,11 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv) return atomic_read(&tc_ht->nelems); } + +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) +{ + struct mlx5e_tc_flow *flow, *tmp; + + list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer) + __mlx5e_tc_del_fdb_peer_flow(flow); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9dba6ad5744d..4d048f7e703b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -167,6 +167,8 @@ struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; 
+ struct list_head peer_flows; + struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); u8 inline_mode; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a6927ca3d4ca..76cb57202474 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -563,7 +563,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest->vport.num = 0; dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); - dest->vport.vhca_id_valid = 1; + dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, @@ -1313,8 +1313,11 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, return 0; } +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); + static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) { + mlx5e_tc_clean_fdb_peer_flows(esw); esw_del_fdb_peer_miss_rules(esw); } @@ -1365,6 +1368,9 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw) { struct mlx5_devcom *devcom = esw->dev->priv.devcom; + INIT_LIST_HEAD(&esw->offloads.peer_flows); + mutex_init(&esw->offloads.peer_mutex); + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) return; From patchwork Fri Dec 14 21:51:51 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Saeed Mahameed X-Patchwork-Id: 1013725 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 43GknG2Wmnz9s4s for ; Sat, 15 Dec 2018 08:53:06 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1731266AbeLNVxF (ORCPT ); Fri, 14 Dec 2018 16:53:05 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:34357 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1730887AbeLNVxD (ORCPT ); Fri, 14 Dec 2018 16:53:03 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from saeedm@mellanox.com) with ESMTPS (AES256-SHA encrypted); 14 Dec 2018 23:59:10 +0200 Received: from sx1.mtl.com ([172.16.5.47]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wBELqh27017084; Fri, 14 Dec 2018 23:52:55 +0200 From: Saeed Mahameed To: "David S. Miller" Cc: netdev@vger.kernel.org, Shahar Klein , Roi Dayan , Saeed Mahameed Subject: [net-next 04/13] net/mlx5e: Enhance flow counter scheme for offloaded TC eswitch rules Date: Fri, 14 Dec 2018 13:51:51 -0800 Message-Id: <20181214215200.31222-5-saeedm@mellanox.com> X-Mailer: git-send-email 2.19.2 In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com> References: <20181214215200.31222-1-saeedm@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Shahar Klein Assign a counter dev attribute according to device capability and use it for management of counters related to offloaded eswitch TC flows. 
With upcoming support for uplink LAG, we have two HW rules per one
logical SW (TC) rule. Although the HW supports attaching one counter to
multiple rules, we allocate a counter per HW rule. We need this
separation for two reasons:

1. "flow eswitch" counter affinity: the HW requires the counter to be
   allocated on the device where the eswitch rule is set.
2. For some use-cases (multi-path routing), each HW flow relates to a
   different neighbour, hence our neigh update logic must have per-rule
   HW accounting in order to provide the proper feedback to the kernel.

Signed-off-by: Shahar Klein
Signed-off-by: Roi Dayan
Reviewed-by: Or Gerlitz
Signed-off-by: Saeed Mahameed
--- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 14 +++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index eacccac05dda..779ca3a43bec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -994,7 +994,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(esw->dev, true); + counter = mlx5_fc_create(attr->counter_dev, true); if (IS_ERR(counter)) { err = PTR_ERR(counter); goto err_create_counter; @@ -1023,7 +1023,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return 0; err_add_rule: - mlx5_fc_destroy(esw->dev, counter); + mlx5_fc_destroy(attr->counter_dev, counter); err_create_counter: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); @@ -1064,7 +1064,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5e_detach_mod_hdr(priv, flow); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - mlx5_fc_destroy(esw->dev, attr->counter); + mlx5_fc_destroy(attr->counter_dev, attr->counter); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -2746,6 +2746,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow **__flow) { struct netlink_ext_ack *extack = f->common.extack; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; int attr_size, err; @@ -2771,6 +2772,13 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, flow->esw_attr->in_rep = in_rep; flow->esw_attr->in_mdev = in_mdev; + + if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) == + MLX5_COUNTER_SOURCE_ESWITCH) + flow->esw_attr->counter_dev = in_mdev; + else + flow->esw_attr->counter_dev = priv->mdev; + err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack); if (err) goto err_free; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4d048f7e703b..701abd794dab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -293,6 +293,7 @@ enum { struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_core_dev *in_mdev; + struct mlx5_core_dev *counter_dev; int split_count; int out_count;
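Taken together with the previous patch, a stats query on a duplicated
flow reads two per-rule counters and folds them into one result. A
condensed sketch of the query side, mirroring the mlx5e_stats_flower()
logic added earlier (an illustrative paraphrase, not a literal excerpt):

        u64 bytes, packets, lastuse;

        mlx5_fc_query_cached(mlx5e_tc_get_counter(flow),
                             &bytes, &packets, &lastuse);
        if (flow->flags & MLX5E_TC_FLOW_DUP) {
                u64 bytes2, packets2, lastuse2;

                mlx5_fc_query_cached(mlx5e_tc_get_counter(flow->peer_flow),
                                     &bytes2, &packets2, &lastuse2);
                bytes   += bytes2;
                packets += packets2;
                /* both HW rules serve one SW flow: bytes/packets add up,
                 * while lastuse is the most recent hit on either port
                 */
                lastuse  = max_t(u64, lastuse, lastuse2);
        }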
From patchwork Fri Dec 14 21:51:52 2018
X-Patchwork-Submitter: Saeed Mahameed
X-Patchwork-Id: 1013726
X-Patchwork-Delegate: davem@davemloft.net
From: Saeed Mahameed
To: "David S. Miller"
Cc: netdev@vger.kernel.org, Rabie Loulou, Aviv Heller, Saeed Mahameed
Subject: [net-next 05/13] net/mlx5e: In case of LAG, one switch parent id is used for all representors
Date: Fri, 14 Dec 2018 13:51:52 -0800
Message-Id: <20181214215200.31222-6-saeedm@mellanox.com>
In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com>
References: <20181214215200.31222-1-saeedm@mellanox.com>

From: Rabie Loulou

When the uplink representors are put into LAG, set all the
representors (VFs and uplinks) of the same NIC to return the same
switchdev ID.

Currently, the route lookup code on the encapsulation offload path
assumes that the same switchdev ID for the source and dest devices
means that the dest is also an mlx5 HW netdev. This no longer holds
when we align the switchdev IDs of the uplinks, which in turn causes
the bond/team to return that ID to the caller. As such, enhance the
relevant check to take the uplink LAG case into account.
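The essence of the representor-side change, condensed from the en_rep.c
hunk below (a paraphrase of the diff, not new API): when the uplink is
enslaved to an active LAG device, every representor reports the bond's
address as its parent ID; otherwise it keeps reporting the per-e-switch
hw_id:

        if (uplink_upper && mlx5_lag_is_active(uplink_priv->mdev))
                ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr);
        else
                ether_addr_copy(attr->u.ppid.id, rep->hw_id);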
Signed-off-by: Rabie Loulou Signed-off-by: Aviv Heller Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 41 ++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 20 +++++++-- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index c1515f013501..d5d161ab0dbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -14,7 +14,8 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, u8 *out_ttl) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_rep_priv *uplink_rpriv; + struct net_device *uplink_dev, *uplink_upper; + bool dst_is_lag_dev; struct rtable *rt; struct neighbour *n = NULL; @@ -28,10 +29,20 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, #else return -EOPNOTSUPP; #endif - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) - *out_dev = uplink_rpriv->netdev; + + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + dst_is_lag_dev = (uplink_upper && + netif_is_lag_master(uplink_upper) && + rt->dst.dev == uplink_upper && + mlx5_lag_is_active(priv->mdev)); + + /* if the egress device isn't on the same HW e-switch or + * it's a LAG device, use the uplink + */ + if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev) || + dst_is_lag_dev) + *out_dev = uplink_dev; else *out_dev = rt->dst.dev; @@ -65,8 +76,9 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, struct dst_entry *dst; #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) - struct mlx5e_rep_priv *uplink_rpriv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_dev, *uplink_upper; + bool dst_is_lag_dev; int ret; ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, @@ -77,10 +89,19 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, if (!(*out_ttl)) *out_ttl = ip6_dst_hoplimit(dst); - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, dst->dev)) - *out_dev = uplink_rpriv->netdev; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + dst_is_lag_dev = (uplink_upper && + netif_is_lag_master(uplink_upper) && + dst->dev == uplink_upper && + mlx5_lag_is_active(priv->mdev)); + + /* if the egress device isn't on the same HW e-switch or + * it's a LAG device, use the uplink + */ + if (!switchdev_port_same_parent_id(priv->netdev, dst->dev) || + dst_is_lag_dev) + *out_dev = uplink_dev; else *out_dev = dst->dev; #else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index e5230049612f..17f24127a3ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -297,17 +297,31 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; 
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_upper = NULL; + struct mlx5e_priv *uplink_priv = NULL; + struct net_device *uplink_dev; if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + if (uplink_dev) { + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + uplink_priv = netdev_priv(uplink_dev); + } + switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: attr->u.ppid.id_len = ETH_ALEN; - ether_addr_copy(attr->u.ppid.id, rep->hw_id); + if (uplink_upper && mlx5_lag_is_active(uplink_priv->mdev)) { + ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr); + } else { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + ether_addr_copy(attr->u.ppid.id, rep->hw_id); + } break; default: return -EOPNOTSUPP; From patchwork Fri Dec 14 21:51:53 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Saeed Mahameed X-Patchwork-Id: 1013724 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 43GknF4Wthz9s47 for ; Sat, 15 Dec 2018 08:53:05 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1731258AbeLNVxE (ORCPT ); Fri, 14 Dec 2018 16:53:04 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:34364 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1731232AbeLNVxD (ORCPT ); Fri, 14 Dec 2018 16:53:03 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from saeedm@mellanox.com) with ESMTPS (AES256-SHA encrypted); 14 Dec 2018 23:59:14 +0200 Received: from sx1.mtl.com ([172.16.5.47]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wBELqh29017084; Fri, 14 Dec 2018 23:53:00 +0200 From: Saeed Mahameed To: "David S. Miller" Cc: netdev@vger.kernel.org, Rabie Loulou , Aviv Heller , Saeed Mahameed Subject: [net-next 06/13] net/mlx5e: Offload TC e-switch rules with egress LAG device Date: Fri, 14 Dec 2018 13:51:53 -0800 Message-Id: <20181214215200.31222-7-saeedm@mellanox.com> X-Mailer: git-send-email 2.19.2 In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com> References: <20181214215200.31222-1-saeedm@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Rabie Loulou When parsing TC FDB actions, if the egress device is a bond/team net-device which enslaved the uplink representor of the e-switch, use the uplink representor as the destination in the HW rule. 
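For example, with the two uplink representors enslaved to bond0, a
flower rule on a VF representor can now redirect to the bond and still
be offloaded, since the driver resolves the bond back to the uplink
representor (device names here are illustrative, not from this series):

        tc filter add dev enp5s0f0_0 ingress protocol ip prio 1 flower \
                skip_sw action mirred egress redirect dev bond0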
Signed-off-by: Rabie Loulou
Signed-off-by: Aviv Heller
Reviewed-by: Roi Dayan
Signed-off-by: Saeed Mahameed
--- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 779ca3a43bec..cede77fd208f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2574,6 +2574,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (switchdev_port_same_parent_id(priv->netdev, out_dev) || is_merged_eswitch_dev(priv, out_dev)) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); + + if (uplink_upper && + netif_is_lag_master(uplink_upper) && + uplink_upper == out_dev) + out_dev = uplink_dev; + out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; attr->dests[attr->out_count].rep = rpriv->rep;

From patchwork Fri Dec 14 21:51:54 2018
X-Patchwork-Submitter: Saeed Mahameed
X-Patchwork-Id: 1013728
X-Patchwork-Delegate: davem@davemloft.net
From: Saeed Mahameed
To: "David S. Miller"
Cc: netdev@vger.kernel.org, Aviv Heller, Rabie Loulou, Saeed Mahameed
Subject: [net-next 07/13] net/mlx5e: Duplicate offloaded TC eswitch rules under uplink LAG
Date: Fri, 14 Dec 2018 13:51:54 -0800
Message-Id: <20181214215200.31222-8-saeedm@mellanox.com>
In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com>
References: <20181214215200.31222-1-saeedm@mellanox.com>

From: Aviv Heller

Under uplink LAG, packets that match a flow might arrive on both uplink
ports and be transmitted through both, as part of supporting
aggregation and high availability. When the netdevs representing the
uplinks are set into LAG (bonding, teaming), duplicate the TC flow
offloading into each of the per-uplink e-switches.
Duplication is not required if the source is the bond device, since in this case it is assumed that the bond and the uplink netdevs share the same TC block, and thus duplication will occur naturally by the stack. Note that under encapsulation scheme, both flows will use the same neighbour and hence both will contribute to the last-used feedback towards the stack. Signed-off-by: Aviv Heller Signed-off-by: Rabie Loulou Reviewed-by: Or Gerlitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cede77fd208f..864f3b00d09d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2710,7 +2710,16 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) { - return false; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + bool is_rep_ingress = attr->in_rep->vport != FDB_UPLINK_VPORT && + flow->flags & MLX5E_TC_FLOW_INGRESS; + bool act_is_encap = !!(attr->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); + bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS); + + return esw_paired && mlx5_lag_is_active(attr->in_mdev) && + (is_rep_ingress || act_is_encap); } static int From patchwork Fri Dec 14 21:51:55 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Saeed Mahameed X-Patchwork-Id: 1013727 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 43GknM0wZmz9s47 for ; Sat, 15 Dec 2018 08:53:11 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1731276AbeLNVxK (ORCPT ); Fri, 14 Dec 2018 16:53:10 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:34437 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1730887AbeLNVxJ (ORCPT ); Fri, 14 Dec 2018 16:53:09 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from saeedm@mellanox.com) with ESMTPS (AES256-SHA encrypted); 14 Dec 2018 23:59:18 +0200 Received: from sx1.mtl.com ([172.16.5.47]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wBELqh2B017084; Fri, 14 Dec 2018 23:53:04 +0200 From: Saeed Mahameed To: "David S. 
Miller" Cc: netdev@vger.kernel.org, Rabie Loulou , Saeed Mahameed Subject: [net-next 08/13] net/mlx5: Adjustments for the activate LAG logic to run under sriov Date: Fri, 14 Dec 2018 13:51:55 -0800 Message-Id: <20181214215200.31222-9-saeedm@mellanox.com> X-Mailer: git-send-email 2.19.2 In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com> References: <20181214215200.31222-1-saeedm@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Rabie Loulou When HW lag is set/unset, roce must not be enabled on the port; as such, we wrap such changes with roce enable/disable, either directly or through re-creation of the IB device. Currently, lag and sriov are mutually exclusive, so by definition this code doesn't run under sriov. Towards changing this exclusion, we need to make sure that roce will not be enabled on the eswitch manager port under sriov, since this is a requirement of switchdev mode. We go strict here and avoid this altogether under sriov. Signed-off-by: Rabie Loulou Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 8c5c5e418d61..8127d907e1ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -34,6 +34,7 @@ #include #include #include "mlx5_core.h" +#include "eswitch.h" enum { MLX5_LAG_FLAG_BONDED = 1 << 0, @@ -257,13 +258,15 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[0].dev; struct mlx5_core_dev *dev1 = ldev->pf[1].dev; + bool do_bond, sriov_enabled; struct lag_tracker tracker; int i; - bool do_bond; if (!dev0 || !dev1) return; + sriov_enabled = mlx5_sriov_is_enabled(dev0) || mlx5_sriov_is_enabled(dev1); + mutex_lock(&lag_mutex); tracker = ldev->tracker; mutex_unlock(&lag_mutex); @@ -271,26 +274,32 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) do_bond = tracker.is_bonded && ldev->allowed; if (do_bond && !mlx5_lag_is_bonded(ldev)) { - for (i = 0; i < MLX5_MAX_PORTS; i++) - mlx5_remove_dev_by_protocol(ldev->pf[i].dev, - MLX5_INTERFACE_PROTOCOL_IB); + if (!sriov_enabled) + for (i = 0; i < MLX5_MAX_PORTS; i++) + mlx5_remove_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); mlx5_activate_lag(ldev, &tracker); - mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_nic_vport_enable_roce(dev1); + if (!sriov_enabled) { + mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_enable_roce(dev1); + } } else if (do_bond && mlx5_lag_is_bonded(ldev)) { mlx5_modify_lag(ldev, &tracker); } else if (!do_bond && mlx5_lag_is_bonded(ldev)) { - mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_nic_vport_disable_roce(dev1); + if (!sriov_enabled) { + mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_disable_roce(dev1); + } mlx5_deactivate_lag(ldev); - for (i = 0; i < MLX5_MAX_PORTS; i++) - if (ldev->pf[i].dev) - mlx5_add_dev_by_protocol(ldev->pf[i].dev, - MLX5_INTERFACE_PROTOCOL_IB); + if (!sriov_enabled) + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + mlx5_add_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); } } From patchwork Fri Dec 14 21:51:56 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0
Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Saeed Mahameed X-Patchwork-Id: 1013730 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 43GknT63XRz9s47 for ; Sat, 15 Dec 2018 08:53:17 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1731292AbeLNVxQ (ORCPT ); Fri, 14 Dec 2018 16:53:16 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:34504 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1730887AbeLNVxO (ORCPT ); Fri, 14 Dec 2018 16:53:14 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from saeedm@mellanox.com) with ESMTPS (AES256-SHA encrypted); 14 Dec 2018 23:59:20 +0200 Received: from sx1.mtl.com ([172.16.5.47]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wBELqh2C017084; Fri, 14 Dec 2018 23:53:06 +0200 From: Saeed Mahameed To: "David S. Miller" Cc: netdev@vger.kernel.org, Rabie Loulou , Aviv Heller , Saeed Mahameed Subject: [net-next 09/13] net/mlx5: Allow/disallow LAG according to pre-req only Date: Fri, 14 Dec 2018 13:51:56 -0800 Message-Id: <20181214215200.31222-10-saeedm@mellanox.com> X-Mailer: git-send-email 2.19.2 In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com> References: <20181214215200.31222-1-saeedm@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Rabie Loulou Remove the lag forbid/allow functions and change the lag prereq check to run in the do-bond logic, so that every change in the prereq state causes LAG to be disabled/enabled accordingly after the next do-bond run. Add a lag update function, so that every component which changes the prereq state and wants the LAG to re-calculate the conditions can call the update function.
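Stripped of locking and hardware details, the resulting control flow looks roughly like this standalone model (struct lag_state and the helper names are invented for illustration, not the driver code):

#include <stdbool.h>

struct lag_state {
        bool tracker_bonded;  /* netdev events report the ports as bonded */
        bool prereq_met;      /* current prereq, e.g. compatible eswitch modes */
        bool lag_active;
};

/* The decision is recomputed from current state on every pass, so no
 * cached admin flag can go stale. */
static void do_bond(struct lag_state *s)
{
        bool want = s->tracker_bonded && s->prereq_met;

        if (want && !s->lag_active)
                s->lag_active = true;   /* activate HW LAG */
        else if (!want && s->lag_active)
                s->lag_active = false;  /* deactivate HW LAG */
}

/* Components that change the prereq state simply trigger another pass. */
static void lag_update(struct lag_state *s, bool prereq_met)
{
        s->prereq_met = prereq_met;
        do_bond(s);
}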
Signed-off-by: Rabie Loulou Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 13 ++++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 4 ++ drivers/net/ethernet/mellanox/mlx5/core/lag.c | 62 +++++-------------- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 4 +- .../net/ethernet/mellanox/mlx5/core/sriov.c | 14 +---- 5 files changed, 36 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 4f9dd0d4a081..70d0d9aa6b5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1632,6 +1632,8 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); esw->mode = mode; + mlx5_lag_update(esw->dev); + if (mode == SRIOV_LEGACY) { err = esw_create_legacy_fdb_table(esw); } else { @@ -1708,6 +1710,8 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) old_mode = esw->mode; esw->mode = SRIOV_NONE; + mlx5_lag_update(esw->dev); + if (old_mode == SRIOV_OFFLOADS) mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); } @@ -2225,3 +2229,12 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); + +bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) +{ + if (dev0->priv.eswitch->mode == SRIOV_NONE && + dev1->priv.eswitch->mode == SRIOV_NONE) + return true; + + return false; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 701abd794dab..9c89eea9b2c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -351,6 +351,9 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2); } +bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ @@ -367,6 +370,7 @@ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} +static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 8127d907e1ef..3b1dcd2969d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -62,11 +62,6 @@ struct mlx5_lag { struct lag_tracker tracker; struct delayed_work bond_work; struct notifier_block nb; - - /* Admin state. Allow lag only if allowed is true - * even if network conditions for lag were met - */ - bool allowed; }; /* General purpose, use for short periods of time. 
@@ -254,6 +249,16 @@ static void mlx5_deactivate_lag(struct mlx5_lag *ldev) err); } +static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) +{ + if (ldev->pf[0].dev && + ldev->pf[1].dev && + mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev)) + return true; + else + return false; +} + static void mlx5_do_bond(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[0].dev; @@ -271,7 +276,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) tracker = ldev->tracker; mutex_unlock(&lag_mutex); - do_bond = tracker.is_bonded && ldev->allowed; + do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); if (do_bond && !mlx5_lag_is_bonded(ldev)) { if (!sriov_enabled) @@ -449,15 +454,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, return NOTIFY_DONE; } -static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) -{ - if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) || - (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev))) - return false; - else - return true; -} - static struct mlx5_lag *mlx5_lag_dev_alloc(void) { struct mlx5_lag *ldev; @@ -467,7 +463,6 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void) return NULL; INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); - ldev->allowed = mlx5_lag_check_prereq(ldev); return ldev; } @@ -492,7 +487,6 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, ldev->tracker.netdev_state[fn].link_up = 0; ldev->tracker.netdev_state[fn].tx_enabled = 0; - ldev->allowed = mlx5_lag_check_prereq(ldev); dev->priv.lag = ldev; mutex_unlock(&lag_mutex); @@ -514,7 +508,6 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); dev->priv.lag = NULL; - ldev->allowed = mlx5_lag_check_prereq(ldev); mutex_unlock(&lag_mutex); } @@ -593,42 +586,19 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_is_active); -static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow) +void mlx5_lag_update(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; - int ret = 0; - bool lag_active; mlx5_dev_list_lock(); - ldev = mlx5_lag_dev_get(dev); - if (!ldev) { - ret = -ENODEV; - goto unlock; - } - lag_active = mlx5_lag_is_bonded(ldev); - if (!mlx5_lag_check_prereq(ldev) && allow) { - ret = -EINVAL; - goto unlock; - } - if (ldev->allowed == allow) + if (!ldev) goto unlock; - ldev->allowed = allow; - if ((lag_active && !allow) || allow) - mlx5_do_bond(ldev); -unlock: - mlx5_dev_list_unlock(); - return ret; -} -int mlx5_lag_forbid(struct mlx5_core_dev *dev) -{ - return mlx5_lag_set_state(dev, false); -} + mlx5_do_bond(ldev); -int mlx5_lag_allow(struct mlx5_core_dev *dev) -{ - return mlx5_lag_set_state(dev, true); +unlock: + mlx5_dev_list_unlock(); } struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index fd3141a4b3f1..73bf46599ec6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -185,10 +185,8 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) MLX5_CAP_GEN(dev, lag_master); } -int mlx5_lag_allow(struct mlx5_core_dev *dev); -int mlx5_lag_forbid(struct mlx5_core_dev *dev); - void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol); +void mlx5_lag_update(struct mlx5_core_dev *dev); enum { MLX5_NIC_IFC_FULL = 0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c 
b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a0674962f02c..6e178030d8fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -216,20 +216,10 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!mlx5_core_is_pf(dev)) return -EPERM; - if (num_vfs) { - int ret; - - ret = mlx5_lag_forbid(dev); - if (ret && (ret != -ENODEV)) - return ret; - } - - if (num_vfs) { + if (num_vfs) err = mlx5_sriov_enable(pdev, num_vfs); - } else { + else mlx5_sriov_disable(pdev); - mlx5_lag_allow(dev); - } return err ? err : num_vfs; } From patchwork Fri Dec 14 21:51:57 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Saeed Mahameed X-Patchwork-Id: 1013731 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 43GknW0smvz9s47 for ; Sat, 15 Dec 2018 08:53:19 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1731282AbeLNVxO (ORCPT ); Fri, 14 Dec 2018 16:53:14 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:34508 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1730897AbeLNVxO (ORCPT ); Fri, 14 Dec 2018 16:53:14 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from saeedm@mellanox.com) with ESMTPS (AES256-SHA encrypted); 14 Dec 2018 23:59:23 +0200 Received: from sx1.mtl.com ([172.16.5.47]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wBELqh2D017084; Fri, 14 Dec 2018 23:53:08 +0200 From: Saeed Mahameed To: "David S. Miller" Cc: netdev@vger.kernel.org, Rabie Loulou , Aviv Heller , Jianbo Liu , Saeed Mahameed Subject: [net-next 10/13] net/mlx5: Allow co-enablement of uplink LAG and SRIOV Date: Fri, 14 Dec 2018 13:51:57 -0800 Message-Id: <20181214215200.31222-11-saeedm@mellanox.com> X-Mailer: git-send-email 2.19.2 In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com> References: <20181214215200.31222-1-saeedm@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Rabie Loulou Enable setting uplink LAG if sriov is enabled on both ports in switchdev mode. Once the sriov mode is changed from switchdev for any of the ports, the LAG instance is disabled. 
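Together with the previous patch, the prerequisite amounts to both PFs agreeing on their eswitch mode; a standalone model of the check (the enum values mirror the driver's SRIOV_* modes):

#include <stdbool.h>

enum esw_mode { ESW_NONE, ESW_LEGACY, ESW_OFFLOADS };

/* LAG is allowed when neither PF runs SR-IOV (RoCE LAG) or when both
 * are in switchdev offloads mode (uplink LAG with SR-IOV); a mixed or
 * legacy-SRIOV configuration keeps LAG disallowed. */
static bool esw_lag_prereq(enum esw_mode m0, enum esw_mode m1)
{
        return (m0 == ESW_NONE && m1 == ESW_NONE) ||
               (m0 == ESW_OFFLOADS && m1 == ESW_OFFLOADS);
}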
Signed-off-by: Rabie Loulou Signed-off-by: Aviv Heller Signed-off-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 70d0d9aa6b5d..ef2bec50423d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2232,8 +2232,10 @@ EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { - if (dev0->priv.eswitch->mode == SRIOV_NONE && - dev1->priv.eswitch->mode == SRIOV_NONE) + if ((dev0->priv.eswitch->mode == SRIOV_NONE && + dev1->priv.eswitch->mode == SRIOV_NONE) || + (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && + dev1->priv.eswitch->mode == SRIOV_OFFLOADS)) return true; return false; From patchwork Fri Dec 14 21:51:58 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Saeed Mahameed X-Patchwork-Id: 1013729 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 43GknR5mJcz9s47 for ; Sat, 15 Dec 2018 08:53:15 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1731287AbeLNVxO (ORCPT ); Fri, 14 Dec 2018 16:53:14 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:34513 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1731277AbeLNVxO (ORCPT ); Fri, 14 Dec 2018 16:53:14 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from saeedm@mellanox.com) with ESMTPS (AES256-SHA encrypted); 14 Dec 2018 23:59:24 +0200 Received: from sx1.mtl.com ([172.16.5.47]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wBELqh2E017084; Fri, 14 Dec 2018 23:53:10 +0200 From: Saeed Mahameed To: "David S. Miller" Cc: netdev@vger.kernel.org, Aviv Heller , Saeed Mahameed Subject: [net-next 11/13] net/mlx5: Rename mlx5_lag_is_bonded() to __mlx5_lag_is_active() Date: Fri, 14 Dec 2018 13:51:58 -0800 Message-Id: <20181214215200.31222-12-saeedm@mellanox.com> X-Mailer: git-send-email 2.19.2 In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com> References: <20181214215200.31222-1-saeedm@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Aviv Heller The new name better represents the function's aim, and sets a precedent for a '__' prefix for internal, non-locked versions of LAG APIs. 
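The convention being set is the usual locked-wrapper/lockless-internal split, which in generic form looks like this (a plain pthread mutex standing in for lag_mutex; names invented for illustration):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lag_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned int lag_flags;

/* Double-underscore version: internal, caller must hold lag_mutex. */
static bool __lag_is_active(void)
{
        return lag_flags != 0;
}

/* Public version: takes the lock, then delegates to the internal one. */
static bool lag_is_active(void)
{
        bool res;

        pthread_mutex_lock(&lag_mutex);
        res = __lag_is_active();
        pthread_mutex_unlock(&lag_mutex);
        return res;
}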
Signed-off-by: Aviv Heller Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 3b1dcd2969d7..db5ef7023371 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -161,7 +161,7 @@ static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, return -1; } -static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev) +static bool __mlx5_lag_is_active(struct mlx5_lag *ldev) { return !!(ldev->flags & MLX5_LAG_FLAG_BONDED); } @@ -278,7 +278,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); - if (do_bond && !mlx5_lag_is_bonded(ldev)) { + if (do_bond && !__mlx5_lag_is_active(ldev)) { if (!sriov_enabled) for (i = 0; i < MLX5_MAX_PORTS; i++) mlx5_remove_dev_by_protocol(ldev->pf[i].dev, @@ -290,9 +290,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); mlx5_nic_vport_enable_roce(dev1); } - } else if (do_bond && mlx5_lag_is_bonded(ldev)) { + } else if (do_bond && __mlx5_lag_is_active(ldev)) { mlx5_modify_lag(ldev, &tracker); - } else if (!do_bond && mlx5_lag_is_bonded(ldev)) { + } else if (!do_bond && __mlx5_lag_is_active(ldev)) { if (!sriov_enabled) { mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); mlx5_nic_vport_disable_roce(dev1); @@ -555,7 +555,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) if (!ldev) return; - if (mlx5_lag_is_bonded(ldev)) + if (__mlx5_lag_is_active(ldev)) mlx5_deactivate_lag(ldev); mlx5_lag_dev_remove_pf(ldev, dev); @@ -579,7 +579,7 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - res = ldev && mlx5_lag_is_bonded(ldev); + res = ldev && __mlx5_lag_is_active(ldev); mutex_unlock(&lag_mutex); return res; @@ -609,7 +609,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (!(ldev && mlx5_lag_is_bonded(ldev))) + if (!(ldev && __mlx5_lag_is_active(ldev))) goto unlock; if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { @@ -638,7 +638,7 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) return true; ldev = mlx5_lag_dev_get(dev); - if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev) + if (!ldev || !__mlx5_lag_is_active(ldev) || ldev->pf[0].dev == dev) return true; /* If bonded, we do not add an IB device for PF1. 
*/ @@ -665,7 +665,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (ldev && mlx5_lag_is_bonded(ldev)) { + if (ldev && __mlx5_lag_is_active(ldev)) { num_ports = MLX5_MAX_PORTS; mdev[0] = ldev->pf[0].dev; mdev[1] = ldev->pf[1].dev; From patchwork Fri Dec 14 21:51:59 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Saeed Mahameed X-Patchwork-Id: 1013732 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 43Gknc1n9Pz9s4s for ; Sat, 15 Dec 2018 08:53:24 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1731315AbeLNVxV (ORCPT ); Fri, 14 Dec 2018 16:53:21 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:34571 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1731209AbeLNVxU (ORCPT ); Fri, 14 Dec 2018 16:53:20 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from saeedm@mellanox.com) with ESMTPS (AES256-SHA encrypted); 14 Dec 2018 23:59:26 +0200 Received: from sx1.mtl.com ([172.16.5.47]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wBELqh2F017084; Fri, 14 Dec 2018 23:53:12 +0200 From: Saeed Mahameed To: "David S. Miller" Cc: netdev@vger.kernel.org, Aviv Heller , Saeed Mahameed Subject: [net-next 12/13] net/mlx5: Make RoCE and SR-IOV LAG modes explicit Date: Fri, 14 Dec 2018 13:51:59 -0800 Message-Id: <20181214215200.31222-13-saeedm@mellanox.com> X-Mailer: git-send-email 2.19.2 In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com> References: <20181214215200.31222-1-saeedm@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Aviv Heller With the introduction of SR-IOV LAG, checking whether LAG is active is no longer good enough, since RoCE and SR-IOV LAG each entail different behavior by both the core and infiniband drivers. This patch introduces facilities to discern the LAG type, in addition to mlx5_lag_is_active(). These are implemented in such a way as to allow more complex mode combinations in the future.
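The mode split comes down to one flag bit per LAG type plus a combined mask, with "active" remaining the union of the modes; in isolation, the scheme is (a standalone restatement of the flag logic, not the driver code):

#include <stdbool.h>

enum {
        LAG_FLAG_ROCE  = 1 << 0,
        LAG_FLAG_SRIOV = 1 << 1,
};

#define LAG_MODE_FLAGS (LAG_FLAG_ROCE | LAG_FLAG_SRIOV)

/* "Active" means any mode bit is set; the per-mode helpers let the IB
 * and core drivers react differently to RoCE LAG vs SR-IOV (VF) LAG. */
static bool lag_is_roce(unsigned int flags)   { return flags & LAG_FLAG_ROCE; }
static bool lag_is_sriov(unsigned int flags)  { return flags & LAG_FLAG_SRIOV; }
static bool lag_is_active(unsigned int flags) { return flags & LAG_MODE_FLAGS; }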
Signed-off-by: Aviv Heller Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 13 +-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 2 +- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 4 +- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 79 +++++++++++++++---- include/linux/mlx5/driver.h | 2 + 8 files changed, 79 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d66457e6ffba..e85974ab06c0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -445,7 +445,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, if (!ndev) goto out; - if (mlx5_lag_is_active(dev->mdev)) { + if (dev->lag_active) { rcu_read_lock(); upper = netdev_master_upper_dev_get_rcu(ndev); if (upper) { @@ -1848,7 +1848,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->lib_caps = req.lib_caps; print_lib_caps(dev, context->lib_caps); - if (mlx5_lag_is_active(dev->mdev)) { + if (dev->lag_active) { u8 port = mlx5_core_native_port_num(dev->mdev); atomic_set(&context->tx_port_affinity, @@ -4841,7 +4841,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) struct mlx5_flow_table *ft; int err; - if (!ns || !mlx5_lag_is_active(mdev)) + if (!ns || !mlx5_lag_is_roce(mdev)) return 0; err = mlx5_cmd_create_vport_lag(mdev); @@ -4855,6 +4855,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) } dev->flow_db->lag_demux_ft = ft; + dev->lag_active = true; return 0; err_destroy_vport_lag: @@ -4866,7 +4867,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; - if (dev->flow_db->lag_demux_ft) { + if (dev->lag_active) { + dev->lag_active = false; + mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft); dev->flow_db->lag_demux_ft = NULL; @@ -6173,7 +6176,7 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) const char *name; rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group); - if (!mlx5_lag_is_active(dev->mdev)) + if (!mlx5_lag_is_roce(dev->mdev)) name = "mlx5_%d"; else name = "mlx5_bond_%d"; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index c89b3b44b22e..e507b6eb7c09 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -936,6 +936,7 @@ struct mlx5_ib_dev { struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; struct mlx5_eswitch_rep *rep; + int lag_active; struct mlx5_ib_lb_state lb; u8 umr_fence; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 3747cc681b18..a0e9ff763d42 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3258,7 +3258,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, (ibqp->qp_type == IB_QPT_RAW_PACKET) || (ibqp->qp_type == IB_QPT_XRC_INI) || (ibqp->qp_type == IB_QPT_XRC_TGT)) { - if (mlx5_lag_is_active(dev->mdev)) { + if (dev->lag_active) { u8 p = mlx5_core_native_port_num(dev->mdev); tx_affinity = get_tx_affinity(dev, pd, base, p); context->flags |= cpu_to_be32(tx_affinity << 24); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index d5d161ab0dbc..b92f8b3ff6b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -35,7 +35,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, dst_is_lag_dev = (uplink_upper && netif_is_lag_master(uplink_upper) && rt->dst.dev == uplink_upper && - mlx5_lag_is_active(priv->mdev)); + mlx5_lag_is_sriov(priv->mdev)); /* if the egress device isn't on the same HW e-switch or * it's a LAG device, use the uplink @@ -94,7 +94,7 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, dst_is_lag_dev = (uplink_upper && netif_is_lag_master(uplink_upper) && dst->dev == uplink_upper && - mlx5_lag_is_active(priv->mdev)); + mlx5_lag_is_sriov(priv->mdev)); /* if the egress device isn't on the same HW e-switch or * it's a LAG device, use the uplink diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 17f24127a3ba..e4a34c9ef700 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -314,7 +314,7 @@ int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: attr->u.ppid.id_len = ETH_ALEN; - if (uplink_upper && mlx5_lag_is_active(uplink_priv->mdev)) { + if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr); } else { struct mlx5e_rep_priv *rpriv = priv->ppriv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 864f3b00d09d..53ebb5a48018 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2718,7 +2718,7 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); - return esw_paired && mlx5_lag_is_active(attr->in_mdev) && + return esw_paired && mlx5_lag_is_sriov(attr->in_mdev) && (is_rep_ingress || act_is_encap); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index db5ef7023371..feb8230d3f86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -37,9 +37,12 @@ #include "eswitch.h" enum { - MLX5_LAG_FLAG_BONDED = 1 << 0, + MLX5_LAG_FLAG_ROCE = 1 << 0, + MLX5_LAG_FLAG_SRIOV = 1 << 1, }; +#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV) + struct lag_func { struct mlx5_core_dev *dev; struct net_device *netdev; @@ -161,9 +164,19 @@ static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, return -1; } +static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_ROCE); +} + +static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); +} + static bool __mlx5_lag_is_active(struct mlx5_lag *ldev) { - return !!(ldev->flags & MLX5_LAG_FLAG_BONDED); + return !!(ldev->flags & MLX5_LAG_MODE_FLAGS); } static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, @@ -229,9 +242,10 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, } static void mlx5_activate_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker) + struct lag_tracker *tracker, + u8 flags) { - ldev->flags |= MLX5_LAG_FLAG_BONDED; + ldev->flags |= flags; mlx5_create_lag(ldev, tracker); } @@ -240,7 +254,7 @@ static void mlx5_deactivate_lag(struct mlx5_lag *ldev) struct mlx5_core_dev *dev0 = ldev->pf[0].dev; int err; - ldev->flags &= 
~MLX5_LAG_FLAG_BONDED; + ldev->flags &= ~MLX5_LAG_MODE_FLAGS; err = mlx5_cmd_destroy_lag(dev0); if (err) @@ -263,15 +277,13 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[0].dev; struct mlx5_core_dev *dev1 = ldev->pf[1].dev; - bool do_bond, sriov_enabled; struct lag_tracker tracker; + bool do_bond, roce_lag; int i; if (!dev0 || !dev1) return; - sriov_enabled = mlx5_sriov_is_enabled(dev0) || mlx5_sriov_is_enabled(dev1); - mutex_lock(&lag_mutex); tracker = ldev->tracker; mutex_unlock(&lag_mutex); @@ -279,28 +291,35 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); if (do_bond && !__mlx5_lag_is_active(ldev)) { - if (!sriov_enabled) + roce_lag = !mlx5_sriov_is_enabled(dev0) && + !mlx5_sriov_is_enabled(dev1); + + if (roce_lag) for (i = 0; i < MLX5_MAX_PORTS; i++) mlx5_remove_dev_by_protocol(ldev->pf[i].dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_activate_lag(ldev, &tracker); + mlx5_activate_lag(ldev, &tracker, + roce_lag ? MLX5_LAG_FLAG_ROCE : + MLX5_LAG_FLAG_SRIOV); - if (!sriov_enabled) { + if (roce_lag) { mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); mlx5_nic_vport_enable_roce(dev1); } } else if (do_bond && __mlx5_lag_is_active(ldev)) { mlx5_modify_lag(ldev, &tracker); } else if (!do_bond && __mlx5_lag_is_active(ldev)) { - if (!sriov_enabled) { + roce_lag = __mlx5_lag_is_roce(ldev); + + if (roce_lag) { mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); mlx5_nic_vport_disable_roce(dev1); } mlx5_deactivate_lag(ldev); - if (!sriov_enabled) + if (roce_lag) for (i = 0; i < MLX5_MAX_PORTS; i++) if (ldev->pf[i].dev) mlx5_add_dev_by_protocol(ldev->pf[i].dev, @@ -572,6 +591,20 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) } } +bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + res = ldev && __mlx5_lag_is_roce(ldev); + mutex_unlock(&lag_mutex); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_roce); + bool mlx5_lag_is_active(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; @@ -586,6 +619,20 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_is_active); +bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + res = ldev && __mlx5_lag_is_sriov(ldev); + mutex_unlock(&lag_mutex); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_sriov); + void mlx5_lag_update(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; @@ -609,7 +656,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (!(ldev && __mlx5_lag_is_active(ldev))) + if (!(ldev && __mlx5_lag_is_roce(ldev))) goto unlock; if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { @@ -638,7 +685,7 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) return true; ldev = mlx5_lag_dev_get(dev); - if (!ldev || !__mlx5_lag_is_active(ldev) || ldev->pf[0].dev == dev) + if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev) return true; /* If bonded, we do not add an IB device for PF1. 
*/ @@ -665,7 +712,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (ldev && __mlx5_lag_is_active(ldev)) { + if (ldev && __mlx5_lag_is_roce(ldev)) { num_ports = MLX5_MAX_PORTS; mdev[0] = ldev->pf[0].dev; mdev[1] = ldev->pf[1].dev; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cd7af5d0311b..4d16ba04790e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1019,6 +1019,8 @@ int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); +bool mlx5_lag_is_roce(struct mlx5_core_dev *dev); +bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, From patchwork Fri Dec 14 21:52:00 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Saeed Mahameed X-Patchwork-Id: 1013733 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 43Gkp971g8z9s47 for ; Sat, 15 Dec 2018 08:53:53 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1731392AbeLNVxx (ORCPT ); Fri, 14 Dec 2018 16:53:53 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:34573 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1730887AbeLNVxT (ORCPT ); Fri, 14 Dec 2018 16:53:19 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from saeedm@mellanox.com) with ESMTPS (AES256-SHA encrypted); 14 Dec 2018 23:59:28 +0200 Received: from sx1.mtl.com ([172.16.5.47]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wBELqh2G017084; Fri, 14 Dec 2018 23:53:14 +0200 From: Saeed Mahameed To: "David S. Miller" Cc: netdev@vger.kernel.org, Aviv Heller , Saeed Mahameed Subject: [net-next 13/13] net/mlx5: Handle LAG FW commands failure gracefully Date: Fri, 14 Dec 2018 13:52:00 -0800 Message-Id: <20181214215200.31222-14-saeedm@mellanox.com> X-Mailer: git-send-email 2.19.2 In-Reply-To: <20181214215200.31222-1-saeedm@mellanox.com> References: <20181214215200.31222-1-saeedm@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Aviv Heller When create_lag or destroy_lag FW commands fail, display an appropriate error message, and try to recover, if possible. 
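On the activation side, the recovery shape is prepare / try / undo-on-failure; in outline (a standalone sketch with the FW command stubbed out, helper names invented):

#include <stdbool.h>
#include <stdio.h>

static void remove_ib_devices(void) { puts("IB devices removed"); }
static void add_ib_devices(void)    { puts("IB devices re-added"); }

/* Stub that pretends the firmware rejected CREATE_LAG. */
static int fw_create_lag(void) { return -1; }

/* A failed activation rolls back the preparatory IB-device removal
 * instead of leaving a half-configured state behind. */
static int activate_lag(bool roce_lag)
{
        int err;

        if (roce_lag)
                remove_ib_devices();

        err = fw_create_lag();
        if (err) {
                fprintf(stderr, "failed to activate %s LAG\n",
                        roce_lag ? "RoCE" : "VF");
                if (roce_lag)
                        add_ib_devices();
                return err;
        }
        return 0;
}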
Signed-off-by: Aviv Heller Reviewed-by: Roi Dayan Reviewed-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 91 ++++++++++++++----- 1 file changed, 70 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index feb8230d3f86..5187dc7a72a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -241,26 +241,52 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, return err; } -static void mlx5_activate_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker, - u8 flags) +static int mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + u8 flags) { + bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE); + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; + + err = mlx5_create_lag(ldev, tracker); + if (err) { + if (roce_lag) { + mlx5_core_err(dev0, + "Failed to activate RoCE LAG\n"); + } else { + mlx5_core_err(dev0, + "Failed to activate VF LAG\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + } + return err; + } + ldev->flags |= flags; - mlx5_create_lag(ldev, tracker); + return 0; } -static void mlx5_deactivate_lag(struct mlx5_lag *ldev) +static int mlx5_deactivate_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + bool roce_lag = __mlx5_lag_is_roce(ldev); int err; ldev->flags &= ~MLX5_LAG_MODE_FLAGS; err = mlx5_cmd_destroy_lag(dev0); - if (err) - mlx5_core_err(dev0, - "Failed to destroy LAG (%d)\n", - err); + if (err) { + if (roce_lag) { + mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } else { + mlx5_core_err(dev0, + "Failed to deactivate VF LAG; driver restart required\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + } + } + + return err; } static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) @@ -273,13 +299,33 @@ static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return false; } +static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + mlx5_add_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); +} + +static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + mlx5_remove_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); +} + static void mlx5_do_bond(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[0].dev; struct mlx5_core_dev *dev1 = ldev->pf[1].dev; struct lag_tracker tracker; bool do_bond, roce_lag; - int i; + int err; if (!dev0 || !dev1) return; @@ -295,13 +341,17 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) !mlx5_sriov_is_enabled(dev1); if (roce_lag) - for (i = 0; i < MLX5_MAX_PORTS; i++) - mlx5_remove_dev_by_protocol(ldev->pf[i].dev, - MLX5_INTERFACE_PROTOCOL_IB); + mlx5_lag_remove_ib_devices(ldev); + + err = mlx5_activate_lag(ldev, &tracker, + roce_lag ? MLX5_LAG_FLAG_ROCE : + MLX5_LAG_FLAG_SRIOV); + if (err) { + if (roce_lag) + mlx5_lag_add_ib_devices(ldev); - mlx5_activate_lag(ldev, &tracker, - roce_lag ? 
MLX5_LAG_FLAG_ROCE : - MLX5_LAG_FLAG_SRIOV); + return; + } if (roce_lag) { mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); @@ -317,13 +367,12 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_nic_vport_disable_roce(dev1); } - mlx5_deactivate_lag(ldev); + err = mlx5_deactivate_lag(ldev); + if (err) + return; if (roce_lag) - for (i = 0; i < MLX5_MAX_PORTS; i++) - if (ldev->pf[i].dev) - mlx5_add_dev_by_protocol(ldev->pf[i].dev, - MLX5_INTERFACE_PROTOCOL_IB); + mlx5_lag_add_ib_devices(ldev); } }
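The deactivation side is deliberately asymmetric: when the destroy command fails there is no safe rollback, so the error is logged, the follow-up steps are skipped, and recovery is left to a driver restart. A standalone sketch of that shape (stubs invented here):

#include <stdbool.h>
#include <stdio.h>

static void add_ib_devices(void) { puts("IB devices re-added"); }

/* Stub that pretends the firmware rejected DESTROY_LAG. */
static int fw_destroy_lag(void) { return -1; }

static int deactivate_lag(bool roce_lag)
{
        int err = fw_destroy_lag();

        if (err) {
                /* No rollback is attempted: the HW may still be in LAG
                 * mode, so the IB devices are not re-added and recovery
                 * is left to a driver restart. */
                fprintf(stderr,
                        "failed to deactivate %s LAG; driver restart required\n",
                        roce_lag ? "RoCE" : "VF");
                return err;
        }

        if (roce_lag)
                add_ib_devices(); /* only after a successful destroy */
        return 0;
}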