[mlx5-next,09/11] net/mlx5: Eswitch, enable RoCE loopback traffic

Message ID 20190426215732.29761-10-saeedm@mellanox.com
State Awaiting Upstream
Delegated to: David Miller
Series Mellanox, mlx5-next updates 2019-04-25

Commit Message

Saeed Mahameed April 26, 2019, 9:58 p.m. UTC
From: Maor Gottlieb <maorg@mellanox.com>

When in switchdev mode, we would like to treat loopback RoCE
traffic (on the eswitch manager) as RDMA and not as regular
Ethernet traffic.
In order to enable this, we add a flow steering rule that forwards
RoCE loopback traffic to the HW RoCE filter (by adding an allow rule).
In addition, we add a RoCE address at GID index 0, which will be
set in the RoCE loopback packet.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 .../mellanox/mlx5/core/eswitch_offloads.c     |   9 +
 .../net/ethernet/mellanox/mlx5/core/rdma.c    | 181 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/rdma.h    |  20 ++
 include/linux/mlx5/driver.h                   |   7 +
 5 files changed, 218 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rdma.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rdma.h
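
For orientation, the core of the new steering setup, condensed from
mlx5_rdma_enable_roce_steering() in the patch below (flow group creation,
allocations and error handling omitted):

	/* single-entry flow table in the new RDMA RX namespace */
	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX);
	ft_attr.max_fte = 1;
	ft = mlx5_create_flow_table(ns, &ft_attr);

	/* allow rule matching loopback traffic whose source port is the
	 * eswitch manager vport
	 */
	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
	MLX5_SET(fte_match_set_misc, misc, source_port,
		 dev->priv.eswitch->manager_vport);
	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
	flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0);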

Comments

Leon Romanovsky April 27, 2019, 5:49 a.m. UTC | #1
On Fri, Apr 26, 2019 at 09:58:23PM +0000, Saeed Mahameed wrote:
> From: Maor Gottlieb <maorg@mellanox.com>
>
> When in switchdev mode, we would like to treat loopback RoCE
> traffic (on the eswitch manager) as RDMA and not as regular
> Ethernet traffic.
> In order to enable this, we add a flow steering rule that forwards
> RoCE loopback traffic to the HW RoCE filter (by adding an allow rule).
> In addition, we add a RoCE address at GID index 0, which will be
> set in the RoCE loopback packet.
>
> Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
> Reviewed-by: Mark Bloch <markb@mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> ---
>  .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
>  .../mellanox/mlx5/core/eswitch_offloads.c     |   9 +
>  .../net/ethernet/mellanox/mlx5/core/rdma.c    | 181 ++++++++++++++++++
>  .../net/ethernet/mellanox/mlx5/core/rdma.h    |  20 ++
>  include/linux/mlx5/driver.h                   |   7 +
>  5 files changed, 218 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rdma.c
>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rdma.h
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> index 1a16f6d73cbc..5f0be9b36a04 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> @@ -35,7 +35,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o lib/port_tu
>  #
>  # Core extra
>  #
> -mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o ecpf.o
> +mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o ecpf.o rdma.o
>  mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
>  mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
>  mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> index 6c8a17ca236e..4b48bb98981e 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> @@ -37,6 +37,7 @@
>  #include <linux/mlx5/fs.h>
>  #include "mlx5_core.h"
>  #include "eswitch.h"
> +#include "rdma.h"
>  #include "en.h"
>  #include "fs_core.h"
>  #include "lib/devcom.h"
> @@ -1713,6 +1714,13 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
>  		esw->host_info.num_vfs = vf_nvports;
>  	}
>
> +	err = mlx5_rdma_enable_roce(esw->dev);
> +	if (err) {
> +		esw_debug(esw->dev, "Failed to enable RoCE, err: %d\n",
> +			  err);

You are already printing errors in all flows of mlx5_rdma_enable_roce();
there is no need for an extra debug print.

> +		err = 0;

If you are not interested in the return value, it is better to declare the function as void.
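
For illustration only, the void variant being suggested might look like
this (a sketch, not an actual follow-up patch):

	/* rdma.h */
	void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev);

	/* esw_offloads_init(): failures are already reported inside
	 * mlx5_rdma_enable_roce(), so the call site needs no error
	 * handling and no extra debug print
	 */
	mlx5_rdma_enable_roce(esw->dev);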

> +	}
> +
>  	return 0;
>
>  err_reps:
> @@ -1751,6 +1759,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw)
>  		num_vfs = esw->dev->priv.sriov.num_vfs;
>  	}
>
> +	mlx5_rdma_disable_roce(esw->dev);
>  	esw_offloads_devcom_cleanup(esw);
>  	esw_offloads_unload_all_reps(esw, num_vfs);
>  	esw_offloads_steering_cleanup(esw);
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
> new file mode 100644
> index 000000000000..f6c5e4f91aa8
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> +/* Copyright (c) 2019 Mellanox Technologies */
> +
> +#include <linux/mlx5/vport.h>
> +#include <rdma/ib_verbs.h>
> +#include <net/addrconf.h>
> +
> +#include "lib/mlx5.h"
> +#include "eswitch.h"
> +#include "fs_core.h"
> +
> +void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev)
> +{
> +	struct mlx5_core_roce *roce = &dev->priv.roce;
> +
> +	if (IS_ERR_OR_NULL(roce->ft))

roce->ft shouldn't be an error pointer; it should be either NULL or a proper pointer.
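
Since the patch only assigns roce->ft, roce->fg and roce->allow_rule
after every setup step has succeeded, a plain NULL check would be enough
here. A sketch of the simpler form (assuming roce->ft starts out zeroed
in dev->priv):

	if (!roce->ft)
		return;

	mlx5_del_flow_rules(roce->allow_rule);
	mlx5_destroy_flow_group(roce->fg);
	mlx5_destroy_flow_table(roce->ft);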

> +		return;
> +
> +	mlx5_del_flow_rules(roce->allow_rule);
> +	mlx5_destroy_flow_group(roce->fg);
> +	mlx5_destroy_flow_table(roce->ft);
> +}
> +
> +int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev)
> +{
> +	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
> +	struct mlx5_core_roce *roce = &dev->priv.roce;
> +	struct mlx5_flow_handle *flow_rule = NULL;
> +	struct mlx5_flow_table_attr ft_attr = {};
> +	struct mlx5_flow_namespace *ns = NULL;
> +	struct mlx5_flow_act flow_act = {0};

{0} -> {}

> +	struct mlx5_flow_spec *spec;
> +	struct mlx5_flow_table *ft;
> +	struct mlx5_flow_group *fg;
> +	void *match_criteria;
> +	u32 *flow_group_in;
> +	void *misc;
> +	int err;
> +
> +	if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
> +	      MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)))
> +		return -EOPNOTSUPP;
> +
> +	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
> +	if (!flow_group_in)
> +		return -ENOMEM;
> +	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
> +	if (!spec) {
> +		kvfree(flow_group_in);
> +		return -ENOMEM;
> +	}
> +
> +	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX);
> +	if (!ns) {
> +		mlx5_core_err(dev, "Failed to get RDMA RX namespace");
> +		err = -EOPNOTSUPP;
> +		goto free;
> +	}
> +
> +	ft_attr.max_fte = 1;
> +	ft = mlx5_create_flow_table(ns, &ft_attr);
> +	if (IS_ERR(ft)) {
> +		mlx5_core_err(dev, "Failed to create RDMA RX flow table");
> +		err = PTR_ERR(ft);
> +		goto free;
> +	}
> +
> +	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
> +		 MLX5_MATCH_MISC_PARAMETERS);
> +	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
> +				      match_criteria);
> +	MLX5_SET_TO_ONES(fte_match_param, match_criteria,
> +			 misc_parameters.source_port);
> +
> +	fg = mlx5_create_flow_group(ft, flow_group_in);
> +	if (IS_ERR(fg)) {
> +		err = PTR_ERR(fg);
> +		mlx5_core_err(dev, "Failed to create RDMA RX flow group err(%d)\n", err);
> +		goto destroy_flow_table;
> +	}
> +
> +	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
> +	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
> +			    misc_parameters);
> +	MLX5_SET(fte_match_set_misc, misc, source_port,
> +		 dev->priv.eswitch->manager_vport);
> +	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
> +			    misc_parameters);
> +	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
> +
> +	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
> +	flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0);
> +	if (IS_ERR(flow_rule)) {
> +		err = PTR_ERR(flow_rule);
> +		mlx5_core_err(dev, "Failed to add RoCE allow rule, err=%d\n",
> +			      err);
> +		goto destroy_flow_group;
> +	}
> +
> +	kvfree(spec);
> +	kvfree(flow_group_in);
> +	roce->ft = ft;
> +	roce->fg = fg;
> +	roce->allow_rule = flow_rule;
> +
> +	return 0;
> +
> +destroy_flow_table:
> +	mlx5_destroy_flow_table(ft);
> +destroy_flow_group:
> +	mlx5_destroy_flow_group(fg);
> +free:
> +	kvfree(spec);
> +	kvfree(flow_group_in);
> +	return err;
> +}
> +
> +static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev)
> +{
> +	mlx5_core_roce_gid_set(dev, 0, 0, 0,
> +			       NULL, NULL, false, 0, 0);
> +}
> +
> +static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid)
> +{
> +	u8 hw_id[ETH_ALEN];
> +
> +	mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
> +	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
> +	addrconf_addr_eui48(&gid->raw[8], hw_id);
> +}
> +
> +static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev)
> +{
> +	union ib_gid gid;
> +	u8 mac[ETH_ALEN];
> +
> +	mlx5_rdma_make_default_gid(dev, &gid);
> +	return mlx5_core_roce_gid_set(dev, 0,
> +				      MLX5_ROCE_VERSION_1,
> +				      0, gid.raw, mac,
> +				      false, 0, 1);
> +}
> +
> +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev)
> +{
> +	mlx5_rdma_disable_roce_steering(dev);
> +	mlx5_rdma_del_roce_addr(dev);
> +	mlx5_nic_vport_disable_roce(dev);
> +}
> +
> +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
> +{
> +	int err;
> +
> +	err = mlx5_nic_vport_enable_roce(dev);
> +	if (err) {
> +		mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
> +		return err;
> +	}
> +
> +	err = mlx5_rdma_add_roce_addr(dev);
> +	if (err) {
> +		mlx5_core_err(dev, "Failed to add RoCE address: %d\n", err);
> +		goto disable_roce;
> +	}
> +
> +	err = mlx5_rdma_enable_roce_steering(dev);
> +	if (err) {
> +		mlx5_core_err(dev, "Failed to enable RoCE steering: %d\n", err);
> +		goto del_roce_addr;
> +	}
> +
> +	return 0;
> +
> +del_roce_addr:
> +	mlx5_rdma_del_roce_addr(dev);
> +disable_roce:
> +	mlx5_nic_vport_disable_roce(dev);
> +	return err;
> +}
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
> new file mode 100644
> index 000000000000..3d9e76c3d42f
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
> @@ -0,0 +1,20 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
> +/* Copyright (c) 2019 Mellanox Technologies. */
> +
> +#ifndef __MLX5_RDMA_H__
> +#define __MLX5_RDMA_H__
> +
> +#include "mlx5_core.h"
> +
> +#ifdef CONFIG_MLX5_ESWITCH
> +
> +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev);
> +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev);
> +
> +#else /* CONFIG_MLX5_ESWITCH */
> +
> +static inline int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { return 0; }
> +static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {}
> +
> +#endif /* CONFIG_MLX5_ESWITCH */
> +#endif /* __MLX5_RDMA_H__ */
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index 582a9680b182..7fa95270dd59 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -512,6 +512,12 @@ struct mlx5_rl_table {
>  	struct mlx5_rl_entry   *rl_entry;
>  };
>
> +struct mlx5_core_roce {
> +	struct mlx5_flow_table *ft;
> +	struct mlx5_flow_group *fg;
> +	struct mlx5_flow_handle *allow_rule;
> +};
> +
>  struct mlx5_priv {
>  	struct mlx5_eq_table	*eq_table;
>
> @@ -565,6 +571,7 @@ struct mlx5_priv {
>  	struct mlx5_lag		*lag;
>  	struct mlx5_devcom	*devcom;
>  	unsigned long		pci_dev_data;
> +	struct mlx5_core_roce	roce;
>  	struct mlx5_fc_stats		fc_stats;
>  	struct mlx5_rl_table            rl_table;
>
> --
> 2.20.1
>