diff mbox series

[net-next,12/17] net/mlx5e: Re-attempt to offload flows on multipath port affinity events

Message ID 20190301200848.9534-13-saeedm@mellanox.com
State Accepted
Delegated to: David Miller
Headers show
Series [net-next,01/17] net/mlx5e: Declare mlx5e_tx_reporter_recover_from_ctx as static | expand

Commit Message

Saeed Mahameed March 1, 2019, 8:08 p.m. UTC
From: Roi Dayan <roid@mellanox.com>

Under multipath it's possible for us to offload the flow only through
the e-switch for which proper route through the uplink exists.
When the port is up and the next-hop route is set again we want to
offload through it as well.

We generate SW event from the FIB event handler when multipath port
affinity changes. The tc offloads code gets this event, goes over the
flows which were marked as of having missing route and attempts to
offload them.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  | 40 ++++++++++++++-----
 .../net/ethernet/mellanox/mlx5/core/en_rep.h  |  3 ++
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 39 +++++++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/en_tc.h   |  1 +
 4 files changed, 71 insertions(+), 12 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 4d033e01f6ab..a1a3e2774989 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1573,6 +1573,8 @@  static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
 	if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
 		uplink_priv = &rpriv->uplink_priv;
 
+		INIT_LIST_HEAD(&uplink_priv->unready_flows);
+
 		/* init shared tc flow table */
 		err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
 		if (err)
@@ -1632,27 +1634,38 @@  static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv)
 static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
 {
 	struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
-	struct mlx5_eqe   *eqe = data;
 
-	if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
-		return NOTIFY_DONE;
+	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
+		struct mlx5_eqe *eqe = data;
 
-	switch (eqe->sub_type) {
-	case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
-	case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
-		queue_work(priv->wq, &priv->update_carrier_work);
-		break;
-	default:
-		return NOTIFY_DONE;
+		switch (eqe->sub_type) {
+		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+			queue_work(priv->wq, &priv->update_carrier_work);
+			break;
+		default:
+			return NOTIFY_DONE;
+		}
+
+		return NOTIFY_OK;
 	}
 
-	return NOTIFY_OK;
+	if (event == MLX5_DEV_EVENT_PORT_AFFINITY) {
+		struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+		queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
+
+		return NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;
 }
 
 static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 {
 	struct net_device *netdev = priv->netdev;
 	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	u16 max_mtu;
 
 	netdev->min_mtu = ETH_MIN_MTU;
@@ -1660,6 +1673,9 @@  static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 	netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
 	mlx5e_set_dev_port_mtu(priv);
 
+	INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
+		  mlx5e_tc_reoffload_flows_work);
+
 	mlx5_lag_add(mdev, netdev);
 	priv->events_nb.notifier_call = uplink_rep_async_event;
 	mlx5_notifier_register(mdev, &priv->events_nb);
@@ -1672,11 +1688,13 @@  static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_dcbnl_delete_app(priv);
 #endif
 	mlx5_notifier_unregister(mdev, &priv->events_nb);
+	cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
 	mlx5_lag_remove(mdev);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 1aa3e110bb97..83b573b1abac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -74,6 +74,9 @@  struct mlx5_rep_uplink_priv {
 	struct notifier_block	    netdevice_nb;
 
 	struct mlx5_tun_entropy tun_entropy;
+
+	struct list_head            unready_flows;
+	struct work_struct          reoffload_flows_work;
 };
 
 struct mlx5e_rep_priv {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 7fe5f3de3917..2f0c631847fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -117,6 +117,7 @@  struct mlx5e_tc_flow {
 	struct list_head	mod_hdr; /* flows sharing the same mod hdr ID */
 	struct list_head	hairpin; /* flows sharing the same hairpin */
 	struct list_head	peer;    /* flows with peer flow */
+	struct list_head	unready; /* flows not ready to be offloaded (e.g due to missing route) */
 	union {
 		struct mlx5_esw_flow_attr esw_attr[0];
 		struct mlx5_nic_flow_attr nic_attr[0];
@@ -928,6 +929,26 @@  mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
 	flow->flags &= ~MLX5E_TC_FLOW_SLOW;
 }
 
+static void add_unready_flow(struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_rep_uplink_priv *uplink_priv;
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5_eswitch *esw;
+
+	esw = flow->priv->mdev->priv.eswitch;
+	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	uplink_priv = &rpriv->uplink_priv;
+
+	flow->flags |= MLX5E_TC_FLOW_NOT_READY;
+	list_add_tail(&flow->unready, &uplink_priv->unready_flows);
+}
+
+static void remove_unready_flow(struct mlx5e_tc_flow *flow)
+{
+	list_del(&flow->unready);
+	flow->flags &= ~MLX5E_TC_FLOW_NOT_READY;
+}
+
 static int
 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		      struct mlx5e_tc_flow *flow,
@@ -1049,6 +1070,7 @@  static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 	int out_index;
 
 	if (flow->flags & MLX5E_TC_FLOW_NOT_READY) {
+		remove_unready_flow(flow);
 		kvfree(attr->parse_attr);
 		return;
 	}
@@ -2810,7 +2832,7 @@  __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
 		if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
 			goto err_free;
 
-		flow->flags |= MLX5E_TC_FLOW_NOT_READY;
+		add_unready_flow(flow);
 	}
 
 	return flow;
@@ -3214,3 +3236,18 @@  void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
 	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
 		__mlx5e_tc_del_fdb_peer_flow(flow);
 }
+
+void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
+{
+	struct mlx5_rep_uplink_priv *rpriv =
+		container_of(work, struct mlx5_rep_uplink_priv,
+			     reoffload_flows_work);
+	struct mlx5e_tc_flow *flow, *tmp;
+
+	rtnl_lock();
+	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
+		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
+			remove_unready_flow(flow);
+	}
+	rtnl_unlock();
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index d2d87f978c06..f62e81902d27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -72,6 +72,7 @@  void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
 
 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags);
 
+void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
 
 #else /* CONFIG_MLX5_ESWITCH */
 static inline int  mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }