diff mbox series

[v3,net-next] net: mscc: ocelot: install MAC addresses in .ndo_set_rx_mode from process context

Message ID 20201205004315.143851-1-vladimir.oltean@nxp.com
State Superseded
Headers show
Series [v3,net-next] net: mscc: ocelot: install MAC addresses in .ndo_set_rx_mode from process context | expand

Commit Message

Vladimir Oltean Dec. 5, 2020, 12:43 a.m. UTC
Currently ocelot_set_rx_mode calls ocelot_mact_learn directly, which has
a very nice ocelot_mact_wait_for_completion at the end. Introduced in
commit 639c1b2625af ("net: mscc: ocelot: Register poll timeout should be
wall time not attempts"), this function uses readx_poll_timeout which
triggers a lot of lockdep warnings and is also dangerous to use from
atomic context, potentially leading to lockups and panics.

Steen Hegelund added a poll timeout of 100 ms for checking the MAC
table, a duration which is clearly absurd to poll in atomic context.
So we need to defer the MAC table access to process context, which we do
via a dynamically allocated workqueue which contains all there is to
know about the MAC table operation it has to do.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
---
Changes in v3:
- Dropped Fixes tag and retargeted to net-next
- Dropped get_device/put_device since they don't offer real protection
- Now allocating a private ordered workqueue which is drained on unbind
  to avoid accessing freed memory

Changes in v2:
- Added Fixes tag (it won't backport that far, but anyway)
- Using get_device and put_device to avoid racing with unbind

 drivers/net/ethernet/mscc/ocelot.c     |  5 ++
 drivers/net/ethernet/mscc/ocelot_net.c | 81 +++++++++++++++++++++++++-
 include/soc/mscc/ocelot.h              |  2 +
 3 files changed, 85 insertions(+), 3 deletions(-)

Comments

Jakub Kicinski Dec. 8, 2020, 1:09 a.m. UTC | #1
On Sat,  5 Dec 2020 02:43:15 +0200 Vladimir Oltean wrote:
> Currently ocelot_set_rx_mode calls ocelot_mact_learn directly, which has
> a very nice ocelot_mact_wait_for_completion at the end. Introduced in
> commit 639c1b2625af ("net: mscc: ocelot: Register poll timeout should be
> wall time not attempts"), this function uses readx_poll_timeout which
> triggers a lot of lockdep warnings and is also dangerous to use from
> atomic context, potentially leading to lockups and panics.
> 
> Steen Hegelund added a poll timeout of 100 ms for checking the MAC
> table, a duration which is clearly absurd to poll in atomic context.
> So we need to defer the MAC table access to process context, which we do
> via a dynamically allocated workqueue which contains all there is to
> know about the MAC table operation it has to do.
> 
> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
> ---
> Changes in v3:
> - Dropped Fixes tag and retargeted to net-next
> - Dropped get_device/put_device since they don't offer real protection
> - Now allocating a private ordered workqueue which is drained on unbind
>   to avoid accessing freed memory
> 
> Changes in v2:
> - Added Fixes tag (it won't backport that far, but anyway)
> - Using get_device and put_device to avoid racing with unbind
> 
>  drivers/net/ethernet/mscc/ocelot.c     |  5 ++
>  drivers/net/ethernet/mscc/ocelot_net.c | 81 +++++++++++++++++++++++++-
>  include/soc/mscc/ocelot.h              |  2 +
>  3 files changed, 85 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
> index abea8dd2b0cb..b9626eec8db6 100644
> --- a/drivers/net/ethernet/mscc/ocelot.c
> +++ b/drivers/net/ethernet/mscc/ocelot.c
> @@ -1513,6 +1513,10 @@ int ocelot_init(struct ocelot *ocelot)
>  	if (!ocelot->stats_queue)
>  		return -ENOMEM;
>  
> +	ocelot->owq = alloc_ordered_workqueue("ocelot-owq", WQ_MEM_RECLAIM);

Why MEM_RECLAIM ?

> +	if (!ocelot->owq)
> +		return -ENOMEM;

I don't think you can pass NULL to destroy_workqueue() so IDK how this
code does error handling (freeing of ocelot->stats_queue if owq fails).

>  	INIT_LIST_HEAD(&ocelot->multicast);
>  	INIT_LIST_HEAD(&ocelot->pgids);
>  	ocelot_mact_init(ocelot);
> @@ -1619,6 +1623,7 @@ void ocelot_deinit(struct ocelot *ocelot)
>  {
>  	cancel_delayed_work(&ocelot->stats_work);
>  	destroy_workqueue(ocelot->stats_queue);
> +	destroy_workqueue(ocelot->owq);
>  	mutex_destroy(&ocelot->stats_lock);
>  }

> +static int ocelot_enqueue_mact_action(struct ocelot *ocelot,
> +				      const struct ocelot_mact_work_ctx *ctx)
> +{
> +	struct ocelot_mact_work_ctx *w = kmalloc(sizeof(*w), GFP_ATOMIC);
> +
> +	if (!w)
> +		return -ENOMEM;
> +
> +	memcpy(w, ctx, sizeof(*w));

kmemdup()?
Vladimir Oltean Dec. 9, 2020, 1:56 a.m. UTC | #2
On Mon, Dec 07, 2020 at 05:09:37PM -0800, Jakub Kicinski wrote:
> > +	ocelot->owq = alloc_ordered_workqueue("ocelot-owq", WQ_MEM_RECLAIM);
>
> Why MEM_RECLAIM ?

Ok, fine, I admit, I copied it.

After reading the documentation a bit more thoroughly, I am still as
clear about the guidelines as before. The original logic was, I am
allocating a memory area and then freeing it from the work item. So it
must be beneficial for the kernel to want to flush this workqueue during
the memory reclaim process / under memory pressure, because I am doing
no memory allocation, and I am also freeing some memory in fact.

The thing is, there are already a lot of users of WQ_MEM_RECLAIM. Many
outside of the filesystem/block subsystems. Not sure if all of them
misuse it, or how to even tell which one constitutes a correct example
of usage for WQ_MEM_RECLAIM.

> > +	if (!ocelot->owq)
> > +		return -ENOMEM;
>
> I don't think you can pass NULL to destroy_workqueue() so IDK how this
> code does error handling (freeing of ocelot->stats_queue if owq fails).

It doesn't.

> >  	INIT_LIST_HEAD(&ocelot->multicast);
> >  	INIT_LIST_HEAD(&ocelot->pgids);
> >  	ocelot_mact_init(ocelot);
> > @@ -1619,6 +1623,7 @@ void ocelot_deinit(struct ocelot *ocelot)
> >  {
> >  	cancel_delayed_work(&ocelot->stats_work);
> >  	destroy_workqueue(ocelot->stats_queue);
> > +	destroy_workqueue(ocelot->owq);
> >  	mutex_destroy(&ocelot->stats_lock);
> >  }
>
> > +static int ocelot_enqueue_mact_action(struct ocelot *ocelot,
> > +				      const struct ocelot_mact_work_ctx *ctx)
> > +{
> > +	struct ocelot_mact_work_ctx *w = kmalloc(sizeof(*w), GFP_ATOMIC);
> > +
> > +	if (!w)
> > +		return -ENOMEM;
> > +
> > +	memcpy(w, ctx, sizeof(*w));
>
> kmemdup()?

Ok.
Jakub Kicinski Dec. 9, 2020, 6:25 p.m. UTC | #3
On Wed, 9 Dec 2020 01:56:14 +0000 Vladimir Oltean wrote:
> On Mon, Dec 07, 2020 at 05:09:37PM -0800, Jakub Kicinski wrote:
> > > +	ocelot->owq = alloc_ordered_workqueue("ocelot-owq", WQ_MEM_RECLAIM);  
> >
> > Why MEM_RECLAIM ?  
> 
> Ok, fine, I admit, I copied it.
> 
> After reading the documentation a bit more thoroughly, I am still as
> clear about the guidelines as before. The original logic was, I am
> allocating a memory area and then freeing it from the work item. So it
> must be beneficial for the kernel to want to flush this workqueue during
> the memory reclaim process / under memory pressure, because I am doing
> no memory allocation, and I am also freeing some memory in fact.
> 
> The thing is, there are already a lot of users of WQ_MEM_RECLAIM. Many
> outside of the filesystem/block subsystems. Not sure if all of them
> misuse it, or how to even tell which one constitutes a correct example
> of usage for WQ_MEM_RECLAIM.

Agreed, I wasn't 100% sure either.

I double checked with Tejun, he says it's only needed if the work 
queue is directly used in memory reclaim paths, like writing pages 
out to swap and such.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index abea8dd2b0cb..b9626eec8db6 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1513,6 +1513,10 @@  int ocelot_init(struct ocelot *ocelot)
 	if (!ocelot->stats_queue)
 		return -ENOMEM;
 
+	ocelot->owq = alloc_ordered_workqueue("ocelot-owq", WQ_MEM_RECLAIM);
+	if (!ocelot->owq)
+		return -ENOMEM;
+
 	INIT_LIST_HEAD(&ocelot->multicast);
 	INIT_LIST_HEAD(&ocelot->pgids);
 	ocelot_mact_init(ocelot);
@@ -1619,6 +1623,7 @@  void ocelot_deinit(struct ocelot *ocelot)
 {
 	cancel_delayed_work(&ocelot->stats_work);
 	destroy_workqueue(ocelot->stats_queue);
+	destroy_workqueue(ocelot->owq);
 	mutex_destroy(&ocelot->stats_lock);
 }
 EXPORT_SYMBOL(ocelot_deinit);
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index c65ae6f75a16..9ba7e2b166e9 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -414,13 +414,82 @@  static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
+enum ocelot_action_type {
+	OCELOT_MACT_LEARN,
+	OCELOT_MACT_FORGET,
+};
+
+struct ocelot_mact_work_ctx {
+	struct work_struct work;
+	struct ocelot *ocelot;
+	enum ocelot_action_type type;
+	union {
+		/* OCELOT_MACT_LEARN */
+		struct {
+			int pgid;
+			enum macaccess_entry_type entry_type;
+			unsigned char addr[ETH_ALEN];
+			u16 vid;
+		} learn;
+		/* OCELOT_MACT_FORGET */
+		struct {
+			unsigned char addr[ETH_ALEN];
+			u16 vid;
+		} forget;
+	};
+};
+
+#define ocelot_work_to_ctx(x) \
+	container_of((x), struct ocelot_mact_work_ctx, work)
+
+static void ocelot_mact_work(struct work_struct *work)
+{
+	struct ocelot_mact_work_ctx *w = ocelot_work_to_ctx(work);
+	struct ocelot *ocelot = w->ocelot;
+
+	switch (w->type) {
+	case OCELOT_MACT_LEARN:
+		ocelot_mact_learn(ocelot, w->learn.pgid, w->learn.addr,
+				  w->learn.vid, w->learn.entry_type);
+		break;
+	case OCELOT_MACT_FORGET:
+		ocelot_mact_forget(ocelot, w->forget.addr, w->forget.vid);
+		break;
+	default:
+		break;
+	};
+
+	kfree(w);
+}
+
+static int ocelot_enqueue_mact_action(struct ocelot *ocelot,
+				      const struct ocelot_mact_work_ctx *ctx)
+{
+	struct ocelot_mact_work_ctx *w = kmalloc(sizeof(*w), GFP_ATOMIC);
+
+	if (!w)
+		return -ENOMEM;
+
+	memcpy(w, ctx, sizeof(*w));
+	w->ocelot = ocelot;
+	INIT_WORK(&w->work, ocelot_mact_work);
+	queue_work(ocelot->owq, &w->work);
+
+	return 0;
+}
+
 static int ocelot_mc_unsync(struct net_device *dev, const unsigned char *addr)
 {
 	struct ocelot_port_private *priv = netdev_priv(dev);
 	struct ocelot_port *ocelot_port = &priv->port;
 	struct ocelot *ocelot = ocelot_port->ocelot;
+	struct ocelot_mact_work_ctx w;
+
+	ether_addr_copy(w.forget.addr, addr);
+	w.forget.vid = ocelot_port->pvid_vlan.vid;
+	w.type = OCELOT_MACT_FORGET;
 
-	return ocelot_mact_forget(ocelot, addr, ocelot_port->pvid_vlan.vid);
+	return ocelot_enqueue_mact_action(ocelot, &w);
 }
 
 static int ocelot_mc_sync(struct net_device *dev, const unsigned char *addr)
@@ -428,9 +497,15 @@  static int ocelot_mc_sync(struct net_device *dev, const unsigned char *addr)
 	struct ocelot_port_private *priv = netdev_priv(dev);
 	struct ocelot_port *ocelot_port = &priv->port;
 	struct ocelot *ocelot = ocelot_port->ocelot;
+	struct ocelot_mact_work_ctx w;
+
+	ether_addr_copy(w.learn.addr, addr);
+	w.learn.vid = ocelot_port->pvid_vlan.vid;
+	w.learn.pgid = PGID_CPU;
+	w.learn.entry_type = ENTRYTYPE_LOCKED;
+	w.type = OCELOT_MACT_LEARN;
 
-	return ocelot_mact_learn(ocelot, PGID_CPU, addr,
-				 ocelot_port->pvid_vlan.vid, ENTRYTYPE_LOCKED);
+	return ocelot_enqueue_mact_action(ocelot, &w);
 }
 
 static void ocelot_set_rx_mode(struct net_device *dev)
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 731116611390..2f4cd3288bcc 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -650,6 +650,8 @@  struct ocelot {
 	struct delayed_work		stats_work;
 	struct workqueue_struct		*stats_queue;
 
+	struct workqueue_struct		*owq;
+
 	u8				ptp:1;
 	struct ptp_clock		*ptp_clock;
 	struct ptp_clock_info		ptp_info;