diff mbox

[net-next] rocker: move netevent neigh update to process context

Message ID 1432184707-27252-1-git-send-email-sfeldma@gmail.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Scott Feldman May 21, 2015, 5:05 a.m. UTC
From: Scott Feldman <sfeldma@gmail.com>

In review of Simon's patchset "rocker: transaction fixes", it was noted
that rocker->neigh_tbl_next_index was unprotected in the call path below
and could race with other contexts calling rocker_port_ipv4_neigh():

	arp_process()
	neigh_update()
	rocker_neigh_update()
	rocker_port_ipv4_neigh()

To fix, move the neigh_update() event processing to process context and
hold rtnl_lock to call rocker_port_ipv4_neigh().  This will protect
rocker->neigh_tbl_next_index accesses and is more consistent with the rest
of the driver code where non-I/O processing is done under process context
with rtnl_lock held.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
---
 drivers/net/ethernet/rocker/rocker.c |   52 +++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 7 deletions(-)

Comments

Simon Horman May 21, 2015, 7:37 a.m. UTC | #1
On Wed, May 20, 2015 at 10:05:07PM -0700, sfeldma@gmail.com wrote:
> From: Scott Feldman <sfeldma@gmail.com>
> 
> In review of Simon's patchset "rocker: transaction fixes". it was noted
> that rocker->neigh_tbl_next_index was unprotected in the call path below
> and could race with other contexts calling rocker_port_ipv4_neigh():
> 
> 	arp_process()
> 	neigh_update()
> 	rocker_neigh_update()
> 	rocker_port_ipv4_neigh()
> 
> To fix, move the neigh_update() event processing to process contexts and
> hold rtnl_lock to call rocker_port_ipv4_neigh().  This will protect
> rocker->neigh_tbl_next_index accesses and is more consistent with the rest
> of the driver code where non-I/O processing is done under process context
> with rtnl_lock held.
> 
> Signed-off-by: Scott Feldman <sfeldma@gmail.com>

Hi Scott,

this patch does what it says on the wrapper and in itself looks good.

Reviewed-by: Simon Horman <simon.horman@netronome.com>

However, this patch seems to expose another bug.
With it applied I see the following:

ip addr add 10.0.99.192/24 dev eth0
ip link set up dev eth0
ip route add 10.0.97.0/24 via 10.0.99.135

1. Prepare Phase:

   rocker_port_ipv4_nh() -> rocker_port_ipv4_resolve()

   In rocker_port_ipv4_resolve() n->nud_state & NUD_VALID is false
   and rocker_port_ipv4_neigh() is not called.

2. Commit Phase:

   rocker_port_ipv4_nh() -> rocker_port_ipv4_resolve()

   In rocker_port_ipv4_resolve() n->nud_state & NUD_VALID is now true
   and rocker_port_ipv4_neigh() is called.

   rocker_port_ipv4_neigh() calls rocker_port_kzalloc() which
   reports a bug because there was no corresponding call during the prepare
   phase.

In a nutshell the rocker_port_*alloc calls are not symmetric in the two
phases because of an external state change (in the neighbour table
in the core of the network stack).

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko May 21, 2015, 8 a.m. UTC | #2
Thu, May 21, 2015 at 07:05:07AM CEST, sfeldma@gmail.com wrote:
>From: Scott Feldman <sfeldma@gmail.com>
>
>In review of Simon's patchset "rocker: transaction fixes". it was noted
>that rocker->neigh_tbl_next_index was unprotected in the call path below
>and could race with other contexts calling rocker_port_ipv4_neigh():
>
>	arp_process()
>	neigh_update()
>	rocker_neigh_update()
>	rocker_port_ipv4_neigh()
>
>To fix, move the neigh_update() event processing to process contexts and
>hold rtnl_lock to call rocker_port_ipv4_neigh().  This will protect
>rocker->neigh_tbl_next_index accesses and is more consistent with the rest
>of the driver code where non-I/O processing is done under process context
>with rtnl_lock held.
>
>Signed-off-by: Scott Feldman <sfeldma@gmail.com>
>---
> drivers/net/ethernet/rocker/rocker.c |   52 +++++++++++++++++++++++++++++-----
> 1 file changed, 45 insertions(+), 7 deletions(-)
>
>diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
>index 0f5e962..4cff2f6 100644
>--- a/drivers/net/ethernet/rocker/rocker.c
>+++ b/drivers/net/ethernet/rocker/rocker.c
>@@ -5240,14 +5240,52 @@ static struct notifier_block rocker_netdevice_nb __read_mostly = {
>  * Net event notifier event handler
>  ************************************/
> 
>-static int rocker_neigh_update(struct net_device *dev, struct neighbour *n)
>+struct rocker_neigh_update_work {
>+	struct work_struct work;
>+	struct rocker_port *rocker_port;
>+	int flags;
>+	__be32 ip_addr;
>+	unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
>+};
>+
>+static void rocker_event_neigh_update_work(struct work_struct *work)
> {
>-	struct rocker_port *rocker_port = netdev_priv(dev);
>-	int flags = (n->nud_state & NUD_VALID) ? 0 : ROCKER_OP_FLAG_REMOVE;
>-	__be32 ip_addr = *(__be32 *)n->primary_key;
>+	struct rocker_neigh_update_work *nw =
>+		container_of(work, struct rocker_neigh_update_work, work);
>+	int err;
>+
>+	rtnl_lock();
>+	err = rocker_port_ipv4_neigh(nw->rocker_port, SWITCHDEV_TRANS_NONE,
>+				     nw->flags, nw->ip_addr, nw->ha);
>+	rtnl_unlock();
>+
>+	if (err)
>+		netdev_warn(nw->rocker_port->dev,
>+			    "failed to handle neigh %pI4 update (err %d)\n",
>+			    &nw->ip_addr, err);
> 
>-	return rocker_port_ipv4_neigh(rocker_port, SWITCHDEV_TRANS_NONE,
>-				      flags, ip_addr, n->ha);
>+	kfree(work);

It should be kfree(nw).
I know it is the same, but it looks more correct.

Other than this, this looks good to me

Acked-by: Jiri Pirko <jiri@resnulli.us>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Toshiaki Makita May 21, 2015, 8:53 a.m. UTC | #3
On 2015/05/21 14:05, sfeldma@gmail.com wrote:
> From: Scott Feldman <sfeldma@gmail.com>
> 
> In review of Simon's patchset "rocker: transaction fixes". it was noted
> that rocker->neigh_tbl_next_index was unprotected in the call path below
> and could race with other contexts calling rocker_port_ipv4_neigh():
> 
> 	arp_process()
> 	neigh_update()
> 	rocker_neigh_update()
> 	rocker_port_ipv4_neigh()
> 
> To fix, move the neigh_update() event processing to process contexts and
> hold rtnl_lock to call rocker_port_ipv4_neigh().  This will protect
> rocker->neigh_tbl_next_index accesses and is more consistent with the rest
> of the driver code where non-I/O processing is done under process context
> with rtnl_lock held.

Hi Scott,

Thank you for fixing this.
Note that this also fixes incorrect use of GFP_KERNEL in
__rocker_port_mem_alloc() and rocker_wait_event_timeout() in interrupt
context, as well as the neigh_tbl_next_index problem stated above.

There seem to be other transactions performed in interrupt context or
process context with bh disabled, where we cannot call
rocker_wait_event_timeout(), which could sleep.
Problematic cases I have found so far are those through br_set_state():
- br_stp_rcv() ... -> br_set_state() ... -> rocker_port_attr_set()
- del_nbp() -> br_stp_disable_port() -> br_set_state() ...

Thanks,
Toshiaki Makita

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller May 22, 2015, 3:29 a.m. UTC | #4
From: sfeldma@gmail.com
Date: Wed, 20 May 2015 22:05:07 -0700

> From: Scott Feldman <sfeldma@gmail.com>
> 
> In review of Simon's patchset "rocker: transaction fixes". it was noted
> that rocker->neigh_tbl_next_index was unprotected in the call path below
> and could race with other contexts calling rocker_port_ipv4_neigh():
> 
> 	arp_process()
> 	neigh_update()
> 	rocker_neigh_update()
> 	rocker_port_ipv4_neigh()
> 
> To fix, move the neigh_update() event processing to process contexts and
> hold rtnl_lock to call rocker_port_ipv4_neigh().  This will protect
> rocker->neigh_tbl_next_index accesses and is more consistent with the rest
> of the driver code where non-I/O processing is done under process context
> with rtnl_lock held.
> 
> Signed-off-by: Scott Feldman <sfeldma@gmail.com>

Are you sure that the workqueue mechanism all by itself will ensure
that operations queued up will be processed in-order?

I do not know of any such explicit guarantee.

Therefore I think you will need a per-device workqueue with a list,
or something like that.

I could be wrong.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index 0f5e962..4cff2f6 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -5240,14 +5240,52 @@  static struct notifier_block rocker_netdevice_nb __read_mostly = {
  * Net event notifier event handler
  ************************************/
 
-static int rocker_neigh_update(struct net_device *dev, struct neighbour *n)
+struct rocker_neigh_update_work {
+	struct work_struct work;
+	struct rocker_port *rocker_port;
+	int flags;
+	__be32 ip_addr;
+	unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
+};
+
+static void rocker_event_neigh_update_work(struct work_struct *work)
 {
-	struct rocker_port *rocker_port = netdev_priv(dev);
-	int flags = (n->nud_state & NUD_VALID) ? 0 : ROCKER_OP_FLAG_REMOVE;
-	__be32 ip_addr = *(__be32 *)n->primary_key;
+	struct rocker_neigh_update_work *nw =
+		container_of(work, struct rocker_neigh_update_work, work);
+	int err;
+
+	rtnl_lock();
+	err = rocker_port_ipv4_neigh(nw->rocker_port, SWITCHDEV_TRANS_NONE,
+				     nw->flags, nw->ip_addr, nw->ha);
+	rtnl_unlock();
+
+	if (err)
+		netdev_warn(nw->rocker_port->dev,
+			    "failed to handle neigh %pI4 update (err %d)\n",
+			    &nw->ip_addr, err);
 
-	return rocker_port_ipv4_neigh(rocker_port, SWITCHDEV_TRANS_NONE,
-				      flags, ip_addr, n->ha);
+	kfree(work);
+}
+
+static int rocker_event_neigh_update(struct net_device *dev,
+				     struct neighbour *n)
+{
+	struct rocker_neigh_update_work *nw;
+
+	nw = kmalloc(sizeof(*nw), GFP_ATOMIC);
+	if (!nw)
+		return -ENOMEM;
+
+	INIT_WORK(&nw->work, rocker_event_neigh_update_work);
+
+	nw->rocker_port = netdev_priv(dev);
+	nw->flags = (n->nud_state & NUD_VALID) ? 0 : ROCKER_OP_FLAG_REMOVE;
+	nw->ip_addr = *(__be32 *)n->primary_key;
+	memcpy(nw->ha, n->ha, sizeof(nw->ha));
+
+	schedule_work(&nw->work);
+
+	return 0;
 }
 
 static int rocker_netevent_event(struct notifier_block *unused,
@@ -5264,7 +5302,7 @@  static int rocker_netevent_event(struct notifier_block *unused,
 		dev = n->dev;
 		if (!rocker_port_dev_check(dev))
 			return NOTIFY_DONE;
-		err = rocker_neigh_update(dev, n);
+		err = rocker_event_neigh_update(dev, n);
 		if (err)
 			netdev_warn(dev,
 				    "failed to handle neigh update (err %d)\n",