diff mbox

bonding: cancel_delayed_work() -> cancel_delayed_work_sync()

Message ID 20091217002808.E44D0254177@hockey.mtv.corp.google.com
State Rejected, archived
Delegated to: David Miller
Headers show

Commit Message

Mikhail Markine Dec. 17, 2009, 12:28 a.m. UTC
A race condition was observed with bond_mii_monitor() attempting to
process an interface already closed by bond_close() in response to
'ifconfig bond<x> down'.

Change all instances of cancel_delayed_work() to cancel_delayed_work_sync().

Signed-off-by: Mikhail Markine <markine@google.com>
Signed-off-by: Petri Gynther <pgynther@google.com>
---
 drivers/net/bonding/bond_main.c  |   16 ++++++++--------
 drivers/net/bonding/bond_sysfs.c |    4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

Comments

Jarek Poplawski Dec. 17, 2009, 7:49 a.m. UTC | #1
On 17-12-2009 01:28, Mikhail Markine wrote:
> A race condition was observed with bond_mii_monitor() attempting to
> process an interface already closed by bond_close() in response to
> 'ifconfig bond<x> down'.
> 
> Change all instances of cancel_delayed_work() to cancel_delayed_work_sync().

I think you can't do it at places which hold rtnl_lock with works
taking this lock too. Did you test it with CONFIG_PROVE_LOCKING btw?

Jarek P.

> 
> Signed-off-by: Mikhail Markine <markine@google.com>
> Signed-off-by: Petri Gynther <pgynther@google.com>
> ---
>  drivers/net/bonding/bond_main.c  |   16 ++++++++--------
>  drivers/net/bonding/bond_sysfs.c |    4 ++--
>  2 files changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index af9b9c4..2bdacb6 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -3786,20 +3786,20 @@ static int bond_close(struct net_device *bond_dev)
>  	write_unlock_bh(&bond->lock);
>  
>  	if (bond->params.miimon) {  /* link check interval, in milliseconds. */
> -		cancel_delayed_work(&bond->mii_work);
> +		cancel_delayed_work_sync(&bond->mii_work);
>  	}
>  
>  	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
> -		cancel_delayed_work(&bond->arp_work);
> +		cancel_delayed_work_sync(&bond->arp_work);
>  	}
>  
>  	switch (bond->params.mode) {
>  	case BOND_MODE_8023AD:
> -		cancel_delayed_work(&bond->ad_work);
> +		cancel_delayed_work_sync(&bond->ad_work);
>  		break;
>  	case BOND_MODE_TLB:
>  	case BOND_MODE_ALB:
> -		cancel_delayed_work(&bond->alb_work);
> +		cancel_delayed_work_sync(&bond->alb_work);
>  		break;
>  	default:
>  		break;
> @@ -4566,18 +4566,18 @@ static void bond_work_cancel_all(struct bonding *bond)
>  	write_unlock_bh(&bond->lock);
>  
>  	if (bond->params.miimon && delayed_work_pending(&bond->mii_work))
> -		cancel_delayed_work(&bond->mii_work);
> +		cancel_delayed_work_sync(&bond->mii_work);
>  
>  	if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work))
> -		cancel_delayed_work(&bond->arp_work);
> +		cancel_delayed_work_sync(&bond->arp_work);
>  
>  	if (bond->params.mode == BOND_MODE_ALB &&
>  	    delayed_work_pending(&bond->alb_work))
> -		cancel_delayed_work(&bond->alb_work);
> +		cancel_delayed_work_sync(&bond->alb_work);
>  
>  	if (bond->params.mode == BOND_MODE_8023AD &&
>  	    delayed_work_pending(&bond->ad_work))
> -		cancel_delayed_work(&bond->ad_work);
> +		cancel_delayed_work_sync(&bond->ad_work);
>  }
>  
>  /*
> diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
> index 4e00b4f..d951939 100644
> --- a/drivers/net/bonding/bond_sysfs.c
> +++ b/drivers/net/bonding/bond_sysfs.c
> @@ -598,7 +598,7 @@ static ssize_t bonding_store_arp_interval(struct device *d,
>  		       bond->dev->name, bond->dev->name);
>  		bond->params.miimon = 0;
>  		if (delayed_work_pending(&bond->mii_work)) {
> -			cancel_delayed_work(&bond->mii_work);
> +			cancel_delayed_work_sync(&bond->mii_work);
>  			flush_workqueue(bond->wq);
>  		}
>  	}
> @@ -1117,7 +1117,7 @@ static ssize_t bonding_store_miimon(struct device *d,
>  					BOND_ARP_VALIDATE_NONE;
>  			}
>  			if (delayed_work_pending(&bond->arp_work)) {
> -				cancel_delayed_work(&bond->arp_work);
> +				cancel_delayed_work_sync(&bond->arp_work);
>  				flush_workqueue(bond->wq);
>  			}
>  		}
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Dec. 17, 2009, 1:36 p.m. UTC | #2
On Thu, Dec 17, 2009 at 02:19:46PM +0100, Johannes Berg wrote:
> Jarek,
> 
> Sorry to mail you directly, but I only saw your reply on gmane and
> didn't want to break up the threading etc.
> 
> cancel_delayed_work_sync() should be ok in this case unless the work
> items themselves used the rtnl,

Hmm, I'm not sure I get your point, but e.g. bond_mii_monitor() work
function can get rtnl_lock().

> the common problem only happens with
> flush_scheduled_work() -- sync() is fine since either it's running, then
> you need the sync, or if it's not running it doesn't matter if something
> else is on the queue before it that's blocked on the rtnl.
> 
> If you could reply to the thread to that effect I'd appreciate it :)

No problem with this question :-)
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Johannes Berg Dec. 17, 2009, 2:30 p.m. UTC | #3
On Thu, 2009-12-17 at 13:36 +0000, Jarek Poplawski wrote:
> On Thu, Dec 17, 2009 at 02:19:46PM +0100, Johannes Berg wrote:
> > Jarek,
> > 
> > Sorry to mail you directly, but I only saw your reply on gmane and
> > didn't want to break up the threading etc.
> > 
> > cancel_delayed_work_sync() should be ok in this case unless the work
> > items themselves used the rtnl,
> 
> Hmm, I'm not sure I get your point, but e.g. bond_mii_monitor() work
> function can get rtnl_lock().

Ok in that case you can't cancel_sync() it under rtnl. I was thinking of
the case where it's just not ok because of other work. Sorry for the
confusion!

johannes
Jay Vosburgh Dec. 17, 2009, 4:12 p.m. UTC | #4
Johannes Berg <johannes@sipsolutions.net> wrote:

>On Thu, 2009-12-17 at 13:36 +0000, Jarek Poplawski wrote:
>> On Thu, Dec 17, 2009 at 02:19:46PM +0100, Johannes Berg wrote:
>> > Jarek,
>> > 
>> > Sorry to mail you directly, but I only saw your reply on gmane and
>> > didn't want to break up the threading etc.
>> > 
>> > cancel_delayed_work_sync() should be ok in this case unless the work
>> > items themselves used the rtnl,
>> 
>> Hmm, I'm not sure I get your point, but e.g. bond_mii_monitor() work
>> function can get rtnl_lock().
>
>Ok in that case you can't cancel_sync() it under rtnl. I was thinking of
>the case where it's just not ok because of other work. Sorry for the
>confusion!

	There's already logic in the monitors (bond_mii_monitor, et al)
to check a sentinel (kill_timers) and do nothing (not acquire rtnl) and
return.

	What exactly is the nature of the race that doing cancel..sync
is fixing?  The bond_close function sets kill_timers prior to calling
the cancel functions, so the monitor function might run once, but it
should do nothing.

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Dec. 17, 2009, 6:40 p.m. UTC | #5
On Thu, Dec 17, 2009 at 08:12:53AM -0800, Jay Vosburgh wrote:
> 	There's already logic in the monitors (bond_mii_monitor, et al)
> to check a sentinel (kill_timers) and do nothing (not acquire rtnl) and
> return.

Btw, this check should be repeated if bond->lock is given back and
re-acquired. I can't see these kill_timers used in bond_sysfs.c though.

> 	What exactly is the nature of the race that doing cancel..sync
> is fixing?  The bond_close function sets kill_timers prior to calling
> the cancel functions, so the monitor function might run once, but it
> should do nothing.

I guess there is a problem with destructions, but I hope Mikhail will
give more details.

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
laurent chavey Dec. 17, 2009, 6:49 p.m. UTC | #6
one instance that could be a problem

__exit bonding_exit(void)
    bond_free_all()
      	bond_work_cancel_all(bond);
        unregister_netdevice(bond_dev)

could the above result in an invalid pointer when trying
to use bond-> in one of the timer CB ?


On Thu, Dec 17, 2009 at 10:40 AM, Jarek Poplawski <jarkao2@gmail.com> wrote:
> On Thu, Dec 17, 2009 at 08:12:53AM -0800, Jay Vosburgh wrote:
>>       There's already logic in the monitors (bond_mii_monitor, et al)
>> to check a sentinel (kill_timers) and do nothing (not acquire rtnl) and
>> return.
>
> Btw, this check should be repeated if bond->lock is given back and
> re-acquired. I can't see these kill_timers used in bond_sysfs.c though.
>
>>       What exactly is the nature of the race that doing cancel..sync
>> is fixing?  The bond_close function sets kill_timers prior to calling
>> the cancel functions, so the monitor function might run once, but it
>> should do nothing.
>
> I guess there is a problem with destructions, but I hope Mikhail will
> give more details.
>
> Jarek P.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jay Vosburgh Dec. 17, 2009, 7:37 p.m. UTC | #7
Laurent Chavey <chavey@google.com> wrote:

>one instance that could be a problem
>
>__exit bonding_exit(void)
>    bond_free_all()
>      	bond_work_cancel_all(bond);
>        unregister_netdevice(bond_dev)
>
>could the above result in an invalid pointer when trying
>to use bond-> in one of the timer CB ?

	The bonding teardown logic was reworked in October, and there is
no longer a bond_free_all in the current mainline.  What kernel are you
looking at?

	The bond_close function will stop the various work items, and
the ndo_uninit (bond_uninit) will call bond_work_cancel_all as well.

	Actually, on looking at it (it being current mainline),
bond_uninit might need some kind of logic to wait and ensure that all
timers have completed before returning.  It comes from unregister, so
the next thing that happens after it returns is that the memory will be
freed (via netdev_run_todo, during rtnl_unlock, if I'm following it
correctly).

	The bond_uninit function is called under RTNL, though, so the
timer functions (bond_mii_monitor, et al) may need additional checks for
kill_timers to ensure they don't attempt to acquire RTNL if a cancel is
pending.

	That's kind of tricky itself, since the lock ordering requires
RTNL to be acquired first, so there's no way for bond_mii_monitor (et
al) to check for kill_timers prior to already having RTNL (because the
function acquires RTNL conditionally, only if needed; to do that, it
unlocks the bond lock, then acquires RTNL, then re-locks the bond lock).

	So, the lock dance to acquire RTNL in bond_mii_monitor (et al)
would need some trickery, perhaps a rtnl_trylock loop, that checks
kill_timers each time the trylock fails, e.g.,

	if (bond_miimon_inspect(bond)) {
		read_unlock(&bond->lock);
		while (!rtnl_trylock()) {
			read_lock(&bond->lock);
			if (bond->kill_timers)
				goto out;
			read_unlock(&bond->lock);
			/* msleep ? */
		}

		bond_miimon_commit(bond);
		[...]

	So, with the above (and similar changes to the other delayed
work functions, and a big honkin' comment somewhere to explain this), I
suspect that bond_work_cancel_all could use the _sync variant to cancel
the work, as long as kill_timers is set before the cancel_sync is
called.

	Am I missing anything?  Does this seem rational?

>On Thu, Dec 17, 2009 at 10:40 AM, Jarek Poplawski <jarkao2@gmail.com> wrote:
>> On Thu, Dec 17, 2009 at 08:12:53AM -0800, Jay Vosburgh wrote:
>>>       There's already logic in the monitors (bond_mii_monitor, et al)
>>> to check a sentinel (kill_timers) and do nothing (not acquire rtnl) and
>>> return.
>>
>> Btw, this check should be repeated if bond->lock is given back and
>> re-acquired. I can't see these kill_timers used in bond_sysfs.c though.

	Yes, this is true, and I think that doing this in the above
manner may eliminate the problem.

>>>       What exactly is the nature of the race that doing cancel..sync
>>> is fixing?  The bond_close function sets kill_timers prior to calling
>>> the cancel functions, so the monitor function might run once, but it
>>> should do nothing.
>>
>> I guess there is a problem with destructions, but I hope Mikhail will
>> give more details.

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Dec. 17, 2009, 8:56 p.m. UTC | #8
On Thu, Dec 17, 2009 at 11:37:42AM -0800, Jay Vosburgh wrote:
> Laurent Chavey <chavey@google.com> wrote:
> 
> >one instance that could be a problem
> >
> >__exit bonding_exit(void)
> >    bond_free_all()
> >      	bond_work_cancel_all(bond);
> >        unregister_netdevice(bond_dev)
> >
> >could the above result in an invalid pointer when trying
> >to use bond-> in one of the timer CB ?
> 
> 	The bonding teardown logic was reworked in October, and there is
> no longer a bond_free_all in the current mainline.  What kernel are you
> looking at?
> 
> 	The bond_close function will stop the various work items, and
> the ndo_uninit (bond_uninit) will call bond_work_cancel_all as well.
> 
> 	Actually, on looking at it (it being current mainline),
> bond_uninit might need some kind of logic to wait and insure that all
> timers have completed before returning.  It comes from unregister, so
> the next thing that happens after it returns is that the memory will be
> freed (via netdev_run_todo, during rtnl_unlock, if I'm following it
> correctly).
> 
> 	The bond_uninit function is called under RTNL, though, so the
> timer functions (bond_mii_monitor, et al) may need additional checks for
> kill_timers to insure they don't attempt to acquire RTNL if a cancel is
> pending.
> 
> 	That's kind of tricky itself, since the lock ordering requires
> RTNL to be acquired first, so there's no way for bond_mii_monitor (et
> al) to check for kill_timers prior to already having RTNL (because the
> function acquires RTNL conditionally, only if needed; to do that, it
> unlocks the bond lock, then acquires RTNL, then re-locks the bond lock).
> 
> 	So, the lock dance to acquire RTNL in bond_mii_monitor (et al)
> would need some trickery, perhaps a rtnl_trylock loop, that checks
> kill_timers each time the trylock fails, e.g.,
> 
> 	if (bond_miimon_inspect(bond)) {
> 		read_unlock(&bond->lock);
> 		while (!rtnl_trylock) {
> 			read_lock(&bond->lock);
> 			if (bond->kill_timers)
> 				goto out;
> 			read_unlock(&bond->lock);
> 			/* msleep ? */
> 		}
> 
> 		bond_miimon_commit(bond);
> 		[...]
> 
> 	So, with the above (and similar changes to the other delayed
> work functions, and a big honkin' comment somewhere to explain this), I
> suspect that bond_work_cancel_all could use the _sync variant to cancel
> the work, as long as kill_timers is set before the cancel_sync is
> called.
> 
> 	Am I missing anything?  Does this seem rational?

It seems OK to me ...if there is nothing better ;-) But such endless
loops are tricky (they omit lockdep, plus can hide some hidden
dependencies between different tasks, even in the future). If it's
possible we could consider a limited loop with re-arming on failure;
then cancel_delayed_work_sync() (with its standard logic) could be
used everywhere, and kill_timers might be useless too (if there is no
re-arming between different works).

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Dec. 17, 2009, 9:16 p.m. UTC | #9
On Thu, Dec 17, 2009 at 09:56:17PM +0100, Jarek Poplawski wrote:
> loops are tricky (they omit lockdep, plus can hide some hidden
...hideousness... ;-)

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
laurent chavey Dec. 17, 2009, 9:25 p.m. UTC | #10
On Thu, Dec 17, 2009 at 11:37 AM, Jay Vosburgh <fubar@us.ibm.com> wrote:
> Laurent Chavey <chavey@google.com> wrote:
>
>>one instance that could be a problem
>>
>>__exit bonding_exit(void)
>>    bond_free_all()
>>       bond_work_cancel_all(bond);
>>        unregister_netdevice(bond_dev)
>>
>>could the above result in an invalid pointer when trying
>>to use bond-> in one of the timer CB ?
>
>        The bonding teardown logic was reworked in October, and there is
> no longer a bond_free_all in the current mainline.  What kernel are you
> looking at?
not mainline :-), switched to mainline, see it as you do. thx

>
>        The bond_close function will stop the various work items, and
> the ndo_uninit (bond_uninit) will call bond_work_cancel_all as well.
>
>        Actually, on looking at it (it being current mainline),
> bond_uninit might need some kind of logic to wait and insure that all
> timers have completed before returning.  It comes from unregister, so
> the next thing that happens after it returns is that the memory will be
> freed (via netdev_run_todo, during rtnl_unlock, if I'm following it
> correctly).
>
>        The bond_uninit function is called under RTNL, though, so the
> timer functions (bond_mii_monitor, et al) may need additional checks for
> kill_timers to insure they don't attempt to acquire RTNL if a cancel is
> pending.
>
>        That's kind of tricky itself, since the lock ordering requires
> RTNL to be acquired first, so there's no way for bond_mii_monitor (et
> al) to check for kill_timers prior to already having RTNL (because the
> function acquires RTNL conditionally, only if needed; to do that, it
> unlocks the bond lock, then acquires RTNL, then re-locks the bond lock).
>
>        So, the lock dance to acquire RTNL in bond_mii_monitor (et al)
> would need some trickery, perhaps a rtnl_trylock loop, that checks
> kill_timers each time the trylock fails, e.g.,
>
>        if (bond_miimon_inspect(bond)) {
>                read_unlock(&bond->lock);
>                while (!rtnl_trylock) {
>                        read_lock(&bond->lock);
>                        if (bond->kill_timers)
>                                goto out;
>                        read_unlock(&bond->lock);
>                        /* msleep ? */
>                }
>
>                bond_miimon_commit(bond);
>                [...]
>
>        So, with the above (and similar changes to the other delayed
> work functions, and a big honkin' comment somewhere to explain this), I
> suspect that bond_work_cancel_all could use the _sync variant to cancel
> the work, as long as kill_timers is set before the cancel_sync is
> called.
>
>        Am I missing anything?  Does this seem rational?
>
yes it does. the _sync variant should cover the hole.


>>On Thu, Dec 17, 2009 at 10:40 AM, Jarek Poplawski <jarkao2@gmail.com> wrote:
>>> On Thu, Dec 17, 2009 at 08:12:53AM -0800, Jay Vosburgh wrote:
>>>>       There's already logic in the monitors (bond_mii_monitor, et al)
>>>> to check a sentinel (kill_timers) and do nothing (not acquire rtnl) and
>>>> return.
>>>
>>> Btw, this check should be repeated if bond->lock is given back and
>>> re-acquired. I can't see these kill_timers used in bond_sysfs.c though.
>
>        Yes, this is true, and I think that doing this in the above
> manner may eliminate the problem.
>
>>>>       What exactly is the nature of the race that doing cancel..sync
>>>> is fixing?  The bond_close function sets kill_timers prior to calling
>>>> the cancel functions, so the monitor function might run once, but it
>>>> should do nothing.
>>>
>>> I guess there is a problem with destructions, but I hope Mikhail will
>>> give more details.
>
>        -J
>
> ---
>        -Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mikhail Markine Dec. 17, 2009, 9:31 p.m. UTC | #11
Jay,

We originally applied this patch against bond_main.c under 2.6.25
(bear with me) in response to a BUG() observed in testing:

------------[ cut here ]------------
Kernel BUG at c0039aa4 [verbose debug info unavailable]
Oops: Exception in kernel mode, sig: 5 [#1]
[SNIP]
NIP [c0039aa4] queue_delayed_work_on+0x60/0x120
LR [e1578a7c] bond_mii_monitor+0x6c/0x98 [bonding]
Call Trace:
[db401f30] [c029e0e8] 0xc029e0e8 (unreliable)
[db401f50] [e1578a7c] bond_mii_monitor+0x6c/0x98 [bonding]
[db401f60] [c003914c] run_workqueue+0xb8/0x148
[db401f90] [c0039700] worker_thread+0x70/0xd0
[db401fd0] [c003d13c] kthread+0x48/0x84
[db401ff0] [c000dab4] kernel_thread+0x44/0x60
Instruction dump:
40a2fff4 71200001 38600000 41820018 80010024 bb810010 38210020 7c0803a6
4e800020 80050010 3160ffff 7d2b0110 <0f090000> 80050004 39250004 7c004a78
---[ end trace ef4eb74d43ff218e ]---

This was a BUG_ON(timer_pending(timer)) call in
queue_delayed_work_on() which was called from queue_delayed_work()
which was called at the bottom of bond_mii_monitor(). This implied
that mii_work was queued twice.

In both 2.6.25 and the current HEAD of Linus' tree, mii_work is queued
up in two places: bond_mii_monitor() and bond_open(). bond_open()
calls INIT_DELAYED_WORK() prior to queue_delayed_work().

Under both kernels, if bond_mii_monitor() sleeps due to RTNL while
bond_close() is called, setting kill_timers=1 and calling
cancel_delayed_work(&bond->mii_work) in bond_close() accomplishes
nothing. bond_mii_monitor() will eventually wake up and call
queue_delayed_work() again. A possible explanation for the 2.6.25
kernel trace above is that bond_close() and bond_open() are called on
the same net_device in succession while bond_mii_monitor() sleeps with
the associated bond. I believe the same is still possible on HEAD.
Thoughts?

Mikhail

On Thu, Dec 17, 2009 at 8:12 AM, Jay Vosburgh <fubar@us.ibm.com> wrote:
> Johannes Berg <johannes@sipsolutions.net> wrote:
>
>>On Thu, 2009-12-17 at 13:36 +0000, Jarek Poplawski wrote:
>>> On Thu, Dec 17, 2009 at 02:19:46PM +0100, Johannes Berg wrote:
>>> > Jarek,
>>> >
>>> > Sorry to mail you directly, but I only saw your reply on gmane and
>>> > didn't want to break up the threading etc.
>>> >
>>> > cancel_delayed_work_sync() should be ok in this case unless the work
>>> > items themselves used the rtnl,
>>>
>>> Hmm, I'm not sure I get your point, but e.g. bond_mii_monitor() work
>>> function can get rtnl_lock().
>>
>>Ok in that case you can't cancel_sync() it under rtnl. I was thinking of
>>the case where it's just not ok because of other work. Sorry for the
>>confusion!
>
>        There's already logic in the monitors (bond_mii_monitor, et al)
> to check a sentinel (kill_timers) and do nothing (not acquire rtnl) and
> return.
>
>        What exactly is the nature of the race that doing cancel..sync
> is fixing?  The bond_close function sets kill_timers prior to calling
> the cancel functions, so the monitor function might run once, but it
> should do nothing.
>
>        -J
>
> ---
>        -Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jay Vosburgh Dec. 17, 2009, 9:40 p.m. UTC | #12
Jarek Poplawski <jarkao2@gmail.com> wrote:

>On Thu, Dec 17, 2009 at 11:37:42AM -0800, Jay Vosburgh wrote:
>> Laurent Chavey <chavey@google.com> wrote:
>> 
>> >one instance that could be a problem
>> >
>> >__exit bonding_exit(void)
>> >    bond_free_all()
>> >      	bond_work_cancel_all(bond);
>> >        unregister_netdevice(bond_dev)
>> >
>> >could the above result in an invalid pointer when trying
>> >to use bond-> in one of the timer CB ?
>> 
>> 	The bonding teardown logic was reworked in October, and there is
>> no longer a bond_free_all in the current mainline.  What kernel are you
>> looking at?
>> 
>> 	The bond_close function will stop the various work items, and
>> the ndo_uninit (bond_uninit) will call bond_work_cancel_all as well.
>> 
>> 	Actually, on looking at it (it being current mainline),
>> bond_uninit might need some kind of logic to wait and insure that all
>> timers have completed before returning.  It comes from unregister, so
>> the next thing that happens after it returns is that the memory will be
>> freed (via netdev_run_todo, during rtnl_unlock, if I'm following it
>> correctly).
>> 
>> 	The bond_uninit function is called under RTNL, though, so the
>> timer functions (bond_mii_monitor, et al) may need additional checks for
>> kill_timers to insure they don't attempt to acquire RTNL if a cancel is
>> pending.
>> 
>> 	That's kind of tricky itself, since the lock ordering requires
>> RTNL to be acquired first, so there's no way for bond_mii_monitor (et
>> al) to check for kill_timers prior to already having RTNL (because the
>> function acquires RTNL conditionally, only if needed; to do that, it
>> unlocks the bond lock, then acquires RTNL, then re-locks the bond lock).
>> 
>> 	So, the lock dance to acquire RTNL in bond_mii_monitor (et al)
>> would need some trickery, perhaps a rtnl_trylock loop, that checks
>> kill_timers each time the trylock fails, e.g.,
>> 
>> 	if (bond_miimon_inspect(bond)) {
>> 		read_unlock(&bond->lock);
>> 		while (!rtnl_trylock) {
>> 			read_lock(&bond->lock);
>> 			if (bond->kill_timers)
>> 				goto out;
>> 			read_unlock(&bond->lock);
>> 			/* msleep ? */
>> 		}
>> 
>> 		bond_miimon_commit(bond);
>> 		[...]
>> 
>> 	So, with the above (and similar changes to the other delayed
>> work functions, and a big honkin' comment somewhere to explain this), I
>> suspect that bond_work_cancel_all could use the _sync variant to cancel
>> the work, as long as kill_timers is set before the cancel_sync is
>> called.
>> 
>> 	Am I missing anything?  Does this seem rational?
>
>It seems OK to me ...if there is nothing better ;-) But such endless
>loops are tricky (they omit lockdep, plus can hide some hidden
>dependancies between different tasks, even in the future). If it's
>possible we could consider a limited loop with re-arming on failure;
>then cancel_delayed_work_sync() (with its standard logic) could be
>used everywhere, and kill_timers might be useless too (if there is no
>re-arming between different works).

	A less evil alternative would be to punt and reschedule the work
if the rtnl_trylock failed, e.g.,

	if (bond_miimon_inspect(bond)) {
		read_unlock(&bond->lock);
		if (!rtnl_trylock()) {
			queue_work(...);
			return;
		}

		read_lock(&bond->lock);

		bond_miimon_commit(bond);
		[...]

	I'm not sure what the usual contention level on rtnl is (and,
therefore, how often this will punt for the normal case that's not the
race we're trying to avoid here).

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Dec. 17, 2009, 9:58 p.m. UTC | #13
On Thu, Dec 17, 2009 at 01:40:29PM -0800, Jay Vosburgh wrote:
> 	A less evil alternative would be to punt and reschedule the work
> if the rtnl_trylock failed, e.g.,
> 
> 	if (bond_miimon_inspect(bond)) {
> 		read_unlock(&bond->lock);
> 		if (!rtnl_trylock()) {
> 			queue_work(...);
> 			return;
> 		}
> 
> 		read_lock(&bond->lock);
> 
> 		bond_miimon_commit(bond);
> 		[...]
> 
> 	I'm not sure what the usual contention level on rtnl is (and,
> therefore, how often this will punt for the normal case that's not the
> race we're trying to avoid here).

Even if there is not much contention there is usually a lot of work
inside, so this looks most reasonable to me.

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Dec. 17, 2009, 10:33 p.m. UTC | #14
On Thu, Dec 17, 2009 at 10:58:23PM +0100, Jarek Poplawski wrote:
> On Thu, Dec 17, 2009 at 01:40:29PM -0800, Jay Vosburgh wrote:
> > 	A less evil alternative would be to punt and reschedule the work
> > if the rtnl_trylock failed, e.g.,
> > 
> > 	if (bond_miimon_inspect(bond)) {
> > 		read_unlock(&bond->lock);
> > 		if (!rtnl_trylock()) {
> > 			queue_work(...);
> > 			return;
> > 		}
> > 
> > 		read_lock(&bond->lock);
> > 
> > 		bond_miimon_commit(bond);
> > 		[...]
> > 
> > 	I'm not sure what the usual contention level on rtnl is (and,
> > therefore, how often this will punt for the normal case that's not the
> > race we're trying to avoid here).
> 
> Even if there is not much contention there is usually a lot of work
> inside, so this looks most reasonable to me.

On the other hand, there could be considered other alternatives yet,
like separating these works with rtnl_lock, and killing them reliably
from some better place (after dev close).

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index af9b9c4..2bdacb6 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3786,20 +3786,20 @@  static int bond_close(struct net_device *bond_dev)
 	write_unlock_bh(&bond->lock);
 
 	if (bond->params.miimon) {  /* link check interval, in milliseconds. */
-		cancel_delayed_work(&bond->mii_work);
+		cancel_delayed_work_sync(&bond->mii_work);
 	}
 
 	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
-		cancel_delayed_work(&bond->arp_work);
+		cancel_delayed_work_sync(&bond->arp_work);
 	}
 
 	switch (bond->params.mode) {
 	case BOND_MODE_8023AD:
-		cancel_delayed_work(&bond->ad_work);
+		cancel_delayed_work_sync(&bond->ad_work);
 		break;
 	case BOND_MODE_TLB:
 	case BOND_MODE_ALB:
-		cancel_delayed_work(&bond->alb_work);
+		cancel_delayed_work_sync(&bond->alb_work);
 		break;
 	default:
 		break;
@@ -4566,18 +4566,18 @@  static void bond_work_cancel_all(struct bonding *bond)
 	write_unlock_bh(&bond->lock);
 
 	if (bond->params.miimon && delayed_work_pending(&bond->mii_work))
-		cancel_delayed_work(&bond->mii_work);
+		cancel_delayed_work_sync(&bond->mii_work);
 
 	if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work))
-		cancel_delayed_work(&bond->arp_work);
+		cancel_delayed_work_sync(&bond->arp_work);
 
 	if (bond->params.mode == BOND_MODE_ALB &&
 	    delayed_work_pending(&bond->alb_work))
-		cancel_delayed_work(&bond->alb_work);
+		cancel_delayed_work_sync(&bond->alb_work);
 
 	if (bond->params.mode == BOND_MODE_8023AD &&
 	    delayed_work_pending(&bond->ad_work))
-		cancel_delayed_work(&bond->ad_work);
+		cancel_delayed_work_sync(&bond->ad_work);
 }
 
 /*
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 4e00b4f..d951939 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -598,7 +598,7 @@  static ssize_t bonding_store_arp_interval(struct device *d,
 		       bond->dev->name, bond->dev->name);
 		bond->params.miimon = 0;
 		if (delayed_work_pending(&bond->mii_work)) {
-			cancel_delayed_work(&bond->mii_work);
+			cancel_delayed_work_sync(&bond->mii_work);
 			flush_workqueue(bond->wq);
 		}
 	}
@@ -1117,7 +1117,7 @@  static ssize_t bonding_store_miimon(struct device *d,
 					BOND_ARP_VALIDATE_NONE;
 			}
 			if (delayed_work_pending(&bond->arp_work)) {
-				cancel_delayed_work(&bond->arp_work);
+				cancel_delayed_work_sync(&bond->arp_work);
 				flush_workqueue(bond->wq);
 			}
 		}