diff mbox

[2/2] usbnet: Fix a race between usbnet_stop() and the BH

Message ID 1440447223-15945-3-git-send-email-eugene.shatokhin@rosalab.ru
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Eugene Shatokhin Aug. 24, 2015, 8:13 p.m. UTC
The race may happen when a device (e.g. YOTA 4G LTE Modem) is
unplugged while the system is downloading a large file from the Net.

Hardware breakpoints and Kprobes with delays were used to confirm that
the race does actually happen.

The race is on skb_queue ('next' pointer) between usbnet_stop()
and rx_complete(), which, in turn, calls usbnet_bh().

Here is a part of the call stack with the code where the changes to the
queue happen. The line numbers are for the kernel 4.1.0:

*0 __skb_unlink (skbuff.h:1517)
    prev->next = next;
*1 defer_bh (usbnet.c:430)
    spin_lock_irqsave(&list->lock, flags);
    old_state = entry->state;
    entry->state = state;
    __skb_unlink(skb, list);
    spin_unlock(&list->lock);
    spin_lock(&dev->done.lock);
    __skb_queue_tail(&dev->done, skb);
    if (dev->done.qlen == 1)
        tasklet_schedule(&dev->bh);
    spin_unlock_irqrestore(&dev->done.lock, flags);
*2 rx_complete (usbnet.c:640)
    state = defer_bh(dev, skb, &dev->rxq, state);

At the same time, the following code repeatedly checks if the queue is
empty and reads these values concurrently with the above changes:

*0  usbnet_terminate_urbs (usbnet.c:765)
    /* maybe wait for deletions to finish. */
    while (!skb_queue_empty(&dev->rxq)
        && !skb_queue_empty(&dev->txq)
        && !skb_queue_empty(&dev->done)) {
            schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
            set_current_state(TASK_UNINTERRUPTIBLE);
            netif_dbg(dev, ifdown, dev->net,
                  "waited for %d urb completions\n", temp);
    }
*1  usbnet_stop (usbnet.c:806)
    if (!(info->flags & FLAG_AVOID_UNLINK_URBS))
        usbnet_terminate_urbs(dev);

As a result, it is possible, for example, that the skb is removed from
dev->rxq by __skb_unlink() before the check
"!skb_queue_empty(&dev->rxq)" in usbnet_terminate_urbs() is made. It is
also possible in this case that the skb is added to dev->done queue
after "!skb_queue_empty(&dev->done)" is checked. So
usbnet_terminate_urbs() may stop waiting and return while dev->done
queue still has an item.

Locking in defer_bh() and usbnet_terminate_urbs() was revisited to avoid
this race.

Signed-off-by: Eugene Shatokhin <eugene.shatokhin@rosalab.ru>
---
 drivers/net/usb/usbnet.c | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

Comments

Bjørn Mork Aug. 24, 2015, 9:01 p.m. UTC | #1
Eugene Shatokhin <eugene.shatokhin@rosalab.ru> writes:

> The race may happen when a device (e.g. YOTA 4G LTE Modem) is
> unplugged while the system is downloading a large file from the Net.
>
> Hardware breakpoints and Kprobes with delays were used to confirm that
> the race does actually happen.
>
> The race is on skb_queue ('next' pointer) between usbnet_stop()
> and rx_complete(), which, in turn, calls usbnet_bh().
>
> Here is a part of the call stack with the code where the changes to the
> queue happen. The line numbers are for the kernel 4.1.0:
>
> *0 __skb_unlink (skbuff.h:1517)
>     prev->next = next;
> *1 defer_bh (usbnet.c:430)
>     spin_lock_irqsave(&list->lock, flags);
>     old_state = entry->state;
>     entry->state = state;
>     __skb_unlink(skb, list);
>     spin_unlock(&list->lock);
>     spin_lock(&dev->done.lock);
>     __skb_queue_tail(&dev->done, skb);
>     if (dev->done.qlen == 1)
>         tasklet_schedule(&dev->bh);
>     spin_unlock_irqrestore(&dev->done.lock, flags);
> *2 rx_complete (usbnet.c:640)
>     state = defer_bh(dev, skb, &dev->rxq, state);
>
> At the same time, the following code repeatedly checks if the queue is
> empty and reads these values concurrently with the above changes:
>
> *0  usbnet_terminate_urbs (usbnet.c:765)
>     /* maybe wait for deletions to finish. */
>     while (!skb_queue_empty(&dev->rxq)
>         && !skb_queue_empty(&dev->txq)
>         && !skb_queue_empty(&dev->done)) {
>             schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
>             set_current_state(TASK_UNINTERRUPTIBLE);
>             netif_dbg(dev, ifdown, dev->net,
>                   "waited for %d urb completions\n", temp);
>     }
> *1  usbnet_stop (usbnet.c:806)
>     if (!(info->flags & FLAG_AVOID_UNLINK_URBS))
>         usbnet_terminate_urbs(dev);
>
> As a result, it is possible, for example, that the skb is removed from
> dev->rxq by __skb_unlink() before the check
> "!skb_queue_empty(&dev->rxq)" in usbnet_terminate_urbs() is made. It is
> also possible in this case that the skb is added to dev->done queue
> after "!skb_queue_empty(&dev->done)" is checked. So
> usbnet_terminate_urbs() may stop waiting and return while dev->done
> queue still has an item.

Exactly what problem will that result in?  The tasklet_kill() will wait
for the processing of the single element done queue, and everything will
be fine.  Or?


Bjørn

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Aug. 26, 2015, 2:45 a.m. UTC | #2
From: Eugene Shatokhin <eugene.shatokhin@rosalab.ru>
Date: Mon, 24 Aug 2015 23:13:43 +0300

> The race may happen when a device (e.g. YOTA 4G LTE Modem) is
> unplugged while the system is downloading a large file from the Net.
> 
> Hardware breakpoints and Kprobes with delays were used to confirm that
> the race does actually happen.
> 
> The race is on skb_queue ('next' pointer) between usbnet_stop()
> and rx_complete(), which, in turn, calls usbnet_bh().
> 
> Here is a part of the call stack with the code where the changes to the
> queue happen. The line numbers are for the kernel 4.1.0:
 ...

It looks like this patch needs more discussion/work.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eugene Shatokhin Aug. 28, 2015, 8:09 a.m. UTC | #3
25.08.2015 00:01, Bjørn Mork пишет:
> Eugene Shatokhin <eugene.shatokhin@rosalab.ru> writes:
>
>> The race may happen when a device (e.g. YOTA 4G LTE Modem) is
>> unplugged while the system is downloading a large file from the Net.
>>
>> Hardware breakpoints and Kprobes with delays were used to confirm that
>> the race does actually happen.
>>
>> The race is on skb_queue ('next' pointer) between usbnet_stop()
>> and rx_complete(), which, in turn, calls usbnet_bh().
>>
>> Here is a part of the call stack with the code where the changes to the
>> queue happen. The line numbers are for the kernel 4.1.0:
>>
>> *0 __skb_unlink (skbuff.h:1517)
>>      prev->next = next;
>> *1 defer_bh (usbnet.c:430)
>>      spin_lock_irqsave(&list->lock, flags);
>>      old_state = entry->state;
>>      entry->state = state;
>>      __skb_unlink(skb, list);
>>      spin_unlock(&list->lock);
>>      spin_lock(&dev->done.lock);
>>      __skb_queue_tail(&dev->done, skb);
>>      if (dev->done.qlen == 1)
>>          tasklet_schedule(&dev->bh);
>>      spin_unlock_irqrestore(&dev->done.lock, flags);
>> *2 rx_complete (usbnet.c:640)
>>      state = defer_bh(dev, skb, &dev->rxq, state);
>>
>> At the same time, the following code repeatedly checks if the queue is
>> empty and reads these values concurrently with the above changes:
>>
>> *0  usbnet_terminate_urbs (usbnet.c:765)
>>      /* maybe wait for deletions to finish. */
>>      while (!skb_queue_empty(&dev->rxq)
>>          && !skb_queue_empty(&dev->txq)
>>          && !skb_queue_empty(&dev->done)) {
>>              schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
>>              set_current_state(TASK_UNINTERRUPTIBLE);
>>              netif_dbg(dev, ifdown, dev->net,
>>                    "waited for %d urb completions\n", temp);
>>      }
>> *1  usbnet_stop (usbnet.c:806)
>>      if (!(info->flags & FLAG_AVOID_UNLINK_URBS))
>>          usbnet_terminate_urbs(dev);
>>
>> As a result, it is possible, for example, that the skb is removed from
>> dev->rxq by __skb_unlink() before the check
>> "!skb_queue_empty(&dev->rxq)" in usbnet_terminate_urbs() is made. It is
>> also possible in this case that the skb is added to dev->done queue
>> after "!skb_queue_empty(&dev->done)" is checked. So
>> usbnet_terminate_urbs() may stop waiting and return while dev->done
>> queue still has an item.
>
> Exactly what problem will that result in?  The tasklet_kill() will wait
> for the processing of the single element done queue, and everything will
> be fine.  Or?

Given enough time, what prevents defer_bh() from calling 
tasklet_schedule(&dev->bh) *after* usbnet_stop() calls tasklet_kill()?

Consider the following situation (assuming '&&' are changed to '||' in 
that while loop in usbnet_terminate_urbs() as they should be):

CPU0                            CPU1
usbnet_stop()                   defer_bh() with list == dev->rxq
   usbnet_terminate_urbs()
                                 __skb_unlink() removes the last
                                 skb from dev->rxq.
                                 dev->rxq, dev->txq and dev->done
                                 are now empty.
   while (!skb_queue_empty()...)
     The loop ends because all 3
     queues are now empty.

   usbnet_terminate_urbs() ends.

usbnet_stop() continues:
   usbnet_status_stop(dev);
   ...
   del_timer_sync (&dev->delay);
   tasklet_kill (&dev->bh);
                                 __skb_queue_tail(&dev->done, skb);
                                 if (dev->done.qlen == 1)
                                   tasklet_schedule(&dev->bh);

The BH is scheduled at this point, which is not what was intended. The 
race window is small, but still.

Regards,
Eugene

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjørn Mork Aug. 28, 2015, 8:55 a.m. UTC | #4
Eugene Shatokhin <eugene.shatokhin@rosalab.ru> writes:

> 25.08.2015 00:01, Bjørn Mork пишет:
>> Eugene Shatokhin <eugene.shatokhin@rosalab.ru> writes:
>>
>>> The race may happen when a device (e.g. YOTA 4G LTE Modem) is
>>> unplugged while the system is downloading a large file from the Net.
>>>
>>> Hardware breakpoints and Kprobes with delays were used to confirm that
>>> the race does actually happen.
>>>
>>> The race is on skb_queue ('next' pointer) between usbnet_stop()
>>> and rx_complete(), which, in turn, calls usbnet_bh().
>>>
>>> Here is a part of the call stack with the code where the changes to the
>>> queue happen. The line numbers are for the kernel 4.1.0:
>>>
>>> *0 __skb_unlink (skbuff.h:1517)
>>>      prev->next = next;
>>> *1 defer_bh (usbnet.c:430)
>>>      spin_lock_irqsave(&list->lock, flags);
>>>      old_state = entry->state;
>>>      entry->state = state;
>>>      __skb_unlink(skb, list);
>>>      spin_unlock(&list->lock);
>>>      spin_lock(&dev->done.lock);
>>>      __skb_queue_tail(&dev->done, skb);
>>>      if (dev->done.qlen == 1)
>>>          tasklet_schedule(&dev->bh);
>>>      spin_unlock_irqrestore(&dev->done.lock, flags);
>>> *2 rx_complete (usbnet.c:640)
>>>      state = defer_bh(dev, skb, &dev->rxq, state);
>>>
>>> At the same time, the following code repeatedly checks if the queue is
>>> empty and reads these values concurrently with the above changes:
>>>
>>> *0  usbnet_terminate_urbs (usbnet.c:765)
>>>      /* maybe wait for deletions to finish. */
>>>      while (!skb_queue_empty(&dev->rxq)
>>>          && !skb_queue_empty(&dev->txq)
>>>          && !skb_queue_empty(&dev->done)) {
>>>              schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
>>>              set_current_state(TASK_UNINTERRUPTIBLE);
>>>              netif_dbg(dev, ifdown, dev->net,
>>>                    "waited for %d urb completions\n", temp);
>>>      }
>>> *1  usbnet_stop (usbnet.c:806)
>>>      if (!(info->flags & FLAG_AVOID_UNLINK_URBS))
>>>          usbnet_terminate_urbs(dev);
>>>
>>> As a result, it is possible, for example, that the skb is removed from
>>> dev->rxq by __skb_unlink() before the check
>>> "!skb_queue_empty(&dev->rxq)" in usbnet_terminate_urbs() is made. It is
>>> also possible in this case that the skb is added to dev->done queue
>>> after "!skb_queue_empty(&dev->done)" is checked. So
>>> usbnet_terminate_urbs() may stop waiting and return while dev->done
>>> queue still has an item.
>>
>> Exactly what problem will that result in?  The tasklet_kill() will wait
>> for the processing of the single element done queue, and everything will
>> be fine.  Or?
>
> Given enough time, what prevents defer_bh() from calling
> tasklet_schedule(&dev->bh) *after* usbnet_stop() calls tasklet_kill()?
>
> Consider the following situation (assuming '&&' are changed to '||' in
> that while loop in usbnet_terminate_urbs() as they should be):
>
> CPU0                            CPU1
> usbnet_stop()                   defer_bh() with list == dev->rxq
>   usbnet_terminate_urbs()
>                                 __skb_unlink() removes the last
>                                 skb from dev->rxq.
>                                 dev->rxq, dev->txq and dev->done
>                                 are now empty.
>   while (!skb_queue_empty()...)
>     The loop ends because all 3
>     queues are now empty.
>
>   usbnet_terminate_urbs() ends.
>
> usbnet_stop() continues:
>   usbnet_status_stop(dev);
>   ...
>   del_timer_sync (&dev->delay);
>   tasklet_kill (&dev->bh);
>                                 __skb_queue_tail(&dev->done, skb);
>                                 if (dev->done.qlen == 1)
>                                   tasklet_schedule(&dev->bh);
>
> The BH is scheduled at this point, which is not what was intended. The
> race window is small, but still.

I guess you are right.  At least I cannot prove that you are not :)

There is a bit too much complexity involved here for me...



Bjørn
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eugene Shatokhin Aug. 28, 2015, 10:42 a.m. UTC | #5
28.08.2015 11:55, Bjørn Mork пишет:
> Eugene Shatokhin <eugene.shatokhin@rosalab.ru> writes:
>
>> 25.08.2015 00:01, Bjørn Mork пишет:
>>> Eugene Shatokhin <eugene.shatokhin@rosalab.ru> writes:
>>>
>>>> The race may happen when a device (e.g. YOTA 4G LTE Modem) is
>>>> unplugged while the system is downloading a large file from the Net.
>>>>
>>>> Hardware breakpoints and Kprobes with delays were used to confirm that
>>>> the race does actually happen.
>>>>
>>>> The race is on skb_queue ('next' pointer) between usbnet_stop()
>>>> and rx_complete(), which, in turn, calls usbnet_bh().
>>>>
>>>> Here is a part of the call stack with the code where the changes to the
>>>> queue happen. The line numbers are for the kernel 4.1.0:
>>>>
>>>> *0 __skb_unlink (skbuff.h:1517)
>>>>       prev->next = next;
>>>> *1 defer_bh (usbnet.c:430)
>>>>       spin_lock_irqsave(&list->lock, flags);
>>>>       old_state = entry->state;
>>>>       entry->state = state;
>>>>       __skb_unlink(skb, list);
>>>>       spin_unlock(&list->lock);
>>>>       spin_lock(&dev->done.lock);
>>>>       __skb_queue_tail(&dev->done, skb);
>>>>       if (dev->done.qlen == 1)
>>>>           tasklet_schedule(&dev->bh);
>>>>       spin_unlock_irqrestore(&dev->done.lock, flags);
>>>> *2 rx_complete (usbnet.c:640)
>>>>       state = defer_bh(dev, skb, &dev->rxq, state);
>>>>
>>>> At the same time, the following code repeatedly checks if the queue is
>>>> empty and reads these values concurrently with the above changes:
>>>>
>>>> *0  usbnet_terminate_urbs (usbnet.c:765)
>>>>       /* maybe wait for deletions to finish. */
>>>>       while (!skb_queue_empty(&dev->rxq)
>>>>           && !skb_queue_empty(&dev->txq)
>>>>           && !skb_queue_empty(&dev->done)) {
>>>>               schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
>>>>               set_current_state(TASK_UNINTERRUPTIBLE);
>>>>               netif_dbg(dev, ifdown, dev->net,
>>>>                     "waited for %d urb completions\n", temp);
>>>>       }
>>>> *1  usbnet_stop (usbnet.c:806)
>>>>       if (!(info->flags & FLAG_AVOID_UNLINK_URBS))
>>>>           usbnet_terminate_urbs(dev);
>>>>
>>>> As a result, it is possible, for example, that the skb is removed from
>>>> dev->rxq by __skb_unlink() before the check
>>>> "!skb_queue_empty(&dev->rxq)" in usbnet_terminate_urbs() is made. It is
>>>> also possible in this case that the skb is added to dev->done queue
>>>> after "!skb_queue_empty(&dev->done)" is checked. So
>>>> usbnet_terminate_urbs() may stop waiting and return while dev->done
>>>> queue still has an item.
>>>
>>> Exactly what problem will that result in?  The tasklet_kill() will wait
>>> for the processing of the single element done queue, and everything will
>>> be fine.  Or?
>>
>> Given enough time, what prevents defer_bh() from calling
>> tasklet_schedule(&dev->bh) *after* usbnet_stop() calls tasklet_kill()?
>>
>> Consider the following situation (assuming '&&' are changed to '||' in
>> that while loop in usbnet_terminate_urbs() as they should be):
>>
>> CPU0                            CPU1
>> usbnet_stop()                   defer_bh() with list == dev->rxq
>>    usbnet_terminate_urbs()
>>                                  __skb_unlink() removes the last
>>                                  skb from dev->rxq.
>>                                  dev->rxq, dev->txq and dev->done
>>                                  are now empty.
>>    while (!skb_queue_empty()...)
>>      The loop ends because all 3
>>      queues are now empty.
>>
>>    usbnet_terminate_urbs() ends.
>>
>> usbnet_stop() continues:
>>    usbnet_status_stop(dev);
>>    ...
>>    del_timer_sync (&dev->delay);
>>    tasklet_kill (&dev->bh);
>>                                  __skb_queue_tail(&dev->done, skb);
>>                                  if (dev->done.qlen == 1)
>>                                    tasklet_schedule(&dev->bh);
>>
>> The BH is scheduled at this point, which is not what was intended. The
>> race window is small, but still.
>
> I guess you are right.  At least I cannot prove that you are not :)
>
> There is a bit too much complexity involved here for me...

:-)

Yes, it is quite complex.

I admit, it was easier for me to find the races in usbnet (the tools 
like KernelStrider and RaceHound do the dirty work) than to analyze 
their consequences. The latter often requires some time and effort, and 
so it did this time.

Well, any objections to this patch?

Regards,

Eugene

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjørn Mork Aug. 31, 2015, 7:32 a.m. UTC | #6
Eugene Shatokhin <eugene.shatokhin@rosalab.ru> writes:
> 28.08.2015 11:55, Bjørn Mork пишет:
>
>> I guess you are right.  At least I cannot prove that you are not :)
>>
>> There is a bit too much complexity involved here for me...
>
> :-)
>
> Yes, it is quite complex.
>
> I admit, it was easier for me to find the races in usbnet (the tools
> like KernelStrider and RaceHound do the dirty work) than to analyze
> their consequences. The latter often requires some time and effort,
> and so it did this time.
>
> Well, any objections to this patch?

No objections from me.

But I would have liked it much better if the code became simpler instead
of more complex.


Bjørn
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eugene Shatokhin Aug. 31, 2015, 8:50 a.m. UTC | #7
31.08.2015 10:32, Bjørn Mork пишет:
> Eugene Shatokhin <eugene.shatokhin@rosalab.ru> writes:
>> 28.08.2015 11:55, Bjørn Mork пишет:
>>
>>> I guess you are right.  At least I cannot prove that you are not :)
>>>
>>> There is a bit too much complexity involved here for me...
>>
>> :-)
>>
>> Yes, it is quite complex.
>>
>> I admit, it was easier for me to find the races in usbnet (the tools
>> like KernelStrider and RaceHound do the dirty work) than to analyze
>> their consequences. The latter often requires some time and effort,
>> and so it did this time.
>>
>> Well, any objections to this patch?
>
> No objections from me.
>
> But I would have liked it much better if the code became simpler instead
> of more complex.

Me too, but I can see no other way here. The code is simpler without 
locking, indeed, but locking is needed to prevent the problems described 
earlier.

One needs to make sure that checking if txq or rxq is empty in 
usbnet_terminate_urbs() cannot get inbetween of processing of these 
queues and dev->done in defer_bh(). So 'list' and 'dev->done' must be 
updated under a common lock in defer_bh(). list->lock is an obvious 
candidate for this.

For the same reason, skb_queue_empty(q) must be called under q->lock. So 
the code takes it, calls skb_queue_empty(q) and then releases it to wait 
a little. Rinse and repeat.

The last complex piece is that spin_lock_nested() in defer_bh. It is 
safe to take both list->lock and dev->done.lock there (defer_bh can only 
be called for list = dev->rxq or dev->txq but not for dev->done). For 
lockdep, however, this is suspicious because '*list' and 'dev->done' are 
of the same type so the lock class is the same. So it complained.

To tell lockdep it is OK to use such locking scheme in this particular 
case, the recommended pattern was used: spin_lock_nested with 
SINGLE_DEPTH_NESTING.

Regards,
Eugene

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Oliver Neukum Sept. 1, 2015, 7:57 a.m. UTC | #8
On Fri, 2015-08-28 at 11:09 +0300, Eugene Shatokhin wrote:
> > Exactly what problem will that result in?  The tasklet_kill() will
> wait
> > for the processing of the single element done queue, and everything
> will
> > be fine.  Or?
> 
> Given enough time, what prevents defer_bh() from calling 
> tasklet_schedule(&dev->bh) *after* usbnet_stop() calls tasklet_kill()?
> 
> Consider the following situation (assuming '&&' are changed to '||'
> in 
> that while loop in usbnet_terminate_urbs() as they should be):
> 
> CPU0                            CPU1
> usbnet_stop()                   defer_bh() with list == dev->rxq
>    usbnet_terminate_urbs()
>                                  __skb_unlink() removes the last
>                                  skb from dev->rxq.
>                                  dev->rxq, dev->txq and dev->done
>                                  are now empty.
>    while (!skb_queue_empty()...)
>      The loop ends because all 3
>      queues are now empty.
> 
>    usbnet_terminate_urbs() ends.
> 
> usbnet_stop() continues:
>    usbnet_status_stop(dev);
>    ...
>    del_timer_sync (&dev->delay);
>    tasklet_kill (&dev->bh);
>                                  __skb_queue_tail(&dev->done, skb);
>                                  if (dev->done.qlen == 1)
>                                    tasklet_schedule(&dev->bh);
> 
> The BH is scheduled at this point, which is not what was intended.
> The 
> race window is small, but still.

This looks possible. I cannot come up with a better fix, although
it isn't nice. Thanks for finding this.

	Regards
		Oliver


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Oliver Neukum Sept. 1, 2015, 7:58 a.m. UTC | #9
On Mon, 2015-08-31 at 11:50 +0300, Eugene Shatokhin wrote:
> > But I would have liked it much better if the code became simpler
> instead
> > of more complex.
> 
> Me too, but I can see no other way here. The code is simpler without 
> locking, indeed, but locking is needed to prevent the problems
> described 
> earlier.

On a practical note, will you resend the patch?

	Regards
		Oliver



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eugene Shatokhin Sept. 1, 2015, 1:54 p.m. UTC | #10
01.09.2015 10:58, Oliver Neukum пишет:
> On Mon, 2015-08-31 at 11:50 +0300, Eugene Shatokhin wrote:
>>> But I would have liked it much better if the code became simpler
>> instead
>>> of more complex.
>>
>> Me too, but I can see no other way here. The code is simpler without
>> locking, indeed, but locking is needed to prevent the problems
>> described
>> earlier.
>
> On a practical note, will you resend the patch?

Yes, I will do it shortly. Thanks for your help!

Regards,

Eugene



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index e049857..b4cf107 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -428,12 +428,18 @@  static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb,
 	old_state = entry->state;
 	entry->state = state;
 	__skb_unlink(skb, list);
-	spin_unlock(&list->lock);
-	spin_lock(&dev->done.lock);
+
+	/* defer_bh() is never called with list == &dev->done.
+	 * spin_lock_nested() tells lockdep that it is OK to take
+	 * dev->done.lock here with list->lock held.
+	 */
+	spin_lock_nested(&dev->done.lock, SINGLE_DEPTH_NESTING);
+
 	__skb_queue_tail(&dev->done, skb);
 	if (dev->done.qlen == 1)
 		tasklet_schedule(&dev->bh);
-	spin_unlock_irqrestore(&dev->done.lock, flags);
+	spin_unlock(&dev->done.lock);
+	spin_unlock_irqrestore(&list->lock, flags);
 	return old_state;
 }
 
@@ -749,6 +755,20 @@  EXPORT_SYMBOL_GPL(usbnet_unlink_rx_urbs);
 
 /*-------------------------------------------------------------------------*/
 
+static void wait_skb_queue_empty(struct sk_buff_head *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->lock, flags);
+	while (!skb_queue_empty(q)) {
+		spin_unlock_irqrestore(&q->lock, flags);
+		schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		spin_lock_irqsave(&q->lock, flags);
+	}
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+
 // precondition: never called in_interrupt
 static void usbnet_terminate_urbs(struct usbnet *dev)
 {
@@ -762,14 +782,11 @@  static void usbnet_terminate_urbs(struct usbnet *dev)
 		unlink_urbs(dev, &dev->rxq);
 
 	/* maybe wait for deletions to finish. */
-	while (!skb_queue_empty(&dev->rxq)
-		&& !skb_queue_empty(&dev->txq)
-		&& !skb_queue_empty(&dev->done)) {
-			schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			netif_dbg(dev, ifdown, dev->net,
-				  "waited for %d urb completions\n", temp);
-	}
+	wait_skb_queue_empty(&dev->rxq);
+	wait_skb_queue_empty(&dev->txq);
+	wait_skb_queue_empty(&dev->done);
+	netif_dbg(dev, ifdown, dev->net,
+		  "waited for %d urb completions\n", temp);
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&dev->wait, &wait);
 }