diff mbox

reproducible panic eviction work queue

Message ID 55AF5E2E.5030203@cumulusnetworks.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Nikolay Aleksandrov July 22, 2015, 9:11 a.m. UTC
On 07/22/2015 10:17 AM, Frank Schreuder wrote:
> I got some additional information from syslog:
> 
> Jul 22 09:49:33 dommy0 kernel: [  675.987890] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [kworker/3:1:42]
> Jul 22 09:49:42 dommy0 kernel: [  685.114033] INFO: rcu_sched self-detected stall on CPU { 3}  (t=39918 jiffies g=988 c=987 q=23168)
> 
> Thanks,
> Frank
> 
> 

Hi,
It looks like it's happening because of the evict_again logic. I think we should also
add Florian's first suggestion about simplifying it to the patch and just skip the
entry if we can't delete its timer; otherwise we can restart the eviction, see
entries that already had their timer stopped by us, and keep restarting for
a long time.
Here's an updated patch that removes the evict_again logic.




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Frank Schreuder July 22, 2015, 10:55 a.m. UTC | #1
Hi Nikolay,

Thanks for this patch. I'm no longer able to reproduce this panic on our 
test environment!
The server has been handling >120k fragmented UDP packets per second for 
over 40 minutes.
So far everything is running stable without stacktraces in the logs. All 
other panics happened within 5-10 minutes.

I will let this test environment run for another day or 2. I will inform 
you as soon as something happens!

Thanks,
Frank



Op 7/22/2015 om 11:11 AM schreef Nikolay Aleksandrov:
> On 07/22/2015 10:17 AM, Frank Schreuder wrote:
>> I got some additional information from syslog:
>>
>> Jul 22 09:49:33 dommy0 kernel: [  675.987890] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [kworker/3:1:42]
>> Jul 22 09:49:42 dommy0 kernel: [  685.114033] INFO: rcu_sched self-detected stall on CPU { 3}  (t=39918 jiffies g=988 c=987 q=23168)
>>
>> Thanks,
>> Frank
>>
>>
> Hi,
> It looks like it's happening because of the evict_again logic, I think we should also
> add Florian's first suggestion about simplifying it to the patch and just skip the
> entry if we can't delete its timer otherwise we can restart the eviction and see
> entries that already had their timer stopped by us and can keep restarting for
> a long time.
> Here's an updated patch that removes the evict_again logic.
>
>
> diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
> index e1300b3dd597..56a3a5685f76 100644
> --- a/include/net/inet_frag.h
> +++ b/include/net/inet_frag.h
> @@ -45,6 +45,7 @@ enum {
>    * @flags: fragment queue flags
>    * @max_size: maximum received fragment size
>    * @net: namespace that this frag belongs to
> + * @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
>    */
>   struct inet_frag_queue {
>   	spinlock_t		lock;
> @@ -59,6 +60,7 @@ struct inet_frag_queue {
>   	__u8			flags;
>   	u16			max_size;
>   	struct netns_frags	*net;
> +	struct hlist_node	list_evictor;
>   };
>   
>   #define INETFRAGS_HASHSZ	1024
> diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
> index 5e346a082e5f..aaae37949c14 100644
> --- a/net/ipv4/inet_fragment.c
> +++ b/net/ipv4/inet_fragment.c
> @@ -138,27 +138,17 @@ evict_again:
>   		if (!inet_fragq_should_evict(fq))
>   			continue;
>   
> -		if (!del_timer(&fq->timer)) {
> -			/* q expiring right now thus increment its refcount so
> -			 * it won't be freed under us and wait until the timer
> -			 * has finished executing then destroy it
> -			 */
> -			atomic_inc(&fq->refcnt);
> -			spin_unlock(&hb->chain_lock);
> -			del_timer_sync(&fq->timer);
> -			inet_frag_put(fq, f);
> -			goto evict_again;
> -		}
> +		if (!del_timer(&fq->timer))
> +			continue;
>   
>   		fq->flags |= INET_FRAG_EVICTED;
> -		hlist_del(&fq->list);
> -		hlist_add_head(&fq->list, &expired);
> +		hlist_add_head(&fq->list_evictor, &expired);
>   		++evicted;
>   	}
>   
>   	spin_unlock(&hb->chain_lock);
>   
> -	hlist_for_each_entry_safe(fq, n, &expired, list)
> +	hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
>   		f->frag_expire((unsigned long) fq);
>   
>   	return evicted;
> @@ -284,8 +274,7 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
>   	struct inet_frag_bucket *hb;
>   
>   	hb = get_frag_bucket_locked(fq, f);
> -	if (!(fq->flags & INET_FRAG_EVICTED))
> -		hlist_del(&fq->list);
> +	hlist_del(&fq->list);
>   	spin_unlock(&hb->chain_lock);
>   }
>   
>
>
Florian Westphal July 22, 2015, 1:58 p.m. UTC | #2
Nikolay Aleksandrov <nikolay@cumulusnetworks.com> wrote:
> On 07/22/2015 10:17 AM, Frank Schreuder wrote:
> > I got some additional information from syslog:
> > 
> > Jul 22 09:49:33 dommy0 kernel: [  675.987890] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [kworker/3:1:42]
> > Jul 22 09:49:42 dommy0 kernel: [  685.114033] INFO: rcu_sched self-detected stall on CPU { 3}  (t=39918 jiffies g=988 c=987 q=23168)
> > 
> > Thanks,
> > Frank
> > 
> > 
> 
> Hi,
> It looks like it's happening because of the evict_again logic, I think we should also
> add Florian's first suggestion about simplifying it to the patch and just skip the
> entry if we can't delete its timer otherwise we can restart the eviction and see
> entries that already had their timer stopped by us and can keep restarting for
> a long time.
> Here's an updated patch that removes the evict_again logic.

Thanks Nik.  I'm afraid this adds a bug when netns is exiting.

Currently, we wait until the timer has finished, but after the change
we might destroy the percpu counter while a timer is still executing on
another cpu.

I pushed a patch series to
https://git.breakpoint.cc/cgit/fw/net.git/log/?h=inetfrag_fixes_02

It includes this patch with a small change -- deferral of the percpu
counter subtraction until after the queue has been freed.

Frank -- it would be great if you could test with the four patches in
that series applied.

I'll then add your tested-by Tag to all of them before submitting this.

Thanks again for all your help in getting this fixed!
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nikolay Aleksandrov July 22, 2015, 2:03 p.m. UTC | #3
On 07/22/2015 03:58 PM, Florian Westphal wrote:
> Nikolay Aleksandrov <nikolay@cumulusnetworks.com> wrote:
>> On 07/22/2015 10:17 AM, Frank Schreuder wrote:
>>> I got some additional information from syslog:
>>>
>>> Jul 22 09:49:33 dommy0 kernel: [  675.987890] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [kworker/3:1:42]
>>> Jul 22 09:49:42 dommy0 kernel: [  685.114033] INFO: rcu_sched self-detected stall on CPU { 3}  (t=39918 jiffies g=988 c=987 q=23168)
>>>
>>> Thanks,
>>> Frank
>>>
>>>
>>
>> Hi,
>> It looks like it's happening because of the evict_again logic, I think we should also
>> add Florian's first suggestion about simplifying it to the patch and just skip the
>> entry if we can't delete its timer otherwise we can restart the eviction and see
>> entries that already had their timer stopped by us and can keep restarting for
>> a long time.
>> Here's an updated patch that removes the evict_again logic.
> 
> Thanks Nik.  I'm afraid this adds bug when netns is exiting.
> 
> Currently, we wait until timer has finished, but after the change
> we might destroy percpu counter while a timer is still executing on
> another cpu.
> 
> I pushed a patch series to
> https://git.breakpoint.cc/cgit/fw/net.git/log/?h=inetfrag_fixes_02
> 
> It includes this patch with a small change -- deferral of the percpu
> counter subtraction until after queue has been free'd.
> 
> Frank -- it would be great if you could test with the four patches in
> that series applied.
> 
> I'll then add your tested-by Tag to all of them before submitting this.
> 
> Thanks again for all your help in getting this fixed!
> 

Sure, I didn't think it through, just supplied it for the test. :-)
Thanks for fixing it up!




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nikolay Aleksandrov July 22, 2015, 2:14 p.m. UTC | #4
On 07/22/2015 04:03 PM, Nikolay Aleksandrov wrote:
> On 07/22/2015 03:58 PM, Florian Westphal wrote:
>> Nikolay Aleksandrov <nikolay@cumulusnetworks.com> wrote:
>>> On 07/22/2015 10:17 AM, Frank Schreuder wrote:
>>>> I got some additional information from syslog:
>>>>
>>>> Jul 22 09:49:33 dommy0 kernel: [  675.987890] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [kworker/3:1:42]
>>>> Jul 22 09:49:42 dommy0 kernel: [  685.114033] INFO: rcu_sched self-detected stall on CPU { 3}  (t=39918 jiffies g=988 c=987 q=23168)
>>>>
>>>> Thanks,
>>>> Frank
>>>>
>>>>
>>>
>>> Hi,
>>> It looks like it's happening because of the evict_again logic, I think we should also
>>> add Florian's first suggestion about simplifying it to the patch and just skip the
>>> entry if we can't delete its timer otherwise we can restart the eviction and see
>>> entries that already had their timer stopped by us and can keep restarting for
>>> a long time.
>>> Here's an updated patch that removes the evict_again logic.
>>
>> Thanks Nik.  I'm afraid this adds bug when netns is exiting.
>>
>> Currently, we wait until timer has finished, but after the change
>> we might destroy percpu counter while a timer is still executing on
>> another cpu.
>>
>> I pushed a patch series to
>> https://git.breakpoint.cc/cgit/fw/net.git/log/?h=inetfrag_fixes_02
>>
>> It includes this patch with a small change -- deferral of the percpu
>> counter subtraction until after queue has been free'd.
>>
>> Frank -- it would be great if you could test with the four patches in
>> that series applied.
>>
>> I'll then add your tested-by Tag to all of them before submitting this.
>>
>> Thanks again for all your help in getting this fixed!
>>
> 
> Sure, I didn't think it through, just supplied it for the test. :-)
> Thanks for fixing it up!
> 

Patches look great, even the INET_FRAG_EVICTED flag will not be accidentally cleared 
this way. I'll give them a try.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Frank Schreuder July 22, 2015, 3:31 p.m. UTC | #5
Op 7/22/2015 om 4:14 PM schreef Nikolay Aleksandrov:
> On 07/22/2015 04:03 PM, Nikolay Aleksandrov wrote:
>> On 07/22/2015 03:58 PM, Florian Westphal wrote:
>>> Nikolay Aleksandrov <nikolay@cumulusnetworks.com> wrote:
>>>> On 07/22/2015 10:17 AM, Frank Schreuder wrote:
>>>>> I got some additional information from syslog:
>>>>>
>>>>> Jul 22 09:49:33 dommy0 kernel: [  675.987890] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [kworker/3:1:42]
>>>>> Jul 22 09:49:42 dommy0 kernel: [  685.114033] INFO: rcu_sched self-detected stall on CPU { 3}  (t=39918 jiffies g=988 c=987 q=23168)
>>>>>
>>>>> Thanks,
>>>>> Frank
>>>>>
>>>>>
>>>> Hi,
>>>> It looks like it's happening because of the evict_again logic, I think we should also
>>>> add Florian's first suggestion about simplifying it to the patch and just skip the
>>>> entry if we can't delete its timer otherwise we can restart the eviction and see
>>>> entries that already had their timer stopped by us and can keep restarting for
>>>> a long time.
>>>> Here's an updated patch that removes the evict_again logic.
>>> Thanks Nik.  I'm afraid this adds bug when netns is exiting.
>>>
>>> Currently, we wait until timer has finished, but after the change
>>> we might destroy percpu counter while a timer is still executing on
>>> another cpu.
>>>
>>> I pushed a patch series to
>>> https://git.breakpoint.cc/cgit/fw/net.git/log/?h=inetfrag_fixes_02
>>>
>>> It includes this patch with a small change -- deferral of the percpu
>>> counter subtraction until after queue has been free'd.
>>>
>>> Frank -- it would be great if you could test with the four patches in
>>> that series applied.
>>>
>>> I'll then add your tested-by Tag to all of them before submitting this.
>>>
>>> Thanks again for all your help in getting this fixed!
>>>
>> Sure, I didn't think it through, just supplied it for the test. :-)
>> Thanks for fixing it up!
>>
> Patches look great, even the INET_FRAG_EVICTED flag will not be accidentally cleared
> this way. I'll give them a try.
>
>

Hi,

I'm currently building a new kernel based on 3.18.19 + patches.
One of the patches, however, fails to apply as we don't have a 
"net/ieee802154/6lowpan/" directory.
Modifying the patch to use "net/ieee802154/reassembly.c" does work 
without problems.
Is this due to the different kernel version or something else?

I'll come back to you as soon as I have my first test results.

Thanks,
Frank

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index e1300b3dd597..56a3a5685f76 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -45,6 +45,7 @@  enum {
  * @flags: fragment queue flags
  * @max_size: maximum received fragment size
  * @net: namespace that this frag belongs to
+ * @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
  */
 struct inet_frag_queue {
 	spinlock_t		lock;
@@ -59,6 +60,7 @@  struct inet_frag_queue {
 	__u8			flags;
 	u16			max_size;
 	struct netns_frags	*net;
+	struct hlist_node	list_evictor;
 };
 
 #define INETFRAGS_HASHSZ	1024
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 5e346a082e5f..aaae37949c14 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -138,27 +138,17 @@  evict_again:
 		if (!inet_fragq_should_evict(fq))
 			continue;
 
-		if (!del_timer(&fq->timer)) {
-			/* q expiring right now thus increment its refcount so
-			 * it won't be freed under us and wait until the timer
-			 * has finished executing then destroy it
-			 */
-			atomic_inc(&fq->refcnt);
-			spin_unlock(&hb->chain_lock);
-			del_timer_sync(&fq->timer);
-			inet_frag_put(fq, f);
-			goto evict_again;
-		}
+		if (!del_timer(&fq->timer))
+			continue;
 
 		fq->flags |= INET_FRAG_EVICTED;
-		hlist_del(&fq->list);
-		hlist_add_head(&fq->list, &expired);
+		hlist_add_head(&fq->list_evictor, &expired);
 		++evicted;
 	}
 
 	spin_unlock(&hb->chain_lock);
 
-	hlist_for_each_entry_safe(fq, n, &expired, list)
+	hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
 		f->frag_expire((unsigned long) fq);
 
 	return evicted;
@@ -284,8 +274,7 @@  static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
 	struct inet_frag_bucket *hb;
 
 	hb = get_frag_bucket_locked(fq, f);
-	if (!(fq->flags & INET_FRAG_EVICTED))
-		hlist_del(&fq->list);
+	hlist_del(&fq->list);
 	spin_unlock(&hb->chain_lock);
 }