diff mbox

[RT,1/2] net: add back the missing serialization in ip_send_unicast_reply()

Message ID 20160831160049.14303-1-bigeasy@linutronix.de
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Sebastian Andrzej Siewior Aug. 31, 2016, 4 p.m. UTC
Some time ago Sami Pietikäinen reported a crash on -RT in
ip_send_unicast_reply() which was later fixed by Nicholas Mc Guire
(v3.12.8-rt11). Later (v3.18.8) the code was reworked and I dropped the
patch. As it turns out it was mistake.
I have reports that the same crash is possible with a similar backtrace.
It seems that vanilla protects access to this_cpu_ptr() via
local_bh_disable(). This does not work on -RT since we can have
NET_RX and NET_TX running in parallel on the same CPU.
This is brings back the old locks.

|Unable to handle kernel NULL pointer dereference at virtual address 00000010
|PC is at __ip_make_skb+0x198/0x3e8
|[<c04e39d8>] (__ip_make_skb) from [<c04e3ca8>] (ip_push_pending_frames+0x20/0x40)
|[<c04e3ca8>] (ip_push_pending_frames) from [<c04e3ff0>] (ip_send_unicast_reply+0x210/0x22c)
|[<c04e3ff0>] (ip_send_unicast_reply) from [<c04fbb54>] (tcp_v4_send_reset+0x190/0x1c0)
|[<c04fbb54>] (tcp_v4_send_reset) from [<c04fcc1c>] (tcp_v4_do_rcv+0x22c/0x288)
|[<c04fcc1c>] (tcp_v4_do_rcv) from [<c0474364>] (release_sock+0xb4/0x150)
|[<c0474364>] (release_sock) from [<c04ed904>] (tcp_close+0x240/0x454)
|[<c04ed904>] (tcp_close) from [<c0511408>] (inet_release+0x74/0x7c)
|[<c0511408>] (inet_release) from [<c0470728>] (sock_release+0x30/0xb0)
|[<c0470728>] (sock_release) from [<c0470abc>] (sock_close+0x1c/0x24)
|[<c0470abc>] (sock_close) from [<c0115ec4>] (__fput+0xe8/0x20c)
|[<c0115ec4>] (__fput) from [<c0116050>] (____fput+0x18/0x1c)
|[<c0116050>] (____fput) from [<c0058138>] (task_work_run+0xa4/0xb8)
|[<c0058138>] (task_work_run) from [<c0011478>] (do_work_pending+0xd0/0xe4)
|[<c0011478>] (do_work_pending) from [<c000e740>] (work_pending+0xc/0x20)
|Code: e3530001 8a000001 e3a00040 ea000011 (e5973010)

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 net/ipv4/tcp_ipv4.c | 7 +++++++
 1 file changed, 7 insertions(+)

Comments

Steven Rostedt Aug. 31, 2016, 4:15 p.m. UTC | #1
On Wed, 31 Aug 2016 18:00:48 +0200
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> Some time ago Sami Pietikäinen reported a crash on -RT in
> ip_send_unicast_reply() which was later fixed by Nicholas Mc Guire
> (v3.12.8-rt11). Later (v3.18.8) the code was reworked and I dropped the
> patch. As it turns out it was mistake.
> I have reports that the same crash is possible with a similar backtrace.
> It seems that vanilla protects access to this_cpu_ptr() via
> local_bh_disable(). This does not work on -RT since we can have
> NET_RX and NET_TX running in parallel on the same CPU.
> This is brings back the old locks.
> 
> |Unable to handle kernel NULL pointer dereference at virtual address 00000010
> |PC is at __ip_make_skb+0x198/0x3e8
> |[<c04e39d8>] (__ip_make_skb) from [<c04e3ca8>] (ip_push_pending_frames+0x20/0x40)
> |[<c04e3ca8>] (ip_push_pending_frames) from [<c04e3ff0>] (ip_send_unicast_reply+0x210/0x22c)
> |[<c04e3ff0>] (ip_send_unicast_reply) from [<c04fbb54>] (tcp_v4_send_reset+0x190/0x1c0)
> |[<c04fbb54>] (tcp_v4_send_reset) from [<c04fcc1c>] (tcp_v4_do_rcv+0x22c/0x288)
> |[<c04fcc1c>] (tcp_v4_do_rcv) from [<c0474364>] (release_sock+0xb4/0x150)
> |[<c0474364>] (release_sock) from [<c04ed904>] (tcp_close+0x240/0x454)
> |[<c04ed904>] (tcp_close) from [<c0511408>] (inet_release+0x74/0x7c)
> |[<c0511408>] (inet_release) from [<c0470728>] (sock_release+0x30/0xb0)
> |[<c0470728>] (sock_release) from [<c0470abc>] (sock_close+0x1c/0x24)
> |[<c0470abc>] (sock_close) from [<c0115ec4>] (__fput+0xe8/0x20c)
> |[<c0115ec4>] (__fput) from [<c0116050>] (____fput+0x18/0x1c)
> |[<c0116050>] (____fput) from [<c0058138>] (task_work_run+0xa4/0xb8)
> |[<c0058138>] (task_work_run) from [<c0011478>] (do_work_pending+0xd0/0xe4)
> |[<c0011478>] (do_work_pending) from [<c000e740>] (work_pending+0xc/0x20)
> |Code: e3530001 8a000001 e3a00040 ea000011 (e5973010)
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  net/ipv4/tcp_ipv4.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index ad450509029b..c5521d1f1263 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -62,6 +62,7 @@
>  #include <linux/init.h>
>  #include <linux/times.h>
>  #include <linux/slab.h>
> +#include <linux/locallock.h>
>  
>  #include <net/net_namespace.h>
>  #include <net/icmp.h>
> @@ -565,6 +566,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
>  }
>  EXPORT_SYMBOL(tcp_v4_send_check);
>  
> +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock);
>  /*
>   *	This routine will send an RST to the other tcp.
>   *
> @@ -689,10 +691,13 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
>  		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
>  
>  	arg.tos = ip_hdr(skb)->tos;
> +
> +	local_lock(tcp_sk_lock);

Interesting that I noticed in mainline, they have:

	local_bh_disable();

here.

I'm surprised we don't have a local_lock_bh() or something to that
effect.

-- Steve

>  	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
>  			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
>  			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
>  			      &arg, arg.iov[0].iov_len);
> +	local_unlock(tcp_sk_lock);
>  
>  	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
>  	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
> @@ -774,10 +779,12 @@ static void tcp_v4_send_ack(struct net *net,
>  	if (oif)
>  		arg.bound_dev_if = oif;
>  	arg.tos = tos;
> +	local_lock(tcp_sk_lock);
>  	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
>  			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
>  			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
>  			      &arg, arg.iov[0].iov_len);
> +	local_unlock(tcp_sk_lock);
>  
>  	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
>  }
Sebastian Andrzej Siewior Aug. 31, 2016, 4:36 p.m. UTC | #2
On 2016-08-31 12:15:53 [-0400], Steven Rostedt wrote:
> > @@ -689,10 +691,13 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
> >  		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
> >  
> >  	arg.tos = ip_hdr(skb)->tos;
> > +
> > +	local_lock(tcp_sk_lock);
> 
> Interesting that I noticed in mainline, they have:
> 
> 	local_bh_disable();
> 
> here.
> 
> I'm surprised we don't have a local_lock_bh() or something to that
> effect.

Turning local_bh_disable() into local_lock_bh(). One side effect would
be that the network driver will be flushed out / waited for completion
during socket write (due to the spin_lock_bh()). Not sure how much fun
all this will bring.
We could try this…

> -- Steve

Sebastian
diff mbox

Patch

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad450509029b..c5521d1f1263 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -62,6 +62,7 @@ 
 #include <linux/init.h>
 #include <linux/times.h>
 #include <linux/slab.h>
+#include <linux/locallock.h>
 
 #include <net/net_namespace.h>
 #include <net/icmp.h>
@@ -565,6 +566,7 @@  void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_v4_send_check);
 
+static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock);
 /*
  *	This routine will send an RST to the other tcp.
  *
@@ -689,10 +691,13 @@  static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
 
 	arg.tos = ip_hdr(skb)->tos;
+
+	local_lock(tcp_sk_lock);
 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 			      &arg, arg.iov[0].iov_len);
+	local_unlock(tcp_sk_lock);
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -774,10 +779,12 @@  static void tcp_v4_send_ack(struct net *net,
 	if (oif)
 		arg.bound_dev_if = oif;
 	arg.tos = tos;
+	local_lock(tcp_sk_lock);
 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 			      &arg, arg.iov[0].iov_len);
+	local_unlock(tcp_sk_lock);
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 }