diff mbox

[RFC,v2,net-next] rds-tcp: Take explicit refcounts on struct net

Message ID 1488299601-162004-1-git-send-email-sowmini.varadhan@oracle.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Sowmini Varadhan Feb. 28, 2017, 4:33 p.m. UTC
This is a test patch being supplied for a trial run on syzkaller.

It's incorrect for the rds_connection to piggyback on the
sock_net() refcount for the netns because this gives rise to
a chicken-and-egg problem during rds_conn_destroy. Instead explicitly
take a ref on the net, and hold the netns down till the connection
tear-down is complete.

Dmitry: I'm not sure whether the other 2 panics around rds_sock
use-after-free are also around netns teardown and/or directly related 
to this- they may be unrelated bugs, could we please give this one
a trial run, while watching for the others? 

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
 net/rds/connection.c |    1 +
 net/rds/rds.h        |    6 +++---
 net/rds/tcp.c        |    4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

Comments

Dmitry Vyukov March 2, 2017, 10:07 a.m. UTC | #1
On Tue, Feb 28, 2017 at 5:33 PM, Sowmini Varadhan
<sowmini.varadhan@oracle.com> wrote:
> This is a test patch being supplied for a trial run on syzkaller.
>
> It's incorrect for the rds_connection to piggyback on the
> sock_net() refcount for the netns because this gives rise to
> a chicken-and-egg problem during rds_conn_destroy. Instead explicitly
> take a ref on the net, and hold the netns down till the connection
> tear-down is complete.
>
> Dmitry: I'm not sure whether the other 2 panics around rds_sock
> use-after-free are also around netns teardown and/or directly related
> to this- they may be unrelated bugs, could we please give this one
> a trial run, while watching for the others?


The other 2 does not look like net-related, but you also mailed patch
"Cancel any pending connection attempts before taking down
connection", which looks like it should fix the other 2, right?
I now applied both of your patched on bots. But only happened 1+2
times over the last 2 weeks. So it will require at least a month to
make a weak conclusion that it might have helped. So I would suggest
to either (1) re-review the crash reports, the code and the fix and
commit it if everything looks consistent, or (2) write a stress test
that provokes the bugs as much as possible, add some sleeps into the
kernel code, reproduce the crashes and check that the patches fix
them.




> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
> ---
>  net/rds/connection.c |    1 +
>  net/rds/rds.h        |    6 +++---
>  net/rds/tcp.c        |    4 ++--
>  3 files changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/net/rds/connection.c b/net/rds/connection.c
> index 0e04dcc..1fa75ab 100644
> --- a/net/rds/connection.c
> +++ b/net/rds/connection.c
> @@ -429,6 +429,7 @@ void rds_conn_destroy(struct rds_connection *conn)
>          */
>         rds_cong_remove_conn(conn);
>
> +       put_net(conn->c_net);
>         kmem_cache_free(rds_conn_slab, conn);
>
>         spin_lock_irqsave(&rds_conn_lock, flags);
> diff --git a/net/rds/rds.h b/net/rds/rds.h
> index 07fff73..219628f 100644
> --- a/net/rds/rds.h
> +++ b/net/rds/rds.h
> @@ -147,7 +147,7 @@ struct rds_connection {
>
>         /* Protocol version */
>         unsigned int            c_version;
> -       possible_net_t          c_net;
> +       struct net              *c_net;
>
>         struct list_head        c_map_item;
>         unsigned long           c_map_queued;
> @@ -162,13 +162,13 @@ struct rds_connection {
>  static inline
>  struct net *rds_conn_net(struct rds_connection *conn)
>  {
> -       return read_pnet(&conn->c_net);
> +       return conn->c_net;
>  }
>
>  static inline
>  void rds_conn_net_set(struct rds_connection *conn, struct net *net)
>  {
> -       write_pnet(&conn->c_net, net);
> +       conn->c_net = get_net(net);
>  }
>
>  #define RDS_FLAG_CONG_BITMAP   0x01
> diff --git a/net/rds/tcp.c b/net/rds/tcp.c
> index 57bb523..bf4c6a3 100644
> --- a/net/rds/tcp.c
> +++ b/net/rds/tcp.c
> @@ -529,7 +529,7 @@ static void rds_tcp_kill_sock(struct net *net)
>         flush_work(&rtn->rds_tcp_accept_w);
>         spin_lock_irq(&rds_tcp_conn_lock);
>         list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
> -               struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
> +               struct net *c_net = tc->t_cpath->cp_conn->c_net;
>
>                 if (net != c_net || !tc->t_sock)
>                         continue;
> @@ -584,7 +584,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
>
>         spin_lock_irq(&rds_tcp_conn_lock);
>         list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
> -               struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
> +               struct net *c_net = tc->t_cpath->cp_conn->c_net;
>
>                 if (net != c_net || !tc->t_sock)
>                         continue;
> --
> 1.7.1
>
Sowmini Varadhan March 2, 2017, 10:46 a.m. UTC | #2
On (03/02/17 11:07), Dmitry Vyukov wrote:
> 
> The other 2 does not look like net-related, but you also mailed patch
> "Cancel any pending connection attempts before taking down
> connection", which looks like it should fix the other 2, right?

no, that patch was still broken.. because, as you pointed out,
it only takes care of one workq, and not the other workqs. Also,
there are a number of clean up operations performed on the socket
associated with the rds_connection, all of which could potentially
be in jeopardy if the race is happening as suspected. I think the
v2 patch (this subject line) is the more appropriate fix- I see
that same thing is being done for svc_xprt's xpt_net, for example.

> I now applied both of your patched on bots. But only happened 1+2
> times over the last 2 weeks. So it will require at least a month to
> make a weak conclusion that it might have helped. So I would suggest
> to either (1) re-review the crash reports, the code and the fix and
> commit it if everything looks consistent, or (2) write a stress test
> that provokes the bugs as much as possible, add some sleeps into the
> kernel code, reproduce the crashes and check that the patches fix
> them.

I can try both, but IME reproducing such things is quite challenging.
Even with things like dtrace-chill on other OSes, it can take a loong
time to nail it. Let's give it a week, while I try out (1) at least.

--Sowmini
diff mbox

Patch

diff --git a/net/rds/connection.c b/net/rds/connection.c
index 0e04dcc..1fa75ab 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -429,6 +429,7 @@  void rds_conn_destroy(struct rds_connection *conn)
 	 */
 	rds_cong_remove_conn(conn);
 
+	put_net(conn->c_net);
 	kmem_cache_free(rds_conn_slab, conn);
 
 	spin_lock_irqsave(&rds_conn_lock, flags);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 07fff73..219628f 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -147,7 +147,7 @@  struct rds_connection {
 
 	/* Protocol version */
 	unsigned int		c_version;
-	possible_net_t		c_net;
+	struct net		*c_net;
 
 	struct list_head	c_map_item;
 	unsigned long		c_map_queued;
@@ -162,13 +162,13 @@  struct rds_connection {
 static inline
 struct net *rds_conn_net(struct rds_connection *conn)
 {
-	return read_pnet(&conn->c_net);
+	return conn->c_net;
 }
 
 static inline
 void rds_conn_net_set(struct rds_connection *conn, struct net *net)
 {
-	write_pnet(&conn->c_net, net);
+	conn->c_net = get_net(net);
 }
 
 #define RDS_FLAG_CONG_BITMAP	0x01
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 57bb523..bf4c6a3 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -529,7 +529,7 @@  static void rds_tcp_kill_sock(struct net *net)
 	flush_work(&rtn->rds_tcp_accept_w);
 	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+		struct net *c_net = tc->t_cpath->cp_conn->c_net;
 
 		if (net != c_net || !tc->t_sock)
 			continue;
@@ -584,7 +584,7 @@  static void rds_tcp_sysctl_reset(struct net *net)
 
 	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+		struct net *c_net = tc->t_cpath->cp_conn->c_net;
 
 		if (net != c_net || !tc->t_sock)
 			continue;