diff mbox

[net-next,3/3] tcp/dccp: fix race at listener dismantle phase

Message ID 1444845519-20064-4-git-send-email-edumazet@google.com
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet Oct. 14, 2015, 5:58 p.m. UTC
Under stress, a close() on a listener can trigger the
WARN_ON(sk->sk_ack_backlog) in inet_csk_listen_stop().

We need to test whether the listener is still active before queueing
a child in inet_csk_reqsk_queue_add().

Create a common inet_child_forget() helper and use it
from both inet_csk_reqsk_queue_add() and inet_csk_listen_stop().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/inet_connection_sock.h |  9 ++---
 include/net/request_sock.h         | 19 ----------
 net/ipv4/inet_connection_sock.c    | 73 ++++++++++++++++++++++++++------------
 3 files changed, 52 insertions(+), 49 deletions(-)

Comments

Eric Dumazet Oct. 14, 2015, 6:13 p.m. UTC | #1
On Wed, 2015-10-14 at 10:58 -0700, Eric Dumazet wrote:


...


> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index a5a1b54915e5..38b7ef8b0b78 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -740,7 +740,7 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
>  
>  	reqsk_queue_alloc(&icsk->icsk_accept_queue);
>  
> -	sk->sk_max_ack_backlog = 0;
> +	sk->sk_max_ack_backlog = nr_table_entries;
>  	sk->sk_ack_backlog = 0;
>  	inet_csk_delack_init(sk);
>  
> @@ -764,6 +764,53 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)


Argh, this part (the sk_max_ack_backlog change quoted above) was not meant to be there, sorry. I will send a v2.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
kernel test robot Oct. 14, 2015, 7:27 p.m. UTC | #2
Hi Eric,

[auto build test WARNING on net-next/master -- if it's an inappropriate base, please suggest rules for selecting a more suitable base]

url:    https://github.com/0day-ci/linux/commits/Eric-Dumazet/tcp-dccp-make-our-listener-code-more-robust/20151015-020006
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

>> net/ipv4/tcp_input.c:6238:17: sparse: context imbalance in 'tcp_conn_request' - unexpected unlock

vim +/tcp_conn_request +6238 net/ipv4/tcp_input.c

f7b3bec6 Florian Westphal 2014-11-03  6222  
f7b3bec6 Florian Westphal 2014-11-03  6223  	if (want_cookie) {
f7b3bec6 Florian Westphal 2014-11-03  6224  		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
f7b3bec6 Florian Westphal 2014-11-03  6225  		req->cookie_ts = tmp_opt.tstamp_ok;
f7b3bec6 Florian Westphal 2014-11-03  6226  		if (!tmp_opt.tstamp_ok)
f7b3bec6 Florian Westphal 2014-11-03  6227  			inet_rsk(req)->ecn_ok = 0;
f7b3bec6 Florian Westphal 2014-11-03  6228  	}
f7b3bec6 Florian Westphal 2014-11-03  6229  
1fb6f159 Octavian Purdila 2014-06-25  6230  	tcp_rsk(req)->snt_isn = isn;
58d607d3 Eric Dumazet     2015-09-15  6231  	tcp_rsk(req)->txhash = net_tx_rndhash();
1fb6f159 Octavian Purdila 2014-06-25  6232  	tcp_openreq_init_rwin(req, sk, dst);
ca6fb065 Eric Dumazet     2015-10-02  6233  	if (!want_cookie) {
ca6fb065 Eric Dumazet     2015-10-02  6234  		tcp_reqsk_record_syn(sk, req, skb);
7656d842 Eric Dumazet     2015-10-04  6235  		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
ca6fb065 Eric Dumazet     2015-10-02  6236  	}
7c85af88 Eric Dumazet     2015-09-24  6237  	if (fastopen_sk) {
ca6fb065 Eric Dumazet     2015-10-02 @6238  		af_ops->send_synack(fastopen_sk, dst, &fl, req,
ca6fb065 Eric Dumazet     2015-10-02  6239  				    skb_get_queue_mapping(skb), &foc, false);
7656d842 Eric Dumazet     2015-10-04  6240  		/* Add the child socket directly into the accept queue */
7656d842 Eric Dumazet     2015-10-04  6241  		inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
7656d842 Eric Dumazet     2015-10-04  6242  		sk->sk_data_ready(sk);
7656d842 Eric Dumazet     2015-10-04  6243  		bh_unlock_sock(fastopen_sk);
7c85af88 Eric Dumazet     2015-09-24  6244  		sock_put(fastopen_sk);
7c85af88 Eric Dumazet     2015-09-24  6245  	} else {
9439ce00 Eric Dumazet     2015-03-17  6246  		tcp_rsk(req)->tfo_listener = false;

:::::: The code at line 6238 was first introduced by commit
:::::: ca6fb06518836ef9b65dc0aac02ff97704d52a05 tcp: attach SYNACK messages to request sockets instead of listener

:::::: TO: Eric Dumazet <edumazet@google.com>
:::::: CC: David S. Miller <davem@davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 89ecbc80b2ce..8b0e3d8a4d81 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -268,13 +268,8 @@  struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 					    struct sock *newsk,
 					    const struct request_sock *req);
 
-static inline void inet_csk_reqsk_queue_add(struct sock *sk,
-					    struct request_sock *req,
-					    struct sock *child)
-{
-	reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
-}
-
+void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
+			      struct sock *child);
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 				   unsigned long timeout);
 
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 2e73748956d5..a0dde04eb178 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -186,25 +186,6 @@  static inline bool reqsk_queue_empty(const struct request_sock_queue *queue)
 	return queue->rskq_accept_head == NULL;
 }
 
-static inline void reqsk_queue_add(struct request_sock_queue *queue,
-				   struct request_sock *req,
-				   struct sock *parent,
-				   struct sock *child)
-{
-	spin_lock(&queue->rskq_lock);
-	req->sk = child;
-	sk_acceptq_added(parent);
-
-	if (queue->rskq_accept_head == NULL)
-		queue->rskq_accept_head = req;
-	else
-		queue->rskq_accept_tail->dl_next = req;
-
-	queue->rskq_accept_tail = req;
-	req->dl_next = NULL;
-	spin_unlock(&queue->rskq_lock);
-}
-
 static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue,
 						      struct sock *parent)
 {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a5a1b54915e5..38b7ef8b0b78 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -740,7 +740,7 @@  int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 
 	reqsk_queue_alloc(&icsk->icsk_accept_queue);
 
-	sk->sk_max_ack_backlog = 0;
+	sk->sk_max_ack_backlog = nr_table_entries;
 	sk->sk_ack_backlog = 0;
 	inet_csk_delack_init(sk);
 
@@ -764,6 +764,53 @@  int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_start);
 
+static void inet_child_forget(struct sock *sk, struct request_sock *req,
+			      struct sock *child)
+{
+	sk->sk_prot->disconnect(child, O_NONBLOCK);
+
+	sock_orphan(child);
+
+	percpu_counter_inc(sk->sk_prot->orphan_count);
+
+	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
+		BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+		BUG_ON(sk != req->rsk_listener);
+
+		/* Paranoid, to prevent race condition if
+		 * an inbound pkt destined for child is
+		 * blocked by sock lock in tcp_v4_rcv().
+		 * Also to satisfy an assertion in
+		 * tcp_v4_destroy_sock().
+		 */
+		tcp_sk(child)->fastopen_rsk = NULL;
+	}
+	inet_csk_destroy_sock(child);
+	reqsk_put(req);
+}
+
+void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
+			      struct sock *child)
+{
+	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+
+	spin_lock(&queue->rskq_lock);
+	if (unlikely(sk->sk_state != TCP_LISTEN)) {
+		inet_child_forget(sk, req, child);
+	} else {
+		req->sk = child;
+		req->dl_next = NULL;
+		if (queue->rskq_accept_head == NULL)
+			queue->rskq_accept_head = req;
+		else
+			queue->rskq_accept_tail->dl_next = req;
+		queue->rskq_accept_tail = req;
+		sk_acceptq_added(sk);
+	}
+	spin_unlock(&queue->rskq_lock);
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
+
 /*
  *	This routine closes sockets which have been at least partially
  *	opened, but not yet accepted.
@@ -790,31 +837,11 @@  void inet_csk_listen_stop(struct sock *sk)
 		WARN_ON(sock_owned_by_user(child));
 		sock_hold(child);
 
-		sk->sk_prot->disconnect(child, O_NONBLOCK);
-
-		sock_orphan(child);
-
-		percpu_counter_inc(sk->sk_prot->orphan_count);
-
-		if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
-			BUG_ON(tcp_sk(child)->fastopen_rsk != req);
-			BUG_ON(sk != req->rsk_listener);
-
-			/* Paranoid, to prevent race condition if
-			 * an inbound pkt destined for child is
-			 * blocked by sock lock in tcp_v4_rcv().
-			 * Also to satisfy an assertion in
-			 * tcp_v4_destroy_sock().
-			 */
-			tcp_sk(child)->fastopen_rsk = NULL;
-		}
-		inet_csk_destroy_sock(child);
-
+		inet_child_forget(sk, req, child);
 		bh_unlock_sock(child);
 		local_bh_enable();
 		sock_put(child);
 
-		reqsk_put(req);
 		cond_resched();
 	}
 	if (queue->fastopenq.rskq_rst_head) {
@@ -829,7 +856,7 @@  void inet_csk_listen_stop(struct sock *sk)
 			req = next;
 		}
 	}
-	WARN_ON(sk->sk_ack_backlog);
+	WARN_ON_ONCE(sk->sk_ack_backlog);
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_stop);