diff mbox

[SRU,Trusty,Vivid,Wily,1/1] tcp: fix recv with flags MSG_WAITALL | MSG_PEEK

Message ID c9945fdf9264ac24e2db099fefa98db8fdfa4dd8.1440016000.git.joseph.salisbury@canonical.com
State New
Headers show

Commit Message

Joseph Salisbury Aug. 19, 2015, 8:37 p.m. UTC
From: Sabrina Dubroca <sd@queasysnail.net>

BugLink: http://bugs.launchpad.net/bugs/1486146

Currently, tcp_recvmsg enters a busy loop in sk_wait_data if called
with flags = MSG_WAITALL | MSG_PEEK.

sk_wait_data waits for sk_receive_queue not empty, but in this case,
the receive queue is not empty, but does not contain any skb that we
can use.

Add a "last skb seen on receive queue" argument to sk_wait_data, so
that it sleeps until the receive queue has new skbs.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=99461
Link: https://sourceware.org/bugzilla/show_bug.cgi?id=18493
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1205258
Reported-by: Enrico Scholz <rh-bugzilla@ensc.de>
Reported-by: Dan Searle <dan@censornet.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit dfbafc995304ebb9a9b03f65083e6e9cea143b20)
Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com>
---
 include/net/sock.h |  2 +-
 net/core/sock.c    |  5 +++--
 net/dccp/proto.c   |  2 +-
 net/ipv4/tcp.c     | 11 +++++++----
 net/llc/af_llc.c   |  4 ++--
 5 files changed, 14 insertions(+), 10 deletions(-)

Comments

Brad Figg Aug. 19, 2015, 8:55 p.m. UTC | #1
On Wed, Aug 19, 2015 at 04:37:30PM -0400, Joseph Salisbury wrote:
> From: Sabrina Dubroca <sd@queasysnail.net>
> 
> BugLink: http://bugs.launchpad.net/bugs/1486146
> 
> Currently, tcp_recvmsg enters a busy loop in sk_wait_data if called
> with flags = MSG_WAITALL | MSG_PEEK.
> 
> sk_wait_data waits for sk_receive_queue not empty, but in this case,
> the receive queue is not empty, but does not contain any skb that we
> can use.
> 
> Add a "last skb seen on receive queue" argument to sk_wait_data, so
> that it sleeps until the receive queue has new skbs.
> 
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=99461
> Link: https://sourceware.org/bugzilla/show_bug.cgi?id=18493
> Link: https://bugzilla.redhat.com/show_bug.cgi?id=1205258
> Reported-by: Enrico Scholz <rh-bugzilla@ensc.de>
> Reported-by: Dan Searle <dan@censornet.com>
> Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
> Acked-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> (cherry picked from commit dfbafc995304ebb9a9b03f65083e6e9cea143b20)
> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com>
> ---
>  include/net/sock.h |  2 +-
>  net/core/sock.c    |  5 +++--
>  net/dccp/proto.c   |  2 +-
>  net/ipv4/tcp.c     | 11 +++++++----
>  net/llc/af_llc.c   |  4 ++--
>  5 files changed, 14 insertions(+), 10 deletions(-)
> 
> diff --git a/include/net/sock.h b/include/net/sock.h
> index c4f2c65..c87982a 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -878,7 +878,7 @@ void sk_stream_kill_queues(struct sock *sk);
>  void sk_set_memalloc(struct sock *sk);
>  void sk_clear_memalloc(struct sock *sk);
>  
> -int sk_wait_data(struct sock *sk, long *timeo);
> +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb);
>  
>  struct request_sock_ops;
>  struct timewait_sock_ops;
> diff --git a/net/core/sock.c b/net/core/sock.c
> index fa5f321..76491f3 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -2026,20 +2026,21 @@ static void __release_sock(struct sock *sk)
>   * sk_wait_data - wait for data to arrive at sk_receive_queue
>   * @sk:    sock to wait on
>   * @timeo: for how long
> + * @skb:   last skb seen on sk_receive_queue
>   *
>   * Now socket state including sk->sk_err is changed only under lock,
>   * hence we may omit checks after joining wait queue.
>   * We check receive queue before schedule() only as optimization;
>   * it is very likely that release_sock() added new data.
>   */
> -int sk_wait_data(struct sock *sk, long *timeo)
> +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
>  {
>  	int rc;
>  	DEFINE_WAIT(wait);
>  
>  	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
>  	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
> -	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
> +	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
>  	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
>  	finish_wait(sk_sleep(sk), &wait);
>  	return rc;
> diff --git a/net/dccp/proto.c b/net/dccp/proto.c
> index f9076f2..a752c8a 100644
> --- a/net/dccp/proto.c
> +++ b/net/dccp/proto.c
> @@ -888,7 +888,7 @@ verify_sock_status:
>  			break;
>  		}
>  
> -		sk_wait_data(sk, &timeo);
> +		sk_wait_data(sk, &timeo, NULL);
>  		continue;
>  	found_ok_skb:
>  		if (len > skb->len)
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index fda904e..ba66651 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -723,7 +723,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
>  				ret = -EAGAIN;
>  				break;
>  			}
> -			sk_wait_data(sk, &timeo);
> +			sk_wait_data(sk, &timeo, NULL);
>  			if (signal_pending(current)) {
>  				ret = sock_intr_errno(timeo);
>  				break;
> @@ -1536,7 +1536,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  	int target;		/* Read at least this many bytes */
>  	long timeo;
>  	struct task_struct *user_recv = NULL;
> -	struct sk_buff *skb;
> +	struct sk_buff *skb, *last;
>  	u32 urg_hole = 0;
>  
>  	if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
> @@ -1593,7 +1593,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  
>  		/* Next get a buffer. */
>  
> +		last = skb_peek_tail(&sk->sk_receive_queue);
>  		skb_queue_walk(&sk->sk_receive_queue, skb) {
> +			last = skb;
>  			/* Now that we have two receive queues this
>  			 * shouldn't happen.
>  			 */
> @@ -1712,8 +1714,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  			/* Do not sleep, just process backlog. */
>  			release_sock(sk);
>  			lock_sock(sk);
> -		} else
> -			sk_wait_data(sk, &timeo);
> +		} else {
> +			sk_wait_data(sk, &timeo, last);
> +		}
>  
>  		if (user_recv) {
>  			int chunk;
> diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
> index 78b9734..e85481d 100644
> --- a/net/llc/af_llc.c
> +++ b/net/llc/af_llc.c
> @@ -613,7 +613,7 @@ static int llc_wait_data(struct sock *sk, long timeo)
>  		if (signal_pending(current))
>  			break;
>  		rc = 0;
> -		if (sk_wait_data(sk, &timeo))
> +		if (sk_wait_data(sk, &timeo, NULL))
>  			break;
>  	}
>  	return rc;
> @@ -802,7 +802,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
>  			release_sock(sk);
>  			lock_sock(sk);
>  		} else
> -			sk_wait_data(sk, &timeo);
> +			sk_wait_data(sk, &timeo, NULL);
>  
>  		if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) {
>  			net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n",
> -- 
> 2.1.0
> 
> 
> -- 
> kernel-team mailing list
> kernel-team@lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team

Positive testing.
Andy Whitcroft Aug. 19, 2015, 10:45 p.m. UTC | #2
On Wed, Aug 19, 2015 at 04:37:30PM -0400, Joseph Salisbury wrote:
> From: Sabrina Dubroca <sd@queasysnail.net>
> 
> BugLink: http://bugs.launchpad.net/bugs/1486146
> 
> Currently, tcp_recvmsg enters a busy loop in sk_wait_data if called
> with flags = MSG_WAITALL | MSG_PEEK.
> 
> sk_wait_data waits for sk_receive_queue not empty, but in this case,
> the receive queue is not empty, but does not contain any skb that we
> can use.
> 
> Add a "last skb seen on receive queue" argument to sk_wait_data, so
> that it sleeps until the receive queue has new skbs.
> 
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=99461
> Link: https://sourceware.org/bugzilla/show_bug.cgi?id=18493
> Link: https://bugzilla.redhat.com/show_bug.cgi?id=1205258
> Reported-by: Enrico Scholz <rh-bugzilla@ensc.de>
> Reported-by: Dan Searle <dan@censornet.com>
> Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
> Acked-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> (cherry picked from commit dfbafc995304ebb9a9b03f65083e6e9cea143b20)
> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com>
> ---
>  include/net/sock.h |  2 +-
>  net/core/sock.c    |  5 +++--
>  net/dccp/proto.c   |  2 +-
>  net/ipv4/tcp.c     | 11 +++++++----
>  net/llc/af_llc.c   |  4 ++--
>  5 files changed, 14 insertions(+), 10 deletions(-)
> 
> diff --git a/include/net/sock.h b/include/net/sock.h
> index c4f2c65..c87982a 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -878,7 +878,7 @@ void sk_stream_kill_queues(struct sock *sk);
>  void sk_set_memalloc(struct sock *sk);
>  void sk_clear_memalloc(struct sock *sk);
>  
> -int sk_wait_data(struct sock *sk, long *timeo);
> +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb);
>  
>  struct request_sock_ops;
>  struct timewait_sock_ops;
> diff --git a/net/core/sock.c b/net/core/sock.c
> index fa5f321..76491f3 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -2026,20 +2026,21 @@ static void __release_sock(struct sock *sk)
>   * sk_wait_data - wait for data to arrive at sk_receive_queue
>   * @sk:    sock to wait on
>   * @timeo: for how long
> + * @skb:   last skb seen on sk_receive_queue
>   *
>   * Now socket state including sk->sk_err is changed only under lock,
>   * hence we may omit checks after joining wait queue.
>   * We check receive queue before schedule() only as optimization;
>   * it is very likely that release_sock() added new data.
>   */
> -int sk_wait_data(struct sock *sk, long *timeo)
> +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
>  {
>  	int rc;
>  	DEFINE_WAIT(wait);
>  
>  	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
>  	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
> -	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
> +	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
>  	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
>  	finish_wait(sk_sleep(sk), &wait);
>  	return rc;
> diff --git a/net/dccp/proto.c b/net/dccp/proto.c
> index f9076f2..a752c8a 100644
> --- a/net/dccp/proto.c
> +++ b/net/dccp/proto.c
> @@ -888,7 +888,7 @@ verify_sock_status:
>  			break;
>  		}
>  
> -		sk_wait_data(sk, &timeo);
> +		sk_wait_data(sk, &timeo, NULL);
>  		continue;
>  	found_ok_skb:
>  		if (len > skb->len)
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index fda904e..ba66651 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -723,7 +723,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
>  				ret = -EAGAIN;
>  				break;
>  			}
> -			sk_wait_data(sk, &timeo);
> +			sk_wait_data(sk, &timeo, NULL);
>  			if (signal_pending(current)) {
>  				ret = sock_intr_errno(timeo);
>  				break;
> @@ -1536,7 +1536,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  	int target;		/* Read at least this many bytes */
>  	long timeo;
>  	struct task_struct *user_recv = NULL;
> -	struct sk_buff *skb;
> +	struct sk_buff *skb, *last;
>  	u32 urg_hole = 0;
>  
>  	if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
> @@ -1593,7 +1593,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  
>  		/* Next get a buffer. */
>  
> +		last = skb_peek_tail(&sk->sk_receive_queue);
>  		skb_queue_walk(&sk->sk_receive_queue, skb) {
> +			last = skb;
>  			/* Now that we have two receive queues this
>  			 * shouldn't happen.
>  			 */
> @@ -1712,8 +1714,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  			/* Do not sleep, just process backlog. */
>  			release_sock(sk);
>  			lock_sock(sk);
> -		} else
> -			sk_wait_data(sk, &timeo);
> +		} else {
> +			sk_wait_data(sk, &timeo, last);
> +		}
>  
>  		if (user_recv) {
>  			int chunk;
> diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
> index 78b9734..e85481d 100644
> --- a/net/llc/af_llc.c
> +++ b/net/llc/af_llc.c
> @@ -613,7 +613,7 @@ static int llc_wait_data(struct sock *sk, long timeo)
>  		if (signal_pending(current))
>  			break;
>  		rc = 0;
> -		if (sk_wait_data(sk, &timeo))
> +		if (sk_wait_data(sk, &timeo, NULL))
>  			break;
>  	}
>  	return rc;
> @@ -802,7 +802,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
>  			release_sock(sk);
>  			lock_sock(sk);
>  		} else
> -			sk_wait_data(sk, &timeo);
> +			sk_wait_data(sk, &timeo, NULL);
>  
>  		if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) {
>  			net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n",
> -- 

Looks simple enough, looks to do what is claimed, clean upstream
cherry-pick. 

Acked-by: Andy Whitcroft <apw@canonical.com>

-apw
diff mbox

Patch

diff --git a/include/net/sock.h b/include/net/sock.h
index c4f2c65..c87982a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -878,7 +878,7 @@  void sk_stream_kill_queues(struct sock *sk);
 void sk_set_memalloc(struct sock *sk);
 void sk_clear_memalloc(struct sock *sk);
 
-int sk_wait_data(struct sock *sk, long *timeo);
+int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb);
 
 struct request_sock_ops;
 struct timewait_sock_ops;
diff --git a/net/core/sock.c b/net/core/sock.c
index fa5f321..76491f3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2026,20 +2026,21 @@  static void __release_sock(struct sock *sk)
  * sk_wait_data - wait for data to arrive at sk_receive_queue
  * @sk:    sock to wait on
  * @timeo: for how long
+ * @skb:   last skb seen on sk_receive_queue
  *
  * Now socket state including sk->sk_err is changed only under lock,
  * hence we may omit checks after joining wait queue.
  * We check receive queue before schedule() only as optimization;
  * it is very likely that release_sock() added new data.
  */
-int sk_wait_data(struct sock *sk, long *timeo)
+int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
 {
 	int rc;
 	DEFINE_WAIT(wait);
 
 	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
+	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
 	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 	finish_wait(sk_sleep(sk), &wait);
 	return rc;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index f9076f2..a752c8a 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -888,7 +888,7 @@  verify_sock_status:
 			break;
 		}
 
-		sk_wait_data(sk, &timeo);
+		sk_wait_data(sk, &timeo, NULL);
 		continue;
 	found_ok_skb:
 		if (len > skb->len)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fda904e..ba66651 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -723,7 +723,7 @@  ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 				ret = -EAGAIN;
 				break;
 			}
-			sk_wait_data(sk, &timeo);
+			sk_wait_data(sk, &timeo, NULL);
 			if (signal_pending(current)) {
 				ret = sock_intr_errno(timeo);
 				break;
@@ -1536,7 +1536,7 @@  int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int target;		/* Read at least this many bytes */
 	long timeo;
 	struct task_struct *user_recv = NULL;
-	struct sk_buff *skb;
+	struct sk_buff *skb, *last;
 	u32 urg_hole = 0;
 
 	if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
@@ -1593,7 +1593,9 @@  int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 		/* Next get a buffer. */
 
+		last = skb_peek_tail(&sk->sk_receive_queue);
 		skb_queue_walk(&sk->sk_receive_queue, skb) {
+			last = skb;
 			/* Now that we have two receive queues this
 			 * shouldn't happen.
 			 */
@@ -1712,8 +1714,9 @@  int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			/* Do not sleep, just process backlog. */
 			release_sock(sk);
 			lock_sock(sk);
-		} else
-			sk_wait_data(sk, &timeo);
+		} else {
+			sk_wait_data(sk, &timeo, last);
+		}
 
 		if (user_recv) {
 			int chunk;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 78b9734..e85481d 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -613,7 +613,7 @@  static int llc_wait_data(struct sock *sk, long timeo)
 		if (signal_pending(current))
 			break;
 		rc = 0;
-		if (sk_wait_data(sk, &timeo))
+		if (sk_wait_data(sk, &timeo, NULL))
 			break;
 	}
 	return rc;
@@ -802,7 +802,7 @@  static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
 			release_sock(sk);
 			lock_sock(sk);
 		} else
-			sk_wait_data(sk, &timeo);
+			sk_wait_data(sk, &timeo, NULL);
 
 		if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) {
 			net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n",