diff mbox series

[RFC,10/14] recv: make DATA_READY reflect ssk in-sequence state

Message ID 20191114173225.21199-11-fw@strlen.de
State Superseded, archived
Headers show
Series [RFC] mptcp: wmem accounting and nonblocking io support | expand

Commit Message

Florian Westphal Nov. 14, 2019, 5:32 p.m. UTC
In order to make mptcp_poll independent of the subflows, we need
to keep the mptcp DATA_READY flag in sync, i.e., if it is set, at least
one ssk has in-sequence data.

If it is cleared, no further data is available.
Avoid the unconditional clearing on recv entry.
Instead make sure the flag is cleared on exit if there is no more
in-sequence data available.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/protocol.c | 51 +++++++++++++++++++++++++++++---------------
 1 file changed, 34 insertions(+), 17 deletions(-)

Comments

Paolo Abeni Nov. 18, 2019, 11:46 a.m. UTC | #1
On Thu, 2019-11-14 at 18:32 +0100, Florian Westphal wrote:
> @@ -614,8 +612,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
>  			if (bytes_read < 0) {
>  				if (!copied)
>  					copied = bytes_read;
> -				done = true;

Why is 'done' no longer set here? I think we want to quit the
recvmsg() loop on a socket error.

[...]
>  		/* only the master socket status is relevant here. The exit
>  		 * conditions mirror closely tcp_recvmsg()
>  		 */
>  		if (copied >= target)
> -			break;
> +			goto out;

Double-checking that I'm following the patch correctly: 'goto out' and
'break;' jump to the same position, right? - just after the main
recvmsg() loop.

The same in several places below.

[...]
> +out:
> +	if (more_data_avail) {
> +		if (!test_bit(MPTCP_DATA_READY, &msk->flags))
> +			set_bit(MPTCP_DATA_READY, &msk->flags);

Can we use test_and_set_bit() here? Otherwise, don't we need some smp
barriers?

> +	} else if (!wait_data) {
> +		clear_bit(MPTCP_DATA_READY, &msk->flags);
> +
> +		/* .. race-breaker: ssk might get new data after last
> +		 * data_available() returns false.
> +		 */
> +		ssk = mptcp_subflow_recv_lookup(msk);
> +		if (unlikely(ssk))
> +			set_bit(MPTCP_DATA_READY, &msk->flags);

Don't we need smp barriers around the bit operations here?

Cheers,

Paolo
Florian Westphal Nov. 18, 2019, 12:36 p.m. UTC | #2
Paolo Abeni <pabeni@redhat.com> wrote:
> On Thu, 2019-11-14 at 18:32 +0100, Florian Westphal wrote:
> > @@ -614,8 +612,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> >  			if (bytes_read < 0) {
> >  				if (!copied)
> >  					copied = bytes_read;
> > -				done = true;
> 
> Why 'done' is not set here anymore? I think we want to quit the
> recvmsg() loop on socket error ?!?

Right, I'll add it back.

> >  		/* only the master socket status is relevant here. The exit
> >  		 * conditions mirror closely tcp_recvmsg()
> >  		 */
> >  		if (copied >= target)
> > -			break;
> > +			goto out;
> 
> Double checking I'm following correctly the patch: 'goto out' and
> 'break;' jump to the same position right? - just after the main
> recvmsg() loop.

I'll re-add the 'break' statements and remove the 'out' label.
> 
> The same in several places below.
> 
> [...]
> > +out:
> > +	if (more_data_avail) {
> > +		if (!test_bit(MPTCP_DATA_READY, &msk->flags))
> > +			set_bit(MPTCP_DATA_READY, &msk->flags);
> 
> Can we use test_and_set_bit() here? Othewise, don't we need some smp
> barriers?

Yes, but why?  We would not use its return value.
Its only purpose is to avoid a useless set_bit in case it's already
set -- which it is in most cases.

We don't need barriers here in any case (the check is on the same memory
address and there is no dependency on any other memory address here).

> > +	} else if (!wait_data) {
> > +		clear_bit(MPTCP_DATA_READY, &msk->flags);
> > +
> > +		/* .. race-breaker: ssk might get new data after last
> > +		 * data_available() returns false.
> > +		 */
> > +		ssk = mptcp_subflow_recv_lookup(msk);
> > +		if (unlikely(ssk))
> > +			set_bit(MPTCP_DATA_READY, &msk->flags);
> 
> Don't we need smp barriers around the bit operations here?

Why?  What sequence would result in a problem?

Cpu0                                  Cpu1
  clear_bit()
  ssk = recv_lookup()                 data_ready()
  if (NULL)...                        -> set_bit()


Result: DATA_READY is set.

I can't come up with a sequence where we'd do:

data_ready()
  ->set_bit()
clear_bit()
... and then have mptcp_subflow_recv_lookup() come
up with a NULL ssk if the subflow has data available.

(Which would mean DATA_READY is unset instead of set).
diff mbox series

Patch

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 6fb178067a4a..b8f936c78ed3 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -556,8 +556,10 @@  static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_subflow_context *subflow;
+	bool more_data_avail = false;
 	struct mptcp_read_arg arg;
 	read_descriptor_t desc;
+	bool wait_data = false;
 	struct socket *ssock;
 	struct tcp_sock *tp;
 	bool done = false;
@@ -590,10 +592,6 @@  static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		u32 map_remaining;
 		int bytes_read;
 
-		smp_mb__before_atomic();
-		clear_bit(MPTCP_DATA_READY, &msk->flags);
-		smp_mb__after_atomic();
-
 		ssk = mptcp_subflow_recv_lookup(msk);
 		pr_debug("msk=%p ssk=%p", msk, ssk);
 		if (!ssk)
@@ -603,7 +601,7 @@  static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		tp = tcp_sk(ssk);
 
 		lock_sock(ssk);
-		while (mptcp_subflow_data_available(ssk) && !done) {
+		do {
 			/* try to read as much data as available */
 			map_remaining = subflow->map_data_len -
 					mptcp_subflow_get_map_offset(subflow);
@@ -614,8 +612,7 @@  static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			if (bytes_read < 0) {
 				if (!copied)
 					copied = bytes_read;
-				done = true;
-				continue;
+				goto next;
 			}
 
 			pr_debug("msk ack_seq=%llx -> %llx", msk->ack_seq,
@@ -624,23 +621,27 @@  static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			copied += bytes_read;
 			if (copied >= len) {
 				done = true;
-				continue;
+				goto next;
 			}
 			if (tp->urg_data && tp->urg_seq == tp->copied_seq) {
 				pr_err("Urgent data present, cannot proceed");
 				done = true;
-				continue;
+				goto next;
 			}
-		}
+next:
+			more_data_avail = mptcp_subflow_data_available(ssk);
+		} while (more_data_avail && !done);
 		release_sock(ssk);
 		continue;
 
 wait_for_data:
+		more_data_avail = false;
+
 		/* only the master socket status is relevant here. The exit
 		 * conditions mirror closely tcp_recvmsg()
 		 */
 		if (copied >= target)
-			break;
+			goto out;
 
 		if (copied) {
 			if (sk->sk_err ||
@@ -648,36 +649,52 @@  static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
 			    !timeo ||
 			    signal_pending(current))
-				break;
+				goto out;
 		} else {
 			if (sk->sk_err) {
 				copied = sock_error(sk);
-				break;
+				goto out;
 			}
 
 			if (sk->sk_shutdown & RCV_SHUTDOWN)
-				break;
+				goto out;
 
 			if (sk->sk_state == TCP_CLOSE) {
 				copied = -ENOTCONN;
-				break;
+				goto out;
 			}
 
 			if (!timeo) {
 				copied = -EAGAIN;
-				break;
+				goto out;
 			}
 
 			if (signal_pending(current)) {
 				copied = sock_intr_errno(timeo);
-				break;
+				goto out;
 			}
 		}
 
 		pr_debug("block timeout %ld", timeo);
+		wait_data = true;
 		mptcp_wait_data(sk, &timeo);
 	}
 
+out:
+	if (more_data_avail) {
+		if (!test_bit(MPTCP_DATA_READY, &msk->flags))
+			set_bit(MPTCP_DATA_READY, &msk->flags);
+	} else if (!wait_data) {
+		clear_bit(MPTCP_DATA_READY, &msk->flags);
+
+		/* .. race-breaker: ssk might get new data after last
+		 * data_available() returns false.
+		 */
+		ssk = mptcp_subflow_recv_lookup(msk);
+		if (unlikely(ssk))
+			set_bit(MPTCP_DATA_READY, &msk->flags);
+	}
+
 	release_sock(sk);
 	return copied;
 }