diff mbox series

[net-next] mptcp: keep unaccepted MPC subflow into join list

Message ID 2c16fd8e71c04ce6f38e2b3eafafb3996f95aa77.1603200777.git.pabeni@redhat.com
State Accepted, archived
Commit cdf7f6c99e8673436bbe5da2bc3cd41d7580c9b4
Delegated to: Matthieu Baerts
Headers show
Series [net-next] mptcp: keep unaccepted MPC subflow into join list | expand

Commit Message

Paolo Abeni Oct. 20, 2020, 2:54 p.m. UTC
This will simplify all operation dealing with subflows
before accept time (e.g. data fin processing, add_addr).

The join list is already flushed by mptcp_stream_accept()
before returning the newly created msk to the user space.

This also fixes an potential bug present into the old code:
conn_list was manipulated without helding the msk lock
in mptcp_stream_accept().

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
This is the pre-req to simplify ADD_ADDR pure-ack
generation (for unaccepted MPC subflow): with this
patch applpied the PM could
simply call flush_join_list() before sending the
pure ack.
---
 net/mptcp/protocol.c | 24 ++++++++----------------
 net/mptcp/protocol.h |  9 +++++++++
 net/mptcp/subflow.c  | 10 +++++-----
 3 files changed, 22 insertions(+), 21 deletions(-)

Comments

Mat Martineau Oct. 21, 2020, 3:33 a.m. UTC | #1
On Tue, 20 Oct 2020, Paolo Abeni wrote:

> This will simplify all operation dealing with subflows
> before accept time (e.g. data fin processing, add_addr).
>
> The join list is already flushed by mptcp_stream_accept()
> before returning the newly created msk to the user space.
>
> This also fixes an potential bug present into the old code:
> conn_list was manipulated without helding the msk lock
> in mptcp_stream_accept().
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> This is the pre-req to simplify ADD_ADDR pure-ack
> generation (for unaccepted MPC subflow): with this
> patch applpied the PM could
> simply call flush_join_list() before sending the
> pure ack.

Yes, this looks like a better approach and definitely improves some of the 
accept-time issues we've known about for a while. Looks good for 
the export branch. Thanks!

Mat

> ---
> net/mptcp/protocol.c | 24 ++++++++----------------
> net/mptcp/protocol.h |  9 +++++++++
> net/mptcp/subflow.c  | 10 +++++-----
> 3 files changed, 22 insertions(+), 21 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index ebb43b2c1b34..0bff3aa08270 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -2319,7 +2319,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
> 	if (sk_is_mptcp(newsk)) {
> 		struct mptcp_subflow_context *subflow;
> 		struct sock *new_mptcp_sock;
> -		struct sock *ssk = newsk;
>
> 		subflow = mptcp_subflow_ctx(newsk);
> 		new_mptcp_sock = subflow->conn;
> @@ -2334,22 +2333,8 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
>
> 		/* acquire the 2nd reference for the owning socket */
> 		sock_hold(new_mptcp_sock);
> -
> -		local_bh_disable();
> -		bh_lock_sock(new_mptcp_sock);
> -		msk = mptcp_sk(new_mptcp_sock);
> -		msk->first = newsk;
> -
> 		newsk = new_mptcp_sock;
> -		mptcp_copy_inaddrs(newsk, ssk);
> -		list_add(&subflow->node, &msk->conn_list);
> -		sock_hold(ssk);
> -
> -		mptcp_rcv_space_init(msk, ssk);
> -		bh_unlock_sock(new_mptcp_sock);
> -
> -		__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
> -		local_bh_enable();
> +		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
> 	} else {
> 		MPTCP_INC_STATS(sock_net(sk),
> 				MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
> @@ -2799,6 +2784,12 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
> 	if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) {
> 		struct mptcp_sock *msk = mptcp_sk(newsock->sk);
> 		struct mptcp_subflow_context *subflow;
> +		struct sock *newsk = newsock->sk;
> +		bool slowpath;
> +
> +		slowpath = lock_sock_fast(newsk);
> +		mptcp_copy_inaddrs(newsk, msk->first);
> +		mptcp_rcv_space_init(msk, msk->first);
>
> 		/* set ssk->sk_socket of accept()ed flows to mptcp socket.
> 		 * This is needed so NOSPACE flag can be set from tcp stack.
> @@ -2810,6 +2801,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
> 			if (!ssk->sk_socket)
> 				mptcp_sock_graft(ssk, newsock);
> 		}
> +		unlock_sock_fast(newsk, slowpath);
> 	}
>
> 	if (inet_csk_listen_poll(ssock->sk))
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index b4c8dbe9236b..24a361c684f4 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -404,6 +404,15 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
> 	return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
> }
>
> +static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
> +					  struct mptcp_subflow_context *subflow)
> +{
> +	sock_hold(mptcp_subflow_tcp_sock(subflow));
> +	spin_lock_bh(&msk->join_list_lock);
> +	list_add_tail(&subflow->node, &msk->join_list);
> +	spin_unlock_bh(&msk->join_list_lock);
> +}
> +
> int mptcp_is_enabled(struct net *net);
> unsigned int mptcp_get_add_addr_timeout(struct net *net);
> void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 443c491c2897..1e9a72af67dc 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -578,6 +578,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> 			 */
> 			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
>
> +			/* link the newly created socket to the msk */
> +			mptcp_add_pending_subflow(mptcp_sk(new_msk), ctx);
> +			WRITE_ONCE(mptcp_sk(new_msk)->first, child);
> +
> 			/* new mpc subflow takes ownership of the newly
> 			 * created mptcp socket
> 			 */
> @@ -1124,11 +1128,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
> 	if (err && err != -EINPROGRESS)
> 		goto failed;
>
> -	sock_hold(ssk);
> -	spin_lock_bh(&msk->join_list_lock);
> -	list_add_tail(&subflow->node, &msk->join_list);
> -	spin_unlock_bh(&msk->join_list_lock);
> -
> +	mptcp_add_pending_subflow(msk, subflow);
> 	return err;
>
> failed:
> -- 
> 2.26.2

--
Mat Martineau
Intel
Geliang Tang Oct. 21, 2020, 3:57 a.m. UTC | #2
Hi Paolo,

Mat Martineau <mathew.j.martineau@linux.intel.com> 于2020年10月21日周三 上午11:33写道:
>
>
> On Tue, 20 Oct 2020, Paolo Abeni wrote:
>
> > This will simplify all operation dealing with subflows
> > before accept time (e.g. data fin processing, add_addr).
> >
> > The join list is already flushed by mptcp_stream_accept()
> > before returning the newly created msk to the user space.
> >
> > This also fixes an potential bug present into the old code:
> > conn_list was manipulated without helding the msk lock
> > in mptcp_stream_accept().
> >
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> > This is the pre-req to simplify ADD_ADDR pure-ack
> > generation (for unaccepted MPC subflow): with this
> > patch applpied the PM could
> > simply call flush_join_list() before sending the
> > pure ack.

I have tested this patch with my ADD_ADDR IPv6 patches,
they all work well. Thanks for your help.

Tested-by: Geliang Tang <geliangtang@gmail.com>

-Geliang

>
> Yes, this looks like a better approach and definitely improves some of the
> accept-time issues we've known about for a while. Looks good for
> the export branch. Thanks!
>
> Mat
>
> > ---
> > net/mptcp/protocol.c | 24 ++++++++----------------
> > net/mptcp/protocol.h |  9 +++++++++
> > net/mptcp/subflow.c  | 10 +++++-----
> > 3 files changed, 22 insertions(+), 21 deletions(-)
> >
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index ebb43b2c1b34..0bff3aa08270 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -2319,7 +2319,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
> >       if (sk_is_mptcp(newsk)) {
> >               struct mptcp_subflow_context *subflow;
> >               struct sock *new_mptcp_sock;
> > -             struct sock *ssk = newsk;
> >
> >               subflow = mptcp_subflow_ctx(newsk);
> >               new_mptcp_sock = subflow->conn;
> > @@ -2334,22 +2333,8 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
> >
> >               /* acquire the 2nd reference for the owning socket */
> >               sock_hold(new_mptcp_sock);
> > -
> > -             local_bh_disable();
> > -             bh_lock_sock(new_mptcp_sock);
> > -             msk = mptcp_sk(new_mptcp_sock);
> > -             msk->first = newsk;
> > -
> >               newsk = new_mptcp_sock;
> > -             mptcp_copy_inaddrs(newsk, ssk);
> > -             list_add(&subflow->node, &msk->conn_list);
> > -             sock_hold(ssk);
> > -
> > -             mptcp_rcv_space_init(msk, ssk);
> > -             bh_unlock_sock(new_mptcp_sock);
> > -
> > -             __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
> > -             local_bh_enable();
> > +             MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
> >       } else {
> >               MPTCP_INC_STATS(sock_net(sk),
> >                               MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
> > @@ -2799,6 +2784,12 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
> >       if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) {
> >               struct mptcp_sock *msk = mptcp_sk(newsock->sk);
> >               struct mptcp_subflow_context *subflow;
> > +             struct sock *newsk = newsock->sk;
> > +             bool slowpath;
> > +
> > +             slowpath = lock_sock_fast(newsk);
> > +             mptcp_copy_inaddrs(newsk, msk->first);
> > +             mptcp_rcv_space_init(msk, msk->first);
> >
> >               /* set ssk->sk_socket of accept()ed flows to mptcp socket.
> >                * This is needed so NOSPACE flag can be set from tcp stack.
> > @@ -2810,6 +2801,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
> >                       if (!ssk->sk_socket)
> >                               mptcp_sock_graft(ssk, newsock);
> >               }
> > +             unlock_sock_fast(newsk, slowpath);
> >       }
> >
> >       if (inet_csk_listen_poll(ssock->sk))
> > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> > index b4c8dbe9236b..24a361c684f4 100644
> > --- a/net/mptcp/protocol.h
> > +++ b/net/mptcp/protocol.h
> > @@ -404,6 +404,15 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
> >       return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
> > }
> >
> > +static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
> > +                                       struct mptcp_subflow_context *subflow)
> > +{
> > +     sock_hold(mptcp_subflow_tcp_sock(subflow));
> > +     spin_lock_bh(&msk->join_list_lock);
> > +     list_add_tail(&subflow->node, &msk->join_list);
> > +     spin_unlock_bh(&msk->join_list_lock);
> > +}
> > +
> > int mptcp_is_enabled(struct net *net);
> > unsigned int mptcp_get_add_addr_timeout(struct net *net);
> > void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
> > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> > index 443c491c2897..1e9a72af67dc 100644
> > --- a/net/mptcp/subflow.c
> > +++ b/net/mptcp/subflow.c
> > @@ -578,6 +578,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> >                        */
> >                       inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
> >
> > +                     /* link the newly created socket to the msk */
> > +                     mptcp_add_pending_subflow(mptcp_sk(new_msk), ctx);
> > +                     WRITE_ONCE(mptcp_sk(new_msk)->first, child);
> > +
> >                       /* new mpc subflow takes ownership of the newly
> >                        * created mptcp socket
> >                        */
> > @@ -1124,11 +1128,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
> >       if (err && err != -EINPROGRESS)
> >               goto failed;
> >
> > -     sock_hold(ssk);
> > -     spin_lock_bh(&msk->join_list_lock);
> > -     list_add_tail(&subflow->node, &msk->join_list);
> > -     spin_unlock_bh(&msk->join_list_lock);
> > -
> > +     mptcp_add_pending_subflow(msk, subflow);
> >       return err;
> >
> > failed:
> > --
> > 2.26.2
>
> --
> Mat Martineau
> Intel
Matthieu Baerts Oct. 23, 2020, 6 p.m. UTC | #3
Hi Paolo, Mat, Geliang,

On 20/10/2020 16:54, Paolo Abeni wrote:
> This will simplify all operation dealing with subflows
> before accept time (e.g. data fin processing, add_addr).
> 
> The join list is already flushed by mptcp_stream_accept()
> before returning the newly created msk to the user space.
> 
> This also fixes an potential bug present into the old code:
> conn_list was manipulated without helding the msk lock
> in mptcp_stream_accept().

Thank you for the patch, review and validation!

Just applied:

- cdf7f6c99e86: mptcp: keep unaccepted MPC subflow into join list
- Results: a1e156132206..d13ebf3a735c

With Mat and Geliang's tags.

Tests + export is in progress!

Cheers,
Matt
diff mbox series

Patch

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index ebb43b2c1b34..0bff3aa08270 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2319,7 +2319,6 @@  static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
 	if (sk_is_mptcp(newsk)) {
 		struct mptcp_subflow_context *subflow;
 		struct sock *new_mptcp_sock;
-		struct sock *ssk = newsk;
 
 		subflow = mptcp_subflow_ctx(newsk);
 		new_mptcp_sock = subflow->conn;
@@ -2334,22 +2333,8 @@  static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
 
 		/* acquire the 2nd reference for the owning socket */
 		sock_hold(new_mptcp_sock);
-
-		local_bh_disable();
-		bh_lock_sock(new_mptcp_sock);
-		msk = mptcp_sk(new_mptcp_sock);
-		msk->first = newsk;
-
 		newsk = new_mptcp_sock;
-		mptcp_copy_inaddrs(newsk, ssk);
-		list_add(&subflow->node, &msk->conn_list);
-		sock_hold(ssk);
-
-		mptcp_rcv_space_init(msk, ssk);
-		bh_unlock_sock(new_mptcp_sock);
-
-		__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
-		local_bh_enable();
+		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
 	} else {
 		MPTCP_INC_STATS(sock_net(sk),
 				MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
@@ -2799,6 +2784,12 @@  static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 	if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) {
 		struct mptcp_sock *msk = mptcp_sk(newsock->sk);
 		struct mptcp_subflow_context *subflow;
+		struct sock *newsk = newsock->sk;
+		bool slowpath;
+
+		slowpath = lock_sock_fast(newsk);
+		mptcp_copy_inaddrs(newsk, msk->first);
+		mptcp_rcv_space_init(msk, msk->first);
 
 		/* set ssk->sk_socket of accept()ed flows to mptcp socket.
 		 * This is needed so NOSPACE flag can be set from tcp stack.
@@ -2810,6 +2801,7 @@  static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 			if (!ssk->sk_socket)
 				mptcp_sock_graft(ssk, newsock);
 		}
+		unlock_sock_fast(newsk, slowpath);
 	}
 
 	if (inet_csk_listen_poll(ssock->sk))
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index b4c8dbe9236b..24a361c684f4 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -404,6 +404,15 @@  mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
 	return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
 }
 
+static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
+					  struct mptcp_subflow_context *subflow)
+{
+	sock_hold(mptcp_subflow_tcp_sock(subflow));
+	spin_lock_bh(&msk->join_list_lock);
+	list_add_tail(&subflow->node, &msk->join_list);
+	spin_unlock_bh(&msk->join_list_lock);
+}
+
 int mptcp_is_enabled(struct net *net);
 unsigned int mptcp_get_add_addr_timeout(struct net *net);
 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 443c491c2897..1e9a72af67dc 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -578,6 +578,10 @@  static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 			 */
 			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
 
+			/* link the newly created socket to the msk */
+			mptcp_add_pending_subflow(mptcp_sk(new_msk), ctx);
+			WRITE_ONCE(mptcp_sk(new_msk)->first, child);
+
 			/* new mpc subflow takes ownership of the newly
 			 * created mptcp socket
 			 */
@@ -1124,11 +1128,7 @@  int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
 	if (err && err != -EINPROGRESS)
 		goto failed;
 
-	sock_hold(ssk);
-	spin_lock_bh(&msk->join_list_lock);
-	list_add_tail(&subflow->node, &msk->join_list);
-	spin_unlock_bh(&msk->join_list_lock);
-
+	mptcp_add_pending_subflow(msk, subflow);
 	return err;
 
 failed: