@@ -1250,9 +1250,12 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
}
#endif
-static struct sock *mptcp_sk_clone_lock(const struct sock *sk)
+struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
{
+ struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
+ struct mptcp_sock *msk;
+ u64 ack_seq;
if (!nsk)
return NULL;
@@ -1262,6 +1265,30 @@ static struct sock *mptcp_sk_clone_lock(const struct sock *sk)
inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
#endif
+ __mptcp_init_sock(nsk);
+
+ msk = mptcp_sk(nsk);
+ msk->local_key = subflow_req->local_key;
+ msk->token = subflow_req->token;
+ msk->subflow = NULL;
+
+ mptcp_token_new_accept(subflow_req->token, nsk);
+
+ mptcp_pm_new_connection(msk, 1);
+
+ msk->write_seq = subflow_req->idsn + 1;
+ atomic64_set(&msk->snd_una, msk->write_seq);
+ if (subflow_req->remote_key_valid) {
+ msk->can_ack = true;
+ msk->remote_key = subflow_req->remote_key;
+ mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
+ ack_seq++;
+ msk->ack_seq = ack_seq;
+ }
+ bh_unlock_sock(nsk);
+
+ /* keep a single reference */
+ __sock_put(nsk);
return nsk;
}
@@ -1289,44 +1316,28 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
struct mptcp_subflow_context *subflow;
struct sock *new_mptcp_sock;
struct sock *ssk = newsk;
- u64 ack_seq;
subflow = mptcp_subflow_ctx(newsk);
- lock_sock(sk);
+ new_mptcp_sock = subflow->conn;
- local_bh_disable();
- new_mptcp_sock = mptcp_sk_clone_lock(sk);
- if (!new_mptcp_sock) {
- *err = -ENOBUFS;
- local_bh_enable();
- release_sock(sk);
- mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1);
- tcp_close(newsk, 0);
- return NULL;
+ /* is_mptcp should be false if subflow->conn is missing, see
+ * subflow_syn_recv_sock()
+ */
+ if (WARN_ON_ONCE(!new_mptcp_sock)) {
+ tcp_sk(newsk)->is_mptcp = 0;
+ return newsk;
}
- __mptcp_init_sock(new_mptcp_sock);
+ /* acquire the 2nd reference for the owning socket */
+ sock_hold(new_mptcp_sock);
+ local_bh_disable();
+ bh_lock_sock(new_mptcp_sock);
msk = mptcp_sk(new_mptcp_sock);
- msk->local_key = subflow->local_key;
- msk->token = subflow->token;
- msk->subflow = NULL;
msk->first = newsk;
- mptcp_token_update_accept(newsk, new_mptcp_sock);
-
- mptcp_pm_new_connection(msk, 1);
-
- msk->write_seq = subflow->idsn + 1;
- atomic64_set(&msk->snd_una, msk->write_seq);
- if (subflow->can_ack) {
- msk->can_ack = true;
- msk->remote_key = subflow->remote_key;
- mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
- ack_seq++;
- msk->ack_seq = ack_seq;
- }
newsk = new_mptcp_sock;
+
mptcp_copy_inaddrs(newsk, ssk);
list_add(&subflow->node, &msk->conn_list);
@@ -1338,18 +1349,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
local_bh_enable();
- release_sock(sk);
-
- /* the subflow can already receive packet, avoid racing with
- * the receive path and process the pending ones
- */
- lock_sock(ssk);
- subflow->rel_write_seq = 1;
- subflow->tcp_sock = ssk;
- subflow->conn = new_mptcp_sock;
- if (unlikely(!skb_queue_empty(&ssk->sk_receive_queue)))
- mptcp_subflow_data_available(ssk);
- release_sock(ssk);
} else {
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
@@ -323,6 +323,7 @@ void mptcp_proto_init(void);
int mptcp_proto_v6_init(void);
#endif
+struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req);
void mptcp_get_options(const struct sk_buff *skb,
struct tcp_options_received *opt_rx);
@@ -335,8 +336,7 @@ void mptcp_subflow_eof(struct sock *sk);
int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(u32 token);
int mptcp_token_new_connect(struct sock *sk);
-int mptcp_token_new_accept(u32 token);
-void mptcp_token_update_accept(struct sock *sk, struct sock *conn);
+int mptcp_token_new_accept(u32 token, struct sock *conn);
struct mptcp_sock *mptcp_token_get_sock(u32 token);
void mptcp_token_destroy(u32 token);
@@ -344,6 +344,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_subflow_request_sock *subflow_req;
struct tcp_options_received opt_rx;
bool fallback_is_fatal = false;
+ struct sock *new_msk = NULL;
struct sock *child;
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
@@ -359,7 +360,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* out-of-order pkt, which will not carry the MP_CAPABLE
* opt even on mptcp enabled paths
*/
- goto create_child;
+ goto create_msk;
}
opt_rx.mptcp.mp_capable = 0;
@@ -369,7 +370,14 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
subflow_req->remote_key_valid = 1;
} else {
subflow_req->mp_capable = 0;
+ goto create_child;
}
+
+create_msk:
+ new_msk = mptcp_sk_clone(listener->conn, req);
+ if (!new_msk)
+ subflow_req->mp_capable = 0;
+
} else if (subflow_req->mp_join) {
fallback_is_fatal = true;
opt_rx.mptcp.mp_join = 0;
@@ -394,12 +402,15 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (!ctx) {
if (fallback_is_fatal)
goto close_child;
- return child;
+ goto out;
}
if (ctx->mp_capable) {
- if (mptcp_token_new_accept(ctx->token))
- goto close_child;
+ /* new mpc subflow takes ownership of the newly
+ * created mptcp socket
+ */
+ ctx->conn = new_msk;
+ new_msk = NULL;
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
@@ -415,6 +426,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
}
}
+out:
+ /* dispose of the left over mptcp master, if any */
+ if (new_msk)
+ sock_put(new_msk);
return child;
close_child:
@@ -1055,6 +1070,8 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
new_ctx->tcp_state_change = old_ctx->tcp_state_change;
new_ctx->tcp_write_space = old_ctx->tcp_write_space;
+ new_ctx->rel_write_seq = 1;
+ new_ctx->tcp_sock = newsk;
if (subflow_req->mp_capable) {
/* see comments in subflow_syn_recv_sock(), MPTCP connection
@@ -132,41 +132,17 @@ int mptcp_token_new_connect(struct sock *sk)
* We don't have an mptcp socket yet at that point.
* This is paired with mptcp_token_update_accept, called on accept().
*/
-int mptcp_token_new_accept(u32 token)
+int mptcp_token_new_accept(u32 token, struct sock *conn)
{
int err;
spin_lock_bh(&token_tree_lock);
- err = radix_tree_insert(&token_tree, token, &token_used);
+ err = radix_tree_insert(&token_tree, token, conn);
spin_unlock_bh(&token_tree_lock);
return err;
}
-/**
- * mptcp_token_update_accept - update token to map to mptcp socket
- * @conn: the new struct mptcp_sock
- * @sk: the initial subflow for this mptcp socket
- *
- * Called when the first mptcp socket is created on accept to
- * refresh the dummy mapping (done to reserve the token) with
- * the mptcp_socket structure that wasn't allocated before.
- */
-void mptcp_token_update_accept(struct sock *sk, struct sock *conn)
-{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- void __rcu **slot;
-
- spin_lock_bh(&token_tree_lock);
- slot = radix_tree_lookup_slot(&token_tree, subflow->token);
- WARN_ON_ONCE(!slot);
- if (slot) {
- WARN_ON_ONCE(rcu_access_pointer(*slot) != &token_used);
- radix_tree_replace_slot(&token_tree, slot, conn);
- }
- spin_unlock_bh(&token_tree_lock);
-}
-
/**
* mptcp_token_get_sock - retrieve mptcp connection sock using its token
* @token - token of the mptcp connection to retrieve
This closes a race between MPC subflow accept and additional MPJ creation which leads to MPJ subflow being dropped with MPTCP_MIB_JOINNOTOKEN despite carrying the correct token. (also fixes some sporadic self-tests failures) It also makes accept faster, as we only take a single spinlock vs a spinlock and 2 socket locks. Additional clean-up could be added in several places where we currently check for subflow->conn -> after this patch we know it's always valid. Flip side: more memory allocated for the accept backlog. I personally think that is acceptable -> we already allocate more memory on the server. Sharing as an early RFC to gather comments. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/protocol.c | 81 ++++++++++++++++++++++---------------------- net/mptcp/protocol.h | 4 +-- net/mptcp/subflow.c | 25 +++++++++++--- net/mptcp/token.c | 28 ++------------- 4 files changed, 65 insertions(+), 73 deletions(-)