diff mbox series

[v2,9/9] don't flag parent socket as RCV_SHUTDOWN if one one subflow has closed

Message ID 20191202191005.30879-1-fw@strlen.de
State Accepted, archived
Delegated to: Matthieu Baerts
Headers show
Series None | expand

Commit Message

Florian Westphal Dec. 2, 2019, 7:10 p.m. UTC
instead schedule the mptcp worker and tell it to check eof state on all
subflows.  If all have closed, then also flag the mptcp socket as closed
and wake up userspace process blocking in poll().

v2: move eof checks to subflow_state_change() (Paolo)

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/protocol.c | 41 +++++++++++++++++++++++++++++++++--------
 net/mptcp/protocol.h |  2 ++
 net/mptcp/subflow.c  | 22 ++++++++++++----------
 3 files changed, 47 insertions(+), 18 deletions(-)

Comments

Paolo Abeni Dec. 2, 2019, 9:21 p.m. UTC | #1
On Mon, 2019-12-02 at 20:10 +0100, Florian Westphal wrote:
> instead schedule the mptcp worker and tell it to check eof state on all
> subflows.  If all have closed, then also flag the mptcp socket as closed
> and wake up userspace process blocking in poll().
> 
> v2: move eof checks to subflow_state_change() (Paolo)
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  net/mptcp/protocol.c | 41 +++++++++++++++++++++++++++++++++--------
>  net/mptcp/protocol.h |  2 ++
>  net/mptcp/subflow.c  | 22 ++++++++++++----------
>  3 files changed, 47 insertions(+), 18 deletions(-)
> 
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 36097008dd4a..feb4658b13a8 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -61,6 +61,15 @@ void mptcp_data_acked(struct sock *sk)
>  		sock_hold(sk);
>  }
>  
> +void mptcp_subflow_eof(struct sock *sk)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +
> +	if (!test_and_set_bit(MPTCP_WORK_EOF, &msk->flags) &&
> +	    schedule_work(&msk->rtx_work))
> +		sock_hold(sk);
> +}
> +
>  static void mptcp_stop_timer(struct sock *sk)
>  {
>  	struct inet_connection_sock *icsk = inet_csk(sk);
> @@ -95,22 +104,14 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
>  {
>  	struct sock *sk = (struct sock *)msk;
>  	struct mptcp_subflow_context *subflow;
> -	int receivers = 0;
>  
>  	sock_owned_by_me(sk);
>  
>  	mptcp_for_each_subflow(msk, subflow) {
>  		if (subflow->data_avail)
>  			return mptcp_subflow_tcp_sock(subflow);
> -
> -		receivers += !subflow->rx_eof;
>  	}
>  
> -	/* hopefully temporary hack: propagate shutdown status from subflow
> -	 * to msk, when all subflows agree on it
> -	 */
> -	if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN))
> -		sk->sk_shutdown |= RCV_SHUTDOWN;
>  	return NULL;
>  }
>  
> @@ -789,6 +790,27 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
>  	}
>  }
>  
> +static void mptcp_check_for_eof(struct mptcp_sock *msk)
> +{
> +	struct mptcp_subflow_context *subflow;
> +	struct sock *sk = (struct sock *)msk;
> +	int receivers = 0;
> +
> +	mptcp_for_each_subflow(msk, subflow)
> +		receivers += !subflow->rx_eof;
> +
> +	if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
> +		/* hopefully temporary hack: propagate shutdown status
> +		 * to msk, when all subflows agree on it
> +		 */
> +		sk->sk_shutdown |= RCV_SHUTDOWN;
> +
> +		smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
> +		set_bit(MPTCP_DATA_READY, &msk->flags);
> +		sk->sk_data_ready(sk);
> +	}
> +}
> +
>  static void mptcp_worker(struct work_struct *work)
>  {
>  	int orig_len, orig_offset, ret, mss_now = 0, size_goal = 0;
> @@ -806,6 +828,9 @@ static void mptcp_worker(struct work_struct *work)
>  	lock_sock(sk);
>  	mptcp_clean_una(sk);
>  
> +	if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
> +		mptcp_check_for_eof(msk);
> +
>  	if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
>  		goto unlock;
>  
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 990da61fa51b..cf3b4ce483d9 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -78,6 +78,7 @@
>  #define MPTCP_DATA_READY	BIT(0)
>  #define MPTCP_WORK_RTX		BIT(1)
>  #define MPTCP_SEND_SPACE	BIT(2)
> +#define MPTCP_WORK_EOF		BIT(3)
>  
>  static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
>  {
> @@ -291,6 +292,7 @@ void mptcp_get_options(const struct sk_buff *skb,
>  		       struct tcp_options_received *opt_rx);
>  
>  void mptcp_finish_connect(struct sock *sk);
> +void mptcp_subflow_eof(struct sock *sk);
>  bool mptcp_finish_join(struct sock *sk);
>  void mptcp_data_acked(struct sock *sk);
>  
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 8c13e9a145d5..fe6c61367bd7 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -639,11 +639,6 @@ bool mptcp_subflow_data_available(struct sock *sk)
>  
>  	if (!subflow_check_data_avail(sk)) {
>  		subflow->data_avail = 0;
> -		/* set EoF only there is no data available - we already spooled
> -		 * all the pending skbs
> -		 */
> -		if (sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE)
> -			subflow->rx_eof = 1;
>  		return false;
>  	}
>  
> @@ -666,8 +661,7 @@ static void subflow_data_ready(struct sock *sk)
>  		return;
>  	}
>  
> -	/* always propagate the EoF */
> -	if (mptcp_subflow_data_available(sk) || subflow->rx_eof) {
> +	if (mptcp_subflow_data_available(sk)) {
>  		smp_mb__before_atomic();
>  		set_bit(MPTCP_DATA_READY, &mptcp_sk(parent)->flags);
>  		smp_mb__after_atomic();
> @@ -807,15 +801,23 @@ static void __subflow_state_change(struct sock *sk)
>  	rcu_read_unlock();
>  }
>  
> +static bool subflow_is_done(const struct sock *sk)
> +{
> +	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
> +}
> +
>  static void subflow_state_change(struct sock *sk)
>  {
>  	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> -	struct sock *parent = subflow->conn;
> +	struct sock *parent = READ_ONCE(subflow->conn);
>  
>  	__subflow_state_change(sk);
>  
> -	if (parent)
> -		__subflow_state_change(parent);
> +	if (parent && !(parent->sk_shutdown & RCV_SHUTDOWN) &&
> +	    !subflow->rx_eof && subflow_is_done(sk)) {
> +		subflow->rx_eof = 1;
> +		mptcp_subflow_eof(parent);
> +	}
>  }
>  
>  static int subflow_ulp_init(struct sock *sk)

Thanks for addressing my comments.

With 9/9 v2 the series is IMHO ready to me merged.

Thanks,

Paolo
Matthieu Baerts Dec. 3, 2019, 7:44 a.m. UTC | #2
Hi Florian, Paolo, Mat,

On 02/12/2019 22:21, Paolo Abeni wrote:
> On Mon, 2019-12-02 at 20:10 +0100, Florian Westphal wrote:

[...]

> Thanks for addressing my comments.
> 
> With 9/9 v2 the series is IMHO ready to me merged.

Thank you for the nice patch series and the reviews!
Related to what we discussed yesterday, is it OK if I merge this after:

  1) DATA_FIN
  2) MPTCPv1 → wait a bit for a review
  3) SendMsg "refactor" → if reviewed

Or can I do that after the patches for MPTCPv1 if the patches to fix 
sendmsg() have not been reviewed yet?

Also just to be sure: do I squash the 2 first ones and then I add the 
rest at the end of the series?

Cheers,
Matt
Paolo Abeni Dec. 3, 2019, 9:05 a.m. UTC | #3
On Tue, 2019-12-03 at 08:44 +0100, Matthieu Baerts wrote:
> Hi Florian, Paolo, Mat,
> 
> On 02/12/2019 22:21, Paolo Abeni wrote:
> > On Mon, 2019-12-02 at 20:10 +0100, Florian Westphal wrote:
> 
> [...]
> 
> > Thanks for addressing my comments.
> > 
> > With 9/9 v2 the series is IMHO ready to me merged.
> 
> Thank you for the nice patch series and the reviews!
> Related to what we discussed yesterday, is it OK if I merge this after:
> 
>   1) DATA_FIN
>   2) MPTCPv1 → wait a bit for a review
>   3) SendMsg "refactor" → if reviewed
> 
> Or can I do that after the patches for MPTCPv1 if the patches to fix 
> sendmsg() have not been reviewed yet?

I think/hope this and sendmsg patches should not conflict much - but
sendmsg patches do create quite of conflict with the following patches.

I think that appling this series after v1 could be a good option, will
give extra time to review the sendmsg() patches, and I can rebase my
tree to help the merging.

Thanks,

Paolo
Matthieu Baerts Dec. 3, 2019, 9:13 a.m. UTC | #4
Hi Paolo,

On 03/12/2019 10:05, Paolo Abeni wrote:
> On Tue, 2019-12-03 at 08:44 +0100, Matthieu Baerts wrote:
>> Hi Florian, Paolo, Mat,
>>
>> On 02/12/2019 22:21, Paolo Abeni wrote:
>>> On Mon, 2019-12-02 at 20:10 +0100, Florian Westphal wrote:
>>
>> [...]
>>
>>> Thanks for addressing my comments.
>>>
>>> With 9/9 v2 the series is IMHO ready to me merged.
>>
>> Thank you for the nice patch series and the reviews!
>> Related to what we discussed yesterday, is it OK if I merge this after:
>>
>>    1) DATA_FIN
>>    2) MPTCPv1 → wait a bit for a review
>>    3) SendMsg "refactor" → if reviewed
>>
>> Or can I do that after the patches for MPTCPv1 if the patches to fix
>> sendmsg() have not been reviewed yet?
> 
> I think/hope this and sendmsg patches should not conflict much - but
> sendmsg patches do create quite of conflict with the following patches.
> 
> I think that appling this series after v1 could be a good option, will
> give extra time to review the sendmsg() patches, and I can rebase my
> tree to help the merging.

Thank you for your reply!

Sounds good to me. If there is nothing to change in sendmsg() patches 
after the review, I can also start applying them. If I am blocked, I can 
ask for help.

Cheers,
Matt
Matthieu Baerts Dec. 6, 2019, 3:52 p.m. UTC | #5
Hi Florian, Paolo, Mat,

On 03/12/2019 10:13, Matthieu Baerts wrote:
> Hi Paolo,
> 
> On 03/12/2019 10:05, Paolo Abeni wrote:
>> On Tue, 2019-12-03 at 08:44 +0100, Matthieu Baerts wrote:
>>> Hi Florian, Paolo, Mat,
>>>
>>> On 02/12/2019 22:21, Paolo Abeni wrote:
>>>> On Mon, 2019-12-02 at 20:10 +0100, Florian Westphal wrote:
>>>
>>> [...]
>>>
>>>> Thanks for addressing my comments.
>>>>
>>>> With 9/9 v2 the series is IMHO ready to me merged.
>>>
>>> Thank you for the nice patch series and the reviews!
>>> Related to what we discussed yesterday, is it OK if I merge this after:
>>>
>>>    1) DATA_FIN
>>>    2) MPTCPv1 → wait a bit for a review
>>>    3) SendMsg "refactor" → if reviewed
>>>
>>> Or can I do that after the patches for MPTCPv1 if the patches to fix
>>> sendmsg() have not been reviewed yet?
>>
>> I think/hope this and sendmsg patches should not conflict much - but
>> sendmsg patches do create quite of conflict with the following patches.
>>
>> I think that appling this series after v1 could be a good option, will
>> give extra time to review the sendmsg() patches, and I can rebase my
>> tree to help the merging.
> 
> Thank you for your reply!
> 
> Sounds good to me. If there is nothing to change in sendmsg() patches 
> after the review, I can also start applying them. If I am blocked, I can 
> ask for help.

Thank you again for the patches and the reviews!

- 192c07c3fe9b: "squashed" patch 1/9 in "mptcp: Add handling of incoming 
MP_JOIN requests"
- d6c1939a16c5..2cc726803b47: result
- 0c24eaeadf60: "squashed" patch 2/9, part 1, in "mptcp: Add handling of 
incoming MP_JOIN requests"
- f2a1eeb27eab: "squashed" patch 2/9, part 2, in "mptcp: Add handling of 
outgoing MP_JOIN requests"
- 216b0b82f102: conflict in t/mptcp-introduce-MPTCP-retransmission-timer
- e1012595cc3d: conflict in t/mptcp-add-and-use-mptcp-RTX-flag
- 2cc726803b47..48f66afb6a81: result
- the other patches have been added at the end. I had some conflicts with:
  - "copy connection id from first subflow to mptcp socket"
  - "make accept not allocate kernel socket struct"
  - "subflow: place further subflows on new 'join_list'"

Tests on top of the commit introducing the kselftests are OK but not the 
one at the end of the series (export branch). Here is the trace (without 
CONFIG_KASAN nor PROVE_LOCKING)



00:07:55.647 # INFO: set ns3-5dea74bc-S7alT1 dev ns3eth2: ethtool -K tso 
off gro off
00:07:55.658 # INFO: set ns4-5dea74bc-S7alT1 dev ns4eth3: ethtool -K 
gro off
00:07:55.685 # Created /tmp/tmp.FSDFbTHqEa (size 3070486) containing 
data sent by client
00:07:55.694 # Created /tmp/tmp.klmXQh5uiX (size 375225) containing data 
sent by server
00:07:55.742 # New MPTCP socket can be blocked via sysctl		[ OK ]
00:07:55.763 # setsockopt(..., TCP_ULP, "mptcp", ...) blocked	[ OK ]
00:07:55.770 # INFO: validating network environment with pings
00:07:56.500 [   11.837548] IPv6: ADDRCONF(NETDEV_CHANGE): ns2eth1: link 
becomes ready
00:07:58.413 # INFO: Using loss of 0.10% delay 87 ms reorder 96% 42% on 
ns3eth4
00:07:58.420 # ns1 MPTCP -> ns1 (10.0.1.1:10000      ) MPTCP	(duration 
  19ms) [ OK ]
00:07:58.471 # ns1 MPTCP -> ns1 (10.0.1.1:10001      ) TCP  	(duration 
  18ms) [ OK ]
00:07:58.518 # ns1 TCP   -> ns1 (10.0.1.1:10002      ) MPTCP	(duration 
  18ms) [ OK ]
00:07:58.566 # ns1 MPTCP -> ns1 (dead:beef:1::1:10003) MPTCP	(duration 
  17ms) [ OK ]
00:07:58.612 # ns1 MPTCP -> ns1 (dead:beef:1::1:10004) TCP  	(duration 
  18ms) [ OK ]
00:07:58.660 # ns1 TCP   -> ns1 (dead:beef:1::1:10005) MPTCP	[ 
14.024584] BUG: unable to handle page fault for address: 00000003435d11af
00:07:58.688 [   14.025780] #PF: supervisor read access in kernel mode
00:07:58.689 [   14.026588] #PF: error_code(0x0000) - not-present page
00:07:58.690 [   14.027394] PGD 0 P4D 0
00:07:58.690 [   14.027801] Oops: 0000 [#1] SMP PTI
00:07:58.691 [   14.028355] CPU: 0 PID: 731 Comm: mptcp_connect Not 
tainted 5.4.0+ #5
00:07:58.692 [   14.029365] Hardware name: QEMU Standard PC (i440FX + 
PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
00:07:58.693 [   14.030752] RIP: 0010:mptcp_stream_accept+0x8d/0x160
00:07:58.694 [   14.031533] Code: 8b 9c 24 b0 05 00 00 49 81 c4 b0 05 00 
00 4c 39 e3 74 71 49 8d 76 40 48 89 74 24 08 eb 08 48 8b 1b 4c 39 e3 74 
5e 48 8b 6b 78 <48> 83 bd 78 02 00 00 00 75 ea 4c 8d bd 28 02 00 00 89 
44 24 04 4c
00:07:58.697 [   14.034405] RSP: 0018:ffffad8d8044bdf0 EFLAGS: 00010283
00:07:58.698 [   14.035218] RAX: 0000000000000000 RBX: ffff96f35d0d9928 
RCX: 0000000000000001
00:07:58.699 [   14.036317] RDX: 0000000000000001 RSI: ffff96f35eaffb40 
RDI: ffffffffb6199b8a
00:07:58.700 [   14.037423] RBP: 00000003435d0f37 R08: ffff96f35f42e690 
R09: ffffad8d8044bdb0
00:07:58.701 [   14.038528] R10: ffff96f35e375340 R11: ffff96f35d0d0740 
R12: ffff96f35d0d9930
00:07:58.702 [   14.039622] R13: ffff96f35eafe300 R14: ffff96f35eaffb00 
R15: ffff96f35eaffb00
00:07:58.703 [   14.040721] FS:  00007fad7081d500(0000) 
GS:ffff96f35f400000(0000) knlGS:0000000000000000
00:07:58.704 [   14.041973] CS:  0010 DS: 0000 ES: 0000 CR0: 
0000000080050033
00:07:58.738 [   14.042879] CR2: 00000003435d11af CR3: 000000001e3f4006 
CR4: 00000000003606f0
00:07:58.738 [   14.043987] DR0: 0000000000000000 DR1: 0000000000000000 
DR2: 0000000000000000
00:07:58.738 [   14.045098] DR3: 0000000000000000 DR6: 00000000fffe0ff0 
DR7: 0000000000000400
00:07:58.738 [   14.046204] Call Trace:
00:07:58.738 [   14.046599]  ? selinux_socket_accept+0x41/0x80
00:07:58.738 [   14.047294]  __sys_accept4_file+0xf7/0x1c0
00:07:58.738 [   14.047937]  ? mptcp_bind+0x38/0xa0
00:07:58.738 [   14.048487]  ? __inet_hash+0x120/0x2c0
00:07:58.738 [   14.049084]  ? kvm_clock_get_cycles+0xd/0x10
00:07:58.738 [   14.049751]  ? ktime_get_ts64+0x47/0xe0
00:07:58.738 [   14.050351]  __sys_accept4+0x3b/0x70
00:07:58.738 [   14.050911]  __x64_sys_accept+0x13/0x20
00:07:58.738 [   14.051510]  do_syscall_64+0x43/0x160
00:07:58.738 [   14.052084]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
00:07:58.738 [   14.052864] RIP: 0033:0x7fad7032e7e4
00:07:58.738 [   14.053428] Code: 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 
2e 0f 1f 84 00 00 00 00 00 48 8d 05 21 e1 2c 00 8b 00 85 c0 75 13 b8 2b 
00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 f3 c3 66 90 41 54 55 49 89 d4 
53 48 89 f5
00:07:58.738 [   14.056279] RSP: 002b:00007fffba32c8e8 EFLAGS: 00000246 
ORIG_RAX: 000000000000002b
00:07:58.738 [   14.057447] RAX: ffffffffffffffda RBX: 0000000000000005 
RCX: 00007fad7032e7e4
00:07:58.738 [   14.058544] RDX: 00007fffba32c900 RSI: 00007fffba32c910 
RDI: 0000000000000005
00:07:58.738 [   14.059640] RBP: 0000000000000005 R08: 0000000000000004 
R09: 0000000000000000
00:07:58.738 [   14.060742] R10: 00007fffba32cb24 R11: 0000000000000246 
R12: 00007fffba32c910
00:07:58.738 [   14.061863] R13: 000055e3b0610b4d R14: 00000000ffffffff 
R15: 0000000000000000
00:07:58.738 [   14.062974] Modules linked in:
00:07:58.738 [   14.063459] CR2: 00000003435d11af
00:07:58.738 [   14.063984] ---[ end trace 5eeeab694e375d46 ]---
00:07:58.738 [   14.064707] RIP: 0010:mptcp_stream_accept+0x8d/0x160
00:07:58.738 [   14.065487] Code: 8b 9c 24 b0 05 00 00 49 81 c4 b0 05 00 
00 4c 39 e3 74 71 49 8d 76 40 48 89 74 24 08 eb 08 48 8b 1b 4c 39 e3 74 
5e 48 8b 6b 78 <48> 83 bd 78 02 00 00 00 75 ea 4c 8d bd 28 02 00 00 89 
44 24 04 4c
00:07:58.738 [   14.068351] RSP: 0018:ffffad8d8044bdf0 EFLAGS: 00010283
00:07:58.738 [   14.069169] RAX: 0000000000000000 RBX: ffff96f35d0d9928 
RCX: 0000000000000001
00:07:58.738 [   14.070274] RDX: 0000000000000001 RSI: ffff96f35eaffb40 
RDI: ffffffffb6199b8a
00:07:58.738 [   14.071378] RBP: 00000003435d0f37 R08: ffff96f35f42e690 
R09: ffffad8d8044bdb0
00:07:58.738 [   14.072478] R10: ffff96f35e375340 R11: ffff96f35d0d0740 
R12: ffff96f35d0d9930
00:07:58.738 [   14.073582] R13: ffff96f35eafe300 R14: ffff96f35eaffb00 
R15: ffff96f35eaffb00
00:07:58.738 [   14.074685] FS:  00007fad7081d500(0000) 
GS:ffff96f35f400000(0000) knlGS:0000000000000000
00:07:58.738 [   14.075912] CS:  0010 DS: 0000 ES: 0000 CR0: 
0000000080050033
00:07:58.739 [   14.076805] CR2: 00000003435d11af CR3: 000000001e3f4006 
CR4: 00000000003606f0
00:07:58.740 [   14.077853] DR0: 0000000000000000 DR1: 0000000000000000 
DR2: 0000000000000000
00:07:58.741 [   14.078927] DR3: 0000000000000000 DR6: 00000000fffe0ff0 
DR7: 0000000000000400
00:08:28.978 copyfd_io_poll: poll timed out (events: POLLIN 1, POLLOUT 4)
00:08:28.979 # ./mptcp_connect.sh: line 392:   731 Killed 
   ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port 
-s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout"
00:08:28.988 # (duration 30308ms) [ FAIL ] client exit code 2, server 137
00:08:28.989 # \nnetns ns1-5dea74bc-S7alT1 socket stat for 10005:
00:08:29.013 # State  Recv-Q   Send-Q         Local Address:Port 
  Peer Address:Port
00:08:29.013 # ESTAB  94728    0           [dead:beef:1::1]:10005 
[dead:beef:1::1]:40964    rto:0.2 ato:0.04 cwnd:10 reordering:0
00:08:29.015 # \nnetns ns1-5dea74bc-S7alT1 socket stat for 10005:
00:08:29.027 # State      Recv-Q  Send-Q        Local Address:Port 
   Peer Address:Port
00:08:29.027 # FIN-WAIT-1 0       1691129    [dead:beef:1::1]:40964 
[dead:beef:1::1]:10005   timer:(persist,21sec,0) rto:0.2 cwnd:10 
reordering:0
00:08:29.031 # FAIL: Could not even run loopback v6 test
00:08:29.062 not ok 1 selftests: net/mptcp: mptcp_connect.sh # exit=1


I guess it is not the top priority because part 1 and 2 are OK.

Cheers,
Matt
Florian Westphal Dec. 6, 2019, 3:57 p.m. UTC | #6
Matthieu Baerts <matthieu.baerts@tessares.net> wrote:
> - 192c07c3fe9b: "squashed" patch 1/9 in "mptcp: Add handling of incoming
> MP_JOIN requests"
> - d6c1939a16c5..2cc726803b47: result
> - 0c24eaeadf60: "squashed" patch 2/9, part 1, in "mptcp: Add handling of
> incoming MP_JOIN requests"
> - f2a1eeb27eab: "squashed" patch 2/9, part 2, in "mptcp: Add handling of
> outgoing MP_JOIN requests"
> - 216b0b82f102: conflict in t/mptcp-introduce-MPTCP-retransmission-timer
> - e1012595cc3d: conflict in t/mptcp-add-and-use-mptcp-RTX-flag
> - 2cc726803b47..48f66afb6a81: result
> - the other patches have been added at the end. I had some conflicts with:
>  - "copy connection id from first subflow to mptcp socket"
>  - "make accept not allocate kernel socket struct"
>  - "subflow: place further subflows on new 'join_list'"
> 
> Tests on top of the commit introducing the kselftests are OK but not the one
> at the end of the series (export branch). Here is the trace (without
> CONFIG_KASAN nor PROVE_LOCKING)

I will have a look, thanks for reporting.
diff mbox series

Patch

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 36097008dd4a..feb4658b13a8 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -61,6 +61,15 @@  void mptcp_data_acked(struct sock *sk)
 		sock_hold(sk);
 }
 
+void mptcp_subflow_eof(struct sock *sk)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	if (!test_and_set_bit(MPTCP_WORK_EOF, &msk->flags) &&
+	    schedule_work(&msk->rtx_work))
+		sock_hold(sk);
+}
+
 static void mptcp_stop_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -95,22 +104,14 @@  static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
 {
 	struct sock *sk = (struct sock *)msk;
 	struct mptcp_subflow_context *subflow;
-	int receivers = 0;
 
 	sock_owned_by_me(sk);
 
 	mptcp_for_each_subflow(msk, subflow) {
 		if (subflow->data_avail)
 			return mptcp_subflow_tcp_sock(subflow);
-
-		receivers += !subflow->rx_eof;
 	}
 
-	/* hopefully temporary hack: propagate shutdown status from subflow
-	 * to msk, when all subflows agree on it
-	 */
-	if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN))
-		sk->sk_shutdown |= RCV_SHUTDOWN;
 	return NULL;
 }
 
@@ -789,6 +790,27 @@  static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	}
 }
 
+static void mptcp_check_for_eof(struct mptcp_sock *msk)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	int receivers = 0;
+
+	mptcp_for_each_subflow(msk, subflow)
+		receivers += !subflow->rx_eof;
+
+	if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+		/* hopefully temporary hack: propagate shutdown status
+		 * to msk, when all subflows agree on it
+		 */
+		sk->sk_shutdown |= RCV_SHUTDOWN;
+
+		smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
+		set_bit(MPTCP_DATA_READY, &msk->flags);
+		sk->sk_data_ready(sk);
+	}
+}
+
 static void mptcp_worker(struct work_struct *work)
 {
 	int orig_len, orig_offset, ret, mss_now = 0, size_goal = 0;
@@ -806,6 +828,9 @@  static void mptcp_worker(struct work_struct *work)
 	lock_sock(sk);
 	mptcp_clean_una(sk);
 
+	if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
+		mptcp_check_for_eof(msk);
+
 	if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
 		goto unlock;
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 990da61fa51b..cf3b4ce483d9 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -78,6 +78,7 @@ 
 #define MPTCP_DATA_READY	BIT(0)
 #define MPTCP_WORK_RTX		BIT(1)
 #define MPTCP_SEND_SPACE	BIT(2)
+#define MPTCP_WORK_EOF		BIT(3)
 
 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
 {
@@ -291,6 +292,7 @@  void mptcp_get_options(const struct sk_buff *skb,
 		       struct tcp_options_received *opt_rx);
 
 void mptcp_finish_connect(struct sock *sk);
+void mptcp_subflow_eof(struct sock *sk);
 bool mptcp_finish_join(struct sock *sk);
 void mptcp_data_acked(struct sock *sk);
 
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 8c13e9a145d5..fe6c61367bd7 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -639,11 +639,6 @@  bool mptcp_subflow_data_available(struct sock *sk)
 
 	if (!subflow_check_data_avail(sk)) {
 		subflow->data_avail = 0;
-		/* set EoF only there is no data available - we already spooled
-		 * all the pending skbs
-		 */
-		if (sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE)
-			subflow->rx_eof = 1;
 		return false;
 	}
 
@@ -666,8 +661,7 @@  static void subflow_data_ready(struct sock *sk)
 		return;
 	}
 
-	/* always propagate the EoF */
-	if (mptcp_subflow_data_available(sk) || subflow->rx_eof) {
+	if (mptcp_subflow_data_available(sk)) {
 		smp_mb__before_atomic();
 		set_bit(MPTCP_DATA_READY, &mptcp_sk(parent)->flags);
 		smp_mb__after_atomic();
@@ -807,15 +801,23 @@  static void __subflow_state_change(struct sock *sk)
 	rcu_read_unlock();
 }
 
+static bool subflow_is_done(const struct sock *sk)
+{
+	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
+}
+
 static void subflow_state_change(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-	struct sock *parent = subflow->conn;
+	struct sock *parent = READ_ONCE(subflow->conn);
 
 	__subflow_state_change(sk);
 
-	if (parent)
-		__subflow_state_change(parent);
+	if (parent && !(parent->sk_shutdown & RCV_SHUTDOWN) &&
+	    !subflow->rx_eof && subflow_is_done(sk)) {
+		subflow->rx_eof = 1;
+		mptcp_subflow_eof(parent);
+	}
 }
 
 static int subflow_ulp_init(struct sock *sk)