
[net,2/4] tls: Fix write space handling

Message ID 20190226121235.20784-3-borisp@mellanox.com
State Changes Requested
Delegated to: David Miller
Series tls: Fix issues in tls_device

Commit Message

Boris Pismenny Feb. 26, 2019, 12:12 p.m. UTC
The TLS device path cannot use the SW context. This patch restores the
original tls device write space handler and moves the SW/device-specific
portions to the relevant files.

Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
---
 include/net/tls.h    |  3 +++
 net/tls/tls_device.c | 16 ++++++++++++++++
 net/tls/tls_main.c   | 17 +++++++++--------
 net/tls/tls_sw.c     | 15 +++++++++++++++
 4 files changed, 43 insertions(+), 8 deletions(-)

Comments

Vakul Garg Feb. 26, 2019, 12:49 p.m. UTC | #1
> -----Original Message-----
> From: Boris Pismenny <borisp@mellanox.com>
> Sent: Tuesday, February 26, 2019 5:43 PM
> To: aviadye@mellanox.com; davejwatson@fb.com;
> john.fastabend@gmail.com; daniel@iogearbox.net; Vakul Garg
> <vakul.garg@nxp.com>; netdev@vger.kernel.org
> Cc: eranbe@mellanox.com; borisp@mellanox.com
> Subject: [PATCH net 2/4] tls: Fix write space handling
> 
> TLS device cannot use the sw context. This patch returns the original
> tls device write space handler and moves the sw/device specific portions
> to the relevant files.
> 
> Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records
> for performance")
> Signed-off-by: Boris Pismenny <borisp@mellanox.com>
> Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
> ---
>  include/net/tls.h    |  3 +++
>  net/tls/tls_device.c | 16 ++++++++++++++++
>  net/tls/tls_main.c   | 17 +++++++++--------
>  net/tls/tls_sw.c     | 15 +++++++++++++++
>  4 files changed, 43 insertions(+), 8 deletions(-)
> 
> diff --git a/include/net/tls.h b/include/net/tls.h
> index a528a082da73..9d7c53737b13 100644
> --- a/include/net/tls.h
> +++ b/include/net/tls.h
> @@ -519,6 +519,9 @@ static inline bool tls_sw_has_ctx_tx(const struct sock
> *sk)
>  	return !!tls_sw_ctx_tx(ctx);
>  }
> 
> +int tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
> +int tls_device_write_space(struct sock *sk, struct tls_context *ctx);
> +
>  static inline struct tls_offload_context_rx *
>  tls_offload_ctx_rx(const struct tls_context *tls_ctx)
>  {
> diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
> index 3e5e8e021a87..e8988b3f3236 100644
> --- a/net/tls/tls_device.c
> +++ b/net/tls/tls_device.c
> @@ -546,6 +546,22 @@ static int tls_device_push_pending_record(struct
> sock *sk, int flags)
>  	return tls_push_data(sk, &msg_iter, 0, flags,
> TLS_RECORD_TYPE_DATA);
>  }
> 
> +int tls_device_write_space(struct sock *sk, struct tls_context *ctx)
> +{
> +	int rc = 0;
> +
> +	if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) {
> +		gfp_t sk_allocation = sk->sk_allocation;
> +
> +		sk->sk_allocation = GFP_ATOMIC;
> +		rc = tls_push_partial_record(sk, ctx,
> +					     MSG_DONTWAIT |
> MSG_NOSIGNAL);
> +		sk->sk_allocation = sk_allocation;
> +	}
> +
> +	return rc;
> +}
> +
>  void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
>  {
>  	struct tls_context *tls_ctx = tls_get_ctx(sk);
> diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
> index 7e05af75536d..11c1980a75cb 100644
> --- a/net/tls/tls_main.c
> +++ b/net/tls/tls_main.c
> @@ -212,7 +212,7 @@ int tls_push_partial_record(struct sock *sk, struct
> tls_context *ctx,
>  static void tls_write_space(struct sock *sk)
>  {
>  	struct tls_context *ctx = tls_get_ctx(sk);
> -	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
> +	int rc;
> 
>  	/* If in_tcp_sendpages call lower protocol write space handler
>  	 * to ensure we wake up any waiting operations there. For example
> @@ -223,14 +223,15 @@ static void tls_write_space(struct sock *sk)
>  		return;
>  	}
> 
> -	/* Schedule the transmission if tx list is ready */
> -	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
> -		/* Schedule the transmission */
> -		if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
> -			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
> -	}
> +#ifdef CONFIG_TLS_DEVICE
> +	if (ctx->tx_conf == TLS_HW)
> +		rc = tls_device_write_space(sk, ctx);
> +	else
> +#endif
> +		rc = tls_sw_write_space(sk, ctx);
> 
> -	ctx->sk_write_space(sk);
> +	if (!rc)

Why do we need to check 'rc'?

If it is required, then ' ctx->sk_write_space(sk)' can move to tls_device_write_space()
since  tls_sw_write_space() always returns '0'.
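
For clarity, the restructuring I have in mind would look roughly like this
(untested sketch only, reusing the helpers from this patch):

int tls_device_write_space(struct sock *sk, struct tls_context *ctx)
{
	int rc = 0;

	if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) {
		gfp_t sk_allocation = sk->sk_allocation;

		sk->sk_allocation = GFP_ATOMIC;
		rc = tls_push_partial_record(sk, ctx,
					     MSG_DONTWAIT | MSG_NOSIGNAL);
		sk->sk_allocation = sk_allocation;
	}

	/* wake writers here, only once the partial record has been pushed */
	if (!rc)
		ctx->sk_write_space(sk);

	return rc;
}

tls_write_space() would then not need to look at 'rc' at all.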

> +		ctx->sk_write_space(sk);
>  }
> 
>  static void tls_ctx_free(struct tls_context *ctx)
> diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
> index 1cc830582fa8..4afa67b00aaf 100644
> --- a/net/tls/tls_sw.c
> +++ b/net/tls/tls_sw.c
> @@ -2126,6 +2126,21 @@ static void tx_work_handler(struct work_struct
> *work)
>  	release_sock(sk);
>  }
> 
> +int tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
> +{
> +	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
> +
> +	/* Schedule the transmission if tx list is ready */
> +	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
> +		/* Schedule the transmission */
> +		if (!test_and_set_bit(BIT_TX_SCHEDULED,
> +				      &tx_ctx->tx_bitmask))
> +			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
> +	}
> +
> +	return 0;
> +}
> +
>  int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
>  {
>  	struct tls_context *tls_ctx = tls_get_ctx(sk);
> --
> 2.12.2
Boris Pismenny Feb. 26, 2019, 2:13 p.m. UTC | #2
On 2/26/2019 2:49 PM, Vakul Garg wrote:
> 
> 
>> -----Original Message-----
>> From: Boris Pismenny <borisp@mellanox.com>
>> Sent: Tuesday, February 26, 2019 5:43 PM
>> To: aviadye@mellanox.com; davejwatson@fb.com;
>> john.fastabend@gmail.com; daniel@iogearbox.net; Vakul Garg
>> <vakul.garg@nxp.com>; netdev@vger.kernel.org
>> Cc: eranbe@mellanox.com; borisp@mellanox.com
>> Subject: [PATCH net 2/4] tls: Fix write space handling
>>
>> TLS device cannot use the sw context. This patch returns the original
>> tls device write space handler and moves the sw/device specific portions
>> to the relevant files.
>>
>> Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records
>> for performance")
>> Signed-off-by: Boris Pismenny <borisp@mellanox.com>
>> Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
>> ---
>>   include/net/tls.h    |  3 +++
>>   net/tls/tls_device.c | 16 ++++++++++++++++
>>   net/tls/tls_main.c   | 17 +++++++++--------
>>   net/tls/tls_sw.c     | 15 +++++++++++++++
>>   4 files changed, 43 insertions(+), 8 deletions(-)
>>
>> diff --git a/include/net/tls.h b/include/net/tls.h
>> index a528a082da73..9d7c53737b13 100644
>> --- a/include/net/tls.h
>> +++ b/include/net/tls.h
>> @@ -519,6 +519,9 @@ static inline bool tls_sw_has_ctx_tx(const struct sock
>> *sk)
>>   	return !!tls_sw_ctx_tx(ctx);
>>   }
>>
>> +int tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
>> +int tls_device_write_space(struct sock *sk, struct tls_context *ctx);
>> +
>>   static inline struct tls_offload_context_rx *
>>   tls_offload_ctx_rx(const struct tls_context *tls_ctx)
>>   {
>> diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
>> index 3e5e8e021a87..e8988b3f3236 100644
>> --- a/net/tls/tls_device.c
>> +++ b/net/tls/tls_device.c
>> @@ -546,6 +546,22 @@ static int tls_device_push_pending_record(struct
>> sock *sk, int flags)
>>   	return tls_push_data(sk, &msg_iter, 0, flags,
>> TLS_RECORD_TYPE_DATA);
>>   }
>>
>> +int tls_device_write_space(struct sock *sk, struct tls_context *ctx)
>> +{
>> +	int rc = 0;
>> +
>> +	if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) {
>> +		gfp_t sk_allocation = sk->sk_allocation;
>> +
>> +		sk->sk_allocation = GFP_ATOMIC;
>> +		rc = tls_push_partial_record(sk, ctx,
>> +					     MSG_DONTWAIT |
>> MSG_NOSIGNAL);
>> +		sk->sk_allocation = sk_allocation;
>> +	}
>> +
>> +	return rc;
>> +}
>> +
>>   void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
>>   {
>>   	struct tls_context *tls_ctx = tls_get_ctx(sk);
>> diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
>> index 7e05af75536d..11c1980a75cb 100644
>> --- a/net/tls/tls_main.c
>> +++ b/net/tls/tls_main.c
>> @@ -212,7 +212,7 @@ int tls_push_partial_record(struct sock *sk, struct
>> tls_context *ctx,
>>   static void tls_write_space(struct sock *sk)
>>   {
>>   	struct tls_context *ctx = tls_get_ctx(sk);
>> -	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
>> +	int rc;
>>
>>   	/* If in_tcp_sendpages call lower protocol write space handler
>>   	 * to ensure we wake up any waiting operations there. For example
>> @@ -223,14 +223,15 @@ static void tls_write_space(struct sock *sk)
>>   		return;
>>   	}
>>
>> -	/* Schedule the transmission if tx list is ready */
>> -	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
>> -		/* Schedule the transmission */
>> -		if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
>> -			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
>> -	}
>> +#ifdef CONFIG_TLS_DEVICE
>> +	if (ctx->tx_conf == TLS_HW)
>> +		rc = tls_device_write_space(sk, ctx);
>> +	else
>> +#endif
>> +		rc = tls_sw_write_space(sk, ctx);
>>
>> -	ctx->sk_write_space(sk);
>> +	if (!rc)
> 
> Why do we need to check 'rc'?
> 
> If it is required, then ' ctx->sk_write_space(sk)' can move to tls_device_write_space()
> since  tls_sw_write_space() always returns '0'.
>

It is not necessary in the software code path due to the delayed work
that is scheduled there. But we need it in the device flow. I'll move it there.


>> +		ctx->sk_write_space(sk);
>>   }
>>
>>   static void tls_ctx_free(struct tls_context *ctx)
>> diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
>> index 1cc830582fa8..4afa67b00aaf 100644
>> --- a/net/tls/tls_sw.c
>> +++ b/net/tls/tls_sw.c
>> @@ -2126,6 +2126,21 @@ static void tx_work_handler(struct work_struct
>> *work)
>>   	release_sock(sk);
>>   }
>>
>> +int tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
>> +{
>> +	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
>> +
>> +	/* Schedule the transmission if tx list is ready */
>> +	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
>> +		/* Schedule the transmission */
>> +		if (!test_and_set_bit(BIT_TX_SCHEDULED,
>> +				      &tx_ctx->tx_bitmask))
>> +			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>   int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
>>   {
>>   	struct tls_context *tls_ctx = tls_get_ctx(sk);
>> --
>> 2.12.2
>
Vakul Garg March 11, 2019, 3:06 p.m. UTC | #3
> -----Original Message-----
> From: Boris Pismenny <borisp@mellanox.com>
> Sent: Tuesday, February 26, 2019 7:43 PM
> To: Vakul Garg <vakul.garg@nxp.com>; Aviad Yehezkel
> <aviadye@mellanox.com>; davejwatson@fb.com;
> john.fastabend@gmail.com; daniel@iogearbox.net; netdev@vger.kernel.org
> Cc: Eran Ben Elisha <eranbe@mellanox.com>
> Subject: Re: [PATCH net 2/4] tls: Fix write space handling
> 
> 
> 
> On 2/26/2019 2:49 PM, Vakul Garg wrote:
> >
> >
> >> -----Original Message-----
> >> From: Boris Pismenny <borisp@mellanox.com>
> >> Sent: Tuesday, February 26, 2019 5:43 PM
> >> To: aviadye@mellanox.com; davejwatson@fb.com;
> >> john.fastabend@gmail.com; daniel@iogearbox.net; Vakul Garg
> >> <vakul.garg@nxp.com>; netdev@vger.kernel.org
> >> Cc: eranbe@mellanox.com; borisp@mellanox.com
> >> Subject: [PATCH net 2/4] tls: Fix write space handling
> >>
> >> TLS device cannot use the sw context. This patch returns the original
> >> tls device write space handler and moves the sw/device specific
> >> portions to the relevant files.
> >>
> >> Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of
> >> records for performance")
> >> Signed-off-by: Boris Pismenny <borisp@mellanox.com>
> >> Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
> >> ---
> >>   include/net/tls.h    |  3 +++
> >>   net/tls/tls_device.c | 16 ++++++++++++++++
> >>   net/tls/tls_main.c   | 17 +++++++++--------
> >>   net/tls/tls_sw.c     | 15 +++++++++++++++
> >>   4 files changed, 43 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/include/net/tls.h b/include/net/tls.h index
> >> a528a082da73..9d7c53737b13 100644
> >> --- a/include/net/tls.h
> >> +++ b/include/net/tls.h
> >> @@ -519,6 +519,9 @@ static inline bool tls_sw_has_ctx_tx(const struct
> >> sock
> >> *sk)
> >>   	return !!tls_sw_ctx_tx(ctx);
> >>   }
> >>
> >> +int tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
> >> +int tls_device_write_space(struct sock *sk, struct tls_context
> >> +*ctx);
> >> +
> >>   static inline struct tls_offload_context_rx *
> >>   tls_offload_ctx_rx(const struct tls_context *tls_ctx)
> >>   {
> >> diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index
> >> 3e5e8e021a87..e8988b3f3236 100644
> >> --- a/net/tls/tls_device.c
> >> +++ b/net/tls/tls_device.c
> >> @@ -546,6 +546,22 @@ static int tls_device_push_pending_record(struct
> >> sock *sk, int flags)
> >>   	return tls_push_data(sk, &msg_iter, 0, flags,
> >> TLS_RECORD_TYPE_DATA);
> >>   }
> >>
> >> +int tls_device_write_space(struct sock *sk, struct tls_context *ctx)
> >> +{
> >> +	int rc = 0;
> >> +
> >> +	if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) {
> >> +		gfp_t sk_allocation = sk->sk_allocation;
> >> +
> >> +		sk->sk_allocation = GFP_ATOMIC;
> >> +		rc = tls_push_partial_record(sk, ctx,
> >> +					     MSG_DONTWAIT |
> >> MSG_NOSIGNAL);
> >> +		sk->sk_allocation = sk_allocation;
> >> +	}
> >> +
> >> +	return rc;
> >> +}
> >> +
> >>   void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
> >>   {
> >>   	struct tls_context *tls_ctx = tls_get_ctx(sk); diff --git
> >> a/net/tls/tls_main.c b/net/tls/tls_main.c index
> >> 7e05af75536d..11c1980a75cb 100644
> >> --- a/net/tls/tls_main.c
> >> +++ b/net/tls/tls_main.c
> >> @@ -212,7 +212,7 @@ int tls_push_partial_record(struct sock *sk,
> >> struct tls_context *ctx,
> >>   static void tls_write_space(struct sock *sk)
> >>   {
> >>   	struct tls_context *ctx = tls_get_ctx(sk);
> >> -	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
> >> +	int rc;
> >>
> >>   	/* If in_tcp_sendpages call lower protocol write space handler
> >>   	 * to ensure we wake up any waiting operations there. For example
> >> @@ -223,14 +223,15 @@ static void tls_write_space(struct sock *sk)
> >>   		return;
> >>   	}
> >>
> >> -	/* Schedule the transmission if tx list is ready */
> >> -	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
> >> -		/* Schedule the transmission */
> >> -		if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
> >> -			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
> >> -	}
> >> +#ifdef CONFIG_TLS_DEVICE
> >> +	if (ctx->tx_conf == TLS_HW)
> >> +		rc = tls_device_write_space(sk, ctx);
> >> +	else
> >> +#endif
> >> +		rc = tls_sw_write_space(sk, ctx);
> >>
> >> -	ctx->sk_write_space(sk);
> >> +	if (!rc)
> >
> > Why do we need to check 'rc'?
> >
> > If it is required, then ' ctx->sk_write_space(sk)' can move to
> > tls_device_write_space() since  tls_sw_write_space() always returns '0'.
> >
> 
> It is not necessary in the software code path due to the delayed work that is
> there. But, we need in the device flow. I'll move it there.
> 
 
Removal of ctx->sk_write_space(sk) has broken the software code flow.
The ktls send stalls and the user space application waits indefinitely.
When tls_write_space() gets invoked, TCP has been able to transmit some data.
Shouldn't we unconditionally call ctx->sk_write_space() in order to inform
the user space application that buffer space is available?

Please advise. I will submit the patch.
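
Concretely, the change I have in mind is roughly the following untested sketch
against this patch, i.e. drop the 'rc' check at the end of tls_write_space():

#ifdef CONFIG_TLS_DEVICE
	if (ctx->tx_conf == TLS_HW)
		tls_device_write_space(sk, ctx);
	else
#endif
		tls_sw_write_space(sk, ctx);

	/* always propagate the wakeup so waiting writers can make progress,
	 * even if pushing the pending/partial data did not fully succeed
	 */
	ctx->sk_write_space(sk);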

> 
> >> +		ctx->sk_write_space(sk);
> >>   }
> >>
> >>   static void tls_ctx_free(struct tls_context *ctx)
> >> diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
> >> index 1cc830582fa8..4afa67b00aaf 100644
> >> --- a/net/tls/tls_sw.c
> >> +++ b/net/tls/tls_sw.c
> >> @@ -2126,6 +2126,21 @@ static void tx_work_handler(struct
> work_struct
> >> *work)
> >>   	release_sock(sk);
> >>   }
> >>
> >> +int tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
> >> +{
> >> +	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
> >> +
> >> +	/* Schedule the transmission if tx list is ready */
> >> +	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
> >> +		/* Schedule the transmission */
> >> +		if (!test_and_set_bit(BIT_TX_SCHEDULED,
> >> +				      &tx_ctx->tx_bitmask))
> >> +			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
> >> +	}
> >> +
> >> +	return 0;
> >> +}
> >> +
> >>   int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
> >>   {
> >>   	struct tls_context *tls_ctx = tls_get_ctx(sk);
> >> --
> >> 2.12.2
> >
Boris Pismenny March 11, 2019, 3:59 p.m. UTC | #4
>>>> a/net/tls/tls_main.c b/net/tls/tls_main.c index
>>>> 7e05af75536d..11c1980a75cb 100644
>>>> --- a/net/tls/tls_main.c
>>>> +++ b/net/tls/tls_main.c
>>>> @@ -212,7 +212,7 @@ int tls_push_partial_record(struct sock *sk,
>>>> struct tls_context *ctx,
>>>>    static void tls_write_space(struct sock *sk)
>>>>    {
>>>>    	struct tls_context *ctx = tls_get_ctx(sk);
>>>> -	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
>>>> +	int rc;
>>>>
>>>>    	/* If in_tcp_sendpages call lower protocol write space handler
>>>>    	 * to ensure we wake up any waiting operations there. For example
>>>> @@ -223,14 +223,15 @@ static void tls_write_space(struct sock *sk)
>>>>    		return;
>>>>    	}
>>>>
>>>> -	/* Schedule the transmission if tx list is ready */
>>>> -	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
>>>> -		/* Schedule the transmission */
>>>> -		if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
>>>> -			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
>>>> -	}
>>>> +#ifdef CONFIG_TLS_DEVICE
>>>> +	if (ctx->tx_conf == TLS_HW)
>>>> +		rc = tls_device_write_space(sk, ctx);
>>>> +	else
>>>> +#endif
>>>> +		rc = tls_sw_write_space(sk, ctx);
>>>>
>>>> -	ctx->sk_write_space(sk);
>>>> +	if (!rc)
>>>
>>> Why do we need to check 'rc'?
>>>
>>> If it is required, then ' ctx->sk_write_space(sk)' can move to
>>> tls_device_write_space() since  tls_sw_write_space() always returns '0'.
>>>
>>
>> It is not necessary in the software code path due to the delayed work that is
>> there. But, we need in the device flow. I'll move it there.
>>
>   
> Removal of ctx->sk_write_space(sk) has broken software code flow.
> The ktls send stops and user space application waits infinitely.
> When tls_write_space() gets invoked tcp has been able to transmit some data.
> Shouldn't we unconditionally call ctx->sk_write_space() in order to inform
> user space application about availability of buffer space?
> 
> Please advise. I would submit the patch.

AFAIU, the code in the software path calls ctx->sk_write_space from its
scheduled work, which eventually calls tls_push_sg. Since this flow is
asynchronous, I thought it was best to postpone the notification and let
the work handle it.
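
For reference, the software-path flow I am referring to is roughly the
following (simplified sketch from memory of net/tls/tls_sw.c, not exact code):

static void tx_work_handler(struct work_struct *work)
{
	...
	if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
		return;

	lock_sock(sk);
	tls_tx_records(sk, -1);	/* transmit the ready records on tx_list */
	release_sock(sk);
}

tls_tx_records() ends up in tls_push_sg(), and tls_push_sg() calls
ctx->sk_write_space(sk) once the data has actually been handed to TCP, so
in the software path the wakeup is issued from there.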

> 
>>
>>>> +		ctx->sk_write_space(sk);
>>>>    }
>>>>
Vakul Garg March 12, 2019, 6:02 a.m. UTC | #5
> -----Original Message-----
> From: Boris Pismenny <borisp@mellanox.com>
> Sent: Monday, March 11, 2019 9:29 PM
> To: Vakul Garg <vakul.garg@nxp.com>; Aviad Yehezkel
> <aviadye@mellanox.com>; davejwatson@fb.com;
> john.fastabend@gmail.com; daniel@iogearbox.net; netdev@vger.kernel.org
> Cc: Eran Ben Elisha <eranbe@mellanox.com>
> Subject: Re: [PATCH net 2/4] tls: Fix write space handling
> 
> >>>> a/net/tls/tls_main.c b/net/tls/tls_main.c index
> >>>> 7e05af75536d..11c1980a75cb 100644
> >>>> --- a/net/tls/tls_main.c
> >>>> +++ b/net/tls/tls_main.c
> >>>> @@ -212,7 +212,7 @@ int tls_push_partial_record(struct sock *sk,
> >>>> struct tls_context *ctx,
> >>>>    static void tls_write_space(struct sock *sk)
> >>>>    {
> >>>>    	struct tls_context *ctx = tls_get_ctx(sk);
> >>>> -	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
> >>>> +	int rc;
> >>>>
> >>>>    	/* If in_tcp_sendpages call lower protocol write space handler
> >>>>    	 * to ensure we wake up any waiting operations there. For
> >>>> example @@ -223,14 +223,15 @@ static void tls_write_space(struct
> sock *sk)
> >>>>    		return;
> >>>>    	}
> >>>>
> >>>> -	/* Schedule the transmission if tx list is ready */
> >>>> -	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
> >>>> -		/* Schedule the transmission */
> >>>> -		if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
> >>>> -			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
> >>>> -	}
> >>>> +#ifdef CONFIG_TLS_DEVICE
> >>>> +	if (ctx->tx_conf == TLS_HW)
> >>>> +		rc = tls_device_write_space(sk, ctx);
> >>>> +	else
> >>>> +#endif
> >>>> +		rc = tls_sw_write_space(sk, ctx);
> >>>>
> >>>> -	ctx->sk_write_space(sk);
> >>>> +	if (!rc)
> >>>
> >>> Why do we need to check 'rc'?
> >>>
> >>> If it is required, then ' ctx->sk_write_space(sk)' can move to
> >>> tls_device_write_space() since  tls_sw_write_space() always returns '0'.
> >>>
> >>
> >> It is not necessary in the software code path due to the delayed work
> >> that is there. But, we need in the device flow. I'll move it there.
> >>
> >
> > Removal of ctx->sk_write_space(sk) has broken software code flow.
> > The ktls send stops and user space application waits infinitely.
> > When tls_write_space() gets invoked tcp has been able to transmit some
> data.
> > Shouldn't we unconditionally call ctx->sk_write_space() in order to
> > inform user space application about availability of buffer space?
> >
> > Please advise. I would submit the patch.
> 
> AFAIU, the code in the software path calls ctx->sk_write_space in its
> schedule work which eventually calls tls_push_sg. Since this flow is
> asynchronous, I thought it was best to postpone the notification and let the
> work handle it.
> 

As per my reading of the code, sk->sk_write_space() is called from the TCP code
itself after transmitting socket data.

sk->sk_write_space() is mapped to tls_write_space(), which then internally calls
tls_sw_write_space() or tls_device_write_space(). Inside tls_sw_write_space()
we may not schedule the delayed work, but we still need to inform user space
that buffer space is available by invoking ctx->sk_write_space().

On the other hand, calling ctx->sk_write_space() from tls_push_sg() seems
superfluous and should be removed. For both the device and software flows,
ctx->sk_write_space() should be invoked from tls_write_space(), as before.
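
In patch form it would be roughly the following (untested sketch, based on my
reading of the code):

	/* net/tls/tls_main.c, end of tls_push_sg(): drop the superfluous call */
	ctx->in_tcp_sendpages = false;
-	ctx->sk_write_space(sk);

	return 0;

together with calling ctx->sk_write_space(sk) unconditionally at the end of
tls_write_space(), after the tls_device_write_space()/tls_sw_write_space()
dispatch, as I described above.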


> >
> >>
> >>>> +		ctx->sk_write_space(sk);
> >>>>    }
> >>>>

Patch

diff --git a/include/net/tls.h b/include/net/tls.h
index a528a082da73..9d7c53737b13 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -519,6 +519,9 @@  static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
 	return !!tls_sw_ctx_tx(ctx);
 }
 
+int tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
+int tls_device_write_space(struct sock *sk, struct tls_context *ctx);
+
 static inline struct tls_offload_context_rx *
 tls_offload_ctx_rx(const struct tls_context *tls_ctx)
 {
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 3e5e8e021a87..e8988b3f3236 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -546,6 +546,22 @@  static int tls_device_push_pending_record(struct sock *sk, int flags)
 	return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
 }
 
+int tls_device_write_space(struct sock *sk, struct tls_context *ctx)
+{
+	int rc = 0;
+
+	if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) {
+		gfp_t sk_allocation = sk->sk_allocation;
+
+		sk->sk_allocation = GFP_ATOMIC;
+		rc = tls_push_partial_record(sk, ctx,
+					     MSG_DONTWAIT | MSG_NOSIGNAL);
+		sk->sk_allocation = sk_allocation;
+	}
+
+	return rc;
+}
+
 void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 7e05af75536d..11c1980a75cb 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -212,7 +212,7 @@  int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
 static void tls_write_space(struct sock *sk)
 {
 	struct tls_context *ctx = tls_get_ctx(sk);
-	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
+	int rc;
 
 	/* If in_tcp_sendpages call lower protocol write space handler
 	 * to ensure we wake up any waiting operations there. For example
@@ -223,14 +223,15 @@  static void tls_write_space(struct sock *sk)
 		return;
 	}
 
-	/* Schedule the transmission if tx list is ready */
-	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
-		/* Schedule the transmission */
-		if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
-			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
-	}
+#ifdef CONFIG_TLS_DEVICE
+	if (ctx->tx_conf == TLS_HW)
+		rc = tls_device_write_space(sk, ctx);
+	else
+#endif
+		rc = tls_sw_write_space(sk, ctx);
 
-	ctx->sk_write_space(sk);
+	if (!rc)
+		ctx->sk_write_space(sk);
 }
 
 static void tls_ctx_free(struct tls_context *ctx)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1cc830582fa8..4afa67b00aaf 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2126,6 +2126,21 @@  static void tx_work_handler(struct work_struct *work)
 	release_sock(sk);
 }
 
+int tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
+{
+	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
+
+	/* Schedule the transmission if tx list is ready */
+	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
+		/* Schedule the transmission */
+		if (!test_and_set_bit(BIT_TX_SCHEDULED,
+				      &tx_ctx->tx_bitmask))
+			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
+	}
+
+	return 0;
+}
+
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);