diff mbox series

[net-next,01/14] tcp: Add clean acked data hook

Message ID 20180320024510.7408-2-saeedm@mellanox.com
State Superseded, archived
Delegated to: David Miller
Headers show
Series TLS offload, netdev & MLX5 support | expand

Commit Message

Saeed Mahameed March 20, 2018, 2:44 a.m. UTC
From: Ilya Lesokhin <ilyal@mellanox.com>

Called when a TCP segment is acknowledged.
Could be used by application protocols that hold additional
metadata associated with the stream data.

This is required by TLS device offload to release
metadata associated with acknowledged TLS records.

Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/net/inet_connection_sock.h | 2 ++
 net/ipv4/tcp_input.c               | 2 ++
 2 files changed, 4 insertions(+)

Comments

Rao Shoaib March 20, 2018, 8:36 p.m. UTC | #1
On 03/19/2018 07:44 PM, Saeed Mahameed wrote:
> From: Ilya Lesokhin <ilyal@mellanox.com>
>
> Called when a TCP segment is acknowledged.
> Could be used by application protocols who hold additional
> metadata associated with the stream data.
>
> This is required by TLS device offload to release
> metadata associated with acknowledged TLS records.
>
> Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
> Signed-off-by: Boris Pismenny <borisp@mellanox.com>
> Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> ---
>   include/net/inet_connection_sock.h | 2 ++
>   net/ipv4/tcp_input.c               | 2 ++
>   2 files changed, 4 insertions(+)
>
> diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
> index b68fea022a82..2ab6667275df 100644
> --- a/include/net/inet_connection_sock.h
> +++ b/include/net/inet_connection_sock.h
> @@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops {
>    * @icsk_af_ops		   Operations which are AF_INET{4,6} specific
>    * @icsk_ulp_ops	   Pluggable ULP control hook
>    * @icsk_ulp_data	   ULP private data
> + * @icsk_clean_acked	   Clean acked data hook
>    * @icsk_listen_portaddr_node	hash to the portaddr listener hashtable
>    * @icsk_ca_state:	   Congestion control state
>    * @icsk_retransmits:	   Number of unrecovered [RTO] timeouts
> @@ -102,6 +103,7 @@ struct inet_connection_sock {
>   	const struct inet_connection_sock_af_ops *icsk_af_ops;
>   	const struct tcp_ulp_ops  *icsk_ulp_ops;
>   	void			  *icsk_ulp_data;
> +	void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
>   	struct hlist_node         icsk_listen_portaddr_node;
>   	unsigned int		  (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
>   	__u8			  icsk_ca_state:6,
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 451ef3012636..9854ecae7245 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3542,6 +3542,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
>   	if (after(ack, prior_snd_una)) {
>   		flag |= FLAG_SND_UNA_ADVANCED;
>   		icsk->icsk_retransmits = 0;
> +		if (icsk->icsk_clean_acked)
> +			icsk->icsk_clean_acked(sk, ack);
>   	}
>   
>   	prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
Per Dave we are not allowed to use function pointers any more, so why 
extend their use? I implemented a similar callback for my changes, but in 
my use case I need to call the metadata update function even when the 
packet does not ack any new data or has no payload. Is it possible to 
move this to, say, tcp_data_queue()?

Thanks,

Shoaib
Boris Pismenny March 21, 2018, 11:21 a.m. UTC | #2
On 3/20/2018 10:36 PM, Rao Shoaib wrote:
> 
> 
> On 03/19/2018 07:44 PM, Saeed Mahameed wrote:
>> From: Ilya Lesokhin <ilyal@mellanox.com>
>>
>> Called when a TCP segment is acknowledged.
>> Could be used by application protocols who hold additional
>> metadata associated with the stream data.
>>
>> This is required by TLS device offload to release
>> metadata associated with acknowledged TLS records.
>>
>> Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
>> Signed-off-by: Boris Pismenny <borisp@mellanox.com>
>> Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
>> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
>> ---
>>   include/net/inet_connection_sock.h | 2 ++
>>   net/ipv4/tcp_input.c               | 2 ++
>>   2 files changed, 4 insertions(+)
>>
>> diff --git a/include/net/inet_connection_sock.h 
>> b/include/net/inet_connection_sock.h
>> index b68fea022a82..2ab6667275df 100644
>> --- a/include/net/inet_connection_sock.h
>> +++ b/include/net/inet_connection_sock.h
>> @@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops {
>>    * @icsk_af_ops           Operations which are AF_INET{4,6} specific
>>    * @icsk_ulp_ops       Pluggable ULP control hook
>>    * @icsk_ulp_data       ULP private data
>> + * @icsk_clean_acked       Clean acked data hook
>>    * @icsk_listen_portaddr_node    hash to the portaddr listener 
>> hashtable
>>    * @icsk_ca_state:       Congestion control state
>>    * @icsk_retransmits:       Number of unrecovered [RTO] timeouts
>> @@ -102,6 +103,7 @@ struct inet_connection_sock {
>>       const struct inet_connection_sock_af_ops *icsk_af_ops;
>>       const struct tcp_ulp_ops  *icsk_ulp_ops;
>>       void              *icsk_ulp_data;
>> +    void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
>>       struct hlist_node         icsk_listen_portaddr_node;
>>       unsigned int          (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
>>       __u8              icsk_ca_state:6,
>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>> index 451ef3012636..9854ecae7245 100644
>> --- a/net/ipv4/tcp_input.c
>> +++ b/net/ipv4/tcp_input.c
>> @@ -3542,6 +3542,8 @@ static int tcp_ack(struct sock *sk, const struct 
>> sk_buff *skb, int flag)
>>       if (after(ack, prior_snd_una)) {
>>           flag |= FLAG_SND_UNA_ADVANCED;
>>           icsk->icsk_retransmits = 0;
>> +        if (icsk->icsk_clean_acked)
>> +            icsk->icsk_clean_acked(sk, ack);
>>       }
>>       prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : 
>> tp->snd_una;
> Per Dave we are not allowed to use function pointers any more, so why 
> extend their use. I implemented a similar callback for my changes but in 
> my use case I need to call the meta data update function even when the 
> packet does not ack any new data or has no payload. Is it possible to 
> move this to say tcp_data_queue() ?

Sometimes function pointers are unavoidable, for example when a module 
must change the functionality of a function. I think it is preferable to 
advance the kernel.

This function is used to free memory based on new acknowledged data. It 
is unrelated to whether data was received or not. So it is not possible 
to move this call to tcp_data_queue.

Just in case, I'll add a static key here to reduce the impact on the 
fast-path as once suggested by EricD on netdev2.2.

> 
> Thanks,
> 
> Shoaib
> 
> 

Best,
Boris.
Rao Shoaib March 21, 2018, 4:16 p.m. UTC | #3
On 03/21/2018 04:21 AM, Boris Pismenny wrote:
>
>
> On 3/20/2018 10:36 PM, Rao Shoaib wrote:
>>
>>
>> On 03/19/2018 07:44 PM, Saeed Mahameed wrote:
>>> From: Ilya Lesokhin <ilyal@mellanox.com>
>>>
>>> Called when a TCP segment is acknowledged.
>>> Could be used by application protocols who hold additional
>>> metadata associated with the stream data.
>>>
>>> This is required by TLS device offload to release
>>> metadata associated with acknowledged TLS records.
>>>
>>> Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
>>> Signed-off-by: Boris Pismenny <borisp@mellanox.com>
>>> Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
>>> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
>>> ---
>>>   include/net/inet_connection_sock.h | 2 ++
>>>   net/ipv4/tcp_input.c               | 2 ++
>>>   2 files changed, 4 insertions(+)
>>>
>>> diff --git a/include/net/inet_connection_sock.h 
>>> b/include/net/inet_connection_sock.h
>>> index b68fea022a82..2ab6667275df 100644
>>> --- a/include/net/inet_connection_sock.h
>>> +++ b/include/net/inet_connection_sock.h
>>> @@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops {
>>>    * @icsk_af_ops           Operations which are AF_INET{4,6} specific
>>>    * @icsk_ulp_ops       Pluggable ULP control hook
>>>    * @icsk_ulp_data       ULP private data
>>> + * @icsk_clean_acked       Clean acked data hook
>>>    * @icsk_listen_portaddr_node    hash to the portaddr listener 
>>> hashtable
>>>    * @icsk_ca_state:       Congestion control state
>>>    * @icsk_retransmits:       Number of unrecovered [RTO] timeouts
>>> @@ -102,6 +103,7 @@ struct inet_connection_sock {
>>>       const struct inet_connection_sock_af_ops *icsk_af_ops;
>>>       const struct tcp_ulp_ops  *icsk_ulp_ops;
>>>       void              *icsk_ulp_data;
>>> +    void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
>>>       struct hlist_node         icsk_listen_portaddr_node;
>>>       unsigned int          (*icsk_sync_mss)(struct sock *sk, u32 
>>> pmtu);
>>>       __u8              icsk_ca_state:6,
>>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>>> index 451ef3012636..9854ecae7245 100644
>>> --- a/net/ipv4/tcp_input.c
>>> +++ b/net/ipv4/tcp_input.c
>>> @@ -3542,6 +3542,8 @@ static int tcp_ack(struct sock *sk, const 
>>> struct sk_buff *skb, int flag)
>>>       if (after(ack, prior_snd_una)) {
>>>           flag |= FLAG_SND_UNA_ADVANCED;
>>>           icsk->icsk_retransmits = 0;
>>> +        if (icsk->icsk_clean_acked)
>>> +            icsk->icsk_clean_acked(sk, ack);
>>>       }
>>>       prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : 
>>> tp->snd_una;
>> Per Dave we are not allowed to use function pointers any more, so why 
>> extend their use. I implemented a similar callback for my changes but 
>> in my use case I need to call the meta data update function even when 
>> the packet does not ack any new data or has no payload. Is it 
>> possible to move this to say tcp_data_queue() ?
>
> Sometimes function pointers are unavoidable. For example, when a 
> module must change the functionality of a function. I think it is 
> preferable to advance the kernel
I agree; in fact I was using function pointers for exactly that reason, to 
change the functionality of a function. I asked Dave about the use and 
he said no (also note that the relevant CPU optimizations have been 
turned off on selected NICs due to the latest security issues -- on AMD 
CPUs the optimizations are not turned off). So it is Dave's decision -- 
I am hoping that he would reconsider and allow me to use pointers as 
well, as pointers solve the problem nicely and are used extensively.
>
> This function is used to free memory based on new acknowledged data. 
> It is unrelated to whether data was received or not. So it is not 
> possible to move this call to tcp_data_queue.
After reviewing my changes I believe I can work with the change. So go 
ahead.
>
> Just in case, I'll add a static key here to reduce the impact on the 
> fast-path as once suggested by EricD on netdev2.2.
Regards,

Shoaib
>
>>
>> Thanks,
>>
>> Shoaib
>>
>>
>
> Best,
> Boris.
David Miller March 21, 2018, 4:32 p.m. UTC | #4
From: Rao Shoaib <rao.shoaib@oracle.com>
Date: Wed, 21 Mar 2018 09:16:48 -0700

> I agree, in fact I was using function pointers for the exact reason,
> to change the functionality of a function. I asked Dave about the
> use and he said No (Also note that the relevant CPU optimizations
> have been turned off on selected NIC's due to the latest security
> issues -- On AMD CPU's the optimizations are not turned off). So it
> is Dave's decision -- I am hoping that he would reconsider and allow
> me to use pointers as well as pointers solve the problem nicely and
> are used extensively.

This situation is different from yours, Rao.

That proposal was to add indirect calls for things the TCP stack
internally can make internal state checks for.

Whereas this current patch discussed here is a driver offload hook,
which TCP cannot internally possibly know anything about.

I am fine with what Boris et al. are doing here.  It is a different
situation than yours.
diff mbox series

Patch

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index b68fea022a82..2ab6667275df 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -77,6 +77,7 @@  struct inet_connection_sock_af_ops {
  * @icsk_af_ops		   Operations which are AF_INET{4,6} specific
  * @icsk_ulp_ops	   Pluggable ULP control hook
  * @icsk_ulp_data	   ULP private data
+ * @icsk_clean_acked	   Clean acked data hook
  * @icsk_listen_portaddr_node	hash to the portaddr listener hashtable
  * @icsk_ca_state:	   Congestion control state
  * @icsk_retransmits:	   Number of unrecovered [RTO] timeouts
@@ -102,6 +103,7 @@  struct inet_connection_sock {
 	const struct inet_connection_sock_af_ops *icsk_af_ops;
 	const struct tcp_ulp_ops  *icsk_ulp_ops;
 	void			  *icsk_ulp_data;
+	void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
 	struct hlist_node         icsk_listen_portaddr_node;
 	unsigned int		  (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
 	__u8			  icsk_ca_state:6,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 451ef3012636..9854ecae7245 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3542,6 +3542,8 @@  static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (after(ack, prior_snd_una)) {
 		flag |= FLAG_SND_UNA_ADVANCED;
 		icsk->icsk_retransmits = 0;
+		if (icsk->icsk_clean_acked)
+			icsk->icsk_clean_acked(sk, ack);
 	}
 
 	prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;