diff mbox series

[SRU,F:linux-bluefield,1/2] netfilter: conntrack: remove offload_pickup sysctl again

Message ID 1653433799-31384-1-git-send-email-bodong@nvidia.com
State New
Headers show
Series [SRU,F:linux-bluefield,1/2] netfilter: conntrack: remove offload_pickup sysctl again | expand

Commit Message

Bodong Wang May 24, 2022, 11:09 p.m. UTC
From: Florian Westphal <fw@strlen.de>

BugLink: https://bugs.launchpad.net/bugs/1934401

These two sysctls were added because the hardcoded defaults (2 minutes,
tcp, 30 seconds, udp) turned out to be too low for some setups.

They appeared in 5.14-rc1 so it should be fine to remove it again.

Marcelo convinced me that there should be no difference between a flow
that was offloaded vs. a flow that was not wrt. timeout handling.
Thus the default is changed to those for TCP established and UDP stream,
5 days and 120 seconds, respectively.

Marcelo also suggested to account for the timeout value used for the
offloading, this avoids increase beyond the value in the conntrack-sysctl
and will also instantly expire the conntrack entry with altered sysctls.

Example:
   nf_conntrack_udp_timeout_stream=60
   nf_flowtable_udp_timeout=60

This will remove offloaded udp flows after one minute, rather than two.

An earlier version of this patch also cleared the ASSURED bit to
allow nf_conntrack to evict the entry via early_drop (i.e., table full).
However, it looks like we can safely assume that connection timed out
via HW is still in established state, so this isn't needed.

Quoting Oz:
 [..] the hardware sends all packets with a set FIN flags to sw.
 [..] Connections that are aged in hardware are expected to be in the
 established state.

In case it turns out that back-to-sw-path transition can occur for
'dodgy' connections too (e.g., one side disappeared while software-path
would have been in RETRANS timeout), we can adjust this later.

Cc: Oz Shlomo <ozsh@nvidia.com>
Cc: Paul Blakey <paulb@nvidia.com>
Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
(backported from commit 4592ee7f525c4683ec9e290381601fdee50ae110)
Signed-off-by: Bodong Wang <bodong@nvidia.com>
[bodong: remove doc file]

Conflicts:
	Documentation/networking/nf_conntrack-sysctl.rst
---
 include/net/netns/conntrack.h           |  2 --
 net/netfilter/nf_conntrack_proto_tcp.c  |  1 -
 net/netfilter/nf_conntrack_proto_udp.c  |  1 -
 net/netfilter/nf_conntrack_standalone.c | 16 ----------------
 net/netfilter/nf_flow_table_core.c      | 11 ++++++++---
 5 files changed, 8 insertions(+), 23 deletions(-)

Comments

Tim Gardner May 25, 2022, 2:47 p.m. UTC | #1
Acked-by: Tim Gardner <tim.gardner@canonical.com>

On 5/24/22 17:09, Bodong Wang wrote:
> From: Florian Westphal <fw@strlen.de>
> 
> BugLink: https://bugs.launchpad.net/bugs/1934401
> 
> These two sysctls were added because the hardcoded defaults (2 minutes,
> tcp, 30 seconds, udp) turned out to be too low for some setups.
> 
> They appeared in 5.14-rc1 so it should be fine to remove it again.
> 
> Marcelo convinced me that there should be no difference between a flow
> that was offloaded vs. a flow that was not wrt. timeout handling.
> Thus the default is changed to those for TCP established and UDP stream,
> 5 days and 120 seconds, respectively.
> 
> Marcelo also suggested to account for the timeout value used for the
> offloading, this avoids increase beyond the value in the conntrack-sysctl
> and will also instantly expire the conntrack entry with altered sysctls.
> 
> Example:
>     nf_conntrack_udp_timeout_stream=60
>     nf_flowtable_udp_timeout=60
> 
> This will remove offloaded udp flows after one minute, rather than two.
> 
> An earlier version of this patch also cleared the ASSURED bit to
> allow nf_conntrack to evict the entry via early_drop (i.e., table full).
> However, it looks like we can safely assume that connection timed out
> via HW is still in established state, so this isn't needed.
> 
> Quoting Oz:
>   [..] the hardware sends all packets with a set FIN flags to sw.
>   [..] Connections that are aged in hardware are expected to be in the
>   established state.
> 
> In case it turns out that back-to-sw-path transition can occur for
> 'dodgy' connections too (e.g., one side disappeared while software-path
> would have been in RETRANS timeout), we can adjust this later.
> 
> Cc: Oz Shlomo <ozsh@nvidia.com>
> Cc: Paul Blakey <paulb@nvidia.com>
> Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
> (backported from commit 4592ee7f525c4683ec9e290381601fdee50ae110)
> Signed-off-by: Bodong Wang <bodong@nvidia.com>
> [bodong: remove doc file]
> 
> Conflicts:
> 	Documentation/networking/nf_conntrack-sysctl.rst
> ---
>   include/net/netns/conntrack.h           |  2 --
>   net/netfilter/nf_conntrack_proto_tcp.c  |  1 -
>   net/netfilter/nf_conntrack_proto_udp.c  |  1 -
>   net/netfilter/nf_conntrack_standalone.c | 16 ----------------
>   net/netfilter/nf_flow_table_core.c      | 11 ++++++++---
>   5 files changed, 8 insertions(+), 23 deletions(-)
> 
> diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
> index 67bbaa6..9e3963c8 100644
> --- a/include/net/netns/conntrack.h
> +++ b/include/net/netns/conntrack.h
> @@ -29,7 +29,6 @@ struct nf_tcp_net {
>   	int tcp_max_retrans;
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	unsigned int offload_timeout;
> -	unsigned int offload_pickup;
>   #endif
>   };
>   
> @@ -43,7 +42,6 @@ struct nf_udp_net {
>   	unsigned int timeouts[UDP_CT_MAX];
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	unsigned int offload_timeout;
> -	unsigned int offload_pickup;
>   #endif
>   };
>   
> diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
> index d8e554c..0e51fb7 100644
> --- a/net/netfilter/nf_conntrack_proto_tcp.c
> +++ b/net/netfilter/nf_conntrack_proto_tcp.c
> @@ -1435,7 +1435,6 @@ void nf_conntrack_tcp_init_net(struct net *net)
>   
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	tn->offload_timeout = 30 * HZ;
> -	tn->offload_pickup = 120 * HZ;
>   #endif
>   }
>   
> diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
> index b3e135e..ee336d7 100644
> --- a/net/netfilter/nf_conntrack_proto_udp.c
> +++ b/net/netfilter/nf_conntrack_proto_udp.c
> @@ -282,7 +282,6 @@ void nf_conntrack_udp_init_net(struct net *net)
>   
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	un->offload_timeout = 30 * HZ;
> -	un->offload_pickup = 30 * HZ;
>   #endif
>   }
>   
> diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
> index ac8f12b..1e78ad8 100644
> --- a/net/netfilter/nf_conntrack_standalone.c
> +++ b/net/netfilter/nf_conntrack_standalone.c
> @@ -569,7 +569,6 @@ enum nf_ct_sysctl_index {
>   	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
> -	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
>   #endif
>   	NF_SYSCTL_CT_PROTO_TCP_LOOSE,
>   	NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
> @@ -578,7 +577,6 @@ enum nf_ct_sysctl_index {
>   	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
> -	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
>   #endif
>   	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
>   	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
> @@ -773,12 +771,6 @@ enum nf_ct_sysctl_index {
>   		.mode		= 0644,
>   		.proc_handler	= proc_dointvec_jiffies,
>   	},
> -	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
> -		.procname	= "nf_flowtable_tcp_pickup",
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_jiffies,
> -	},
>   #endif
>   	[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
>   		.procname	= "nf_conntrack_tcp_loose",
> @@ -821,12 +813,6 @@ enum nf_ct_sysctl_index {
>   		.mode		= 0644,
>   		.proc_handler	= proc_dointvec_jiffies,
>   	},
> -	[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
> -		.procname	= "nf_flowtable_udp_pickup",
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_jiffies,
> -	},
>   #endif
>   	[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
>   		.procname	= "nf_conntrack_icmp_timeout",
> @@ -1006,7 +992,6 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
>   
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
> -	table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
>   #endif
>   
>   }
> @@ -1098,7 +1083,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
>   	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
> -	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
>   #endif
>   
>   	nf_conntrack_standalone_init_tcp_sysctl(net, table);
> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
> index 8ed2752..ba86bc0 100644
> --- a/net/netfilter/nf_flow_table_core.c
> +++ b/net/netfilter/nf_flow_table_core.c
> @@ -136,7 +136,7 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
>   	const struct nf_conntrack_l4proto *l4proto;
>   	struct net *net = nf_ct_net(ct);
>   	int l4num = nf_ct_protonum(ct);
> -	unsigned int timeout;
> +	s32 timeout;
>   
>   	l4proto = nf_ct_l4proto_find(l4num);
>   	if (!l4proto)
> @@ -145,15 +145,20 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
>   	if (l4num == IPPROTO_TCP) {
>   		struct nf_tcp_net *tn = nf_tcp_pernet(net);
>   
> -		timeout = tn->offload_pickup;
> +		timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
> +		timeout -= tn->offload_timeout;
>   	} else if (l4num == IPPROTO_UDP) {
>   		struct nf_udp_net *tn = nf_udp_pernet(net);
>   
> -		timeout = tn->offload_pickup;
> +		timeout = tn->timeouts[UDP_CT_REPLIED];
> +		timeout -= tn->offload_timeout;
>   	} else {
>   		return;
>   	}
>   
> +	if (timeout < 0)
> +		timeout = 0;
> +
>   	if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
>   		ct->timeout = nfct_time_stamp + timeout;
>   }
Zachary Tahenakos May 25, 2022, 2:57 p.m. UTC | #2
The first patch is referencing a buglink to a bug that is already fixed. 
This patch should have its own buglink explaining its purpose. If it is 
undoing the work in the previous buglink, then it can reference it. 
Also, a minor issue, but this submission also lacks a cover letter.

-Zack

On 5/24/22 7:09 PM, Bodong Wang wrote:
> From: Florian Westphal <fw@strlen.de>
>
> BugLink: https://bugs.launchpad.net/bugs/1934401
>
> These two sysctls were added because the hardcoded defaults (2 minutes,
> tcp, 30 seconds, udp) turned out to be too low for some setups.
>
> They appeared in 5.14-rc1 so it should be fine to remove it again.
>
> Marcelo convinced me that there should be no difference between a flow
> that was offloaded vs. a flow that was not wrt. timeout handling.
> Thus the default is changed to those for TCP established and UDP stream,
> 5 days and 120 seconds, respectively.
>
> Marcelo also suggested to account for the timeout value used for the
> offloading, this avoids increase beyond the value in the conntrack-sysctl
> and will also instantly expire the conntrack entry with altered sysctls.
>
> Example:
>     nf_conntrack_udp_timeout_stream=60
>     nf_flowtable_udp_timeout=60
>
> This will remove offloaded udp flows after one minute, rather than two.
>
> An earlier version of this patch also cleared the ASSURED bit to
> allow nf_conntrack to evict the entry via early_drop (i.e., table full).
> However, it looks like we can safely assume that connection timed out
> via HW is still in established state, so this isn't needed.
>
> Quoting Oz:
>   [..] the hardware sends all packets with a set FIN flags to sw.
>   [..] Connections that are aged in hardware are expected to be in the
>   established state.
>
> In case it turns out that back-to-sw-path transition can occur for
> 'dodgy' connections too (e.g., one side disappeared while software-path
> would have been in RETRANS timeout), we can adjust this later.
>
> Cc: Oz Shlomo <ozsh@nvidia.com>
> Cc: Paul Blakey <paulb@nvidia.com>
> Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
> (backported from commit 4592ee7f525c4683ec9e290381601fdee50ae110)
> Signed-off-by: Bodong Wang <bodong@nvidia.com>
> [bodong: remove doc file]
>
> Conflicts:
> 	Documentation/networking/nf_conntrack-sysctl.rst
> ---
>   include/net/netns/conntrack.h           |  2 --
>   net/netfilter/nf_conntrack_proto_tcp.c  |  1 -
>   net/netfilter/nf_conntrack_proto_udp.c  |  1 -
>   net/netfilter/nf_conntrack_standalone.c | 16 ----------------
>   net/netfilter/nf_flow_table_core.c      | 11 ++++++++---
>   5 files changed, 8 insertions(+), 23 deletions(-)
>
> diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
> index 67bbaa6..9e3963c8 100644
> --- a/include/net/netns/conntrack.h
> +++ b/include/net/netns/conntrack.h
> @@ -29,7 +29,6 @@ struct nf_tcp_net {
>   	int tcp_max_retrans;
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	unsigned int offload_timeout;
> -	unsigned int offload_pickup;
>   #endif
>   };
>   
> @@ -43,7 +42,6 @@ struct nf_udp_net {
>   	unsigned int timeouts[UDP_CT_MAX];
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	unsigned int offload_timeout;
> -	unsigned int offload_pickup;
>   #endif
>   };
>   
> diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
> index d8e554c..0e51fb7 100644
> --- a/net/netfilter/nf_conntrack_proto_tcp.c
> +++ b/net/netfilter/nf_conntrack_proto_tcp.c
> @@ -1435,7 +1435,6 @@ void nf_conntrack_tcp_init_net(struct net *net)
>   
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	tn->offload_timeout = 30 * HZ;
> -	tn->offload_pickup = 120 * HZ;
>   #endif
>   }
>   
> diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
> index b3e135e..ee336d7 100644
> --- a/net/netfilter/nf_conntrack_proto_udp.c
> +++ b/net/netfilter/nf_conntrack_proto_udp.c
> @@ -282,7 +282,6 @@ void nf_conntrack_udp_init_net(struct net *net)
>   
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	un->offload_timeout = 30 * HZ;
> -	un->offload_pickup = 30 * HZ;
>   #endif
>   }
>   
> diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
> index ac8f12b..1e78ad8 100644
> --- a/net/netfilter/nf_conntrack_standalone.c
> +++ b/net/netfilter/nf_conntrack_standalone.c
> @@ -569,7 +569,6 @@ enum nf_ct_sysctl_index {
>   	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
> -	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
>   #endif
>   	NF_SYSCTL_CT_PROTO_TCP_LOOSE,
>   	NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
> @@ -578,7 +577,6 @@ enum nf_ct_sysctl_index {
>   	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
> -	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
>   #endif
>   	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
>   	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
> @@ -773,12 +771,6 @@ enum nf_ct_sysctl_index {
>   		.mode		= 0644,
>   		.proc_handler	= proc_dointvec_jiffies,
>   	},
> -	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
> -		.procname	= "nf_flowtable_tcp_pickup",
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_jiffies,
> -	},
>   #endif
>   	[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
>   		.procname	= "nf_conntrack_tcp_loose",
> @@ -821,12 +813,6 @@ enum nf_ct_sysctl_index {
>   		.mode		= 0644,
>   		.proc_handler	= proc_dointvec_jiffies,
>   	},
> -	[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
> -		.procname	= "nf_flowtable_udp_pickup",
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_jiffies,
> -	},
>   #endif
>   	[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
>   		.procname	= "nf_conntrack_icmp_timeout",
> @@ -1006,7 +992,6 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
>   
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
> -	table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
>   #endif
>   
>   }
> @@ -1098,7 +1083,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
>   	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>   	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
> -	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
>   #endif
>   
>   	nf_conntrack_standalone_init_tcp_sysctl(net, table);
> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
> index 8ed2752..ba86bc0 100644
> --- a/net/netfilter/nf_flow_table_core.c
> +++ b/net/netfilter/nf_flow_table_core.c
> @@ -136,7 +136,7 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
>   	const struct nf_conntrack_l4proto *l4proto;
>   	struct net *net = nf_ct_net(ct);
>   	int l4num = nf_ct_protonum(ct);
> -	unsigned int timeout;
> +	s32 timeout;
>   
>   	l4proto = nf_ct_l4proto_find(l4num);
>   	if (!l4proto)
> @@ -145,15 +145,20 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
>   	if (l4num == IPPROTO_TCP) {
>   		struct nf_tcp_net *tn = nf_tcp_pernet(net);
>   
> -		timeout = tn->offload_pickup;
> +		timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
> +		timeout -= tn->offload_timeout;
>   	} else if (l4num == IPPROTO_UDP) {
>   		struct nf_udp_net *tn = nf_udp_pernet(net);
>   
> -		timeout = tn->offload_pickup;
> +		timeout = tn->timeouts[UDP_CT_REPLIED];
> +		timeout -= tn->offload_timeout;
>   	} else {
>   		return;
>   	}
>   
> +	if (timeout < 0)
> +		timeout = 0;
> +
>   	if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
>   		ct->timeout = nfct_time_stamp + timeout;
>   }
Bodong Wang May 25, 2022, 3:37 p.m. UTC | #3
Will have new buglink.
The commit msg is pretty self explained, no cover letter is needed for 
this single patch.

On 5/25/2022 9:57 AM, Zachary Tahenakos wrote:
> The first patch is referencing a buglink to a bug that is already 
> fixed. This patch should have its own buglink explaining its purpose. 
> If it is undoing the work in the previous buglink, then it can 
> reference it. Also, a minor issue, but this submission also lacks a 
> cover letter.
>
> -Zack
>
> On 5/24/22 7:09 PM, Bodong Wang wrote:
>> From: Florian Westphal <fw@strlen.de>
>>
>> BugLink: https://bugs.launchpad.net/bugs/1934401
>>
>> These two sysctls were added because the hardcoded defaults (2 minutes,
>> tcp, 30 seconds, udp) turned out to be too low for some setups.
>>
>> They appeared in 5.14-rc1 so it should be fine to remove it again.
>>
>> Marcelo convinced me that there should be no difference between a flow
>> that was offloaded vs. a flow that was not wrt. timeout handling.
>> Thus the default is changed to those for TCP established and UDP stream,
>> 5 days and 120 seconds, respectively.
>>
>> Marcelo also suggested to account for the timeout value used for the
>> offloading, this avoids increase beyond the value in the 
>> conntrack-sysctl
>> and will also instantly expire the conntrack entry with altered sysctls.
>>
>> Example:
>>     nf_conntrack_udp_timeout_stream=60
>>     nf_flowtable_udp_timeout=60
>>
>> This will remove offloaded udp flows after one minute, rather than two.
>>
>> An earlier version of this patch also cleared the ASSURED bit to
>> allow nf_conntrack to evict the entry via early_drop (i.e., table full).
>> However, it looks like we can safely assume that connection timed out
>> via HW is still in established state, so this isn't needed.
>>
>> Quoting Oz:
>>   [..] the hardware sends all packets with a set FIN flags to sw.
>>   [..] Connections that are aged in hardware are expected to be in the
>>   established state.
>>
>> In case it turns out that back-to-sw-path transition can occur for
>> 'dodgy' connections too (e.g., one side disappeared while software-path
>> would have been in RETRANS timeout), we can adjust this later.
>>
>> Cc: Oz Shlomo <ozsh@nvidia.com>
>> Cc: Paul Blakey <paulb@nvidia.com>
>> Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
>> Signed-off-by: Florian Westphal <fw@strlen.de>
>> Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
>> Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
>> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
>> (backported from commit 4592ee7f525c4683ec9e290381601fdee50ae110)
>> Signed-off-by: Bodong Wang <bodong@nvidia.com>
>> [bodong: remove doc file]
>>
>> Conflicts:
>>     Documentation/networking/nf_conntrack-sysctl.rst
>> ---
>>   include/net/netns/conntrack.h           |  2 --
>>   net/netfilter/nf_conntrack_proto_tcp.c  |  1 -
>>   net/netfilter/nf_conntrack_proto_udp.c  |  1 -
>>   net/netfilter/nf_conntrack_standalone.c | 16 ----------------
>>   net/netfilter/nf_flow_table_core.c      | 11 ++++++++---
>>   5 files changed, 8 insertions(+), 23 deletions(-)
>>
>> diff --git a/include/net/netns/conntrack.h 
>> b/include/net/netns/conntrack.h
>> index 67bbaa6..9e3963c8 100644
>> --- a/include/net/netns/conntrack.h
>> +++ b/include/net/netns/conntrack.h
>> @@ -29,7 +29,6 @@ struct nf_tcp_net {
>>       int tcp_max_retrans;
>>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>>       unsigned int offload_timeout;
>> -    unsigned int offload_pickup;
>>   #endif
>>   };
>>   @@ -43,7 +42,6 @@ struct nf_udp_net {
>>       unsigned int timeouts[UDP_CT_MAX];
>>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>>       unsigned int offload_timeout;
>> -    unsigned int offload_pickup;
>>   #endif
>>   };
>>   diff --git a/net/netfilter/nf_conntrack_proto_tcp.c 
>> b/net/netfilter/nf_conntrack_proto_tcp.c
>> index d8e554c..0e51fb7 100644
>> --- a/net/netfilter/nf_conntrack_proto_tcp.c
>> +++ b/net/netfilter/nf_conntrack_proto_tcp.c
>> @@ -1435,7 +1435,6 @@ void nf_conntrack_tcp_init_net(struct net *net)
>>     #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>>       tn->offload_timeout = 30 * HZ;
>> -    tn->offload_pickup = 120 * HZ;
>>   #endif
>>   }
>>   diff --git a/net/netfilter/nf_conntrack_proto_udp.c 
>> b/net/netfilter/nf_conntrack_proto_udp.c
>> index b3e135e..ee336d7 100644
>> --- a/net/netfilter/nf_conntrack_proto_udp.c
>> +++ b/net/netfilter/nf_conntrack_proto_udp.c
>> @@ -282,7 +282,6 @@ void nf_conntrack_udp_init_net(struct net *net)
>>     #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>>       un->offload_timeout = 30 * HZ;
>> -    un->offload_pickup = 30 * HZ;
>>   #endif
>>   }
>>   diff --git a/net/netfilter/nf_conntrack_standalone.c 
>> b/net/netfilter/nf_conntrack_standalone.c
>> index ac8f12b..1e78ad8 100644
>> --- a/net/netfilter/nf_conntrack_standalone.c
>> +++ b/net/netfilter/nf_conntrack_standalone.c
>> @@ -569,7 +569,6 @@ enum nf_ct_sysctl_index {
>>       NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
>>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>>       NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
>> -    NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
>>   #endif
>>       NF_SYSCTL_CT_PROTO_TCP_LOOSE,
>>       NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
>> @@ -578,7 +577,6 @@ enum nf_ct_sysctl_index {
>>       NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
>>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>>       NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
>> -    NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
>>   #endif
>>       NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
>>       NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
>> @@ -773,12 +771,6 @@ enum nf_ct_sysctl_index {
>>           .mode        = 0644,
>>           .proc_handler    = proc_dointvec_jiffies,
>>       },
>> -    [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
>> -        .procname    = "nf_flowtable_tcp_pickup",
>> -        .maxlen        = sizeof(unsigned int),
>> -        .mode        = 0644,
>> -        .proc_handler    = proc_dointvec_jiffies,
>> -    },
>>   #endif
>>       [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
>>           .procname    = "nf_conntrack_tcp_loose",
>> @@ -821,12 +813,6 @@ enum nf_ct_sysctl_index {
>>           .mode        = 0644,
>>           .proc_handler    = proc_dointvec_jiffies,
>>       },
>> -    [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
>> -        .procname    = "nf_flowtable_udp_pickup",
>> -        .maxlen        = sizeof(unsigned int),
>> -        .mode        = 0644,
>> -        .proc_handler    = proc_dointvec_jiffies,
>> -    },
>>   #endif
>>       [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
>>           .procname    = "nf_conntrack_icmp_timeout",
>> @@ -1006,7 +992,6 @@ static void 
>> nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
>>     #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>>       table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = 
>> &tn->offload_timeout;
>> -    table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = 
>> &tn->offload_pickup;
>>   #endif
>>     }
>> @@ -1098,7 +1083,6 @@ static int 
>> nf_conntrack_standalone_init_sysctl(struct net *net)
>>       table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = 
>> &un->timeouts[UDP_CT_REPLIED];
>>   #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
>>       table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = 
>> &un->offload_timeout;
>> -    table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = 
>> &un->offload_pickup;
>>   #endif
>>         nf_conntrack_standalone_init_tcp_sysctl(net, table);
>> diff --git a/net/netfilter/nf_flow_table_core.c 
>> b/net/netfilter/nf_flow_table_core.c
>> index 8ed2752..ba86bc0 100644
>> --- a/net/netfilter/nf_flow_table_core.c
>> +++ b/net/netfilter/nf_flow_table_core.c
>> @@ -136,7 +136,7 @@ static void flow_offload_fixup_ct_timeout(struct 
>> nf_conn *ct)
>>       const struct nf_conntrack_l4proto *l4proto;
>>       struct net *net = nf_ct_net(ct);
>>       int l4num = nf_ct_protonum(ct);
>> -    unsigned int timeout;
>> +    s32 timeout;
>>         l4proto = nf_ct_l4proto_find(l4num);
>>       if (!l4proto)
>> @@ -145,15 +145,20 @@ static void 
>> flow_offload_fixup_ct_timeout(struct nf_conn *ct)
>>       if (l4num == IPPROTO_TCP) {
>>           struct nf_tcp_net *tn = nf_tcp_pernet(net);
>>   -        timeout = tn->offload_pickup;
>> +        timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
>> +        timeout -= tn->offload_timeout;
>>       } else if (l4num == IPPROTO_UDP) {
>>           struct nf_udp_net *tn = nf_udp_pernet(net);
>>   -        timeout = tn->offload_pickup;
>> +        timeout = tn->timeouts[UDP_CT_REPLIED];
>> +        timeout -= tn->offload_timeout;
>>       } else {
>>           return;
>>       }
>>   +    if (timeout < 0)
>> +        timeout = 0;
>> +
>>       if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
>>           ct->timeout = nfct_time_stamp + timeout;
>>   }
diff mbox series

Patch

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 67bbaa6..9e3963c8 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -29,7 +29,6 @@  struct nf_tcp_net {
 	int tcp_max_retrans;
 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 	unsigned int offload_timeout;
-	unsigned int offload_pickup;
 #endif
 };
 
@@ -43,7 +42,6 @@  struct nf_udp_net {
 	unsigned int timeouts[UDP_CT_MAX];
 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 	unsigned int offload_timeout;
-	unsigned int offload_pickup;
 #endif
 };
 
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index d8e554c..0e51fb7 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1435,7 +1435,6 @@  void nf_conntrack_tcp_init_net(struct net *net)
 
 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 	tn->offload_timeout = 30 * HZ;
-	tn->offload_pickup = 120 * HZ;
 #endif
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index b3e135e..ee336d7 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -282,7 +282,6 @@  void nf_conntrack_udp_init_net(struct net *net)
 
 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 	un->offload_timeout = 30 * HZ;
-	un->offload_pickup = 30 * HZ;
 #endif
 }
 
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index ac8f12b..1e78ad8 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -569,7 +569,6 @@  enum nf_ct_sysctl_index {
 	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
-	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
 #endif
 	NF_SYSCTL_CT_PROTO_TCP_LOOSE,
 	NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
@@ -578,7 +577,6 @@  enum nf_ct_sysctl_index {
 	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
-	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
 #endif
 	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
 	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
@@ -773,12 +771,6 @@  enum nf_ct_sysctl_index {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
-	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
-		.procname	= "nf_flowtable_tcp_pickup",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
 #endif
 	[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
 		.procname	= "nf_conntrack_tcp_loose",
@@ -821,12 +813,6 @@  enum nf_ct_sysctl_index {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
-	[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
-		.procname	= "nf_flowtable_udp_pickup",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
 #endif
 	[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
 		.procname	= "nf_conntrack_icmp_timeout",
@@ -1006,7 +992,6 @@  static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
 
 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 	table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
-	table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
 #endif
 
 }
@@ -1098,7 +1083,6 @@  static int nf_conntrack_standalone_init_sysctl(struct net *net)
 	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
-	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
 #endif
 
 	nf_conntrack_standalone_init_tcp_sysctl(net, table);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 8ed2752..ba86bc0 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -136,7 +136,7 @@  static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 	const struct nf_conntrack_l4proto *l4proto;
 	struct net *net = nf_ct_net(ct);
 	int l4num = nf_ct_protonum(ct);
-	unsigned int timeout;
+	s32 timeout;
 
 	l4proto = nf_ct_l4proto_find(l4num);
 	if (!l4proto)
@@ -145,15 +145,20 @@  static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 	if (l4num == IPPROTO_TCP) {
 		struct nf_tcp_net *tn = nf_tcp_pernet(net);
 
-		timeout = tn->offload_pickup;
+		timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+		timeout -= tn->offload_timeout;
 	} else if (l4num == IPPROTO_UDP) {
 		struct nf_udp_net *tn = nf_udp_pernet(net);
 
-		timeout = tn->offload_pickup;
+		timeout = tn->timeouts[UDP_CT_REPLIED];
+		timeout -= tn->offload_timeout;
 	} else {
 		return;
 	}
 
+	if (timeout < 0)
+		timeout = 0;
+
 	if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
 		ct->timeout = nfct_time_stamp + timeout;
 }