Periodically flow expire from flow offload tables

Message ID	20221023171658.69761-1-michael.lilja@gmail.com
State	Changes Requested
Delegated to:	Pablo Neira
Headers	show Return-Path: <netfilter-devel-owner@vger.kernel.org> From: Michael Lilja <michael.lilja@gmail.com> To: "David S. Miller" <davem@davemloft.net>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>, Jonathan Corbet <corbet@lwn.net>, Pablo Neira Ayuso <pablo@netfilter.org>, Jozsef Kadlecsik <kadlec@netfilter.org>, Florian Westphal <fw@strlen.de> Cc: michael.lilja@gmail.com, netdev@vger.kernel.org, linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org, netfilter-devel@vger.kernel.org, coreteam@netfilter.org Subject: [PATCH] Periodically flow expire from flow offload tables Date: Sun, 23 Oct 2022 19:16:58 +0200 Message-Id: <20221023171658.69761-1-michael.lilja@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	Periodically flow expire from flow offload tables | expand Periodically flow expire from flow offload tables

diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index 1120d71f28d7..ab4071bc64c1 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -201,3 +201,10 @@ nf_flowtable_udp_timeout - INTEGER (seconds) Control offload timeout for udp connections. UDP connections may be offloaded from nf conntrack to nf flow table. Once aged, the connection is returned to nf conntrack with udp pickup timeout. + +nf_flowtable_retire - INTEGER (seconds) + - 0 - disabled (default) + - not 0 - enabled and set the number of seconds a flow is offloaded + + If this option is enabled offloaded flows retire periodically and return the + control of the flow to conntrack/netfilter. diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index cd982f4a0f50..f5643c24fb55 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -177,6 +177,7 @@ struct flow_offload { unsigned long flags; u16 type; u32 timeout; + u32 retire; struct rcu_head rcu_head; }; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index e1290c159184..7567d5fa8220 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -110,5 +110,8 @@ struct netns_ct { #if defined(CONFIG_NF_CONNTRACK_LABELS) unsigned int labels_used; #endif +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) + unsigned int sysctl_flowtable_retire; +#endif }; #endif diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 4ffe84c5a82c..92ed07b93846 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -620,6 +620,9 @@ enum nf_ct_sysctl_index { #ifdef CONFIG_LWTUNNEL NF_SYSCTL_CT_LWTUNNEL, #endif +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) + NF_SYSCTL_CT_FLOWTABLE_RETIRE, +#endif __NF_SYSCTL_CT_LAST_SYSCTL, }; @@ -967,6 +970,15 @@ static 
struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = nf_hooks_lwtunnel_sysctl_handler, }, +#endif +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) + [NF_SYSCTL_CT_FLOWTABLE_RETIRE] = { + .procname = "nf_flowtable_retire", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .data = &init_net.ct.sysctl_flowtable_retire, + .proc_handler = proc_dointvec_jiffies, + }, #endif {} }; @@ -1111,6 +1123,11 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) nf_conntrack_standalone_init_dccp_sysctl(net, table); nf_conntrack_standalone_init_gre_sysctl(net, table); +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) + /* Disable retire per default */ + net->ct.sysctl_flowtable_retire = 0; +#endif + /* Don't allow non-init_net ns to alter global sysctls */ if (!net_eq(&init_net, net)) { table[NF_SYSCTL_CT_MAX].mode = 0444; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 81c26a96c30b..0a449dec8565 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -285,6 +285,12 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) int err; flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); + if (nf_ct_net(flow->ct)->ct.sysctl_flowtable_retire) { + flow->retire = nf_flowtable_time_stamp + + nf_ct_net(flow->ct)->ct.sysctl_flowtable_retire; + } else { + flow->retire = 0; + } err = rhashtable_insert_fast(&flow_table->rhashtable, &flow->tuplehash[0].node, @@ -313,6 +319,11 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) } EXPORT_SYMBOL_GPL(flow_offload_add); +static inline bool nf_flow_has_retired(const struct flow_offload *flow) +{ + return flow->retire && nf_flow_timeout_delta(flow->retire) <= 0; +} + void flow_offload_refresh(struct nf_flowtable *flow_table, struct flow_offload *flow) { @@ -327,7 +338,8 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, if (likely(!nf_flowtable_hw_offload(flow_table))) 
return; - nf_flow_offload_add(flow_table, flow); + if (!nf_flow_has_retired(flow)) + nf_flow_offload_add(flow_table, flow); } EXPORT_SYMBOL_GPL(flow_offload_refresh); @@ -339,6 +351,7 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow) static void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow) { + clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); rhashtable_remove_fast(&flow_table->rhashtable, &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, nf_flow_offload_rhash_params); @@ -423,12 +436,14 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, nf_ct_is_dying(flow->ct)) flow_offload_teardown(flow); - if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { + if (test_bit(NF_FLOW_TEARDOWN, &flow->flags) || nf_flow_has_retired(flow)) { if (test_bit(NF_FLOW_HW, &flow->flags)) { - if (!test_bit(NF_FLOW_HW_DYING, &flow->flags)) + if (!test_bit(NF_FLOW_HW_DYING, &flow->flags)) { nf_flow_offload_del(flow_table, flow); - else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags)) + } else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags)) { + clear_bit(NF_FLOW_HW, &flow->flags); flow_offload_del(flow_table, flow); + } } else { flow_offload_del(flow_table, flow); }

Periodically flow expire from flow offload tables

Commit Message

Comments

Patch