
[v2,4/6] netfilter: nf_flow_table: add support for sending flows back to the slow path

Message ID 20180225171852.34446-5-nbd@nbd.name
State Superseded
Delegated to: Pablo Neira
Series netfilter: nf_flow_table: TCP state tracking and bumping of flows to slow path

Commit Message

Felix Fietkau Feb. 25, 2018, 5:18 p.m. UTC
Reset the timeout. For TCP, also reset the state so that window tracking
is re-initialized from the next incoming packets.
This allows the slow path to take over again once the offload state has
been torn down.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 net/netfilter/nf_flow_table_core.c | 50 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)
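
For context (existing conntrack behaviour, not part of this patch): clearing
td_maxwin is what lets the slow path re-learn the connection. Conntrack's TCP
window tracking re-initializes a direction whose td_maxwin is still zero from
the next packet it sees. Paraphrased sketch of the mid-stream case in
tcp_in_window() in net/netfilter/nf_conntrack_proto_tcp.c (variable names
follow that function, not verbatim code):

	if (sender->td_maxwin == 0) {
		/* History of this direction is unknown or was just
		 * discarded; rebuild the tracking state from the packet.
		 * 'win' and 'end' are the window and end sequence number
		 * taken from the packet being examined.
		 */
		sender->td_end = end;
		swin = win << sender->td_scale;
		sender->td_maxwin = (swin == 0 ? 1 : swin);
		sender->td_maxend = end + sender->td_maxwin;
	}

So once flow_offload_fixup_tcp() zeroes td_maxwin for both directions, the
first packets hitting the slow path rebuild the window state instead of being
treated as out of window.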

Comments

Pablo Neira Ayuso Feb. 25, 2018, 8:12 p.m. UTC | #1
On Sun, Feb 25, 2018 at 06:18:50PM +0100, Felix Fietkau wrote:
> Reset the timeout. For TCP, also reset the state so that window tracking
> is re-initialized from the next incoming packets.
> This allows the slow path to take over again once the offload state has
> been torn down.

Do we still need a way for the control plane to request this flush?

Or were you planning to call this from the driver?

Felix Fietkau Feb. 25, 2018, 10:34 p.m. UTC | #2
On 2018-02-25 21:12, Pablo Neira Ayuso wrote:
> On Sun, Feb 25, 2018 at 06:18:50PM +0100, Felix Fietkau wrote:
>> Reset the timeout. For TCP, also reset the state so that window tracking
>> is re-initialized from the next incoming packets.
>> This allows the slow path to take over again once the offload state has
>> been torn down.
> 
> Do we still need a way for the control plane to request this flush?
> 
> Or were you planning to call this from the driver?
The patches call this from the software fast path, and a driver can call
it as well (see the sketch below, after this reply).

- Felix
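
For illustration, a sketch of a fast-path caller (based on a later patch in
this series; function name and placement are illustrative, not necessarily
the final code): the software path can bump a TCP flow back to the slow path
as soon as it sees a FIN or RST, and a driver doing hardware offload can call
flow_offload_teardown() in the same way when it stops offloading a connection.

	static int nf_flow_tcp_state_check(struct flow_offload *flow,
					   struct sk_buff *skb,
					   unsigned int thoff)
	{
		struct tcphdr *tcph;

		if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
			return -1;

		tcph = (void *)(skb_network_header(skb) + thoff);
		if (unlikely(tcph->fin || tcph->rst)) {
			/* Hand the connection back to conntrack: mark the
			 * flow for teardown and fix up the ct state so the
			 * slow path can resume tracking it.
			 */
			flow_offload_teardown(flow);
			return -1;
		}

		return 0;
	}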

Patch

diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index ff5e17a15963..0699981a8511 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -100,6 +100,43 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
 }
 EXPORT_SYMBOL_GPL(flow_offload_alloc);
 
+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
+{
+	tcp->state = TCP_CONNTRACK_ESTABLISHED;
+	tcp->seen[0].td_maxwin = 0;
+	tcp->seen[1].td_maxwin = 0;
+}
+
+static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+{
+	const struct nf_conntrack_l4proto *l4proto;
+	struct net *net = nf_ct_net(ct);
+	unsigned int *timeouts;
+	unsigned int timeout;
+	int l4num;
+
+	l4num = nf_ct_protonum(ct);
+	if (l4num == IPPROTO_TCP)
+		flow_offload_fixup_tcp(&ct->proto.tcp);
+
+	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
+	if (!l4proto)
+		return;
+
+	timeouts = l4proto->get_timeouts(net);
+	if (!timeouts)
+		return;
+
+	if (l4num == IPPROTO_TCP)
+		timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
+	else if (l4num == IPPROTO_UDP)
+		timeout = timeouts[UDP_CT_REPLIED];
+	else
+		return;
+
+	ct->timeout = nfct_time_stamp + timeout;
+}
+
 void flow_offload_free(struct flow_offload *flow)
 {
 	struct flow_offload_entry *e;
@@ -107,7 +144,8 @@ void flow_offload_free(struct flow_offload *flow)
 	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
 	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
 	e = container_of(flow, struct flow_offload_entry, flow);
-	nf_ct_delete(e->ct, 0, 0);
+	if (flow->flags & FLOW_OFFLOAD_DYING)
+		nf_ct_delete(e->ct, 0, 0);
 	nf_ct_put(e->ct);
 	kfree_rcu(e, rcu_head);
 }
@@ -164,6 +202,8 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
 static void flow_offload_del(struct nf_flowtable *flow_table,
 			     struct flow_offload *flow)
 {
+	struct flow_offload_entry *e;
+
 	rhashtable_remove_fast(&flow_table->rhashtable,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
 			       nf_flow_offload_rhash_params);
@@ -171,12 +211,20 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
 			       nf_flow_offload_rhash_params);
 
+	e = container_of(flow, struct flow_offload_entry, flow);
+	clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
+
 	flow_offload_free(flow);
 }
 
 void flow_offload_teardown(struct flow_offload *flow)
 {
+	struct flow_offload_entry *e;
+
 	flow->flags |= FLOW_OFFLOAD_TEARDOWN;
+
+	e = container_of(flow, struct flow_offload_entry, flow);
+	flow_offload_fixup_ct_state(e->ct);
 }
 EXPORT_SYMBOL_GPL(flow_offload_teardown);
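
The FLOW_OFFLOAD_TEARDOWN flag set above is consumed by the flowtable garbage
collector, which unlinks the entry and, through flow_offload_del(), clears
IPS_OFFLOAD_BIT so normal conntrack timeouts apply again. Simplified sketch of
that gc step (hypothetical helper name and condensed signature; the real gc
walks the rhashtable):

	static void nf_flow_offload_gc_check(struct nf_flowtable *flow_table,
					     struct flow_offload *flow)
	{
		/* Expired flows, flows marked dying and flows handed back to
		 * the slow path via flow_offload_teardown() are all unlinked
		 * here.  flow_offload_del() clears IPS_OFFLOAD_BIT, and
		 * flow_offload_free() now only calls nf_ct_delete() for dying
		 * flows, so a torn-down connection stays in conntrack with
		 * the timeout and TCP state restored above.
		 */
		if (nf_flow_has_expired(flow) ||
		    (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)))
			flow_offload_del(flow_table, flow);
	}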