diff mbox series

[SRU,J:linux-bluefield,v3,09/11] net/sched: act_ct: offload UDP NEW connections

Message ID 1685598425-1296-10-git-send-email-yifeid@nvidia.com
State New
Headers show
Series net/sched: act_ct: offload UDP NEW connections | expand

Commit Message

Tony Duan June 1, 2023, 5:47 a.m. UTC
From: Vlad Buslov <vladbu@nvidia.com>

BugLink: https://bugs.launchpad.net/bugs/2019264

Modify the offload algorithm of UDP connections to the following:

- Offload NEW connection as unidirectional.

- When connection state changes to ESTABLISHED, also update the hardware
flow. However, in order to prevent act_ct from spamming the offload 'add' wq
for every packet coming in the reply direction in this state, verify whether
the connection has already been updated to ESTABLISHED in the drivers. If that
is the case, then skip flow_table and let conntrack handle such packets,
which will also allow conntrack to potentially promote the connection to
ASSURED.

- When connection state changes to ASSURED, set the flow_table flow
NF_FLOW_HW_BIDIRECTIONAL flag, which will cause the refresh mechanism to
offload the reply direction.

All other protocols have their offload algorithm preserved and are always
offloaded as bidirectional.

Note that this change tries to minimize the load on the flow_table add
workqueue. First, it tracks the last ctinfo that was offloaded by using the
new flow 'NF_FLOW_HW_ESTABLISHED' flag and doesn't schedule the refresh for
reply direction packets when the offloads have already been updated with the
current ctinfo. Second, when the 'add' task executes on the workqueue it always
updates the offload with the current flow state (by checking the
'bidirectional' flow flag and obtaining the actual ctinfo/cookie through the
meta action instead of caching any of these from the moment of scheduling the
'add' work), removing the need to schedule more updates if the state changed
concurrently while the 'add' work was pending on the workqueue.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 6a9bad0069cf306f3df6ac53cf02438d4e15f296)
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Tony Duan <yifeid@nvidia.com>
---
 net/sched/act_ct.c | 51 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 39 insertions(+), 12 deletions(-)
diff mbox series

Patch

diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index fb58c83..d72ce86 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -374,7 +374,7 @@  static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
 
 static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
 				  struct nf_conn *ct,
-				  bool tcp)
+				  bool tcp, bool bidirectional)
 {
 	struct nf_conn_act_ct_ext *act_ct_ext;
 	struct flow_offload *entry;
@@ -393,6 +393,8 @@  static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
 		ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
 		ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
 	}
+	if (bidirectional)
+		__set_bit(NF_FLOW_HW_BIDIRECTIONAL, &entry->flags);
 
 	act_ct_ext = nf_conn_act_ct_ext_find(ct);
 	if (act_ct_ext) {
@@ -416,26 +418,34 @@  static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
 					   struct nf_conn *ct,
 					   enum ip_conntrack_info ctinfo)
 {
-	bool tcp = false;
-
-	if ((ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) ||
-	    !test_bit(IPS_ASSURED_BIT, &ct->status))
-		return;
+	bool tcp = false, bidirectional = true;
 
 	switch (nf_ct_protonum(ct)) {
 	case IPPROTO_TCP:
-		tcp = true;
-		if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+		if ((ctinfo != IP_CT_ESTABLISHED &&
+		     ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+		    !test_bit(IPS_ASSURED_BIT, &ct->status) ||
+		    ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
 			return;
+
+		tcp = true;
 		break;
 	case IPPROTO_UDP:
+		if (!nf_ct_is_confirmed(ct))
+			return;
+		if (!test_bit(IPS_ASSURED_BIT, &ct->status))
+			bidirectional = false;
 		break;
 #ifdef CONFIG_NF_CT_PROTO_GRE
 	case IPPROTO_GRE: {
 		struct nf_conntrack_tuple *tuple;
 
-		if (ct->status & IPS_NAT_MASK)
+		if ((ctinfo != IP_CT_ESTABLISHED &&
+		     ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+		    !test_bit(IPS_ASSURED_BIT, &ct->status) ||
+		    ct->status & IPS_NAT_MASK)
 			return;
+
 		tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 		/* No support for GRE v1 */
 		if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
@@ -451,7 +461,7 @@  static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
 	    ct->status & IPS_SEQ_ADJUST)
 		return;
 
-	tcf_ct_flow_table_add(ct_ft, ct, tcp);
+	tcf_ct_flow_table_add(ct_ft, ct, tcp, bidirectional);
 }
 
 static bool
@@ -630,13 +640,30 @@  static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 	ct = flow->ct;
 
+	if (dir == FLOW_OFFLOAD_DIR_REPLY &&
+	    !test_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags)) {
+		/* Only offload reply direction after connection became
+		 * assured.
+		 */
+		if (test_bit(IPS_ASSURED_BIT, &ct->status))
+			set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
+		else if (test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags))
+			/* If flow_table flow has already been updated to the
+			 * established state, then don't refresh.
+			 */
+			return false;
+	}
+
 	if (tcph && (unlikely(tcph->fin || tcph->rst))) {
 		flow_offload_teardown(flow);
 		return false;
 	}
 
-	ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
-						    IP_CT_ESTABLISHED_REPLY;
+	if (dir == FLOW_OFFLOAD_DIR_ORIGINAL)
+		ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+			IP_CT_ESTABLISHED : IP_CT_NEW;
+	else
+		ctinfo = IP_CT_ESTABLISHED_REPLY;
 
 	flow_offload_refresh(nf_ft, flow);
 	nf_conntrack_get(&ct->ct_general);