From patchwork Fri Nov 3 15:26:32 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 833938 X-Patchwork-Delegate: pablo@netfilter.org Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netfilter-devel-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3yT5Rm1HCcz9ryT for ; Sat, 4 Nov 2017 02:27:32 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755978AbdKCP1U (ORCPT ); Fri, 3 Nov 2017 11:27:20 -0400 Received: from mail.us.es ([193.147.175.20]:43314 "EHLO mail.us.es" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755831AbdKCP0q (ORCPT ); Fri, 3 Nov 2017 11:26:46 -0400 Received: from antivirus1-rhel7.int (unknown [192.168.2.11]) by mail.us.es (Postfix) with ESMTP id 6F1BAC0B36 for ; Fri, 3 Nov 2017 16:26:45 +0100 (CET) Received: from antivirus1-rhel7.int (localhost [127.0.0.1]) by antivirus1-rhel7.int (Postfix) with ESMTP id 5EBF5B7FE9 for ; Fri, 3 Nov 2017 16:26:45 +0100 (CET) Received: by antivirus1-rhel7.int (Postfix, from userid 99) id 5442EB7FE4; Fri, 3 Nov 2017 16:26:45 +0100 (CET) X-Spam-Checker-Version: SpamAssassin 3.4.1 (2015-04-28) on antivirus1-rhel7.int X-Spam-Level: X-Spam-Status: No, score=-108.2 required=7.5 tests=ALL_TRUSTED,BAYES_50, SMTPAUTH_US2,USER_IN_WHITELIST autolearn=disabled version=3.4.1 Received: from antivirus1-rhel7.int (localhost [127.0.0.1]) by antivirus1-rhel7.int (Postfix) with ESMTP id 146FDB7FE7; Fri, 3 Nov 2017 16:26:43 +0100 (CET) Received: from 192.168.1.97 (192.168.1.97) by antivirus1-rhel7.int (F-Secure/fsigk_smtp/550/antivirus1-rhel7.int); Fri, 03 Nov 2017 16:26:43 +0100 (CET) 
X-Virus-Status: clean(F-Secure/fsigk_smtp/550/antivirus1-rhel7.int) Received: from salvia.here (unknown [31.4.245.115]) (Authenticated sender: pneira@us.es) by entrada.int (Postfix) with ESMTPA id C904A403DFA0; Fri, 3 Nov 2017 16:26:42 +0100 (CET) X-SMTPAUTHUS: auth mail.us.es From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: netdev@vger.kernel.org Subject: [PATCH RFC, WIP 1/5] netfilter: nf_conntrack: move nf_ct_netns_{get, put}() to core Date: Fri, 3 Nov 2017 16:26:32 +0100 Message-Id: <20171103152636.9967-2-pablo@netfilter.org> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20171103152636.9967-1-pablo@netfilter.org> References: <20171103152636.9967-1-pablo@netfilter.org> X-Virus-Scanned: ClamAV using ClamSMTP Sender: netfilter-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netfilter-devel@vger.kernel.org So we can call this from other expression that need conntrack in place to work. Signed-off-by: Pablo Neira Ayuso Acked-by: Florian Westphal --- net/netfilter/nf_conntrack_proto.c | 37 ++++++++++++++++++++++++++++++++++-- net/netfilter/nft_ct.c | 39 +++----------------------------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b3e489c859ec..4379f1244154 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -125,7 +125,7 @@ void nf_ct_l3proto_module_put(unsigned short l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); -int nf_ct_netns_get(struct net *net, u8 nfproto) +static int nf_ct_netns_do_get(struct net *net, u8 nfproto) { const struct nf_conntrack_l3proto *l3proto; int ret; @@ -150,9 +150,33 @@ int nf_ct_netns_get(struct net *net, u8 nfproto) return ret; } + +int nf_ct_netns_get(struct net *net, u8 nfproto) +{ + int err; + + if (nfproto == NFPROTO_INET) { + err = nf_ct_netns_do_get(net, NFPROTO_IPV4); + if (err < 0) + goto err1; + err = nf_ct_netns_do_get(net, NFPROTO_IPV6); + if 
(err < 0) + goto err2; + } else { + err = nf_ct_netns_do_get(net, nfproto); + if (err < 0) + goto err1; + } + return 0; + +err2: + nf_ct_netns_put(net, NFPROTO_IPV4); +err1: + return err; +} EXPORT_SYMBOL_GPL(nf_ct_netns_get); -void nf_ct_netns_put(struct net *net, u8 nfproto) +static void nf_ct_netns_do_put(struct net *net, u8 nfproto) { const struct nf_conntrack_l3proto *l3proto; @@ -171,6 +195,15 @@ void nf_ct_netns_put(struct net *net, u8 nfproto) nf_ct_l3proto_module_put(nfproto); } + +void nf_ct_netns_put(struct net *net, uint8_t nfproto) +{ + if (nfproto == NFPROTO_INET) { + nf_ct_netns_do_put(net, NFPROTO_IPV4); + nf_ct_netns_do_put(net, NFPROTO_IPV6); + } else + nf_ct_netns_do_put(net, nfproto); +} EXPORT_SYMBOL_GPL(nf_ct_netns_put); const struct nf_conntrack_l4proto * diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index bd0975d7dd6f..2647b895f4b0 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -312,39 +312,6 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { [NFTA_CT_SREG] = { .type = NLA_U32 }, }; -static int nft_ct_netns_get(struct net *net, uint8_t family) -{ - int err; - - if (family == NFPROTO_INET) { - err = nf_ct_netns_get(net, NFPROTO_IPV4); - if (err < 0) - goto err1; - err = nf_ct_netns_get(net, NFPROTO_IPV6); - if (err < 0) - goto err2; - } else { - err = nf_ct_netns_get(net, family); - if (err < 0) - goto err1; - } - return 0; - -err2: - nf_ct_netns_put(net, NFPROTO_IPV4); -err1: - return err; -} - -static void nft_ct_netns_put(struct net *net, uint8_t family) -{ - if (family == NFPROTO_INET) { - nf_ct_netns_put(net, NFPROTO_IPV4); - nf_ct_netns_put(net, NFPROTO_IPV6); - } else - nf_ct_netns_put(net, family); -} - #ifdef CONFIG_NF_CONNTRACK_ZONES static void nft_ct_tmpl_put_pcpu(void) { @@ -489,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nft_ct_netns_get(ctx->net, ctx->afi->family); + err = nf_ct_netns_get(ctx->net, ctx->afi->family); if 
(err < 0) return err; @@ -583,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, if (err < 0) goto err1; - err = nft_ct_netns_get(ctx->net, ctx->afi->family); + err = nf_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) goto err1; @@ -606,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv = nft_expr_priv(expr); __nft_ct_set_destroy(ctx, priv); - nft_ct_netns_put(ctx->net, ctx->afi->family); + nf_ct_netns_put(ctx->net, ctx->afi->family); } static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) From patchwork Fri Nov 3 15:26:33 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 833930 X-Patchwork-Delegate: pablo@netfilter.org Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netfilter-devel-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3yT5R6149Lz9ryT for ; Sat, 4 Nov 2017 02:26:58 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755482AbdKCP0u (ORCPT ); Fri, 3 Nov 2017 11:26:50 -0400 Received: from mail.us.es ([193.147.175.20]:43326 "EHLO mail.us.es" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755819AbdKCP0s (ORCPT ); Fri, 3 Nov 2017 11:26:48 -0400 Received: from antivirus1-rhel7.int (unknown [192.168.2.11]) by mail.us.es (Postfix) with ESMTP id C268DC0B2B for ; Fri, 3 Nov 2017 16:26:46 +0100 (CET) Received: from antivirus1-rhel7.int (localhost [127.0.0.1]) by antivirus1-rhel7.int (Postfix) with ESMTP id AD1B9B7FE3 for ; Fri, 3 Nov 2017 16:26:46 +0100 (CET) Received: by antivirus1-rhel7.int (Postfix, from userid 99) id A2CCDB7FE2; Fri, 
3 Nov 2017 16:26:46 +0100 (CET) X-Spam-Checker-Version: SpamAssassin 3.4.1 (2015-04-28) on antivirus1-rhel7.int X-Spam-Level: X-Spam-Status: No, score=-108.2 required=7.5 tests=ALL_TRUSTED,BAYES_50, SMTPAUTH_US2,USER_IN_WHITELIST autolearn=disabled version=3.4.1 Received: from antivirus1-rhel7.int (localhost [127.0.0.1]) by antivirus1-rhel7.int (Postfix) with ESMTP id C89C6B7FE3; Fri, 3 Nov 2017 16:26:43 +0100 (CET) Received: from 192.168.1.97 (192.168.1.97) by antivirus1-rhel7.int (F-Secure/fsigk_smtp/550/antivirus1-rhel7.int); Fri, 03 Nov 2017 16:26:43 +0100 (CET) X-Virus-Status: clean(F-Secure/fsigk_smtp/550/antivirus1-rhel7.int) Received: from salvia.here (unknown [31.4.245.115]) (Authenticated sender: pneira@us.es) by entrada.int (Postfix) with ESMTPA id 8EBE5403DFA0; Fri, 3 Nov 2017 16:26:43 +0100 (CET) X-SMTPAUTHUS: auth mail.us.es From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: netdev@vger.kernel.org Subject: [PATCH RFC, WIP 2/5] netfilter: add software flow offload infrastructure Date: Fri, 3 Nov 2017 16:26:33 +0100 Message-Id: <20171103152636.9967-3-pablo@netfilter.org> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20171103152636.9967-1-pablo@netfilter.org> References: <20171103152636.9967-1-pablo@netfilter.org> X-Virus-Scanned: ClamAV using ClamSMTP Sender: netfilter-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netfilter-devel@vger.kernel.org This patch adds the generic software flow offload infrastructure. This allows users to configure fast path for established flows that will not follow the classic forwarding path. This adds a new hook at netfilter ingress for each existing interface. For each packet that hits the hook, we look up for an existing flow in the table, if there is a hit, the packet is forwarded by using the gateway and interfaces that are cached in the flow table entry. 
This comes with a garbage collector, run periodically from a workqueue, that releases a flow table entry if no packets are seen for it after a little while. Signed-off-by: Pablo Neira Ayuso --- include/net/flow_offload.h | 67 +++++++ net/netfilter/Kconfig | 7 + net/netfilter/Makefile | 3 + net/netfilter/nf_flow_offload.c | 386 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 463 insertions(+) create mode 100644 include/net/flow_offload.h create mode 100644 net/netfilter/nf_flow_offload.c diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h new file mode 100644 index 000000000000..30bfca7ed3f1 --- /dev/null +++ b/include/net/flow_offload.h @@ -0,0 +1,67 @@ +#ifndef _FLOW_OFFLOAD_H +#define _FLOW_OFFLOAD_H + +#include +#include +#include +#include +#include + +enum flow_offload_tuple_dir { + FLOW_OFFLOAD_DIR_ORIGINAL, + FLOW_OFFLOAD_DIR_REPLY, + __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY, +}; +#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1) + +struct flow_offload_tuple { + union { + struct in_addr src_v4; + struct in6_addr src_v6; + }; + union { + struct in_addr dst_v4; + struct in6_addr dst_v6; + }; + struct { + __be16 src_port; + __be16 dst_port; + }; + + u8 l3proto; + u8 l4proto; + u8 dir; + + int iifidx; + int oifidx; + + union { + __be32 gateway; + struct in6_addr gateway6; + }; +}; + +struct flow_offload_tuple_rhash { + struct rhash_head node; + struct flow_offload_tuple tuple; +}; + +#define FLOW_OFFLOAD_SNAT 0x1 +#define FLOW_OFFLOAD_DNAT 0x2 +#define FLOW_OFFLOAD_HW 0x4 + +struct flow_offload { + struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; + u32 flags; + union { + /* Your private driver data here. 
*/ + u32 timeout; + }; + struct rcu_head rcu_head; +}; + +int flow_offload_add(struct flow_offload *flow); +void flow_offload_del(struct flow_offload *flow); +struct flow_offload_tuple_rhash *flow_offload_lookup(struct flow_offload_tuple *tuple); + +#endif /* _FLOW_OFFLOAD_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e4a13cc8a2e7..f022ca91f49d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -436,6 +436,13 @@ config NETFILTER_SYNPROXY endif # NF_CONNTRACK +config NF_FLOW_OFFLOAD + tristate "Netfilter Generic Flow Offload (GFO) module" + help + This option adds the flow table core infrastructure. + + To compile it as a module, choose M here. + config NF_TABLES select NETFILTER_NETLINK tristate "Netfilter nf_tables support" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index d3891c93edd6..518f54113e06 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -69,6 +69,9 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o # generic packet duplication from netdev family obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o +# generic flow table +obj-$(CONFIG_NF_FLOW_OFFLOAD)+= nf_flow_offload.o + # nf_tables nf_tables-objs := nf_tables_core.o nf_tables_api.o nf_tables_trace.o \ nft_immediate.o nft_cmp.o nft_range.o nft_bitwise.o \ diff --git a/net/netfilter/nf_flow_offload.c b/net/netfilter/nf_flow_offload.c new file mode 100644 index 000000000000..c967b29d11a6 --- /dev/null +++ b/net/netfilter/nf_flow_offload.c @@ -0,0 +1,386 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* For layer 4 checksum field offset. 
*/ +#include +#include +#include + +static struct rhashtable flow_table; + +static u32 flow_offload_hash(const void *data, u32 len, u32 seed) +{ + const struct flow_offload_tuple *tuple = data; + + return jhash(tuple, offsetof(struct flow_offload_tuple, l4proto), seed); +} + +static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct flow_offload_tuple_rhash *tuplehash = data; + + return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, l4proto), seed); +} + +static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct flow_offload_tuple_rhash *x = ptr; + const struct flow_offload_tuple *tuple = arg->key; + + if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, l4proto))) + return 1; + + return 0; +} + +static const struct rhashtable_params flow_offload_rhash_params = { + .head_offset = offsetof(struct flow_offload_tuple_rhash, node), + .hashfn = flow_offload_hash, + .obj_hashfn = flow_offload_hash_obj, + .obj_cmpfn = flow_offload_hash_cmp, + .automatic_shrinking = true, +}; + +#define NF_FLOW_LIFETIME 15 + +int flow_offload_add(struct flow_offload *flow) +{ + flow->timeout = (u32)jiffies; + + rhashtable_insert_fast(&flow_table, &flow->tuplehash[0].node, + flow_offload_rhash_params); + rhashtable_insert_fast(&flow_table, &flow->tuplehash[1].node, + flow_offload_rhash_params); + return 0; +} +EXPORT_SYMBOL_GPL(flow_offload_add); + +void flow_offload_del(struct flow_offload *flow) +{ + rhashtable_remove_fast(&flow_table, &flow->tuplehash[0].node, + flow_offload_rhash_params); + rhashtable_remove_fast(&flow_table, &flow->tuplehash[1].node, + flow_offload_rhash_params); + kfree_rcu(flow, rcu_head); +} +EXPORT_SYMBOL_GPL(flow_offload_del); + +struct flow_offload_tuple_rhash * +flow_offload_lookup(struct flow_offload_tuple *tuple) +{ + return rhashtable_lookup_fast(&flow_table, tuple, + flow_offload_rhash_params); +} +EXPORT_SYMBOL_GPL(flow_offload_lookup); + +static void 
nf_flow_offload_work_gc(struct work_struct *work); + +static DECLARE_DEFERRABLE_WORK(nf_flow_offload_gc, + nf_flow_offload_work_gc); + +static inline bool nf_flow_has_expired(const struct flow_offload *flow) +{ + return (__s32)(flow->timeout - (u32)jiffies) <= 0; +} + +static void nf_flow_offload_work_gc(struct work_struct *work) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct rhashtable_iter hti; + struct flow_offload *flow; + int err, counter = 0; + + rhashtable_walk_init(&flow_table, &hti, GFP_KERNEL); + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) + goto out; + + while ((tuplehash = rhashtable_walk_next(&hti))) { + if (IS_ERR(tuplehash)) { + err = PTR_ERR(tuplehash); + if (err != -EAGAIN) + goto out; + + continue; + } + if (tuplehash->tuple.dir) + continue; + + flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); + + if (nf_flow_has_expired(flow)) + flow_offload_del(flow); + + counter++; + } + + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); + +out: + queue_delayed_work(system_power_efficient_wq, &nf_flow_offload_gc, + msecs_to_jiffies(1000)); +} + +static int nf_flow_snat_tcp(struct iphdr *iph, + const struct flow_offload *flow, + struct sk_buff *skb, + unsigned int thoff, + __be32 addr, __be32 new_addr) +{ + struct tcphdr *tcph; + + if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || + skb_try_make_writable(skb, thoff + sizeof(*tcph))) + return -1; + + tcph = (void *)(skb_network_header(skb) + thoff); + inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); + + return 0; +} + +static int nf_flow_snat_udp(struct iphdr *iph, + const struct flow_offload *flow, + struct sk_buff *skb, + unsigned int thoff, + __be32 addr, __be32 new_addr) +{ + struct udphdr *udph; + + if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || + skb_try_make_writable(skb, thoff + sizeof(*udph))) + return -1; + + udph = (void *)(skb_network_header(skb) + thoff); + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { + 
inet_proto_csum_replace4(&udph->check, skb, addr, + new_addr, true); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + + return 0; +} + +static int nf_flow_snat(struct iphdr *iph, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, struct sk_buff *skb) +{ + __be32 new_addr, addr; + unsigned int thoff; + + if (skb_try_make_writable(skb, sizeof(*iph))) + return NF_DROP; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = iph->saddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; + iph->saddr = new_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = iph->daddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; + iph->daddr = new_addr; + break; + default: + return -1; + } + csum_replace4(&iph->check, addr, new_addr); + + ip_decrease_ttl(iph); + + thoff = iph->ihl * 4; + + switch (iph->protocol) { + case IPPROTO_TCP: + if (nf_flow_snat_tcp(iph, flow, skb, thoff, addr, new_addr) < 0) + return NF_DROP; + break; + case IPPROTO_UDP: + if (nf_flow_snat_udp(iph, flow, skb, thoff, addr, new_addr) < 0) + return NF_DROP; + break; + } + + return 0; +} + +/* Similar to rt_nexthop(). 
*/ +static inline __be32 nf_flow_nexthop(__be32 nexthop, __be32 daddr) +{ + if (nexthop) + return nexthop; + + return daddr; +} + +struct flow_ports { + __be16 src, dst; +}; + +static int nf_flow_tuple_ip(struct iphdr *iph, struct sk_buff *skb, + struct flow_offload_tuple *tuple) +{ + struct flow_ports *ports; + unsigned int thoff; + + if (iph->protocol != IPPROTO_TCP && + iph->protocol != IPPROTO_UDP) + return -1; + + thoff = iph->ihl * 4; + if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + return -1; + + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + + tuple->src_v4.s_addr = iph->saddr; + tuple->dst_v4.s_addr = iph->daddr; + tuple->src_port = ports->src; + tuple->dst_port = ports->dst; + tuple->l3proto = AF_INET; + tuple->l4proto = iph->protocol; + + return 0; +} + +#define NF_FLOW_TIMEOUT (30 * HZ) + +static unsigned int +nf_flow_offload_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct flow_offload_tuple tuple = {}; + struct flow_offload *flow; + struct net_device *outdev; + struct iphdr *iph; + __be32 nexthop; + int err; + + switch (skb->protocol) { + case cpu_to_be16(ETH_P_IP): + if (!pskb_may_pull(skb, sizeof(*iph))) + return NF_ACCEPT; + + iph = ip_hdr(skb); + if (ip_is_fragment(iph)) + return NF_ACCEPT; + + err = nf_flow_tuple_ip(iph, skb, &tuple); + if (err < 0) + return NF_ACCEPT; + break; + default: + return NF_ACCEPT; + } + + tuplehash = flow_offload_lookup(&tuple); + if (tuplehash == NULL) + return NF_ACCEPT; + + outdev = dev_get_by_index_rcu(&init_net, tuplehash->tuple.oifidx); + if (!outdev) + return NF_ACCEPT; + + flow = container_of(tuplehash, struct flow_offload, + tuplehash[tuplehash->tuple.dir]); + + flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; + + if (flow->flags & FLOW_OFFLOAD_SNAT && + nf_flow_snat(iph, flow, tuplehash->tuple.dir, skb) < 0) + return NF_DROP; + + skb->dev = outdev; + nexthop = nf_flow_nexthop(tuplehash->tuple.gateway, 
iph->daddr); + + neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); + + return NF_STOLEN; +} + +static LIST_HEAD(nf_flow_hook_list); + +struct nf_flow_hook_entry { + struct list_head head; + struct nf_hook_ops ops; +}; + +static int __init nf_flow_offload_module_init(void) +{ + struct rhashtable_params params = flow_offload_rhash_params; + struct nf_hook_ops flow_offload_hook = { + .hook = nf_flow_offload_hook, + .pf = NFPROTO_NETDEV, + .hooknum = NF_NETDEV_INGRESS, + .priority = -100, + }; + struct nf_flow_hook_entry *entry; + struct net_device *dev; + int err; + + params.key_len = offsetof(struct flow_offload_tuple, dir); + err = rhashtable_init(&flow_table, ¶ms); + if (err < 0) + return err; + + rtnl_lock(); + for_each_netdev(&init_net, dev) { + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + rtnl_unlock(); + return -ENOMEM; + } + entry->ops = flow_offload_hook; + entry->ops.dev = dev; + list_add_tail(&entry->head, &nf_flow_hook_list); + + err = nf_register_net_hook(&init_net, &entry->ops); + if (err < 0) + return err; + + pr_info("register flow table for device %s\n", dev->name); + } + rtnl_unlock(); + + queue_delayed_work(system_power_efficient_wq, &nf_flow_offload_gc, + msecs_to_jiffies(1000)); + return err; +} + +static void flow_offload_destroy(void *ptr, void *arg) +{ + kfree(ptr); +} + +static void __exit nf_flow_offload_module_exit(void) +{ + struct nf_flow_hook_entry *entry, *next; + + cancel_delayed_work_sync(&nf_flow_offload_gc); + list_for_each_entry_safe(entry, next, &nf_flow_hook_list, head) { + pr_info("unregister flow table for device %s\n", + entry->ops.dev->name); + nf_unregister_net_hook(&init_net, &entry->ops); + list_del(&entry->head); + kfree(entry); + } + rhashtable_free_and_destroy(&flow_table, flow_offload_destroy, NULL); +} + +module_init(nf_flow_offload_module_init); +module_exit(nf_flow_offload_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); From patchwork Fri Nov 3 15:26:34 2017 
Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 833934 X-Patchwork-Delegate: pablo@netfilter.org Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netfilter-devel-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3yT5RP4nVrz9ryT for ; Sat, 4 Nov 2017 02:27:13 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755876AbdKCP1M (ORCPT ); Fri, 3 Nov 2017 11:27:12 -0400 Received: from mail.us.es ([193.147.175.20]:43332 "EHLO mail.us.es" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755861AbdKCP0s (ORCPT ); Fri, 3 Nov 2017 11:26:48 -0400 Received: from antivirus1-rhel7.int (unknown [192.168.2.11]) by mail.us.es (Postfix) with ESMTP id 10473C0B3E for ; Fri, 3 Nov 2017 16:26:47 +0100 (CET) Received: from antivirus1-rhel7.int (localhost [127.0.0.1]) by antivirus1-rhel7.int (Postfix) with ESMTP id F1AEEB7FE8 for ; Fri, 3 Nov 2017 16:26:46 +0100 (CET) Received: by antivirus1-rhel7.int (Postfix, from userid 99) id E754FB7FE1; Fri, 3 Nov 2017 16:26:46 +0100 (CET) X-Spam-Checker-Version: SpamAssassin 3.4.1 (2015-04-28) on antivirus1-rhel7.int X-Spam-Level: X-Spam-Status: No, score=-108.2 required=7.5 tests=ALL_TRUSTED,BAYES_50, SMTPAUTH_US2,USER_IN_WHITELIST autolearn=disabled version=3.4.1 Received: from antivirus1-rhel7.int (localhost [127.0.0.1]) by antivirus1-rhel7.int (Postfix) with ESMTP id 8AD81B7FE5; Fri, 3 Nov 2017 16:26:44 +0100 (CET) Received: from 192.168.1.97 (192.168.1.97) by antivirus1-rhel7.int (F-Secure/fsigk_smtp/550/antivirus1-rhel7.int); Fri, 03 Nov 2017 16:26:44 +0100 (CET) X-Virus-Status: 
clean(F-Secure/fsigk_smtp/550/antivirus1-rhel7.int) Received: from salvia.here (unknown [31.4.245.115]) (Authenticated sender: pneira@us.es) by entrada.int (Postfix) with ESMTPA id 4D810403DFA1; Fri, 3 Nov 2017 16:26:44 +0100 (CET) X-SMTPAUTHUS: auth mail.us.es From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: netdev@vger.kernel.org Subject: [PATCH RFC, WIP 3/5] netfilter: nf_flow_offload: integration with conntrack Date: Fri, 3 Nov 2017 16:26:34 +0100 Message-Id: <20171103152636.9967-4-pablo@netfilter.org> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20171103152636.9967-1-pablo@netfilter.org> References: <20171103152636.9967-1-pablo@netfilter.org> X-Virus-Scanned: ClamAV using ClamSMTP Sender: netfilter-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netfilter-devel@vger.kernel.org This patch adds the IPS_OFFLOAD status bit, this new bit tells us that the conntrack entry is owned by the flow offload infrastructure. The timer of such conntrack entries is stopped - the conntrack garbage collector skips them - and they display no internal state in the case of TCP flows. # cat /proc/net/nf_conntrack ipv4 2 tcp 6 src=10.141.10.2 dst=147.75.205.195 sport=36392 dport=443 src=147.75.205.195 dst=192.168.2.195 sport=443 dport=36392 [OFFLOAD] mark=0 zone=0 use=2 Note the [OFFLOAD] tag in the listing. Conntrack entries that have been offloaded to the flow table infrastructure cannot be deleted/flushed via ctnetlink. The flow table infrastructure is also responsible for releasing this conntrack entry. Signed-off-by: Pablo Neira Ayuso --- Instead of nf_flow_release_ct(), I'd rather keep a pointer reference to the conntrack object from the flow_offload entry, so we can skip the conntrack look up. 
include/net/netfilter/nf_conntrack.h | 3 +- include/uapi/linux/netfilter/nf_conntrack_common.h | 4 +++ net/netfilter/nf_conntrack_core.c | 7 ++++- net/netfilter/nf_conntrack_netlink.c | 15 ++++++++- net/netfilter/nf_conntrack_proto_tcp.c | 3 ++ net/netfilter/nf_conntrack_standalone.c | 12 +++++--- net/netfilter/nf_flow_offload.c | 36 ++++++++++++++++++++-- 7 files changed, 71 insertions(+), 9 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8f3bd30511de..9af4bb0c2f46 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -272,7 +272,8 @@ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) static inline bool nf_ct_is_expired(const struct nf_conn *ct) { - return (__s32)(ct->timeout - nfct_time_stamp) <= 0; + return (__s32)(ct->timeout - nfct_time_stamp) <= 0 && + !test_bit(IPS_OFFLOAD_BIT, &ct->status); } /* use after obtaining a reference count */ diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index dc947e59d03a..6b463b88182d 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -100,6 +100,10 @@ enum ip_conntrack_status { IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), + /* Conntrack has been offloaded to flow table. */ + IPS_OFFLOAD_BIT = 14, + IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT), + /* Be careful here, modifying these bits can make things messy, * so don't let users modify them directly. 
*/ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 01130392b7c0..48f36c4fb756 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -901,6 +901,9 @@ static unsigned int early_drop_list(struct net *net, hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); + if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) + continue; + if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); continue; @@ -1011,12 +1014,14 @@ static void gc_worker(struct work_struct *work) tmp = nf_ct_tuplehash_to_ctrack(h); scanned++; + if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) + continue; + if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); expired_count++; continue; } - if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp)) continue; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index de4053d84364..79a74aec7c1e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1105,6 +1105,14 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { .len = NF_CT_LABELS_MAX_SIZE }, }; +static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) +{ + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) + return 0; + + return ctnetlink_filter_match(ct, data); +} + static int ctnetlink_flush_conntrack(struct net *net, const struct nlattr * const cda[], u32 portid, int report) @@ -1117,7 +1125,7 @@ static int ctnetlink_flush_conntrack(struct net *net, return PTR_ERR(filter); } - nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter, + nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter, portid, report); kfree(filter); @@ -1163,6 +1171,11 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, ct = nf_ct_tuplehash_to_ctrack(h); + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) { + nf_ct_put(ct); + return -EBUSY; + } + if (cda[CTA_ID]) { u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); if 
(id != (u32)(unsigned long)ct) { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index cba1c6ffe51a..156f529d1668 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple, /* Print out the private part of the conntrack. */ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) + return; + seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]); } #endif diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5a101caa3e12..46d32baad095 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file *s, void *v) WARN_ON(!l4proto); ret = -ENOSPC; - seq_printf(s, "%-8s %u %-8s %u %ld ", + seq_printf(s, "%-8s %u %-8s %u ", l3proto_name(l3proto->l3proto), nf_ct_l3num(ct), - l4proto_name(l4proto->l4proto), nf_ct_protonum(ct), - nf_ct_expires(ct) / HZ); + l4proto_name(l4proto->l4proto), nf_ct_protonum(ct)); + + if (!test_bit(IPS_OFFLOAD_BIT, &ct->status)) + seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ); if (l4proto->print_conntrack) l4proto->print_conntrack(s, ct); @@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file *s, void *v) if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) goto release; - if (test_bit(IPS_ASSURED_BIT, &ct->status)) + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) + seq_puts(s, "[OFFLOAD] "); + else if (test_bit(IPS_ASSURED_BIT, &ct->status)) seq_puts(s, "[ASSURED] "); if (seq_has_overflowed(s)) diff --git a/net/netfilter/nf_flow_offload.c b/net/netfilter/nf_flow_offload.c index c967b29d11a6..f4a3fbe11b69 100644 --- a/net/netfilter/nf_flow_offload.c +++ b/net/netfilter/nf_flow_offload.c @@ -13,6 +13,9 @@ #include #include +#include +#include + static struct rhashtable flow_table; 
static u32 flow_offload_hash(const void *data, u32 len, u32 seed) @@ -91,6 +94,34 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow) return (__s32)(flow->timeout - (u32)jiffies) <= 0; } +static void nf_flow_release_ct(const struct flow_offload_tuple_rhash *th) +{ + struct nf_conntrack_tuple tuple = {}; + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_zone zone; + struct nf_conn *ct; + + nf_ct_zone_init(&zone, NF_CT_DEFAULT_ZONE_ID, + NF_CT_DEFAULT_ZONE_DIR, 0); + + tuple.src.u3.ip = th->tuple.src_v4.s_addr; + tuple.dst.u3.ip = th->tuple.dst_v4.s_addr; + tuple.src.u.all = th->tuple.src_port; + tuple.dst.u.all = th->tuple.dst_port; + tuple.src.l3num = th->tuple.l3proto; + tuple.dst.protonum = th->tuple.l4proto; + tuple.dst.dir = IP_CT_DIR_ORIGINAL; + + h = nf_conntrack_find_get(&init_net, &zone, &tuple); + if (!h) { + pr_err("cannot find conntrack for flow hash %p\n", th); + return; + } + ct = nf_ct_tuplehash_to_ctrack(h); + nf_ct_delete(ct, 0, 0); + nf_ct_put(ct); +} + static void nf_flow_offload_work_gc(struct work_struct *work) { struct flow_offload_tuple_rhash *tuplehash; @@ -116,9 +147,10 @@ static void nf_flow_offload_work_gc(struct work_struct *work) flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); - if (nf_flow_has_expired(flow)) + if (nf_flow_has_expired(flow)) { flow_offload_del(flow); - + nf_flow_release_ct(tuplehash); + } counter++; } From patchwork Fri Nov 3 15:26:35 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 833936 X-Patchwork-Delegate: pablo@netfilter.org Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netfilter-devel-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org 
(vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3yT5RS35qdz9ryT for ; Sat, 4 Nov 2017 02:27:16 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755303AbdKCP1L (ORCPT ); Fri, 3 Nov 2017 11:27:11 -0400 Received: from mail.us.es ([193.147.175.20]:43338 "EHLO mail.us.es" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755876AbdKCP0t (ORCPT ); Fri, 3 Nov 2017 11:26:49 -0400 Received: from antivirus1-rhel7.int (unknown [192.168.2.11]) by mail.us.es (Postfix) with ESMTP id E0654C0B36 for ; Fri, 3 Nov 2017 16:26:47 +0100 (CET) Received: from antivirus1-rhel7.int (localhost [127.0.0.1]) by antivirus1-rhel7.int (Postfix) with ESMTP id D1253B7FEA for ; Fri, 3 Nov 2017 16:26:47 +0100 (CET) Received: by antivirus1-rhel7.int (Postfix, from userid 99) id C6C01B7FE9; Fri, 3 Nov 2017 16:26:47 +0100 (CET) X-Spam-Checker-Version: SpamAssassin 3.4.1 (2015-04-28) on antivirus1-rhel7.int X-Spam-Level: X-Spam-Status: No, score=-108.2 required=7.5 tests=ALL_TRUSTED,BAYES_50, SMTPAUTH_US2,USER_IN_WHITELIST autolearn=disabled version=3.4.1 Received: from antivirus1-rhel7.int (localhost [127.0.0.1]) by antivirus1-rhel7.int (Postfix) with ESMTP id 4310CB7FE6; Fri, 3 Nov 2017 16:26:45 +0100 (CET) Received: from 192.168.1.97 (192.168.1.97) by antivirus1-rhel7.int (F-Secure/fsigk_smtp/550/antivirus1-rhel7.int); Fri, 03 Nov 2017 16:26:45 +0100 (CET) X-Virus-Status: clean(F-Secure/fsigk_smtp/550/antivirus1-rhel7.int) Received: from salvia.here (unknown [31.4.245.115]) (Authenticated sender: pneira@us.es) by entrada.int (Postfix) with ESMTPA id 03DD3403DFA0; Fri, 3 Nov 2017 16:26:44 +0100 (CET) X-SMTPAUTHUS: auth mail.us.es From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: netdev@vger.kernel.org Subject: [PATCH RFC,WIP 4/5] netfilter: nf_tables: flow offload expression Date: Fri, 3 Nov 2017 16:26:35 +0100 Message-Id: <20171103152636.9967-5-pablo@netfilter.org> X-Mailer: git-send-email 2.11.0 In-Reply-To: 
<20171103152636.9967-1-pablo@netfilter.org> References: <20171103152636.9967-1-pablo@netfilter.org> X-Virus-Scanned: ClamAV using ClamSMTP Sender: netfilter-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netfilter-devel@vger.kernel.org Add new instruction for the nf_tables VM that allows us to specify what flows are offloaded. This has an explicit dependency with the conntrack subsystem. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 9 + net/netfilter/Kconfig | 7 + net/netfilter/Makefile | 1 + net/netfilter/nft_flow_offload.c | 331 +++++++++++++++++++++++++++++++ 4 files changed, 348 insertions(+) create mode 100644 net/netfilter/nft_flow_offload.c diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 871afa4871bf..2edde548de68 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -948,6 +948,15 @@ enum nft_ct_attributes { }; #define NFTA_CT_MAX (__NFTA_CT_MAX - 1) +/** + * enum nft_ct_offload_attributes - ct offload expression attributes + */ +enum nft_offload_attributes { + NFTA_CT_OFFLOAD_UNSPEC, + __NFTA_CT_OFFLOAD_MAX, +}; +#define NFTA_CT_OFFLOAD_MAX (__NFTA_CT_OFFLOAD_MAX - 1) + enum nft_limit_type { NFT_LIMIT_PKTS, NFT_LIMIT_PKT_BYTES diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f022ca91f49d..0a5c33cfaeb8 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -504,6 +504,13 @@ config NFT_CT This option adds the "ct" expression that you can use to match connection tracking information such as the flow state. +config NFT_FLOW_OFFLOAD + depends on NF_CONNTRACK + tristate "Netfilter nf_tables hardware flow offload module" + help + This option adds the "flow_offload" expression that you can use to + choose what flows are placed into the hardware. 
+ config NFT_SET_RBTREE tristate "Netfilter nf_tables rbtree set module" help diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 518f54113e06..801ce5c25e5d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_RT) += nft_rt.o obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_CT) += nft_ct.o +obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o obj-$(CONFIG_NFT_OBJREF) += nft_objref.o diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c new file mode 100644 index 000000000000..d38d185a19a5 --- /dev/null +++ b/net/netfilter/nft_flow_offload.c @@ -0,0 +1,331 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +union flow_gateway { + __be32 ip; + struct in6_addr ip6; +}; + +static int flow_offload_iterate_cleanup(struct nf_conn *ct, void *data) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct flow_offload_tuple tuple = {}; + struct net_device *indev = data; + struct flow_offload *flow; + + if (!test_and_clear_bit(IPS_OFFLOAD_BIT, &ct->status)) + return 0; + + tuple.src_v4 = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in; + tuple.dst_v4 = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in; + tuple.src_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port; + tuple.dst_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; + tuple.l3proto = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + tuple.l4proto = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + + tuplehash = flow_offload_lookup(&tuple); + BUG_ON(!tuplehash); + + if (indev && tuplehash->tuple.iifidx != indev->ifindex) + return 0; + + flow = container_of(tuplehash, struct flow_offload, + tuplehash[tuplehash->tuple.dir]); + + flow_offload_del(flow); + + /* Do not remove this conntrack from 
table. */ + return 0; +} + +static void flow_offload_cleanup(struct net *net, + const struct net_device *dev) +{ + nf_ct_iterate_cleanup_net(net, flow_offload_iterate_cleanup, + (void *)dev, 0, 0); +} + +static int flow_offload_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (event != NETDEV_DOWN) + return NOTIFY_DONE; + + flow_offload_cleanup(dev_net(dev), dev); + + return NOTIFY_DONE; +} + +static struct notifier_block flow_offload_netdev_notifier = { + .notifier_call = flow_offload_netdev_event, +}; + +static struct flow_offload * +flow_offload_alloc(const struct nf_conn *ct, int iifindex, int oifindex, + union flow_gateway *orig_gateway, + union flow_gateway *reply_gateway) +{ + struct flow_offload *flow; + + flow = kzalloc(sizeof(*flow), GFP_ATOMIC); + if (!flow) + return NULL; + + switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) { + case NFPROTO_IPV4: + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.gateway = + orig_gateway->ip; + 
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.gateway = + reply_gateway->ip; + break; + case NFPROTO_IPV6: + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.gateway6 = + orig_gateway->ip6; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.gateway6 = + reply_gateway->ip6; + break; + } + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir = FLOW_OFFLOAD_DIR_ORIGINAL; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir = FLOW_OFFLOAD_DIR_REPLY; + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx = oifindex; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx = iifindex; + 
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx = iifindex; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx = oifindex; + + if (ct->status & IPS_SRC_NAT) + flow->flags |= FLOW_OFFLOAD_SNAT; + else if (ct->status & IPS_DST_NAT) + flow->flags |= FLOW_OFFLOAD_DNAT; + + return flow; +} + +static int nft_flow_route(const struct nft_pktinfo *pkt, + const struct nf_conn *ct, + union flow_gateway *orig_gw, + union flow_gateway *reply_gw) +{ + const struct dst_entry *reply_dst = skb_dst(pkt->skb); + struct dst_entry *orig_dst; + const struct nf_afinfo *ai; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + fl.u.ip4.daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip; + break; + case NFPROTO_IPV6: + fl.u.ip6.daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6; + break; + } + + ai = nf_get_afinfo(nft_pf(pkt)); + if (ai) { + ai->route(nft_net(pkt), &orig_dst, &fl, false); + if (!orig_dst) + return -ENOENT; + } + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: { + const struct rtable *orig_rt = (const struct rtable *)orig_dst; + const struct rtable *reply_rt = + (const struct rtable *)reply_dst; + + orig_gw->ip = orig_rt->rt_gateway; + reply_gw->ip = reply_rt->rt_gateway; + break; + } + case NFPROTO_IPV6: + break; + default: + break; + } + + dst_release(orig_dst); + + return 0; +} + +static void nft_flow_offload_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + union flow_gateway orig_gateway, reply_gateway; + struct net_device *outdev = pkt->xt.state->out; + struct net_device *indev = pkt->xt.state->in; + enum ip_conntrack_info ctinfo; + struct flow_offload *flow; + struct nf_conn *ct; + int ret; + + ct = nf_ct_get(pkt->skb, &ctinfo); + if (!ct) + goto out; + + switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { + case IPPROTO_TCP: + case IPPROTO_UDP: + break; + default: + goto out; + } + + if (test_bit(IPS_HELPER_BIT, &ct->status)) + goto out; + + 
if (ctinfo == IP_CT_NEW || + ctinfo == IP_CT_RELATED) + goto out; + + if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) + goto out; + + if (nft_flow_route(pkt, ct, &orig_gateway, &reply_gateway) < 0) + goto err1; + + flow = flow_offload_alloc(ct, indev->ifindex, outdev->ifindex, + &orig_gateway, &reply_gateway); + if (!flow) + goto err1; + + ret = flow_offload_add(flow); + if (ret < 0) + goto err2; + + return; +err2: + kfree(flow); +err1: + clear_bit(IPS_OFFLOAD_BIT, &ct->status); +out: + regs->verdict.code = NFT_BREAK; +} + +static int nft_flow_offload_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + unsigned int hook_mask = (1 << NF_INET_FORWARD); + + return nft_chain_validate_hooks(ctx->chain, hook_mask); +} + +static int nft_flow_offload_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + return nf_ct_netns_get(ctx->net, ctx->afi->family); +} + +static void nft_flow_offload_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, ctx->afi->family); +} + +static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + return 0; +} + +struct nft_expr_type nft_flow_offload_type; +static const struct nft_expr_ops nft_flow_offload_ops = { + .type = &nft_flow_offload_type, + .size = NFT_EXPR_SIZE(0), + .eval = nft_flow_offload_eval, + .init = nft_flow_offload_init, + .destroy = nft_flow_offload_destroy, + .validate = nft_flow_offload_validate, + .dump = nft_flow_offload_dump, +}; + +struct nft_expr_type nft_flow_offload_type __read_mostly = { + .name = "flow_offload", + .ops = &nft_flow_offload_ops, + .maxattr = NFTA_CT_OFFLOAD_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_flow_offload_module_init(void) +{ + register_netdevice_notifier(&flow_offload_netdev_notifier); + + return nft_register_expr(&nft_flow_offload_type); +} + +static void __exit nft_flow_offload_module_exit(void) +{ + 
struct net *net; + + nft_unregister_expr(&nft_flow_offload_type); + unregister_netdevice_notifier(&flow_offload_netdev_notifier); + rtnl_lock(); + for_each_net(net) + flow_offload_cleanup(net, NULL); + rtnl_unlock(); +} + +module_init(nft_flow_offload_module_init); +module_exit(nft_flow_offload_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_EXPR("flow_offload"); From patchwork Fri Nov 3 15:26:36 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 833932 X-Patchwork-Delegate: pablo@netfilter.org Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netfilter-devel-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3yT5RB4ky2z9ryT for ; Sat, 4 Nov 2017 02:27:02 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755933AbdKCP06 (ORCPT ); Fri, 3 Nov 2017 11:26:58 -0400 Received: from mail.us.es ([193.147.175.20]:43340 "EHLO mail.us.es" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755879AbdKCP0t (ORCPT ); Fri, 3 Nov 2017 11:26:49 -0400 Received: from antivirus1-rhel7.int (unknown [192.168.2.11]) by mail.us.es (Postfix) with ESMTP id 4C9D0C0B44 for ; Fri, 3 Nov 2017 16:26:48 +0100 (CET) Received: from antivirus1-rhel7.int (localhost [127.0.0.1]) by antivirus1-rhel7.int (Postfix) with ESMTP id 3AC3EB7FEB for ; Fri, 3 Nov 2017 16:26:48 +0100 (CET) Received: by antivirus1-rhel7.int (Postfix, from userid 99) id 3003DB7FE0; Fri, 3 Nov 2017 16:26:48 +0100 (CET) X-Spam-Checker-Version: SpamAssassin 3.4.1 (2015-04-28) on antivirus1-rhel7.int X-Spam-Level: X-Spam-Status: No, score=-108.2 
required=7.5 tests=ALL_TRUSTED,BAYES_50, SMTPAUTH_US2,USER_IN_WHITELIST autolearn=disabled version=3.4.1 Received: from antivirus1-rhel7.int (localhost [127.0.0.1]) by antivirus1-rhel7.int (Postfix) with ESMTP id E1FBCB7FE7; Fri, 3 Nov 2017 16:26:45 +0100 (CET) Received: from 192.168.1.97 (192.168.1.97) by antivirus1-rhel7.int (F-Secure/fsigk_smtp/550/antivirus1-rhel7.int); Fri, 03 Nov 2017 16:26:45 +0100 (CET) X-Virus-Status: clean(F-Secure/fsigk_smtp/550/antivirus1-rhel7.int) Received: from salvia.here (unknown [31.4.245.115]) (Authenticated sender: pneira@us.es) by entrada.int (Postfix) with ESMTPA id AA024403DFA1; Fri, 3 Nov 2017 16:26:45 +0100 (CET) X-SMTPAUTHUS: auth mail.us.es From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: netdev@vger.kernel.org Subject: [PATCH RFC, WIP 5/5] netfilter: nft_flow_offload: add ndo hooks for hardware offload Date: Fri, 3 Nov 2017 16:26:36 +0100 Message-Id: <20171103152636.9967-6-pablo@netfilter.org> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20171103152636.9967-1-pablo@netfilter.org> References: <20171103152636.9967-1-pablo@netfilter.org> X-Virus-Scanned: ClamAV using ClamSMTP Sender: netfilter-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netfilter-devel@vger.kernel.org This patch adds the infrastructure to offload flows to hardware, in case the NIC/switch comes with built-in flow table capabilities. If the hardware comes with no hardware flow tables, or they have limitations in terms of features, this falls back to the generic software flow table implementation. The software flow table aging thread skips entries that reside in the hardware, so the hardware will be responsible for releasing these flow table entries too.
Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 4 ++ net/netfilter/nf_flow_offload.c | 3 ++ net/netfilter/nft_flow_offload.c | 99 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f535779d9dc1..0787f53374b3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -826,6 +826,8 @@ struct xfrmdev_ops { }; #endif +struct flow_offload; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1281,6 +1283,8 @@ struct net_device_ops { int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); + int (*ndo_flow_add)(struct flow_offload *flow); + int (*ndo_flow_del)(struct flow_offload *flow); int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, diff --git a/net/netfilter/nf_flow_offload.c b/net/netfilter/nf_flow_offload.c index f4a3fbe11b69..ac5786976dbb 100644 --- a/net/netfilter/nf_flow_offload.c +++ b/net/netfilter/nf_flow_offload.c @@ -147,6 +147,9 @@ static void nf_flow_offload_work_gc(struct work_struct *work) flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); + if (flow->flags & FLOW_OFFLOAD_HW) + continue; + if (nf_flow_has_expired(flow)) { flow_offload_del(flow); nf_flow_release_ct(tuplehash); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index d38d185a19a5..0cb194a0aaab 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -17,6 +17,22 @@ union flow_gateway { struct in6_addr ip6; }; +static void flow_hw_offload_del(struct flow_offload *flow) +{ + struct net_device *indev; + int ret; + + rtnl_lock(); + indev = __dev_get_by_index(&init_net, flow->tuplehash[0].tuple.iifidx); + WARN_ON(!indev); + + if (indev->netdev_ops->ndo_flow_del) { + ret = 
indev->netdev_ops->ndo_flow_del(flow); + WARN_ON(ret < 0); + } + rtnl_unlock(); +} + static int flow_offload_iterate_cleanup(struct nf_conn *ct, void *data) { struct flow_offload_tuple_rhash *tuplehash; @@ -44,14 +60,40 @@ static int flow_offload_iterate_cleanup(struct nf_conn *ct, void *data) tuplehash[tuplehash->tuple.dir]); flow_offload_del(flow); + if (flow->flags & FLOW_OFFLOAD_HW) + flow_hw_offload_del(flow); /* Do not remove this conntrack from table. */ return 0; } +static LIST_HEAD(flow_hw_offload_pending_list); +static DEFINE_SPINLOCK(flow_hw_offload_lock); + +struct flow_hw_offload { + struct list_head list; + struct flow_offload *flow; + struct nf_conn *ct; +}; + static void flow_offload_cleanup(struct net *net, const struct net_device *dev) { + struct flow_hw_offload *offload, *next; + + spin_lock_bh(&flow_hw_offload_lock); + list_for_each_entry_safe(offload, next, &flow_hw_offload_pending_list, list) { + if (dev == NULL || + offload->flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx == dev->ifindex || + offload->flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx == dev->ifindex) + continue; + + nf_conntrack_put(&offload->ct->ct_general); + list_del(&offload->list); + kfree(offload); + } + spin_unlock_bh(&flow_hw_offload_lock); + nf_ct_iterate_cleanup_net(net, flow_offload_iterate_cleanup, (void *)dev, 0, 0); } @@ -156,6 +198,43 @@ flow_offload_alloc(const struct nf_conn *ct, int iifindex, int oifindex, return flow; } +static int do_flow_offload(struct flow_offload *flow) +{ + struct net_device *indev; + int ret, ifindex; + + rtnl_lock(); + ifindex = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx; + indev = __dev_get_by_index(&init_net, ifindex); + WARN_ON(!indev); + + ret = indev->netdev_ops->ndo_flow_add(flow); + rtnl_unlock(); + + if (ret >= 0) + flow->flags |= FLOW_OFFLOAD_HW; + + return ret; +} + +static struct delayed_work nft_flow_offload_dwork; + +static void flow_offload_work(struct work_struct *work) +{ + struct 
flow_hw_offload *offload, *next; + + spin_lock_bh(&flow_hw_offload_lock); + list_for_each_entry_safe(offload, next, &flow_hw_offload_pending_list, list) { + do_flow_offload(offload->flow); + nf_conntrack_put(&offload->ct->ct_general); + list_del(&offload->list); + kfree(offload); + } + spin_unlock_bh(&flow_hw_offload_lock); + + queue_delayed_work(system_power_efficient_wq, &nft_flow_offload_dwork, HZ); +} + static int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct, union flow_gateway *orig_gw, @@ -211,6 +290,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, union flow_gateway orig_gateway, reply_gateway; struct net_device *outdev = pkt->xt.state->out; struct net_device *indev = pkt->xt.state->in; + struct flow_hw_offload *offload; enum ip_conntrack_info ctinfo; struct flow_offload *flow; struct nf_conn *ct; @@ -250,6 +330,21 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (ret < 0) goto err2; + if (!indev->netdev_ops->ndo_flow_add) + return; + + offload = kmalloc(sizeof(struct flow_hw_offload), GFP_ATOMIC); + if (!offload) + return; + + nf_conntrack_get(&ct->ct_general); + offload->ct = ct; + offload->flow = flow; + + spin_lock_bh(&flow_hw_offload_lock); + list_add_tail(&offload->list, &flow_hw_offload_pending_list); + spin_unlock_bh(&flow_hw_offload_lock); + return; err2: kfree(flow); @@ -308,6 +403,9 @@ static int __init nft_flow_offload_module_init(void) { register_netdevice_notifier(&flow_offload_netdev_notifier); + INIT_DEFERRABLE_WORK(&nft_flow_offload_dwork, flow_offload_work); + queue_delayed_work(system_power_efficient_wq, &nft_flow_offload_dwork, HZ); + return nft_register_expr(&nft_flow_offload_type); } @@ -316,6 +414,7 @@ static void __exit nft_flow_offload_module_exit(void) struct net *net; nft_unregister_expr(&nft_flow_offload_type); + cancel_delayed_work_sync(&nft_flow_offload_dwork); unregister_netdevice_notifier(&flow_offload_netdev_notifier); rtnl_lock(); for_each_net(net)