Message ID | 0ddc5e4fcc6a38c74c185063e73ef4c496eaa7ca.1716026761.git.lorenzo@kernel.org |
---|---|
State | Handled Elsewhere |
Headers | show |
Series | netfilter: Add the capability to offload flowtable in XDP layer | expand |
On Sat, 18 May 2024 at 12:13, Lorenzo Bianconi <lorenzo@kernel.org> wrote: > > Introduce bpf_xdp_flow_offload_lookup kfunc in order to perform the > lookup of a given flowtable entry based on a fib tuple of incoming > traffic. > bpf_xdp_flow_offload_lookup can be used as building block to offload > in xdp the processing of sw flowtable when hw flowtable is not > available. > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> > --- Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Though I think it might have been better to have an opts parameter for extensibility (and have opts->error for now to aid debugging when NULL is returned), but I won't insist (it's not a big deal, as there's only two things that can go wrong: the tuple->family is unsupported or the lookup fails).
On Sat, May 18, 2024 at 3:13 AM Lorenzo Bianconi <lorenzo@kernel.org> wrote: > > Introduce bpf_xdp_flow_offload_lookup kfunc in order to perform the > lookup of a given flowtable entry based on a fib tuple of incoming > traffic. > bpf_xdp_flow_offload_lookup can be used as building block to offload > in xdp the processing of sw flowtable when hw flowtable is not > available. > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> > --- > include/net/netfilter/nf_flow_table.h | 10 +++ > net/netfilter/Makefile | 5 ++ > net/netfilter/nf_flow_table_bpf.c | 94 +++++++++++++++++++++++++++ > net/netfilter/nf_flow_table_inet.c | 2 +- > 4 files changed, 110 insertions(+), 1 deletion(-) > create mode 100644 net/netfilter/nf_flow_table_bpf.c > > diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h > index 0bbe6ea8e0651..085660cbcd3f2 100644 > --- a/include/net/netfilter/nf_flow_table.h > +++ b/include/net/netfilter/nf_flow_table.h > @@ -312,6 +312,16 @@ unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, > unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, > const struct nf_hook_state *state); > > +#if (IS_BUILTIN(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ > + (IS_MODULE(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) > +extern int nf_flow_offload_register_bpf(void); > +#else > +static inline int nf_flow_offload_register_bpf(void) > +{ > + return 0; > +} > +#endif > + > #define MODULE_ALIAS_NF_FLOWTABLE(family) \ > MODULE_ALIAS("nf-flowtable-" __stringify(family)) > > diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile > index 614815a3ed738..18b09cec92024 100644 > --- a/net/netfilter/Makefile > +++ b/net/netfilter/Makefile > @@ -144,6 +144,11 @@ obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o > nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \ > nf_flow_table_offload.o > nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o > +ifeq ($(CONFIG_NF_FLOW_TABLE),m) > +nf_flow_table-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_flow_table_bpf.o > +else ifeq ($(CONFIG_NF_FLOW_TABLE),y) > +nf_flow_table-$(CONFIG_DEBUG_INFO_BTF) += nf_flow_table_bpf.o > +endif > > obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o > > diff --git a/net/netfilter/nf_flow_table_bpf.c b/net/netfilter/nf_flow_table_bpf.c > new file mode 100644 > index 0000000000000..f999ed9712796 > --- /dev/null > +++ b/net/netfilter/nf_flow_table_bpf.c > @@ -0,0 +1,94 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* Unstable Flow Table Helpers for XDP hook > + * > + * These are called from the XDP programs. > + * Note that it is allowed to break compatibility for these functions since > + * the interface they are exposed through to BPF programs is explicitly > + * unstable. > + */ > + > +#include <linux/kernel.h> > +#include <linux/init.h> > +#include <linux/module.h> > +#include <net/netfilter/nf_flow_table.h> > +#include <linux/bpf.h> > +#include <linux/btf.h> > +#include <net/xdp.h> > + > +__diag_push(); > +__diag_ignore_all("-Wmissing-prototypes", > + "Global functions as their definitions will be in nf_flow_table BTF"); > + > +static struct flow_offload_tuple_rhash * > +bpf_xdp_flow_offload_tuple_lookup(struct net_device *dev, > + struct flow_offload_tuple *tuple, > + __be16 proto) > +{ > + struct flow_offload_tuple_rhash *tuplehash; > + struct nf_flowtable *flow_table; > + struct flow_offload *flow; > + > + flow_table = nf_flowtable_by_dev(dev); > + if (!flow_table) > + return NULL; > + > + tuplehash = flow_offload_lookup(flow_table, tuple); > + if (!tuplehash) > + return NULL; > + > + flow = container_of(tuplehash, struct flow_offload, > + tuplehash[tuplehash->tuple.dir]); > + flow_offload_refresh(flow_table, flow, false); > + > + return tuplehash; > +} > + > +__bpf_kfunc struct flow_offload_tuple_rhash * > +bpf_xdp_flow_offload_lookup(struct xdp_md *ctx, > + struct bpf_fib_lookup *fib_tuple) > +{ > + struct xdp_buff *xdp = (struct xdp_buff *)ctx; > + struct flow_offload_tuple tuple = { > + .iifidx = fib_tuple->ifindex, > + .l3proto = fib_tuple->family, > + .l4proto = fib_tuple->l4_protocol, > + .src_port = fib_tuple->sport, > + .dst_port = fib_tuple->dport, > + }; > + __be16 proto; > + > + switch (fib_tuple->family) { > + case AF_INET: > + tuple.src_v4.s_addr = fib_tuple->ipv4_src; > + tuple.dst_v4.s_addr = fib_tuple->ipv4_dst; > + proto = htons(ETH_P_IP); > + break; > + case AF_INET6: > + tuple.src_v6 = *(struct in6_addr *)&fib_tuple->ipv6_src; > + tuple.dst_v6 = *(struct in6_addr *)&fib_tuple->ipv6_dst; > + proto = htons(ETH_P_IPV6); > + break; > + default: > + return NULL; > + } > + > + return bpf_xdp_flow_offload_tuple_lookup(xdp->rxq->dev, &tuple, proto); > +} > + > +__diag_pop() > + > +BTF_KFUNCS_START(nf_ft_kfunc_set) > +BTF_ID_FLAGS(func, bpf_xdp_flow_offload_lookup) I think it needs to be KF_RET_NULL. And most likely KF_TRUSTED_ARGS as well. Also the "offload" doesn't fit in the name. The existing code calls it "offload", because it's actually pushing the rules to HW (if I understand the code), but here it's just a lookup from xdp. So call it bpf_xdp_flow_lookup() ? Though "flow" is a bit too generic here. nf_flow maybe? > +BTF_KFUNCS_END(nf_ft_kfunc_set) > + > +static const struct btf_kfunc_id_set nf_flow_offload_kfunc_set = { > + .owner = THIS_MODULE, > + .set = &nf_ft_kfunc_set, > +}; > + > +int nf_flow_offload_register_bpf(void) > +{ > + return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, > + &nf_flow_offload_kfunc_set); > +} > +EXPORT_SYMBOL_GPL(nf_flow_offload_register_bpf); > diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c > index 6eef15648b7b0..6175f7556919d 100644 > --- a/net/netfilter/nf_flow_table_inet.c > +++ b/net/netfilter/nf_flow_table_inet.c > @@ -98,7 +98,7 @@ static int __init nf_flow_inet_module_init(void) > nft_register_flowtable_type(&flowtable_ipv6); > nft_register_flowtable_type(&flowtable_inet); > > - return 0; > + return nf_flow_offload_register_bpf(); > } > > static void __exit nf_flow_inet_module_exit(void) > -- > 2.45.1 >
> On Sat, May 18, 2024 at 3:13 AM Lorenzo Bianconi <lorenzo@kernel.org> wrote: [...] > > I think it needs to be KF_RET_NULL. > And most likely KF_TRUSTED_ARGS as well. ack, I will fix it in v2. > > Also the "offload" doesn't fit in the name. > The existing code calls it "offload", because it's actually > pushing the rules to HW (if I understand the code), > but here it's just a lookup from xdp. > So call it > bpf_xdp_flow_lookup() ? ack fine, I do not have a strong opinion on it. I will fix it in v2. > > Though "flow" is a bit too generic here. > nf_flow maybe? ack, I will fix it in v2. Regards, Lorenzo > > > +BTF_KFUNCS_END(nf_ft_kfunc_set) > > + > > +static const struct btf_kfunc_id_set nf_flow_offload_kfunc_set = { > > + .owner = THIS_MODULE, > > + .set = &nf_ft_kfunc_set, > > +}; > > + > > +int nf_flow_offload_register_bpf(void) > > +{ > > + return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, > > + &nf_flow_offload_kfunc_set); > > +} > > +EXPORT_SYMBOL_GPL(nf_flow_offload_register_bpf); > > diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c > > index 6eef15648b7b0..6175f7556919d 100644 > > --- a/net/netfilter/nf_flow_table_inet.c > > +++ b/net/netfilter/nf_flow_table_inet.c > > @@ -98,7 +98,7 @@ static int __init nf_flow_inet_module_init(void) > > nft_register_flowtable_type(&flowtable_ipv6); > > nft_register_flowtable_type(&flowtable_inet); > > > > - return 0; > > + return nf_flow_offload_register_bpf(); > > } > > > > static void __exit nf_flow_inet_module_exit(void) > > -- > > 2.45.1 > >
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 0bbe6ea8e0651..085660cbcd3f2 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -312,6 +312,16 @@ unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +#if (IS_BUILTIN(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) +extern int nf_flow_offload_register_bpf(void); +#else +static inline int nf_flow_offload_register_bpf(void) +{ + return 0; +} +#endif + #define MODULE_ALIAS_NF_FLOWTABLE(family) \ MODULE_ALIAS("nf-flowtable-" __stringify(family)) diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 614815a3ed738..18b09cec92024 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -144,6 +144,11 @@ obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \ nf_flow_table_offload.o nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o +ifeq ($(CONFIG_NF_FLOW_TABLE),m) +nf_flow_table-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_flow_table_bpf.o +else ifeq ($(CONFIG_NF_FLOW_TABLE),y) +nf_flow_table-$(CONFIG_DEBUG_INFO_BTF) += nf_flow_table_bpf.o +endif obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o diff --git a/net/netfilter/nf_flow_table_bpf.c b/net/netfilter/nf_flow_table_bpf.c new file mode 100644 index 0000000000000..f999ed9712796 --- /dev/null +++ b/net/netfilter/nf_flow_table_bpf.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable Flow Table Helpers for XDP hook + * + * These are called from the XDP programs. + * Note that it is allowed to break compatibility for these functions since + * the interface they are exposed through to BPF programs is explicitly + * unstable. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <net/netfilter/nf_flow_table.h> +#include <linux/bpf.h> +#include <linux/btf.h> +#include <net/xdp.h> + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in nf_flow_table BTF"); + +static struct flow_offload_tuple_rhash * +bpf_xdp_flow_offload_tuple_lookup(struct net_device *dev, + struct flow_offload_tuple *tuple, + __be16 proto) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *flow_table; + struct flow_offload *flow; + + flow_table = nf_flowtable_by_dev(dev); + if (!flow_table) + return NULL; + + tuplehash = flow_offload_lookup(flow_table, tuple); + if (!tuplehash) + return NULL; + + flow = container_of(tuplehash, struct flow_offload, + tuplehash[tuplehash->tuple.dir]); + flow_offload_refresh(flow_table, flow, false); + + return tuplehash; +} + +__bpf_kfunc struct flow_offload_tuple_rhash * +bpf_xdp_flow_offload_lookup(struct xdp_md *ctx, + struct bpf_fib_lookup *fib_tuple) +{ + struct xdp_buff *xdp = (struct xdp_buff *)ctx; + struct flow_offload_tuple tuple = { + .iifidx = fib_tuple->ifindex, + .l3proto = fib_tuple->family, + .l4proto = fib_tuple->l4_protocol, + .src_port = fib_tuple->sport, + .dst_port = fib_tuple->dport, + }; + __be16 proto; + + switch (fib_tuple->family) { + case AF_INET: + tuple.src_v4.s_addr = fib_tuple->ipv4_src; + tuple.dst_v4.s_addr = fib_tuple->ipv4_dst; + proto = htons(ETH_P_IP); + break; + case AF_INET6: + tuple.src_v6 = *(struct in6_addr *)&fib_tuple->ipv6_src; + tuple.dst_v6 = *(struct in6_addr *)&fib_tuple->ipv6_dst; + proto = htons(ETH_P_IPV6); + break; + default: + return NULL; + } + + return bpf_xdp_flow_offload_tuple_lookup(xdp->rxq->dev, &tuple, proto); +} + +__diag_pop() + +BTF_KFUNCS_START(nf_ft_kfunc_set) +BTF_ID_FLAGS(func, bpf_xdp_flow_offload_lookup) +BTF_KFUNCS_END(nf_ft_kfunc_set) + +static const struct btf_kfunc_id_set nf_flow_offload_kfunc_set = { + .owner = THIS_MODULE, + .set = &nf_ft_kfunc_set, +}; + +int nf_flow_offload_register_bpf(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, + &nf_flow_offload_kfunc_set); +} +EXPORT_SYMBOL_GPL(nf_flow_offload_register_bpf); diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 6eef15648b7b0..6175f7556919d 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -98,7 +98,7 @@ static int __init nf_flow_inet_module_init(void) nft_register_flowtable_type(&flowtable_ipv6); nft_register_flowtable_type(&flowtable_inet); - return 0; + return nf_flow_offload_register_bpf(); } static void __exit nf_flow_inet_module_exit(void)
Introduce bpf_xdp_flow_offload_lookup kfunc in order to perform the lookup of a given flowtable entry based on a fib tuple of incoming traffic. bpf_xdp_flow_offload_lookup can be used as building block to offload in xdp the processing of sw flowtable when hw flowtable is not available. Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> --- include/net/netfilter/nf_flow_table.h | 10 +++ net/netfilter/Makefile | 5 ++ net/netfilter/nf_flow_table_bpf.c | 94 +++++++++++++++++++++++++++ net/netfilter/nf_flow_table_inet.c | 2 +- 4 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 net/netfilter/nf_flow_table_bpf.c