diff mbox series

[bpf-next] net: netfilter: Make ct zone id configurable for bpf ct helper functions

Message ID 20240329041430.2176860-1-brad@faucet.nz
State Changes Requested
Headers show
Series [bpf-next] net: netfilter: Make ct zone id configurable for bpf ct helper functions | expand

Commit Message

Brad Cowie March 29, 2024, 4:14 a.m. UTC
Add ct zone id to bpf_ct_opts so that arbitrary ct zone can be
set for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc
and bpf_{xdp,skb}_ct_lookup.

Signed-off-by: Brad Cowie <brad@faucet.nz>
---
 net/netfilter/nf_conntrack_bpf.c              | 23 ++++++++++---------
 .../testing/selftests/bpf/prog_tests/bpf_nf.c |  1 -
 .../testing/selftests/bpf/progs/test_bpf_nf.c | 13 ++---------
 3 files changed, 14 insertions(+), 23 deletions(-)

Comments

Martin KaFai Lau April 5, 2024, 8:01 p.m. UTC | #1
On 3/28/24 9:14 PM, Brad Cowie wrote:
> Add ct zone id to bpf_ct_opts so that arbitrary ct zone can be
> set for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc
> and bpf_{xdp,skb}_ct_lookup.
> 
> Signed-off-by: Brad Cowie <brad@faucet.nz>
> ---
>   net/netfilter/nf_conntrack_bpf.c              | 23 ++++++++++---------
>   .../testing/selftests/bpf/prog_tests/bpf_nf.c |  1 -
>   .../testing/selftests/bpf/progs/test_bpf_nf.c | 13 ++---------
>   3 files changed, 14 insertions(+), 23 deletions(-)
> 
> diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
> index d2492d050fe6..a0f8a64751ec 100644
> --- a/net/netfilter/nf_conntrack_bpf.c
> +++ b/net/netfilter/nf_conntrack_bpf.c
> @@ -30,7 +30,6 @@
>    * @error      - Out parameter, set for any errors encountered
>    *		 Values:
>    *		   -EINVAL - Passed NULL for bpf_tuple pointer
> - *		   -EINVAL - opts->reserved is not 0
>    *		   -EINVAL - netns_id is less than -1
>    *		   -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
>    *		   -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
> @@ -42,16 +41,14 @@
>    *		 Values:
>    *		   IPPROTO_TCP, IPPROTO_UDP
>    * @dir:       - connection tracking tuple direction.
> - * @reserved   - Reserved member, will be reused for more options in future
> - *		 Values:
> - *		   0
> + * @ct_zone    - connection tracking zone id.
>    */
>   struct bpf_ct_opts {
>   	s32 netns_id;
>   	s32 error;
>   	u8 l4proto;
>   	u8 dir;
> -	u8 reserved[2];
> +	u16 ct_zone;

How about the other fields (flags and dir) in the "struct nf_conntrack_zone" and 
would it be useful to have values other than the default?

[ ... ]

> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
> index b30ff6b3b81a..25c3c4e87ed5 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
> @@ -103,7 +103,6 @@ static void test_bpf_nf_ct(int mode)
>   		goto end;
>   
>   	ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
> -	ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
>   	ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
>   	ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
>   	ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
> diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
> index 77ad8adf68da..4adb73bc1b33 100644
> --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
> +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
> @@ -45,7 +45,8 @@ struct bpf_ct_opts___local {
>   	s32 netns_id;
>   	s32 error;
>   	u8 l4proto;
> -	u8 reserved[3];
> +	u8 dir;
> +	u16 ct_zone;
>   } __attribute__((preserve_access_index));
>   
>   struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
> @@ -84,16 +85,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
>   	else
>   		test_einval_bpf_tuple = opts_def.error;
>   
> -	opts_def.reserved[0] = 1;
> -	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
> -		       sizeof(opts_def));
> -	opts_def.reserved[0] = 0;
> -	opts_def.l4proto = IPPROTO_TCP;
> -	if (ct)
> -		bpf_ct_release(ct);
> -	else
> -		test_einval_reserved = opts_def.error;
> -

Can it actually test an alloc and lookup of a non default zone id?

Please also separate the selftest into another patch.

pw-bot: cr

>   	opts_def.netns_id = -2;
>   	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
>   		       sizeof(opts_def));
Brad Cowie April 11, 2024, 2:29 a.m. UTC | #2
On Sat, 6 Apr 2024 at 09:01, Martin KaFai Lau <martin.lau@linux.dev> wrote:
> How about the other fields (flags and dir) in the "struct nf_conntrack_zone" and
> would it be useful to have values other than the default?

Good question, it would probably be useful to make these configurable
as well. My reason for only adding ct zone id was to avoid changing
the size of bpf_ct_opts (NF_BPF_CT_OPTS_SZ).

I would be interested in some opinions here on if it's acceptable to
increase the size of bpf_ct_opts, if so, should I also add back some
reserved options to the struct for future use?

> Can it actually test an alloc and lookup of a non default zone id?

Yes, I have a test written now and will include this in my v2 submission.

> Please also separate the selftest into another patch.

Will do.
Martin KaFai Lau April 12, 2024, 12:45 a.m. UTC | #3
On 4/10/24 7:29 PM, Brad Cowie wrote:
> On Sat, 6 Apr 2024 at 09:01, Martin KaFai Lau <martin.lau@linux.dev> wrote:
>> How about the other fields (flags and dir) in the "struct nf_conntrack_zone" and
>> would it be useful to have values other than the default?
> 
> Good question, it would probably be useful to make these configurable
> as well. My reason for only adding ct zone id was to avoid changing
> the size of bpf_ct_opts (NF_BPF_CT_OPTS_SZ).
> 
> I would be interested in some opinions here on if it's acceptable to
> increase the size of bpf_ct_opts, if so, should I also add back some
> reserved options to the struct for future use?

I think the reserved[2] was there for the padding reason.

It should be the first time there is a __sz increase. May be worth to explore 
how it should work.

The opts_len check will need to check == old_size or == new_size. Only use the 
new fields if it is new_size.

There is

enum {
         NF_BPF_CT_OPTS_SZ = 12,
};

This enum probably needs to update with the new size also. NF_BPF_CT_OPTS_SZ 
should be under CO-RE and its enum value will be updated with the running kernel.

The bpf prog has its own struct bpf_ct_opts during compilation (from vmlinux.h 
or defined a local one), so may be the bpf prog can do something like this:

#include "vmlinux.h"

struct bpf_ct_opts___newer {
	s32 netns_id;
	s32 error;
	u8 l4proto;
	u8 dir;
	u8 reserved[2];
	u32 new_field; /* for example */
} __attribute__((preserve_access_index));

SEC("tc")
int run_in_older_kernel(struct __sk_buff *ctx)
{
	struct bpf_ct_opts___newer opts = {};

	/* min of the running kernel opts size or the
	 * local ___newer opts size
	 */
	bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts,
			  min(NF_BPF_CT_OPTS_SZ, sizeof(opts));
}


> 
>> Can it actually test an alloc and lookup of a non default zone id?
> 
> Yes, I have a test written now and will include this in my v2 submission.
> 
>> Please also separate the selftest into another patch.
> 
> Will do.
>
diff mbox series

Patch

diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index d2492d050fe6..a0f8a64751ec 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -30,7 +30,6 @@ 
  * @error      - Out parameter, set for any errors encountered
  *		 Values:
  *		   -EINVAL - Passed NULL for bpf_tuple pointer
- *		   -EINVAL - opts->reserved is not 0
  *		   -EINVAL - netns_id is less than -1
  *		   -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
  *		   -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
@@ -42,16 +41,14 @@ 
  *		 Values:
  *		   IPPROTO_TCP, IPPROTO_UDP
  * @dir:       - connection tracking tuple direction.
- * @reserved   - Reserved member, will be reused for more options in future
- *		 Values:
- *		   0
+ * @ct_zone    - connection tracking zone id.
  */
 struct bpf_ct_opts {
 	s32 netns_id;
 	s32 error;
 	u8 l4proto;
 	u8 dir;
-	u8 reserved[2];
+	u16 ct_zone;
 };
 
 enum {
@@ -104,11 +101,11 @@  __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
 			u32 timeout)
 {
 	struct nf_conntrack_tuple otuple, rtuple;
+	struct nf_conntrack_zone ct_zone;
 	struct nf_conn *ct;
 	int err;
 
-	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
-	    opts_len != NF_BPF_CT_OPTS_SZ)
+	if (!opts || !bpf_tuple || opts_len != NF_BPF_CT_OPTS_SZ)
 		return ERR_PTR(-EINVAL);
 
 	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
@@ -130,7 +127,9 @@  __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
 			return ERR_PTR(-ENONET);
 	}
 
-	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
+	nf_ct_zone_init(&ct_zone, opts->ct_zone, NF_CT_DEFAULT_ZONE_DIR, 0);
+
+	ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple,
 				GFP_ATOMIC);
 	if (IS_ERR(ct))
 		goto out;
@@ -152,11 +151,11 @@  static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
 {
 	struct nf_conntrack_tuple_hash *hash;
 	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_zone ct_zone;
 	struct nf_conn *ct;
 	int err;
 
-	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
-	    opts_len != NF_BPF_CT_OPTS_SZ)
+	if (!opts || !bpf_tuple || opts_len != NF_BPF_CT_OPTS_SZ)
 		return ERR_PTR(-EINVAL);
 	if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
 		return ERR_PTR(-EPROTO);
@@ -174,7 +173,9 @@  static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
 			return ERR_PTR(-ENONET);
 	}
 
-	hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+	nf_ct_zone_init(&ct_zone, opts->ct_zone, NF_CT_DEFAULT_ZONE_DIR, 0);
+
+	hash = nf_conntrack_find_get(net, &ct_zone, &tuple);
 	if (opts->netns_id >= 0)
 		put_net(net);
 	if (!hash)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index b30ff6b3b81a..25c3c4e87ed5 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -103,7 +103,6 @@  static void test_bpf_nf_ct(int mode)
 		goto end;
 
 	ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
-	ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
 	ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
 	ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
 	ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 77ad8adf68da..4adb73bc1b33 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -45,7 +45,8 @@  struct bpf_ct_opts___local {
 	s32 netns_id;
 	s32 error;
 	u8 l4proto;
-	u8 reserved[3];
+	u8 dir;
+	u16 ct_zone;
 } __attribute__((preserve_access_index));
 
 struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
@@ -84,16 +85,6 @@  nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 	else
 		test_einval_bpf_tuple = opts_def.error;
 
-	opts_def.reserved[0] = 1;
-	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
-		       sizeof(opts_def));
-	opts_def.reserved[0] = 0;
-	opts_def.l4proto = IPPROTO_TCP;
-	if (ct)
-		bpf_ct_release(ct);
-	else
-		test_einval_reserved = opts_def.error;
-
 	opts_def.netns_id = -2;
 	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
 		       sizeof(opts_def));