diff mbox series

[nf-next,v2,1/8] netfilter: nf_tables: Support for subkeys, set with multiple ranged fields

Message ID 90493a6feae0ae64db378fbfc8e9f351d4b7b05d.1574428269.git.sbrivio@redhat.com
State Changes Requested
Delegated to: Pablo Neira
Headers show
Series nftables: Set implementation for arbitrary concatenation of ranges | expand

Commit Message

Stefano Brivio Nov. 22, 2019, 1:40 p.m. UTC
Introduce a new nested netlink attribute, NFTA_SET_SUBKEY, used to
specify the length of each field in a set concatenation.

This allows set implementations to support concatenation of multiple
ranged items, as they can divide the input key into matching data for
every single field. Such set implementations would indicate this
capability with the NFT_SET_SUBKEY flag.

In order to specify the interval for a set entry, userspace would
simply keep using two elements per entry, as it happens now, with the
end element indicating the upper interval bound. As a single element
can now be a concatenation of several fields, with or without the
NFT_SET_ELEM_INTERVAL_END flag, we obtain a convenient way to support
multiple ranged fields in a set.

While at it, export the number of 32-bit registers available for
packet matching, as nftables will need this to know the maximum
number of field lengths that can be specified.

For example, "packets with an IPv4 address between 192.0.2.0 and
192.0.2.42, with destination port between 22 and 25", can be
expressed as two concatenated elements:

  192.0.2.0 . 22
  192.0.2.42 . 25 with NFT_SET_ELEM_INTERVAL_END

and the NFTA_SET_SUBKEY attributes would be 32, 16, in that order.

Note that this does *not* represent the concatenated range:

  0xc0 0x00 0x02 0x00 0x00 0x16 - 0xc0 0x00 0x02 0x2a 0x00 0x25

on the six packet bytes of interest. That is, the range specified
does *not* include e.g. 0xc0 0x00 0x02 0x29 0x00 0x42, which is:
  192.0.0.41 . 66

Suggested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
v2: No changes

 include/uapi/linux/netfilter/nf_tables.h | 16 ++++++++++++++++
 net/netfilter/nf_tables_api.c            |  4 ++--
 2 files changed, 18 insertions(+), 2 deletions(-)

Comments

Pablo Neira Ayuso Nov. 23, 2019, 8:01 p.m. UTC | #1
Hi Stefano,

On Fri, Nov 22, 2019 at 02:40:00PM +0100, Stefano Brivio wrote:
[...]
> diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
> index bb9b049310df..f8dbeac14898 100644
> --- a/include/uapi/linux/netfilter/nf_tables.h
> +++ b/include/uapi/linux/netfilter/nf_tables.h
> @@ -48,6 +48,7 @@ enum nft_registers {
>  
>  #define NFT_REG_SIZE	16
>  #define NFT_REG32_SIZE	4
> +#define NFT_REG32_COUNT	(NFT_REG32_15 - NFT_REG32_00 + 1)
>  
>  /**
>   * enum nft_verdicts - nf_tables internal verdicts
> @@ -275,6 +276,7 @@ enum nft_rule_compat_attributes {
>   * @NFT_SET_TIMEOUT: set uses timeouts
>   * @NFT_SET_EVAL: set can be updated from the evaluation path
>   * @NFT_SET_OBJECT: set contains stateful objects
> + * @NFT_SET_SUBKEY: set uses subkeys to map intervals for multiple fields
>   */
>  enum nft_set_flags {
>  	NFT_SET_ANONYMOUS		= 0x1,
> @@ -284,6 +286,7 @@ enum nft_set_flags {
>  	NFT_SET_TIMEOUT			= 0x10,
>  	NFT_SET_EVAL			= 0x20,
>  	NFT_SET_OBJECT			= 0x40,
> +	NFT_SET_SUBKEY			= 0x80,
>  };
>  
>  /**
> @@ -309,6 +312,17 @@ enum nft_set_desc_attributes {
>  };
>  #define NFTA_SET_DESC_MAX	(__NFTA_SET_DESC_MAX - 1)
>  
> +/**
> + * enum nft_set_subkey_attributes - subkeys for multiple ranged fields
> + *
> + * @NFTA_SET_SUBKEY_LEN: length of single field, in bits (NLA_U32)
> + */
> +enum nft_set_subkey_attributes {

Missing NFTA_SET_SUBKEY_UNSPEC here.

Not a problem if nla_parse_nested*() is not used as in your case,
probably good for consistency, in case there is a need for using such
function in the future.

> +	NFTA_SET_SUBKEY_LEN,
> +	__NFTA_SET_SUBKEY_MAX
> +};
> +#define NFTA_SET_SUBKEY_MAX	(__NFTA_SET_SUBKEY_MAX - 1)
> +
>  /**
>   * enum nft_set_attributes - nf_tables set netlink attributes
>   *
> @@ -327,6 +341,7 @@ enum nft_set_desc_attributes {
>   * @NFTA_SET_USERDATA: user data (NLA_BINARY)
>   * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*)
>   * @NFTA_SET_HANDLE: set handle (NLA_U64)
> + * @NFTA_SET_SUBKEY: subkeys for multiple ranged fields (NLA_NESTED)
>   */
>  enum nft_set_attributes {
>  	NFTA_SET_UNSPEC,
> @@ -346,6 +361,7 @@ enum nft_set_attributes {
>  	NFTA_SET_PAD,
>  	NFTA_SET_OBJ_TYPE,
>  	NFTA_SET_HANDLE,
> +	NFTA_SET_SUBKEY,

Could you use NFTA_SET_DESC instead for this? The idea is to add the
missing front-end code to parse this new attribute and store the
subkeys length in set->desc.klen[], hence nft_pipapo_init() can just
use the already parsed data. I think this will simplify the code that
I'm seeing in nft_pipapo_init() a bit since not netlink parsing will
be required.

I'm attaching a sketch patch, including also the use of NFTA_LIST_ELEM:

NFTA_SET_DESC
  NFTA_SET_DESC_SIZE
  NFTA_SET_DESC_SUBKEY
     NFTA_LIST_ELEM
       NFTA_SET_SUBKEY_LEN
     NFTA_LIST_ELEM
       NFTA_SET_SUBKEY_LEN
     ...

Just in there's a need for more fields to describe the subkey in the
future, it's just more boilerplate code for the future extensibility.

Another suggestion is to rename NFT_SET_SUBKEY to NFT_SET_CONCAT, to
signal the kernel that userspace wants a datastructure that knows how
to deal with concatenations. Although concatenations can be done by
hashtable already, this flags is just interpreted by the kernel as a
hint on what kind of datastructure would fit better for what is
needed. The combination of the NFT_SET_INTERVAL and the NFT_SET_CONCAT
(if you're fine with the rename, of course) is what will kick in
pipapo to be used.

Attaching sketch for the netlink control plane with the changes I've
been describing above, compile-tested only.

Thanks.
Stefano Brivio Nov. 25, 2019, 9:30 a.m. UTC | #2
Hi Pablo,

On Sat, 23 Nov 2019 21:01:08 +0100
Pablo Neira Ayuso <pablo@netfilter.org> wrote:

> Hi Stefano,
> 
> On Fri, Nov 22, 2019 at 02:40:00PM +0100, Stefano Brivio wrote:
> [...]
> > diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
> > index bb9b049310df..f8dbeac14898 100644
> > --- a/include/uapi/linux/netfilter/nf_tables.h
> > +++ b/include/uapi/linux/netfilter/nf_tables.h
> > @@ -48,6 +48,7 @@ enum nft_registers {
> >  
> >  #define NFT_REG_SIZE	16
> >  #define NFT_REG32_SIZE	4
> > +#define NFT_REG32_COUNT	(NFT_REG32_15 - NFT_REG32_00 + 1)
> >  
> >  /**
> >   * enum nft_verdicts - nf_tables internal verdicts
> > @@ -275,6 +276,7 @@ enum nft_rule_compat_attributes {
> >   * @NFT_SET_TIMEOUT: set uses timeouts
> >   * @NFT_SET_EVAL: set can be updated from the evaluation path
> >   * @NFT_SET_OBJECT: set contains stateful objects
> > + * @NFT_SET_SUBKEY: set uses subkeys to map intervals for multiple fields
> >   */
> >  enum nft_set_flags {
> >  	NFT_SET_ANONYMOUS		= 0x1,
> > @@ -284,6 +286,7 @@ enum nft_set_flags {
> >  	NFT_SET_TIMEOUT			= 0x10,
> >  	NFT_SET_EVAL			= 0x20,
> >  	NFT_SET_OBJECT			= 0x40,
> > +	NFT_SET_SUBKEY			= 0x80,
> >  };
> >  
> >  /**
> > @@ -309,6 +312,17 @@ enum nft_set_desc_attributes {
> >  };
> >  #define NFTA_SET_DESC_MAX	(__NFTA_SET_DESC_MAX - 1)
> >  
> > +/**
> > + * enum nft_set_subkey_attributes - subkeys for multiple ranged fields
> > + *
> > + * @NFTA_SET_SUBKEY_LEN: length of single field, in bits (NLA_U32)
> > + */
> > +enum nft_set_subkey_attributes {  
> 
> Missing NFTA_SET_SUBKEY_UNSPEC here.
> 
> Not a problem if nla_parse_nested*() is not used as in your case,
> probably good for consistency, in case there is a need for using such
> function in the future.
> 
> > +	NFTA_SET_SUBKEY_LEN,
> > +	__NFTA_SET_SUBKEY_MAX
> > +};
> > +#define NFTA_SET_SUBKEY_MAX	(__NFTA_SET_SUBKEY_MAX - 1)
> > +
> >  /**
> >   * enum nft_set_attributes - nf_tables set netlink attributes
> >   *
> > @@ -327,6 +341,7 @@ enum nft_set_desc_attributes {
> >   * @NFTA_SET_USERDATA: user data (NLA_BINARY)
> >   * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*)
> >   * @NFTA_SET_HANDLE: set handle (NLA_U64)
> > + * @NFTA_SET_SUBKEY: subkeys for multiple ranged fields (NLA_NESTED)
> >   */
> >  enum nft_set_attributes {
> >  	NFTA_SET_UNSPEC,
> > @@ -346,6 +361,7 @@ enum nft_set_attributes {
> >  	NFTA_SET_PAD,
> >  	NFTA_SET_OBJ_TYPE,
> >  	NFTA_SET_HANDLE,
> > +	NFTA_SET_SUBKEY,  
> 
> Could you use NFTA_SET_DESC instead for this? The idea is to add the
> missing front-end code to parse this new attribute and store the
> subkeys length in set->desc.klen[], hence nft_pipapo_init() can just
> use the already parsed data.

Logically, I think it makes sense. I'll try to implement this in nft
and libnftnl and see if some fundamental issue pops up there.

> I think this will simplify the code that I'm seeing in
> nft_pipapo_init() a bit since not netlink parsing will be required.

I don't think it makes a real difference there, because the actual
parsing parts are rather limited:

	nla_for_each_nested(attr, nla[NFTA_SET_SUBKEY], rem) {
	[...]
		if (nla_len(attr) != sizeof(klen) ||
		    nla_type(attr) != NFTA_SET_SUBKEY_LEN)
			return -EINVAL;
	}

	[...]

	nla_for_each_nested(attr, nla[NFTA_SET_SUBKEY], rem) {
		klen = ntohl(nla_get_be32(attr));
	[...]
	}

the rest is validations (specific for this set type):

	nla_for_each_nested(attr, nla[NFTA_SET_SUBKEY], rem) {
		if (++field_count >= NFT_PIPAPO_MAX_FIELDS)
			return -EINVAL;
	[...]
	}

	[...]

	nla_for_each_nested(attr, nla[NFTA_SET_SUBKEY], rem) {
	[...]
		if (!klen || klen % NFT_PIPAPO_GROUP_BITS)
			goto out_free;

		if (klen > NFT_PIPAPO_MAX_BITS)
			goto out_free;
	[...]
	}

and calculations (also specific):

	nla_for_each_nested(attr, nla[NFTA_SET_SUBKEY], rem) {
		if (++field_count >= NFT_PIPAPO_MAX_FIELDS)
	[...]
	}

	nla_for_each_nested(attr, nla[NFTA_SET_SUBKEY], rem) {
	[...]
		priv->groups += f->groups = klen / NFT_PIPAPO_GROUP_BITS;
		priv->width += round_up(klen / BITS_PER_BYTE, sizeof(u32));
	[...]
	}

that we would still need.

> I'm attaching a sketch patch, including also the use of NFTA_LIST_ELEM:
> 
> NFTA_SET_DESC
>   NFTA_SET_DESC_SIZE
>   NFTA_SET_DESC_SUBKEY
>      NFTA_LIST_ELEM
>        NFTA_SET_SUBKEY_LEN
>      NFTA_LIST_ELEM
>        NFTA_SET_SUBKEY_LEN
>      ...
> 
> Just in there's a need for more fields to describe the subkey in the
> future, it's just more boilerplate code for the future extensibility.

Thanks! I'll play with it and see if I can fit all the pieces.

> Another suggestion is to rename NFT_SET_SUBKEY to NFT_SET_CONCAT, to
> signal the kernel that userspace wants a datastructure that knows how
> to deal with concatenations. Although concatenations can be done by
> hashtable already, this flags is just interpreted by the kernel as a
> hint on what kind of datastructure would fit better for what is
> needed. The combination of the NFT_SET_INTERVAL and the NFT_SET_CONCAT
> (if you're fine with the rename, of course) is what will kick in
> pipapo to be used.

I think that NFT_SET_CONCAT as you propose is conceptually a better
fit. I'm worried about the confusion this might generate for other set
implementations.

That is, a reasonable expectation is that userspace passes
NFT_SET_CONCAT whenever there's a concatenation, and hash
implementations support sets with that flag, too, so I would add it to
the supported feature flags of hash types, and it wouldn't be there for
rbtree.

Right now, that won't break anything: the flag might or might not be
present depending on userspace version, and selection of hash types
would proceed as usual. But I'm worried that we might miss this
subtlety in the future and break concatenation support for older
userspace versions.

Another idea could be that we get rid of this flag altogether: if we
move "subkeys" to set->desc, the ->estimate() functions of rbtree and
pipapo can check for those and refuse or allow set selection
accordingly. I have no idea yet if this introduces further complexity
for nft, because there we would need to decide how to create start/end
elements depending on the existing set description instead of using a
single flag. I can give it a try if it makes sense.
Pablo Neira Ayuso Nov. 25, 2019, 9:58 a.m. UTC | #3
On Mon, Nov 25, 2019 at 10:30:35AM +0100, Stefano Brivio wrote:
[...]
> Another idea could be that we get rid of this flag altogether: if we
> move "subkeys" to set->desc, the ->estimate() functions of rbtree and
> pipapo can check for those and refuse or allow set selection
> accordingly. I have no idea yet if this introduces further complexity
> for nft, because there we would need to decide how to create start/end
> elements depending on the existing set description instead of using a
> single flag. I can give it a try if it makes sense.

nft_set_desc can probably store a boolean 'concat' that is set on if
the NFTA_SET_DESC_SUBKEY attribute is specified. Then, this flag is
not needed and you can just rely on ->estimate() as you describe.

The hashtable will just ignore this description, it does not need the
description even if userspace pass it on since the interval flag is
set on.

You just have to update the rbtree to check for desc->concat, if this
is true, then rbtree->estimate() returns false.

BTW, then probably you can rename this attribute to
NFT_SET_DESC_CONCAT?

Thanks!
Stefano Brivio Nov. 25, 2019, 1:26 p.m. UTC | #4
On Mon, 25 Nov 2019 10:58:17 +0100
Pablo Neira Ayuso <pablo@netfilter.org> wrote:

> On Mon, Nov 25, 2019 at 10:30:35AM +0100, Stefano Brivio wrote:
> [...]
> > Another idea could be that we get rid of this flag altogether: if we
> > move "subkeys" to set->desc, the ->estimate() functions of rbtree and
> > pipapo can check for those and refuse or allow set selection
> > accordingly. I have no idea yet if this introduces further complexity
> > for nft, because there we would need to decide how to create start/end
> > elements depending on the existing set description instead of using a
> > single flag. I can give it a try if it makes sense.  
> 
> nft_set_desc can probably store a boolean 'concat' that is set on if
> the NFTA_SET_DESC_SUBKEY attribute is specified. Then, this flag is
> not needed and you can just rely on ->estimate() as you describe.

I could even just check desc->num_subkeys from your patch then, without
adding another field to nft_set_desc. Too ugly?

> The hashtable will just ignore this description, it does not need the
> description even if userspace pass it on since the interval flag is
> set on.
> 
> You just have to update the rbtree to check for desc->concat, if this
> is true, then rbtree->estimate() returns false.

Yes, I think it all makes sense, thanks for detailing the idea. I'll get
to this in a few hours.

> BTW, then probably you can rename this attribute to
> NFT_SET_DESC_CONCAT?

It would include sizes, though. What about NFT_SET_DESC_SUBSIZE or
NFT_SET_DESC_FIELD_SIZE?
Pablo Neira Ayuso Nov. 25, 2019, 2:30 p.m. UTC | #5
On Mon, Nov 25, 2019 at 02:26:16PM +0100, Stefano Brivio wrote:
> On Mon, 25 Nov 2019 10:58:17 +0100
> Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> 
> > On Mon, Nov 25, 2019 at 10:30:35AM +0100, Stefano Brivio wrote:
> > [...]
> > > Another idea could be that we get rid of this flag altogether: if we
> > > move "subkeys" to set->desc, the ->estimate() functions of rbtree and
> > > pipapo can check for those and refuse or allow set selection
> > > accordingly. I have no idea yet if this introduces further complexity
> > > for nft, because there we would need to decide how to create start/end
> > > elements depending on the existing set description instead of using a
> > > single flag. I can give it a try if it makes sense.  
> > 
> > nft_set_desc can probably store a boolean 'concat' that is set on if
> > the NFTA_SET_DESC_SUBKEY attribute is specified. Then, this flag is
> > not needed and you can just rely on ->estimate() as you describe.
> 
> I could even just check desc->num_subkeys from your patch then, without
> adding another field to nft_set_desc. Too ugly?

OK.

> > The hashtable will just ignore this description, it does not need the
> > description even if userspace pass it on since the interval flag is
> > set on.
> > 
> > You just have to update the rbtree to check for desc->concat, if this
> > is true, then rbtree->estimate() returns false.
> 
> Yes, I think it all makes sense, thanks for detailing the idea. I'll get
> to this in a few hours.
> 
> > BTW, then probably you can rename this attribute to
> > NFT_SET_DESC_CONCAT?
> 
> It would include sizes, though. What about NFT_SET_DESC_SUBSIZE or
> NFT_SET_DESC_FIELD_SIZE?

You mean this:

       NFT_SET_DESC_SUBSIZE
          NFT_SET_DESC_FIELD_SIZE
          NFT_SET_DESC_FIELD_SIZE

instead of this:

        NFT_SET_DESC_CONCAT
          NFT_LIST_ELEM
             NFT_SET_DESC_SUBKEY_LEN
          NFT_LIST_ELEM
             NFT_SET_DESC_SUBKEY_LEN

If I described this correctly, your approach is more simple indeed.

However, I don't really have specific requirements for the future
right now. The one below is leaving room to add more subkey fields (to
describe each subkey if that is ever required). My experience is that
leaving room to extend netlink in the future is usually a good idea,
that's all.

Instead of NFT_LIST_ELEM, something like NFT_SET_DESC_SUBKEY should be
fine too, ie.

        NFT_SET_DESC_CONCAT
          NFT_SET_DESC_SUBKEY
             NFT_SET_DESC_SUBKEY_LEN
          NFT_SET_DESC_SUBKEY
             NFT_SET_DESC_SUBKEY_LEN

This netlink stuff is tricky in that it's set on stone one exposed.

Thanks.
Stefano Brivio Nov. 25, 2019, 2:54 p.m. UTC | #6
On Mon, 25 Nov 2019 15:30:58 +0100
Pablo Neira Ayuso <pablo@netfilter.org> wrote:

> On Mon, Nov 25, 2019 at 02:26:16PM +0100, Stefano Brivio wrote:
> > On Mon, 25 Nov 2019 10:58:17 +0100
> > Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> >   
> > > On Mon, Nov 25, 2019 at 10:30:35AM +0100, Stefano Brivio wrote:
> > > [...]  
> > > > Another idea could be that we get rid of this flag altogether: if we
> > > > move "subkeys" to set->desc, the ->estimate() functions of rbtree and
> > > > pipapo can check for those and refuse or allow set selection
> > > > accordingly. I have no idea yet if this introduces further complexity
> > > > for nft, because there we would need to decide how to create start/end
> > > > elements depending on the existing set description instead of using a
> > > > single flag. I can give it a try if it makes sense.    
> > > 
> > > nft_set_desc can probably store a boolean 'concat' that is set on if
> > > the NFTA_SET_DESC_SUBKEY attribute is specified. Then, this flag is
> > > not needed and you can just rely on ->estimate() as you describe.  
> > 
> > I could even just check desc->num_subkeys from your patch then, without
> > adding another field to nft_set_desc. Too ugly?  
> 
> OK.
> 
> > > The hashtable will just ignore this description, it does not need the
> > > description even if userspace pass it on since the interval flag is
> > > set on.
> > > 
> > > You just have to update the rbtree to check for desc->concat, if this
> > > is true, then rbtree->estimate() returns false.  
> > 
> > Yes, I think it all makes sense, thanks for detailing the idea. I'll get
> > to this in a few hours.
> >   
> > > BTW, then probably you can rename this attribute to
> > > NFT_SET_DESC_CONCAT?  
> > 
> > It would include sizes, though. What about NFT_SET_DESC_SUBSIZE or
> > NFT_SET_DESC_FIELD_SIZE?  
> 
> You mean this:
> 
>        NFT_SET_DESC_SUBSIZE
>           NFT_SET_DESC_FIELD_SIZE
>           NFT_SET_DESC_FIELD_SIZE
> 
> instead of this:
> 
>         NFT_SET_DESC_CONCAT
>           NFT_LIST_ELEM
>              NFT_SET_DESC_SUBKEY_LEN
>           NFT_LIST_ELEM
>              NFT_SET_DESC_SUBKEY_LEN
> 
> If I described this correctly, your approach is more simple indeed.

Ah, yes, that's what I meant, but that's because I didn't understand
your intention in the first place. :) I see now.

> However, I don't really have specific requirements for the future
> right now. The one below is leaving room to add more subkey fields (to
> describe each subkey if that is ever required). My experience is that
> leaving room to extend netlink in the future is usually a good idea,
> that's all.
> 
> Instead of NFT_LIST_ELEM, something like NFT_SET_DESC_SUBKEY should be
> fine too, ie.
> 
>         NFT_SET_DESC_CONCAT
>           NFT_SET_DESC_SUBKEY
>              NFT_SET_DESC_SUBKEY_LEN
>           NFT_SET_DESC_SUBKEY
>              NFT_SET_DESC_SUBKEY_LEN

Actually:

>         NFT_SET_DESC_CONCAT
>           NFT_LIST_ELEM
>              NFT_SET_DESC_SUBKEY_LEN
>           NFT_LIST_ELEM
>              NFT_SET_DESC_SUBKEY_LEN

sounds better to me. Maybe "SUBKEY" starts looking a bit obscure here:
the "SUB" part is already there, the "KEY" part mostly refers to an
implementation detail. What about:

         NFT_SET_DESC_CONCAT
           NFT_LIST_ELEM
              NFT_SET_DESC_LEN
           NFT_LIST_ELEM
              NFT_SET_DESC_LEN

this?
Pablo Neira Ayuso Nov. 25, 2019, 8:38 p.m. UTC | #7
On Mon, Nov 25, 2019 at 03:54:22PM +0100, Stefano Brivio wrote:
> On Mon, 25 Nov 2019 15:30:58 +0100
> Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> 
> > On Mon, Nov 25, 2019 at 02:26:16PM +0100, Stefano Brivio wrote:
> > > On Mon, 25 Nov 2019 10:58:17 +0100
> > > Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > >   
> > > > On Mon, Nov 25, 2019 at 10:30:35AM +0100, Stefano Brivio wrote:
> > > > [...]  
> > > > > Another idea could be that we get rid of this flag altogether: if we
> > > > > move "subkeys" to set->desc, the ->estimate() functions of rbtree and
> > > > > pipapo can check for those and refuse or allow set selection
> > > > > accordingly. I have no idea yet if this introduces further complexity
> > > > > for nft, because there we would need to decide how to create start/end
> > > > > elements depending on the existing set description instead of using a
> > > > > single flag. I can give it a try if it makes sense.    
> > > > 
> > > > nft_set_desc can probably store a boolean 'concat' that is set on if
> > > > the NFTA_SET_DESC_SUBKEY attribute is specified. Then, this flag is
> > > > not needed and you can just rely on ->estimate() as you describe.  
> > > 
> > > I could even just check desc->num_subkeys from your patch then, without
> > > adding another field to nft_set_desc. Too ugly?  
> > 
> > OK.
> > 
> > > > The hashtable will just ignore this description, it does not need the
> > > > description even if userspace pass it on since the interval flag is
> > > > set on.
> > > > 
> > > > You just have to update the rbtree to check for desc->concat, if this
> > > > is true, then rbtree->estimate() returns false.  
> > > 
> > > Yes, I think it all makes sense, thanks for detailing the idea. I'll get
> > > to this in a few hours.
> > >   
> > > > BTW, then probably you can rename this attribute to
> > > > NFT_SET_DESC_CONCAT?  
> > > 
> > > It would include sizes, though. What about NFT_SET_DESC_SUBSIZE or
> > > NFT_SET_DESC_FIELD_SIZE?  
> > 
> > You mean this:
> > 
> >        NFT_SET_DESC_SUBSIZE
> >           NFT_SET_DESC_FIELD_SIZE
> >           NFT_SET_DESC_FIELD_SIZE
> > 
> > instead of this:
> > 
> >         NFT_SET_DESC_CONCAT
> >           NFT_LIST_ELEM
> >              NFT_SET_DESC_SUBKEY_LEN
> >           NFT_LIST_ELEM
> >              NFT_SET_DESC_SUBKEY_LEN
> > 
> > If I described this correctly, your approach is more simple indeed.
> 
> Ah, yes, that's what I meant, but that's because I didn't understand
> your intention in the first place. :) I see now.
> 
> > However, I don't really have specific requirements for the future
> > right now. The one below is leaving room to add more subkey fields (to
> > describe each subkey if that is ever required). My experience is that
> > leaving room to extend netlink in the future is usually a good idea,
> > that's all.
> > 
> > Instead of NFT_LIST_ELEM, something like NFT_SET_DESC_SUBKEY should be
> > fine too, ie.
> > 
> >         NFT_SET_DESC_CONCAT
> >           NFT_SET_DESC_SUBKEY
> >              NFT_SET_DESC_SUBKEY_LEN
> >           NFT_SET_DESC_SUBKEY
> >              NFT_SET_DESC_SUBKEY_LEN
> 
> Actually:
> 
> >         NFT_SET_DESC_CONCAT
> >           NFT_LIST_ELEM
> >              NFT_SET_DESC_SUBKEY_LEN
> >           NFT_LIST_ELEM
> >              NFT_SET_DESC_SUBKEY_LEN
> 
> sounds better to me. Maybe "SUBKEY" starts looking a bit obscure here:
> the "SUB" part is already there, the "KEY" part mostly refers to an
> implementation detail. What about:
> 
>          NFT_SET_DESC_CONCAT
>            NFT_LIST_ELEM
>               NFT_SET_DESC_LEN
>            NFT_LIST_ELEM
>               NFT_SET_DESC_LEN
> 
> this?

I think the _SUBKEY_ infix is fine.

Problem with using NFT_SET_DESC_* for inner (nested) attributes is
that this matches the same prefix as top level netlink attributes.

Note there is NFT_SET_DESC_SIZE. If there is a NFT_SET_DESC_LEN that
is wrapped around NFT_SET_DESC_CONCAT (using same prefix) it might
look a bit misleading.

Having said this, although I started this debate about naming, I don't
have a strong opinion on all these names. As soon as the netlink
attribute scheme that is used allows for extensibility in the future,
such as allowing to add more descriptions for each subkey, I'll be fine.

Thanks.
diff mbox series

Patch

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index bb9b049310df..f8dbeac14898 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -48,6 +48,7 @@  enum nft_registers {
 
 #define NFT_REG_SIZE	16
 #define NFT_REG32_SIZE	4
+#define NFT_REG32_COUNT	(NFT_REG32_15 - NFT_REG32_00 + 1)
 
 /**
  * enum nft_verdicts - nf_tables internal verdicts
@@ -275,6 +276,7 @@  enum nft_rule_compat_attributes {
  * @NFT_SET_TIMEOUT: set uses timeouts
  * @NFT_SET_EVAL: set can be updated from the evaluation path
  * @NFT_SET_OBJECT: set contains stateful objects
+ * @NFT_SET_SUBKEY: set uses subkeys to map intervals for multiple fields
  */
 enum nft_set_flags {
 	NFT_SET_ANONYMOUS		= 0x1,
@@ -284,6 +286,7 @@  enum nft_set_flags {
 	NFT_SET_TIMEOUT			= 0x10,
 	NFT_SET_EVAL			= 0x20,
 	NFT_SET_OBJECT			= 0x40,
+	NFT_SET_SUBKEY			= 0x80,
 };
 
 /**
@@ -309,6 +312,17 @@  enum nft_set_desc_attributes {
 };
 #define NFTA_SET_DESC_MAX	(__NFTA_SET_DESC_MAX - 1)
 
+/**
+ * enum nft_set_subkey_attributes - subkeys for multiple ranged fields
+ *
+ * @NFTA_SET_SUBKEY_LEN: length of single field, in bits (NLA_U32)
+ */
+enum nft_set_subkey_attributes {
+	NFTA_SET_SUBKEY_LEN,
+	__NFTA_SET_SUBKEY_MAX
+};
+#define NFTA_SET_SUBKEY_MAX	(__NFTA_SET_SUBKEY_MAX - 1)
+
 /**
  * enum nft_set_attributes - nf_tables set netlink attributes
  *
@@ -327,6 +341,7 @@  enum nft_set_desc_attributes {
  * @NFTA_SET_USERDATA: user data (NLA_BINARY)
  * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*)
  * @NFTA_SET_HANDLE: set handle (NLA_U64)
+ * @NFTA_SET_SUBKEY: subkeys for multiple ranged fields (NLA_NESTED)
  */
 enum nft_set_attributes {
 	NFTA_SET_UNSPEC,
@@ -346,6 +361,7 @@  enum nft_set_attributes {
 	NFTA_SET_PAD,
 	NFTA_SET_OBJ_TYPE,
 	NFTA_SET_HANDLE,
+	NFTA_SET_SUBKEY,
 	__NFTA_SET_MAX
 };
 #define NFTA_SET_MAX		(__NFTA_SET_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ff04cdc87f76..a877d60f86a9 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3248,7 +3248,7 @@  EXPORT_SYMBOL_GPL(nft_unregister_set);
 
 #define NFT_SET_FEATURES	(NFT_SET_INTERVAL | NFT_SET_MAP | \
 				 NFT_SET_TIMEOUT | NFT_SET_OBJECT | \
-				 NFT_SET_EVAL)
+				 NFT_SET_EVAL | NFT_SET_SUBKEY)
 
 static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
 {
@@ -3826,7 +3826,7 @@  static int nf_tables_newset(struct net *net, struct sock *nlsk,
 		if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
 			      NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
 			      NFT_SET_MAP | NFT_SET_EVAL |
-			      NFT_SET_OBJECT))
+			      NFT_SET_OBJECT | NFT_SET_SUBKEY))
 			return -EINVAL;
 		/* Only one of these operations is supported */
 		if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ==