diff mbox series

[nft,v4,08/32] netlink: send bit-length of bitwise binops to kernel

Message ID 20220404121410.188509-9-jeremy@azazel.net
State Changes Requested
Delegated to: Pablo Neira
Headers show
Series Extend values assignable to packet marks and payload fields | expand

Commit Message

Jeremy Sowden April 4, 2022, 12:13 p.m. UTC
Some bitwise operations are generated when munging paylod expressions.
During delinearization, we attempt to eliminate these operations.
However, this is done before deducing the byte-order or the correct
length in bits of the operands, which means that we don't always handle
multi-byte host-endian operations correctly.  Therefore, pass the
bit-length of these expressions to the kernel in order to have it
available during delinearization.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
---
 src/netlink_delinearize.c | 14 ++++++++++++--
 src/netlink_linearize.c   |  2 ++
 2 files changed, 14 insertions(+), 2 deletions(-)

Comments

Pablo Neira Ayuso May 23, 2022, 5:03 p.m. UTC | #1
Hi,

I'm sorry for taking time to get back to this, I have questions.

On Mon, Apr 04, 2022 at 01:13:46PM +0100, Jeremy Sowden wrote:
> Some bitwise operations are generated when munging paylod expressions.
> During delinearization, we attempt to eliminate these operations.
> However, this is done before deducing the byte-order or the correct
> length in bits of the operands, which means that we don't always handle
> multi-byte host-endian operations correctly.  Therefore, pass the
> bit-length of these expressions to the kernel in order to have it
> available during delinearization.
> 
> Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
> ---
>  src/netlink_delinearize.c | 14 ++++++++++++--
>  src/netlink_linearize.c   |  2 ++
>  2 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
> index a1b00dee209a..733977bc526d 100644
> --- a/src/netlink_delinearize.c
> +++ b/src/netlink_delinearize.c
> @@ -451,20 +451,28 @@ static struct expr *netlink_parse_bitwise_bool(struct netlink_parse_ctx *ctx,
>  					       const struct nftnl_expr *nle,
>  					       enum nft_registers sreg,
>  					       struct expr *left)
> -
>  {
>  	struct nft_data_delinearize nld;
>  	struct expr *expr, *mask, *xor, *or;
> +	unsigned int nbits;
>  	mpz_t m, x, o;
>  
>  	expr = left;
>  
> +	nbits = nftnl_expr_get_u32(nle, NFTNL_EXPR_BITWISE_NBITS);
> +	if (nbits > 0)
> +		expr->len = nbits;

So NFTNL_EXPR_BITWISE_NBITS is signalling that this is an implicit
bitwise that has been generated to operate with a payload header bitfield?

Could you provide an example expression tree to show how this
simplifies delinearization?

> +
>  	nld.value = nftnl_expr_get(nle, NFTNL_EXPR_BITWISE_MASK, &nld.len);
>  	mask = netlink_alloc_value(loc, &nld);
> +	if (nbits > 0)
> +		mpz_switch_byteorder(mask->value, div_round_up(nbits, BITS_PER_BYTE));

What is the byteorder expected for the mask before this switch
operation?

>  	mpz_init_set(m, mask->value);
>  
>  	nld.value = nftnl_expr_get(nle, NFTNL_EXPR_BITWISE_XOR, &nld.len);
> -	xor  = netlink_alloc_value(loc, &nld);
> +	xor = netlink_alloc_value(loc, &nld);
> +	if (nbits > 0)
> +		mpz_switch_byteorder(xor->value, div_round_up(nbits, BITS_PER_BYTE));
>  	mpz_init_set(x, xor->value);
>  
>  	mpz_init_set_ui(o, 0);
> @@ -500,6 +508,8 @@ static struct expr *netlink_parse_bitwise_bool(struct netlink_parse_ctx *ctx,
>  
>  		or = netlink_alloc_value(loc, &nld);
>  		mpz_set(or->value, o);
> +		if (nbits > 0)
> +			mpz_switch_byteorder(or->value, div_round_up(nbits, BITS_PER_BYTE));
>  		expr = binop_expr_alloc(loc, OP_OR, expr, or);
>  		expr->len = left->len;
>  	}
> diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c
> index c8bbcb7452b0..4793f3853bee 100644
> --- a/src/netlink_linearize.c
> +++ b/src/netlink_linearize.c
> @@ -677,6 +677,8 @@ static void netlink_gen_bitwise(struct netlink_linearize_ctx *ctx,
>  	netlink_put_register(nle, NFTNL_EXPR_BITWISE_DREG, dreg);
>  	nftnl_expr_set_u32(nle, NFTNL_EXPR_BITWISE_OP, NFT_BITWISE_BOOL);
>  	nftnl_expr_set_u32(nle, NFTNL_EXPR_BITWISE_LEN, len);
> +	if (expr->byteorder == BYTEORDER_HOST_ENDIAN)
> +		nftnl_expr_set_u32(nle, NFTNL_EXPR_BITWISE_NBITS, expr->len);

Why is this only required for host endian expressions?

>  	netlink_gen_raw_data(mask, expr->byteorder, len, &nld);
>  	nftnl_expr_set(nle, NFTNL_EXPR_BITWISE_MASK, nld.value, nld.len);
> -- 
> 2.35.1
>
Jeremy Sowden Nov. 1, 2022, 6:46 p.m. UTC | #2
On 2022-05-23, at 19:03:42 +0200, Pablo Neira Ayuso wrote:
> On Mon, Apr 04, 2022 at 01:13:46PM +0100, Jeremy Sowden wrote:
> > Some bitwise operations are generated when munging paylod
> > expressions.  During delinearization, we attempt to eliminate these
> > operations.  However, this is done before deducing the byte-order or
> > the correct length in bits of the operands, which means that we
> > don't always handle multi-byte host-endian operations correctly.
> > Therefore, pass the bit-length of these expressions to the kernel in
> > order to have it available during delinearization.
> >
> > Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
> > ---
> >  src/netlink_delinearize.c | 14 ++++++++++++--
> >  src/netlink_linearize.c   |  2 ++
> >  2 files changed, 14 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
> > index a1b00dee209a..733977bc526d 100644
> > --- a/src/netlink_delinearize.c
> > +++ b/src/netlink_delinearize.c
> > @@ -451,20 +451,28 @@ static struct expr *netlink_parse_bitwise_bool(struct netlink_parse_ctx *ctx,
> >  					       const struct nftnl_expr *nle,
> >  					       enum nft_registers sreg,
> >  					       struct expr *left)
> > -
> >  {
> >  	struct nft_data_delinearize nld;
> >  	struct expr *expr, *mask, *xor, *or;
> > +	unsigned int nbits;
> >  	mpz_t m, x, o;
> >
> >  	expr = left;
> >
> > +	nbits = nftnl_expr_get_u32(nle, NFTNL_EXPR_BITWISE_NBITS);
> > +	if (nbits > 0)
> > +		expr->len = nbits;
>
> So NFTNL_EXPR_BITWISE_NBITS is signalling that this is an implicit
> bitwise that has been generated to operate with a payload header
> bitfield?
>
> Could you provide an example expression tree to show how this
> simplifies delinearization?

This rule:

  add rule ip6 t c ct mark set ip6 dscp lshift 2 or 0x10

has the following representation:

  ip6 t c
  [ payload load 2b @ network header + 0 => reg 1 ]
  [ bitwise reg 1 = ( reg 1 & 0x0000c00f ) ^ 0x00000000 ]
  [ bitwise reg 1 = ( reg 1 >> 0x00000006 ) ]
  [ byteorder reg 1 = ntoh(reg 1, 2, 1) ]
  [ bitwise reg 1 = ( reg 1 << 0x00000002 ) ]
  [ bitwise reg 1 = ( reg 1 & 0x00000fef ) ^ 0x00000010 ]
  [ ct set mark with reg 1 ]

This patch is intended to fix a problem with the OR expression:

  [ bitwise reg 1 = ( reg 1 & 0x00000fef ) ^ 0x00000010 ]
  
The expression has length 12 bits.  During linearization the length is
rounded up to 2 bytes.  During delinearization, the length of the
expression is compared to the size of the mask in order to determine
whether the mask can be removed:

  if (left->len > 0 && mpz_scan0(m, 0) == left->len) {
    /* mask encompasses the entire value */
    expr_free(mask);
  } else {
    mpz_set(mask->value, m);
    expr = binop_expr_alloc(loc, OP_AND, expr, mask);
    expr->len = left->len;
  }

Because the length of the expression is now 16 bits, it does not match
the width of the mask and so the mask is retained:

  table ip6 t {
    chain c {
      type filter hook output priority filter; policy accept;
      ct mark set ip6 dscp << 2 & 4095 | 16
    }
  }

> > +
> >  	nld.value = nftnl_expr_get(nle, NFTNL_EXPR_BITWISE_MASK, &nld.len);
> >  	mask = netlink_alloc_value(loc, &nld);
> > +	if (nbits > 0)
> > +		mpz_switch_byteorder(mask->value, div_round_up(nbits, BITS_PER_BYTE));
>
> What is the byteorder expected for the mask before this switch
> operation?

NBO.

> >  	mpz_init_set(m, mask->value);
> >
> >  	nld.value = nftnl_expr_get(nle, NFTNL_EXPR_BITWISE_XOR, &nld.len);
> > -	xor  = netlink_alloc_value(loc, &nld);
> > +	xor = netlink_alloc_value(loc, &nld);
> > +	if (nbits > 0)
> > +		mpz_switch_byteorder(xor->value, div_round_up(nbits, BITS_PER_BYTE));
> >  	mpz_init_set(x, xor->value);
> >
> >  	mpz_init_set_ui(o, 0);
> > @@ -500,6 +508,8 @@ static struct expr *netlink_parse_bitwise_bool(struct netlink_parse_ctx *ctx,
> >
> >  		or = netlink_alloc_value(loc, &nld);
> >  		mpz_set(or->value, o);
> > +		if (nbits > 0)
> > +			mpz_switch_byteorder(or->value, div_round_up(nbits, BITS_PER_BYTE));
> >  		expr = binop_expr_alloc(loc, OP_OR, expr, or);
> >  		expr->len = left->len;
> >  	}
> > diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c
> > index c8bbcb7452b0..4793f3853bee 100644
> > --- a/src/netlink_linearize.c
> > +++ b/src/netlink_linearize.c
> > @@ -677,6 +677,8 @@ static void netlink_gen_bitwise(struct netlink_linearize_ctx *ctx,
> >  	netlink_put_register(nle, NFTNL_EXPR_BITWISE_DREG, dreg);
> >  	nftnl_expr_set_u32(nle, NFTNL_EXPR_BITWISE_OP, NFT_BITWISE_BOOL);
> >  	nftnl_expr_set_u32(nle, NFTNL_EXPR_BITWISE_LEN, len);
> > +	if (expr->byteorder == BYTEORDER_HOST_ENDIAN)
> > +		nftnl_expr_set_u32(nle, NFTNL_EXPR_BITWISE_NBITS, expr->len);
>
> Why is this only required for host endian expressions?

I wasn't sure whether keeping track of the bit length by storing it in
the kernel would be an acceptable solution.  Doing it for both byte-
orders caused failures in unrelated test-cases.  Since NBO expressions
don't come up in my use-case I decided to restrict it to HBO to start
with, and, if copying the bit-length to and from the kernel *was*
acceptable, to fix those test-failures in the next version of the
patch-set (I assumed one would be required :)).

However, in the process of replying to the questions in your response to
patch 13, I have realized that this patch may not be necessary after
all.  The problem here lies in the code which attempts to turn a mask-
and-xor expression back into the corresponding original bitwise opera-
tion.  A different solution would be to make use of the native bitwise
operations introduced by this series to avoid having to convert to and
from mask-and-xor expressions altogether.  Further explanation in the
later reply.

J.

> >  	netlink_gen_raw_data(mask, expr->byteorder, len, &nld);
> >  	nftnl_expr_set(nle, NFTNL_EXPR_BITWISE_MASK, nld.value, nld.len);
> > --
> > 2.35.1
> >
>
diff mbox series

Patch

diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
index a1b00dee209a..733977bc526d 100644
--- a/src/netlink_delinearize.c
+++ b/src/netlink_delinearize.c
@@ -451,20 +451,28 @@  static struct expr *netlink_parse_bitwise_bool(struct netlink_parse_ctx *ctx,
 					       const struct nftnl_expr *nle,
 					       enum nft_registers sreg,
 					       struct expr *left)
-
 {
 	struct nft_data_delinearize nld;
 	struct expr *expr, *mask, *xor, *or;
+	unsigned int nbits;
 	mpz_t m, x, o;
 
 	expr = left;
 
+	nbits = nftnl_expr_get_u32(nle, NFTNL_EXPR_BITWISE_NBITS);
+	if (nbits > 0)
+		expr->len = nbits;
+
 	nld.value = nftnl_expr_get(nle, NFTNL_EXPR_BITWISE_MASK, &nld.len);
 	mask = netlink_alloc_value(loc, &nld);
+	if (nbits > 0)
+		mpz_switch_byteorder(mask->value, div_round_up(nbits, BITS_PER_BYTE));
 	mpz_init_set(m, mask->value);
 
 	nld.value = nftnl_expr_get(nle, NFTNL_EXPR_BITWISE_XOR, &nld.len);
-	xor  = netlink_alloc_value(loc, &nld);
+	xor = netlink_alloc_value(loc, &nld);
+	if (nbits > 0)
+		mpz_switch_byteorder(xor->value, div_round_up(nbits, BITS_PER_BYTE));
 	mpz_init_set(x, xor->value);
 
 	mpz_init_set_ui(o, 0);
@@ -500,6 +508,8 @@  static struct expr *netlink_parse_bitwise_bool(struct netlink_parse_ctx *ctx,
 
 		or = netlink_alloc_value(loc, &nld);
 		mpz_set(or->value, o);
+		if (nbits > 0)
+			mpz_switch_byteorder(or->value, div_round_up(nbits, BITS_PER_BYTE));
 		expr = binop_expr_alloc(loc, OP_OR, expr, or);
 		expr->len = left->len;
 	}
diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c
index c8bbcb7452b0..4793f3853bee 100644
--- a/src/netlink_linearize.c
+++ b/src/netlink_linearize.c
@@ -677,6 +677,8 @@  static void netlink_gen_bitwise(struct netlink_linearize_ctx *ctx,
 	netlink_put_register(nle, NFTNL_EXPR_BITWISE_DREG, dreg);
 	nftnl_expr_set_u32(nle, NFTNL_EXPR_BITWISE_OP, NFT_BITWISE_BOOL);
 	nftnl_expr_set_u32(nle, NFTNL_EXPR_BITWISE_LEN, len);
+	if (expr->byteorder == BYTEORDER_HOST_ENDIAN)
+		nftnl_expr_set_u32(nle, NFTNL_EXPR_BITWISE_NBITS, expr->len);
 
 	netlink_gen_raw_data(mask, expr->byteorder, len, &nld);
 	nftnl_expr_set(nle, NFTNL_EXPR_BITWISE_MASK, nld.value, nld.len);