diff mbox series

[nft,v4,13/32] evaluate: support shifts larger than the width of the left operand

Message ID 20220404121410.188509-14-jeremy@azazel.net
State Changes Requested
Delegated to: Pablo Neira
Headers show
Series Extend values assignable to packet marks and payload fields | expand

Commit Message

Jeremy Sowden April 4, 2022, 12:13 p.m. UTC
If we want to left-shift a value of narrower type and assign the result
to a variable of a wider type, we are constrained to only shifting up to
the width of the narrower type.  Thus:

  add rule t c meta mark set ip dscp << 2

works, but:

  add rule t c meta mark set ip dscp << 8

does not, even though the lvalue is large enough to accommodate the
result.

Evaluation of the left-hand operand of a shift overwrites the `len`
field of the evaluation context when `expr_evaluate_primary` is called.
Instead, preserve the `len` value of the evaluation context for shifts,
and support shifts up to that size, even if they are larger than the
length of the left operand.

Update netlink_delinearize.c to handle the case where the length of a
shift expression does not match that of its left-hand operand.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
---
 src/evaluate.c            | 23 ++++++++++++++---------
 src/netlink_delinearize.c |  4 ++--
 2 files changed, 16 insertions(+), 11 deletions(-)

Comments

Pablo Neira Ayuso May 23, 2022, 5:42 p.m. UTC | #1
Hi,

I just tested patches 9 and 14 alone, and meta mark set ip dscp ...
now works fine.

So most of the work from 7 to 14 is to allow to use shifts.

So 8, 10, 11, 12 and 13 enable the use of shifts is meta and ct
statements?

The new _NBIT field is there to store the original length for the
payload field (6 bits, for the ip dscp case)?

On Mon, Apr 04, 2022 at 01:13:51PM +0100, Jeremy Sowden wrote:
> If we want to left-shift a value of narrower type and assign the result
> to a variable of a wider type, we are constrained to only shifting up to
> the width of the narrower type.  Thus:
> 
>   add rule t c meta mark set ip dscp << 2
> 
> works, but:
> 
>   add rule t c meta mark set ip dscp << 8
> 
> does not, even though the lvalue is large enough to accommodate the
> result.
> 
> Evaluation of the left-hand operand of a shift overwrites the `len`
> field of the evaluation context when `expr_evaluate_primary` is called.
> Instead, preserve the `len` value of the evaluation context for shifts,
> and support shifts up to that size, even if they are larger than the
> length of the left operand.
> 
> Update netlink_delinearize.c to handle the case where the length of a
> shift expression does not match that of its left-hand operand.
> 
> Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
> ---
>  src/evaluate.c            | 23 ++++++++++++++---------
>  src/netlink_delinearize.c |  4 ++--
>  2 files changed, 16 insertions(+), 11 deletions(-)
> 
> diff --git a/src/evaluate.c b/src/evaluate.c
> index be493f85010c..ee4da5a2b889 100644
> --- a/src/evaluate.c
> +++ b/src/evaluate.c
> @@ -1116,14 +1116,18 @@ static int constant_binop_simplify(struct eval_ctx *ctx, struct expr **expr)
>  static int expr_evaluate_shift(struct eval_ctx *ctx, struct expr **expr)
>  {
>  	struct expr *op = *expr, *left = op->left, *right = op->right;
> +	unsigned int shift = mpz_get_uint32(right->value);
> +	unsigned int op_len = left->len;
>  
> -	if (mpz_get_uint32(right->value) >= left->len)
> -		return expr_binary_error(ctx->msgs, right, left,
> -					 "%s shift of %u bits is undefined "
> -					 "for type of %u bits width",
> -					 op->op == OP_LSHIFT ? "Left" : "Right",
> -					 mpz_get_uint32(right->value),
> -					 left->len);
> +	if (shift >= op_len) {
> +		if (shift >= ctx->ectx.len)
> +			return expr_binary_error(ctx->msgs, right, left,
> +						 "%s shift of %u bits is undefined for type of %u bits width",
> +						 op->op == OP_LSHIFT ? "Left" : "Right",
> +						 shift,
> +						 op_len);
> +		op_len = ctx->ectx.len;
> +	}
>  
>  	/* Both sides need to be in host byte order */
>  	if (byteorder_conversion(ctx, &op->left, BYTEORDER_HOST_ENDIAN) < 0)
> @@ -1134,7 +1138,7 @@ static int expr_evaluate_shift(struct eval_ctx *ctx, struct expr **expr)
>  
>  	op->dtype     = &integer_type;
>  	op->byteorder = BYTEORDER_HOST_ENDIAN;
> -	op->len       = left->len;
> +	op->len	      = op_len;
>  
>  	if (expr_is_constant(left))
>  		return constant_binop_simplify(ctx, expr);
> @@ -1167,6 +1171,7 @@ static int expr_evaluate_binop(struct eval_ctx *ctx, struct expr **expr)
>  {
>  	struct expr *op = *expr, *left, *right;
>  	const char *sym = expr_op_symbols[op->op];
> +	unsigned int ectx_len = ctx->ectx.len;
>  
>  	if (expr_evaluate(ctx, &op->left) < 0)
>  		return -1;
> @@ -1174,7 +1179,7 @@ static int expr_evaluate_binop(struct eval_ctx *ctx, struct expr **expr)
>  
>  	if (op->op == OP_LSHIFT || op->op == OP_RSHIFT)
>  		__expr_set_context(&ctx->ectx, &integer_type,
> -				   left->byteorder, ctx->ectx.len, 0);
> +				   left->byteorder, ectx_len, 0);
>  	if (expr_evaluate(ctx, &op->right) < 0)
>  		return -1;
>  	right = op->right;
> diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
> index cf5359bf269e..9f6fdee3e92d 100644
> --- a/src/netlink_delinearize.c
> +++ b/src/netlink_delinearize.c
> @@ -486,7 +486,7 @@ static struct expr *netlink_parse_bitwise_bool(struct netlink_parse_ctx *ctx,
>  		mpz_ior(m, m, o);
>  	}
>  
> -	if (left->len > 0 && mpz_scan0(m, 0) == left->len) {
> +	if (left->len > 0 && mpz_scan0(m, 0) >= left->len) {
>  		/* mask encompasses the entire value */
>  		expr_free(mask);
>  	} else {
> @@ -536,7 +536,7 @@ static struct expr *netlink_parse_bitwise_shift(struct netlink_parse_ctx *ctx,
>  	right->byteorder = BYTEORDER_HOST_ENDIAN;
>  
>  	expr = binop_expr_alloc(loc, op, left, right);
> -	expr->len = left->len;
> +	expr->len = nftnl_expr_get_u32(nle, NFTNL_EXPR_BITWISE_LEN) * BITS_PER_BYTE;
>  
>  	return expr;
>  }
> -- 
> 2.35.1
>
Jeremy Sowden Nov. 1, 2022, 6:47 p.m. UTC | #2
On 2022-05-23, at 19:42:40 +0200, Pablo Neira Ayuso wrote:
> I just tested patches 9 and 14 alone, and meta mark set ip dscp ...
> now works fine.
>
> So most of the work from 7 to 14 is to allow to use shifts.
>
> So 8, 10, 11, 12 and 13 enable the use of shifts is meta and ct
> statements?

Yes.

> The new _NBIT field is there to store the original length for the
> payload field (6 bits, for the ip dscp case)?

It's for this ip6 dscp case:

  ct mark set ip6 dscp << 2 | 16

This is linearized as:

  [ payload load 2b @ network header + 0 => reg 1 ]
  [ bitwise reg 1 = ( reg 1 & 0x0000c00f ) ^ 0x00000000 ]
  [ bitwise reg 1 = ( reg 1 >> 0x00000006 ) ]
  [ byteorder reg 1 = ntoh(reg 1, 2, 1) ]
  [ bitwise reg 1 = ( reg 1 << 0x00000002 ) ]
  [ bitwise reg 1 = ( reg 1 & 0x00000fef ) ^ 0x00000010 ]
  [ ct set mark with reg 1 ]

The problem is the last bitwise expression:

  [ bitwise reg 1 = ( reg 1 & 0x00000fef ) ^ 0x00000010 ]

`ip6 dscp` spans two octets:

  @nh(0,16) & 0xfc0 >> 6

The original length is 12 bits.  The LSHIFT expression changes the
byte-order to host-endian.

When the OR expression is evaluated, it is converted from:

  ${lhs} | 16

to:

  ${lhs} & 0xfef ^ 0x10

and when it is linearized, the bit-length is rounded up to 16 bits and
the byte-order is converted to big-endian.

During delinearization we try to turn the mask-and-xor back into its
original form before the original endianness and length are known, so
starting from:

  ${lhs} & 0xef0f ^ 0x1000

we fail to strip the mask and end up with:

  ct mark set ip6 dscp << 2 & 4095 | 16

Having gone back and reviewed this bug in the writing of this e-mail
I've realized that the introduction of native kernel bitwise op's in
patches 27-28 could obviate it.  In the current iteration of the
patch-set, the new bitwise op's are only used to support previously
unsupported bitwise expressions with variable right hand operands;
currently supported operations with constant right hand operands are
still converted to mask-and-xor operations.  If the linearization code
were changed to use the native op's for expressions with constant RHS's
too, the problematic conversion from mask-and-xor would go away.

When I tried this out, I had to make a couple of other changes to get it
working.  The big one was to register allocation.  Although netfilter
registers are 32-bits wide these days, they are currently allocated in
blocks of four for backwards-compatibility with older kernels.  Some of
the new test-cases added in this series failed because of a lack of
available registers, so I changed the allocation as follows:

  --- a/src/netlink_linearize.c
  +++ b/src/netlink_linearize.c
  @@ -97,7 +97,7 @@ static void __release_register(struct netlink_linearize_ctx *ctx,
   static enum nft_registers get_register(struct netlink_linearize_ctx *ctx,
                                         const struct expr *expr)
   {
  -       if (expr && expr->etype == EXPR_CONCAT)
  +       if (expr && expr->len)
                  return __get_register(ctx, expr->len);
          else
                  return __get_register(ctx, NFT_REG_SIZE * BITS_PER_BYTE);
  @@ -106,7 +106,7 @@ static enum nft_registers get_register(struct netlink_linearize_ctx *ctx,
   static void release_register(struct netlink_linearize_ctx *ctx,
                               const struct expr *expr)
   {
  -       if (expr && expr->etype == EXPR_CONCAT)
  +       if (expr && expr->len)
                  __release_register(ctx, expr->len);
          else
                  __release_register(ctx, NFT_REG_SIZE * BITS_PER_BYTE);

It's been seven years since the switch from 128- to 32-bit registers.
Would something like this change be acceptable?

J.

> On Mon, Apr 04, 2022 at 01:13:51PM +0100, Jeremy Sowden wrote:
> > If we want to left-shift a value of narrower type and assign the result
> > to a variable of a wider type, we are constrained to only shifting up to
> > the width of the narrower type.  Thus:
> >
> >   add rule t c meta mark set ip dscp << 2
> >
> > works, but:
> >
> >   add rule t c meta mark set ip dscp << 8
> >
> > does not, even though the lvalue is large enough to accommodate the
> > result.
> >
> > Evaluation of the left-hand operand of a shift overwrites the `len`
> > field of the evaluation context when `expr_evaluate_primary` is called.
> > Instead, preserve the `len` value of the evaluation context for shifts,
> > and support shifts up to that size, even if they are larger than the
> > length of the left operand.
> >
> > Update netlink_delinearize.c to handle the case where the length of a
> > shift expression does not match that of its left-hand operand.
> >
> > Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
> > ---
> >  src/evaluate.c            | 23 ++++++++++++++---------
> >  src/netlink_delinearize.c |  4 ++--
> >  2 files changed, 16 insertions(+), 11 deletions(-)
> >
> > diff --git a/src/evaluate.c b/src/evaluate.c
> > index be493f85010c..ee4da5a2b889 100644
> > --- a/src/evaluate.c
> > +++ b/src/evaluate.c
> > @@ -1116,14 +1116,18 @@ static int constant_binop_simplify(struct eval_ctx *ctx, struct expr **expr)
> >  static int expr_evaluate_shift(struct eval_ctx *ctx, struct expr **expr)
> >  {
> >  	struct expr *op = *expr, *left = op->left, *right = op->right;
> > +	unsigned int shift = mpz_get_uint32(right->value);
> > +	unsigned int op_len = left->len;
> >
> > -	if (mpz_get_uint32(right->value) >= left->len)
> > -		return expr_binary_error(ctx->msgs, right, left,
> > -					 "%s shift of %u bits is undefined "
> > -					 "for type of %u bits width",
> > -					 op->op == OP_LSHIFT ? "Left" : "Right",
> > -					 mpz_get_uint32(right->value),
> > -					 left->len);
> > +	if (shift >= op_len) {
> > +		if (shift >= ctx->ectx.len)
> > +			return expr_binary_error(ctx->msgs, right, left,
> > +						 "%s shift of %u bits is undefined for type of %u bits width",
> > +						 op->op == OP_LSHIFT ? "Left" : "Right",
> > +						 shift,
> > +						 op_len);
> > +		op_len = ctx->ectx.len;
> > +	}
> >
> >  	/* Both sides need to be in host byte order */
> >  	if (byteorder_conversion(ctx, &op->left, BYTEORDER_HOST_ENDIAN) < 0)
> > @@ -1134,7 +1138,7 @@ static int expr_evaluate_shift(struct eval_ctx *ctx, struct expr **expr)
> >
> >  	op->dtype     = &integer_type;
> >  	op->byteorder = BYTEORDER_HOST_ENDIAN;
> > -	op->len       = left->len;
> > +	op->len	      = op_len;
> >
> >  	if (expr_is_constant(left))
> >  		return constant_binop_simplify(ctx, expr);
> > @@ -1167,6 +1171,7 @@ static int expr_evaluate_binop(struct eval_ctx *ctx, struct expr **expr)
> >  {
> >  	struct expr *op = *expr, *left, *right;
> >  	const char *sym = expr_op_symbols[op->op];
> > +	unsigned int ectx_len = ctx->ectx.len;
> >
> >  	if (expr_evaluate(ctx, &op->left) < 0)
> >  		return -1;
> > @@ -1174,7 +1179,7 @@ static int expr_evaluate_binop(struct eval_ctx *ctx, struct expr **expr)
> >
> >  	if (op->op == OP_LSHIFT || op->op == OP_RSHIFT)
> >  		__expr_set_context(&ctx->ectx, &integer_type,
> > -				   left->byteorder, ctx->ectx.len, 0);
> > +				   left->byteorder, ectx_len, 0);
> >  	if (expr_evaluate(ctx, &op->right) < 0)
> >  		return -1;
> >  	right = op->right;
> > diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
> > index cf5359bf269e..9f6fdee3e92d 100644
> > --- a/src/netlink_delinearize.c
> > +++ b/src/netlink_delinearize.c
> > @@ -486,7 +486,7 @@ static struct expr *netlink_parse_bitwise_bool(struct netlink_parse_ctx *ctx,
> >  		mpz_ior(m, m, o);
> >  	}
> >
> > -	if (left->len > 0 && mpz_scan0(m, 0) == left->len) {
> > +	if (left->len > 0 && mpz_scan0(m, 0) >= left->len) {
> >  		/* mask encompasses the entire value */
> >  		expr_free(mask);
> >  	} else {
> > @@ -536,7 +536,7 @@ static struct expr *netlink_parse_bitwise_shift(struct netlink_parse_ctx *ctx,
> >  	right->byteorder = BYTEORDER_HOST_ENDIAN;
> >
> >  	expr = binop_expr_alloc(loc, op, left, right);
> > -	expr->len = left->len;
> > +	expr->len = nftnl_expr_get_u32(nle, NFTNL_EXPR_BITWISE_LEN) * BITS_PER_BYTE;
> >
> >  	return expr;
> >  }
> > --
> > 2.35.1
> >
>
Pablo Neira Ayuso Feb. 7, 2023, 12:05 p.m. UTC | #3
Long time, no news.

On Mon, May 23, 2022 at 07:42:43PM +0200, Pablo Neira Ayuso wrote:
> Hi,
> 
> I just tested patches 9 and 14 alone, and meta mark set ip dscp ...
> now works fine.

I have applied 9 and 14, including one testcase for tests/py, so

        meta mark set ip dscp

works, so there is at least some progress on this, sorry about this :(
Jeremy Sowden March 4, 2023, noon UTC | #4
On 2023-02-07, at 13:05:38 +0100, Pablo Neira Ayuso wrote:
> Long time, no news.
> 
> On Mon, May 23, 2022 at 07:42:43PM +0200, Pablo Neira Ayuso wrote:
> > Hi,
> > 
> > I just tested patches 9 and 14 alone, and meta mark set ip dscp ...
> > now works fine.
> 
> I have applied 9 and 14, including one testcase for tests/py, so
> 
>         meta mark set ip dscp
> 
> works, so there is at least some progress on this, sorry about this :(

A step in the right direction. :) Thanks, Pablo.

J.
diff mbox series

Patch

diff --git a/src/evaluate.c b/src/evaluate.c
index be493f85010c..ee4da5a2b889 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -1116,14 +1116,18 @@  static int constant_binop_simplify(struct eval_ctx *ctx, struct expr **expr)
 static int expr_evaluate_shift(struct eval_ctx *ctx, struct expr **expr)
 {
 	struct expr *op = *expr, *left = op->left, *right = op->right;
+	unsigned int shift = mpz_get_uint32(right->value);
+	unsigned int op_len = left->len;
 
-	if (mpz_get_uint32(right->value) >= left->len)
-		return expr_binary_error(ctx->msgs, right, left,
-					 "%s shift of %u bits is undefined "
-					 "for type of %u bits width",
-					 op->op == OP_LSHIFT ? "Left" : "Right",
-					 mpz_get_uint32(right->value),
-					 left->len);
+	if (shift >= op_len) {
+		if (shift >= ctx->ectx.len)
+			return expr_binary_error(ctx->msgs, right, left,
+						 "%s shift of %u bits is undefined for type of %u bits width",
+						 op->op == OP_LSHIFT ? "Left" : "Right",
+						 shift,
+						 op_len);
+		op_len = ctx->ectx.len;
+	}
 
 	/* Both sides need to be in host byte order */
 	if (byteorder_conversion(ctx, &op->left, BYTEORDER_HOST_ENDIAN) < 0)
@@ -1134,7 +1138,7 @@  static int expr_evaluate_shift(struct eval_ctx *ctx, struct expr **expr)
 
 	op->dtype     = &integer_type;
 	op->byteorder = BYTEORDER_HOST_ENDIAN;
-	op->len       = left->len;
+	op->len	      = op_len;
 
 	if (expr_is_constant(left))
 		return constant_binop_simplify(ctx, expr);
@@ -1167,6 +1171,7 @@  static int expr_evaluate_binop(struct eval_ctx *ctx, struct expr **expr)
 {
 	struct expr *op = *expr, *left, *right;
 	const char *sym = expr_op_symbols[op->op];
+	unsigned int ectx_len = ctx->ectx.len;
 
 	if (expr_evaluate(ctx, &op->left) < 0)
 		return -1;
@@ -1174,7 +1179,7 @@  static int expr_evaluate_binop(struct eval_ctx *ctx, struct expr **expr)
 
 	if (op->op == OP_LSHIFT || op->op == OP_RSHIFT)
 		__expr_set_context(&ctx->ectx, &integer_type,
-				   left->byteorder, ctx->ectx.len, 0);
+				   left->byteorder, ectx_len, 0);
 	if (expr_evaluate(ctx, &op->right) < 0)
 		return -1;
 	right = op->right;
diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
index cf5359bf269e..9f6fdee3e92d 100644
--- a/src/netlink_delinearize.c
+++ b/src/netlink_delinearize.c
@@ -486,7 +486,7 @@  static struct expr *netlink_parse_bitwise_bool(struct netlink_parse_ctx *ctx,
 		mpz_ior(m, m, o);
 	}
 
-	if (left->len > 0 && mpz_scan0(m, 0) == left->len) {
+	if (left->len > 0 && mpz_scan0(m, 0) >= left->len) {
 		/* mask encompasses the entire value */
 		expr_free(mask);
 	} else {
@@ -536,7 +536,7 @@  static struct expr *netlink_parse_bitwise_shift(struct netlink_parse_ctx *ctx,
 	right->byteorder = BYTEORDER_HOST_ENDIAN;
 
 	expr = binop_expr_alloc(loc, op, left, right);
-	expr->len = left->len;
+	expr->len = nftnl_expr_get_u32(nle, NFTNL_EXPR_BITWISE_LEN) * BITS_PER_BYTE;
 
 	return expr;
 }