diff mbox series

[bpf-next,7/7] nfp: bpf: migrate to advanced reciprocal divide in reciprocal_div.h

Message ID 20180625035421.2991-8-jakub.kicinski@netronome.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series nfp: bpf: add multiplication, divide and memcpy from maps | expand

Commit Message

Jakub Kicinski June 25, 2018, 3:54 a.m. UTC
From: Jiong Wang <jiong.wang@netronome.com>

As we are doing JIT, we would want to use the advanced version of the
reciprocal divide (reciprocal_value_adv) to trade performance with host.

We could reduce the required ALU instructions from 4 to 2 or 1.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 38 ++++++++++++++-----
 .../net/ethernet/netronome/nfp/bpf/verifier.c | 16 ++++++--
 2 files changed, 42 insertions(+), 12 deletions(-)

Comments

Jakub Kicinski June 26, 2018, 8:59 p.m. UTC | #1
On Sun, 24 Jun 2018 20:54:21 -0700, Jakub Kicinski wrote:
> +	 * NOTE: because we are using "reciprocal_value_adv" which doesn't
> +	 * support dividend with MSB set, so we need to JIT separate NFP
> +	 * sequence to handle such case. It could be a simple sequence if there
> +	 * is conditional move, however there isn't for NFP. So, we don't bother
> +	 * generating compare-if-set-branch sequence by rejecting the program
> +	 * straight away when the u32 dividend has MSB set. Divide by such a
> +	 * large constant would be rare in practice. Also, the programmer could
> +	 * simply rewrite it as "result = divisor >= the_const".

Thinking about this again, can we just use carry bit?  The code may end
up shorter than the explanation why we don't support that case :P

immed[c, 0]
alu[--, a, -, b]
alu[c, c, +carry, 0]

Should be equivalent to:

c = a >= b

(Thanks to Edwin for double-checking the carry semantics.)
Jiong Wang July 5, 2018, 6:28 p.m. UTC | #2
On 26/06/2018 21:59, Jakub Kicinski wrote:
> On Sun, 24 Jun 2018 20:54:21 -0700, Jakub Kicinski wrote:
>> +	 * NOTE: because we are using "reciprocal_value_adv" which doesn't
>> +	 * support dividend with MSB set, so we need to JIT separate NFP
>> +	 * sequence to handle such case. It could be a simple sequence if there
>> +	 * is conditional move, however there isn't for NFP. So, we don't bother
>> +	 * generating compare-if-set-branch sequence by rejecting the program
>> +	 * straight away when the u32 dividend has MSB set. Divide by such a
>> +	 * large constant would be rare in practice. Also, the programmer could
>> +	 * simply rewrite it as "result = divisor >= the_const".
> Thinking about this again, can we just use carry bit?

Good catch, yes we can.

> The code may end
> up shorter than the explanation why we don't support that case :P
>
> immed[c, 0]
> alu[--, a, -, b]
> alu[c, c, +carry, 0]

eBPF input will be "a = a / b", given "immed" doesn't affect carry bit,
I'd reorder the sequence so we only need one tmp register for holding
"b" who is constant.

   alu[--, a, -, b]
   immed[b, 0]
   alu[a, b, +carry, 0]
  
Thanks.
Regards,
Jiong

>
> Should be equivalent to:
>
> c = a >= b
>
> (Thanks to Edwin for double-checking the carry semantics.)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index d732b6cfc356..f99ac00bd649 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1498,8 +1498,9 @@  static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
 {
 	swreg tmp_both = imm_both(nfp_prog), dst_both = reg_both(dst);
 	swreg dst_a = reg_a(dst), dst_b = reg_a(dst);
-	struct reciprocal_value rvalue;
+	struct reciprocal_value_adv rvalue;
 	swreg tmp_b = imm_b(nfp_prog);
+	u8 pre_shift, exp;
 	swreg magic;
 
 	if (imm > U32_MAX) {
@@ -1507,15 +1508,34 @@  static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
 		return 0;
 	}
 
-	rvalue = reciprocal_value(imm);
+	rvalue = reciprocal_value_adv(imm, 32);
+	exp = rvalue.exp;
+	if (rvalue.is_wide_m && !(imm & 1)) {
+		pre_shift = fls(imm & -imm) - 1;
+		rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
+	} else {
+		pre_shift = 0;
+	}
 	magic = re_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
-	wrp_mul_u32(nfp_prog, tmp_both, tmp_both, dst_a, magic, true);
-	emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, tmp_b);
-	emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
-		 SHF_SC_R_SHF, rvalue.sh1);
-	emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, tmp_b);
-	emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
-		 SHF_SC_R_SHF, rvalue.sh2);
+	if (imm == 1 << exp) {
+		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+			 SHF_SC_R_SHF, exp);
+	} else if (rvalue.is_wide_m) {
+		wrp_mul_u32(nfp_prog, tmp_both, tmp_both, dst_a, magic, true);
+		emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, tmp_b);
+		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+			 SHF_SC_R_SHF, 1);
+		emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, tmp_b);
+		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+			 SHF_SC_R_SHF, rvalue.sh - 1);
+	} else {
+		if (pre_shift)
+			emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+				 dst_b, SHF_SC_R_SHF, pre_shift);
+		wrp_mul_u32(nfp_prog, dst_both, dst_both, dst_a, magic, true);
+		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+			 dst_b, SHF_SC_R_SHF, rvalue.sh);
+	}
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index f0f07e988c46..39c2c24fea11 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -561,12 +561,22 @@  nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	/* NFP doesn't have divide instructions, we support divide by constant
 	 * through reciprocal multiplication. Given NFP support multiplication
 	 * no bigger than u32, we'd require divisor and dividend no bigger than
-	 * that as well.
+	 * that as well. There is a further range requirement on dividend,
+	 * please see the NOTE below.
 	 *
 	 * Also eBPF doesn't support signed divide and has enforced this on C
 	 * language level by failing compilation. However LLVM assembler hasn't
 	 * enforced this, so it is possible for negative constant to leak in as
 	 * a BPF_K operand through assembly code, we reject such cases as well.
+	 *
+	 * NOTE: because we are using "reciprocal_value_adv" which doesn't
+	 * support dividend with MSB set, so we need to JIT separate NFP
+	 * sequence to handle such case. It could be a simple sequence if there
+	 * is conditional move, however there isn't for NFP. So, we don't bother
+	 * generating compare-if-set-branch sequence by rejecting the program
+	 * straight away when the u32 dividend has MSB set. Divide by such a
+	 * large constant would be rare in practice. Also, the programmer could
+	 * simply rewrite it as "result = divisor >= the_const".
 	 */
 	if (is_mbpf_div(meta)) {
 		if (meta->umax_dst > U32_MAX) {
@@ -578,8 +588,8 @@  nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 				pr_vlog(env, "dividend is not constant\n");
 				return -EINVAL;
 			}
-			if (meta->umax_src > U32_MAX) {
-				pr_vlog(env, "dividend is not within u32 value range\n");
+			if (meta->umax_src > U32_MAX / 2) {
+				pr_vlog(env, "dividend is bigger than U32_MAX/2\n");
 				return -EINVAL;
 			}
 		}