@@ -1498,8 +1498,9 @@ static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
{
swreg tmp_both = imm_both(nfp_prog), dst_both = reg_both(dst);
swreg dst_a = reg_a(dst), dst_b = reg_a(dst);
- struct reciprocal_value rvalue;
+ struct reciprocal_value_adv rvalue;
swreg tmp_b = imm_b(nfp_prog);
+ u8 pre_shift, exp;
swreg magic;
if (imm > U32_MAX) {
@@ -1507,15 +1508,41 @@ static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
return 0;
}
- rvalue = reciprocal_value(imm);
+ rvalue = reciprocal_value_adv(imm, 32);
+ exp = rvalue.exp;
+ /* Wide magic with an even divisor: recompute the reciprocal for the
+ * divisor with its trailing zero bits shifted out. The non-wide
+ * sequence below shifts dst right by pre_shift before multiplying.
+ */
+ if (rvalue.is_wide_m && !(imm & 1)) {
+ pre_shift = fls(imm & -imm) - 1;
+ rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
+ } else {
+ pre_shift = 0;
+ }
magic = re_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
- wrp_mul_u32(nfp_prog, tmp_both, tmp_both, dst_a, magic, true);
- emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, tmp_b);
- emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
- SHF_SC_R_SHF, rvalue.sh1);
- emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, tmp_b);
- emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
- SHF_SC_R_SHF, rvalue.sh2);
+ /* 1U keeps the shift unsigned; for exp == 31 a signed "1 << exp"
+ * overflows int and sign-extends when compared with the u64 imm.
+ */
+ if (imm == 1U << exp) {
+ emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+ SHF_SC_R_SHF, exp);
+ } else if (rvalue.is_wide_m) {
+ wrp_mul_u32(nfp_prog, tmp_both, tmp_both, dst_a, magic, true);
+ emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, tmp_b);
+ emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+ SHF_SC_R_SHF, 1);
+ emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, tmp_b);
+ emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+ SHF_SC_R_SHF, rvalue.sh - 1);
+ } else {
+ if (pre_shift)
+ emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+ dst_b, SHF_SC_R_SHF, pre_shift);
+ wrp_mul_u32(nfp_prog, dst_both, dst_both, dst_a, magic, true);
+ emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+ dst_b, SHF_SC_R_SHF, rvalue.sh);
+ }
return 0;
}
@@ -561,12 +561,22 @@ nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
/* NFP doesn't have divide instructions, we support divide by constant
* through reciprocal multiplication. Given NFP support multiplication
* no bigger than u32, we'd require divisor and dividend no bigger than
- * that as well.
+ * that as well. There is a further range requirement on divisor,
+ * please see the NOTE below.
*
* Also eBPF doesn't support signed divide and has enforced this on C
* language level by failing compilation. However LLVM assembler hasn't
* enforced this, so it is possible for negative constant to leak in as
* a BPF_K operand through assembly code, we reject such cases as well.
+ *
+ * NOTE: because we are using "reciprocal_value_adv" which doesn't
+ * support divisor with MSB set, we would need to JIT separate NFP
+ * sequence to handle such case. It could be a simple sequence if there
+ * is conditional move, however there isn't for NFP. So, we don't bother
+ * generating compare-if-set-branch sequence by rejecting the program
+ * straight away when the u32 divisor has MSB set. Divide by such a
+ * large constant would be rare in practice. Also, the programmer could
+ * simply rewrite it as "result = dividend >= the_const".
*/
if (is_mbpf_div(meta)) {
if (meta->umax_dst > U32_MAX) {
@@ -578,8 +588,8 @@ nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
pr_vlog(env, "dividend is not constant\n");
return -EINVAL;
}
- if (meta->umax_src > U32_MAX) {
- pr_vlog(env, "dividend is not within u32 value range\n");
+ if (meta->umax_src > U32_MAX / 2) {
+ pr_vlog(env, "divisor is bigger than U32_MAX/2\n");
return -EINVAL;
}
}