diff mbox series

[bpf-next,5/7] nfp: bpf: support u16 and u32 multiplications

Message ID 20180625035421.2991-6-jakub.kicinski@netronome.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series nfp: bpf: add multiplication, divide and memcpy from maps | expand

Commit Message

Jakub Kicinski June 25, 2018, 3:54 a.m. UTC
From: Jiong Wang <jiong.wang@netronome.com>

NFP supports u16 and u32 multiplication. Multiplication is done 8-bits per
step, therefore we need 2 steps for u16 and 4 steps for u32.

We also need one start instruction to initialize the sequence and one or
two instructions to fetch the result depending on either you need the high
halve of u32 multiplication.

For ALU64, if either operand is beyond u32's value range, we reject it. One
thing to note, if the source operand is BPF_K, then we need to check "imm"
field directly, and we'd reject it if it is negative.  Because for ALU64,
"imm" (with s32 type) is expected to be sign extended to s64 which NFP mul
doesn't support. For ALU32, it is fine for "imm" be negative though,
because the result is 32-bits and here is no difference on the low halve
of result for signed/unsigned mul, so we will get correct result.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 137 ++++++++++++++++++
 drivers/net/ethernet/netronome/nfp/bpf/main.h |   5 +
 .../net/ethernet/netronome/nfp/bpf/verifier.c |  58 ++++++--
 drivers/net/ethernet/netronome/nfp/nfp_asm.h  |  28 ++++
 4 files changed, 217 insertions(+), 11 deletions(-)

Comments

Song Liu June 26, 2018, 10:23 p.m. UTC | #1
On Sun, Jun 24, 2018 at 8:54 PM, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> From: Jiong Wang <jiong.wang@netronome.com>
>
> NFP supports u16 and u32 multiplication. Multiplication is done 8-bits per
> step, therefore we need 2 steps for u16 and 4 steps for u32.
>
> We also need one start instruction to initialize the sequence and one or
> two instructions to fetch the result depending on either you need the high
> halve of u32 multiplication.
>
> For ALU64, if either operand is beyond u32's value range, we reject it. One
> thing to note, if the source operand is BPF_K, then we need to check "imm"
> field directly, and we'd reject it if it is negative.  Because for ALU64,
> "imm" (with s32 type) is expected to be sign extended to s64 which NFP mul
> doesn't support. For ALU32, it is fine for "imm" be negative though,
> because the result is 32-bits and here is no difference on the low halve
> of result for signed/unsigned mul, so we will get correct result.
>
> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Acked-by: Song Liu <songliubraving@fb.com>

> ---
>  drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 137 ++++++++++++++++++
>  drivers/net/ethernet/netronome/nfp/bpf/main.h |   5 +
>  .../net/ethernet/netronome/nfp/bpf/verifier.c |  58 ++++++--
>  drivers/net/ethernet/netronome/nfp/nfp_asm.h  |  28 ++++
>  4 files changed, 217 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
> index 4a629e9b5c0f..7d7061d93358 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
> @@ -415,6 +415,60 @@ emit_alu(struct nfp_prog *nfp_prog, swreg dst,
>                    reg.dst_lmextn, reg.src_lmextn);
>  }
>
> +static void
> +__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg,
> +          enum mul_type type, enum mul_step step, u16 breg, bool swap,
> +          bool wr_both, bool dst_lmextn, bool src_lmextn)
> +{
> +       u64 insn;
> +
> +       insn = OP_MUL_BASE |
> +               FIELD_PREP(OP_MUL_A_SRC, areg) |
> +               FIELD_PREP(OP_MUL_B_SRC, breg) |
> +               FIELD_PREP(OP_MUL_STEP, step) |
> +               FIELD_PREP(OP_MUL_DST_AB, dst_ab) |
> +               FIELD_PREP(OP_MUL_SW, swap) |
> +               FIELD_PREP(OP_MUL_TYPE, type) |
> +               FIELD_PREP(OP_MUL_WR_AB, wr_both) |
> +               FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) |
> +               FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn);
> +
> +       nfp_prog_push(nfp_prog, insn);
> +}
> +
> +static void
> +emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type,
> +        enum mul_step step, swreg rreg)
> +{
> +       struct nfp_insn_ur_regs reg;
> +       u16 areg;
> +       int err;
> +
> +       if (type == MUL_TYPE_START && step != MUL_STEP_NONE) {
> +               nfp_prog->error = -EINVAL;
> +               return;
> +       }
> +
> +       if (step == MUL_LAST || step == MUL_LAST_2) {
> +               /* When type is step and step Number is LAST or LAST2, left
> +                * source is used as destination.
> +                */
> +               err = swreg_to_unrestricted(lreg, reg_none(), rreg, &reg);
> +               areg = reg.dst;
> +       } else {
> +               err = swreg_to_unrestricted(reg_none(), lreg, rreg, &reg);
> +               areg = reg.areg;
> +       }
> +
> +       if (err) {
> +               nfp_prog->error = err;
> +               return;
> +       }
> +
> +       __emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap,
> +                  reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
> +}
> +
>  static void
>  __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
>                 u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
> @@ -1380,6 +1434,65 @@ static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
>                       SHF_SC_R_ROT, 16);
>  }
>
> +static void
> +wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
> +           swreg rreg, bool gen_high_half)
> +{
> +       emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
> +       emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg);
> +       emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg);
> +       emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg);
> +       emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg);
> +       emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none());
> +       if (gen_high_half)
> +               emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2,
> +                        reg_none());
> +       else
> +               wrp_immed(nfp_prog, dst_hi, 0);
> +}
> +
> +static void
> +wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
> +           swreg rreg)
> +{
> +       emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
> +       emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg);
> +       emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg);
> +       emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none());
> +}
> +
> +static int
> +wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
> +       bool gen_high_half, bool ropnd_from_reg)
> +{
> +       swreg multiplier, multiplicand, dst_hi, dst_lo;
> +       const struct bpf_insn *insn = &meta->insn;
> +       u32 lopnd_max, ropnd_max;
> +       u8 dst_reg;
> +
> +       dst_reg = insn->dst_reg;
> +       multiplicand = reg_a(dst_reg * 2);
> +       dst_hi = reg_both(dst_reg * 2 + 1);
> +       dst_lo = reg_both(dst_reg * 2);
> +       lopnd_max = meta->umax_dst;
> +       if (ropnd_from_reg) {
> +               multiplier = reg_b(insn->src_reg * 2);
> +               ropnd_max = meta->umax_src;
> +       } else {
> +               u32 imm = insn->imm;
> +
> +               multiplier = re_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
> +               ropnd_max = imm;
> +       }
> +       if (lopnd_max > U16_MAX || ropnd_max > U16_MAX)
> +               wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier,
> +                           gen_high_half);
> +       else
> +               wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier);
> +
> +       return 0;
> +}
> +
>  static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
>  {
>         swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
> @@ -1684,6 +1797,16 @@ static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
>         return 0;
>  }
>
> +static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
> +{
> +       return wrp_mul(nfp_prog, meta, true, true);
> +}
> +
> +static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
> +{
> +       return wrp_mul(nfp_prog, meta, true, false);
> +}
> +
>  static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
>  {
>         const struct bpf_insn *insn = &meta->insn;
> @@ -2097,6 +2220,16 @@ static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
>         return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
>  }
>
> +static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
> +{
> +       return wrp_mul(nfp_prog, meta, false, true);
> +}
> +
> +static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
> +{
> +       return wrp_mul(nfp_prog, meta, false, false);
> +}
> +
>  static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
>  {
>         u8 dst = meta->insn.dst_reg * 2;
> @@ -2848,6 +2981,8 @@ static const instr_cb_t instr_cb[256] = {
>         [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64,
>         [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
>         [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
> +       [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64,
> +       [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64,
>         [BPF_ALU64 | BPF_NEG] =         neg_reg64,
>         [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64,
>         [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
> @@ -2867,6 +3002,8 @@ static const instr_cb_t instr_cb[256] = {
>         [BPF_ALU | BPF_ADD | BPF_K] =   add_imm,
>         [BPF_ALU | BPF_SUB | BPF_X] =   sub_reg,
>         [BPF_ALU | BPF_SUB | BPF_K] =   sub_imm,
> +       [BPF_ALU | BPF_MUL | BPF_X] =   mul_reg,
> +       [BPF_ALU | BPF_MUL | BPF_K] =   mul_imm,
>         [BPF_ALU | BPF_NEG] =           neg_reg,
>         [BPF_ALU | BPF_LSH | BPF_K] =   shl_imm,
>         [BPF_ALU | BPF_END | BPF_X] =   end_reg32,
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
> index c985d0ac61a3..c10079b1a312 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
> @@ -394,6 +394,11 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
>         return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
>  }
>
> +static inline bool is_mbpf_mul(const struct nfp_insn_meta *meta)
> +{
> +       return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_MUL;
> +}
> +
>  /**
>   * struct nfp_prog - nfp BPF program
>   * @bpf: backpointer to the bpf app priv structure
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
> index 7bd9666bd8ff..30d4f1580693 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
> @@ -516,6 +516,51 @@ nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
>         return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg);
>  }
>
> +static int
> +nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
> +                 struct bpf_verifier_env *env)
> +{
> +       const struct bpf_reg_state *sreg =
> +               cur_regs(env) + meta->insn.src_reg;
> +       const struct bpf_reg_state *dreg =
> +               cur_regs(env) + meta->insn.dst_reg;
> +
> +       meta->umin_src = min(meta->umin_src, sreg->umin_value);
> +       meta->umax_src = max(meta->umax_src, sreg->umax_value);
> +       meta->umin_dst = min(meta->umin_dst, dreg->umin_value);
> +       meta->umax_dst = max(meta->umax_dst, dreg->umax_value);
> +
> +       /* NFP supports u16 and u32 multiplication.
> +        *
> +        * For ALU64, if either operand is beyond u32's value range, we reject
> +        * it. One thing to note, if the source operand is BPF_K, then we need
> +        * to check "imm" field directly, and we'd reject it if it is negative.
> +        * Because for ALU64, "imm" (with s32 type) is expected to be sign
> +        * extended to s64 which NFP mul doesn't support.
> +        *
> +        * For ALU32, it is fine for "imm" be negative though, because the
> +        * result is 32-bits and there is no difference on the low halve of
> +        * the result for signed/unsigned mul, so we will get correct result.
> +        */
> +       if (is_mbpf_mul(meta)) {
> +               if (meta->umax_dst > U32_MAX) {
> +                       pr_vlog(env, "multiplier is not within u32 value range\n");
> +                       return -EINVAL;
> +               }
> +               if (mbpf_src(meta) == BPF_X && meta->umax_src > U32_MAX) {
> +                       pr_vlog(env, "multiplicand is not within u32 value range\n");
> +                       return -EINVAL;
> +               }
> +               if (mbpf_class(meta) == BPF_ALU64 &&
> +                   mbpf_src(meta) == BPF_K && meta->insn.imm < 0) {
> +                       pr_vlog(env, "sign extended multiplicand won't be within u32 value range\n");
> +                       return -EINVAL;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
>  static int
>  nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
>  {
> @@ -551,17 +596,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
>         if (is_mbpf_xadd(meta))
>                 return nfp_bpf_check_xadd(nfp_prog, meta, env);
>
> -       if (is_mbpf_alu(meta)) {
> -               const struct bpf_reg_state *sreg =
> -                       cur_regs(env) + meta->insn.src_reg;
> -               const struct bpf_reg_state *dreg =
> -                       cur_regs(env) + meta->insn.dst_reg;
> -
> -               meta->umin_src = min(meta->umin_src, sreg->umin_value);
> -               meta->umax_src = max(meta->umax_src, sreg->umax_value);
> -               meta->umin_dst = min(meta->umin_dst, dreg->umin_value);
> -               meta->umax_dst = max(meta->umax_dst, dreg->umax_value);
> -       }
> +       if (is_mbpf_alu(meta))
> +               return nfp_bpf_check_alu(nfp_prog, meta, env);
>
>         return 0;
>  }
> diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
> index f6677bc9875a..cdc4e065f6f5 100644
> --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
> +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
> @@ -426,4 +426,32 @@ static inline u32 nfp_get_ind_csr_ctx_ptr_offs(u32 read_offset)
>         return (read_offset & ~NFP_IND_ME_CTX_PTR_BASE_MASK) | NFP_CSR_CTX_PTR;
>  }
>
> +enum mul_type {
> +       MUL_TYPE_START          = 0x00,
> +       MUL_TYPE_STEP_24x8      = 0x01,
> +       MUL_TYPE_STEP_16x16     = 0x02,
> +       MUL_TYPE_STEP_32x32     = 0x03,
> +};
> +
> +enum mul_step {
> +       MUL_STEP_1              = 0x00,
> +       MUL_STEP_NONE           = MUL_STEP_1,
> +       MUL_STEP_2              = 0x01,
> +       MUL_STEP_3              = 0x02,
> +       MUL_STEP_4              = 0x03,
> +       MUL_LAST                = 0x04,
> +       MUL_LAST_2              = 0x05,
> +};
> +
> +#define OP_MUL_BASE            0x0f800000000ULL
> +#define OP_MUL_A_SRC           0x000000003ffULL
> +#define OP_MUL_B_SRC           0x000000ffc00ULL
> +#define OP_MUL_STEP            0x00000700000ULL
> +#define OP_MUL_DST_AB          0x00000800000ULL
> +#define OP_MUL_SW              0x00040000000ULL
> +#define OP_MUL_TYPE            0x00180000000ULL
> +#define OP_MUL_WR_AB           0x20000000000ULL
> +#define OP_MUL_SRC_LMEXTN      0x40000000000ULL
> +#define OP_MUL_DST_LMEXTN      0x80000000000ULL
> +
>  #endif
> --
> 2.17.1
>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 4a629e9b5c0f..7d7061d93358 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -415,6 +415,60 @@  emit_alu(struct nfp_prog *nfp_prog, swreg dst,
 		   reg.dst_lmextn, reg.src_lmextn);
 }
 
+static void
+__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg,
+	   enum mul_type type, enum mul_step step, u16 breg, bool swap,
+	   bool wr_both, bool dst_lmextn, bool src_lmextn)
+{
+	u64 insn;
+
+	insn = OP_MUL_BASE |
+		FIELD_PREP(OP_MUL_A_SRC, areg) |
+		FIELD_PREP(OP_MUL_B_SRC, breg) |
+		FIELD_PREP(OP_MUL_STEP, step) |
+		FIELD_PREP(OP_MUL_DST_AB, dst_ab) |
+		FIELD_PREP(OP_MUL_SW, swap) |
+		FIELD_PREP(OP_MUL_TYPE, type) |
+		FIELD_PREP(OP_MUL_WR_AB, wr_both) |
+		FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) |
+		FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type,
+	 enum mul_step step, swreg rreg)
+{
+	struct nfp_insn_ur_regs reg;
+	u16 areg;
+	int err;
+
+	if (type == MUL_TYPE_START && step != MUL_STEP_NONE) {
+		nfp_prog->error = -EINVAL;
+		return;
+	}
+
+	if (step == MUL_LAST || step == MUL_LAST_2) {
+		/* When type is step and step Number is LAST or LAST2, left
+		 * source is used as destination.
+		 */
+		err = swreg_to_unrestricted(lreg, reg_none(), rreg, &reg);
+		areg = reg.dst;
+	} else {
+		err = swreg_to_unrestricted(reg_none(), lreg, rreg, &reg);
+		areg = reg.areg;
+	}
+
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap,
+		   reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
+}
+
 static void
 __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
 		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
@@ -1380,6 +1434,65 @@  static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
 		      SHF_SC_R_ROT, 16);
 }
 
+static void
+wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
+	    swreg rreg, bool gen_high_half)
+{
+	emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
+	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg);
+	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg);
+	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg);
+	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg);
+	emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none());
+	if (gen_high_half)
+		emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2,
+			 reg_none());
+	else
+		wrp_immed(nfp_prog, dst_hi, 0);
+}
+
+static void
+wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
+	    swreg rreg)
+{
+	emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
+	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg);
+	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg);
+	emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none());
+}
+
+static int
+wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	bool gen_high_half, bool ropnd_from_reg)
+{
+	swreg multiplier, multiplicand, dst_hi, dst_lo;
+	const struct bpf_insn *insn = &meta->insn;
+	u32 lopnd_max, ropnd_max;
+	u8 dst_reg;
+
+	dst_reg = insn->dst_reg;
+	multiplicand = reg_a(dst_reg * 2);
+	dst_hi = reg_both(dst_reg * 2 + 1);
+	dst_lo = reg_both(dst_reg * 2);
+	lopnd_max = meta->umax_dst;
+	if (ropnd_from_reg) {
+		multiplier = reg_b(insn->src_reg * 2);
+		ropnd_max = meta->umax_src;
+	} else {
+		u32 imm = insn->imm;
+
+		multiplier = re_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
+		ropnd_max = imm;
+	}
+	if (lopnd_max > U16_MAX || ropnd_max > U16_MAX)
+		wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier,
+			    gen_high_half);
+	else
+		wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier);
+
+	return 0;
+}
+
 static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
@@ -1684,6 +1797,16 @@  static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
+static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_mul(nfp_prog, meta, true, true);
+}
+
+static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_mul(nfp_prog, meta, true, false);
+}
+
 static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
@@ -2097,6 +2220,16 @@  static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
 }
 
+static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_mul(nfp_prog, meta, false, true);
+}
+
+static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_mul(nfp_prog, meta, false, false);
+}
+
 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	u8 dst = meta->insn.dst_reg * 2;
@@ -2848,6 +2981,8 @@  static const instr_cb_t instr_cb[256] = {
 	[BPF_ALU64 | BPF_ADD | BPF_K] =	add_imm64,
 	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
 	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
+	[BPF_ALU64 | BPF_MUL | BPF_X] =	mul_reg64,
+	[BPF_ALU64 | BPF_MUL | BPF_K] =	mul_imm64,
 	[BPF_ALU64 | BPF_NEG] =		neg_reg64,
 	[BPF_ALU64 | BPF_LSH | BPF_X] =	shl_reg64,
 	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
@@ -2867,6 +3002,8 @@  static const instr_cb_t instr_cb[256] = {
 	[BPF_ALU | BPF_ADD | BPF_K] =	add_imm,
 	[BPF_ALU | BPF_SUB | BPF_X] =	sub_reg,
 	[BPF_ALU | BPF_SUB | BPF_K] =	sub_imm,
+	[BPF_ALU | BPF_MUL | BPF_X] =	mul_reg,
+	[BPF_ALU | BPF_MUL | BPF_K] =	mul_imm,
 	[BPF_ALU | BPF_NEG] =		neg_reg,
 	[BPF_ALU | BPF_LSH | BPF_K] =	shl_imm,
 	[BPF_ALU | BPF_END | BPF_X] =	end_reg32,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index c985d0ac61a3..c10079b1a312 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -394,6 +394,11 @@  static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
 	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
 }
 
+static inline bool is_mbpf_mul(const struct nfp_insn_meta *meta)
+{
+	return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_MUL;
+}
+
 /**
  * struct nfp_prog - nfp BPF program
  * @bpf: backpointer to the bpf app priv structure
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 7bd9666bd8ff..30d4f1580693 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -516,6 +516,51 @@  nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg);
 }
 
+static int
+nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		  struct bpf_verifier_env *env)
+{
+	const struct bpf_reg_state *sreg =
+		cur_regs(env) + meta->insn.src_reg;
+	const struct bpf_reg_state *dreg =
+		cur_regs(env) + meta->insn.dst_reg;
+
+	meta->umin_src = min(meta->umin_src, sreg->umin_value);
+	meta->umax_src = max(meta->umax_src, sreg->umax_value);
+	meta->umin_dst = min(meta->umin_dst, dreg->umin_value);
+	meta->umax_dst = max(meta->umax_dst, dreg->umax_value);
+
+	/* NFP supports u16 and u32 multiplication.
+	 *
+	 * For ALU64, if either operand is beyond u32's value range, we reject
+	 * it. One thing to note, if the source operand is BPF_K, then we need
+	 * to check "imm" field directly, and we'd reject it if it is negative.
+	 * Because for ALU64, "imm" (with s32 type) is expected to be sign
+	 * extended to s64 which NFP mul doesn't support.
+	 *
+	 * For ALU32, it is fine for "imm" be negative though, because the
+	 * result is 32-bits and there is no difference on the low halve of
+	 * the result for signed/unsigned mul, so we will get correct result.
+	 */
+	if (is_mbpf_mul(meta)) {
+		if (meta->umax_dst > U32_MAX) {
+			pr_vlog(env, "multiplier is not within u32 value range\n");
+			return -EINVAL;
+		}
+		if (mbpf_src(meta) == BPF_X && meta->umax_src > U32_MAX) {
+			pr_vlog(env, "multiplicand is not within u32 value range\n");
+			return -EINVAL;
+		}
+		if (mbpf_class(meta) == BPF_ALU64 &&
+		    mbpf_src(meta) == BPF_K && meta->insn.imm < 0) {
+			pr_vlog(env, "sign extended multiplicand won't be within u32 value range\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int
 nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
 {
@@ -551,17 +596,8 @@  nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
 	if (is_mbpf_xadd(meta))
 		return nfp_bpf_check_xadd(nfp_prog, meta, env);
 
-	if (is_mbpf_alu(meta)) {
-		const struct bpf_reg_state *sreg =
-			cur_regs(env) + meta->insn.src_reg;
-		const struct bpf_reg_state *dreg =
-			cur_regs(env) + meta->insn.dst_reg;
-
-		meta->umin_src = min(meta->umin_src, sreg->umin_value);
-		meta->umax_src = max(meta->umax_src, sreg->umax_value);
-		meta->umin_dst = min(meta->umin_dst, dreg->umin_value);
-		meta->umax_dst = max(meta->umax_dst, dreg->umax_value);
-	}
+	if (is_mbpf_alu(meta))
+		return nfp_bpf_check_alu(nfp_prog, meta, env);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index f6677bc9875a..cdc4e065f6f5 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -426,4 +426,32 @@  static inline u32 nfp_get_ind_csr_ctx_ptr_offs(u32 read_offset)
 	return (read_offset & ~NFP_IND_ME_CTX_PTR_BASE_MASK) | NFP_CSR_CTX_PTR;
 }
 
+enum mul_type {
+	MUL_TYPE_START		= 0x00,
+	MUL_TYPE_STEP_24x8	= 0x01,
+	MUL_TYPE_STEP_16x16	= 0x02,
+	MUL_TYPE_STEP_32x32	= 0x03,
+};
+
+enum mul_step {
+	MUL_STEP_1		= 0x00,
+	MUL_STEP_NONE		= MUL_STEP_1,
+	MUL_STEP_2		= 0x01,
+	MUL_STEP_3		= 0x02,
+	MUL_STEP_4		= 0x03,
+	MUL_LAST		= 0x04,
+	MUL_LAST_2		= 0x05,
+};
+
+#define OP_MUL_BASE		0x0f800000000ULL
+#define OP_MUL_A_SRC		0x000000003ffULL
+#define OP_MUL_B_SRC		0x000000ffc00ULL
+#define OP_MUL_STEP		0x00000700000ULL
+#define OP_MUL_DST_AB		0x00000800000ULL
+#define OP_MUL_SW		0x00040000000ULL
+#define OP_MUL_TYPE		0x00180000000ULL
+#define OP_MUL_WR_AB		0x20000000000ULL
+#define OP_MUL_SRC_LMEXTN	0x40000000000ULL
+#define OP_MUL_DST_LMEXTN	0x80000000000ULL
+
 #endif