
[RISCV] Add Pattern for builtin overflow

Message ID 20210427060824.259151-1-admin@levyhsu.com
State: New
Series: [RISCV] Add Pattern for builtin overflow

Commit Message

Levy Hsu April 27, 2021, 6:08 a.m. UTC
From: LevyHsu <admin@levyhsu.com>

Added an implementation of builtin overflow detection; the new patterns are listed below.

---------------------------------------------------------------
Addition:

signed addition (SImode with RV32 || DImode with RV64):
    add     t0, t1, t2
    slti    t3, t2, 0
    slt     t4, t0, t1
    bne     t3, t4, overflow

signed addition (SImode with RV64):
    add     t0, t1, t2
    sext.w  t3, t0
    bne     t0, t3, overflow

unsigned addition (SImode with RV32 || DImode with RV64):
    add     t0, t1, t2
    bltu    t0, t1, overflow

unsigned addition (SImode with RV64):
    sext.w  t3, t1
    addw    t0, t1, t2
    bltu    t0, t3, overflow
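
For illustration only (this sketch is not part of the patch, and the helper name is made up), the condition the first sequence branches on can be written in C as below.  The add is carried out in unsigned arithmetic so the wraparound itself is well defined; converting the wrapped result back to int32_t is implementation-defined in C but matches GCC's behavior.  A call such as __builtin_add_overflow (a, b, &sum) is what ultimately reaches these addv<mode>4/uaddv<mode>4 expanders via internal-fn.c.

    #include <stdbool.h>
    #include <stdint.h>

    /* Hedged C sketch of the signed-addition overflow check shown above
       for SImode with RV32 (not from the patch).  */
    static bool
    add_overflows_int32 (int32_t a, int32_t b)
    {
      int32_t sum = (int32_t) ((uint32_t) a + (uint32_t) b); /* add  t0, t1, t2 */
      bool b_neg = b < 0;                                     /* slti t3, t2, 0  */
      bool sum_lt_a = sum < a;                                /* slt  t4, t0, t1 */
      return b_neg != sum_lt_a;                               /* bne  t3, t4, overflow */
    }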
---------------------------------------------------------------
Subtraction:

signed subtraction (SImode with RV32 || DImode with RV64):
    sub     t0, t1, t2
    slti    t3, t2, 0
    slt     t4, t1, t0
    bne     t3, t4, overflow

signed subtraction (SImode with RV64):
    sub     t0, t1, t2
    sext.w  t3, t0
    bne     t0, t3, overflow

unsigned subtraction (SImode with RV32 || DImode with RV64):
    sub     t0, t1, t2
    bltu    t1, t0, overflow

unsigned subtraction (SImode with RV64):
    sext.w  t3, t1
    subw    t0, t1, t2
    bltu    t0, t3, overflow
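
The same kind of hedged C sketch applies to the first subtraction sequence (again not part of the patch; the helper name is made up): overflow is signalled when the sign of the subtrahend disagrees with whether the wrapped difference exceeds the minuend.

    #include <stdbool.h>
    #include <stdint.h>

    /* Hedged C sketch of the signed-subtraction overflow check shown
       above for SImode with RV32 (not from the patch).  */
    static bool
    sub_overflows_int32 (int32_t a, int32_t b)
    {
      int32_t diff = (int32_t) ((uint32_t) a - (uint32_t) b); /* sub  t0, t1, t2 */
      bool b_neg = b < 0;                                      /* slti t3, t2, 0  */
      bool a_lt_diff = a < diff;                               /* slt  t4, t1, t0 */
      return b_neg != a_lt_diff;                               /* bne  t3, t4, overflow */
    }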
---------------------------------------------------------------
Multiplication:

signed multiplication (SImode with RV32 || DImode with RV64):
    mulh    t4, t1, t2
    mul     t0, t1, t2
    srai    t5, t0, 31/63 (RV32/RV64)
    bne     t4, t5, overflow

signed multiplication (SImode with RV64):
    mul     t0, t1, t2
    sext.w  t3, t0
    bne     t0, t3, overflow

unsigned multiplication (SImode with RV32 || DImode with RV64):
    mulhu   t4, t1, t2
    mul     t0, t1, t2
    bne     t4, zero, overflow

unsigned multiplication (SImode with RV64):
    slli    t0, t0, 32
    slli    t1, t1, 32
    srli    t0, t0, 32
    srli    t1, t1, 32
    mul     t0, t0, t1
    srai    t5, t0, 32
    bne     t5, zero, overflow
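
A hedged C sketch of the signed check (not part of the patch; the helper name is made up): mulh and mul together form the full 64-bit product, and overflow occurred unless the high word equals the sign extension of the low word.  The right shift of a negative int is implementation-defined in C but is an arithmetic shift with GCC, matching srai.

    #include <stdbool.h>
    #include <stdint.h>

    /* Hedged C sketch of the signed-multiplication overflow check shown
       above for SImode with RV32 (not from the patch).  */
    static bool
    mul_overflows_int32 (int32_t a, int32_t b)
    {
      int64_t full = (int64_t) a * (int64_t) b;
      int32_t lo = (int32_t) full;             /* mul  t0, t1, t2 */
      int32_t hi = (int32_t) (full >> 32);     /* mulh t4, t1, t2 */
      return hi != (lo >> 31);                 /* srai t5, t0, 31; bne t4, t5, overflow */
    }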

---------------------------------------------------------------
---
 gcc/config/riscv/riscv.c  |   8 ++
 gcc/config/riscv/riscv.h  |   5 +
 gcc/config/riscv/riscv.md | 240 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 253 insertions(+)

Comments

Andrew Waterman April 27, 2021, 7:45 a.m. UTC | #1
On Tue, Apr 27, 2021 at 12:18 AM Levy Hsu <admin@levyhsu.com> wrote:
>
> From: LevyHsu <admin@levyhsu.com>
>
> Added implementation for builtin overflow detection, new patterns are listed below.
>
> ---------------------------------------------------------------
> Addition:
>
> signed addition (SImode with RV32 || DImode with RV64):
>         add     t0, t1, t2
>         slti    t3, t2, 0
>         slt     t4, t0, t1
>         bne     t3, t4, overflow
>
> signed addition (SImode with RV64):
>         add     t0, t1, t2
>     sext.w  t3, t0
>     bne     t0, t3, overflow

The following version has the same instruction count but offers more ILP:

  add t0, t1, t2
  addw t3, t1, t2
  bne t0, t3, overflow

>
> unsigned addition (SImode with RV32 || DImode with RV64):
>     add     t0, t1, t2
>     bltu    t0, t1, overflow
>
> unsigned addition (SImode with RV64):
>     sext.w  t3, t1
>     addw        t0, t1, t2
>     bltu        t0, t3, overflow

I think you can do this in two instructions, similar to the previous pattern:

  addw t0, t1, t2
  bltu t0, t1, overflow

> ---------------------------------------------------------------
> Subtraction:
>
> signed subtraction (SImode with RV32 || DImode with RV64):
>     sub     t0, t1, t2
>     slti    t3, t2, 0
>     slt     t4, t1, t0
>     bne     t3, t4, overflow
>
> signed subtraction (SImode with RV64):
>         sub     t0, t1, t2
>     sext.w  t3, t0
>     bne     t0, t3, overflow

See analogous addition comment.

>
> unsigned subtraction (SImode with RV32 || DImode with RV64):
>     sub     t0, t1, t2
>     bltu    t1, t0, overflow
>
> unsigned subtraction (SImode with RV64):
>     sext.w  t3, t1
>     subw        t0, t1, t2
>     bltu    t0, t3, overflow

See analogous addition comment.

> ---------------------------------------------------------------
> Multiplication:
>
> signed multiplication (SImode with RV32 || DImode with RV64):
>     mulh    t4, t1, t2
>     mul         t0, t1, t2
>     srai        t5, t0, 31/63 (RV32/64)
>     bne     t4, t5, overflow
>
> signed multiplication (SImode with RV64):
>         mul     t0, t1, t2
>     sext.w  t3, t0
>     bne     t0, t3, overflow
>
> unsigned multiplication (SImode with RV32 || DImode with RV64 ):
>     mulhu   t4, t1, t2
>     mul     t0, t1, t2
>     bne     t4, 0,  overflow
>
> unsigned multiplication (SImode with RV64):
>     slli    t0,t0,32
>         slli    t1,t1,32
>         srli    t0,t0,32
>         srli    t1,t1,32
>         mul         t0,t0,t1
>         srai    t5,t0,32
>         bne         t5, 0, overflow

I think you can eliminate the first two right shifts by replacing mul
with mulhu... something like:

  slli rx, rx, 32
  slli ry, ry, 32
  mulhu rz, rx, ry
  srli rt, rz, 32
  bnez rt, overflow

>
> ---------------------------------------------------------------
> ---
>  gcc/config/riscv/riscv.c  |   8 ++
>  gcc/config/riscv/riscv.h  |   5 +
>  gcc/config/riscv/riscv.md | 240 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 253 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index d489717b2a5..cf94f5c9658 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -351,6 +351,14 @@ static const struct riscv_tune_info riscv_tune_info_table[] = {
>    { "size", generic, &optimize_size_tune_info },
>  };
>
> +/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
> +
> +static unsigned int
> +riscv_min_arithmetic_precision (void)
> +{
> +  return 32;
> +}
> +
>  /* Return the riscv_tune_info entry for the given name string.  */
>
>  static const struct riscv_tune_info *
> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> index 172c7ca7c98..a6f451b97e3 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -121,6 +121,11 @@ extern const char *riscv_default_mtune (int argc, const char **argv);
>  #define MIN_UNITS_PER_WORD 4
>  #endif
>
> +/* Allows SImode op in builtin overflow pattern, see internal-fn.c.  */
> +
> +#undef TARGET_MIN_ARITHMETIC_PRECISION
> +#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
> +
>  /* The `Q' extension is not yet supported.  */
>  #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
>
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 36012ad1f77..c82017a4bce 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -462,6 +462,81 @@
>    [(set_attr "type" "arith")
>     (set_attr "mode" "DI")])
>
> +(define_expand "addv<mode>4"
> +  [(set (match_operand:GPR         0 "register_operand" "=r,r")
> +        (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
> +                (match_operand:GPR 2 "arith_operand"    " r,I")))
> +                        (label_ref (match_operand 3 "" ""))]
> +  ""
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +  {
> +    rtx t3 = gen_reg_rtx (DImode);
> +    rtx t4 = gen_reg_rtx (DImode);
> +    rtx t5 = gen_reg_rtx (DImode);
> +    rtx t6 = gen_reg_rtx (DImode);
> +
> +    if (GET_CODE (operands[1]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> +    else
> +      t4 = operands[1];
> +    if (GET_CODE (operands[2]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> +    else
> +      t5 = operands[2];
> +    emit_insn (gen_adddi3 (t3, t4, t5));
> +
> +    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> +    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> +  }
> +  else
> +  {
> +    rtx t3 = gen_reg_rtx (<MODE>mode);
> +    rtx t4 = gen_reg_rtx (<MODE>mode);
> +
> +    emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
> +    rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
> +    emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
> +    rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]);
> +
> +    emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1]));
> +    riscv_expand_conditional_branch (operands[3], NE, t3, t4);
> +  }
> +  DONE;
> +})
> +
> +(define_expand "uaddv<mode>4"
> +  [(set (match_operand:GPR           0 "register_operand" "=r,r")
> +        (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
> +                  (match_operand:GPR 2 "arith_operand"    " r,I")))
> +                        (label_ref (match_operand 3 "" ""))]
> +  ""
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +  {
> +    rtx t3 = gen_reg_rtx (DImode);
> +    rtx t4 = gen_reg_rtx (DImode);
> +
> +    if (GET_CODE (operands[1]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
> +    else
> +      t3 = operands[1];
> +    emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
> +    emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
> +
> +    riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
> +  }
> +  else
> +  {
> +    emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
> +    riscv_expand_conditional_branch (operands[3], LTU, operands[0], operands[1]);
> +  }
> +
> +  DONE;
> +})
> +
>  (define_insn "*addsi3_extended"
>    [(set (match_operand:DI               0 "register_operand" "=r,r")
>         (sign_extend:DI
> @@ -518,6 +593,85 @@
>    [(set_attr "type" "arith")
>     (set_attr "mode" "SI")])
>
> +(define_expand "subv<mode>4"
> +  [(set (match_operand:GPR          0 "register_operand" "= r")
> +        (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
> +                 (match_operand:GPR 2 "register_operand" "  r")))
> +                        (label_ref (match_operand 3 "" ""))]
> +  ""
> +{
> +
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +  {
> +    rtx t3 = gen_reg_rtx (DImode);
> +    rtx t4 = gen_reg_rtx (DImode);
> +    rtx t5 = gen_reg_rtx (DImode);
> +    rtx t6 = gen_reg_rtx (DImode);
> +
> +    if (GET_CODE (operands[1]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> +    else
> +      t4 = operands[1];
> +    if (GET_CODE (operands[2]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> +    else
> +      t5 = operands[2];
> +    emit_insn (gen_subdi3 (t3, t4, t5));
> +
> +    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> +    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> +  }
> +  else
> +  {
> +    rtx t3 = gen_reg_rtx (<MODE>mode);
> +    rtx t4 = gen_reg_rtx (<MODE>mode);
> +
> +    emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
> +
> +    rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
> +    emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
> +
> +    rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]);
> +    emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0]));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, t3, t4);
> +  }
> +
> +  DONE;
> +})
> +
> +(define_expand "usubv<mode>4"
> +  [(set (match_operand:GPR            0 "register_operand" "= r")
> +        (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
> +                   (match_operand:GPR 2 "register_operand" "  r")))
> +                        (label_ref (match_operand 3 "" ""))]
> +  ""
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +  {
> +    rtx t3 = gen_reg_rtx (DImode);
> +    rtx t4 = gen_reg_rtx (DImode);
> +
> +    if (GET_CODE (operands[1]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
> +    else
> +      t3 = operands[1];
> +    emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
> +    emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
> +
> +    riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
> +  }
> +  else
> +  {
> +    emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
> +    riscv_expand_conditional_branch (operands[3], LTU, operands[1], operands[0]);
> +  }
> +
> +  DONE;
> +})
> +
>  (define_insn "*subsi3_extended"
>    [(set (match_operand:DI               0 "register_operand" "= r")
>         (sign_extend:DI
> @@ -609,6 +763,92 @@
>    [(set_attr "type" "imul")
>     (set_attr "mode" "DI")])
>
> +(define_expand "mulv<mode>4"
> +  [(set (match_operand:GPR         0 "register_operand" "=r")
> +        (mult:GPR (match_operand:GPR 1 "register_operand" " r")
> +                (match_operand:GPR 2 "register_operand" " r")))
> +                        (label_ref (match_operand 3 "" ""))]
> +  "TARGET_MUL"
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +  {
> +    rtx t3 = gen_reg_rtx (DImode);
> +    rtx t4 = gen_reg_rtx (DImode);
> +    rtx t5 = gen_reg_rtx (DImode);
> +    rtx t6 = gen_reg_rtx (DImode);
> +
> +    if (GET_CODE (operands[1]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> +    else
> +      t4 = operands[1];
> +    if (GET_CODE (operands[2]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> +    else
> +      t5 = operands[2];
> +    emit_insn (gen_muldi3 (t3, t4, t5));
> +
> +    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> +    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> +  }
> +  else
> +  {
> +    rtx hp = gen_reg_rtx (<MODE>mode);
> +    rtx lp = gen_reg_rtx (<MODE>mode);
> +
> +    emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2]));
> +    emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
> +    emit_insn (gen_ashr<mode>3 (lp, operands[0], GEN_INT (BITS_PER_WORD - 1)));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, hp, lp);
> +  }
> +
> +  DONE;
> +})
> +
> +(define_expand "umulv<mode>4"
> +  [(set (match_operand:GPR         0 "register_operand" "=r")
> +        (mult:GPR (match_operand:GPR 1 "register_operand" " r")
> +                (match_operand:GPR 2 "register_operand" " r")))
> +                        (label_ref (match_operand 3 "" ""))]
> +  "TARGET_MUL"
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +  {
> +    rtx t3 = gen_reg_rtx (DImode);
> +    rtx t4 = gen_reg_rtx (DImode);
> +    rtx t5 = gen_reg_rtx (DImode);
> +    rtx t6 = gen_reg_rtx (DImode);
> +
> +    if (GET_CODE (operands[1]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 1));
> +    else
> +      t4 = operands[1];
> +    if (GET_CODE (operands[2]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 1));
> +    else
> +      t5 = operands[2];
> +    emit_insn (gen_muldi3 (t3, t4, t5));
> +
> +    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> +    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 1));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> +  }
> +  else
> +  {
> +    rtx hp = gen_reg_rtx (<MODE>mode);
> +
> +    emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2]));
> +    emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx);
> +  }
> +
> +  DONE;
> +})
> +
>  (define_insn "*mulsi3_extended"
>    [(set (match_operand:DI              0 "register_operand" "=r")
>         (sign_extend:DI
> --
> 2.30.1
>
Jim Wilson April 28, 2021, 8:18 p.m. UTC | #2
On Tue, Apr 27, 2021 at 12:45 AM Andrew Waterman <andrew@sifive.com> wrote:

> > signed addition (SImode with RV64):
> >         add     t0, t1, t2
> >     sext.w  t3, t0
> >     bne     t0, t3, overflow
>
> The following version has the same instruction count but offers more ILP:
>
>   add t0, t1, t2
>   addw t3, t1, t2
>   bne t0, t3, overflow
>

This is a good suggestion, but in the interests of making forward progress
here, I'd like to accept the patch and then file these as bugzillas as ways
to further improve the patch.

> > unsigned addition (SImode with RV64):
> >     sext.w  t3, t1
> >     addw        t0, t1, t2
> >     bltu        t0, t3, overflow
>
> I think you can do this in two instructions, similar to the previous
> pattern:
>
>   addw t0, t1, t2
>   bltu t0, t1, overflow
>

Likewise.

> > signed subtraction (SImode with RV64):
> >         sub     t0, t1, t2
> >     sext.w  t3, t0
> >     bne     t0, t3, overflow
>
> See analogous addition comment.
>

Likewise.

>
> > unsigned subtraction (SImode with RV64):
> >     sext.w  t3, t1
> >     subw        t0, t1, t2
> >     bltu    t0, t3, overflow
>
> See analogous addition comment.
>

Likewise.

> > unsigned multiplication (SImode with RV64):
> >     slli    t0,t0,32
> >         slli    t1,t1,32
> >         srli    t0,t0,32
> >         srli    t1,t1,32
> >         mul         t0,t0,t1
> >         srai    t5,t0,32
> >         bne         t5, 0, overflow
>
> I think you can eliminate the first two right shifts by replacing mul
> with mulhu... something like:
>
>   slli rx, rx, 32
>   slli ry, ry, 32
>   mulhu rz, rx, ry
>   srli rt, rz, 32
>   bnez rt, overflow
>

Likewise, except this should be a separate bugzilla.

Jim
Andrew Waterman April 28, 2021, 11:04 p.m. UTC | #3
On Wed, Apr 28, 2021 at 1:18 PM Jim Wilson <jimw@sifive.com> wrote:
>
> On Tue, Apr 27, 2021 at 12:45 AM Andrew Waterman <andrew@sifive.com> wrote:
>>
>> > signed addition (SImode with RV64):
>> >         add     t0, t1, t2
>> >     sext.w  t3, t0
>> >     bne     t0, t3, overflow
>>
>> The following version has the same instruction count but offers more ILP:
>>
>>   add t0, t1, t2
>>   addw t3, t1, t2
>>   bne t0, t3, overflow
>
>
> This is a good suggestion, but in the interests of making forward progress here, I'd like to accept the patch and then file these as bugzillas as ways to further improve the patch.

Agreed, these potential improvements are definitely not blockers.

>>
>> > unsigned addition (SImode with RV64):
>> >     sext.w  t3, t1
>> >     addw        t0, t1, t2
>> >     bltu        t0, t3, overflow
>>
>> I think you can do this in two instructions, similar to the previous pattern:
>>
>>   addw t0, t1, t2
>>   bltu t0, t1, overflow
>
>
> Likewise.
>>
>> > signed subtraction (SImode with RV64):
>> >         sub     t0, t1, t2
>> >     sext.w  t3, t0
>> >     bne     t0, t3, overflow
>>
>> See analogous addition comment.
>
>
> Likewise.
>>
>>
>> > unsigned subtraction (SImode with RV64):
>> >     sext.w  t3, t1
>> >     subw        t0, t1, t2
>> >     bltu    t0, t3, overflow
>>
>> See analogous addition comment.
>
>
> Likewise.
>>
>> > unsigned multiplication (SImode with RV64):
>> >     slli    t0,t0,32
>> >         slli    t1,t1,32
>> >         srli    t0,t0,32
>> >         srli    t1,t1,32
>> >         mul         t0,t0,t1
>> >         srai    t5,t0,32
>> >         bne         t5, 0, overflow
>>
>> I think you can eliminate the first two right shifts by replacing mul
>> with mulhu... something like:
>>
>>   slli rx, rx, 32
>>   slli ry, ry, 32
>>   mulhu rz, rx, ry
>>   srli rt, rz, 32
>>   bnez rt, overflow
>
>
> Likewise, except this should be a separate bugzilla.
>
> Jim
Jim Wilson April 29, 2021, 10:02 p.m. UTC | #4
On Wed, Apr 28, 2021 at 4:04 PM Andrew Waterman <andrew@sifive.com> wrote:

> > This is a good suggestion, but in the interests of making forward
> progress here, I'd like to accept the patch and then file these as
> bugzillas as ways to further improve the patch.
>
> Agreed, these potential improvements are definitely not blockers.
>

Turns out Levy had time to work on the patch after all, and submitted a
fourth version with your improvements.

Jim
Andrew Waterman May 2, 2021, 12:06 a.m. UTC | #5
On Thu, Apr 29, 2021 at 3:02 PM Jim Wilson <jimw@sifive.com> wrote:
>
> On Wed, Apr 28, 2021 at 4:04 PM Andrew Waterman <andrew@sifive.com> wrote:
>>
>> > This is a good suggestion, but in the interests of making forward progress here, I'd like to accept the patch and then file these as bugzillas as ways to further improve the patch.
>>
>> Agreed, these potential improvements are definitely not blockers.
>
>
> Turns out Levy had time to work on the patch after all, and submitted a fourth version with your improvements.

Cool.  Thank you, Levy!

>
> Jim

Patch

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index d489717b2a5..cf94f5c9658 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -351,6 +351,14 @@  static const struct riscv_tune_info riscv_tune_info_table[] = {
   { "size", generic, &optimize_size_tune_info },
 };
 
+/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
+
+static unsigned int
+riscv_min_arithmetic_precision (void)
+{
+  return 32;
+}
+
 /* Return the riscv_tune_info entry for the given name string.  */
 
 static const struct riscv_tune_info *
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 172c7ca7c98..a6f451b97e3 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -121,6 +121,11 @@  extern const char *riscv_default_mtune (int argc, const char **argv);
 #define MIN_UNITS_PER_WORD 4
 #endif
 
+/* Allows SImode op in builtin overflow pattern, see internal-fn.c.  */
+
+#undef TARGET_MIN_ARITHMETIC_PRECISION
+#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
+
 /* The `Q' extension is not yet supported.  */
 #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 36012ad1f77..c82017a4bce 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -462,6 +462,81 @@ 
   [(set_attr "type" "arith")
    (set_attr "mode" "DI")])
 
+(define_expand "addv<mode>4"
+  [(set (match_operand:GPR         0 "register_operand" "=r,r")
+        (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
+                (match_operand:GPR 2 "arith_operand"    " r,I")))
+                        (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+    rtx t5 = gen_reg_rtx (DImode);
+    rtx t6 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+    else
+      t4 = operands[1];
+    if (GET_CODE (operands[2]) != CONST_INT)
+      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+    else
+      t5 = operands[2];
+    emit_insn (gen_adddi3 (t3, t4, t5));
+
+    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
+    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+  }
+  else
+  {
+    rtx t3 = gen_reg_rtx (<MODE>mode);
+    rtx t4 = gen_reg_rtx (<MODE>mode);
+
+    emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
+    rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
+    emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
+    rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]);
+
+    emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1]));
+    riscv_expand_conditional_branch (operands[3], NE, t3, t4);
+  }
+  DONE;
+})
+
+(define_expand "uaddv<mode>4"
+  [(set (match_operand:GPR           0 "register_operand" "=r,r")
+        (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
+                  (match_operand:GPR 2 "arith_operand"    " r,I")))
+                        (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+    else
+      t3 = operands[1];
+    emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
+    emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
+  }
+  else
+  {
+    emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
+    riscv_expand_conditional_branch (operands[3], LTU, operands[0], operands[1]);
+  }
+
+  DONE;
+})
+
 (define_insn "*addsi3_extended"
   [(set (match_operand:DI               0 "register_operand" "=r,r")
 	(sign_extend:DI
@@ -518,6 +593,85 @@ 
   [(set_attr "type" "arith")
    (set_attr "mode" "SI")])
 
+(define_expand "subv<mode>4"
+  [(set (match_operand:GPR          0 "register_operand" "= r")
+        (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
+                 (match_operand:GPR 2 "register_operand" "  r")))
+                        (label_ref (match_operand 3 "" ""))]
+  ""
+{
+
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+    rtx t5 = gen_reg_rtx (DImode);
+    rtx t6 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+    else
+      t4 = operands[1];
+    if (GET_CODE (operands[2]) != CONST_INT)
+      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+    else
+      t5 = operands[2];
+    emit_insn (gen_subdi3 (t3, t4, t5));
+
+    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
+    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+  }
+  else
+  {
+    rtx t3 = gen_reg_rtx (<MODE>mode);
+    rtx t4 = gen_reg_rtx (<MODE>mode);
+
+    emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
+
+    rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
+    emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
+
+    rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]);
+    emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0]));
+
+    riscv_expand_conditional_branch (operands[3], NE, t3, t4);
+  }
+  
+  DONE;
+})
+
+(define_expand "usubv<mode>4"
+  [(set (match_operand:GPR            0 "register_operand" "= r")
+        (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
+                   (match_operand:GPR 2 "register_operand" "  r")))
+                        (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+    else
+      t3 = operands[1];
+    emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
+    emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
+  }
+  else
+  {
+    emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
+    riscv_expand_conditional_branch (operands[3], LTU, operands[1], operands[0]);
+  }
+
+  DONE;
+})
+
 (define_insn "*subsi3_extended"
   [(set (match_operand:DI               0 "register_operand" "= r")
 	(sign_extend:DI
@@ -609,6 +763,92 @@ 
   [(set_attr "type" "imul")
    (set_attr "mode" "DI")])
 
+(define_expand "mulv<mode>4"
+  [(set (match_operand:GPR         0 "register_operand" "=r")
+        (mult:GPR (match_operand:GPR 1 "register_operand" " r")
+                (match_operand:GPR 2 "register_operand" " r")))
+                        (label_ref (match_operand 3 "" ""))]
+  "TARGET_MUL"
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+    rtx t5 = gen_reg_rtx (DImode);
+    rtx t6 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+    else
+      t4 = operands[1];
+    if (GET_CODE (operands[2]) != CONST_INT)
+      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+    else
+      t5 = operands[2];
+    emit_insn (gen_muldi3 (t3, t4, t5));
+
+    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
+    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+  }
+  else
+  {
+    rtx hp = gen_reg_rtx (<MODE>mode);
+    rtx lp = gen_reg_rtx (<MODE>mode);
+
+    emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2]));
+    emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+    emit_insn (gen_ashr<mode>3 (lp, operands[0], GEN_INT (BITS_PER_WORD - 1)));
+
+    riscv_expand_conditional_branch (operands[3], NE, hp, lp);
+  }
+
+  DONE;
+})
+
+(define_expand "umulv<mode>4"
+  [(set (match_operand:GPR         0 "register_operand" "=r")
+        (mult:GPR (match_operand:GPR 1 "register_operand" " r")
+                (match_operand:GPR 2 "register_operand" " r")))
+                        (label_ref (match_operand 3 "" ""))]
+  "TARGET_MUL"
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+    rtx t5 = gen_reg_rtx (DImode);
+    rtx t6 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 1));
+    else
+      t4 = operands[1];
+    if (GET_CODE (operands[2]) != CONST_INT)
+      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 1));
+    else
+      t5 = operands[2];
+    emit_insn (gen_muldi3 (t3, t4, t5));
+
+    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
+    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 1));
+
+    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+  }
+  else
+  {
+    rtx hp = gen_reg_rtx (<MODE>mode);
+
+    emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2]));
+    emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+  
+    riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx);
+  }
+
+  DONE;
+})
+
 (define_insn "*mulsi3_extended"
   [(set (match_operand:DI              0 "register_operand" "=r")
 	(sign_extend:DI