Message ID | 20210427060824.259151-1-admin@levyhsu.com |
---|---|
State | New |
Headers | show |
Series | [RISCV] Add Pattern for builtin overflow | expand |
On Tue, Apr 27, 2021 at 12:18 AM Levy Hsu <admin@levyhsu.com> wrote: > > From: LevyHsu <admin@levyhsu.com> > > Added implementation for builtin overflow detection, new patterns are listed below. > > --------------------------------------------------------------- > Addition: > > signed addition (SImode with RV32 || DImode with RV64): > add t0, t1, t2 > slti t3, t2, 0 > slt t4, t0, t1 > bne t3, t4, overflow > > signed addition (SImode with RV64): > add t0, t1, t2 > sext.w t3, t0 > bne t0, t3, overflow The following version has the same instruction count but offers more ILP: add t0, t1, t2 addw t3, t1, t2 bne t0, t3, overflow > > unsigned addition (SImode with RV32 || DImode with RV64): > add t0, t1, t2 > bltu t0, t1, overflow > > unsigned addition (SImode with RV64): > sext.w t3, t1 > addw t0, t1, t2 > bltu t0, t3, overflow I think you can do this in two instructions, similar to the previous pattern: addw t0, t1, t2 bltu t0, t1, overflow > --------------------------------------------------------------- > Subtraction: > > signed subtraction (SImode with RV32 || DImode with RV64): > sub t0, t1, t2 > slti t3, t2, 0 > slt t4, t1, t0 > bne t3, t4, overflow > > signed subtraction (SImode with RV64): > sub t0, t1, t2 > sext.w t3, t0 > bne t0, t3, overflow See analogous addition comment. > > unsigned subtraction (SImode with RV32 || DImode with RV64): > add t0, t1, t2 > bltu t1, t0, overflow > > unsigned subtraction (SImode with RV64): > sext.w t3, t1 > subw t0, t1, t2 > bltu t0, t3, overflow See analogous addition comment. 
> --------------------------------------------------------------- > Multiplication: > > signed multiplication (SImode with RV32 || DImode with RV64): > mulh t4, t1, t2 > mul t0, t1, t2 > srai t5, t0, 31/63 (RV32/64) > bne t4, t5, overflow > > signed multiplication (SImode with RV64): > mul t0, t1, t2 > sext.w t3, t0 > bne t0, t3, overflow > > unsigned multiplication (SImode with RV32 || DImode with RV64 ): > mulhu t4, t1, t2 > mul t0, t1, t2 > bne t4, 0, overflow > > unsigned multiplication (SImode with RV64): > slli t0,t0,32 > slli t1,t1,32 > srli t0,t0,32 > srli t1,t1,32 > mul t0,t0,t1 > srai t5,t0,32 > bne t5, 0, overflow I think you can eliminate the first two right shifts by replacing mul with mulhu... something like: slli rx, rx, 32 slli ry, ry, 32 mulhu rz, rx, ry srli rt, rz, 32 bnez rt, overflow > > --------------------------------------------------------------- > --- > gcc/config/riscv/riscv.c | 8 ++ > gcc/config/riscv/riscv.h | 5 + > gcc/config/riscv/riscv.md | 240 ++++++++++++++++++++++++++++++++++++++ > 3 files changed, 253 insertions(+) > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c > index d489717b2a5..cf94f5c9658 100644 > --- a/gcc/config/riscv/riscv.c > +++ b/gcc/config/riscv/riscv.c > @@ -351,6 +351,14 @@ static const struct riscv_tune_info riscv_tune_info_table[] = { > { "size", generic, &optimize_size_tune_info }, > }; > > +/* Implement TARGET_MIN_ARITHMETIC_PRECISION. */ > + > +static unsigned int > +riscv_min_arithmetic_precision (void) > +{ > + return 32; > +} > + > /* Return the riscv_tune_info entry for the given name string. 
*/ > > static const struct riscv_tune_info * > diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h > index 172c7ca7c98..a6f451b97e3 100644 > --- a/gcc/config/riscv/riscv.h > +++ b/gcc/config/riscv/riscv.h > @@ -121,6 +121,11 @@ extern const char *riscv_default_mtune (int argc, const char **argv); > #define MIN_UNITS_PER_WORD 4 > #endif > > +/* Allows SImode op in builtin overflow pattern, see internal-fn.c. */ > + > +#undef TARGET_MIN_ARITHMETIC_PRECISION > +#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision > + > /* The `Q' extension is not yet supported. */ > #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4) > > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md > index 36012ad1f77..c82017a4bce 100644 > --- a/gcc/config/riscv/riscv.md > +++ b/gcc/config/riscv/riscv.md > @@ -462,6 +462,81 @@ > [(set_attr "type" "arith") > (set_attr "mode" "DI")]) > > +(define_expand "addv<mode>4" > + [(set (match_operand:GPR 0 "register_operand" "=r,r") > + (plus:GPR (match_operand:GPR 1 "register_operand" " r,r") > + (match_operand:GPR 2 "arith_operand" " r,I"))) > + (label_ref (match_operand 3 "" ""))] > + "" > +{ > + if (TARGET_64BIT && <MODE>mode == SImode) > + { > + rtx t3 = gen_reg_rtx (DImode); > + rtx t4 = gen_reg_rtx (DImode); > + rtx t5 = gen_reg_rtx (DImode); > + rtx t6 = gen_reg_rtx (DImode); > + > + if (GET_CODE (operands[1]) != CONST_INT) > + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); > + else > + t4 = operands[1]; > + if (GET_CODE (operands[2]) != CONST_INT) > + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); > + else > + t5 = operands[2]; > + emit_insn (gen_adddi3 (t3, t4, t5)); > + > + emit_move_insn (operands[0], gen_lowpart (SImode, t3)); > + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); > + > + riscv_expand_conditional_branch (operands[3], NE, t6, t3); > + } > + else > + { > + rtx t3 = gen_reg_rtx (<MODE>mode); > + rtx t4 = gen_reg_rtx 
(<MODE>mode); > + > + emit_insn (gen_add3_insn (operands[0], operands[1], operands[2])); > + rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx); > + emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx)); > + rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]); > + > + emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1])); > + riscv_expand_conditional_branch (operands[3], NE, t3, t4); > + } > + DONE; > +}) > + > +(define_expand "uaddv<mode>4" > + [(set (match_operand:GPR 0 "register_operand" "=r,r") > + (plus:GPR (match_operand:GPR 1 "register_operand" " r,r") > + (match_operand:GPR 2 "arith_operand" " r,I"))) > + (label_ref (match_operand 3 "" ""))] > + "" > +{ > + if (TARGET_64BIT && <MODE>mode == SImode) > + { > + rtx t3 = gen_reg_rtx (DImode); > + rtx t4 = gen_reg_rtx (DImode); > + > + if (GET_CODE (operands[1]) != CONST_INT) > + emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); > + else > + t3 = operands[1]; > + emit_insn (gen_addsi3 (operands[0], operands[1], operands[2])); > + emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); > + > + riscv_expand_conditional_branch (operands[3], LTU, t4, t3); > + } > + else > + { > + emit_insn (gen_add3_insn (operands[0], operands[1], operands[2])); > + riscv_expand_conditional_branch (operands[3], LTU, operands[0], operands[1]); > + } > + > + DONE; > +}) > + > (define_insn "*addsi3_extended" > [(set (match_operand:DI 0 "register_operand" "=r,r") > (sign_extend:DI > @@ -518,6 +593,85 @@ > [(set_attr "type" "arith") > (set_attr "mode" "SI")]) > > +(define_expand "subv<mode>4" > + [(set (match_operand:GPR 0 "register_operand" "= r") > + (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ") > + (match_operand:GPR 2 "register_operand" " r"))) > + (label_ref (match_operand 3 "" ""))] > + "" > +{ > + > + if (TARGET_64BIT && <MODE>mode == SImode) > + { > + rtx t3 = gen_reg_rtx (DImode); > + rtx t4 = gen_reg_rtx (DImode); > + rtx t5 = gen_reg_rtx (DImode); 
> + rtx t6 = gen_reg_rtx (DImode); > + > + if (GET_CODE (operands[1]) != CONST_INT) > + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); > + else > + t4 = operands[1]; > + if (GET_CODE (operands[2]) != CONST_INT) > + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); > + else > + t5 = operands[2]; > + emit_insn (gen_subdi3 (t3, t4, t5)); > + > + emit_move_insn (operands[0], gen_lowpart (SImode, t3)); > + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); > + > + riscv_expand_conditional_branch (operands[3], NE, t6, t3); > + } > + else > + { > + rtx t3 = gen_reg_rtx (<MODE>mode); > + rtx t4 = gen_reg_rtx (<MODE>mode); > + > + emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2])); > + > + rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx); > + emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx)); > + > + rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]); > + emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0])); > + > + riscv_expand_conditional_branch (operands[3], NE, t3, t4); > + } > + > + DONE; > +}) > + > +(define_expand "usubv<mode>4" > + [(set (match_operand:GPR 0 "register_operand" "= r") > + (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ") > + (match_operand:GPR 2 "register_operand" " r"))) > + (label_ref (match_operand 3 "" ""))] > + "" > +{ > + if (TARGET_64BIT && <MODE>mode == SImode) > + { > + rtx t3 = gen_reg_rtx (DImode); > + rtx t4 = gen_reg_rtx (DImode); > + > + if (GET_CODE (operands[1]) != CONST_INT) > + emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); > + else > + t3 = operands[1]; > + emit_insn (gen_subsi3 (operands[0], operands[1], operands[2])); > + emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); > + > + riscv_expand_conditional_branch (operands[3], LTU, t3, t4); > + } > + else > + { > + emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2])); > + riscv_expand_conditional_branch 
(operands[3], LTU, operands[1], operands[0]); > + } > + > + DONE; > +}) > + > (define_insn "*subsi3_extended" > [(set (match_operand:DI 0 "register_operand" "= r") > (sign_extend:DI > @@ -609,6 +763,92 @@ > [(set_attr "type" "imul") > (set_attr "mode" "DI")]) > > +(define_expand "mulv<mode>4" > + [(set (match_operand:GPR 0 "register_operand" "=r") > + (mult:GPR (match_operand:GPR 1 "register_operand" " r") > + (match_operand:GPR 2 "register_operand" " r"))) > + (label_ref (match_operand 3 "" ""))] > + "TARGET_MUL" > +{ > + if (TARGET_64BIT && <MODE>mode == SImode) > + { > + rtx t3 = gen_reg_rtx (DImode); > + rtx t4 = gen_reg_rtx (DImode); > + rtx t5 = gen_reg_rtx (DImode); > + rtx t6 = gen_reg_rtx (DImode); > + > + if (GET_CODE (operands[1]) != CONST_INT) > + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); > + else > + t4 = operands[1]; > + if (GET_CODE (operands[2]) != CONST_INT) > + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); > + else > + t5 = operands[2]; > + emit_insn (gen_muldi3 (t3, t4, t5)); > + > + emit_move_insn (operands[0], gen_lowpart (SImode, t3)); > + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); > + > + riscv_expand_conditional_branch (operands[3], NE, t6, t3); > + } > + else > + { > + rtx hp = gen_reg_rtx (<MODE>mode); > + rtx lp = gen_reg_rtx (<MODE>mode); > + > + emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2])); > + emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2])); > + emit_insn (gen_ashr<mode>3 (lp, operands[0], GEN_INT (BITS_PER_WORD - 1))); > + > + riscv_expand_conditional_branch (operands[3], NE, hp, lp); > + } > + > + DONE; > +}) > + > +(define_expand "umulv<mode>4" > + [(set (match_operand:GPR 0 "register_operand" "=r") > + (mult:GPR (match_operand:GPR 1 "register_operand" " r") > + (match_operand:GPR 2 "register_operand" " r"))) > + (label_ref (match_operand 3 "" ""))] > + "TARGET_MUL" > +{ > + if (TARGET_64BIT && <MODE>mode == SImode) > + { > + 
rtx t3 = gen_reg_rtx (DImode); > + rtx t4 = gen_reg_rtx (DImode); > + rtx t5 = gen_reg_rtx (DImode); > + rtx t6 = gen_reg_rtx (DImode); > + > + if (GET_CODE (operands[1]) != CONST_INT) > + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 1)); > + else > + t4 = operands[1]; > + if (GET_CODE (operands[2]) != CONST_INT) > + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 1)); > + else > + t5 = operands[2]; > + emit_insn (gen_muldi3 (t3, t4, t5)); > + > + emit_move_insn (operands[0], gen_lowpart (SImode, t3)); > + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 1)); > + > + riscv_expand_conditional_branch (operands[3], NE, t6, t3); > + } > + else > + { > + rtx hp = gen_reg_rtx (<MODE>mode); > + > + emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2])); > + emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2])); > + > + riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx); > + } > + > + DONE; > +}) > + > (define_insn "*mulsi3_extended" > [(set (match_operand:DI 0 "register_operand" "=r") > (sign_extend:DI > -- > 2.30.1 >
On Tue, Apr 27, 2021 at 12:45 AM Andrew Waterman <andrew@sifive.com> wrote: > > signed addition (SImode with RV64): > > add t0, t1, t2 > > sext.w t3, t0 > > bne t0, t3, overflow > > The following version has the same instruction count but offers more ILP: > > add t0, t1, t2 > addw t3, t1, t2 > bne t0, t3, overflow > This is a good suggestion, but in the interests of making forward progress here, I'd like to accept the patch and then file these as bugzillas as ways to further improve the patch. > > unsigned addition (SImode with RV64): > > sext.w t3, t1 > > addw t0, t1, t2 > > bltu t0, t3, overflow > > I think you can do this in two instructions, similar to the previous > pattern: > > addw t0, t1, t2 > bltu t0, t1, overflow > Likewise. > > signed subtraction (SImode with RV64): > > sub t0, t1, t2 > > sext.w t3, t0 > > bne t0, t3, overflow > > See analogous addition comment. > Likewise. > > > unsigned subtraction (SImode with RV64): > > sext.w t3, t1 > > subw t0, t1, t2 > > bltu t0, t3, overflow > > See analogous addition comment. > Likewise. > > unsigned multiplication (SImode with RV64): > > slli t0,t0,32 > > slli t1,t1,32 > > srli t0,t0,32 > > srli t1,t1,32 > > mul t0,t0,t1 > > srai t5,t0,32 > > bne t5, 0, overflow > > I think you can eliminate the first two right shifts by replacing mul > with mulhu... something like: > > slli rx, rx, 32 > slli ry, ry, 32 > mulhu rz, rx, ry > srli rt, rz, 32 > bnez rt, overflow > Likewise, except this should be a separate bugzilla. Jim
On Wed, Apr 28, 2021 at 1:18 PM Jim Wilson <jimw@sifive.com> wrote: > > On Tue, Apr 27, 2021 at 12:45 AM Andrew Waterman <andrew@sifive.com> wrote: >> >> > signed addition (SImode with RV64): >> > add t0, t1, t2 >> > sext.w t3, t0 >> > bne t0, t3, overflow >> >> The following version has the same instruction count but offers more ILP: >> >> add t0, t1, t2 >> addw t3, t1, t2 >> bne t0, t3, overflow > > > This is a good suggestion, but in the interests of making forward progress here, I'd like to accept the patch and then file these as bugzillas as ways to further improve the patch. Agreed, these potential improvements are definitely not blockers. >> >> > unsigned addition (SImode with RV64): >> > sext.w t3, t1 >> > addw t0, t1, t2 >> > bltu t0, t3, overflow >> >> I think you can do this in two instructions, similar to the previous pattern: >> >> addw t0, t1, t2 >> bltu t0, t1, overflow > > > Likewise. >> >> > signed subtraction (SImode with RV64): >> > sub t0, t1, t2 >> > sext.w t3, t0 >> > bne t0, t3, overflow >> >> See analogous addition comment. > > > Likewise. >> >> >> > unsigned subtraction (SImode with RV64): >> > sext.w t3, t1 >> > subw t0, t1, t2 >> > bltu t0, t3, overflow >> >> See analogous addition comment. > > > Likewise. >> >> > unsigned multiplication (SImode with RV64): >> > slli t0,t0,32 >> > slli t1,t1,32 >> > srli t0,t0,32 >> > srli t1,t1,32 >> > mul t0,t0,t1 >> > srai t5,t0,32 >> > bne t5, 0, overflow >> >> I think you can eliminate the first two right shifts by replacing mul >> with mulhu... something like: >> >> slli rx, rx, 32 >> slli ry, ry, 32 >> mulhu rz, rx, ry >> srli rt, rz, 32 >> bnez rt, overflow > > > Likewise, except this should be a separate bugzilla. > > Jim
On Wed, Apr 28, 2021 at 4:04 PM Andrew Waterman <andrew@sifive.com> wrote: > > This is a good suggestion, but in the interests of making forward > progress here, I'd like to accept the patch and then file these as > bugzillas as ways to further improve the patch. > > Agreed, these potential improvements are definitely not blockers. > Turns out Levy had time to work on the patch after all, and submitted a fourth version with your improvements. Jim
On Thu, Apr 29, 2021 at 3:02 PM Jim Wilson <jimw@sifive.com> wrote: > > On Wed, Apr 28, 2021 at 4:04 PM Andrew Waterman <andrew@sifive.com> wrote: >> >> > This is a good suggestion, but in the interests of making forward progress here, I'd like to accept the patch and then file these as bugzillas as ways to further improve the patch. >> >> Agreed, these potential improvements are definitely not blockers. > > > Turns out Levy had time to work on the patch after all, and submitted a fourth version with your improvements. Cool. Thank you, Levy! > > Jim
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c index d489717b2a5..cf94f5c9658 100644 --- a/gcc/config/riscv/riscv.c +++ b/gcc/config/riscv/riscv.c @@ -351,6 +351,14 @@ static const struct riscv_tune_info riscv_tune_info_table[] = { { "size", generic, &optimize_size_tune_info }, }; +/* Implement TARGET_MIN_ARITHMETIC_PRECISION. */ + +static unsigned int +riscv_min_arithmetic_precision (void) +{ + return 32; +} + /* Return the riscv_tune_info entry for the given name string. */ static const struct riscv_tune_info * diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 172c7ca7c98..a6f451b97e3 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -121,6 +121,11 @@ extern const char *riscv_default_mtune (int argc, const char **argv); #define MIN_UNITS_PER_WORD 4 #endif +/* Allows SImode op in builtin overflow pattern, see internal-fn.c. */ + +#undef TARGET_MIN_ARITHMETIC_PRECISION +#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision + /* The `Q' extension is not yet supported. */ #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 
8 : 4) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 36012ad1f77..c82017a4bce 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -462,6 +462,81 @@ [(set_attr "type" "arith") (set_attr "mode" "DI")]) +(define_expand "addv<mode>4" + [(set (match_operand:GPR 0 "register_operand" "=r,r") + (plus:GPR (match_operand:GPR 1 "register_operand" " r,r") + (match_operand:GPR 2 "arith_operand" " r,I"))) + (label_ref (match_operand 3 "" ""))] + "" +{ + if (TARGET_64BIT && <MODE>mode == SImode) + { + rtx t3 = gen_reg_rtx (DImode); + rtx t4 = gen_reg_rtx (DImode); + rtx t5 = gen_reg_rtx (DImode); + rtx t6 = gen_reg_rtx (DImode); + + if (GET_CODE (operands[1]) != CONST_INT) + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); + else + t4 = operands[1]; + if (GET_CODE (operands[2]) != CONST_INT) + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); + else + t5 = operands[2]; + emit_insn (gen_adddi3 (t3, t4, t5)); + + emit_move_insn (operands[0], gen_lowpart (SImode, t3)); + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); + + riscv_expand_conditional_branch (operands[3], NE, t6, t3); + } + else + { + rtx t3 = gen_reg_rtx (<MODE>mode); + rtx t4 = gen_reg_rtx (<MODE>mode); + + emit_insn (gen_add3_insn (operands[0], operands[1], operands[2])); + rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx); + emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx)); + rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]); + + emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1])); + riscv_expand_conditional_branch (operands[3], NE, t3, t4); + } + DONE; +}) + +(define_expand "uaddv<mode>4" + [(set (match_operand:GPR 0 "register_operand" "=r,r") + (plus:GPR (match_operand:GPR 1 "register_operand" " r,r") + (match_operand:GPR 2 "arith_operand" " r,I"))) + (label_ref (match_operand 3 "" ""))] + "" +{ + if (TARGET_64BIT && <MODE>mode == SImode) + { + rtx t3 = gen_reg_rtx 
(DImode); + rtx t4 = gen_reg_rtx (DImode); + + if (GET_CODE (operands[1]) != CONST_INT) + emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); + else + t3 = operands[1]; + emit_insn (gen_addsi3 (operands[0], operands[1], operands[2])); + emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); + + riscv_expand_conditional_branch (operands[3], LTU, t4, t3); + } + else + { + emit_insn (gen_add3_insn (operands[0], operands[1], operands[2])); + riscv_expand_conditional_branch (operands[3], LTU, operands[0], operands[1]); + } + + DONE; +}) + (define_insn "*addsi3_extended" [(set (match_operand:DI 0 "register_operand" "=r,r") (sign_extend:DI @@ -518,6 +593,85 @@ [(set_attr "type" "arith") (set_attr "mode" "SI")]) +(define_expand "subv<mode>4" + [(set (match_operand:GPR 0 "register_operand" "= r") + (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ") + (match_operand:GPR 2 "register_operand" " r"))) + (label_ref (match_operand 3 "" ""))] + "" +{ + + if (TARGET_64BIT && <MODE>mode == SImode) + { + rtx t3 = gen_reg_rtx (DImode); + rtx t4 = gen_reg_rtx (DImode); + rtx t5 = gen_reg_rtx (DImode); + rtx t6 = gen_reg_rtx (DImode); + + if (GET_CODE (operands[1]) != CONST_INT) + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); + else + t4 = operands[1]; + if (GET_CODE (operands[2]) != CONST_INT) + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); + else + t5 = operands[2]; + emit_insn (gen_subdi3 (t3, t4, t5)); + + emit_move_insn (operands[0], gen_lowpart (SImode, t3)); + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); + + riscv_expand_conditional_branch (operands[3], NE, t6, t3); + } + else + { + rtx t3 = gen_reg_rtx (<MODE>mode); + rtx t4 = gen_reg_rtx (<MODE>mode); + + emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2])); + + rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx); + emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx)); + + rtx cmp2 = gen_rtx_LT 
(<MODE>mode, operands[1], operands[0]); + emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0])); + + riscv_expand_conditional_branch (operands[3], NE, t3, t4); + } + + DONE; +}) + +(define_expand "usubv<mode>4" + [(set (match_operand:GPR 0 "register_operand" "= r") + (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ") + (match_operand:GPR 2 "register_operand" " r"))) + (label_ref (match_operand 3 "" ""))] + "" +{ + if (TARGET_64BIT && <MODE>mode == SImode) + { + rtx t3 = gen_reg_rtx (DImode); + rtx t4 = gen_reg_rtx (DImode); + + if (GET_CODE (operands[1]) != CONST_INT) + emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); + else + t3 = operands[1]; + emit_insn (gen_subsi3 (operands[0], operands[1], operands[2])); + emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); + + riscv_expand_conditional_branch (operands[3], LTU, t3, t4); + } + else + { + emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2])); + riscv_expand_conditional_branch (operands[3], LTU, operands[1], operands[0]); + } + + DONE; +}) + (define_insn "*subsi3_extended" [(set (match_operand:DI 0 "register_operand" "= r") (sign_extend:DI @@ -609,6 +763,92 @@ [(set_attr "type" "imul") (set_attr "mode" "DI")]) +(define_expand "mulv<mode>4" + [(set (match_operand:GPR 0 "register_operand" "=r") + (mult:GPR (match_operand:GPR 1 "register_operand" " r") + (match_operand:GPR 2 "register_operand" " r"))) + (label_ref (match_operand 3 "" ""))] + "TARGET_MUL" +{ + if (TARGET_64BIT && <MODE>mode == SImode) + { + rtx t3 = gen_reg_rtx (DImode); + rtx t4 = gen_reg_rtx (DImode); + rtx t5 = gen_reg_rtx (DImode); + rtx t6 = gen_reg_rtx (DImode); + + if (GET_CODE (operands[1]) != CONST_INT) + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); + else + t4 = operands[1]; + if (GET_CODE (operands[2]) != CONST_INT) + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); + else + t5 = operands[2]; + emit_insn (gen_muldi3 (t3, t4, t5)); + + 
emit_move_insn (operands[0], gen_lowpart (SImode, t3)); + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); + + riscv_expand_conditional_branch (operands[3], NE, t6, t3); + } + else + { + rtx hp = gen_reg_rtx (<MODE>mode); + rtx lp = gen_reg_rtx (<MODE>mode); + + emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2])); + emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2])); + emit_insn (gen_ashr<mode>3 (lp, operands[0], GEN_INT (BITS_PER_WORD - 1))); + + riscv_expand_conditional_branch (operands[3], NE, hp, lp); + } + + DONE; +}) + +(define_expand "umulv<mode>4" + [(set (match_operand:GPR 0 "register_operand" "=r") + (mult:GPR (match_operand:GPR 1 "register_operand" " r") + (match_operand:GPR 2 "register_operand" " r"))) + (label_ref (match_operand 3 "" ""))] + "TARGET_MUL" +{ + if (TARGET_64BIT && <MODE>mode == SImode) + { + rtx t3 = gen_reg_rtx (DImode); + rtx t4 = gen_reg_rtx (DImode); + rtx t5 = gen_reg_rtx (DImode); + rtx t6 = gen_reg_rtx (DImode); + + if (GET_CODE (operands[1]) != CONST_INT) + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 1)); + else + t4 = operands[1]; + if (GET_CODE (operands[2]) != CONST_INT) + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 1)); + else + t5 = operands[2]; + emit_insn (gen_muldi3 (t3, t4, t5)); + + emit_move_insn (operands[0], gen_lowpart (SImode, t3)); + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 1)); + + riscv_expand_conditional_branch (operands[3], NE, t6, t3); + } + else + { + rtx hp = gen_reg_rtx (<MODE>mode); + + emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2])); + emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2])); + + riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx); + } + + DONE; +}) + (define_insn "*mulsi3_extended" [(set (match_operand:DI 0 "register_operand" "=r") (sign_extend:DI
From: LevyHsu <admin@levyhsu.com> Added implementation for builtin overflow detection, new patterns are listed below. --------------------------------------------------------------- Addition: signed addition (SImode with RV32 || DImode with RV64): add t0, t1, t2 slti t3, t2, 0 slt t4, t0, t1 bne t3, t4, overflow signed addition (SImode with RV64): add t0, t1, t2 sext.w t3, t0 bne t0, t3, overflow unsigned addition (SImode with RV32 || DImode with RV64): add t0, t1, t2 bltu t0, t1, overflow unsigned addition (SImode with RV64): sext.w t3, t1 addw t0, t1, t2 bltu t0, t3, overflow --------------------------------------------------------------- Subtraction: signed subtraction (SImode with RV32 || DImode with RV64): sub t0, t1, t2 slti t3, t2, 0 slt t4, t1, t0 bne t3, t4, overflow signed subtraction (SImode with RV64): sub t0, t1, t2 sext.w t3, t0 bne t0, t3, overflow unsigned subtraction (SImode with RV32 || DImode with RV64): sub t0, t1, t2 bltu t1, t0, overflow unsigned subtraction (SImode with RV64): sext.w t3, t1 subw t0, t1, t2 bltu t0, t3, overflow --------------------------------------------------------------- Multiplication: signed multiplication (SImode with RV32 || DImode with RV64): mulh t4, t1, t2 mul t0, t1, t2 srai t5, t0, 31/63 (RV32/64) bne t4, t5, overflow signed multiplication (SImode with RV64): mul t0, t1, t2 sext.w t3, t0 bne t0, t3, overflow unsigned multiplication (SImode with RV32 || DImode with RV64 ): mulhu t4, t1, t2 mul t0, t1, t2 bne t4, 0, overflow unsigned multiplication (SImode with RV64): slli t0,t0,32 slli t1,t1,32 srli t0,t0,32 srli t1,t1,32 mul t0,t0,t1 srai t5,t0,32 bne t5, 0, overflow --------------------------------------------------------------- --- gcc/config/riscv/riscv.c | 8 ++ gcc/config/riscv/riscv.h | 5 + gcc/config/riscv/riscv.md | 240 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 253 insertions(+)