diff mbox series

RISC-V: Add implementation for builtin overflow

Message ID 20210121102436.3674778-1-admin@levyhsu.com
State New
Headers show
Series RISC-V: Add implementation for builtin overflow | expand

Commit Message

Levy Hsu Jan. 21, 2021, 10:24 a.m. UTC
Added implementation for builtin overflow detection, new patterns are listed below.

signed addition:
	add	t0, t1, t2
	slti	t3, t2, 0
	slt	t4, t0, t1
	bne	t3, t4, overflow
unsigned addition:
	add	t0, t1, t2
	bltu	t0, t1, overflow

signed subtraction:
	sub	t0, t1, t2
	slti	t3, t2, 0
	slt	t4, t1, t0
	bne	t3, t4, overflow
signed subtraction:
	add	t0, t1, t2
	bltu	t1, t0, overflow

signed multiplication:
	mulh	t4, t1, t2
	mul		t0, t1, t2
	srai		t5, t0, 31/63 (RV32/64)
	bne		t4, t5, overflow
unsigned multiplication:
	mulhu	t4, t1, t2
	mul		t0, t1, t2
	bne		t4, 0, overflow

gcc/Changelog:
	* config/riscv/riscv.md:	Add expand pattern for <u>addv<mode>4, <u>subv<mode>4 and <u>mulv<mode>4
	* config/riscv/riscv.h:	Defined riscv_min_arithmetic_precision for hook TARGET_MIN_ARITHMETIC_PRECISION
	* config/riscv/riscv.c:	Set the riscv_min_arithmetic_precision to 32, allows SImode op calls for addw/subw under RV64.

---
 gcc/config/riscv/riscv.c  |   8 +++
 gcc/config/riscv/riscv.h  |   5 ++
 gcc/config/riscv/riscv.md | 104 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 117 insertions(+)

Comments

Jim Wilson Feb. 17, 2021, 12:57 a.m. UTC | #1
On Thu, Jan 21, 2021 at 2:25 AM Levy <admin@levyhsu.com> wrote:

> Added implementation for builtin overflow detection, new patterns are
> listed below.
>

For rv32 SImode and rv64 DImode, the unsigned add/sub and signed/unsigned
mul patterns seem to give the same result as the default code generation.
That has me wondering if we really need the patterns.

For rv64 SImode, the signed add/sub patterns are generating worse code. Signed
add overflow without the pattern is
        add     a1,a0,a1
        sext.w  a0,a1
        bne     a1,a0,.L23
and with the pattern is
        mv      a5,a0
        addw    a0,a0,a1
        slt     a5,a0,a5
        slti    a1,a1,0
        bne     a1,a5,.L27
ignoring the register allocation issue we have one more instruction using the
pattern which is undesirable.  This needs a fix.  We could just use X instead
of GPR.  We could then optionally add rv64 SImode patterns.  If we don't
add them it looks like a bug, so I think we should add another pattern for
this.

For rv64 SImode, the unsigned add/sub patterns are generating better code.  We
have unnecessary zero extends without the patterns.  This suggests that the
unsigned add/sub patterns really are useful.  Only 1 of the 3 expansions is
useful, but if we only implement one of the 3 expansions that looks like a
bug so I think this is OK as is.

The signed/unsigned mul patterns only support rv32 SImode and rv64 DImode.
The rv64 SImode support is missing.  But even though there is no pattern
for it we can still get worse code for it.  An unpatched tree for signed
mul gives
        mul     a1,a0,a1
        sext.w  a0,a1
        bne     a1,a0,.L37
and a patched tree gives
        mul     a0,a0,a1
        sraiw   a4,a0,31
        srai    a5,a0,32
        bne     a4,a5,.L43
On the other hand, an unpatched tree for unsigned mul gives
        slli    a0,a0,32
        slli    a1,a1,32
        srli    a0,a0,32
        srli    a1,a1,32
        mul     a5,a0,a1
        slli    a4,a5,32
        srli    a4,a4,32
        bne     a5,a4,.L61
and a patched tree gives
        slli    a0,a0,32
        slli    a1,a1,32
        srli    a0,a0,32
        srli    a1,a1,32
        mul     a0,a0,a1
        srai    a5,a0,32
        bne     a5,zero,.L69
which is one instruction shorter.  In both cases, the difference is due to
the hook to set min precision to 32.  Presumably we need this hook for
add/sub, so we need to add the missing rv64 SImode signed mul pattern.
Since we have to have one of them, we may as well have all of them for
completeness.

There are a few minor style issues.

In the define_expands, the patterns aren't formatted properly.  An open
parenthesis should be aligned to an open paren at the same depth on the
line above.  If you are an emacs user, you can use M-x emacs-lisp-mode and
hit tab to align them.

In a define_expand the constraints aren't useful and shouldn't be
included.  In a define_expand that always emits its own RTL the pattern
isn't useful either.  Only the operand modes, numbers, and predicates are
useful.  This is why some define_expands only list the operands and don't
include the pattern.  But including the pattern is OK, it just isn't
required.  So the only real fix we need here is to drop the constraints.

I attached the testcase I used for testing purposes.  Every function should
be same size or smaller with the patch.

Jim
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index d489717b2a5..cf94f5c9658 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -351,6 +351,14 @@  static const struct riscv_tune_info riscv_tune_info_table[] = {
   { "size", generic, &optimize_size_tune_info },
 };
 
+/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
+
+static unsigned int
+riscv_min_arithmetic_precision (void)
+{
+  return 32;
+}
+
 /* Return the riscv_tune_info entry for the given name string.  */
 
 static const struct riscv_tune_info *
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 172c7ca7c98..62cebd08cff 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -115,6 +115,11 @@  extern const char *riscv_default_mtune (int argc, const char **argv);
 
 #define MAX_BITS_PER_WORD 64
 
+/* Allows SImode op in builtin overflow pattern, see internal-fn.c.  */
+
+#undef TARGET_MIN_ARITHMETIC_PRECISION
+#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
+
 /* Width of a word, in units (bytes).  */
 #define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4)
 #ifndef IN_LIBGCC2
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 36012ad1f77..a95654d77e9 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -462,6 +462,41 @@ 
   [(set_attr "type" "arith")
    (set_attr "mode" "DI")])
 
+(define_expand "addv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "=r,r")
+  (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
+     (match_operand:GPR 2 "arith_operand"    " r,I")))
+  (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  rtx t3 = gen_reg_rtx (<MODE>mode);
+  rtx t4 = gen_reg_rtx (<MODE>mode);
+
+  emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
+
+  rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
+  emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
+
+  rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]);
+  emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1]));
+
+  riscv_expand_conditional_branch (operands[3], NE, t3, t4);
+  DONE;
+})
+
+(define_expand "uaddv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "=r,r")
+  (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
+     (match_operand:GPR 2 "arith_operand"    " r,I")))
+  (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
+  riscv_expand_conditional_branch (operands[3], LTU, operands[0], operands[1]);
+
+	DONE;
+})
+
 (define_insn "*addsi3_extended"
   [(set (match_operand:DI               0 "register_operand" "=r,r")
 	(sign_extend:DI
@@ -518,6 +553,41 @@ 
   [(set_attr "type" "arith")
    (set_attr "mode" "SI")])
 
+(define_expand "subv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "= r")
+  (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
+       (match_operand:GPR 2 "register_operand" "  r")))
+  (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  rtx t3 = gen_reg_rtx (<MODE>mode);
+  rtx t4 = gen_reg_rtx (<MODE>mode);
+
+  emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
+
+  rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
+  emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
+
+  rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]);
+  emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0]));
+
+  riscv_expand_conditional_branch (operands[3], NE, t3, t4);
+  DONE;
+})
+
+(define_expand "usubv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "= r")
+  (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
+      (match_operand:GPR 2 "register_operand" "  r")))
+  (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
+  riscv_expand_conditional_branch (operands[3], LTU, operands[1], operands[0]);
+
+  DONE;
+})
+
 (define_insn "*subsi3_extended"
   [(set (match_operand:DI               0 "register_operand" "= r")
 	(sign_extend:DI
@@ -609,6 +679,40 @@ 
   [(set_attr "type" "imul")
    (set_attr "mode" "DI")])
 
+(define_expand "umulv<mode>4"
+  [(set (match_operand:X 0 "register_operand" "=r")
+  (mult:X (match_operand:X 1 "register_operand" " r")
+     (match_operand:X 2 "register_operand" " r")))
+  (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  rtx hp = gen_reg_rtx (<MODE>mode);
+
+  emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2]));
+  emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+
+  riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx);
+	DONE;
+})
+
+(define_expand "mulv<mode>4"
+  [(set (match_operand:X 0 "register_operand" "=r")
+  (mult:X (match_operand:X 1 "register_operand" " r")
+     (match_operand:X 2 "register_operand" " r")))
+  (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  rtx hp = gen_reg_rtx (<MODE>mode);
+  rtx lp = gen_reg_rtx (<MODE>mode);
+
+  emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2]));
+  emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+  emit_insn (gen_ashr<mode>3 (lp, operands[0], GEN_INT (BITS_PER_WORD - 1)));
+
+  riscv_expand_conditional_branch (operands[3], NE, hp, lp);
+  DONE;
+})
+
 (define_insn "*mulsi3_extended"
   [(set (match_operand:DI              0 "register_operand" "=r")
 	(sign_extend:DI