RISC-V: Optimize slli(.uw)? + addw + zext.w into sh[123]add + zext.w

Message ID 20221108195730.2701496-1-philipp.tomsich@vrull.eu
State New
Series RISC-V: Optimize slli(.uw)? + addw + zext.w into sh[123]add + zext.w

Commit Message

Philipp Tomsich Nov. 8, 2022, 7:57 p.m. UTC
gcc/ChangeLog:

	* config/riscv/bitmanip.md: Handle corner-cases for combine
	when chaining slli(.uw)? + addw

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/zba-shNadd-04.c: New test.

---

 gcc/config/riscv/bitmanip.md                  | 49 +++++++++++++++++++
 gcc/config/riscv/riscv-protos.h               |  1 +
 gcc/config/riscv/riscv.cc                     |  7 +++
 .../gcc.target/riscv/zba-shNadd-04.c          | 23 +++++++++
 4 files changed, 80 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
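
To illustrate, the following is essentially sub2 from the new test case; the
before/after sequences are only a sketch of the expected code (exact register
allocation and scheduling depend on compiler version and flags, e.g.
-march=rv64gc_zba -mabi=lp64 -O2):

  long long sub2 (unsigned long long a, unsigned long long b)
  {
    /* Zero-extended 32-bit result of a shift-and-add on 64-bit operands.  */
    return (unsigned int)(a + (b << 1));
  }

  /* Sequence named in the subject (before):   With this patch:
         slli    a1,a1,1                           sh1add  a0,a1,a0
         addw    a0,a1,a0                          zext.w  a0,a0
         zext.w  a0,a0                             ret
         ret                                                          */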

Comments

Jeff Law Nov. 18, 2022, 7:52 p.m. UTC | #1
On 11/8/22 12:57, Philipp Tomsich wrote:
> gcc/ChangeLog:
>
> 	* config/riscv/bitmanip.md: Handle corner-cases for combine
> 	when chaining slli(.uw)? + addw
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/riscv/zba-shNadd-04.c: New test.

OK.

Something to consider.  We're gaining a lot of

(subreg:SI (reg:DI) 0) kinds of operands.


Would it make sense to make an operand predicate that accepted

(reg:SI) or (subreg:SI (reg:DI) 0)?


It will reduce my complaints about subregs :-)  But the real reason I'm 
suggesting we consider adding such a predicate is, AFAICT, it gives 
combine a chance to eliminate the subreg.  I haven't actually tested 
this, but it seems like it might be worth a quick experiment independent 
of these patches (and probably targeted towards gcc-14 rather than gcc-13).



jeff
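
For illustration, such a predicate might be sketched roughly as below (the
name is hypothetical and the sketch untested; it is not part of this patch,
and the operand's mode would still come from the match_operand using it):

  (define_predicate "si_lowpart_register_operand"
    ;; Accept a plain register or the lowpart subreg of a register.
    (ior (match_code "reg")
         (and (match_code "subreg")
              (match_test "REG_P (SUBREG_REG (op)) && subreg_lowpart_p (op)"))))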
Philipp Tomsich Nov. 18, 2022, 7:57 p.m. UTC | #2
Applied to master. Thanks.
--Philipp.
Philipp Tomsich Nov. 18, 2022, 7:59 p.m. UTC | #3
On Fri, 18 Nov 2022 at 20:52, Jeff Law <jeffreyalaw@gmail.com> wrote:

> Something to consider.  We're gaining a lot of
>
> (subreg:SI (reg:DI) 0) kinds of operands.
>
>
> Would it make sense to make an operand predicate that accepted
>
> (reg:SI) or (subreg:SI (reg:DI) 0)?
>
>
> It will reduce my complaints about subregs :-)  But the real reason I'm
> suggesting we consider adding such a predicate is, AFAICT, it gives
> combine a chance to eliminate the subreg.  I haven't actually tested
> this, but it seems like it might be worth a quick experiment independent
> of these patches (and probably targeted towards gcc-14 rather than gcc-13).
>

I like the idea. Definitely something to consider. We'll give this a try.
--Philipp.

Patch

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 726a07b0d90..cbc00455b67 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -56,6 +56,55 @@ 
    [(set (match_dup 5) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))
     (set (match_dup 0) (sign_extend:DI (div:SI (subreg:SI (match_dup 5) 0) (subreg:SI (match_dup 4) 0))))])
 
+; Zba does not provide W-forms of sh[123]add(.uw)?, which leads to an
+; interesting irregularity: we can generate a signed 32-bit result
+; using slli(.uw)? + addw, but an unsigned 32-bit result can be
+; generated more efficiently as sh[123]add + zext.w (the .uw can be
+; dropped if we zero-extend the output anyway).
+;
+; To enable this optimization, we split [ slli(.uw)?, addw, zext.w ]
+; into [ sh[123]add, zext.w ] for use during combine.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+	(zero_extend:DI (plus:SI (ashift:SI (subreg:SI (match_operand:DI 1 "register_operand") 0)
+						       (match_operand:QI 2 "imm123_operand"))
+				 (subreg:SI (match_operand:DI 3 "register_operand") 0))))]
+  "TARGET_64BIT && TARGET_ZBA"
+  [(set (match_dup 0) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))
+   (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 0) 0)))])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+	(zero_extend:DI (plus:SI (subreg:SI (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
+							       (match_operand:QI 2 "imm123_operand"))
+						    (match_operand:DI 3 "consecutive_bits_operand")) 0)
+				 (subreg:SI (match_operand:DI 4 "register_operand") 0))))]
+  "TARGET_64BIT && TARGET_ZBA
+   && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3]))"
+  [(set (match_dup 0) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 4)))
+   (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 0) 0)))])
+
+; Make sure that an andi followed by a sh[123]add remains a two-instruction
+; sequence and is not torn apart into slli, srli, add.
+(define_insn_and_split "*andi_add.uw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(plus:DI (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
+				    (match_operand:QI 2 "imm123_operand" "Ds3"))
+			 (match_operand:DI 3 "consecutive_bits_operand" ""))
+		 (match_operand:DI 4 "register_operand" "r")))
+   (clobber (match_scratch:DI 5 "=&r"))]
+  "TARGET_64BIT && TARGET_ZBA
+   && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3]))
+   && SMALL_OPERAND (INTVAL (operands[3]) >> INTVAL (operands[2]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 5) (and:DI (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (plus:DI (ashift:DI (match_dup 5) (match_dup 2))
+			       (match_dup 4)))]
+{
+	operands[3] = GEN_INT (INTVAL (operands[3]) >> INTVAL (operands[2]));
+})
+
 (define_insn "*shNadduw"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(plus:DI
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a718bb62b4..2ec3af05aa4 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -77,6 +77,7 @@  extern bool riscv_gpr_save_operation_p (rtx);
 extern void riscv_reinit (void);
 extern poly_uint64 riscv_regmode_natural_size (machine_mode);
 extern bool riscv_v_ext_vector_mode_p (machine_mode);
+extern bool riscv_shamt_matches_mask_p (int, HOST_WIDE_INT);
 
 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0b2c4b3599d..5a632058003 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -6497,6 +6497,13 @@  riscv_regmode_natural_size (machine_mode mode)
   return UNITS_PER_WORD;
 }
 
+/* Return true if a shift-amount matches the trailing cleared bits on a bitmask.  */
+bool
+riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
+{
+  return shamt == ctz_hwi (mask);
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
new file mode 100644
index 00000000000..abed1491039
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long sub1(unsigned long long a, unsigned long long b)
+{
+  b = (b << 32) >> 31;
+  unsigned int x = a + b;
+  return x;
+}
+
+long long sub2(unsigned long long a, unsigned long long b)
+{
+  return (unsigned int)(a + (b << 1));
+}
+
+long long sub3(unsigned long long a, unsigned long long b)
+{
+  return (a + (b << 1)) & ~0u;
+}
+
+/* { dg-final { scan-assembler-times "sh1add" 3 } } */
+/* { dg-final { scan-assembler-times "zext.w\t" 3 } } */