diff mbox series

[committed] pru: Optimize DI shifts

Message ID 20221009114049.29943-1-dimitar@dinux.eu
State New
Headers show
Series [committed] pru: Optimize DI shifts | expand

Commit Message

Dimitar Dimitrov Oct. 9, 2022, 11:40 a.m. UTC
This patch improves code generation for the PRU backend.  Committed to
trunk.

If the number of shift positions is a constant, then the DI shift
operation is expanded to a sequence of 2 to 4 machine instructions.
That is more efficient than the default action to call libgcc.

gcc/ChangeLog:

	* config/pru/pru.md (lshrdi3): New expand pattern.
	(ashldi3): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/pru/ashiftdi-1.c: New test.
	* gcc.target/pru/lshiftrtdi-1.c: New test.

Signed-off-by: Dimitar Dimitrov <dimitar@dinux.eu>
---
 gcc/config/pru/pru.md                       | 196 ++++++++++++++++++++
 gcc/testsuite/gcc.target/pru/ashiftdi-1.c   |  53 ++++++
 gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c |  53 ++++++
 3 files changed, 302 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/pru/ashiftdi-1.c
 create mode 100644 gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c
diff mbox series

Patch

diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md
index 144cd35d809..53ffff07708 100644
--- a/gcc/config/pru/pru.md
+++ b/gcc/config/pru/pru.md
@@ -703,6 +703,202 @@  (define_insn "ashr<mode>3_single"
   [(set_attr "type" "alu")
    (set_attr "length" "12")])
 
+
+; 64-bit LSHIFTRT with a constant shift count can be expanded into
+; a more efficient code sequence than a variable register shift.
+;
+; 1. For shift >= 32:
+;    dst_lo = (src_hi >> (shift - 32))
+;    dst_hi = 0
+;
+; 2. For shift==1 there is no need for a temporary:
+;    dst_lo = (src_lo >> 1)
+;    if (src_hi & 1)
+;       dst_lo |= (1 << 31)
+;    dst_hi = (src_hi >> 1)
+;
+; 3. For shift < 32:
+;    dst_lo = (src_lo >> shift)
+;    tmp = (src_hi << (32 - shift))
+;    dst_lo |= tmp
+;    dst_hi = (src_hi >> shift)
+;
+; 4. For shift in a register, or a constant outside [1, 63]:
+;    Fall back to calling libgcc.
+(define_expand "lshrdi3"
+  [(set (match_operand:DI 0 "register_operand")
+	  (lshiftrt:DI
+	    (match_operand:DI 1 "register_operand")
+	    (match_operand:QI 2 "const_int_operand")))]
+  ""
+{
+  gcc_assert (CONST_INT_P (operands[2]));
+
+  const int nshifts = INTVAL (operands[2]);
+  /* Only counts in [1, 63] keep every SImode shift emitted below within
+     [0, 31]; FAIL on anything else instead of emitting such a shift.  */
+  if (nshifts < 1 || nshifts > 63)
+    FAIL;
+
+  rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+
+  if (nshifts >= 32)
+    {
+      emit_insn (gen_rtx_SET (dst_lo,
+			      gen_rtx_LSHIFTRT (SImode,
+						src_hi,
+						GEN_INT (nshifts - 32))));
+      emit_insn (gen_rtx_SET (dst_hi, const0_rtx));
+      DONE;
+    }
+
+  gcc_assert (can_create_pseudo_p ());
+
+  /* The expansions which follow are safe only if DST_LO and SRC_HI
+     do not overlap.  If they do, then fix by using a temporary register.
+     Overlapping of DST_HI and SRC_LO is safe because by the time DST_HI
+     is set, SRC_LO is no longer live.  */
+  if (reg_overlap_mentioned_p (dst_lo, src_hi))
+    {
+      rtx new_src_hi = gen_reg_rtx (SImode);
+
+      emit_move_insn (new_src_hi, src_hi);
+      src_hi = new_src_hi;
+    }
+
+  if (nshifts == 1)
+    {
+      emit_insn (gen_rtx_SET (dst_lo,
+			      gen_rtx_LSHIFTRT (SImode, src_lo, const1_rtx)));
+
+      /* The code generated by `genemit' would create a LABEL_REF.  */
+      rtx_code_label *skip_hiset_label = gen_label_rtx ();
+      rtx j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
+						      SImode,
+						      src_hi,
+						      GEN_INT (0),
+						      skip_hiset_label));
+      JUMP_LABEL (j) = skip_hiset_label;
+      LABEL_NUSES (skip_hiset_label)++;
+
+      emit_insn (gen_iorsi3 (dst_lo, dst_lo,
+			     gen_int_mode (HOST_WIDE_INT_1U << 31, SImode)));
+      emit_label (skip_hiset_label);
+      emit_insn (gen_rtx_SET (dst_hi,
+			      gen_rtx_LSHIFTRT (SImode, src_hi, const1_rtx)));
+      DONE;
+    }
+
+  /* Remaining case: 1 < NSHIFTS < 32.  */
+  rtx tmpval = gen_reg_rtx (SImode);
+
+  emit_insn (gen_rtx_SET (dst_lo,
+			  gen_rtx_LSHIFTRT (SImode,
+					    src_lo,
+					    GEN_INT (nshifts))));
+  emit_insn (gen_rtx_SET (tmpval,
+			  gen_rtx_ASHIFT (SImode,
+					  src_hi,
+					  GEN_INT (32 - nshifts))));
+  emit_insn (gen_iorsi3 (dst_lo, dst_lo, tmpval));
+  emit_insn (gen_rtx_SET (dst_hi,
+			  gen_rtx_LSHIFTRT (SImode,
+					    src_hi,
+					    GEN_INT (nshifts))));
+  DONE;
+})
+
+; 64-bit ASHIFT with a constant shift count in [1, 63] is expanded
+; into a more efficient code sequence than the libgcc call.  A shift
+; count in a register, or a constant outside that range, still falls
+; back to libgcc.
+(define_expand "ashldi3"
+  [(set (match_operand:DI 0 "register_operand")
+	  (ashift:DI
+	    (match_operand:DI 1 "register_operand")
+	    (match_operand:QI 2 "const_int_operand")))]
+  ""
+{
+  gcc_assert (CONST_INT_P (operands[2]));
+
+  const int nshifts = INTVAL (operands[2]);
+  /* Only counts in [1, 63] keep every SImode shift emitted below within
+     [0, 31]; FAIL on anything else instead of emitting such a shift.  */
+  if (nshifts < 1 || nshifts > 63)
+    FAIL;
+
+  rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+
+  if (nshifts >= 32)
+    {
+      emit_insn (gen_rtx_SET (dst_hi,
+			      gen_rtx_ASHIFT (SImode,
+					      src_lo,
+					      GEN_INT (nshifts - 32))));
+      emit_insn (gen_rtx_SET (dst_lo, const0_rtx));
+      DONE;
+    }
+
+  gcc_assert (can_create_pseudo_p ());
+
+  /* The expansions which follow are safe only if DST_HI and SRC_LO
+     do not overlap.  If they do, then fix by using a temporary register.
+     Overlapping of DST_LO and SRC_HI is safe because by the time DST_LO
+     is set, SRC_HI is no longer live.  */
+  if (reg_overlap_mentioned_p (dst_hi, src_lo))
+    {
+      rtx new_src_lo = gen_reg_rtx (SImode);
+
+      emit_move_insn (new_src_lo, src_lo);
+      src_lo = new_src_lo;
+    }
+
+  if (nshifts == 1)
+    {
+      emit_insn (gen_rtx_SET (dst_hi,
+			      gen_rtx_ASHIFT (SImode, src_hi, const1_rtx)));
+
+      /* Branch around the OR when bit 31 of SRC_LO is clear.  */
+      rtx_code_label *skip_hiset_label = gen_label_rtx ();
+      rtx j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
+						      SImode,
+						      src_lo,
+						      GEN_INT (31),
+						      skip_hiset_label));
+      JUMP_LABEL (j) = skip_hiset_label;
+      LABEL_NUSES (skip_hiset_label)++;
+
+      emit_insn (gen_iorsi3 (dst_hi, dst_hi, GEN_INT (1 << 0)));
+      emit_label (skip_hiset_label);
+      emit_insn (gen_rtx_SET (dst_lo,
+			      gen_rtx_ASHIFT (SImode, src_lo, const1_rtx)));
+      DONE;
+    }
+
+  /* Remaining case: 1 < NSHIFTS < 32.  */
+  rtx tmpval = gen_reg_rtx (SImode);
+
+  emit_insn (gen_rtx_SET (dst_hi,
+			  gen_rtx_ASHIFT (SImode,
+					  src_hi,
+					  GEN_INT (nshifts))));
+  emit_insn (gen_rtx_SET (tmpval,
+			  gen_rtx_LSHIFTRT (SImode,
+					    src_lo,
+					    GEN_INT (32 - nshifts))));
+  emit_insn (gen_iorsi3 (dst_hi, dst_hi, tmpval));
+  emit_insn (gen_rtx_SET (dst_lo,
+			  gen_rtx_ASHIFT (SImode,
+					  src_lo,
+					  GEN_INT (nshifts))));
+  DONE;
+})
 
 ;; Include ALU patterns with zero-extension of operands.  That's where
 ;; the real insns are defined.
diff --git a/gcc/testsuite/gcc.target/pru/ashiftdi-1.c b/gcc/testsuite/gcc.target/pru/ashiftdi-1.c
new file mode 100644
index 00000000000..516e5a86102
--- /dev/null
+++ b/gcc/testsuite/gcc.target/pru/ashiftdi-1.c
@@ -0,0 +1,53 @@ 
+/* Functional test for DI left shift by the constants 1, 10, 32 and 36.  */
+
+/* { dg-do run } */
+/* { dg-options "-pedantic-errors" } */
+
+#include <stddef.h>
+#include <stdint.h>
+
+extern void abort (void);
+
+uint64_t __attribute__((noinline)) ashift_1 (uint64_t a)
+{
+  return a << 1;  /* Count 1: bit 31 carries into bit 32.  */
+}
+
+uint64_t __attribute__((noinline)) ashift_10 (uint64_t a)
+{
+  return a << 10;  /* Count below 32: low-word bits cross into the high word.  */
+}
+
+uint64_t __attribute__((noinline)) ashift_32 (uint64_t a)
+{
+  return a << 32;  /* Count 32: the low word becomes the high word as is.  */
+}
+
+uint64_t __attribute__((noinline)) ashift_36 (uint64_t a)
+{
+  return a << 36;  /* Count above 32: high word is the low word << 4.  */
+}
+
+int
+main (int argc, char** argv)
+{
+  if (ashift_1 (0xaaaa5555aaaa5555ull) != 0x5554aaab5554aaaaull)
+    abort();
+  if (ashift_10 (0xaaaa5555aaaa5555ull) != 0xa95556aaa9555400ull)
+    abort();
+  if (ashift_32 (0xaaaa5555aaaa5555ull) != 0xaaaa555500000000ull)
+    abort();
+  if (ashift_36 (0xaaaa5555aaaa5555ull) != 0xaaa5555000000000ull)
+    abort();
+  /* Repeat all four shift counts with a second input value.  */
+  if (ashift_1 (0x1234567822334455ull) != 0x2468acf0446688aaull)
+    abort();
+  if (ashift_10 (0x1234567822334455ull) != 0xd159e088cd115400ull)
+    abort();
+  if (ashift_32 (0x1234567822334455ull) != 0x2233445500000000ull)
+    abort();
+  if (ashift_36 (0x1234567822334455ull) != 0x2334455000000000ull)
+    abort();
+
+  return 0;  /* Reached only if no comparison above called abort.  */
+}
diff --git a/gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c b/gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c
new file mode 100644
index 00000000000..7adae6ccc13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c
@@ -0,0 +1,53 @@ 
+/* Functional test for DI right shift by the constants 1, 10, 32 and 36.  */
+
+/* { dg-do run } */
+/* { dg-options "-pedantic-errors" } */
+
+#include <stddef.h>
+#include <stdint.h>
+
+extern void abort (void);
+
+uint64_t __attribute__((noinline)) lshift_1 (uint64_t a)
+{
+  return a >> 1;  /* Count 1: bit 32 drops into bit 31.  */
+}
+
+uint64_t __attribute__((noinline)) lshift_10 (uint64_t a)
+{
+  return a >> 10;  /* Count below 32: high-word bits cross into the low word.  */
+}
+
+uint64_t __attribute__((noinline)) lshift_32 (uint64_t a)
+{
+  return a >> 32;  /* Count 32: the high word becomes the low word as is.  */
+}
+
+uint64_t __attribute__((noinline)) lshift_36 (uint64_t a)
+{
+  return a >> 36;  /* Count above 32: low word is the high word >> 4.  */
+}
+
+int
+main (int argc, char** argv)
+{
+  if (lshift_1 (0xaaaa5555aaaa5555ull) != 0x55552aaad5552aaaull)
+    abort();
+  if (lshift_10 (0xaaaa5555aaaa5555ull) != 0x002aaa95556aaa95ull)
+    abort();
+  if (lshift_32 (0xaaaa5555aaaa5555ull) != 0x00000000aaaa5555ull)
+    abort();
+  if (lshift_36 (0xaaaa5555aaaa5555ull) != 0x000000000aaaa555ull)
+    abort();
+  /* Repeat all four shift counts with a second input value.  */
+  if (lshift_1 (0x1234567822334455ull) != 0x091a2b3c1119a22aull)
+    abort();
+  if (lshift_10 (0x1234567822334455ull) != 0x00048d159e088cd1ull)
+    abort();
+  if (lshift_32 (0x1234567822334455ull) != 0x0000000012345678ull)
+    abort();
+  if (lshift_36 (0x1234567822334455ull) != 0x0000000001234567ull)
+    abort();
+
+  return 0;  /* Reached only if no comparison above called abort.  */
+}