diff mbox series

Power10: Add BRH, BRW, BRD support.

Message ID 20200805024015.GA22456@ibm-tinman.the-meissners.org
State New
Headers show
Series Power10: Add BRH, BRW, BRD support. | expand

Commit Message

Michael Meissner Aug. 5, 2020, 2:40 a.m. UTC
Power10: Add BRH, BRW, BRD support.

The power10 processor adds 3 new instructions (BRH, BRW, BRD) that byte swap
half-words, words, and double-words within a GPR register.  This patch adds
support for these instructions.  I have applied the suggestions from the
previous times I have submitted this patch.  I have done bootstrap builds on a
Linux power8 system.  I have run the regression tests, and there were no
regressions, and the 3 new tests pass.  Can I check this into the master
branch?

gcc/
2020-08-04  Michael Meissner  <meissner@linux.ibm.com>

	* config/rs6000/rs6000.md (bswaphi2_reg): Generate the BRH
	instruction on ISA 3.1.
	(bswapsi2_reg): Generate the BRW instruction on ISA 3.1.
	(bswapdi2): Rename bswapdi2_xxbrd to bswapdi2_brd.
	(bswapdi2_brd): Rename from bswapdi2_xxbrd.  Generate the BRD
	instruction on ISA 3.1.

gcc/testsuite/
2020-08-04  Michael Meissner  <meissner@linux.ibm.com>

	* gcc.target/powerpc/bswap-brd.c: New test.
	* gcc.target/powerpc/bswap-brw.c: New test.
	* gcc.target/powerpc/bswap-brh.c: New test.
---
 gcc/config/rs6000/rs6000.md                  | 44 +++++++++++++++-------------
 gcc/testsuite/gcc.target/powerpc/bswap-brd.c | 23 +++++++++++++++
 gcc/testsuite/gcc.target/powerpc/bswap-brh.c | 11 +++++++
 gcc/testsuite/gcc.target/powerpc/bswap-brw.c | 22 ++++++++++++++
 4 files changed, 80 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/bswap-brd.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/bswap-brh.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/bswap-brw.c

Comments

Segher Boessenkool Aug. 6, 2020, 6:11 p.m. UTC | #1
On Tue, Aug 04, 2020 at 10:40:15PM -0400, Michael Meissner wrote:
> The power10 processor adds 3 new instructions (BRH, BRW, BRD) that byte swaps
> half-words, words, and double-words within a GPR register.

The brh insn reverses the bytes in each of four 16-bit words in a GPR,
but this patch only does it for HImode.  Similar for brw.  Okay.

> 2020-08-04  Michael Meissner  <meissner@linux.ibm.com>
> 
> 	* config/rs6000/rs6000.md (bswaphi2_reg): Generate the BRH
> 	instruction on ISA 3.1.

The changelog should just describe the change, not the effect of the
change, so just "New define_insn." or "New pattern." or "New." here.
All other info goes in the commit message.

This patch is okay for trunk, and all backports later.  Thanks Mike!


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 48f1f1c..43b620a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2591,15 +2591,16 @@  (define_insn "bswap<mode>2_store"
   [(set_attr "type" "store")])
 
 (define_insn_and_split "bswaphi2_reg"
-  [(set (match_operand:HI 0 "gpc_reg_operand" "=&r,wa")
+  [(set (match_operand:HI 0 "gpc_reg_operand" "=r,&r,wa")
 	(bswap:HI
-	 (match_operand:HI 1 "gpc_reg_operand" "r,wa")))
-   (clobber (match_scratch:SI 2 "=&r,X"))]
+	 (match_operand:HI 1 "gpc_reg_operand" "r,r,wa")))
+   (clobber (match_scratch:SI 2 "=X,&r,X"))]
   ""
   "@
+   brh %0,%1
    #
    xxbrh %x0,%x1"
-  "reload_completed && int_reg_operand (operands[0], HImode)"
+  "reload_completed && !TARGET_POWER10 && int_reg_operand (operands[0], HImode)"
   [(set (match_dup 3)
 	(and:SI (lshiftrt:SI (match_dup 4)
 			     (const_int 8))
@@ -2615,21 +2616,22 @@  (define_insn_and_split "bswaphi2_reg"
   operands[3] = simplify_gen_subreg (SImode, operands[0], HImode, 0);
   operands[4] = simplify_gen_subreg (SImode, operands[1], HImode, 0);
 }
-  [(set_attr "length" "12,4")
-   (set_attr "type" "*,vecperm")
-   (set_attr "isa" "*,p9v")])
+  [(set_attr "length" "*,12,*")
+   (set_attr "type" "shift,*,vecperm")
+   (set_attr "isa" "p10,*,p9v")])
 
 ;; We are always BITS_BIG_ENDIAN, so the bit positions below in
 ;; zero_extract insns do not change for -mlittle.
 (define_insn_and_split "bswapsi2_reg"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=&r,wa")
+  [(set (match_operand:SI 0 "gpc_reg_operand" "=r,&r,wa")
 	(bswap:SI
-	 (match_operand:SI 1 "gpc_reg_operand" "r,wa")))]
+	 (match_operand:SI 1 "gpc_reg_operand" "r,r,wa")))]
   ""
   "@
+   brw %0,%1
    #
    xxbrw %x0,%x1"
-  "reload_completed && int_reg_operand (operands[0], SImode)"
+  "reload_completed && !TARGET_POWER10 && int_reg_operand (operands[0], SImode)"
   [(set (match_dup 0)					; DABC
 	(rotate:SI (match_dup 1)
 		   (const_int 24)))
@@ -2646,9 +2648,9 @@  (define_insn_and_split "bswapsi2_reg"
 		(and:SI (match_dup 0)
 			(const_int -256))))]
   ""
-  [(set_attr "length" "12,4")
-   (set_attr "type" "*,vecperm")
-   (set_attr "isa" "*,p9v")])
+  [(set_attr "length" "4,12,4")
+   (set_attr "type" "shift,*,vecperm")
+   (set_attr "isa" "p10,*,p9v")])
 
 ;; On systems with LDBRX/STDBRX generate the loads/stores directly, just like
 ;; we do for L{H,W}BRX and ST{H,W}BRX above.  If not, we have to generate more
@@ -2681,7 +2683,7 @@  (define_expand "bswapdi2"
 	  emit_insn (gen_bswapdi2_store (dest, src));
         }
       else if (TARGET_P9_VECTOR)
-	emit_insn (gen_bswapdi2_xxbrd (dest, src));
+	emit_insn (gen_bswapdi2_brd (dest, src));
       else
 	emit_insn (gen_bswapdi2_reg (dest, src));
       DONE;
@@ -2712,13 +2714,15 @@  (define_insn "bswapdi2_store"
   "stdbrx %1,%y0"
   [(set_attr "type" "store")])
 
-(define_insn "bswapdi2_xxbrd"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=wa")
-	(bswap:DI (match_operand:DI 1 "gpc_reg_operand" "wa")))]
+(define_insn "bswapdi2_brd"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,wa")
+	(bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r,wa")))]
   "TARGET_P9_VECTOR"
-  "xxbrd %x0,%x1"
-  [(set_attr "type" "vecperm")
-   (set_attr "isa" "p9v")])
+  "@
+   brd %0,%1
+   xxbrd %x0,%x1"
+  [(set_attr "type" "shift,vecperm")
+   (set_attr "isa" "p10,p9v")])
 
 (define_insn "bswapdi2_reg"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=&r")
diff --git a/gcc/testsuite/gcc.target/powerpc/bswap-brd.c b/gcc/testsuite/gcc.target/powerpc/bswap-brd.c
new file mode 100644
index 0000000..876129e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bswap-brd.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+/* This tests whether GCC generates the ISA 3.1 BRD byte swap instruction for
+   GPR data, but generates XXBRD for data in a vector register.  */
+
+unsigned long long
+bswap_ll (unsigned long long a)
+{
+  return __builtin_bswap64 (a); /* { dg-final { scan-assembler {\mbrd\M} } } */
+}
+
+double
+bswap_ll_dbl (unsigned long long a)
+{
+  unsigned int b = a;
+  /* Force the value to be loaded into a vector register.  */
+  __asm__ (" # %x0" : "+wa" (b));
+
+  /* { dg-final { scan-assembler {\mxxbrd\M} } } */
+  return (double) __builtin_bswap64 (b);
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/bswap-brh.c b/gcc/testsuite/gcc.target/powerpc/bswap-brh.c
new file mode 100644
index 0000000..4dbab12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bswap-brh.c
@@ -0,0 +1,11 @@ 
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+/* This tests whether GCC generates the ISA 3.1 16-bit byte swap
+   instruction BRH.  */
+
+unsigned short
+bswap_short (unsigned short a)
+{
+  return __builtin_bswap16 (a); /* { dg-final { scan-assembler {\mbrh\M} } } */
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/bswap-brw.c b/gcc/testsuite/gcc.target/powerpc/bswap-brw.c
new file mode 100644
index 0000000..b3f923e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bswap-brw.c
@@ -0,0 +1,22 @@ 
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+/* This tests whether GCC generates the ISA 3.1 BRW byte swap instruction for
+   GPR data, but generates XXBRW for data in a vector register.  */
+
+unsigned int
+bswap_int (unsigned int a)
+{
+  return __builtin_bswap32 (a); /* { dg-final { scan-assembler {\mbrw\M} } } */
+}
+
+double
+bswap_int_dbl (unsigned int a)
+{
+  unsigned int b = a;
+  /* Force the value to be loaded into a vector register.  */
+  __asm__ (" # %x0" : "+wa" (b));
+
+  /* { dg-final { scan-assembler {\mxxbrw\M} } } */
+  return (double) __builtin_bswap32 (b);
+}