diff mbox

[#2,rs6000,GCC6] Fix PR78543, ICE in push_reload on powerpc64le-linux-gnu

Message ID 20170327194115.GA10135@ibm-tiger.the-meissners.org
State New
Headers show

Commit Message

Michael Meissner March 27, 2017, 7:41 p.m. UTC
On Fri, Mar 24, 2017 at 07:16:32PM -0500, Segher Boessenkool wrote:
> Hi Mike,
> 
> This patch is okay for trunk.  Also for 6, but what is the hurry there?

I applied the patch (svn id 246508 for trunk, svn id 246509 for gcc 6.0).  The
hurry up for GCC 6 is due to the fact that the bug does not show by default in
trunk (due to failing with RELOAD), but the bug shows up in GCC 6 (since RELOAD
is default there), and it affects some of the Linux distributions.

[gcc]
2017-03-27  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/78543
	* config/rs6000/rs6000.md (bswaphi2_extenddi): Combine bswap
	HImode and SImode with zero extend to DImode to one insn.
	(bswap<mode>2_extenddi): Likewise.
	(bswapsi2_extenddi): Likewise.
	(bswaphi2_extendsi): Likewise.
	(bswaphi2): Combine bswap HImode and SImode into one insn.
	Separate memory insns from swapping register.
	(bswapsi2): Likewise.
	(bswap<mode>2): Likewise.
	(bswaphi2_internal): Delete, no longer used.
	(bswapsi2_internal): Likewise.
	(bswap<mode>2_load): Split bswap HImode/SImode into separate load,
	store, and gpr<-gpr swap insns.
	(bswap<mode>2_store): Likewise.
	(bswaphi2_reg): Register only splitter, combine with the splitter.
	(bswaphi2 splitter): Likewise.
	(bswapsi2_reg): Likewise.
	(bswapsi2 splitter): Likewise.
	(bswapdi2): If we have the LDBRX and STDBRX instructions, split
	the insns into load, store, and register/register insns.
	(bswapdi2_ldbrx): Likewise.
	(bswapdi2_load): Likewise.
	(bswapdi2_store): Likewise.
	(bswapdi2_reg): Likewise.

[gcc/testsuite]
2017-03-27  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/78543
	* gcc.target/powerpc/pr78543.c: New test.
diff mbox

Patch

Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 246507)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -2350,12 +2350,12 @@  (define_insn "cmpb<mode>3"
 
 ;; Since the hardware zeros the upper part of the register, save generating the
 ;; AND immediate if we are converting to unsigned
-(define_insn "*bswaphi2_extenddi"
+(define_insn "*bswap<mode>2_extenddi"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
 	(zero_extend:DI
-	 (bswap:HI (match_operand:HI 1 "memory_operand" "Z"))))]
+	 (bswap:HSI (match_operand:HSI 1 "memory_operand" "Z"))))]
   "TARGET_POWERPC64"
-  "lhbrx %0,%y1"
+  "l<wd>brx %0,%y1"
   [(set_attr "length" "4")
    (set_attr "type" "load")])
 
@@ -2368,34 +2368,52 @@  (define_insn "*bswaphi2_extendsi"
   [(set_attr "length" "4")
    (set_attr "type" "load")])
 
-(define_expand "bswaphi2"
-  [(parallel [(set (match_operand:HI 0 "reg_or_mem_operand" "")
-		   (bswap:HI
-		    (match_operand:HI 1 "reg_or_mem_operand" "")))
-	      (clobber (match_scratch:SI 2 ""))])]
+;; Separate the bswap patterns into load, store, and gpr<-gpr.  This prevents
+;; the register allocator from converting a gpr<-gpr swap into a store and then
+;; load with byte swap, which can be slower than doing it in the registers.  It
+;; also prevents certain failures with the RELOAD register allocator.
+
+(define_expand "bswap<mode>2"
+  [(use (match_operand:HSI 0 "reg_or_mem_operand"))
+   (use (match_operand:HSI 1 "reg_or_mem_operand"))]
   ""
 {
-  if (!REG_P (operands[0]) && !REG_P (operands[1]))
-    operands[1] = force_reg (HImode, operands[1]);
+  rtx dest = operands[0];
+  rtx src = operands[1];
+
+  if (!REG_P (dest) && !REG_P (src))
+    src = force_reg (<MODE>mode, src);
+
+  if (MEM_P (src))
+    emit_insn (gen_bswap<mode>2_load (dest, src));
+  else if (MEM_P (dest))
+    emit_insn (gen_bswap<mode>2_store (dest, src));
+  else
+    emit_insn (gen_bswap<mode>2_reg (dest, src));
+  DONE;
 })
 
-(define_insn "bswaphi2_internal"
-  [(set (match_operand:HI 0 "reg_or_mem_operand" "=r,Z,&r")
+(define_insn "bswap<mode>2_load"
+  [(set (match_operand:HSI 0 "gpc_reg_operand" "=r")
+	(bswap:HSI (match_operand:HSI 1 "memory_operand" "Z")))]
+  ""
+  "l<wd>brx %0,%y1"
+  [(set_attr "type" "load")])
+
+(define_insn "bswap<mode>2_store"
+  [(set (match_operand:HSI 0 "memory_operand" "=Z")
+	(bswap:HSI (match_operand:HSI 1 "gpc_reg_operand" "r")))]
+  ""
+  "st<wd>brx %1,%y0"
+  [(set_attr "type" "store")])
+
+(define_insn_and_split "bswaphi2_reg"
+  [(set (match_operand:HI 0 "gpc_reg_operand" "=&r")
 	(bswap:HI
-	 (match_operand:HI 1 "reg_or_mem_operand" "Z,r,r")))
-   (clobber (match_scratch:SI 2 "=X,X,&r"))]
+	 (match_operand:HI 1 "gpc_reg_operand" "r")))
+   (clobber (match_scratch:SI 2 "=&r"))]
   ""
-  "@
-   lhbrx %0,%y1
-   sthbrx %1,%y0
-   #"
-  [(set_attr "length" "4,4,12")
-   (set_attr "type" "load,store,*")])
-
-(define_split
-  [(set (match_operand:HI 0 "gpc_reg_operand" "")
-	(bswap:HI (match_operand:HI 1 "gpc_reg_operand" "")))
-   (clobber (match_operand:SI 2 "gpc_reg_operand" ""))]
+  "#"
   "reload_completed"
   [(set (match_dup 3)
 	(and:SI (lshiftrt:SI (match_dup 4)
@@ -2408,48 +2426,21 @@  (define_split
    (set (match_dup 3)
 	(ior:SI (match_dup 3)
 		(match_dup 2)))]
-  "
 {
   operands[3] = simplify_gen_subreg (SImode, operands[0], HImode, 0);
   operands[4] = simplify_gen_subreg (SImode, operands[1], HImode, 0);
-}")
-
-(define_insn "*bswapsi2_extenddi"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
-	(zero_extend:DI
-	 (bswap:SI (match_operand:SI 1 "memory_operand" "Z"))))]
-  "TARGET_POWERPC64"
-  "lwbrx %0,%y1"
-  [(set_attr "length" "4")
-   (set_attr "type" "load")])
-
-(define_expand "bswapsi2"
-  [(set (match_operand:SI 0 "reg_or_mem_operand" "")
-	(bswap:SI
-	 (match_operand:SI 1 "reg_or_mem_operand" "")))]
-  ""
-{
-  if (!REG_P (operands[0]) && !REG_P (operands[1]))
-    operands[1] = force_reg (SImode, operands[1]);
-})
-
-(define_insn "*bswapsi2_internal"
-  [(set (match_operand:SI 0 "reg_or_mem_operand" "=r,Z,&r")
-	(bswap:SI
-	 (match_operand:SI 1 "reg_or_mem_operand" "Z,r,r")))]
-  ""
-  "@
-   lwbrx %0,%y1
-   stwbrx %1,%y0
-   #"
-  [(set_attr "length" "4,4,12")
-   (set_attr "type" "load,store,*")])
+}
+  [(set_attr "length" "12")
+   (set_attr "type" "*")])
 
 ;; We are always BITS_BIG_ENDIAN, so the bit positions below in
 ;; zero_extract insns do not change for -mlittle.
-(define_split
-  [(set (match_operand:SI 0 "gpc_reg_operand" "")
-	(bswap:SI (match_operand:SI 1 "gpc_reg_operand" "")))]
+(define_insn_and_split "bswapsi2_reg"
+  [(set (match_operand:SI 0 "gpc_reg_operand" "=&r")
+	(bswap:SI
+	 (match_operand:SI 1 "gpc_reg_operand" "r")))]
+  ""
+  "#"
   "reload_completed"
   [(set (match_dup 0)					; DABC
 	(rotate:SI (match_dup 1)
@@ -2465,11 +2456,13 @@  (define_split
 				     (const_int 24))
 			(const_int 255))
 		(and:SI (match_dup 0)
-			(const_int -256))))
-
-  ]
+			(const_int -256))))]
   "")
 
+;; On systems with LDBRX/STDBRX generate the loads/stores directly, just like
+;; we do for L{H,W}BRX and ST{H,W}BRX above.  If not, we have to generate more
+;; complex code.
+
 (define_expand "bswapdi2"
   [(parallel [(set (match_operand:DI 0 "reg_or_mem_operand" "")
 		   (bswap:DI
@@ -2478,33 +2471,56 @@  (define_expand "bswapdi2"
 	      (clobber (match_scratch:DI 3 ""))])]
   ""
 {
-  if (!REG_P (operands[0]) && !REG_P (operands[1]))
-    operands[1] = force_reg (DImode, operands[1]);
+  rtx dest = operands[0];
+  rtx src = operands[1];
+
+  if (!REG_P (dest) && !REG_P (src))
+    operands[1] = src = force_reg (DImode, src);
+
+  if (TARGET_POWERPC64 && TARGET_LDBRX)
+    {
+      if (MEM_P (src))
+	emit_insn (gen_bswapdi2_load (dest, src));
+      else if (MEM_P (dest))
+	emit_insn (gen_bswapdi2_store (dest, src));
+      else
+	emit_insn (gen_bswapdi2_reg (dest, src));
+      DONE;
+    }
 
   if (!TARGET_POWERPC64)
     {
       /* 32-bit mode needs fewer scratch registers, but 32-bit addressing mode
 	 that uses 64-bit registers needs the same scratch registers as 64-bit
 	 mode.  */
-      emit_insn (gen_bswapdi2_32bit (operands[0], operands[1]));
+      emit_insn (gen_bswapdi2_32bit (dest, src));
       DONE;
     }
 })
 
 ;; Power7/cell has ldbrx/stdbrx, so use it directly
-(define_insn "*bswapdi2_ldbrx"
-  [(set (match_operand:DI 0 "reg_or_mem_operand" "=r,Z,&r")
-	(bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r")))
-   (clobber (match_scratch:DI 2 "=X,X,&r"))
-   (clobber (match_scratch:DI 3 "=X,X,&r"))]
-  "TARGET_POWERPC64 && TARGET_LDBRX
-   && (REG_P (operands[0]) || REG_P (operands[1]))"
-  "@
-   ldbrx %0,%y1
-   stdbrx %1,%y0
-   #"
-  [(set_attr "length" "4,4,36")
-   (set_attr "type" "load,store,*")])
+(define_insn "bswapdi2_load"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+	(bswap:DI (match_operand:DI 1 "memory_operand" "Z")))]
+  "TARGET_POWERPC64 && TARGET_LDBRX"
+  "ldbrx %0,%y1"
+  [(set_attr "type" "load")])
+
+(define_insn "bswapdi2_store"
+  [(set (match_operand:DI 0 "memory_operand" "=Z")
+	(bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r")))]
+  "TARGET_POWERPC64 && TARGET_LDBRX"
+  "stdbrx %1,%y0"
+  [(set_attr "type" "store")])
+
+(define_insn "bswapdi2_reg"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=&r")
+	(bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r")))
+   (clobber (match_scratch:DI 2 "=&r"))
+   (clobber (match_scratch:DI 3 "=&r"))]
+  "TARGET_POWERPC64 && TARGET_LDBRX"
+  "#"
+  [(set_attr "length" "36")])
 
 ;; Non-power7/cell, fall back to use lwbrx/stwbrx
 (define_insn "*bswapdi2_64bit"
Index: gcc/testsuite/gcc.target/powerpc/pr78543.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr78543.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr78543.c	(working copy)
@@ -0,0 +1,60 @@ 
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O1 -mno-lra" } */
+
+typedef long a;
+enum c { e, f, g, h, i, ab } j();
+int l, n, o, p;
+a q, r;
+void *memcpy();
+void b();
+static int k(int *s) {
+  int m;
+  if (j(&m))
+    *s = m;
+  return !0;
+}
+void d(char s) {
+  int af[4];
+  int ag;
+  enum c ah;
+  char ai[24 << 11];
+  unsigned aj;
+  if (!k(&aj))
+    goto ak;
+  for (;;) {
+    if (!k(&ag))
+      goto ak;
+    switch (ah) {
+    case e:
+      b("");
+      b("bad length %d for GUID in fileinfo v%u for \"%s\"");
+    case i:
+      b("bad length %d for TTH in fileinfo v%u for \"%s\"", aj);
+    case ab:
+      if (ag % 24)
+        b("for \"%s\"", s);
+    case f:
+      if (20 == ag)
+      case h:
+        if (20 == ag)
+          o = 0;
+      break;
+    case g:
+      memcpy(af, ai, sizeof af);
+      b();
+      if (p) {
+        a al, am;
+        r = al << 2 | am;
+        n = af[2];
+        al = n;
+        l = __builtin_bswap32(af[3]);
+        am = q = n | l;
+      }
+    default:
+      b("%s0 unhandled field ID %u 0", __func__);
+    }
+  }
+ak:;
+}