diff mbox

, PR 71294, Fix -O3 -fstack-protector bug on PowerPC power8

Message ID 20160527015017.GA7798@ibm-tiger.the-meissners.org
State New
Headers show

Commit Message

Michael Meissner May 27, 2016, 1:50 a.m. UTC
It might be argued that this is a reload bug (since it runs on LRA), but
sometimes it is simpler to place a simpler work around in the machine dependent
code.  If the maintainers decide that it should be fixed in reload instead of
via this patch, that is fine.

PR 71294 involves vectorization where the compiler is forming a V2DI vector
from 2 DI elements.  The elements are the same value (a stack address), so the
register allocator copies the address over to the VSX register file, and does
an XXPERMDI.  Because of the -fstack-protector, frame addresses are modified,
and become an ADD operation, and the direct move fails.

I added a splitter for DImode so that if a virtual register or frame address
register was attempted to be splatted to the VSX register file, it would copy
the value to a pseudo register, and do a direct move on that.

I have done a bootstrap and regression test with these patches and there were
no regressions.  Are the patches ok to install in the trunk?

[gcc]
2016-05-26  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/71294
	* config/rs6000/predicates.md (virtual_or_frame_reg_operand): New
	predicate to return true if the operand is a virtual or frame
	register.
	* config/rs6000/vsx.md (move splat splitters): Add splitters to
	copy a frame related pointer into a new pseudo register during the
	first split pass, so that we don't confuse the register allocator.

[gcc/testsuite]
2016-05-26  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/71294
	* g++.dg/pr71294.C: New test.

Comments

Segher Boessenkool May 27, 2016, 2:46 a.m. UTC | #1
On Thu, May 26, 2016 at 09:50:18PM -0400, Michael Meissner wrote:
> It might be argued that this is a reload bug (since it runs on LRA), but
> sometimes it is simpler to place a simpler work around in the machine dependent
> code.  If the maintainers decide that it should be fixed in reload instead of
> via this patch, that is fine.

It is either a bug in reload or in the rs6000 reload hooks (not because it
works with LRA, but because it crashes in reload ;-) )

I don't think using many lines of extra splitters to work around a missing
reload or two is such a great idea.  Someone needs to look deeper into
the problem, find what the actual problem is.

The splitters also hurt code quality in various cases, e.g. when splatting
the stack pointer (r1) or the hard frame pointer (r31); those work fine
without extra copy first.

> +(define_predicate "virtual_or_frame_reg_operand"
> +  (match_code "reg,subreg")
> +{
> +  HOST_WIDE_INT r;
> +  if (SUBREG_P (op))
> +    op = SUBREG_REG (op);
> +
> +  if (!REG_P (op))
> +    return 0;
> +
> +  r = REGNO (op);
> +  return REGNO_PTR_FRAME_P (r);
> +})

A regno is not a HOST_WIDE_INT but an unsigned int instead.  You can of
course get rid of "r" completely here.


Segher
diff mbox

Patch

Index: gcc/config/rs6000/predicates.md
===================================================================
--- gcc/config/rs6000/predicates.md	(revision 236800)
+++ gcc/config/rs6000/predicates.md	(working copy)
@@ -1959,3 +1959,20 @@  (define_predicate "fusion_offsettable_me
 
   return offsettable_nonstrict_memref_p (op);
 })
+
+;; Return true if the operand is a virtual or frame register.  The register
+;; allocator gets confused if a virtual/frame register is used in a splat
+;; operation when -fstack-protector is used.
+(define_predicate "virtual_or_frame_reg_operand"
+  (match_code "reg,subreg")
+{
+  HOST_WIDE_INT r;
+  if (SUBREG_P (op))
+    op = SUBREG_REG (op);
+
+  if (!REG_P (op))
+    return 0;
+
+  r = REGNO (op);
+  return REGNO_PTR_FRAME_P (r);
+})
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	(revision 236800)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -2397,6 +2397,20 @@  (define_insn "vsx_splat_<mode>"
    lxvdsx %x0,%y1"
   [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
 
+;; Virtual/frame registers cause problems because they are replaced by a PLUS
+;; operation which confuses RELOAD if -fstack-protector is used.  Add a
+;; splitter to copy such registers to a temporary
+(define_split
+  [(set (match_operand:V2DI 0 "vsx_register_operand" "")
+	(vec_duplicate:V2DI
+	 (match_operand:DI 1 "virtual_or_frame_reg_operand" "")))]
+  "TARGET_VSX && TARGET_POWERPC64 && can_create_pseudo_p ()"
+  [(match_dup 2) (match_dup 1)
+   (match_dup 0) (vec_duplicate:VSX_D (match_dup 2))]
+{
+  operands[2] = gen_reg_rtx (DImode);
+})
+
 ;; V4SI splat (ISA 3.0)
 ;; When SI's are allowed in VSX registers, add XXSPLTW support
 (define_expand "vsx_splat_<mode>"
@@ -2411,6 +2425,17 @@  (define_expand "vsx_splat_<mode>"
     operands[1] = force_reg (<VS_scalar>mode, operands[1]);
 })
 
+(define_split
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "")
+	(vec_duplicate:V4SI
+	 (match_operand:SI 1 "virtual_or_frame_reg_operand" "")))]
+  "TARGET_P9_VECTOR && !TARGET_POWERPC64 && can_create_pseudo_p ()"
+  [(match_dup 2) (match_dup 1)
+   (match_dup 0) (vec_duplicate:VSX_D (match_dup 2))]
+{
+  operands[2] = gen_reg_rtx (SImode);
+})
+
 (define_insn "*vsx_splat_v4si_internal"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
 	(vec_duplicate:V4SI
Index: gcc/testsuite/g++.dg/pr71294.C
===================================================================
--- gcc/testsuite/g++.dg/pr71294.C	(revision 0)
+++ gcc/testsuite/g++.dg/pr71294.C	(revision 0)
@@ -0,0 +1,56 @@ 
+/* { dg-do compile { target { powerpc64*-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O3 -fstack-protector" } */
+
+class A;
+template <typename _Tp, int m, int n> class B {
+public:
+  _Tp val[m * n];
+};
+class C {
+public:
+  C(A);
+};
+struct D {
+  D();
+  unsigned long &operator[](int);
+  unsigned long *p;
+};
+class A {
+public:
+  template <typename _Tp, int m, int n> A(const B<_Tp, m, n> &, bool);
+  int rows, cols;
+  unsigned char *data;
+  unsigned char *datastart;
+  unsigned char *dataend;
+  unsigned char *datalimit;
+  D step;
+};
+template <typename _Tp, int m, int n>
+A::A(const B<_Tp, m, n> &p1, bool)
+    : rows(m), cols(n) {
+  step[0] = cols * sizeof(_Tp);
+  datastart = data = (unsigned char *)p1.val;
+  datalimit = dataend = datastart + rows * step[0];
+}
+class F {
+public:
+  static void compute(C);
+  template <typename _Tp, int m, int n, int nm>
+  static void compute(const B<_Tp, m, n> &, B<_Tp, nm, 1> &, B<_Tp, m, nm> &,
+                      B<_Tp, n, nm> &);
+};
+D::D() {}
+unsigned long &D::operator[](int p1) { return p[p1]; }
+template <typename _Tp, int m, int n, int nm>
+void F::compute(const B<_Tp, m, n> &, B<_Tp, nm, 1> &, B<_Tp, m, nm> &,
+                B<_Tp, n, nm> &p4) {
+  A a(p4, false);
+  compute(a);
+}
+void fn1() {
+  B<double, 4, 4> b, c, e;
+  B<double, 4, 1> d;
+  F::compute(b, d, c, e);
+}