diff mbox series

rs6000: MMA built-in dies with incorrect sharing of tree nodes error

Message ID 990a956d-77cc-368d-f9fc-61cfac3facd4@linux.ibm.com
State New
Headers show
Series rs6000: MMA built-in dies with incorrect sharing of tree nodes error | expand

Commit Message

Peter Bergner Sept. 1, 2020, 5 p.m. UTC
When we expand our MMA built-ins into gimple, we (me!) erroneously reused
the accumulator memory reference for both the source input value as well as
the destination output value.  This led to a tree sharing error.
The solution is to create separate memory references for the input
and output values.

I'll note that the old build1 call is not actually needed.  I believe I
started using that first and then noticed that didn't work in call cases,
so I used build_simple_mem_ref in the other cases.  I should have noticed
that build_simple_mem_ref worked for all cases. :-)

This passed bootstrap and regtesting on powerpc64le-linux with no regressions.
Raji has also done some runtime testing of her MMA tests using this patch
and they all showed no regressions either.  Ok for trunk and GCC 10 after
a couple of days of burn in?

Peter


gcc/
	PR target/96808
	* config/rs6000/rs6000-call.c (rs6000_gimple_fold_mma_builtin): Do not
	reuse accumulator memory reference for source and destination accesses.

gcc/testsuite/
	PR target/96808
	* gcc.target/powerpc/pr96808.c: New test.

Comments

Segher Boessenkool Sept. 1, 2020, 5:38 p.m. UTC | #1
Hi!

On Tue, Sep 01, 2020 at 12:00:50PM -0500, Peter Bergner wrote:
> When we expand our MMA built-ins into gimple, we (me!) erroneously reused
> the accumulator memory reference for both the source input value as well as
> the destination output value.  This led to a tree sharing error.
> The solution is to create separate memory references for the input
> and output values.
> 
> I'll note that the old build1 call is not actually needed.  I believe I
> started using that first and then noticed that didn't work in call cases,
> so I used build_simple_mem_ref in the other cases.  I should have noticed
> that build_simple_mem_ref worked for all cases. :-)

So...  simple!

> This passed bootstrap and regtesting on powerpc64le-linux with no regressions.
> Raji has also done some runtime testing of her MMA tests using this patch
> and they all showed no regressions either.  Ok for trunk and GCC 10 after
> a couple of days of burn in?

Okay for both.  Thanks!


Segher
Peter Bergner Sept. 1, 2020, 6:51 p.m. UTC | #2
On 9/1/20 12:38 PM, Segher Boessenkool wrote:
>> This passed bootstrap and regtesting on powerpc64le-linux with no regressions.
>> Raji has also done some runtime testing of her MMA tests using this patch
>> and they all showed no regressions either.  Ok for trunk and GCC 10 after
>> a couple of days of burn in?
> 
> Okay for both.  Thanks!

Ok, fix pushed to trunk.  I'll push to GCC 10 in a couple of days.
Thanks!

Peter
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 26388762c5f..b6b45687aae 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -11471,12 +11471,8 @@  rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
   /* Convert this built-in into an internal version that uses pass-by-value
      arguments.  The internal built-in follows immediately after this one.  */
   new_decl = rs6000_builtin_decls[fncode + 1];
-  tree lhs, mem, op[MAX_MMA_OPERANDS];
+  tree lhs, op[MAX_MMA_OPERANDS];
   tree acc = gimple_call_arg (stmt, 0);
-  if (TREE_CODE (acc) == PARM_DECL)
-    mem = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (acc)), acc);
-  else
-    mem = build_simple_mem_ref (acc);
   push_gimplify_context (true);
 
   if ((attr & RS6000_BTC_QUAD) != 0)
@@ -11486,7 +11482,7 @@  rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
       op[0] = make_ssa_name (vector_quad_type_node);
       for (unsigned i = 1; i < nopnds; i++)
 	op[i] = gimple_call_arg (stmt, i);
-      gimplify_assign (op[0], mem, &new_seq);
+      gimplify_assign (op[0], build_simple_mem_ref (acc), &new_seq);
     }
   else
     {
@@ -11536,7 +11532,7 @@  rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
     lhs = make_ssa_name (vector_quad_type_node);
   gimple_call_set_lhs (new_call, lhs);
   gimple_seq_add_stmt (&new_seq, new_call);
-  gimplify_assign (mem, lhs, &new_seq);
+  gimplify_assign (build_simple_mem_ref (acc), lhs, &new_seq);
   pop_gimplify_context (NULL);
   gsi_replace_with_seq (gsi, new_seq, true);
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96808.c b/gcc/testsuite/gcc.target/powerpc/pr96808.c
new file mode 100644
index 00000000000..2d44bd51b20
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr96808.c
@@ -0,0 +1,59 @@ 
+/* PR target/96808 */
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+/* Verify we do not ICE on the tests below.  */
+
+void
+old_ok (__vector_quad *dst, vector unsigned char vc)
+{
+  __vector_quad vq;
+  __builtin_mma_xxsetaccz(&vq);
+  __builtin_mma_xvf32gerpp(&vq, vc, vc);
+  *dst = vq;
+}
+
+void
+test0 (__vector_quad *dst, vector unsigned char vc)
+{
+  __vector_quad vq[2];
+  __builtin_mma_xxsetaccz(&vq[1]);
+  __builtin_mma_xvf32gerpp(&vq[1], vc, vc);
+  *dst = vq[1];
+}
+
+void
+test1 (__vector_quad *dst, vector unsigned char vc)
+{
+  __vector_quad vq[2][2];
+  __builtin_mma_xxsetaccz(&vq[1][1]);
+  __builtin_mma_xvf32gerpp(&vq[1][1], vc, vc);
+  *dst = vq[1][1];
+}
+
+void
+test2 (__vector_quad *dst, vector unsigned char vc)
+{
+  struct {
+    __vector_quad dummy;
+    __vector_quad acc;
+  } vq;
+  __builtin_mma_xxsetaccz(&vq.acc);
+  __builtin_mma_xvf32gerpp(&vq.acc, vc, vc);
+  *dst = vq.acc;
+}
+
+void
+test3 (__vector_quad *dst, vector unsigned char vc)
+{
+  __builtin_mma_xxsetaccz(&dst[1]);
+  __builtin_mma_xvf32gerpp(&dst[1], vc, vc);
+}
+
+void
+test4 (__vector_quad *dst[], vector unsigned char vc)
+{
+  __builtin_mma_xxsetaccz(&dst[1][2]);
+  __builtin_mma_xvf32gerpp(&dst[1][2], vc, vc);
+}