[HSA,PR,82119] Make HSA resilient to side-effects of split_edge

Message ID 20170911090339.fa6en3twtabgrm73@virgil.suse.cz
State New
Headers show
Series
  • [HSA,PR,82119] Make HSA resilient to side-effects of split_edge
Related show

Commit Message

Martin Jambor Sept. 11, 2017, 9:03 a.m.
Hi,

in r251264 the code of split_edge was changed not to reallocate PHI
vectors, but it reorders them along with the order of incoming edges
to the BB.  There are two places in the HSA BE where we call
split_edge while iterating over PHI node arguments and those broke,
resulting in a number of libgomp testsuite failures.

Fixed thusly.  I have tested the patch on HSA capable APU and also did
full bootstrap and testing, I will commit in a few moments.

Martin


2017-09-11  Martin Jambor  <mjambor@suse.cz>

	PR hsa/82119
	* hsa-gen.c (gen_hsa_phi_from_gimple_phi): Process ADDR_EXPRs in
	arguments in advance.
	* hsa-regalloc.c (naive_process_phi): New parameter predecessors,
	use it to find predecessor edges.
	(naive_outof_ssa): Collect vector of predecessors.
---
 gcc/hsa-gen.c      | 51 ++++++++++++++++++++++++++++++++++++++-------------
 gcc/hsa-regalloc.c | 14 +++++++++++---
 2 files changed, 49 insertions(+), 16 deletions(-)

Comments

Richard Biener Sept. 11, 2017, 10:11 a.m. | #1
On September 11, 2017 11:03:39 AM GMT+02:00, Martin Jambor <mjambor@suse.cz> wrote:
>Hi,
>
>in r251264 the code of split_edge was changed not to reallocate PHI
>vectors, but it reorders them along with the order of incoming edges
>to the BB.  There are two places in the HSA BE where we call
>split_edge while iterating over PHI node arguments and those broke,
>resulting in a number of libgomp testsuite failures.

Ah, interesting. I'll try to change split_edge to not reorder PHI arguments. 

Richard. 

>Fixed thusly.  I have tested the patch on HSA capable APU and also did
>full bootstrap and testing, I will commit in a few moments.
>
>Martin
>
>
>2017-09-11  Martin Jambor  <mjambor@suse.cz>
>
>	PR hsa/82119
>	* hsa-gen.c (gen_hsa_phi_from_gimple_phi): Process ADDR_EXPRs in
>	arguments in advance.
>	* hsa-regalloc.c (naive_process_phi): New parameter predecessors,
>	use it to find predecessor edges.
>	(naive_outof_ssa): Collect vector of predecessors.
>---
>gcc/hsa-gen.c      | 51
>++++++++++++++++++++++++++++++++++++++-------------
> gcc/hsa-regalloc.c | 14 +++++++++++---
> 2 files changed, 49 insertions(+), 16 deletions(-)
>
>diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
>index bd227626e83..6e054c0ce82 100644
>--- a/gcc/hsa-gen.c
>+++ b/gcc/hsa-gen.c
>@@ -5657,8 +5657,37 @@ gen_hsa_phi_from_gimple_phi (gimple *phi_stmt,
>hsa_bb *hbb)
>   hphi = new hsa_insn_phi (count, dest);
>   hphi->m_bb = hbb->m_bb;
> 
>-  tree lhs = gimple_phi_result (phi_stmt);
>+  auto_vec <tree, 8> aexprs;
>+  auto_vec <hsa_op_reg *, 8> aregs;
>+
>+  /* Calling split_edge when processing a PHI node messes up with the
>order of
>+     gimple phi node arguments (it moves the one associated with the
>edge to
>+     the end).  We need to keep the order of edges and arguments of
>HSA phi
>+     node arguments consistent, so we do all required splitting as the
>first
>+     step, and in reverse order as to not be affected by the
>re-orderings.  */
>+  for (unsigned j = count; j != 0; j--)
>+    {
>+      unsigned i = j - 1;
>+      tree op = gimple_phi_arg_def (phi_stmt, i);
>+      if (TREE_CODE (op) != ADDR_EXPR)
>+	continue;
> 
>+      edge e = gimple_phi_arg_edge (as_a <gphi *> (phi_stmt), i);
>+      hsa_bb *hbb_src = hsa_init_new_bb (split_edge (e));
>+      hsa_op_address *addr = gen_hsa_addr (TREE_OPERAND (op, 0),
>+					   hbb_src);
>+
>+      hsa_op_reg *dest
>+	= new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
>+      hsa_insn_basic *insn
>+	= new hsa_insn_basic (2, BRIG_OPCODE_LDA, BRIG_TYPE_U64,
>+			      dest, addr);
>+      hbb_src->append_insn (insn);
>+      aexprs.safe_push (op);
>+      aregs.safe_push (dest);
>+    }
>+
>+  tree lhs = gimple_phi_result (phi_stmt);
>   for (unsigned i = 0; i < count; i++)
>     {
>       tree op = gimple_phi_arg_def (phi_stmt, i);
>@@ -5684,18 +5713,14 @@ gen_hsa_phi_from_gimple_phi (gimple *phi_stmt,
>hsa_bb *hbb)
> 	    }
> 	  else if (TREE_CODE (op) == ADDR_EXPR)
> 	    {
>-	      edge e = gimple_phi_arg_edge (as_a <gphi *> (phi_stmt), i);
>-	      hsa_bb *hbb_src = hsa_init_new_bb (split_edge (e));
>-	      hsa_op_address *addr = gen_hsa_addr (TREE_OPERAND (op, 0),
>-						   hbb_src);
>-
>-	      hsa_op_reg *dest
>-		= new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
>-	      hsa_insn_basic *insn
>-		= new hsa_insn_basic (2, BRIG_OPCODE_LDA, BRIG_TYPE_U64,
>-				      dest, addr);
>-	      hbb_src->append_insn (insn);
>-
>+	      hsa_op_reg *dest = NULL;
>+	      for (unsigned a_idx = 0; a_idx < aexprs.length (); a_idx++)
>+		if (aexprs[a_idx] == op)
>+		  {
>+		    dest = aregs[a_idx];
>+		    break;
>+		  }
>+	      gcc_assert (dest);
> 	      hphi->set_op (i, dest);
> 	    }
> 	  else
>diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c
>index 2a17254c3b2..7fc3a8afa3d 100644
>--- a/gcc/hsa-regalloc.c
>+++ b/gcc/hsa-regalloc.c
>@@ -42,7 +42,7 @@ along with GCC; see the file COPYING3.  If not see
>/* Process a PHI node PHI of basic block BB as a part of naive
>out-f-ssa.  */
> 
> static void
>-naive_process_phi (hsa_insn_phi *phi)
>+naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors)
> {
>   unsigned count = phi->operand_count ();
>   for (unsigned i = 0; i < count; i++)
>@@ -55,7 +55,7 @@ naive_process_phi (hsa_insn_phi *phi)
>       if (!op)
> 	break;
> 
>-      e = EDGE_PRED (phi->m_bb, i);
>+      e = predecessors[i];
>       if (single_succ_p (e->src))
> 	hbb = hsa_bb_for_bb (e->src);
>       else
>@@ -89,10 +89,18 @@ naive_outof_ssa (void)
>     hsa_bb *hbb = hsa_bb_for_bb (bb);
>     hsa_insn_phi *phi;
> 
>+    /* naive_process_phi can call split_edge on an incoming edge which
>order if
>+       the incoming edges to the basic block and thus make it
>inconsistent with
>+       the ordering of PHI arguments, so we collect them in advance. 
>*/
>+    auto_vec<edge, 8> predecessors;
>+    unsigned pred_count = EDGE_COUNT (bb->preds);
>+    for (unsigned i = 0; i < pred_count; i++)
>+      predecessors.safe_push (EDGE_PRED (bb, i));
>+
>     for (phi = hbb->m_first_phi;
> 	 phi;
> 	 phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
>-      naive_process_phi (phi);
>+      naive_process_phi (phi, predecessors);
> 
>/* Zap PHI nodes, they will be deallocated when everything else will. 
>*/
>     hbb->m_first_phi = NULL;
Richard Biener Sept. 13, 2017, 8:59 a.m. | #2
On Mon, Sep 11, 2017 at 12:11 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On September 11, 2017 11:03:39 AM GMT+02:00, Martin Jambor <mjambor@suse.cz> wrote:
>>Hi,
>>
>>in r251264 the code of split_edge was changed not to reallocate PHI
>>vectors, but it reorders them along with the order of incoming edges
>>to the BB.  There are two places in the HSA BE where we call
>>split_edge while iterating over PHI node arguments and those broke,
>>resulting in a number of libgomp testsuite failures.
>
> Ah, interesting. I'll try to change split_edge to not reorder PHI arguments.

Hmm, I think it wasn't guaranteed before to not reorder either and I can't
see a way to do this with the current building blocks.

Richard.

> Richard.
>
>>Fixed thusly.  I have tested the patch on HSA capable APU and also did
>>full bootstrap and testing, I will commit in a few moments.
>>
>>Martin
>>
>>
>>2017-09-11  Martin Jambor  <mjambor@suse.cz>
>>
>>       PR hsa/82119
>>       * hsa-gen.c (gen_hsa_phi_from_gimple_phi): Process ADDR_EXPRs in
>>       arguments in advance.
>>       * hsa-regalloc.c (naive_process_phi): New parameter predecessors,
>>       use it to find predecessor edges.
>>       (naive_outof_ssa): Collect vector of predecessors.
>>---
>>gcc/hsa-gen.c      | 51
>>++++++++++++++++++++++++++++++++++++++-------------
>> gcc/hsa-regalloc.c | 14 +++++++++++---
>> 2 files changed, 49 insertions(+), 16 deletions(-)
>>
>>diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
>>index bd227626e83..6e054c0ce82 100644
>>--- a/gcc/hsa-gen.c
>>+++ b/gcc/hsa-gen.c
>>@@ -5657,8 +5657,37 @@ gen_hsa_phi_from_gimple_phi (gimple *phi_stmt,
>>hsa_bb *hbb)
>>   hphi = new hsa_insn_phi (count, dest);
>>   hphi->m_bb = hbb->m_bb;
>>
>>-  tree lhs = gimple_phi_result (phi_stmt);
>>+  auto_vec <tree, 8> aexprs;
>>+  auto_vec <hsa_op_reg *, 8> aregs;
>>+
>>+  /* Calling split_edge when processing a PHI node messes up with the
>>order of
>>+     gimple phi node arguments (it moves the one associated with the
>>edge to
>>+     the end).  We need to keep the order of edges and arguments of
>>HSA phi
>>+     node arguments consistent, so we do all required splitting as the
>>first
>>+     step, and in reverse order as to not be affected by the
>>re-orderings.  */
>>+  for (unsigned j = count; j != 0; j--)
>>+    {
>>+      unsigned i = j - 1;
>>+      tree op = gimple_phi_arg_def (phi_stmt, i);
>>+      if (TREE_CODE (op) != ADDR_EXPR)
>>+      continue;
>>
>>+      edge e = gimple_phi_arg_edge (as_a <gphi *> (phi_stmt), i);
>>+      hsa_bb *hbb_src = hsa_init_new_bb (split_edge (e));
>>+      hsa_op_address *addr = gen_hsa_addr (TREE_OPERAND (op, 0),
>>+                                         hbb_src);
>>+
>>+      hsa_op_reg *dest
>>+      = new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
>>+      hsa_insn_basic *insn
>>+      = new hsa_insn_basic (2, BRIG_OPCODE_LDA, BRIG_TYPE_U64,
>>+                            dest, addr);
>>+      hbb_src->append_insn (insn);
>>+      aexprs.safe_push (op);
>>+      aregs.safe_push (dest);
>>+    }
>>+
>>+  tree lhs = gimple_phi_result (phi_stmt);
>>   for (unsigned i = 0; i < count; i++)
>>     {
>>       tree op = gimple_phi_arg_def (phi_stmt, i);
>>@@ -5684,18 +5713,14 @@ gen_hsa_phi_from_gimple_phi (gimple *phi_stmt,
>>hsa_bb *hbb)
>>           }
>>         else if (TREE_CODE (op) == ADDR_EXPR)
>>           {
>>-            edge e = gimple_phi_arg_edge (as_a <gphi *> (phi_stmt), i);
>>-            hsa_bb *hbb_src = hsa_init_new_bb (split_edge (e));
>>-            hsa_op_address *addr = gen_hsa_addr (TREE_OPERAND (op, 0),
>>-                                                 hbb_src);
>>-
>>-            hsa_op_reg *dest
>>-              = new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
>>-            hsa_insn_basic *insn
>>-              = new hsa_insn_basic (2, BRIG_OPCODE_LDA, BRIG_TYPE_U64,
>>-                                    dest, addr);
>>-            hbb_src->append_insn (insn);
>>-
>>+            hsa_op_reg *dest = NULL;
>>+            for (unsigned a_idx = 0; a_idx < aexprs.length (); a_idx++)
>>+              if (aexprs[a_idx] == op)
>>+                {
>>+                  dest = aregs[a_idx];
>>+                  break;
>>+                }
>>+            gcc_assert (dest);
>>             hphi->set_op (i, dest);
>>           }
>>         else
>>diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c
>>index 2a17254c3b2..7fc3a8afa3d 100644
>>--- a/gcc/hsa-regalloc.c
>>+++ b/gcc/hsa-regalloc.c
>>@@ -42,7 +42,7 @@ along with GCC; see the file COPYING3.  If not see
>>/* Process a PHI node PHI of basic block BB as a part of naive
>>out-f-ssa.  */
>>
>> static void
>>-naive_process_phi (hsa_insn_phi *phi)
>>+naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors)
>> {
>>   unsigned count = phi->operand_count ();
>>   for (unsigned i = 0; i < count; i++)
>>@@ -55,7 +55,7 @@ naive_process_phi (hsa_insn_phi *phi)
>>       if (!op)
>>       break;
>>
>>-      e = EDGE_PRED (phi->m_bb, i);
>>+      e = predecessors[i];
>>       if (single_succ_p (e->src))
>>       hbb = hsa_bb_for_bb (e->src);
>>       else
>>@@ -89,10 +89,18 @@ naive_outof_ssa (void)
>>     hsa_bb *hbb = hsa_bb_for_bb (bb);
>>     hsa_insn_phi *phi;
>>
>>+    /* naive_process_phi can call split_edge on an incoming edge which
>>order if
>>+       the incoming edges to the basic block and thus make it
>>inconsistent with
>>+       the ordering of PHI arguments, so we collect them in advance.
>>*/
>>+    auto_vec<edge, 8> predecessors;
>>+    unsigned pred_count = EDGE_COUNT (bb->preds);
>>+    for (unsigned i = 0; i < pred_count; i++)
>>+      predecessors.safe_push (EDGE_PRED (bb, i));
>>+
>>     for (phi = hbb->m_first_phi;
>>        phi;
>>        phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
>>-      naive_process_phi (phi);
>>+      naive_process_phi (phi, predecessors);
>>
>>/* Zap PHI nodes, they will be deallocated when everything else will.
>>*/
>>     hbb->m_first_phi = NULL;
>

Patch

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index bd227626e83..6e054c0ce82 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -5657,8 +5657,37 @@  gen_hsa_phi_from_gimple_phi (gimple *phi_stmt, hsa_bb *hbb)
   hphi = new hsa_insn_phi (count, dest);
   hphi->m_bb = hbb->m_bb;
 
-  tree lhs = gimple_phi_result (phi_stmt);
+  auto_vec <tree, 8> aexprs;
+  auto_vec <hsa_op_reg *, 8> aregs;
+
+  /* Calling split_edge when processing a PHI node messes up with the order of
+     gimple phi node arguments (it moves the one associated with the edge to
+     the end).  We need to keep the order of edges and arguments of HSA phi
+     node arguments consistent, so we do all required splitting as the first
+     step, and in reverse order as to not be affected by the re-orderings.  */
+  for (unsigned j = count; j != 0; j--)
+    {
+      unsigned i = j - 1;
+      tree op = gimple_phi_arg_def (phi_stmt, i);
+      if (TREE_CODE (op) != ADDR_EXPR)
+	continue;
 
+      edge e = gimple_phi_arg_edge (as_a <gphi *> (phi_stmt), i);
+      hsa_bb *hbb_src = hsa_init_new_bb (split_edge (e));
+      hsa_op_address *addr = gen_hsa_addr (TREE_OPERAND (op, 0),
+					   hbb_src);
+
+      hsa_op_reg *dest
+	= new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
+      hsa_insn_basic *insn
+	= new hsa_insn_basic (2, BRIG_OPCODE_LDA, BRIG_TYPE_U64,
+			      dest, addr);
+      hbb_src->append_insn (insn);
+      aexprs.safe_push (op);
+      aregs.safe_push (dest);
+    }
+
+  tree lhs = gimple_phi_result (phi_stmt);
   for (unsigned i = 0; i < count; i++)
     {
       tree op = gimple_phi_arg_def (phi_stmt, i);
@@ -5684,18 +5713,14 @@  gen_hsa_phi_from_gimple_phi (gimple *phi_stmt, hsa_bb *hbb)
 	    }
 	  else if (TREE_CODE (op) == ADDR_EXPR)
 	    {
-	      edge e = gimple_phi_arg_edge (as_a <gphi *> (phi_stmt), i);
-	      hsa_bb *hbb_src = hsa_init_new_bb (split_edge (e));
-	      hsa_op_address *addr = gen_hsa_addr (TREE_OPERAND (op, 0),
-						   hbb_src);
-
-	      hsa_op_reg *dest
-		= new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
-	      hsa_insn_basic *insn
-		= new hsa_insn_basic (2, BRIG_OPCODE_LDA, BRIG_TYPE_U64,
-				      dest, addr);
-	      hbb_src->append_insn (insn);
-
+	      hsa_op_reg *dest = NULL;
+	      for (unsigned a_idx = 0; a_idx < aexprs.length (); a_idx++)
+		if (aexprs[a_idx] == op)
+		  {
+		    dest = aregs[a_idx];
+		    break;
+		  }
+	      gcc_assert (dest);
 	      hphi->set_op (i, dest);
 	    }
 	  else
diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c
index 2a17254c3b2..7fc3a8afa3d 100644
--- a/gcc/hsa-regalloc.c
+++ b/gcc/hsa-regalloc.c
@@ -42,7 +42,7 @@  along with GCC; see the file COPYING3.  If not see
 /* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa.  */
 
 static void
-naive_process_phi (hsa_insn_phi *phi)
+naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors)
 {
   unsigned count = phi->operand_count ();
   for (unsigned i = 0; i < count; i++)
@@ -55,7 +55,7 @@  naive_process_phi (hsa_insn_phi *phi)
       if (!op)
 	break;
 
-      e = EDGE_PRED (phi->m_bb, i);
+      e = predecessors[i];
       if (single_succ_p (e->src))
 	hbb = hsa_bb_for_bb (e->src);
       else
@@ -89,10 +89,18 @@  naive_outof_ssa (void)
     hsa_bb *hbb = hsa_bb_for_bb (bb);
     hsa_insn_phi *phi;
 
+    /* naive_process_phi can call split_edge on an incoming edge which order if
+       the incoming edges to the basic block and thus make it inconsistent with
+       the ordering of PHI arguments, so we collect them in advance.  */
+    auto_vec<edge, 8> predecessors;
+    unsigned pred_count = EDGE_COUNT (bb->preds);
+    for (unsigned i = 0; i < pred_count; i++)
+      predecessors.safe_push (EDGE_PRED (bb, i));
+
     for (phi = hbb->m_first_phi;
 	 phi;
 	 phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
-      naive_process_phi (phi);
+      naive_process_phi (phi, predecessors);
 
     /* Zap PHI nodes, they will be deallocated when everything else will.  */
     hbb->m_first_phi = NULL;