Patchwork [3/6] Fix PR46029: reimplement if-convert stores.

login
register
mail settings
Submitter Sebastian Pop
Date Oct. 28, 2010, 10:58 p.m.
Message ID <1288306702-5543-4-git-send-email-sebpop@gmail.com>
Download mbox | patch
Permalink /patch/69509/
State New
Headers show

Comments

Sebastian Pop - Oct. 28, 2010, 10:58 p.m.
2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>

	PR tree-optimization/46029
	* doc/invoke.texi (-ftree-loop-if-convert-stores): Update description.
	* tree-if-conv.c (has_unaligned_memory_refs): New.
	(if_convertible_gimple_assign_stmt_p): Call has_unaligned_memory_refs.
	(create_scratchpad): New.
	(create_indirect_cond_expr): New.
	(predicate_mem_writes): Call create_indirect_cond_expr.  Take an extra
	parameter for scratch_pad.
	(combine_blocks): Same.
	(tree_if_conversion): Same.
	(main_tree_if_conversion): Pass to tree_if_conversion a pointer to
	scratch_pad.

testsuite/
	* g++.dg/tree-ssa/ifc-pr46029.C: New.
	* gcc.dg/tree-ssa/ifc-8.c: New.
	* gcc.dg/tree-ssa/ifc-5.c: Make it exactly like the FFmpeg kernel.
---
 gcc/ChangeLog                               |   15 ++
 gcc/doc/invoke.texi                         |   18 ++-
 gcc/testsuite/ChangeLog                     |    7 +
 gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C |   76 +++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c       |   17 ++-
 gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c       |   29 ++++
 gcc/tree-if-conv.c                          |  193 +++++++++++++++++++++++----
 7 files changed, 318 insertions(+), 37 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4a51a4d..f14d9b1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,20 @@ 
 2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
 
+	PR tree-optimization/46029
+	* doc/invoke.texi (-ftree-loop-if-convert-stores): Update description.
+	* tree-if-conv.c (has_unaligned_memory_refs): New.
+	(if_convertible_gimple_assign_stmt_p): Call has_unaligned_memory_refs.
+	(create_scratchpad): New.
+	(create_indirect_cond_expr): New.
+	(predicate_mem_writes): Call create_indirect_cond_expr.  Take an extra
+	parameter for scratch_pad.
+	(combine_blocks): Same.
+	(tree_if_conversion): Same.
+	(main_tree_if_conversion): Pass to tree_if_conversion a pointer to
+	scratch_pad.
+
+2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
+
 	* tree-if-conv.c (struct ifc_dr): Removed.
 	(IFC_DR): Removed.
 	(DR_WRITTEN_AT_LEAST_ONCE): Removed.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ee68454..28b0cbb 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -6935,20 +6935,26 @@  if vectorization is enabled.
 
 @item -ftree-loop-if-convert-stores
 Attempt to also if-convert conditional jumps containing memory writes.
-This transformation can be unsafe for multi-threaded programs as it
-transforms conditional memory writes into unconditional memory writes.
 For example,
 @smallexample
 for (i = 0; i < N; i++)
   if (cond)
-    A[i] = expr;
+    A[i] = B[i] + 2;
 @end smallexample
 would be transformed to
 @smallexample
-for (i = 0; i < N; i++)
-  A[i] = cond ? expr : A[i];
+void *scratchpad = alloca (64);
+for (i = 0; i < N; i++) @{
+  a = cond ? &A[i] : scratchpad;
+  b = cond ? &B[i] : scratchpad;
+  *a = *b + 2;
+@}
 @end smallexample
-potentially producing data races.
+The compiler allocates a scratchpad memory on the stack for each
+function in which the if-conversion of memory stores or reads
+happened.  This scratchpad memory is used during the part of the
+computation that is discarded, i.e., when the condition is evaluated
+to false.
 
 @item -ftree-loop-distribution
 Perform loop distribution.  This flag can improve cache performance on
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8ab520e..bf73b91 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,12 @@ 
 2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
 
+	PR tree-optimization/46029
+	* g++.dg/tree-ssa/ifc-pr46029.C: New.
+	* gcc.dg/tree-ssa/ifc-8.c: New.
+	* gcc.dg/tree-ssa/ifc-5.c: Make it exactly like the FFmpeg kernel.
+
+2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
+
 	* gcc.dg/tree-ssa/flat-loop-1.c: New.
 	* gcc.dg/tree-ssa/flat-loop-2.c: New.
 	* gcc.dg/tree-ssa/flat-loop-3.c: New.
diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C b/gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
new file mode 100644
index 0000000..2a54bdb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
@@ -0,0 +1,76 @@ 
+// { dg-do run }
+/* { dg-options "-O -ftree-loop-if-convert-stores" } */
+
+namespace
+{
+  struct rb_tree_node_
+  {
+    rb_tree_node_ ():m_p_left (0), m_p_parent (0), m_metadata (0)
+    {
+    }
+    unsigned &get_metadata ()
+    {
+      return m_metadata;
+    }
+    rb_tree_node_ *m_p_left;
+    rb_tree_node_ *m_p_parent;
+    unsigned m_metadata;
+  };
+
+  struct bin_search_tree_const_node_it_
+  {
+    bin_search_tree_const_node_it_ (rb_tree_node_ * p_nd):m_p_nd (p_nd)
+    {
+    }
+    unsigned &get_metadata ()
+    {
+      return m_p_nd->get_metadata ();
+    }
+    bin_search_tree_const_node_it_ get_l_child ()
+    {
+      return bin_search_tree_const_node_it_ (m_p_nd->m_p_left);
+    }
+
+    rb_tree_node_ *m_p_nd;
+  };
+
+  struct bin_search_tree_no_data_
+  {
+    typedef rb_tree_node_ *node_pointer;
+      bin_search_tree_no_data_ ():m_p_head (new rb_tree_node_ ())
+    {
+    }
+    void insert_imp_empty (int r_value)
+    {
+      rb_tree_node_ *p_new_node = new rb_tree_node_ ();
+      m_p_head->m_p_parent = p_new_node;
+      p_new_node->m_p_parent = m_p_head;
+      update_to_top (m_p_head->m_p_parent);
+    }
+    void apply_update (bin_search_tree_const_node_it_ nd_it)
+    {
+      unsigned
+	l_max_endpoint
+	=
+	(nd_it.get_l_child ().m_p_nd ==
+	 0) ? 0 : nd_it.get_l_child ().get_metadata ();
+      nd_it.get_metadata () = l_max_endpoint;
+    }
+    void update_to_top (node_pointer p_nd)
+    {
+      while (p_nd != m_p_head)
+	{
+	  apply_update (p_nd);
+	  p_nd = p_nd->m_p_parent;
+	}
+    }
+
+    rb_tree_node_ * m_p_head;
+  };
+}
+
+int main ()
+{
+  bin_search_tree_no_data_ ().insert_imp_empty (0);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
index a9cc816..d88c4a2 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
@@ -12,11 +12,18 @@  dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs)
   for (i = 0; i <= nCoeffs; i++)
     {
       level = block[i];
-      if (level < 0)
-	level = level * qmul - qadd;
-      else
-	level = level * qmul + qadd;
-      block[i] = level;
+      if (level)
+        {
+          if (level < 0)
+            {
+              level = level * qmul - qadd;
+            }
+          else
+            {
+              level = level * qmul + qadd;
+            }
+          block[i] = level;
+        }
     }
 }
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
new file mode 100644
index 0000000..d7cf279
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
@@ -0,0 +1,29 @@ 
+/* { dg-do compile } */
+/* { dg-options "-c -O2 -ftree-vectorize" { target *-*-* } } */
+
+typedef union tree_node *tree;
+struct tree_common
+{
+  unsigned volatile_flag : 1;
+  unsigned unsigned_flag : 1;
+};
+struct tree_type
+{
+  tree next_variant;
+  tree main_variant;
+};
+union tree_node
+{
+  struct tree_common common;
+  struct tree_type type;
+};
+void finish_enum (tree enumtype)
+{
+  tree tem;
+  for (tem = ((enumtype)->type.main_variant); tem; tem = ((tem)->type.next_variant))
+    {
+      if (tem == enumtype)
+	continue;
+      ((tem)->common.unsigned_flag) = ((enumtype)->common.unsigned_flag);
+    }
+}
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index ec03bf6..cb4828a 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -459,6 +459,36 @@  ifcvt_could_trap_p (gimple stmt)
   return gimple_could_trap_p (stmt);
 }
 
+/* Returns true when stmt contains a data reference  */
+
+static bool
+has_unaligned_memory_refs (gimple stmt)
+{
+  int unsignedp, volatilep;
+  HOST_WIDE_INT bitsize, bitpos;
+  tree toffset;
+  enum machine_mode mode;
+  VEC (data_ref_loc, heap) *refs = VEC_alloc (data_ref_loc, heap, 3);
+  bool res = get_references_in_stmt (stmt, &refs);
+  unsigned i;
+  data_ref_loc *ref;
+
+  FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
+    {
+      get_inner_reference (*ref->pos, &bitsize, &bitpos, &toffset,
+			   &mode, &unsignedp, &volatilep, true);
+
+      if ((bitpos % BITS_PER_UNIT) != 0)
+	{
+	  res = true;
+	  break;
+	}
+    }
+
+  VEC_free (data_ref_loc, heap, refs);
+  return res;
+}
+
 /* Return true when STMT is if-convertible.
 
    GIMPLE_ASSIGN statement is not if-convertible if,
@@ -501,6 +531,14 @@  if_convertible_gimple_assign_stmt_p (gimple stmt)
 	    fprintf (dump_file, "tree could trap...\n");
 	  return false;
 	}
+
+      if (has_unaligned_memory_refs (stmt))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "uses misaligned memory...\n");
+	  return false;
+	}
+
       return true;
     }
 
@@ -1190,6 +1228,78 @@  insert_gimplified_predicates (loop_p loop)
     }
 }
 
+/* Insert at the beginning of the first basic block of the current
+   function the allocation on the stack of N bytes of memory and
+   return a pointer to this scratchpad memory.  */
+
+static tree
+create_scratchpad (void)
+{
+  basic_block bb = single_succ (ENTRY_BLOCK_PTR);
+  gimple_stmt_iterator gsi = gsi_after_labels (bb);
+
+  /* void *tmp = __builtin_alloca */
+  const char *name = "scratch_pad";
+  tree x = build_int_cst (integer_type_node, 64);
+  gimple stmt = gimple_build_call (built_in_decls[BUILT_IN_ALLOCA], 1, x);
+  tree var = create_tmp_var (ptr_type_node, name);
+  tree tmp = make_ssa_name (var, stmt);
+
+  add_referenced_var (var);
+  gimple_call_set_lhs (stmt, tmp);
+  SSA_NAME_DEF_STMT (tmp) = stmt;
+  update_stmt (stmt);
+
+  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+  return tmp;
+}
+
+/* Returns a memory reference to the pointer defined by the
+   conditional expression: pointer = cond ? &A[i] : scratch_pad; and
+   inserts this code at GSI.  */
+
+static tree
+create_indirect_cond_expr (tree ai, tree cond, tree *scratch_pad,
+			   gimple_stmt_iterator *gsi)
+{
+  tree type = TREE_TYPE (ai);
+
+  tree pointer_to_type, address_of_ai, addr_expr, cond_expr;
+  tree pointer, star_pointer;
+  gimple addr_stmt, pointer_stmt;
+
+  /* address_of_ai = &A[i];  */
+  pointer_to_type = build_pointer_type (type);
+  address_of_ai = create_tmp_var (pointer_to_type, "_ifc_");
+  add_referenced_var (address_of_ai);
+  addr_expr = build_fold_addr_expr (ai);
+  addr_stmt = gimple_build_assign (address_of_ai, addr_expr);
+  address_of_ai = make_ssa_name (address_of_ai, addr_stmt);
+  gimple_assign_set_lhs (addr_stmt, address_of_ai);
+  SSA_NAME_DEF_STMT (address_of_ai) = addr_stmt;
+  update_stmt (addr_stmt);
+  gsi_insert_before (gsi, addr_stmt, GSI_SAME_STMT);
+
+  /* Allocate the scratch pad only once per function.  */
+  if (!*scratch_pad)
+    *scratch_pad = create_scratchpad ();
+
+  /* pointer = cond ? address_of_ai : scratch_pad;  */
+  pointer = create_tmp_var (pointer_to_type, "_ifc_");
+  add_referenced_var (pointer);
+  cond_expr = build3 (COND_EXPR, pointer_to_type, unshare_expr (cond),
+		      address_of_ai, *scratch_pad);
+  pointer_stmt = gimple_build_assign (pointer, cond_expr);
+  pointer = make_ssa_name (pointer, pointer_stmt);
+  gimple_assign_set_lhs (pointer_stmt, pointer);
+  SSA_NAME_DEF_STMT (pointer) = pointer_stmt;
+  update_stmt (pointer_stmt);
+  gsi_insert_before (gsi, pointer_stmt, GSI_SAME_STMT);
+
+  star_pointer = build_simple_mem_ref (pointer);
+  return star_pointer;
+}
+
 /* Predicate each write to memory in LOOP.
 
    This function transforms control flow constructs containing memory
@@ -1201,10 +1311,19 @@  insert_gimplified_predicates (loop_p loop)
 
    into the following form that does not contain control flow:
 
-   | for (i = 0; i < N; i++)
-   |   A[i] = cond ? expr : A[i];
+   | void *scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
+   |
+   | for (i = 0; i < N; i++) {
+   |   p = cond ? &A[i] : scratch_pad;
+   |   *p = expr;
+   | }
+
+   SCRATCH_PAD is allocated on the stack for each function once and it is
+   large enough to contain any kind of scalar assignment or read.  All
+   values read or written to SCRATCH_PAD are not used in the computation.
 
-   The original CFG looks like this:
+   In a more detailed way, the if-conversion of memory writes works
+   like this, supposing that the original CFG looks like this:
 
    | bb_0
    |   i = 0
@@ -1254,10 +1373,12 @@  insert_gimplified_predicates (loop_p loop)
    |   goto bb_1
    | end_bb_4
 
-   predicate_mem_writes is then predicating the memory write as follows:
+   predicate_mem_writes is then allocating SCRATCH_PAD in the basic block
+   preceding the loop header, and is predicating the memory write:
 
    | bb_0
    |   i = 0
+   |   scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
    | end_bb_0
    |
    | bb_1
@@ -1265,12 +1386,14 @@  insert_gimplified_predicates (loop_p loop)
    | end_bb_1
    |
    | bb_2
+   |   cond = some_computation;
    |   if (cond) goto bb_3 else goto bb_4
    | end_bb_2
    |
    | bb_3
    |   cond = some_computation;
-   |   A[i] = cond ? expr : A[i];
+   |   p = cond ? &A[i] : scratch_pad;
+   |   *p = expr;
    |   goto bb_4
    | end_bb_3
    |
@@ -1283,12 +1406,14 @@  insert_gimplified_predicates (loop_p loop)
 
    | bb_0
    |   i = 0
+   |   scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
    |   if (i < N) goto bb_5 else goto bb_1
    | end_bb_0
    |
    | bb_1
    |   cond = some_computation;
-   |   A[i] = cond ? expr : A[i];
+   |   p = cond ? &A[i] : scratch_pad;
+   |   *p = expr;
    |   if (i < N) goto bb_5 else goto bb_4
    | end_bb_1
    |
@@ -1298,7 +1423,7 @@  insert_gimplified_predicates (loop_p loop)
 */
 
 static void
-predicate_mem_writes (loop_p loop)
+predicate_mem_writes (loop_p loop, tree *scratch_pad)
 {
   unsigned int i, orig_loop_num_nodes = loop->num_nodes;
 
@@ -1313,20 +1438,35 @@  predicate_mem_writes (loop_p loop)
 	continue;
 
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-	if ((stmt = gsi_stmt (gsi))
-	    && gimple_assign_single_p (stmt)
-	    && gimple_vdef (stmt))
-	  {
-	    tree lhs = gimple_assign_lhs (stmt);
-	    tree rhs = gimple_assign_rhs1 (stmt);
-	    tree type = TREE_TYPE (lhs);
-
-	    lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
-	    rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
-	    rhs = build3 (COND_EXPR, type, unshare_expr (cond), rhs, lhs);
-	    gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
-	    update_stmt (stmt);
-	  }
+	{
+	  stmt = gsi_stmt (gsi);
+	  if (gimple_assign_single_p (stmt)
+	      && gimple_vdef (stmt))
+	    {
+	      /* A[i] = x;  */
+	      tree ai = gimple_assign_lhs (stmt);
+
+	      /* pointer = cond ? &A[i] : scratch_pad;  */
+	      tree star_pointer = create_indirect_cond_expr (ai, cond,
+							     scratch_pad, &gsi);
+	      /* *pointer = x;  */
+	      gimple_assign_set_lhs (stmt, star_pointer);
+	      update_stmt (stmt);
+	    }
+	  else if (gimple_assign_single_p (stmt)
+		   && gimple_vuse (stmt))
+	    {
+	      /* x = A[i];  */
+	      tree ai = gimple_assign_rhs1 (stmt);
+
+	      /* pointer = cond ? &A[i] : scratch_pad;  */
+	      tree star_pointer = create_indirect_cond_expr (ai, cond,
+							     scratch_pad, &gsi);
+	      /* x = *pointer;  */
+	      gimple_assign_set_rhs1 (stmt, star_pointer);
+	      update_stmt (stmt);
+	    }
+	}
     }
 }
 
@@ -1376,7 +1516,7 @@  remove_conditions_and_labels (loop_p loop)
    blocks.  Replace PHI nodes with conditional modify expressions.  */
 
 static void
-combine_blocks (struct loop *loop)
+combine_blocks (struct loop *loop, tree *scratch_pad)
 {
   basic_block bb, exit_bb, merge_target_bb;
   unsigned int orig_loop_num_nodes = loop->num_nodes;
@@ -1389,7 +1529,7 @@  combine_blocks (struct loop *loop)
   predicate_all_scalar_phis (loop);
 
   if (flag_tree_loop_if_convert_stores)
-    predicate_mem_writes (loop);
+    predicate_mem_writes (loop, scratch_pad);
 
   /* Merge basic blocks: first remove all the edges in the loop,
      except for those from the exit block.  */
@@ -1478,7 +1618,7 @@  combine_blocks (struct loop *loop)
    profitability analysis.  Returns true when something changed.  */
 
 static bool
-tree_if_conversion (struct loop *loop)
+tree_if_conversion (struct loop *loop, tree *scratch_pad)
 {
   bool changed = false;
   ifc_bbs = NULL;
@@ -1490,7 +1630,7 @@  tree_if_conversion (struct loop *loop)
   /* Now all statements are if-convertible.  Combine all the basic
      blocks into one huge basic block doing the if-conversion
      on-the-fly.  */
-  combine_blocks (loop);
+  combine_blocks (loop, scratch_pad);
 
   if (flag_tree_loop_if_convert_stores)
     mark_sym_for_renaming (gimple_vop (cfun));
@@ -1521,12 +1661,13 @@  main_tree_if_conversion (void)
   struct loop *loop;
   bool changed = false;
   unsigned todo = 0;
+  tree scratch_pad = NULL_TREE;
 
   if (number_of_loops () <= 1)
     return 0;
 
   FOR_EACH_LOOP (li, loop, 0)
-    changed |= tree_if_conversion (loop);
+    changed |= tree_if_conversion (loop, &scratch_pad);
 
   if (changed)
     todo |= TODO_cleanup_cfg;