From 3dd7298ecc74c49df46e05913efeb8395bb11c62 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Tue, 26 Oct 2010 16:34:29 -0500
Subject: [PATCH] Fix PR46029: reimplement if-convert stores.
2010-10-20 Sebastian Pop <sebastian.pop@amd.com>
PR tree-optimization/46029
* doc/invoke.texi (-ftree-loop-if-convert-stores): Update description.
* tree-if-conv.c (has_unaligned_memory_refs): New.
(if_convertible_gimple_assign_stmt_p): Call has_unaligned_memory_refs.
(create_scratchpad): New.
(create_indirect_cond_expr): New.
(predicate_mem_writes): Call create_indirect_cond_expr. Take an extra
parameter for scratch_pad.
(combine_blocks): Same.
(tree_if_conversion): Same.
(main_tree_if_conversion): Pass to tree_if_conversion a pointer to
scratch_pad.
(struct ifc_dr): Removed.
(IFC_DR): Removed.
(DR_WRITTEN_AT_LEAST_ONCE): Removed.
(DR_RW_UNCONDITIONALLY): Removed.
(memrefs_read_or_written_unconditionally): Removed.
(write_memrefs_written_at_least_once): Removed.
(ifcvt_memrefs_wont_trap): Removed.
(ifcvt_could_trap_p): Does not take refs parameter anymore.
(if_convertible_gimple_assign_stmt_p): Same.
(if_convertible_stmt_p): Same.
(if_convertible_loop_p_1): Remove initialization of dr->aux,
DR_WRITTEN_AT_LEAST_ONCE, and DR_RW_UNCONDITIONALLY.
(if_convertible_loop_p): Remove deallocation of the same.
testsuite/
* g++.dg/tree-ssa/ifc-pr46029.C: New.
* gcc.dg/tree-ssa/ifc-8.c: New.
* gcc.dg/tree-ssa/ifc-5.c: Make it exactly like the FFmpeg kernel.
---
gcc/ChangeLog | 28 ++
gcc/doc/invoke.texi | 18 +-
gcc/testsuite/ChangeLog | 7 +
gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C | 76 ++++++
gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c | 19 +-
gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c | 29 ++
gcc/tree-if-conv.c | 375 ++++++++++++---------------
7 files changed, 332 insertions(+), 220 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
@@ -1,3 +1,31 @@
+2010-10-20 Sebastian Pop <sebastian.pop@amd.com>
+
+ PR tree-optimization/46029
+ * doc/invoke.texi (-ftree-loop-if-convert-stores): Update description.
+ * tree-if-conv.c (has_unaligned_memory_refs): New.
+ (if_convertible_gimple_assign_stmt_p): Call has_unaligned_memory_refs.
+ (create_scratchpad): New.
+ (create_indirect_cond_expr): New.
+ (predicate_mem_writes): Call create_indirect_cond_expr. Take an extra
+ parameter for scratch_pad.
+ (combine_blocks): Same.
+ (tree_if_conversion): Same.
+ (main_tree_if_conversion): Pass to tree_if_conversion a pointer to
+ scratch_pad.
+ (struct ifc_dr): Removed.
+ (IFC_DR): Removed.
+ (DR_WRITTEN_AT_LEAST_ONCE): Removed.
+ (DR_RW_UNCONDITIONALLY): Removed.
+ (memrefs_read_or_written_unconditionally): Removed.
+ (write_memrefs_written_at_least_once): Removed.
+ (ifcvt_memrefs_wont_trap): Removed.
+ (ifcvt_could_trap_p): Does not take refs parameter anymore.
+ (if_convertible_gimple_assign_stmt_p): Same.
+ (if_convertible_stmt_p): Same.
+ (if_convertible_loop_p_1): Remove initialization of dr->aux,
+ DR_WRITTEN_AT_LEAST_ONCE, and DR_RW_UNCONDITIONALLY.
+ (if_convertible_loop_p): Remove deallocation of the same.
+
2010-11-11 Joern Rennecke <amylaar@spamcop.net>
PR target/44749
@@ -6959,20 +6959,26 @@ if vectorization is enabled.
@item -ftree-loop-if-convert-stores
Attempt to also if-convert conditional jumps containing memory writes.
-This transformation can be unsafe for multi-threaded programs as it
-transforms conditional memory writes into unconditional memory writes.
For example,
@smallexample
for (i = 0; i < N; i++)
if (cond)
- A[i] = expr;
+ A[i] = B[i] + 2;
@end smallexample
would be transformed to
@smallexample
-for (i = 0; i < N; i++)
- A[i] = cond ? expr : A[i];
+void *scratchpad = alloca (64);
+for (i = 0; i < N; i++) @{
+ a = cond ? &A[i] : scratchpad;
+ b = cond ? &B[i] : scratchpad;
+ *a = *b + 2;
+@}
@end smallexample
-potentially producing data races.
+The compiler allocates a scratchpad memory on the stack for each
+function in which the if-conversion of memory stores or reads
+happened. This scratchpad memory is used during the part of the
+computation that is discarded, i.e., when the condition is evaluated
+to false.
@item -ftree-loop-distribution
Perform loop distribution. This flag can improve cache performance on
@@ -1,3 +1,10 @@
+2010-10-20 Sebastian Pop <sebastian.pop@amd.com>
+
+ PR tree-optimization/46029
+ * g++.dg/tree-ssa/ifc-pr46029.C: New.
+ * gcc.dg/tree-ssa/ifc-8.c: New.
+ * gcc.dg/tree-ssa/ifc-5.c: Make it exactly like the FFmpeg kernel.
+
2010-11-11 Nicola Pero <nicola.pero@meta-innovation.com>
* objc.dg/property/at-property-20.m: New.
new file mode 100644
@@ -0,0 +1,76 @@
+// { dg-do run }
+/* { dg-options "-O -ftree-loop-if-convert-stores" } */
+
+namespace
+{
+ struct rb_tree_node_
+ {
+ rb_tree_node_ ():m_p_left (0), m_p_parent (0), m_metadata (0)
+ {
+ }
+ unsigned &get_metadata ()
+ {
+ return m_metadata;
+ }
+ rb_tree_node_ *m_p_left;
+ rb_tree_node_ *m_p_parent;
+ unsigned m_metadata;
+ };
+
+ struct bin_search_tree_const_node_it_
+ {
+ bin_search_tree_const_node_it_ (rb_tree_node_ * p_nd):m_p_nd (p_nd)
+ {
+ }
+ unsigned &get_metadata ()
+ {
+ return m_p_nd->get_metadata ();
+ }
+ bin_search_tree_const_node_it_ get_l_child ()
+ {
+ return bin_search_tree_const_node_it_ (m_p_nd->m_p_left);
+ }
+
+ rb_tree_node_ *m_p_nd;
+ };
+
+ struct bin_search_tree_no_data_
+ {
+ typedef rb_tree_node_ *node_pointer;
+ bin_search_tree_no_data_ ():m_p_head (new rb_tree_node_ ())
+ {
+ }
+ void insert_imp_empty (int r_value)
+ {
+ rb_tree_node_ *p_new_node = new rb_tree_node_ ();
+ m_p_head->m_p_parent = p_new_node;
+ p_new_node->m_p_parent = m_p_head;
+ update_to_top (m_p_head->m_p_parent);
+ }
+ void apply_update (bin_search_tree_const_node_it_ nd_it)
+ {
+ unsigned
+ l_max_endpoint
+ =
+ (nd_it.get_l_child ().m_p_nd ==
+ 0) ? 0 : nd_it.get_l_child ().get_metadata ();
+ nd_it.get_metadata () = l_max_endpoint;
+ }
+ void update_to_top (node_pointer p_nd)
+ {
+ while (p_nd != m_p_head)
+ {
+ apply_update (p_nd);
+ p_nd = p_nd->m_p_parent;
+ }
+ }
+
+ rb_tree_node_ * m_p_head;
+ };
+}
+
+int main ()
+{
+ bin_search_tree_no_data_ ().insert_imp_empty (0);
+ return 0;
+}
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-c -O2 -ftree-vectorize -fdump-tree-ifcvt-stats" { target *-*-* } } */
+/* { dg-options "-c -O2 -ftree-vectorize -ftree-loop-if-convert-stores -fdump-tree-ifcvt-stats" { target *-*-* } } */
void
dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs)
@@ -12,11 +12,18 @@ dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs)
for (i = 0; i <= nCoeffs; i++)
{
level = block[i];
- if (level < 0)
- level = level * qmul - qadd;
- else
- level = level * qmul + qadd;
- block[i] = level;
+ if (level)
+ {
+ if (level < 0)
+ {
+ level = level * qmul - qadd;
+ }
+ else
+ {
+ level = level * qmul + qadd;
+ }
+ block[i] = level;
+ }
}
}
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-c -O2 -ftree-vectorize" { target *-*-* } } */
+
+typedef union tree_node *tree;
+struct tree_common
+{
+ unsigned volatile_flag : 1;
+ unsigned unsigned_flag : 1;
+};
+struct tree_type
+{
+ tree next_variant;
+ tree main_variant;
+};
+union tree_node
+{
+ struct tree_common common;
+ struct tree_type type;
+};
+void finish_enum (tree enumtype)
+{
+ tree tem;
+ for (tem = ((enumtype)->type.main_variant); tem; tem = ((tem)->type.next_variant))
+ {
+ if (tem == enumtype)
+ continue;
+ ((tem)->common.unsigned_flag) = ((enumtype)->common.unsigned_flag);
+ }
+}
@@ -226,7 +226,7 @@ ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
gimple stmt;
/* Create new temporary variable. */
- var = create_tmp_var (type, name);
+ var = create_tmp_reg (type, name);
add_referenced_var (var);
/* Build new statement to assign EXPR to new variable. */
@@ -446,171 +446,38 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
return true;
}
-/* Records the status of a data reference. This struct is attached to
- each DR->aux field. */
-
-struct ifc_dr {
- /* -1 when not initialized, 0 when false, 1 when true. */
- int written_at_least_once;
-
- /* -1 when not initialized, 0 when false, 1 when true. */
- int rw_unconditionally;
-};
-
-#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
-#define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
-#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
-
-/* Returns true when the memory references of STMT are read or written
- unconditionally. In other words, this function returns true when
- for every data reference A in STMT there exist other accesses to
- the same data reference with predicates that add up (OR-up) to the
- true predicate: this ensures that the data reference A is touched
- (read or written) on every iteration of the if-converted loop. */
+/* Wrapper around gimple_could_trap_p refined for the needs of the
+ if-conversion. */
static bool
-memrefs_read_or_written_unconditionally (gimple stmt,
- VEC (data_reference_p, heap) *drs)
+ifcvt_could_trap_p (gimple stmt)
{
- int i, j;
- data_reference_p a, b;
- tree ca = bb_predicate (gimple_bb (stmt));
-
- for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
- if (DR_STMT (a) == stmt)
- {
- bool found = false;
- int x = DR_RW_UNCONDITIONALLY (a);
-
- if (x == 0)
- return false;
-
- if (x == 1)
- continue;
-
- for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
- if (DR_STMT (b) != stmt
- && same_data_refs (a, b))
- {
- tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
-
- if (DR_RW_UNCONDITIONALLY (b) == 1
- || is_true_predicate (cb)
- || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
- ca, cb)))
- {
- DR_RW_UNCONDITIONALLY (a) = 1;
- DR_RW_UNCONDITIONALLY (b) = 1;
- found = true;
- break;
- }
- }
-
- if (!found)
- {
- DR_RW_UNCONDITIONALLY (a) = 0;
- return false;
- }
- }
+ if (gimple_vuse (stmt)
+ && !gimple_could_trap_p_1 (stmt, false, false))
+ return false;
- return true;
+ return gimple_could_trap_p (stmt);
}
-/* Returns true when the memory references of STMT are unconditionally
- written. In other words, this function returns true when for every
- data reference A written in STMT, there exist other writes to the
- same data reference with predicates that add up (OR-up) to the true
- predicate: this ensures that the data reference A is written on
- every iteration of the if-converted loop. */
+/* Returns true when stmt contains a data reference. */
static bool
-write_memrefs_written_at_least_once (gimple stmt,
- VEC (data_reference_p, heap) *drs)
+has_non_addressable_refs (gimple stmt)
{
- int i, j;
- data_reference_p a, b;
- tree ca = bb_predicate (gimple_bb (stmt));
+ VEC (data_ref_loc, heap) *refs = VEC_alloc (data_ref_loc, heap, 3);
+ bool res = get_references_in_stmt (stmt, &refs);
+ unsigned i;
+ data_ref_loc *ref;
- for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
- if (DR_STMT (a) == stmt
- && DR_IS_WRITE (a))
+ FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
+ if (may_be_nonaddressable_p (*(ref->pos)))
{
- bool found = false;
- int x = DR_WRITTEN_AT_LEAST_ONCE (a);
-
- if (x == 0)
- return false;
-
- if (x == 1)
- continue;
-
- for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
- if (DR_STMT (b) != stmt
- && DR_IS_WRITE (b)
- && same_data_refs_base_objects (a, b))
- {
- tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
-
- if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
- || is_true_predicate (cb)
- || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
- ca, cb)))
- {
- DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
- DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
- found = true;
- break;
- }
- }
-
- if (!found)
- {
- DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
- return false;
- }
+ res = true;
+ break;
}
- return true;
-}
-
-/* Return true when the memory references of STMT won't trap in the
- if-converted code. There are two things that we have to check for:
-
- - writes to memory occur to writable memory: if-conversion of
- memory writes transforms the conditional memory writes into
- unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
- into "A[i] = cond ? foo : A[i]", and as the write to memory may not
- be executed at all in the original code, it may be a readonly
- memory. To check that A is not const-qualified, we check that
- there exists at least an unconditional write to A in the current
- function.
-
- - reads or writes to memory are valid memory accesses for every
- iteration. To check that the memory accesses are correctly formed
- and that we are allowed to read and write in these locations, we
- check that the memory accesses to be if-converted occur at every
- iteration unconditionally. */
-
-static bool
-ifcvt_memrefs_wont_trap (gimple stmt, VEC (data_reference_p, heap) *refs)
-{
- return write_memrefs_written_at_least_once (stmt, refs)
- && memrefs_read_or_written_unconditionally (stmt, refs);
-}
-
-/* Wrapper around gimple_could_trap_p refined for the needs of the
- if-conversion. Try to prove that the memory accesses of STMT could
- not trap in the innermost loop containing STMT. */
-
-static bool
-ifcvt_could_trap_p (gimple stmt, VEC (data_reference_p, heap) *refs)
-{
- if (gimple_vuse (stmt)
- && !gimple_could_trap_p_1 (stmt, false, false)
- && ifcvt_memrefs_wont_trap (stmt, refs))
- return false;
-
- return gimple_could_trap_p (stmt);
+ VEC_free (data_ref_loc, heap, refs);
+ return res;
}
/* Return true when STMT is if-convertible.
@@ -621,8 +488,7 @@ ifcvt_could_trap_p (gimple stmt, VEC (data_reference_p, heap) *refs)
- LHS is not var decl. */
static bool
-if_convertible_gimple_assign_stmt_p (gimple stmt,
- VEC (data_reference_p, heap) *refs)
+if_convertible_gimple_assign_stmt_p (gimple stmt)
{
tree lhs = gimple_assign_lhs (stmt);
basic_block bb;
@@ -650,19 +516,27 @@ if_convertible_gimple_assign_stmt_p (gimple stmt,
if (flag_tree_loop_if_convert_stores)
{
- if (ifcvt_could_trap_p (stmt, refs))
+ if (ifcvt_could_trap_p (stmt))
{
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "tree could trap...\n");
+ fprintf (dump_file, "tree could trap\n");
return false;
}
+
+ if (has_non_addressable_refs (stmt))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "has non-addressable memory references\n");
+ return false;
+ }
+
return true;
}
if (gimple_assign_rhs_could_trap_p (stmt))
{
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "tree could trap...\n");
+ fprintf (dump_file, "tree could trap\n");
return false;
}
@@ -690,7 +564,7 @@ if_convertible_gimple_assign_stmt_p (gimple stmt,
- it is a GIMPLE_LABEL or a GIMPLE_COND. */
static bool
-if_convertible_stmt_p (gimple stmt, VEC (data_reference_p, heap) *refs)
+if_convertible_stmt_p (gimple stmt)
{
switch (gimple_code (stmt))
{
@@ -700,7 +574,7 @@ if_convertible_stmt_p (gimple stmt, VEC (data_reference_p, heap) *refs)
return true;
case GIMPLE_ASSIGN:
- return if_convertible_gimple_assign_stmt_p (stmt, refs);
+ return if_convertible_gimple_assign_stmt_p (stmt);
default:
/* Don't know what to do with 'em so don't do anything. */
@@ -1016,18 +890,6 @@ if_convertible_loop_p_1 (struct loop *loop,
if (!res)
return false;
- if (flag_tree_loop_if_convert_stores)
- {
- data_reference_p dr;
-
- for (i = 0; VEC_iterate (data_reference_p, *refs, i, dr); i++)
- {
- dr->aux = XNEW (struct ifc_dr);
- DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
- DR_RW_UNCONDITIONALLY (dr) = -1;
- }
- }
-
for (i = 0; i < loop->num_nodes; i++)
{
basic_block bb = ifc_bbs[i];
@@ -1040,7 +902,7 @@ if_convertible_loop_p_1 (struct loop *loop,
/* Check the if-convertibility of statements in predicated BBs. */
if (is_predicated (bb))
for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
- if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
+ if (!if_convertible_stmt_p (gsi_stmt (itr)))
return false;
}
@@ -1101,15 +963,6 @@ if_convertible_loop_p (struct loop *loop)
ddrs = VEC_alloc (ddr_p, heap, 25);
res = if_convertible_loop_p_1 (loop, &refs, &ddrs);
- if (flag_tree_loop_if_convert_stores)
- {
- data_reference_p dr;
- unsigned int i;
-
- for (i = 0; VEC_iterate (data_reference_p, refs, i, dr); i++)
- free (dr->aux);
- }
-
free_data_refs (refs);
free_dependence_relations (ddrs);
return res;
@@ -1370,6 +1223,81 @@ insert_gimplified_predicates (loop_p loop)
}
}
+/* Inserts at position GSI a statement "ADDRESS_OF_AI = &AI;" and
+ returns the ADDRESS_OF_AI. */
+
+static tree
+insert_address_of (tree ai, gimple_stmt_iterator *gsi)
+{
+ tree addr_expr = build_fold_addr_expr (ai);
+ tree address_of_ai = create_tmp_reg (TREE_TYPE (addr_expr), "_ifc_");
+ gimple addr_stmt = gimple_build_assign (address_of_ai, addr_expr);
+
+ add_referenced_var (address_of_ai);
+ address_of_ai = make_ssa_name (address_of_ai, addr_stmt);
+ gimple_assign_set_lhs (addr_stmt, address_of_ai);
+ SSA_NAME_DEF_STMT (address_of_ai) = addr_stmt;
+ update_stmt (addr_stmt);
+ gsi_insert_before (gsi, addr_stmt, GSI_SAME_STMT);
+
+ return address_of_ai;
+}
+
+/* Insert at the beginning of the first basic block of the current
+ function the allocation on the stack of N_BYTES of memory and
+ return a pointer to this scratchpad memory. */
+
+static tree
+create_scratchpad (int n_bytes)
+{
+ basic_block bb = single_succ (ENTRY_BLOCK_PTR);
+ gimple_stmt_iterator gsi = gsi_after_labels (bb);
+ tree x = build_int_cst (integer_type_node, n_bytes - 1);
+ tree elt_type = char_type_node;
+ tree array_type = build_array_type (elt_type, build_index_type (x));
+ tree base = create_tmp_reg (array_type, "scratch_pad");
+ tree a0 = build4 (ARRAY_REF, elt_type, base, integer_zero_node, NULL_TREE,
+ NULL_TREE);
+
+ add_referenced_var (base);
+ return insert_address_of (a0, &gsi);
+}
+
+/* Returns a memory reference to the pointer defined by the
+ conditional expression: pointer = cond ? &A[i] : scratch_pad; and
+ inserts this code at GSI. */
+
+static tree
+create_indirect_cond_expr (tree ai, tree cond, tree *scratch_pad,
+ gimple_stmt_iterator *gsi)
+{
+ tree cond_expr;
+ tree pointer;
+ gimple pointer_stmt;
+
+ /* address_of_ai = &A[i]; */
+ tree address_of_ai = insert_address_of (ai, gsi);
+
+ /* Allocate the scratch pad only once per function. */
+ if (!*scratch_pad)
+ *scratch_pad = create_scratchpad (64);
+
+ /* pointer = cond ? address_of_ai : scratch_pad; */
+ pointer = create_tmp_reg (TREE_TYPE (address_of_ai), "_ifc_");
+ add_referenced_var (pointer);
+ cond_expr = build3 (COND_EXPR, TREE_TYPE (address_of_ai),
+ unshare_expr (cond), address_of_ai, *scratch_pad);
+ pointer_stmt = gimple_build_assign (pointer, cond_expr);
+ pointer = make_ssa_name (pointer, pointer_stmt);
+ gimple_assign_set_lhs (pointer_stmt, pointer);
+ SSA_NAME_DEF_STMT (pointer) = pointer_stmt;
+ update_stmt (pointer_stmt);
+ gsi_insert_before (gsi, pointer_stmt, GSI_SAME_STMT);
+
+ return build2 (MEM_REF, TREE_TYPE (ai), pointer,
+ build_int_cst (reference_alias_ptr_type (ai), 0));
+}
+
/* Predicate each write to memory in LOOP.
This function transforms control flow constructs containing memory
@@ -1381,10 +1309,19 @@ insert_gimplified_predicates (loop_p loop)
into the following form that does not contain control flow:
- | for (i = 0; i < N; i++)
- | A[i] = cond ? expr : A[i];
+ | void *scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
+ |
+ | for (i = 0; i < N; i++) {
+ | p = cond ? &A[i] : scratch_pad;
+ | *p = expr;
+ | }
+
+ SCRATCH_PAD is allocated on the stack for each function once and it is
+ large enough to contain any kind of scalar assignment or read. All
+ values read or written to SCRATCH_PAD are not used in the computation.
- The original CFG looks like this:
+ In a more detailed way, the if-conversion of memory writes works
+ like this, supposing that the original CFG looks like this:
| bb_0
| i = 0
@@ -1434,10 +1371,12 @@ insert_gimplified_predicates (loop_p loop)
| goto bb_1
| end_bb_4
- predicate_mem_writes is then predicating the memory write as follows:
+ predicate_mem_writes is then allocating SCRATCH_PAD in the basic block
+ preceding the loop header, and is predicating the memory write:
| bb_0
| i = 0
+ | scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
| end_bb_0
|
| bb_1
@@ -1445,12 +1384,14 @@ insert_gimplified_predicates (loop_p loop)
| end_bb_1
|
| bb_2
+ | cond = some_computation;
| if (cond) goto bb_3 else goto bb_4
| end_bb_2
|
| bb_3
| cond = some_computation;
- | A[i] = cond ? expr : A[i];
+ | p = cond ? &A[i] : scratch_pad;
+ | *p = expr;
| goto bb_4
| end_bb_3
|
@@ -1463,12 +1404,14 @@ insert_gimplified_predicates (loop_p loop)
| bb_0
| i = 0
+ | scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
| if (i < N) goto bb_5 else goto bb_1
| end_bb_0
|
| bb_1
| cond = some_computation;
- | A[i] = cond ? expr : A[i];
+ | p = cond ? &A[i] : scratch_pad;
+ | *p = expr;
| if (i < N) goto bb_5 else goto bb_4
| end_bb_1
|
@@ -1478,7 +1421,7 @@ insert_gimplified_predicates (loop_p loop)
*/
static void
-predicate_mem_writes (loop_p loop)
+predicate_mem_writes (loop_p loop, tree *scratch_pad)
{
unsigned int i, orig_loop_num_nodes = loop->num_nodes;
@@ -1493,20 +1436,35 @@ predicate_mem_writes (loop_p loop)
continue;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- if ((stmt = gsi_stmt (gsi))
- && gimple_assign_single_p (stmt)
- && gimple_vdef (stmt))
- {
- tree lhs = gimple_assign_lhs (stmt);
- tree rhs = gimple_assign_rhs1 (stmt);
- tree type = TREE_TYPE (lhs);
-
- lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
- rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
- rhs = build3 (COND_EXPR, type, unshare_expr (cond), rhs, lhs);
- gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
- update_stmt (stmt);
- }
+ {
+ stmt = gsi_stmt (gsi);
+ if (gimple_assign_single_p (stmt)
+ && gimple_vdef (stmt))
+ {
+ /* A[i] = x; */
+ tree ai = gimple_assign_lhs (stmt);
+
+ /* pointer = cond ? &A[i] : scratch_pad; */
+ tree star_pointer = create_indirect_cond_expr (ai, cond,
+ scratch_pad, &gsi);
+ /* *pointer = x; */
+ gimple_assign_set_lhs (stmt, star_pointer);
+ update_stmt (stmt);
+ }
+ else if (gimple_assign_single_p (stmt)
+ && gimple_vuse (stmt))
+ {
+ /* x = A[i]; */
+ tree ai = gimple_assign_rhs1 (stmt);
+
+ /* pointer = cond ? &A[i] : scratch_pad; */
+ tree star_pointer = create_indirect_cond_expr (ai, cond,
+ scratch_pad, &gsi);
+ /* x = *pointer; */
+ gimple_assign_set_rhs1 (stmt, star_pointer);
+ update_stmt (stmt);
+ }
+ }
}
}
@@ -1556,7 +1514,7 @@ remove_conditions_and_labels (loop_p loop)
blocks. Replace PHI nodes with conditional modify expressions. */
static void
-combine_blocks (struct loop *loop)
+combine_blocks (struct loop *loop, tree *scratch_pad)
{
basic_block bb, exit_bb, merge_target_bb;
unsigned int orig_loop_num_nodes = loop->num_nodes;
@@ -1569,7 +1527,7 @@ combine_blocks (struct loop *loop)
predicate_all_scalar_phis (loop);
if (flag_tree_loop_if_convert_stores)
- predicate_mem_writes (loop);
+ predicate_mem_writes (loop, scratch_pad);
/* Merge basic blocks: first remove all the edges in the loop,
except for those from the exit block. */
@@ -1658,7 +1616,7 @@ combine_blocks (struct loop *loop)
profitability analysis. Returns true when something changed. */
static bool
-tree_if_conversion (struct loop *loop)
+tree_if_conversion (struct loop *loop, tree *scratch_pad)
{
bool changed = false;
ifc_bbs = NULL;
@@ -1670,7 +1628,7 @@ tree_if_conversion (struct loop *loop)
/* Now all statements are if-convertible. Combine all the basic
blocks into one huge basic block doing the if-conversion
on-the-fly. */
- combine_blocks (loop);
+ combine_blocks (loop, scratch_pad);
if (flag_tree_loop_if_convert_stores)
mark_sym_for_renaming (gimple_vop (cfun));
@@ -1701,12 +1659,13 @@ main_tree_if_conversion (void)
struct loop *loop;
bool changed = false;
unsigned todo = 0;
+ tree scratch_pad = NULL_TREE;
if (number_of_loops () <= 1)
return 0;
FOR_EACH_LOOP (li, loop, 0)
- changed |= tree_if_conversion (loop);
+ changed |= tree_if_conversion (loop, &scratch_pad);
if (changed)
todo |= TODO_cleanup_cfg;
--
1.7.0.4