Patchwork [3/4] Do not check whether memory references accessed in every iteration trap.

login
register
mail settings
Submitter Sebastian Pop
Date July 8, 2010, 9:41 p.m.
Message ID <1278625285-12667-4-git-send-email-sebpop@gmail.com>
Download mbox | patch
Permalink /patch/58306/
State New
Headers show

Comments

Sebastian Pop - July 8, 2010, 9:41 p.m.
This patch relaxes the checks from gimple_could_trap_p in order to
allow the flag_loop_if_convert_memory_writes to if-convert more loops
in which it is possible to prove that:

- the accesses to an array in a loop do not trap (more than the
  original non-if-converted loop).  This is true when the memory
  accesses are executed at every iteration of the if-converted loop.

- the writes to memory occur on arrays that are not const qualified.
  This is true when there exists at least one unconditional write to
  the array in the analyzed program.  In this patch this analysis is
  limited to the loop to be if-converted.

	* gimple.c (gimple_op_could_trap_p): Outlined from
	gimple_could_trap_p_1.
	(gimple_could_trap_p_1): Call gimple_op_could_trap_p.
	* gimple.h (gimple_op_could_trap_p): Declared.
	* tree-data-ref.h (same_data_refs_base_objects): New.
	(same_data_refs): New.
	* tree-if-conv.c (memref_read_or_written_unconditionally): New.
	(memrefs_read_or_written_unconditionally): New.
	(write_memref_written_at_least_once): New.
	(write_memrefs_written_at_least_once): New.
	(ifcvt_memrefs_wont_trap): New.
	(operations_could_trap): New.
	(ifcvt_could_trap_p): New.
	(if_convertible_gimple_assign_stmt_p): Call ifcvt_could_trap_p.
	Gets a vector of data refs.
	(if_convertible_stmt_p): Same.
	(if_convertible_loop_p): Before freeing the data refs vector,
	pass it to if_convertible_stmt_p.

	* gcc.dg/tree-ssa/ifc-5.c: New.
	* gcc.dg/vect/vect-cond-1.c: Update pattern.
	* gcc.dg/vect/vect-cond-3.c: Same.
	* gcc.dg/vect/vect-cond-4.c: Same.
	* gcc.dg/vect/vect-cond-6.c: Same.
---
 gcc/gimple.c                            |   38 ++++--
 gcc/gimple.h                            |    1 +
 gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c   |   24 ++++
 gcc/testsuite/gcc.dg/vect/vect-cond-1.c |    2 +-
 gcc/testsuite/gcc.dg/vect/vect-cond-3.c |    2 +-
 gcc/testsuite/gcc.dg/vect/vect-cond-4.c |    2 +-
 gcc/testsuite/gcc.dg/vect/vect-cond-6.c |    1 +
 gcc/tree-data-ref.h                     |   34 +++++
 gcc/tree-if-conv.c                      |  220 +++++++++++++++++++++++++++----
 9 files changed, 279 insertions(+), 45 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
Richard Guenther - Aug. 13, 2010, 9:24 a.m.
On Thu, 8 Jul 2010, Sebastian Pop wrote:

> This patch relaxes the checks from gimple_could_trap_p in order to
> allow the flag_loop_if_convert_memory_writes to if-convert more loops
> in which it is possible to prove that:
> 
> - the accesses to an array in a loop do not trap (more than the
>   original non-if-converted loop).  This is true when the memory
>   accesses are executed at every iteration of the if-converted loop.
> 
> - the writes to memory occur on arrays that are not const qualified.
>   This is true when there exists at least one unconditional write to
>   the array in the analyzed program.  In this patch this analysis is
>   limited to the loop to be if-converted.

Comments below.

> 	* gimple.c (gimple_op_could_trap_p): Outlined from
> 	gimple_could_trap_p_1.
> 	(gimple_could_trap_p_1): Call gimple_op_could_trap_p.
> 	* gimple.h (gimple_op_could_trap_p): Declared.
> 	* tree-data-ref.h (same_data_refs_base_objects): New.
> 	(same_data_refs): New.
> 	* tree-if-conv.c (memref_read_or_written_unconditionally): New.
> 	(memrefs_read_or_written_unconditionally): New.
> 	(write_memref_written_at_least_once): New.
> 	(write_memrefs_written_at_least_once): New.
> 	(ifcvt_memrefs_wont_trap): New.
> 	(operations_could_trap): New.
> 	(ifcvt_could_trap_p): New.
> 	(if_convertible_gimple_assign_stmt_p): Call ifcvt_could_trap_p.
> 	Gets a vector of data refs.
> 	(if_convertible_stmt_p): Same.
> 	(if_convertible_loop_p): Before freeing the data refs vector,
> 	pass it to if_convertible_stmt_p.
> 
> 	* gcc.dg/tree-ssa/ifc-5.c: New.
> 	* gcc.dg/vect/vect-cond-1.c: Update pattern.
> 	* gcc.dg/vect/vect-cond-3.c: Same.
> 	* gcc.dg/vect/vect-cond-4.c: Same.
> 	* gcc.dg/vect/vect-cond-6.c: Same.
> ---
>  gcc/gimple.c                            |   38 ++++--
>  gcc/gimple.h                            |    1 +
>  gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c   |   24 ++++
>  gcc/testsuite/gcc.dg/vect/vect-cond-1.c |    2 +-
>  gcc/testsuite/gcc.dg/vect/vect-cond-3.c |    2 +-
>  gcc/testsuite/gcc.dg/vect/vect-cond-4.c |    2 +-
>  gcc/testsuite/gcc.dg/vect/vect-cond-6.c |    1 +
>  gcc/tree-data-ref.h                     |   34 +++++
>  gcc/tree-if-conv.c                      |  220 +++++++++++++++++++++++++++----
>  9 files changed, 279 insertions(+), 45 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
> 
> diff --git a/gcc/gimple.c b/gcc/gimple.c
> index 707d4e4..f515acc 100644
> --- a/gcc/gimple.c
> +++ b/gcc/gimple.c
> @@ -2399,24 +2399,14 @@ gimple_rhs_has_side_effects (const_gimple s)
>    return false;
>  }
>  
> +/* Helper for gimple_could_trap_p_1.  Return true if the operation of
> +   S can trap.  */
>  
> -/* Helper for gimple_could_trap_p and gimple_assign_rhs_could_trap_p.
> -   Return true if S can trap.  If INCLUDE_LHS is true and S is a
> -   GIMPLE_ASSIGN, the LHS of the assignment is also checked.
> -   Otherwise, only the RHS of the assignment is checked.  */
> -
> -static bool
> -gimple_could_trap_p_1 (gimple s, bool include_lhs)
> +bool
> +gimple_op_could_trap_p (gimple s)

This is not a very good name.  What is "operation"?  It seems you
exclude loads and stores here, so gimple_nonloadstore_could_trap_p
would be better.  Or to not confuse things too much instead
of splitting the function add another flag bool include_loads and
export the function - that would be my preference here
(include_lhs is effectively include_stores, nothing else on the
lhs can trap).

With that change the gimple.[ch] changes are ok.  More comments
on the rest below.

>  {
> -  unsigned i, start;
> -  tree t, div = NULL_TREE;
>    enum tree_code op;
> -
> -  start = (is_gimple_assign (s) && !include_lhs) ? 1 : 0;
> -
> -  for (i = start; i < gimple_num_ops (s); i++)
> -    if (tree_could_trap_p (gimple_op (s, i)))
> -      return true;
> +  tree t, div = NULL_TREE;
>  
>    switch (gimple_code (s))
>      {
> @@ -2445,7 +2435,25 @@ gimple_could_trap_p_1 (gimple s, bool include_lhs)
>      }
>  
>    return false;
> +}
> +
> +/* Helper for gimple_could_trap_p and gimple_assign_rhs_could_trap_p.
> +   Return true if S can trap.  If INCLUDE_LHS is true and S is a
> +   GIMPLE_ASSIGN, the LHS of the assignment is also checked.
> +   Otherwise, only the RHS of the assignment is checked.  */
> +
> +static bool
> +gimple_could_trap_p_1 (gimple s, bool include_lhs)
> +{
> +  unsigned i, start;
> +
> +  start = (is_gimple_assign (s) && !include_lhs) ? 1 : 0;
> +
> +  for (i = start; i < gimple_num_ops (s); i++)
> +    if (tree_could_trap_p (gimple_op (s, i)))
> +      return true;
>  
> +  return gimple_op_could_trap_p (s);
>  }
>  
>  
> diff --git a/gcc/gimple.h b/gcc/gimple.h
> index ec7fc93..c446cd3 100644
> --- a/gcc/gimple.h
> +++ b/gcc/gimple.h
> @@ -885,6 +885,7 @@ gimple gimple_build_cond_from_tree (tree, tree, tree);
>  void gimple_cond_set_condition_from_tree (gimple, tree);
>  bool gimple_has_side_effects (const_gimple);
>  bool gimple_rhs_has_side_effects (const_gimple);
> +bool gimple_op_could_trap_p (gimple);
>  bool gimple_could_trap_p (gimple);
>  bool gimple_assign_rhs_could_trap_p (gimple);
>  void gimple_regimplify_operands (gimple, gimple_stmt_iterator *);
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
> new file mode 100644
> index 0000000..a9cc816
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-options "-c -O2 -ftree-vectorize -fdump-tree-ifcvt-stats" { target *-*-* } } */
> +
> +void
> +dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs)
> +{
> +  int i, level, qmul, qadd;
> +
> +  qadd = (qscale - 1) | 1;
> +  qmul = qscale << 1;
> +
> +  for (i = 0; i <= nCoeffs; i++)
> +    {
> +      level = block[i];
> +      if (level < 0)
> +	level = level * qmul - qadd;
> +      else
> +	level = level * qmul + qadd;
> +      block[i] = level;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
> +/* { dg-final { cleanup-tree-dump "ifcvt" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-1.c b/gcc/testsuite/gcc.dg/vect/vect-cond-1.c
> index 4ee6713..888e5cb 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-cond-1.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-1.c
> @@ -52,7 +52,7 @@ int main (void)
>    return 0;
>  }
>  
> -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
>  /* { dg-final { cleanup-tree-dump "vect" } } */
>

As this was testing for outer loop vectorization please duplicate the
test and disable if-conversion in one variant.

>  
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-3.c b/gcc/testsuite/gcc.dg/vect/vect-cond-3.c
> index 56cfbb2..42b9f35 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-cond-3.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-3.c
> @@ -60,7 +60,7 @@ int main (void)
>    return 0;
>  }
>  
> -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
>  /* { dg-final { cleanup-tree-dump "vect" } } */

Likewise.

>  
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-4.c b/gcc/testsuite/gcc.dg/vect/vect-cond-4.c
> index c3a1585..1d1d8fc 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-cond-4.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-4.c
> @@ -57,7 +57,7 @@ int main (void)
>    return 0;
>  }
>  
> -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
>  /* { dg-final { cleanup-tree-dump "vect" } } */

Likewise.

>  
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-6.c b/gcc/testsuite/gcc.dg/vect/vect-cond-6.c
> index e5e9391..7820444 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-cond-6.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-6.c
> @@ -56,5 +56,6 @@ int main ()
>  }
>  
>  /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
>  /* { dg-final { cleanup-tree-dump "vect" } } */

Likewise.

> diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
> index eff5348..391d6fc 100644
> --- a/gcc/tree-data-ref.h
> +++ b/gcc/tree-data-ref.h
> @@ -417,6 +417,40 @@ extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *);
>  extern bool dr_may_alias_p (const struct data_reference *,
>  			    const struct data_reference *);
>  
> +
> +/* Return true when the base objects of data references A and B are
> +   the same memory object.  */
> +
> +static inline bool
> +same_data_refs_base_objects (data_reference_p a, data_reference_p b)
> +{
> +  return DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b)
> +    && dr_may_alias_p (a, b)
> +    && operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0);

I don't see why the dr_may_alias_p check is necessary here.  In fact
it might even pessimize what you want to check.

> +}
> +
> +/* Return true when the data references A and B are accessing the same
> +   memory object with the same access functions.  */

Isn't this too special again?

> +static inline bool
> +same_data_refs (data_reference_p a, data_reference_p b)
> +{
> +  unsigned int i;
> +
> +  /* The references are exactly the same.  */
> +  if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
> +    return true;
> +
> +  if (!same_data_refs_base_objects (a, b))
> +    return false;
> +
> +  for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
> +    if (!eq_evolutions_p (DR_ACCESS_FN (a, i), DR_ACCESS_FN (b, i)))
> +      return false;
> +
> +  return true;
> +}
> +
>  /* Return true when the DDR contains two data references that have the
>     same access functions.  */
>  
> diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
> index cac5a3b..f0bc026 100644
> --- a/gcc/tree-if-conv.c
> +++ b/gcc/tree-if-conv.c
> @@ -441,6 +441,166 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
>    return true;
>  }
>  
> +/* Returns true when the memory reference A at position P in the data
> +   references vector DRS and executed under the condition CA, is read

"and" executed?  Is A executed under condition CA or is the whole
vector DRS?  Please clarify the comment...

> +   or written unconditionally.  */
> +
> +static bool
> +memref_read_or_written_unconditionally (int pos, data_reference_p a,
> +					VEC (data_reference_p, heap) *drs,
> +					tree ca)
> +{
> +  int i;
> +  data_reference_p b;
> +
> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, b); i++)
> +    if (i != pos && same_data_refs (a, b))
> +      {
> +	tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
> +
> +	if (is_true_predicate (cb)
> +	    || is_true_predicate (ca = fold_or_predicates (ca, cb)))

... else this doesn't make sense?  B is either executed unconditionally
(is_true_predicate (cb)) or at least A or B is executed 
uncontionally.  Why is the latter case worth specializing?  In fact
you are probably doing redundant work in fold_or_predicates all the
time in practice (cb will be the same most of the time).

> +	  return true;
> +      }
> +
> +  return false;
> +}
> +
> +/* Returns true when the memory references of STMT are read or written
> +   unconditionally.  */
> +
> +static bool
> +memrefs_read_or_written_unconditionally (gimple stmt,
> +					 VEC (data_reference_p, heap) *drs)
> +{
> +  int i;
> +  data_reference_p a;
> +  tree ca = bb_predicate (gimple_bb (stmt));
> +
> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
> +    if (DR_STMT (a) == stmt

Ick.  Why not check DR_STMT (b) != stmt in 
memref_read_or_written_unconditionally and avoid all this linear
searching mess?

> +	&& !memref_read_or_written_unconditionally (i, a, drs, ca))
> +      return false;
> +
> +  return true;
> +}
> +
> +
> +/* Returns true when the memory reference A at position P in the data
> +   references vector DRS and executed under the condition CA, is
> +   unconditionally written.  */
> +
> +static bool
> +write_memref_written_at_least_once (int pos, data_reference_p a,
> +				    VEC (data_reference_p, heap) *drs,
> +				    tree ca)
> +{
> +  int i;
> +  data_reference_p b;
> +
> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, b); i++)
> +    if (i != pos
> +	&& !DR_IS_READ (b)
> +	&& same_data_refs_base_objects (a, b))
> +      {
> +	tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
> +
> +	if (is_true_predicate (cb)
> +	    || is_true_predicate (ca = fold_or_predicates (ca, cb)))
> +	  return true;

Same comments as above.

> +      }
> +
> +  return false;
> +}
> +
> +/* Returns true when the memory references of STMT are unconditionally
> +   written.  */
> +
> +static bool
> +write_memrefs_written_at_least_once (gimple stmt,
> +				     VEC (data_reference_p, heap) *drs)
> +{
> +  int i;
> +  data_reference_p a;
> +  tree ca = bb_predicate (gimple_bb (stmt));
> +
> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
> +    if (DR_STMT (a) == stmt
> +	&& !DR_IS_READ (a)
> +	&& !write_memref_written_at_least_once (i, a, drs, ca))
> +      return false;

Likewise.

> +  return true;
> +}
> +
> +/* Return true when the memory references of STMT won't trap in the
> +   if-converted code.  There are two things that we have to check for:
> +
> +   - writes to memory occur to writable memory: if-conversion of
> +   memory writes transforms the conditional memory writes into
> +   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
> +   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
> +   be executed at all in the original code, it may be a readonly
> +   memory.  To check that A is not const-qualified, we check that
> +   there exists at least an unconditional write to A in the current
> +   function.
> +
> +   - reads or writes to memory are valid memory accesses for every
> +   iteration.  To check that the memory accesses are correctly formed
> +   and that we are allowed to read and write in these locations, we
> +   check that the memory accesses to be if-converted occur at every
> +   iteration unconditionally.  */
> +
> +static bool
> +ifcvt_memrefs_wont_trap (gimple stmt, VEC (data_reference_p, heap) *refs)
> +{
> +  return write_memrefs_written_at_least_once (stmt, refs)
> +    && memrefs_read_or_written_unconditionally (stmt, refs);
> +}
> +
> +/* Same as gimple_could_trap_p_1, returns true when the statement S
> +   could trap, but do not check the memory references.  */
> +
> +static bool
> +operations_could_trap (gimple s)
> +{
> +  unsigned i;
> +
> +  for (i = 0; i < gimple_num_ops (s); i++)
> +    {
> +      tree expr = gimple_op (s, i);
> +
> +      switch (TREE_CODE (expr))
> +	{
> +	case ARRAY_REF:
> +	case INDIRECT_REF:
> +	case MISALIGNED_INDIRECT_REF:
> +	  break;
> +
> +	default:
> +	  if (tree_could_trap_p (expr))
> +	    return true;

Well - only memory references can trap here anyway, so this whole
function is just gimple_op_could_trap_p (s) (or the changed
gimple_could_trap_p_1 as I requessted).

> +	}
> +    }
> +
> +  return gimple_op_could_trap_p (s);
> +}
> +
> +/* Wrapper around gimple_could_trap_p refined for the needs of the
> +   if-conversion.  Try to prove that the memory accesses of STMT could
> +   not trap in the innermost loop containing STMT.  */
> +
> +static bool
> +ifcvt_could_trap_p (gimple stmt, VEC (data_reference_p, heap) *refs)
> +{
> +  if (gimple_has_mem_ops (stmt)

That's not the correct check.  You probably want gimple_vuse (stmt).

> +      && ifcvt_memrefs_wont_trap (stmt, refs)
> +      && !operations_could_trap (stmt))

And re-order those checks as ifcvt_memrefs_wont_trap is surely more
expensive.

> +    return false;
> +
> +  return gimple_could_trap_p (stmt);
> +}
> +
>  /* Return true when STMT is if-convertible.
>  
>     GIMPLE_ASSIGN statement is not if-convertible if,
> @@ -449,7 +609,8 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
>     - LHS is not var decl.  */
>  
>  static bool
> -if_convertible_gimple_assign_stmt_p (gimple stmt)
> +if_convertible_gimple_assign_stmt_p (gimple stmt,
> +				     VEC (data_reference_p, heap) *refs)
>  {
>    tree lhs = gimple_assign_lhs (stmt);
>    basic_block bb;
> @@ -477,7 +638,7 @@ if_convertible_gimple_assign_stmt_p (gimple stmt)
>  
>    if (flag_tree_loop_if_convert_memory_writes)
>      {
> -      if (gimple_could_trap_p (stmt))
> +      if (ifcvt_could_trap_p (stmt, refs))
>  	{
>  	  if (dump_file && (dump_flags & TDF_DETAILS))
>  	    fprintf (dump_file, "tree could trap...\n");
> @@ -517,7 +678,7 @@ if_convertible_gimple_assign_stmt_p (gimple stmt)
>     - it is a GIMPLE_LABEL or a GIMPLE_COND.  */
>  
>  static bool
> -if_convertible_stmt_p (gimple stmt)
> +if_convertible_stmt_p (gimple stmt, VEC (data_reference_p, heap) *refs)
>  {
>    switch (gimple_code (stmt))
>      {
> @@ -527,7 +688,7 @@ if_convertible_stmt_p (gimple stmt)
>        return true;
>  
>      case GIMPLE_ASSIGN:
> -      return if_convertible_gimple_assign_stmt_p (stmt);
> +      return if_convertible_gimple_assign_stmt_p (stmt, refs);
>  
>      default:
>        /* Don't know what to do with 'em so don't do anything.  */
> @@ -810,6 +971,9 @@ if_convertible_loop_p (struct loop *loop)
>    edge e;
>    edge_iterator ei;
>    basic_block exit_bb = NULL;
> +  bool res = false;
> +  VEC (data_reference_p, heap) *refs;
> +  VEC (ddr_p, heap) *ddrs;
>  
>    /* Handle only innermost loop.  */
>    if (!loop || loop->inner)
> @@ -847,18 +1011,14 @@ if_convertible_loop_p (struct loop *loop)
>  
>    /* Don't if-convert the loop when the data dependences cannot be
>       computed: the loop won't be vectorized in that case.  */
> -  {
> -    VEC (data_reference_p, heap) *refs = VEC_alloc (data_reference_p, heap, 5);
> -    VEC (ddr_p, heap) *ddrs = VEC_alloc (ddr_p, heap, 25);
> -    bool res = compute_data_dependences_for_loop (loop, true, &refs, &ddrs);
> -
> -    free_data_refs (refs);
> -    free_dependence_relations (ddrs);
> +  refs = VEC_alloc (data_reference_p, heap, 5);
> +  ddrs = VEC_alloc (ddr_p, heap, 25);
> +  res = compute_data_dependences_for_loop (loop, true, &refs, &ddrs);
>  
> -    if (!res)
> -      return false;
> -  }
> +  if (!res)
> +    goto cleanup;

Instead of gotos wrap the function into another doing the allocation
and de-allocation.

Please rework the patch according to the requested changes.

Thanks,
Richard.

> +  res = false;
>    calculate_dominance_info (CDI_DOMINATORS);
>  
>    /* Allow statements that can be handled during if-conversion.  */
> @@ -867,7 +1027,7 @@ if_convertible_loop_p (struct loop *loop)
>      {
>        if (dump_file && (dump_flags & TDF_DETAILS))
>  	fprintf (dump_file, "Irreducible loop\n");
> -      return false;
> +      goto cleanup;
>      }
>  
>    for (i = 0; i < loop->num_nodes; i++)
> @@ -875,14 +1035,18 @@ if_convertible_loop_p (struct loop *loop)
>        basic_block bb = ifc_bbs[i];
>  
>        if (!if_convertible_bb_p (loop, bb, exit_bb))
> -	return false;
> +	goto cleanup;
>  
>        if (bb_with_exit_edge_p (loop, bb))
>  	exit_bb = bb;
>      }
>  
> -  if (!predicate_bbs (loop))
> -    return false;
> +  res = predicate_bbs (loop);
> +
> +  if (!res)
> +    goto cleanup;
> +
> +  res = false;
>  
>    for (i = 0; i < loop->num_nodes; i++)
>      {
> @@ -891,21 +1055,23 @@ if_convertible_loop_p (struct loop *loop)
>  
>        for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
>  	if (!if_convertible_phi_p (loop, bb, gsi_stmt (itr)))
> -	  return false;
> -
> -      /* For non predicated BBs, don't check their statements.  */
> -      if (!is_predicated (bb))
> -	continue;
> +	  goto cleanup;
>  
> -      for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
> -	if (!if_convertible_stmt_p (gsi_stmt (itr)))
> -	  return false;
> +      /* Check the if-convertibility of statements in predicated BBs.  */
> +      if (is_predicated (bb))
> +	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
> +	  if (!if_convertible_stmt_p (gsi_stmt (itr), refs))
> +	    goto cleanup;
>      }
>  
> +  res = true;
>    if (dump_file)
>      fprintf (dump_file, "Applying if-conversion\n");
>  
> -  return true;
> + cleanup:
> +  free_data_refs (refs);
> +  free_dependence_relations (ddrs);
> +  return res;
>  }
>  
>  /* Basic block BB has two predecessors.  Using predecessor's bb
>
Sebastian Pop - Aug. 17, 2010, 5:01 p.m.
On Fri, Aug 13, 2010 at 04:24, Richard Guenther <rguenther@suse.de> wrote:
>> -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
>>  /* { dg-final { cleanup-tree-dump "vect" } } */
>>
>
> As this was testing for outer loop vectorization please duplicate the
> test and disable if-conversion in one variant.

I removed these changes as they are not needed anymore: the
if-conversion with memory accesses is not enabled in -O3.

>> +
>> +/* Return true when the base objects of data references A and B are
>> +   the same memory object.  */
>> +
>> +static inline bool
>> +same_data_refs_base_objects (data_reference_p a, data_reference_p b)
>> +{
>> +  return DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b)
>> +    && dr_may_alias_p (a, b)
>> +    && operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0);
>
> I don't see why the dr_may_alias_p check is necessary here.  In fact
> it might even pessimize what you want to check.

I agree, there is no reason to have the call to dr_may_alias_p: I
removed it.

>
>> +}
>> +
>> +/* Return true when the data references A and B are accessing the same
>> +   memory object with the same access functions.  */
>
> Isn't this too special again?
>
>> +static inline bool
>> +same_data_refs (data_reference_p a, data_reference_p b)

I do not understand what you are suggesting here.
Should I make the two functions same_data_refs and
same_data_refs_base_objects static and put them in tree-if-conv.c?

>> diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
>> index cac5a3b..f0bc026 100644
>> --- a/gcc/tree-if-conv.c
>> +++ b/gcc/tree-if-conv.c
>> @@ -441,6 +441,166 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
>>    return true;
>>  }
>>
>> +/* Returns true when the memory reference A at position P in the data
>> +   references vector DRS and executed under the condition CA, is read
>
> "and" executed?  Is A executed under condition CA or is the whole
> vector DRS?  Please clarify the comment...
>

I added one more sentence to clarify the comment:

/* Returns true when the memory reference A at position POS in the
   data references vector DRS and executed under the condition CA, is
   read or written unconditionally.  In other words, this function
   returns true when there exist other accesses to the same data
   reference with predicates that add up (OR-up) to the true
   predicate: this ensures that the data reference A is touched (read
   or written) on every iteration of the if-converted loop.  */

>> +   or written unconditionally.  */
>> +
>> +static bool
>> +memref_read_or_written_unconditionally (int pos, data_reference_p a,
>> +                                     VEC (data_reference_p, heap) *drs,
>> +                                     tree ca)
>> +{
>> +  int i;
>> +  data_reference_p b;
>> +
>> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, b); i++)
>> +    if (i != pos && same_data_refs (a, b))
>> +      {
>> +     tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
>> +
>> +     if (is_true_predicate (cb)
>> +         || is_true_predicate (ca = fold_or_predicates (ca, cb)))
>> +       return true;
>
> ... else this doesn't make sense?  B is either executed unconditionally
> (is_true_predicate (cb)) or at least A or B is executed
> uncontionally.  Why is the latter case worth specializing?  In fact
> you are probably doing redundant work in fold_or_predicates all the
> time in practice (cb will be the same most of the time).
>

This condition stands for:

>> +     if (is_true_predicate (cb)

1. A is touched at every iteration when there exist a data ref B with
the same base as A and that is executed unconditionally.

>> +         || is_true_predicate (ca = fold_or_predicates (ca, cb)))

2. A is also touched at every iteration when B is predicated by a
condition that is a complement of CA.  CA is first initialized to the
predicate of A, and then CA is OR-ed to all the predicates under which
the same data reference is touched.  If eventually CA becomes the true
predicate, then each iteration of the loop contains a path touching
the data reference A.

>> +      }
>> +
>> +  return false;
>> +}
>> +
>> +/* Returns true when the memory references of STMT are read or written
>> +   unconditionally.  */
>> +
>> +static bool
>> +memrefs_read_or_written_unconditionally (gimple stmt,
>> +                                      VEC (data_reference_p, heap) *drs)
>> +{
>> +  int i;
>> +  data_reference_p a;
>> +  tree ca = bb_predicate (gimple_bb (stmt));
>> +
>> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
>> +    if (DR_STMT (a) == stmt
>
> Ick.  Why not check DR_STMT (b) != stmt in
> memref_read_or_written_unconditionally and avoid all this linear
> searching mess?
>

Could you please be more specific: I do not understand what you
are asking here.

The algorithm is O (n^2) with n the number of data references in the
innermost loops.  This is because for each data reference we try to
prove that it is executed unconditionally, i.e., that there exist
other data references with the same base that are accessed on
different paths at every iteration.

>
> Please rework the patch according to the requested changes.
>

I sent the combined patch separately.

Sebastian
Richard Guenther - Aug. 18, 2010, 9:32 a.m.
On Tue, 17 Aug 2010, Sebastian Pop wrote:

> On Fri, Aug 13, 2010 at 04:24, Richard Guenther <rguenther@suse.de> wrote:
> >> -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> >>  /* { dg-final { cleanup-tree-dump "vect" } } */
> >>
> >
> > As this was testing for outer loop vectorization please duplicate the
> > test and disable if-conversion in one variant.
> 
> I removed these changes as they are not needed anymore: the
> if-conversion with memory accesses is not enabled in -O3.

Thanks.

> >> +
> >> +/* Return true when the base objects of data references A and B are
> >> +   the same memory object.  */
> >> +
> >> +static inline bool
> >> +same_data_refs_base_objects (data_reference_p a, data_reference_p b)
> >> +{
> >> +  return DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b)
> >> +    && dr_may_alias_p (a, b)
> >> +    && operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0);
> >
> > I don't see why the dr_may_alias_p check is necessary here.  In fact
> > it might even pessimize what you want to check.
> 
> I agree, there is no reason to have the call to dr_may_alias_p: I
> removed it.
> 
> >
> >> +}
> >> +
> >> +/* Return true when the data references A and B are accessing the same
> >> +   memory object with the same access functions.  */
> >
> > Isn't this too special again?
> >
> >> +static inline bool
> >> +same_data_refs (data_reference_p a, data_reference_p b)
> 
> I do not understand what you are suggesting here.
> Should I make the two functions same_data_refs and
> same_data_refs_base_objects static and put them in tree-if-conv.c?

I misunderstood what you are testing here - you are in fact testing
that the data-refs are exactly equal, a[i] vs. a[i+1] would not be
equal.  So just disregard my comment here.

> >> diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
> >> index cac5a3b..f0bc026 100644
> >> --- a/gcc/tree-if-conv.c
> >> +++ b/gcc/tree-if-conv.c
> >> @@ -441,6 +441,166 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
> >>    return true;
> >>  }
> >>
> >> +/* Returns true when the memory reference A at position P in the data
> >> +   references vector DRS and executed under the condition CA, is read
> >
> > "and" executed?  Is A executed under condition CA or is the whole
> > vector DRS?  Please clarify the comment...
> >
> 
> I added one more sentence to clarify the comment:
> 
> /* Returns true when the memory reference A at position POS in the
>    data references vector DRS and executed under the condition CA, is
>    read or written unconditionally.  In other words, this function
>    returns true when there exist other accesses to the same data
>    reference with predicates that add up (OR-up) to the true
>    predicate: this ensures that the data reference A is touched (read
>    or written) on every iteration of the if-converted loop.  */

Thanks.

> >> +   or written unconditionally.  */
> >> +
> >> +static bool
> >> +memref_read_or_written_unconditionally (int pos, data_reference_p a,
> >> +                                     VEC (data_reference_p, heap) *drs,
> >> +                                     tree ca)
> >> +{
> >> +  int i;
> >> +  data_reference_p b;
> >> +
> >> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, b); i++)
> >> +    if (i != pos && same_data_refs (a, b))
> >> +      {
> >> +     tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
> >> +
> >> +     if (is_true_predicate (cb)
> >> +         || is_true_predicate (ca = fold_or_predicates (ca, cb)))
> >> +       return true;
> >
> > ... else this doesn't make sense?  B is either executed unconditionally
> > (is_true_predicate (cb)) or at least A or B is executed
> > uncontionally.  Why is the latter case worth specializing?  In fact
> > you are probably doing redundant work in fold_or_predicates all the
> > time in practice (cb will be the same most of the time).
> >
> 
> This condition stands for:
> 
> >> +     if (is_true_predicate (cb)
> 
> 1. A is touched at every iteration when there exist a data ref B with
> the same base as A and that is executed unconditionally.
> 
> >> +         || is_true_predicate (ca = fold_or_predicates (ca, cb)))
> 
> 2. A is also touched at every iteration when B is predicated by a
> condition that is a complement of CA.  CA is first initialized to the
> predicate of A, and then CA is OR-ed to all the predicates under which
> the same data reference is touched.  If eventually CA becomes the true
> predicate, then each iteration of the loop contains a path touching
> the data reference A.

Ah, ok.  That makes sense.  (the fold_or_predicates still looks
very expensive - we eventually want to put a limit on the number
of DRs we perform the quadratic check on)

> >> +      }
> >> +
> >> +  return false;
> >> +}
> >> +
> >> +/* Returns true when the memory references of STMT are read or written
> >> +   unconditionally.  */
> >> +
> >> +static bool
> >> +memrefs_read_or_written_unconditionally (gimple stmt,
> >> +                                      VEC (data_reference_p, heap) *drs)
> >> +{
> >> +  int i;
> >> +  data_reference_p a;
> >> +  tree ca = bb_predicate (gimple_bb (stmt));
> >> +
> >> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
> >> +    if (DR_STMT (a) == stmt
> >
> > Ick.  Why not check DR_STMT (b) != stmt in
> > memref_read_or_written_unconditionally and avoid all this linear
> > searching mess?
> >
> 
> Could you please be more specific: I do not understand what you
> are asking here.

> >> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)    
> >> +    if (DR_STMT (a) == stmt

Here you find the data-ref with DR_STMT == stmt, just to call
memref_read_or_written_unconditionally with it's index in the
DR vector.  Why not simply do

  memref_read_or_written_unconditionally (stmt, drs, ca)

and in that function instead of the i != pos check

> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, b); i++)
> +    if (i != pos && same_data_refs (a, b))

do DR_STMT (b) != stmt?  I guess I am asking to fold the two
functions into one to make the quadraticness obvious and avoid
passing down indices.  I'm not sure if the result is more
readable but certainly the current version looked odd to me.

Thanks,
Richard.
Sebastian Pop - Aug. 18, 2010, 4:46 p.m.
On Wed, Aug 18, 2010 at 04:32, Richard Guenther <rguenther@suse.de> wrote:
>> >> +      }
>> >> +
>> >> +  return false;
>> >> +}
>> >> +
>> >> +/* Returns true when the memory references of STMT are read or written
>> >> +   unconditionally.  */
>> >> +
>> >> +static bool
>> >> +memrefs_read_or_written_unconditionally (gimple stmt,
>> >> +                                      VEC (data_reference_p, heap) *drs)
>> >> +{
>> >> +  int i;
>> >> +  data_reference_p a;
>> >> +  tree ca = bb_predicate (gimple_bb (stmt));
>> >> +
>> >> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
>> >> +    if (DR_STMT (a) == stmt
>> >
>> > Ick.  Why not check DR_STMT (b) != stmt in
>> > memref_read_or_written_unconditionally and avoid all this linear
>> > searching mess?
>> >
>>
>> Could you please be more specific: I do not understand what you
>> are asking here.
>
>> >> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
>> >> +    if (DR_STMT (a) == stmt
>
> Here you find the data-ref with DR_STMT == stmt, just to call
> memref_read_or_written_unconditionally with it's index in the
> DR vector.  Why not simply do
>
>  memref_read_or_written_unconditionally (stmt, drs, ca)
>
> and in that function instead of the i != pos check
>
>> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, b); i++)
>> +    if (i != pos && same_data_refs (a, b))
>
> do DR_STMT (b) != stmt?  I guess I am asking to fold the two
> functions into one to make the quadraticness obvious and avoid
> passing down indices.  I'm not sure if the result is more
> readable but certainly the current version looked odd to me.
>

I modified both memrefs_read_or_written_unconditionally and
write_memrefs_written_at_least_once.  What do you think about this form?

/* Returns true when the memory references of STMT are read or written
   unconditionally.  In other words, this function returns true when
   for every data reference A in STMT there exist other accesses to
   the same data reference with predicates that add up (OR-up) to the
   true predicate: this ensures that the data reference A is touched
   (read or written) on every iteration of the if-converted loop.  */

static bool
memrefs_read_or_written_unconditionally (gimple stmt,
					 VEC (data_reference_p, heap) *drs)
{
  int i, j;
  data_reference_p a, b;
  tree ca = bb_predicate (gimple_bb (stmt));

  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
    if (DR_STMT (a) == stmt)
      {
	bool found = false;
	int x = DR_RW_UNCONDITIONALLY (a);

	if (x == 0)
	  return false;

	if (x == 1)
	  continue;

	for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
	  if (DR_STMT (b) != stmt
	      && same_data_refs (a, b))
	    {
	      tree cb = bb_predicate (gimple_bb (DR_STMT (b)));

	      if (DR_RW_UNCONDITIONALLY (b) == 1
		  || is_true_predicate (cb)
		  || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
								 ca, cb)))
		{
		  DR_RW_UNCONDITIONALLY (a) = 1;
		  DR_RW_UNCONDITIONALLY (b) = 1;
		  found = true;
		  break;
		}
	    }

	if (!found)
	  {
	    DR_RW_UNCONDITIONALLY (a) = 0;
	    return false;
	  }
      }

  return true;
}

/* Returns true when the memory references of STMT are unconditionally
   written.  In other words, this function returns true when for every
   data reference A written in STMT, there exist other writes to the
   same data reference with predicates that add up (OR-up) to the true
   predicate: this ensures that the data reference A is written on
   every iteration of the if-converted loop.  */

static bool
write_memrefs_written_at_least_once (gimple stmt,
				     VEC (data_reference_p, heap) *drs)
{
  int i, j;
  data_reference_p a, b;
  tree ca = bb_predicate (gimple_bb (stmt));

  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
    if (DR_STMT (a) == stmt
	&& !DR_IS_READ (a))
      {
	bool found = false;
	int x = DR_WRITTEN_AT_LEAST_ONCE (a);

	if (x == 0)
	  return false;

	if (x == 1)
	  continue;

	for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
	  if (DR_STMT (b) != stmt
	      && !DR_IS_READ (b)
	      && same_data_refs_base_objects (a, b))
	    {
	      tree cb = bb_predicate (gimple_bb (DR_STMT (b)));

	      if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
		  || is_true_predicate (cb)
		  || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
								 ca, cb)))
		{
		  DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
		  DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
		  found = true;
		  break;
		}
	    }

	if (!found)
	  {
	    DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
	    return false;
	  }
      }

  return true;
}

I will prepare the combined patch if you think that this is a better
form.

Thanks,
Sebastian
Richard Guenther - Aug. 18, 2010, 6:38 p.m.
On Wed, 18 Aug 2010, Sebastian Pop wrote:

> On Wed, Aug 18, 2010 at 04:32, Richard Guenther <rguenther@suse.de> wrote:
> >> >> +      }
> >> >> +
> >> >> +  return false;
> >> >> +}
> >> >> +
> >> >> +/* Returns true when the memory references of STMT are read or written
> >> >> +   unconditionally.  */
> >> >> +
> >> >> +static bool
> >> >> +memrefs_read_or_written_unconditionally (gimple stmt,
> >> >> +                                      VEC (data_reference_p, heap) *drs)
> >> >> +{
> >> >> +  int i;
> >> >> +  data_reference_p a;
> >> >> +  tree ca = bb_predicate (gimple_bb (stmt));
> >> >> +
> >> >> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
> >> >> +    if (DR_STMT (a) == stmt
> >> >
> >> > Ick.  Why not check DR_STMT (b) != stmt in
> >> > memref_read_or_written_unconditionally and avoid all this linear
> >> > searching mess?
> >> >
> >>
> >> Could you please be more specific: I do not understand what you
> >> are asking here.
> >
> >> >> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
> >> >> +    if (DR_STMT (a) == stmt
> >
> > Here you find the data-ref with DR_STMT == stmt, just to call
> > memref_read_or_written_unconditionally with it's index in the
> > DR vector.  Why not simply do
> >
> >  memref_read_or_written_unconditionally (stmt, drs, ca)
> >
> > and in that function instead of the i != pos check
> >
> >> +  for (i = 0; VEC_iterate (data_reference_p, drs, i, b); i++)
> >> +    if (i != pos && same_data_refs (a, b))
> >
> > do DR_STMT (b) != stmt?  I guess I am asking to fold the two
> > functions into one to make the quadraticness obvious and avoid
> > passing down indices.  I'm not sure if the result is more
> > readable but certainly the current version looked odd to me.
> >
> 
> I modified both memrefs_read_or_written_unconditionally and
> write_memrefs_written_at_least_once.  What do you think about this form?
> 
> /* Returns true when the memory references of STMT are read or written
>    unconditionally.  In other words, this function returns true when
>    for every data reference A in STMT there exist other accesses to
>    the same data reference with predicates that add up (OR-up) to the
>    true predicate: this ensures that the data reference A is touched
>    (read or written) on every iteration of the if-converted loop.  */
> 
> static bool
> memrefs_read_or_written_unconditionally (gimple stmt,
> 					 VEC (data_reference_p, heap) *drs)
> {
>   int i, j;
>   data_reference_p a, b;
>   tree ca = bb_predicate (gimple_bb (stmt));
> 
>   for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
>     if (DR_STMT (a) == stmt)
>       {
> 	bool found = false;
> 	int x = DR_RW_UNCONDITIONALLY (a);
> 
> 	if (x == 0)
> 	  return false;
> 
> 	if (x == 1)
> 	  continue;
> 
> 	for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
> 	  if (DR_STMT (b) != stmt
> 	      && same_data_refs (a, b))
> 	    {
> 	      tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
> 
> 	      if (DR_RW_UNCONDITIONALLY (b) == 1
> 		  || is_true_predicate (cb)
> 		  || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
> 								 ca, cb)))
> 		{
> 		  DR_RW_UNCONDITIONALLY (a) = 1;
> 		  DR_RW_UNCONDITIONALLY (b) = 1;
> 		  found = true;
> 		  break;
> 		}
> 	    }
> 
> 	if (!found)
> 	  {
> 	    DR_RW_UNCONDITIONALLY (a) = 0;
> 	    return false;
> 	  }
>       }
> 
>   return true;
> }
> 
> /* Returns true when the memory references of STMT are unconditionally
>    written.  In other words, this function returns true when for every
>    data reference A written in STMT, there exist other writes to the
>    same data reference with predicates that add up (OR-up) to the true
>    predicate: this ensures that the data reference A is written on
>    every iteration of the if-converted loop.  */
> 
> static bool
> write_memrefs_written_at_least_once (gimple stmt,
> 				     VEC (data_reference_p, heap) *drs)
> {
>   int i, j;
>   data_reference_p a, b;
>   tree ca = bb_predicate (gimple_bb (stmt));
> 
>   for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
>     if (DR_STMT (a) == stmt
> 	&& !DR_IS_READ (a))
>       {
> 	bool found = false;
> 	int x = DR_WRITTEN_AT_LEAST_ONCE (a);
> 
> 	if (x == 0)
> 	  return false;
> 
> 	if (x == 1)
> 	  continue;
> 
> 	for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
> 	  if (DR_STMT (b) != stmt
> 	      && !DR_IS_READ (b)
> 	      && same_data_refs_base_objects (a, b))
> 	    {
> 	      tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
> 
> 	      if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
> 		  || is_true_predicate (cb)
> 		  || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
> 								 ca, cb)))
> 		{
> 		  DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
> 		  DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
> 		  found = true;
> 		  break;
> 		}
> 	    }
> 
> 	if (!found)
> 	  {
> 	    DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
> 	    return false;
> 	  }
>       }
> 
>   return true;
> }
> 
> I will prepare the combined patch if you think that this is a better
> form.

Yes, I like that better.

Thanks,
Richard.
Sebastian Pop - Aug. 18, 2010, 7:31 p.m.
>> I will prepare the combined patch if you think that this is a better
>> form.
>
> Yes, I like that better.

I sent out the new combined patches.
Ok for trunk after regstrap?

Thanks,
Sebastian
Richard Guenther - Aug. 24, 2010, 10 a.m.
On Wed, 18 Aug 2010, Sebastian Pop wrote:

> >> I will prepare the combined patch if you think that this is a better
> >> form.
> >
> > Yes, I like that better.
> 
> I sent out the new combined patches.
> Ok for trunk after regstrap?

Now I just noticed that -ftree-loop-if-convert-memory-writes is long
for -ftree-loop-if-convert-stores.  But I suppose it doesn't really
matter.  The patches are ok (with or without changing the option
name - whatever you prefer).

Thanks,
Richard.
Sebastian Pop - Aug. 24, 2010, 3:11 p.m.
On Tue, Aug 24, 2010 at 05:00, Richard Guenther <rguenther@suse.de> wrote:
> Now I just noticed that -ftree-loop-if-convert-memory-writes is long
> for -ftree-loop-if-convert-stores.  But I suppose it doesn't really

This sounds much better, thanks for the suggestion.

> matter.  The patches are ok (with or without changing the option
> name - whatever you prefer).

I will change the flag name, and do another round of regstrap
and if everything is alright, I will commit to trunk.

Thanks,
Sebastian
Gerald Pfeifer - Sept. 19, 2010, 10:47 p.m.
On Tue, 24 Aug 2010, Sebastian Pop wrote:
> I will change the flag name, and do another round of regstrap
> and if everything is alright, I will commit to trunk.

Mind also adding a note to gcc-4.6/changes.html ?

Thanks,
Gerald

Patch

diff --git a/gcc/gimple.c b/gcc/gimple.c
index 707d4e4..f515acc 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -2399,24 +2399,14 @@  gimple_rhs_has_side_effects (const_gimple s)
   return false;
 }
 
+/* Helper for gimple_could_trap_p_1.  Return true if the operation of
+   S can trap.  */
 
-/* Helper for gimple_could_trap_p and gimple_assign_rhs_could_trap_p.
-   Return true if S can trap.  If INCLUDE_LHS is true and S is a
-   GIMPLE_ASSIGN, the LHS of the assignment is also checked.
-   Otherwise, only the RHS of the assignment is checked.  */
-
-static bool
-gimple_could_trap_p_1 (gimple s, bool include_lhs)
+bool
+gimple_op_could_trap_p (gimple s)
 {
-  unsigned i, start;
-  tree t, div = NULL_TREE;
   enum tree_code op;
-
-  start = (is_gimple_assign (s) && !include_lhs) ? 1 : 0;
-
-  for (i = start; i < gimple_num_ops (s); i++)
-    if (tree_could_trap_p (gimple_op (s, i)))
-      return true;
+  tree t, div = NULL_TREE;
 
   switch (gimple_code (s))
     {
@@ -2445,7 +2435,25 @@  gimple_could_trap_p_1 (gimple s, bool include_lhs)
     }
 
   return false;
+}
+
+/* Helper for gimple_could_trap_p and gimple_assign_rhs_could_trap_p.
+   Return true if S can trap.  If INCLUDE_LHS is true and S is a
+   GIMPLE_ASSIGN, the LHS of the assignment is also checked.
+   Otherwise, only the RHS of the assignment is checked.  */
+
+static bool
+gimple_could_trap_p_1 (gimple s, bool include_lhs)
+{
+  unsigned i, start;
+
+  start = (is_gimple_assign (s) && !include_lhs) ? 1 : 0;
+
+  for (i = start; i < gimple_num_ops (s); i++)
+    if (tree_could_trap_p (gimple_op (s, i)))
+      return true;
 
+  return gimple_op_could_trap_p (s);
 }
 
 
diff --git a/gcc/gimple.h b/gcc/gimple.h
index ec7fc93..c446cd3 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -885,6 +885,7 @@  gimple gimple_build_cond_from_tree (tree, tree, tree);
 void gimple_cond_set_condition_from_tree (gimple, tree);
 bool gimple_has_side_effects (const_gimple);
 bool gimple_rhs_has_side_effects (const_gimple);
+bool gimple_op_could_trap_p (gimple);
 bool gimple_could_trap_p (gimple);
 bool gimple_assign_rhs_could_trap_p (gimple);
 void gimple_regimplify_operands (gimple, gimple_stmt_iterator *);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
new file mode 100644
index 0000000..a9cc816
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-options "-c -O2 -ftree-vectorize -fdump-tree-ifcvt-stats" { target *-*-* } } */
+
+void
+dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs)
+{
+  int i, level, qmul, qadd;
+
+  qadd = (qscale - 1) | 1;
+  qmul = qscale << 1;
+
+  for (i = 0; i <= nCoeffs; i++)
+    {
+      level = block[i];
+      if (level < 0)
+	level = level * qmul - qadd;
+      else
+	level = level * qmul + qadd;
+      block[i] = level;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
+/* { dg-final { cleanup-tree-dump "ifcvt" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-1.c b/gcc/testsuite/gcc.dg/vect/vect-cond-1.c
index 4ee6713..888e5cb 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-cond-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-1.c
@@ -52,7 +52,7 @@  int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-3.c b/gcc/testsuite/gcc.dg/vect/vect-cond-3.c
index 56cfbb2..42b9f35 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-cond-3.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-3.c
@@ -60,7 +60,7 @@  int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-4.c b/gcc/testsuite/gcc.dg/vect/vect-cond-4.c
index c3a1585..1d1d8fc 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-cond-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-4.c
@@ -57,7 +57,7 @@  int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-6.c b/gcc/testsuite/gcc.dg/vect/vect-cond-6.c
index e5e9391..7820444 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-cond-6.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-6.c
@@ -56,5 +56,6 @@  int main ()
 }
 
 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
       
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
index eff5348..391d6fc 100644
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
@@ -417,6 +417,40 @@  extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *);
 extern bool dr_may_alias_p (const struct data_reference *,
 			    const struct data_reference *);
 
+
+/* Return true when the base objects of data references A and B are
+   the same memory object.  */
+
+static inline bool
+same_data_refs_base_objects (data_reference_p a, data_reference_p b)
+{
+  return DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b)
+    && dr_may_alias_p (a, b)
+    && operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0);
+}
+
+/* Return true when the data references A and B are accessing the same
+   memory object with the same access functions.  */
+
+static inline bool
+same_data_refs (data_reference_p a, data_reference_p b)
+{
+  unsigned int i;
+
+  /* The references are exactly the same.  */
+  if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
+    return true;
+
+  if (!same_data_refs_base_objects (a, b))
+    return false;
+
+  for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
+    if (!eq_evolutions_p (DR_ACCESS_FN (a, i), DR_ACCESS_FN (b, i)))
+      return false;
+
+  return true;
+}
+
 /* Return true when the DDR contains two data references that have the
    same access functions.  */
 
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index cac5a3b..f0bc026 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -441,6 +441,166 @@  if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
   return true;
 }
 
+/* Returns true when the memory reference A at position P in the data
+   references vector DRS and executed under the condition CA, is read
+   or written unconditionally.  */
+
+static bool
+memref_read_or_written_unconditionally (int pos, data_reference_p a,
+					VEC (data_reference_p, heap) *drs,
+					tree ca)
+{
+  int i;
+  data_reference_p b;
+
+  for (i = 0; VEC_iterate (data_reference_p, drs, i, b); i++)
+    if (i != pos && same_data_refs (a, b))
+      {
+	tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
+
+	if (is_true_predicate (cb)
+	    || is_true_predicate (ca = fold_or_predicates (ca, cb)))
+	  return true;
+      }
+
+  return false;
+}
+
+/* Returns true when the memory references of STMT are read or written
+   unconditionally.  */
+
+static bool
+memrefs_read_or_written_unconditionally (gimple stmt,
+					 VEC (data_reference_p, heap) *drs)
+{
+  int i;
+  data_reference_p a;
+  tree ca = bb_predicate (gimple_bb (stmt));
+
+  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
+    if (DR_STMT (a) == stmt
+	&& !memref_read_or_written_unconditionally (i, a, drs, ca))
+      return false;
+
+  return true;
+}
+
+
+/* Returns true when the memory reference A at position P in the data
+   references vector DRS and executed under the condition CA, is
+   unconditionally written.  */
+
+static bool
+write_memref_written_at_least_once (int pos, data_reference_p a,
+				    VEC (data_reference_p, heap) *drs,
+				    tree ca)
+{
+  int i;
+  data_reference_p b;
+
+  for (i = 0; VEC_iterate (data_reference_p, drs, i, b); i++)
+    if (i != pos
+	&& !DR_IS_READ (b)
+	&& same_data_refs_base_objects (a, b))
+      {
+	tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
+
+	if (is_true_predicate (cb)
+	    || is_true_predicate (ca = fold_or_predicates (ca, cb)))
+	  return true;
+      }
+
+  return false;
+}
+
+/* Returns true when the memory references of STMT are unconditionally
+   written.  */
+
+static bool
+write_memrefs_written_at_least_once (gimple stmt,
+				     VEC (data_reference_p, heap) *drs)
+{
+  int i;
+  data_reference_p a;
+  tree ca = bb_predicate (gimple_bb (stmt));
+
+  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
+    if (DR_STMT (a) == stmt
+	&& !DR_IS_READ (a)
+	&& !write_memref_written_at_least_once (i, a, drs, ca))
+      return false;
+
+  return true;
+}
+
+/* Return true when the memory references of STMT won't trap in the
+   if-converted code.  There are two things that we have to check for:
+
+   - writes to memory occur to writable memory: if-conversion of
+   memory writes transforms the conditional memory writes into
+   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
+   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
+   be executed at all in the original code, it may be a readonly
+   memory.  To check that A is not const-qualified, we check that
+   there exists at least an unconditional write to A in the current
+   function.
+
+   - reads or writes to memory are valid memory accesses for every
+   iteration.  To check that the memory accesses are correctly formed
+   and that we are allowed to read and write in these locations, we
+   check that the memory accesses to be if-converted occur at every
+   iteration unconditionally.  */
+
+static bool
+ifcvt_memrefs_wont_trap (gimple stmt, VEC (data_reference_p, heap) *refs)
+{
+  return write_memrefs_written_at_least_once (stmt, refs)
+    && memrefs_read_or_written_unconditionally (stmt, refs);
+}
+
+/* Same as gimple_could_trap_p_1, returns true when the statement S
+   could trap, but do not check the memory references.  */
+
+static bool
+operations_could_trap (gimple s)
+{
+  unsigned i;
+
+  for (i = 0; i < gimple_num_ops (s); i++)
+    {
+      tree expr = gimple_op (s, i);
+
+      switch (TREE_CODE (expr))
+	{
+	case ARRAY_REF:
+	case INDIRECT_REF:
+	case MISALIGNED_INDIRECT_REF:
+	  break;
+
+	default:
+	  if (tree_could_trap_p (expr))
+	    return true;
+	}
+    }
+
+  return gimple_op_could_trap_p (s);
+}
+
+/* Wrapper around gimple_could_trap_p refined for the needs of the
+   if-conversion.  Try to prove that the memory accesses of STMT could
+   not trap in the innermost loop containing STMT.  */
+
+static bool
+ifcvt_could_trap_p (gimple stmt, VEC (data_reference_p, heap) *refs)
+{
+  if (gimple_has_mem_ops (stmt)
+      && ifcvt_memrefs_wont_trap (stmt, refs)
+      && !operations_could_trap (stmt))
+    return false;
+
+  return gimple_could_trap_p (stmt);
+}
+
 /* Return true when STMT is if-convertible.
 
    GIMPLE_ASSIGN statement is not if-convertible if,
@@ -449,7 +609,8 @@  if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
    - LHS is not var decl.  */
 
 static bool
-if_convertible_gimple_assign_stmt_p (gimple stmt)
+if_convertible_gimple_assign_stmt_p (gimple stmt,
+				     VEC (data_reference_p, heap) *refs)
 {
   tree lhs = gimple_assign_lhs (stmt);
   basic_block bb;
@@ -477,7 +638,7 @@  if_convertible_gimple_assign_stmt_p (gimple stmt)
 
   if (flag_tree_loop_if_convert_memory_writes)
     {
-      if (gimple_could_trap_p (stmt))
+      if (ifcvt_could_trap_p (stmt, refs))
 	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    fprintf (dump_file, "tree could trap...\n");
@@ -517,7 +678,7 @@  if_convertible_gimple_assign_stmt_p (gimple stmt)
    - it is a GIMPLE_LABEL or a GIMPLE_COND.  */
 
 static bool
-if_convertible_stmt_p (gimple stmt)
+if_convertible_stmt_p (gimple stmt, VEC (data_reference_p, heap) *refs)
 {
   switch (gimple_code (stmt))
     {
@@ -527,7 +688,7 @@  if_convertible_stmt_p (gimple stmt)
       return true;
 
     case GIMPLE_ASSIGN:
-      return if_convertible_gimple_assign_stmt_p (stmt);
+      return if_convertible_gimple_assign_stmt_p (stmt, refs);
 
     default:
       /* Don't know what to do with 'em so don't do anything.  */
@@ -810,6 +971,9 @@  if_convertible_loop_p (struct loop *loop)
   edge e;
   edge_iterator ei;
   basic_block exit_bb = NULL;
+  bool res = false;
+  VEC (data_reference_p, heap) *refs;
+  VEC (ddr_p, heap) *ddrs;
 
   /* Handle only innermost loop.  */
   if (!loop || loop->inner)
@@ -847,18 +1011,14 @@  if_convertible_loop_p (struct loop *loop)
 
   /* Don't if-convert the loop when the data dependences cannot be
      computed: the loop won't be vectorized in that case.  */
-  {
-    VEC (data_reference_p, heap) *refs = VEC_alloc (data_reference_p, heap, 5);
-    VEC (ddr_p, heap) *ddrs = VEC_alloc (ddr_p, heap, 25);
-    bool res = compute_data_dependences_for_loop (loop, true, &refs, &ddrs);
-
-    free_data_refs (refs);
-    free_dependence_relations (ddrs);
+  refs = VEC_alloc (data_reference_p, heap, 5);
+  ddrs = VEC_alloc (ddr_p, heap, 25);
+  res = compute_data_dependences_for_loop (loop, true, &refs, &ddrs);
 
-    if (!res)
-      return false;
-  }
+  if (!res)
+    goto cleanup;
 
+  res = false;
   calculate_dominance_info (CDI_DOMINATORS);
 
   /* Allow statements that can be handled during if-conversion.  */
@@ -867,7 +1027,7 @@  if_convertible_loop_p (struct loop *loop)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Irreducible loop\n");
-      return false;
+      goto cleanup;
     }
 
   for (i = 0; i < loop->num_nodes; i++)
@@ -875,14 +1035,18 @@  if_convertible_loop_p (struct loop *loop)
       basic_block bb = ifc_bbs[i];
 
       if (!if_convertible_bb_p (loop, bb, exit_bb))
-	return false;
+	goto cleanup;
 
       if (bb_with_exit_edge_p (loop, bb))
 	exit_bb = bb;
     }
 
-  if (!predicate_bbs (loop))
-    return false;
+  res = predicate_bbs (loop);
+
+  if (!res)
+    goto cleanup;
+
+  res = false;
 
   for (i = 0; i < loop->num_nodes; i++)
     {
@@ -891,21 +1055,23 @@  if_convertible_loop_p (struct loop *loop)
 
       for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
 	if (!if_convertible_phi_p (loop, bb, gsi_stmt (itr)))
-	  return false;
-
-      /* For non predicated BBs, don't check their statements.  */
-      if (!is_predicated (bb))
-	continue;
+	  goto cleanup;
 
-      for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
-	if (!if_convertible_stmt_p (gsi_stmt (itr)))
-	  return false;
+      /* Check the if-convertibility of statements in predicated BBs.  */
+      if (is_predicated (bb))
+	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
+	  if (!if_convertible_stmt_p (gsi_stmt (itr), refs))
+	    goto cleanup;
     }
 
+  res = true;
   if (dump_file)
     fprintf (dump_file, "Applying if-conversion\n");
 
-  return true;
+ cleanup:
+  free_data_refs (refs);
+  free_dependence_relations (ddrs);
+  return res;
 }
 
 /* Basic block BB has two predecessors.  Using predecessor's bb