Patchwork [1/3] Fix PR46029: reimplement if-convert stores.

login
register
mail settings
Submitter Sebastian Pop
Date Nov. 3, 2010, 3:52 p.m.
Message ID <1288799546-29668-1-git-send-email-sebpop@gmail.com>
Download mbox | patch
Permalink /patch/70031/
State New
Headers show

Comments

Sebastian Pop - Nov. 3, 2010, 3:52 p.m.
2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>

	PR tree-optimization/46029
	* doc/invoke.texi (-ftree-loop-if-convert-stores): Update description.
	* tree-if-conv.c (has_unaligned_memory_refs): New.
	(if_convertible_gimple_assign_stmt_p): Call has_unaligned_memory_refs.
	(create_scratchpad): New.
	(create_indirect_cond_expr): New.
	(predicate_mem_writes): Call create_indirect_cond_expr.  Take an extra
	parameter for scratch_pad.
	(combine_blocks): Same.
	(tree_if_conversion): Same.
	(main_tree_if_conversion): Pass to tree_if_conversion a pointer to
	scratch_pad.
	(struct ifc_dr): Removed.
	(IFC_DR): Removed.
	(DR_WRITTEN_AT_LEAST_ONCE): Removed.
	(DR_RW_UNCONDITIONALLY): Removed.
	(memrefs_read_or_written_unconditionally): Removed.
	(write_memrefs_written_at_least_once): Removed.
	(ifcvt_memrefs_wont_trap): Removed.
	(ifcvt_could_trap_p): Does not take refs parameter anymore.
	(if_convertible_gimple_assign_stmt_p): Same.
	(if_convertible_stmt_p): Same.
	(if_convertible_loop_p_1): Remove initialization of dr->aux,
	DR_WRITTEN_AT_LEAST_ONCE, and DR_RW_UNCONDITIONALLY.
	(if_convertible_loop_p): Remove deallocation of the same.

testsuite/
	* g++.dg/tree-ssa/ifc-pr46029.C: New.
	* gcc.dg/tree-ssa/ifc-8.c: New.
	* gcc.dg/tree-ssa/ifc-5.c: Make it exactly like the FFmpeg kernel.
---
 gcc/ChangeLog                               |   28 ++
 gcc/doc/invoke.texi                         |   18 +-
 gcc/testsuite/ChangeLog                     |    7 +
 gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C |   76 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c       |   17 +-
 gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c       |   29 ++
 gcc/tree-if-conv.c                          |  379 ++++++++++++---------------
 7 files changed, 336 insertions(+), 218 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
Richard Guenther - Nov. 5, 2010, 12:06 p.m.
On Wed, 3 Nov 2010, Sebastian Pop wrote:

> 2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
> 
> 	PR tree-optimization/46029
> 	* doc/invoke.texi (-ftree-loop-if-convert-stores): Update description.
> 	* tree-if-conv.c (has_unaligned_memory_refs): New.
> 	(if_convertible_gimple_assign_stmt_p): Call has_unaligned_memory_refs.
> 	(create_scratchpad): New.
> 	(create_indirect_cond_expr): New.
> 	(predicate_mem_writes): Call create_indirect_cond_expr.  Take an extra
> 	parameter for scratch_pad.
> 	(combine_blocks): Same.
> 	(tree_if_conversion): Same.
> 	(main_tree_if_conversion): Pass to tree_if_conversion a pointer to
> 	scratch_pad.
> 	(struct ifc_dr): Removed.
> 	(IFC_DR): Removed.
> 	(DR_WRITTEN_AT_LEAST_ONCE): Removed.
> 	(DR_RW_UNCONDITIONALLY): Removed.
> 	(memrefs_read_or_written_unconditionally): Removed.
> 	(write_memrefs_written_at_least_once): Removed.
> 	(ifcvt_memrefs_wont_trap): Removed.
> 	(ifcvt_could_trap_p): Does not take refs parameter anymore.
> 	(if_convertible_gimple_assign_stmt_p): Same.
> 	(if_convertible_stmt_p): Same.
> 	(if_convertible_loop_p_1): Remove initialization of dr->aux,
> 	DR_WRITTEN_AT_LEAST_ONCE, and DR_RW_UNCONDITIONALLY.
> 	(if_convertible_loop_p): Remove deallocation of the same.

Comments in-line

> testsuite/
> 	* g++.dg/tree-ssa/ifc-pr46029.C: New.
> 	* gcc.dg/tree-ssa/ifc-8.c: New.
> 	* gcc.dg/tree-ssa/ifc-5.c: Make it exactly like the FFmpeg kernel.
> ---
>  gcc/ChangeLog                               |   28 ++
>  gcc/doc/invoke.texi                         |   18 +-
>  gcc/testsuite/ChangeLog                     |    7 +
>  gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C |   76 ++++++
>  gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c       |   17 +-
>  gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c       |   29 ++
>  gcc/tree-if-conv.c                          |  379 ++++++++++++---------------
>  7 files changed, 336 insertions(+), 218 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
> 
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index beed454..0f58882 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,31 @@
> +2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
> +
> +	PR tree-optimization/46029
> +	* doc/invoke.texi (-ftree-loop-if-convert-stores): Update description.
> +	* tree-if-conv.c (has_unaligned_memory_refs): New.
> +	(if_convertible_gimple_assign_stmt_p): Call has_unaligned_memory_refs.
> +	(create_scratchpad): New.
> +	(create_indirect_cond_expr): New.
> +	(predicate_mem_writes): Call create_indirect_cond_expr.  Take an extra
> +	parameter for scratch_pad.
> +	(combine_blocks): Same.
> +	(tree_if_conversion): Same.
> +	(main_tree_if_conversion): Pass to tree_if_conversion a pointer to
> +	scratch_pad.
> +	(struct ifc_dr): Removed.
> +	(IFC_DR): Removed.
> +	(DR_WRITTEN_AT_LEAST_ONCE): Removed.
> +	(DR_RW_UNCONDITIONALLY): Removed.
> +	(memrefs_read_or_written_unconditionally): Removed.
> +	(write_memrefs_written_at_least_once): Removed.
> +	(ifcvt_memrefs_wont_trap): Removed.
> +	(ifcvt_could_trap_p): Does not take refs parameter anymore.
> +	(if_convertible_gimple_assign_stmt_p): Same.
> +	(if_convertible_stmt_p): Same.
> +	(if_convertible_loop_p_1): Remove initialization of dr->aux,
> +	DR_WRITTEN_AT_LEAST_ONCE, and DR_RW_UNCONDITIONALLY.
> +	(if_convertible_loop_p): Remove deallocation of the same.
> +
>  2010-10-20  Nathan Froyd  <froydnj@codesourcery.com>
>  
>  	* ifcvt.c (noce_emit_cmove): If both of the values are SUBREGs, try
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index ee68454..28b0cbb 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -6935,20 +6935,26 @@ if vectorization is enabled.
>  
>  @item -ftree-loop-if-convert-stores
>  Attempt to also if-convert conditional jumps containing memory writes.
> -This transformation can be unsafe for multi-threaded programs as it
> -transforms conditional memory writes into unconditional memory writes.
>  For example,
>  @smallexample
>  for (i = 0; i < N; i++)
>    if (cond)
> -    A[i] = expr;
> +    A[i] = B[i] + 2;
>  @end smallexample
>  would be transformed to
>  @smallexample
> -for (i = 0; i < N; i++)
> -  A[i] = cond ? expr : A[i];
> +void *scratchpad = alloca (64);
> +for (i = 0; i < N; i++) @{
> +  a = cond ? &A[i] : scratchpad;
> +  b = cond ? &B[i] : scratchpad;
> +  *a = *b + 2;
> +@}
>  @end smallexample
> -potentially producing data races.
> +The compiler allocates a scratchpad memory on the stack for each
> +function in which the if-conversion of memory stores or reads
> +happened.  This scratchpad memory is used during the part of the
> +computation that is discarded, i.e., when the condition is evaluated
> +to false.
>  
>  @item -ftree-loop-distribution
>  Perform loop distribution.  This flag can improve cache performance on
> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index 9d9c543..4233f86 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,3 +1,10 @@
> +2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
> +
> +	PR tree-optimization/46029
> +	* g++.dg/tree-ssa/ifc-pr46029.C: New.
> +	* gcc.dg/tree-ssa/ifc-8.c: New.
> +	* gcc.dg/tree-ssa/ifc-5.c: Make it exactly like the FFmpeg kernel.
> +
>  2010-10-20  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
>  
>  	PR c++/46024
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C b/gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
> new file mode 100644
> index 0000000..2a54bdb
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
> @@ -0,0 +1,76 @@
> +// { dg-do run }
> +/* { dg-options "-O -ftree-loop-if-convert-stores" } */
> +
> +namespace
> +{
> +  struct rb_tree_node_
> +  {
> +    rb_tree_node_ ():m_p_left (0), m_p_parent (0), m_metadata (0)
> +    {
> +    }
> +    unsigned &get_metadata ()
> +    {
> +      return m_metadata;
> +    }
> +    rb_tree_node_ *m_p_left;
> +    rb_tree_node_ *m_p_parent;
> +    unsigned m_metadata;
> +  };
> +
> +  struct bin_search_tree_const_node_it_
> +  {
> +    bin_search_tree_const_node_it_ (rb_tree_node_ * p_nd):m_p_nd (p_nd)
> +    {
> +    }
> +    unsigned &get_metadata ()
> +    {
> +      return m_p_nd->get_metadata ();
> +    }
> +    bin_search_tree_const_node_it_ get_l_child ()
> +    {
> +      return bin_search_tree_const_node_it_ (m_p_nd->m_p_left);
> +    }
> +
> +    rb_tree_node_ *m_p_nd;
> +  };
> +
> +  struct bin_search_tree_no_data_
> +  {
> +    typedef rb_tree_node_ *node_pointer;
> +      bin_search_tree_no_data_ ():m_p_head (new rb_tree_node_ ())
> +    {
> +    }
> +    void insert_imp_empty (int r_value)
> +    {
> +      rb_tree_node_ *p_new_node = new rb_tree_node_ ();
> +      m_p_head->m_p_parent = p_new_node;
> +      p_new_node->m_p_parent = m_p_head;
> +      update_to_top (m_p_head->m_p_parent);
> +    }
> +    void apply_update (bin_search_tree_const_node_it_ nd_it)
> +    {
> +      unsigned
> +	l_max_endpoint
> +	=
> +	(nd_it.get_l_child ().m_p_nd ==
> +	 0) ? 0 : nd_it.get_l_child ().get_metadata ();
> +      nd_it.get_metadata () = l_max_endpoint;
> +    }
> +    void update_to_top (node_pointer p_nd)
> +    {
> +      while (p_nd != m_p_head)
> +	{
> +	  apply_update (p_nd);
> +	  p_nd = p_nd->m_p_parent;
> +	}
> +    }
> +
> +    rb_tree_node_ * m_p_head;
> +  };
> +}
> +
> +int main ()
> +{
> +  bin_search_tree_no_data_ ().insert_imp_empty (0);
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
> index a9cc816..d88c4a2 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
> @@ -12,11 +12,18 @@ dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs)
>    for (i = 0; i <= nCoeffs; i++)
>      {
>        level = block[i];
> -      if (level < 0)
> -	level = level * qmul - qadd;
> -      else
> -	level = level * qmul + qadd;
> -      block[i] = level;
> +      if (level)
> +        {
> +          if (level < 0)
> +            {
> +              level = level * qmul - qadd;
> +            }
> +          else
> +            {
> +              level = level * qmul + qadd;
> +            }
> +          block[i] = level;
> +        }
>      }
>  }
>  
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
> new file mode 100644
> index 0000000..d7cf279
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-c -O2 -ftree-vectorize" { target *-*-* } } */
> +
> +typedef union tree_node *tree;
> +struct tree_common
> +{
> +  unsigned volatile_flag : 1;
> +  unsigned unsigned_flag : 1;
> +};
> +struct tree_type
> +{
> +  tree next_variant;
> +  tree main_variant;
> +};
> +union tree_node
> +{
> +  struct tree_common common;
> +  struct tree_type type;
> +};
> +void finish_enum (tree enumtype)
> +{
> +  tree tem;
> +  for (tem = ((enumtype)->type.main_variant); tem; tem = ((tem)->type.next_variant))
> +    {
> +      if (tem == enumtype)
> +	continue;
> +      ((tem)->common.unsigned_flag) = ((enumtype)->common.unsigned_flag);
> +    }
> +}
> diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
> index 642dbda..9fc6190 100644
> --- a/gcc/tree-if-conv.c
> +++ b/gcc/tree-if-conv.c
> @@ -446,171 +446,47 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
>    return true;
>  }
>  
> -/* Records the status of a data reference.  This struct is attached to
> -   each DR->aux field.  */
> -
> -struct ifc_dr {
> -  /* -1 when not initialized, 0 when false, 1 when true.  */
> -  int written_at_least_once;
> -
> -  /* -1 when not initialized, 0 when false, 1 when true.  */
> -  int rw_unconditionally;
> -};
> -
> -#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
> -#define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
> -#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
> -
> -/* Returns true when the memory references of STMT are read or written
> -   unconditionally.  In other words, this function returns true when
> -   for every data reference A in STMT there exist other accesses to
> -   the same data reference with predicates that add up (OR-up) to the
> -   true predicate: this ensures that the data reference A is touched
> -   (read or written) on every iteration of the if-converted loop.  */
> -
> -static bool
> -memrefs_read_or_written_unconditionally (gimple stmt,
> -					 VEC (data_reference_p, heap) *drs)
> -{
> -  int i, j;
> -  data_reference_p a, b;
> -  tree ca = bb_predicate (gimple_bb (stmt));
> -
> -  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
> -    if (DR_STMT (a) == stmt)
> -      {
> -	bool found = false;
> -	int x = DR_RW_UNCONDITIONALLY (a);
> -
> -	if (x == 0)
> -	  return false;
> -
> -	if (x == 1)
> -	  continue;
> -
> -	for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
> -	  if (DR_STMT (b) != stmt
> -	      && same_data_refs (a, b))
> -	    {
> -	      tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
> -
> -	      if (DR_RW_UNCONDITIONALLY (b) == 1
> -		  || is_true_predicate (cb)
> -		  || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
> -								 ca, cb)))
> -		{
> -		  DR_RW_UNCONDITIONALLY (a) = 1;
> -		  DR_RW_UNCONDITIONALLY (b) = 1;
> -		  found = true;
> -		  break;
> -		}
> -	    }
> -
> -	if (!found)
> -	  {
> -	    DR_RW_UNCONDITIONALLY (a) = 0;
> -	    return false;
> -	  }
> -      }
> -
> -  return true;
> -}
> -
> -/* Returns true when the memory references of STMT are unconditionally
> -   written.  In other words, this function returns true when for every
> -   data reference A written in STMT, there exist other writes to the
> -   same data reference with predicates that add up (OR-up) to the true
> -   predicate: this ensures that the data reference A is written on
> -   every iteration of the if-converted loop.  */
> +/* Wrapper around gimple_could_trap_p refined for the needs of the
> +   if-conversion.  */
>  
>  static bool
> -write_memrefs_written_at_least_once (gimple stmt,
> -				     VEC (data_reference_p, heap) *drs)
> +ifcvt_could_trap_p (gimple stmt)
>  {
> -  int i, j;
> -  data_reference_p a, b;
> -  tree ca = bb_predicate (gimple_bb (stmt));
> -
> -  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
> -    if (DR_STMT (a) == stmt
> -	&& DR_IS_WRITE (a))
> -      {
> -	bool found = false;
> -	int x = DR_WRITTEN_AT_LEAST_ONCE (a);
> -
> -	if (x == 0)
> -	  return false;
> -
> -	if (x == 1)
> -	  continue;
> -
> -	for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
> -	  if (DR_STMT (b) != stmt
> -	      && DR_IS_WRITE (b)
> -	      && same_data_refs_base_objects (a, b))
> -	    {
> -	      tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
> -
> -	      if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
> -		  || is_true_predicate (cb)
> -		  || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
> -								 ca, cb)))
> -		{
> -		  DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
> -		  DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
> -		  found = true;
> -		  break;
> -		}
> -	    }
> -
> -	if (!found)
> -	  {
> -	    DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
> -	    return false;
> -	  }
> -      }
> +  if (gimple_vuse (stmt)
> +      && !gimple_could_trap_p_1 (stmt, false, false))
> +    return false;
>  
> -  return true;
> +  return gimple_could_trap_p (stmt);
>  }
>  
> -/* Return true when the memory references of STMT won't trap in the
> -   if-converted code.  There are two things that we have to check for:
> -
> -   - writes to memory occur to writable memory: if-conversion of
> -   memory writes transforms the conditional memory writes into
> -   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
> -   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
> -   be executed at all in the original code, it may be a readonly
> -   memory.  To check that A is not const-qualified, we check that
> -   there exists at least an unconditional write to A in the current
> -   function.
> -
> -   - reads or writes to memory are valid memory accesses for every
> -   iteration.  To check that the memory accesses are correctly formed
> -   and that we are allowed to read and write in these locations, we
> -   check that the memory accesses to be if-converted occur at every
> -   iteration unconditionally.  */
> +/* Returns true when stmt contains a data reference.  */
>  
>  static bool
> -ifcvt_memrefs_wont_trap (gimple stmt, VEC (data_reference_p, heap) *refs)
> +has_unaligned_memory_refs (gimple stmt)

Ick - unified diffs are hard to read (sometimes).  The comment
doesn't match the function name -- unaligned data reference or not?

>  {
> -  return write_memrefs_written_at_least_once (stmt, refs)
> -    && memrefs_read_or_written_unconditionally (stmt, refs);
> -}
> -
> -/* Wrapper around gimple_could_trap_p refined for the needs of the
> -   if-conversion.  Try to prove that the memory accesses of STMT could
> -   not trap in the innermost loop containing STMT.  */
> +  int unsignedp, volatilep;
> +  HOST_WIDE_INT bitsize, bitpos;
> +  tree toffset;
> +  enum machine_mode mode;
> +  VEC (data_ref_loc, heap) *refs = VEC_alloc (data_ref_loc, heap, 3);
> +  bool res = get_references_in_stmt (stmt, &refs);
> +  unsigned i;
> +  data_ref_loc *ref;
> +
> +  FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
> +    {
> +      get_inner_reference (*ref->pos, &bitsize, &bitpos, &toffset,
> +			   &mode, &unsignedp, &volatilep, true);
>  
> -static bool
> -ifcvt_could_trap_p (gimple stmt, VEC (data_reference_p, heap) *refs)
> -{
> -  if (gimple_vuse (stmt)
> -      && !gimple_could_trap_p_1 (stmt, false, false)
> -      && ifcvt_memrefs_wont_trap (stmt, refs))
> -    return false;
> +      if ((bitpos % BITS_PER_UNIT) != 0)

Hmm, that's not actually unaligned but not addressable, right?
I guess you want to re-use ivopts may_be_nonaddressable_p instead.

> +	{
> +	  res = true;
> +	  break;
> +	}
> +    }
>  
> -  return gimple_could_trap_p (stmt);
> +  VEC_free (data_ref_loc, heap, refs);
> +  return res;
>  }
>  
>  /* Return true when STMT is if-convertible.
> @@ -621,8 +497,7 @@ ifcvt_could_trap_p (gimple stmt, VEC (data_reference_p, heap) *refs)
>     - LHS is not var decl.  */
>  
>  static bool
> -if_convertible_gimple_assign_stmt_p (gimple stmt,
> -				     VEC (data_reference_p, heap) *refs)
> +if_convertible_gimple_assign_stmt_p (gimple stmt)
>  {
>    tree lhs = gimple_assign_lhs (stmt);
>    basic_block bb;
> @@ -650,12 +525,20 @@ if_convertible_gimple_assign_stmt_p (gimple stmt,
>  
>    if (flag_tree_loop_if_convert_stores)
>      {
> -      if (ifcvt_could_trap_p (stmt, refs))
> +      if (ifcvt_could_trap_p (stmt))
>  	{
>  	  if (dump_file && (dump_flags & TDF_DETAILS))
>  	    fprintf (dump_file, "tree could trap...\n");
>  	  return false;
>  	}
> +
> +      if (has_unaligned_memory_refs (stmt))
> +	{
> +	  if (dump_file && (dump_flags & TDF_DETAILS))
> +	    fprintf (dump_file, "uses misaligned memory...\n");

But here it suggests misaligned again (why'd we care for misalignment?)

> +	  return false;
> +	}
> +
>        return true;
>      }
>  
> @@ -690,7 +573,7 @@ if_convertible_gimple_assign_stmt_p (gimple stmt,
>     - it is a GIMPLE_LABEL or a GIMPLE_COND.  */
>  
>  static bool
> -if_convertible_stmt_p (gimple stmt, VEC (data_reference_p, heap) *refs)
> +if_convertible_stmt_p (gimple stmt)
>  {
>    switch (gimple_code (stmt))
>      {
> @@ -700,7 +583,7 @@ if_convertible_stmt_p (gimple stmt, VEC (data_reference_p, heap) *refs)
>        return true;
>  
>      case GIMPLE_ASSIGN:
> -      return if_convertible_gimple_assign_stmt_p (stmt, refs);
> +      return if_convertible_gimple_assign_stmt_p (stmt);
>  
>      default:
>        /* Don't know what to do with 'em so don't do anything.  */
> @@ -1016,18 +899,6 @@ if_convertible_loop_p_1 (struct loop *loop,
>    if (!res)
>      return false;
>  
> -  if (flag_tree_loop_if_convert_stores)
> -    {
> -      data_reference_p dr;
> -
> -      for (i = 0; VEC_iterate (data_reference_p, *refs, i, dr); i++)
> -	{
> -	  dr->aux = XNEW (struct ifc_dr);
> -	  DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
> -	  DR_RW_UNCONDITIONALLY (dr) = -1;
> -	}
> -    }
> -
>    for (i = 0; i < loop->num_nodes; i++)
>      {
>        basic_block bb = ifc_bbs[i];
> @@ -1040,7 +911,7 @@ if_convertible_loop_p_1 (struct loop *loop,
>        /* Check the if-convertibility of statements in predicated BBs.  */
>        if (is_predicated (bb))
>  	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
> -	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
> +	  if (!if_convertible_stmt_p (gsi_stmt (itr)))
>  	    return false;
>      }
>  
> @@ -1101,15 +972,6 @@ if_convertible_loop_p (struct loop *loop)
>    ddrs = VEC_alloc (ddr_p, heap, 25);
>    res = if_convertible_loop_p_1 (loop, &refs, &ddrs);
>  
> -  if (flag_tree_loop_if_convert_stores)
> -    {
> -      data_reference_p dr;
> -      unsigned int i;
> -
> -      for (i = 0; VEC_iterate (data_reference_p, refs, i, dr); i++)
> -	free (dr->aux);
> -    }
> -
>    free_data_refs (refs);
>    free_dependence_relations (ddrs);
>    return res;
> @@ -1366,6 +1228,78 @@ insert_gimplified_predicates (loop_p loop)
>      }
>  }
>  
> +/* Insert at the beginning of the first basic block of the current
> +   function the allocation on the stack of N bytes of memory and
> +   return a pointer to this scratchpad memory.  */
> +
> +static tree
> +create_scratchpad (void)
> +{
> +  basic_block bb = single_succ (ENTRY_BLOCK_PTR);
> +  gimple_stmt_iterator gsi = gsi_after_labels (bb);
> +
> +  /* void *tmp = __builtin_alloca */
> +  const char *name = "scratch_pad";
> +  tree x = build_int_cst (integer_type_node, 64);
> +  gimple stmt = gimple_build_call (built_in_decls[BUILT_IN_ALLOCA], 1, x);
> +  tree var = create_tmp_var (ptr_type_node, name);
> +  tree tmp = make_ssa_name (var, stmt);

It would be better to use an automatic variable than using alloca
which is expensive.  Why was your choice that way?  (Are we ever
if-converting aggregate stores?  I hope not.)

Also you are unconditionally allocating 64 bytes instead of N.

Note that if you want to make vectorization happy you would need
to ensure that for

  if (x)
    a[i] = ...;

the scratchpad you'll end up using will have the _same_ alignment
as a[i] (same or larger for all offsets).  Using a local array
of chars should make it possible for the vectorizer to adjust
its alignment if needed.

> +  add_referenced_var (var);
> +  gimple_call_set_lhs (stmt, tmp);
> +  SSA_NAME_DEF_STMT (tmp) = stmt;
> +  update_stmt (stmt);
> +
> +  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
> +  return tmp;
> +}
> +
> +/* Returns a memory reference to the pointer defined by the
> +   conditional expression: pointer = cond ? &A[i] : scratch_pad; and
> +   inserts this code at GSI.  */
> +
> +static tree
> +create_indirect_cond_expr (tree ai, tree cond, tree *scratch_pad,
> +			   gimple_stmt_iterator *gsi)
> +{
> +  tree type = TREE_TYPE (ai);
> +
> +  tree pointer_to_type, address_of_ai, addr_expr, cond_expr;
> +  tree pointer, star_pointer;
> +  gimple addr_stmt, pointer_stmt;
> +
> +  /* address_of_ai = &A[i];  */
> +  pointer_to_type = build_pointer_type (type);
> +  address_of_ai = create_tmp_var (pointer_to_type, "_ifc_");

Use create_tmp_reg (everywhere)

> +  add_referenced_var (address_of_ai);
> +  addr_expr = build_fold_addr_expr (ai);

If you build that before create_tmp_reg you can use TREE_TYPE of
it and avoid creating pointer_to_type.

> +  addr_stmt = gimple_build_assign (address_of_ai, addr_expr);
> +  address_of_ai = make_ssa_name (address_of_ai, addr_stmt);
> +  gimple_assign_set_lhs (addr_stmt, address_of_ai);
> +  SSA_NAME_DEF_STMT (address_of_ai) = addr_stmt;
> +  update_stmt (addr_stmt);
> +  gsi_insert_before (gsi, addr_stmt, GSI_SAME_STMT);
> +
> +  /* Allocate the scratch pad only once per function.  */
> +  if (!*scratch_pad)
> +    *scratch_pad = create_scratchpad ();
> +
> +  /* pointer = cond ? address_of_ai : scratch_pad;  */
> +  pointer = create_tmp_var (pointer_to_type, "_ifc_");
> +  add_referenced_var (pointer);
> +  cond_expr = build3 (COND_EXPR, pointer_to_type, unshare_expr (cond),
> +		      address_of_ai, *scratch_pad);
> +  pointer_stmt = gimple_build_assign (pointer, cond_expr);
> +  pointer = make_ssa_name (pointer, pointer_stmt);
> +  gimple_assign_set_lhs (pointer_stmt, pointer);
> +  SSA_NAME_DEF_STMT (pointer) = pointer_stmt;
> +  update_stmt (pointer_stmt);
> +  gsi_insert_before (gsi, pointer_stmt, GSI_SAME_STMT);
> +
> +  star_pointer = build_simple_mem_ref (pointer);

build2 (MEM_REF, TREE_TYPE (ai), pointer,
        build_int_cst (reference_alias_ptr_type (ai), 0));

as you need to preserve TBAA info.

> +  return star_pointer;
> +}
> +
>  /* Predicate each write to memory in LOOP.
>  
>     This function transforms control flow constructs containing memory
> @@ -1377,10 +1311,19 @@ insert_gimplified_predicates (loop_p loop)
>  
>     into the following form that does not contain control flow:
>  
> -   | for (i = 0; i < N; i++)
> -   |   A[i] = cond ? expr : A[i];
> +   | void *scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
> +   |
> +   | for (i = 0; i < N; i++) {
> +   |   p = cond ? &A[i] : scratch_pad;
> +   |   *p = expr;
> +   | }
> +
> +   SCRATCH_PAD is allocated on the stack for each function once and it is
> +   large enough to contain any kind of scalar assignment or read.  All
> +   values read or written to SCRATCH_PAD are not used in the computation.
>  
> -   The original CFG looks like this:
> +   In a more detailed way, the if-conversion of memory writes works
> +   like this, supposing that the original CFG looks like this:
>  
>     | bb_0
>     |   i = 0
> @@ -1430,10 +1373,12 @@ insert_gimplified_predicates (loop_p loop)
>     |   goto bb_1
>     | end_bb_4
>  
> -   predicate_mem_writes is then predicating the memory write as follows:
> +   predicate_mem_writes is then allocating SCRATCH_PAD in the basic block
> +   preceding the loop header, and is predicating the memory write:
>  
>     | bb_0
>     |   i = 0
> +   |   scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
>     | end_bb_0
>     |
>     | bb_1
> @@ -1441,12 +1386,14 @@ insert_gimplified_predicates (loop_p loop)
>     | end_bb_1
>     |
>     | bb_2
> +   |   cond = some_computation;
>     |   if (cond) goto bb_3 else goto bb_4
>     | end_bb_2
>     |
>     | bb_3
>     |   cond = some_computation;
> -   |   A[i] = cond ? expr : A[i];
> +   |   p = cond ? &A[i] : scratch_pad;
> +   |   *p = expr;
>     |   goto bb_4
>     | end_bb_3
>     |
> @@ -1459,12 +1406,14 @@ insert_gimplified_predicates (loop_p loop)
>  
>     | bb_0
>     |   i = 0
> +   |   scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
>     |   if (i < N) goto bb_5 else goto bb_1
>     | end_bb_0
>     |
>     | bb_1
>     |   cond = some_computation;
> -   |   A[i] = cond ? expr : A[i];
> +   |   p = cond ? &A[i] : scratch_pad;
> +   |   *p = expr;
>     |   if (i < N) goto bb_5 else goto bb_4
>     | end_bb_1
>     |
> @@ -1474,7 +1423,7 @@ insert_gimplified_predicates (loop_p loop)
>  */
>  
>  static void
> -predicate_mem_writes (loop_p loop)
> +predicate_mem_writes (loop_p loop, tree *scratch_pad)
>  {
>    unsigned int i, orig_loop_num_nodes = loop->num_nodes;
>  
> @@ -1489,20 +1438,35 @@ predicate_mem_writes (loop_p loop)
>  	continue;
>  
>        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> -	if ((stmt = gsi_stmt (gsi))
> -	    && gimple_assign_single_p (stmt)
> -	    && gimple_vdef (stmt))
> -	  {
> -	    tree lhs = gimple_assign_lhs (stmt);
> -	    tree rhs = gimple_assign_rhs1 (stmt);
> -	    tree type = TREE_TYPE (lhs);
> -
> -	    lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
> -	    rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
> -	    rhs = build3 (COND_EXPR, type, unshare_expr (cond), rhs, lhs);
> -	    gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
> -	    update_stmt (stmt);
> -	  }
> +	{
> +	  stmt = gsi_stmt (gsi);
> +	  if (gimple_assign_single_p (stmt)
> +	      && gimple_vdef (stmt))
> +	    {
> +	      /* A[i] = x;  */
> +	      tree ai = gimple_assign_lhs (stmt);
> +
> +	      /* pointer = cond ? &A[i] : scratch_pad;  */
> +	      tree star_pointer = create_indirect_cond_expr (ai, cond,
> +							     scratch_pad, &gsi);
> +	      /* *pointer = x;  */
> +	      gimple_assign_set_lhs (stmt, star_pointer);
> +	      update_stmt (stmt);
> +	    }
> +	  else if (gimple_assign_single_p (stmt)
> +		   && gimple_vuse (stmt))
> +	    {
> +	      /* x = A[i];  */
> +	      tree ai = gimple_assign_rhs1 (stmt);
> +
> +	      /* pointer = cond ? &A[i] : scratch_pad;  */
> +	      tree star_pointer = create_indirect_cond_expr (ai, cond,
> +							     scratch_pad, &gsi);
> +	      /* x = *pointer;  */
> +	      gimple_assign_set_rhs1 (stmt, star_pointer);
> +	      update_stmt (stmt);
> +	    }
> +	}
>      }
>  }
>  
> @@ -1552,7 +1516,7 @@ remove_conditions_and_labels (loop_p loop)
>     blocks.  Replace PHI nodes with conditional modify expressions.  */
>  
>  static void
> -combine_blocks (struct loop *loop)
> +combine_blocks (struct loop *loop, tree *scratch_pad)
>  {
>    basic_block bb, exit_bb, merge_target_bb;
>    unsigned int orig_loop_num_nodes = loop->num_nodes;
> @@ -1565,7 +1529,7 @@ combine_blocks (struct loop *loop)
>    predicate_all_scalar_phis (loop);
>  
>    if (flag_tree_loop_if_convert_stores)
> -    predicate_mem_writes (loop);
> +    predicate_mem_writes (loop, scratch_pad);
>  
>    /* Merge basic blocks: first remove all the edges in the loop,
>       except for those from the exit block.  */
> @@ -1654,7 +1618,7 @@ combine_blocks (struct loop *loop)
>     profitability analysis.  Returns true when something changed.  */
>  
>  static bool
> -tree_if_conversion (struct loop *loop)
> +tree_if_conversion (struct loop *loop, tree *scratch_pad)
>  {
>    bool changed = false;
>    ifc_bbs = NULL;
> @@ -1666,7 +1630,7 @@ tree_if_conversion (struct loop *loop)
>    /* Now all statements are if-convertible.  Combine all the basic
>       blocks into one huge basic block doing the if-conversion
>       on-the-fly.  */
> -  combine_blocks (loop);
> +  combine_blocks (loop, scratch_pad);
>  
>    if (flag_tree_loop_if_convert_stores)
>      mark_sym_for_renaming (gimple_vop (cfun));
> @@ -1697,12 +1661,13 @@ main_tree_if_conversion (void)
>    struct loop *loop;
>    bool changed = false;
>    unsigned todo = 0;
> +  tree scratch_pad = NULL_TREE;
>  
>    if (number_of_loops () <= 1)
>      return 0;
>  
>    FOR_EACH_LOOP (li, loop, 0)
> -    changed |= tree_if_conversion (loop);
> +    changed |= tree_if_conversion (loop, &scratch_pad);
>  
>    if (changed)
>      todo |= TODO_cleanup_cfg;

Overall I like the new way much more.  Please update and repost.

Thanks,
Richard.
Sebastian Pop - Nov. 5, 2010, 4:08 p.m.
On Fri, Nov 5, 2010 at 07:06, Richard Guenther <rguenther@suse.de> wrote:
>> +/* Returns true when stmt contains a data reference.  */
>>
>>  static bool
>> -ifcvt_memrefs_wont_trap (gimple stmt, VEC (data_reference_p, heap) *refs)
>> +has_unaligned_memory_refs (gimple stmt)
>
> Ick - unified diffs are hard to read (sometimes).  The comment
> doesn't match the function name -- unaligned data reference or not?
>

Right, the comment is incomplete.

>> +      if ((bitpos % BITS_PER_UNIT) != 0)
>
> Hmm, that's not actually unaligned but not addressable, right?
> I guess you want to re-use ivopts may_be_nonaddressable_p instead.

Correct.  One of the testcases that I added triggered this error in
expand, and I took part of this code from may_be_nonaddressable_p.  I
don't remember why I was not able to use may_be_nonaddressable_p, but
I will try.

>> +      if (has_unaligned_memory_refs (stmt))
>> +     {
>> +       if (dump_file && (dump_flags & TDF_DETAILS))
>> +         fprintf (dump_file, "uses misaligned memory...\n");
>
> But here it suggests misaligned again (why'd we care for misalignment?)
>

I will change this printf to nonaddressable memory.

>> +/* Insert at the beginning of the first basic block of the current
>> +   function the allocation on the stack of N bytes of memory and
>> +   return a pointer to this scratchpad memory.  */
>> +
>> +static tree
>> +create_scratchpad (void)
>> +{
>> +  basic_block bb = single_succ (ENTRY_BLOCK_PTR);
>> +  gimple_stmt_iterator gsi = gsi_after_labels (bb);
>> +
>> +  /* void *tmp = __builtin_alloca */
>> +  const char *name = "scratch_pad";
>> +  tree x = build_int_cst (integer_type_node, 64);
>> +  gimple stmt = gimple_build_call (built_in_decls[BUILT_IN_ALLOCA], 1, x);
>> +  tree var = create_tmp_var (ptr_type_node, name);
>> +  tree tmp = make_ssa_name (var, stmt);
>
> It would be better to use an automatic variable than using alloca
> which is expensive.  Why was your choice that way?  (Are we ever
> if-converting aggregate stores?  I hope not.)
>
> Also you are unconditionally allocating 64 bytes instead of N.
>
> Note that if you want to make vectorization happy you would need
> to ensure that for
>
>  if (x)
>    a[i] = ...;
>
> the scratchpad you'll end up using will have the _same_ alignment
> as a[i] (same or larger for all offsets).  Using a local array
> of chars should make it possible for the vectorizer to adjust
> its alignment if needed.
>

Ok, thanks for the recommendation, I will try to use a local array.

Also, for the vectorization to happen, we should teach the vectorizer
and the data dependence analysis that the reads/writes to the
scratchpad have inconsequential effects.  The vectorizer should also
be able to use extra scratchpad memory to transform the scratchpaded
dataref into a vector in the scratchpad.

>
> Overall I like the new way much more.  Please update and repost.

Thanks for the review.  I will send an updated patch.

Sebastian
Sebastian Pop - Nov. 10, 2010, 11:23 p.m.
Hi Richi,

On Fri, Nov 5, 2010 at 11:08, Sebastian Pop <sebpop@gmail.com> wrote:
>> Note that if you want to make vectorization happy you would need
>> to ensure that for
>>
>>  if (x)
>>    a[i] = ...;
>>
>> the scratchpad you'll end up using will have the _same_ alignment
>> as a[i] (same or larger for all offsets).  Using a local array
>> of chars should make it possible for the vectorizer to adjust
>> its alignment if needed.
>>
>
> Ok, thanks for the recommendation, I will try to use a local array.

Do you happen to know how to declare an automatic variable?

I was looking at the code in tree-switch-conversion.c:build_one_array.
Does that look the right approach to statically declare the scratchpad
on the stack?

Thanks,
Sebastian
Richard Guenther - Nov. 11, 2010, 9:43 a.m.
On Wed, 10 Nov 2010, Sebastian Pop wrote:

> Hi Richi,
> 
> On Fri, Nov 5, 2010 at 11:08, Sebastian Pop <sebpop@gmail.com> wrote:
> >> Note that if you want to make vectorization happy you would need
> >> to ensure that for
> >>
> >>  if (x)
> >>    a[i] = ...;
> >>
> >> the scratchpad you'll end up using will have the _same_ alignment
> >> as a[i] (same or larger for all offsets).  Using a local array
> >> of chars should make it possible for the vectorizer to adjust
> >> its alignment if needed.
> >>
> >
> > Ok, thanks for the recommendation, I will try to use a local array.
> 
> Do you happen to know how to declare an automatic variable?
> 
> I was looking at the code in tree-switch-conversion.c:build_one_array.
> Does that look the right approach to statically declare the scratchpad
> on the stack?

As simple as

var = create_tmp_var (type, NULL_TREE);

switch-conversion builds a local static array, not an automatic var.

Richard.

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index beed454..0f58882 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,31 @@ 
+2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
+
+	PR tree-optimization/46029
+	* doc/invoke.texi (-ftree-loop-if-convert-stores): Update description.
+	* tree-if-conv.c (has_unaligned_memory_refs): New.
+	(if_convertible_gimple_assign_stmt_p): Call has_unaligned_memory_refs.
+	(create_scratchpad): New.
+	(create_indirect_cond_expr): New.
+	(predicate_mem_writes): Call create_indirect_cond_expr.  Take an extra
+	parameter for scratch_pad.
+	(combine_blocks): Same.
+	(tree_if_conversion): Same.
+	(main_tree_if_conversion): Pass to tree_if_conversion a pointer to
+	scratch_pad.
+	(struct ifc_dr): Removed.
+	(IFC_DR): Removed.
+	(DR_WRITTEN_AT_LEAST_ONCE): Removed.
+	(DR_RW_UNCONDITIONALLY): Removed.
+	(memrefs_read_or_written_unconditionally): Removed.
+	(write_memrefs_written_at_least_once): Removed.
+	(ifcvt_memrefs_wont_trap): Removed.
+	(ifcvt_could_trap_p): Does not take refs parameter anymore.
+	(if_convertible_gimple_assign_stmt_p): Same.
+	(if_convertible_stmt_p): Same.
+	(if_convertible_loop_p_1): Remove initialization of dr->aux,
+	DR_WRITTEN_AT_LEAST_ONCE, and DR_RW_UNCONDITIONALLY.
+	(if_convertible_loop_p): Remove deallocation of the same.
+
 2010-10-20  Nathan Froyd  <froydnj@codesourcery.com>
 
 	* ifcvt.c (noce_emit_cmove): If both of the values are SUBREGs, try
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ee68454..28b0cbb 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -6935,20 +6935,26 @@  if vectorization is enabled.
 
 @item -ftree-loop-if-convert-stores
 Attempt to also if-convert conditional jumps containing memory writes.
-This transformation can be unsafe for multi-threaded programs as it
-transforms conditional memory writes into unconditional memory writes.
 For example,
 @smallexample
 for (i = 0; i < N; i++)
   if (cond)
-    A[i] = expr;
+    A[i] = B[i] + 2;
 @end smallexample
 would be transformed to
 @smallexample
-for (i = 0; i < N; i++)
-  A[i] = cond ? expr : A[i];
+void *scratchpad = alloca (64);
+for (i = 0; i < N; i++) @{
+  a = cond ? &A[i] : scratchpad;
+  b = cond ? &B[i] : scratchpad;
+  *a = *b + 2;
+@}
 @end smallexample
-potentially producing data races.
+The compiler allocates a scratchpad memory on the stack for each
+function in which the if-conversion of memory stores or reads
+happened.  This scratchpad memory is used during the part of the
+computation that is discarded, i.e., when the condition is evaluated
+to false.
 
 @item -ftree-loop-distribution
 Perform loop distribution.  This flag can improve cache performance on
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9d9c543..4233f86 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@ 
+2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
+
+	PR tree-optimization/46029
+	* g++.dg/tree-ssa/ifc-pr46029.C: New.
+	* gcc.dg/tree-ssa/ifc-8.c: New.
+	* gcc.dg/tree-ssa/ifc-5.c: Make it exactly like the FFmpeg kernel.
+
 2010-10-20  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
 
 	PR c++/46024
diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C b/gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
new file mode 100644
index 0000000..2a54bdb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
@@ -0,0 +1,76 @@ 
+// { dg-do run }
+/* { dg-options "-O -ftree-loop-if-convert-stores" } */
+
+namespace
+{
+  struct rb_tree_node_
+  {
+    rb_tree_node_ ():m_p_left (0), m_p_parent (0), m_metadata (0)
+    {
+    }
+    unsigned &get_metadata ()
+    {
+      return m_metadata;
+    }
+    rb_tree_node_ *m_p_left;
+    rb_tree_node_ *m_p_parent;
+    unsigned m_metadata;
+  };
+
+  struct bin_search_tree_const_node_it_
+  {
+    bin_search_tree_const_node_it_ (rb_tree_node_ * p_nd):m_p_nd (p_nd)
+    {
+    }
+    unsigned &get_metadata ()
+    {
+      return m_p_nd->get_metadata ();
+    }
+    bin_search_tree_const_node_it_ get_l_child ()
+    {
+      return bin_search_tree_const_node_it_ (m_p_nd->m_p_left);
+    }
+
+    rb_tree_node_ *m_p_nd;
+  };
+
+  struct bin_search_tree_no_data_
+  {
+    typedef rb_tree_node_ *node_pointer;
+      bin_search_tree_no_data_ ():m_p_head (new rb_tree_node_ ())
+    {
+    }
+    void insert_imp_empty (int r_value)
+    {
+      rb_tree_node_ *p_new_node = new rb_tree_node_ ();
+      m_p_head->m_p_parent = p_new_node;
+      p_new_node->m_p_parent = m_p_head;
+      update_to_top (m_p_head->m_p_parent);
+    }
+    void apply_update (bin_search_tree_const_node_it_ nd_it)
+    {
+      unsigned
+	l_max_endpoint
+	=
+	(nd_it.get_l_child ().m_p_nd ==
+	 0) ? 0 : nd_it.get_l_child ().get_metadata ();
+      nd_it.get_metadata () = l_max_endpoint;
+    }
+    void update_to_top (node_pointer p_nd)
+    {
+      while (p_nd != m_p_head)
+	{
+	  apply_update (p_nd);
+	  p_nd = p_nd->m_p_parent;
+	}
+    }
+
+    rb_tree_node_ * m_p_head;
+  };
+}
+
+int main ()
+{
+  bin_search_tree_no_data_ ().insert_imp_empty (0);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
index a9cc816..d88c4a2 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
@@ -12,11 +12,18 @@  dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs)
   for (i = 0; i <= nCoeffs; i++)
     {
       level = block[i];
-      if (level < 0)
-	level = level * qmul - qadd;
-      else
-	level = level * qmul + qadd;
-      block[i] = level;
+      if (level)
+        {
+          if (level < 0)
+            {
+              level = level * qmul - qadd;
+            }
+          else
+            {
+              level = level * qmul + qadd;
+            }
+          block[i] = level;
+        }
     }
 }
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
new file mode 100644
index 0000000..d7cf279
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
@@ -0,0 +1,29 @@ 
+/* { dg-do compile } */
+/* { dg-options "-c -O2 -ftree-vectorize" { target *-*-* } } */
+
+typedef union tree_node *tree;
+struct tree_common
+{
+  unsigned volatile_flag : 1;
+  unsigned unsigned_flag : 1;
+};
+struct tree_type
+{
+  tree next_variant;
+  tree main_variant;
+};
+union tree_node
+{
+  struct tree_common common;
+  struct tree_type type;
+};
+void finish_enum (tree enumtype)
+{
+  tree tem;
+  for (tem = ((enumtype)->type.main_variant); tem; tem = ((tem)->type.next_variant))
+    {
+      if (tem == enumtype)
+	continue;
+      ((tem)->common.unsigned_flag) = ((enumtype)->common.unsigned_flag);
+    }
+}
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index 642dbda..9fc6190 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -446,171 +446,47 @@  if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi)
   return true;
 }
 
-/* Records the status of a data reference.  This struct is attached to
-   each DR->aux field.  */
-
-struct ifc_dr {
-  /* -1 when not initialized, 0 when false, 1 when true.  */
-  int written_at_least_once;
-
-  /* -1 when not initialized, 0 when false, 1 when true.  */
-  int rw_unconditionally;
-};
-
-#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
-#define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
-#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
-
-/* Returns true when the memory references of STMT are read or written
-   unconditionally.  In other words, this function returns true when
-   for every data reference A in STMT there exist other accesses to
-   the same data reference with predicates that add up (OR-up) to the
-   true predicate: this ensures that the data reference A is touched
-   (read or written) on every iteration of the if-converted loop.  */
-
-static bool
-memrefs_read_or_written_unconditionally (gimple stmt,
-					 VEC (data_reference_p, heap) *drs)
-{
-  int i, j;
-  data_reference_p a, b;
-  tree ca = bb_predicate (gimple_bb (stmt));
-
-  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
-    if (DR_STMT (a) == stmt)
-      {
-	bool found = false;
-	int x = DR_RW_UNCONDITIONALLY (a);
-
-	if (x == 0)
-	  return false;
-
-	if (x == 1)
-	  continue;
-
-	for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
-	  if (DR_STMT (b) != stmt
-	      && same_data_refs (a, b))
-	    {
-	      tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
-
-	      if (DR_RW_UNCONDITIONALLY (b) == 1
-		  || is_true_predicate (cb)
-		  || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
-								 ca, cb)))
-		{
-		  DR_RW_UNCONDITIONALLY (a) = 1;
-		  DR_RW_UNCONDITIONALLY (b) = 1;
-		  found = true;
-		  break;
-		}
-	    }
-
-	if (!found)
-	  {
-	    DR_RW_UNCONDITIONALLY (a) = 0;
-	    return false;
-	  }
-      }
-
-  return true;
-}
-
-/* Returns true when the memory references of STMT are unconditionally
-   written.  In other words, this function returns true when for every
-   data reference A written in STMT, there exist other writes to the
-   same data reference with predicates that add up (OR-up) to the true
-   predicate: this ensures that the data reference A is written on
-   every iteration of the if-converted loop.  */
+/* Wrapper around gimple_could_trap_p refined for the needs of the
+   if-conversion.  */
 
 static bool
-write_memrefs_written_at_least_once (gimple stmt,
-				     VEC (data_reference_p, heap) *drs)
+ifcvt_could_trap_p (gimple stmt)
 {
-  int i, j;
-  data_reference_p a, b;
-  tree ca = bb_predicate (gimple_bb (stmt));
-
-  for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++)
-    if (DR_STMT (a) == stmt
-	&& DR_IS_WRITE (a))
-      {
-	bool found = false;
-	int x = DR_WRITTEN_AT_LEAST_ONCE (a);
-
-	if (x == 0)
-	  return false;
-
-	if (x == 1)
-	  continue;
-
-	for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
-	  if (DR_STMT (b) != stmt
-	      && DR_IS_WRITE (b)
-	      && same_data_refs_base_objects (a, b))
-	    {
-	      tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
-
-	      if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
-		  || is_true_predicate (cb)
-		  || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
-								 ca, cb)))
-		{
-		  DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
-		  DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
-		  found = true;
-		  break;
-		}
-	    }
-
-	if (!found)
-	  {
-	    DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
-	    return false;
-	  }
-      }
+  if (gimple_vuse (stmt)
+      && !gimple_could_trap_p_1 (stmt, false, false))
+    return false;
 
-  return true;
+  return gimple_could_trap_p (stmt);
 }
 
-/* Return true when the memory references of STMT won't trap in the
-   if-converted code.  There are two things that we have to check for:
-
-   - writes to memory occur to writable memory: if-conversion of
-   memory writes transforms the conditional memory writes into
-   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
-   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
-   be executed at all in the original code, it may be a readonly
-   memory.  To check that A is not const-qualified, we check that
-   there exists at least an unconditional write to A in the current
-   function.
-
-   - reads or writes to memory are valid memory accesses for every
-   iteration.  To check that the memory accesses are correctly formed
-   and that we are allowed to read and write in these locations, we
-   check that the memory accesses to be if-converted occur at every
-   iteration unconditionally.  */
+/* Returns true when stmt contains a data reference.  */
 
 static bool
-ifcvt_memrefs_wont_trap (gimple stmt, VEC (data_reference_p, heap) *refs)
+has_unaligned_memory_refs (gimple stmt)
 {
-  return write_memrefs_written_at_least_once (stmt, refs)
-    && memrefs_read_or_written_unconditionally (stmt, refs);
-}
-
-/* Wrapper around gimple_could_trap_p refined for the needs of the
-   if-conversion.  Try to prove that the memory accesses of STMT could
-   not trap in the innermost loop containing STMT.  */
+  int unsignedp, volatilep;
+  HOST_WIDE_INT bitsize, bitpos;
+  tree toffset;
+  enum machine_mode mode;
+  VEC (data_ref_loc, heap) *refs = VEC_alloc (data_ref_loc, heap, 3);
+  bool res = get_references_in_stmt (stmt, &refs);
+  unsigned i;
+  data_ref_loc *ref;
+
+  FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
+    {
+      get_inner_reference (*ref->pos, &bitsize, &bitpos, &toffset,
+			   &mode, &unsignedp, &volatilep, true);
 
-static bool
-ifcvt_could_trap_p (gimple stmt, VEC (data_reference_p, heap) *refs)
-{
-  if (gimple_vuse (stmt)
-      && !gimple_could_trap_p_1 (stmt, false, false)
-      && ifcvt_memrefs_wont_trap (stmt, refs))
-    return false;
+      if ((bitpos % BITS_PER_UNIT) != 0)
+	{
+	  res = true;
+	  break;
+	}
+    }
 
-  return gimple_could_trap_p (stmt);
+  VEC_free (data_ref_loc, heap, refs);
+  return res;
 }
 
 /* Return true when STMT is if-convertible.
@@ -621,8 +497,7 @@  ifcvt_could_trap_p (gimple stmt, VEC (data_reference_p, heap) *refs)
    - LHS is not var decl.  */
 
 static bool
-if_convertible_gimple_assign_stmt_p (gimple stmt,
-				     VEC (data_reference_p, heap) *refs)
+if_convertible_gimple_assign_stmt_p (gimple stmt)
 {
   tree lhs = gimple_assign_lhs (stmt);
   basic_block bb;
@@ -650,12 +525,20 @@  if_convertible_gimple_assign_stmt_p (gimple stmt,
 
   if (flag_tree_loop_if_convert_stores)
     {
-      if (ifcvt_could_trap_p (stmt, refs))
+      if (ifcvt_could_trap_p (stmt))
 	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    fprintf (dump_file, "tree could trap...\n");
 	  return false;
 	}
+
+      if (has_unaligned_memory_refs (stmt))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "uses misaligned memory...\n");
+	  return false;
+	}
+
       return true;
     }
 
@@ -690,7 +573,7 @@  if_convertible_gimple_assign_stmt_p (gimple stmt,
    - it is a GIMPLE_LABEL or a GIMPLE_COND.  */
 
 static bool
-if_convertible_stmt_p (gimple stmt, VEC (data_reference_p, heap) *refs)
+if_convertible_stmt_p (gimple stmt)
 {
   switch (gimple_code (stmt))
     {
@@ -700,7 +583,7 @@  if_convertible_stmt_p (gimple stmt, VEC (data_reference_p, heap) *refs)
       return true;
 
     case GIMPLE_ASSIGN:
-      return if_convertible_gimple_assign_stmt_p (stmt, refs);
+      return if_convertible_gimple_assign_stmt_p (stmt);
 
     default:
       /* Don't know what to do with 'em so don't do anything.  */
@@ -1016,18 +899,6 @@  if_convertible_loop_p_1 (struct loop *loop,
   if (!res)
     return false;
 
-  if (flag_tree_loop_if_convert_stores)
-    {
-      data_reference_p dr;
-
-      for (i = 0; VEC_iterate (data_reference_p, *refs, i, dr); i++)
-	{
-	  dr->aux = XNEW (struct ifc_dr);
-	  DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
-	  DR_RW_UNCONDITIONALLY (dr) = -1;
-	}
-    }
-
   for (i = 0; i < loop->num_nodes; i++)
     {
       basic_block bb = ifc_bbs[i];
@@ -1040,7 +911,7 @@  if_convertible_loop_p_1 (struct loop *loop,
       /* Check the if-convertibility of statements in predicated BBs.  */
       if (is_predicated (bb))
 	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
-	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
+	  if (!if_convertible_stmt_p (gsi_stmt (itr)))
 	    return false;
     }
 
@@ -1101,15 +972,6 @@  if_convertible_loop_p (struct loop *loop)
   ddrs = VEC_alloc (ddr_p, heap, 25);
   res = if_convertible_loop_p_1 (loop, &refs, &ddrs);
 
-  if (flag_tree_loop_if_convert_stores)
-    {
-      data_reference_p dr;
-      unsigned int i;
-
-      for (i = 0; VEC_iterate (data_reference_p, refs, i, dr); i++)
-	free (dr->aux);
-    }
-
   free_data_refs (refs);
   free_dependence_relations (ddrs);
   return res;
@@ -1366,6 +1228,78 @@  insert_gimplified_predicates (loop_p loop)
     }
 }
 
+/* Insert at the beginning of the first basic block of the current
+   function the allocation on the stack of N bytes of memory and
+   return a pointer to this scratchpad memory.  */
+
+static tree
+create_scratchpad (void)
+{
+  basic_block bb = single_succ (ENTRY_BLOCK_PTR);
+  gimple_stmt_iterator gsi = gsi_after_labels (bb);
+
+  /* void *tmp = __builtin_alloca */
+  const char *name = "scratch_pad";
+  tree x = build_int_cst (integer_type_node, 64);
+  gimple stmt = gimple_build_call (built_in_decls[BUILT_IN_ALLOCA], 1, x);
+  tree var = create_tmp_var (ptr_type_node, name);
+  tree tmp = make_ssa_name (var, stmt);
+
+  add_referenced_var (var);
+  gimple_call_set_lhs (stmt, tmp);
+  SSA_NAME_DEF_STMT (tmp) = stmt;
+  update_stmt (stmt);
+
+  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+  return tmp;
+}
+
+/* Returns a memory reference to the pointer defined by the
+   conditional expression: pointer = cond ? &A[i] : scratch_pad; and
+   inserts this code at GSI.  */
+
+static tree
+create_indirect_cond_expr (tree ai, tree cond, tree *scratch_pad,
+			   gimple_stmt_iterator *gsi)
+{
+  tree type = TREE_TYPE (ai);
+
+  tree pointer_to_type, address_of_ai, addr_expr, cond_expr;
+  tree pointer, star_pointer;
+  gimple addr_stmt, pointer_stmt;
+
+  /* address_of_ai = &A[i];  */
+  pointer_to_type = build_pointer_type (type);
+  address_of_ai = create_tmp_var (pointer_to_type, "_ifc_");
+  add_referenced_var (address_of_ai);
+  addr_expr = build_fold_addr_expr (ai);
+  addr_stmt = gimple_build_assign (address_of_ai, addr_expr);
+  address_of_ai = make_ssa_name (address_of_ai, addr_stmt);
+  gimple_assign_set_lhs (addr_stmt, address_of_ai);
+  SSA_NAME_DEF_STMT (address_of_ai) = addr_stmt;
+  update_stmt (addr_stmt);
+  gsi_insert_before (gsi, addr_stmt, GSI_SAME_STMT);
+
+  /* Allocate the scratch pad only once per function.  */
+  if (!*scratch_pad)
+    *scratch_pad = create_scratchpad ();
+
+  /* pointer = cond ? address_of_ai : scratch_pad;  */
+  pointer = create_tmp_var (pointer_to_type, "_ifc_");
+  add_referenced_var (pointer);
+  cond_expr = build3 (COND_EXPR, pointer_to_type, unshare_expr (cond),
+		      address_of_ai, *scratch_pad);
+  pointer_stmt = gimple_build_assign (pointer, cond_expr);
+  pointer = make_ssa_name (pointer, pointer_stmt);
+  gimple_assign_set_lhs (pointer_stmt, pointer);
+  SSA_NAME_DEF_STMT (pointer) = pointer_stmt;
+  update_stmt (pointer_stmt);
+  gsi_insert_before (gsi, pointer_stmt, GSI_SAME_STMT);
+
+  star_pointer = build_simple_mem_ref (pointer);
+  return star_pointer;
+}
+
 /* Predicate each write to memory in LOOP.
 
    This function transforms control flow constructs containing memory
@@ -1377,10 +1311,19 @@  insert_gimplified_predicates (loop_p loop)
 
    into the following form that does not contain control flow:
 
-   | for (i = 0; i < N; i++)
-   |   A[i] = cond ? expr : A[i];
+   | void *scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
+   |
+   | for (i = 0; i < N; i++) {
+   |   p = cond ? &A[i] : scratch_pad;
+   |   *p = expr;
+   | }
+
+   SCRATCH_PAD is allocated on the stack for each function once and it is
+   large enough to contain any kind of scalar assignment or read.  All
+   values read or written to SCRATCH_PAD are not used in the computation.
 
-   The original CFG looks like this:
+   In a more detailed way, the if-conversion of memory writes works
+   like this, supposing that the original CFG looks like this:
 
    | bb_0
    |   i = 0
@@ -1430,10 +1373,12 @@  insert_gimplified_predicates (loop_p loop)
    |   goto bb_1
    | end_bb_4
 
-   predicate_mem_writes is then predicating the memory write as follows:
+   predicate_mem_writes is then allocating SCRATCH_PAD in the basic block
+   preceding the loop header, and is predicating the memory write:
 
    | bb_0
    |   i = 0
+   |   scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
    | end_bb_0
    |
    | bb_1
@@ -1441,12 +1386,14 @@  insert_gimplified_predicates (loop_p loop)
    | end_bb_1
    |
    | bb_2
+   |   cond = some_computation;
    |   if (cond) goto bb_3 else goto bb_4
    | end_bb_2
    |
    | bb_3
    |   cond = some_computation;
-   |   A[i] = cond ? expr : A[i];
+   |   p = cond ? &A[i] : scratch_pad;
+   |   *p = expr;
    |   goto bb_4
    | end_bb_3
    |
@@ -1459,12 +1406,14 @@  insert_gimplified_predicates (loop_p loop)
 
    | bb_0
    |   i = 0
+   |   scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
    |   if (i < N) goto bb_5 else goto bb_1
    | end_bb_0
    |
    | bb_1
    |   cond = some_computation;
-   |   A[i] = cond ? expr : A[i];
+   |   p = cond ? &A[i] : scratch_pad;
+   |   *p = expr;
    |   if (i < N) goto bb_5 else goto bb_4
    | end_bb_1
    |
@@ -1474,7 +1423,7 @@  insert_gimplified_predicates (loop_p loop)
 */
 
 static void
-predicate_mem_writes (loop_p loop)
+predicate_mem_writes (loop_p loop, tree *scratch_pad)
 {
   unsigned int i, orig_loop_num_nodes = loop->num_nodes;
 
@@ -1489,20 +1438,35 @@  predicate_mem_writes (loop_p loop)
 	continue;
 
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-	if ((stmt = gsi_stmt (gsi))
-	    && gimple_assign_single_p (stmt)
-	    && gimple_vdef (stmt))
-	  {
-	    tree lhs = gimple_assign_lhs (stmt);
-	    tree rhs = gimple_assign_rhs1 (stmt);
-	    tree type = TREE_TYPE (lhs);
-
-	    lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
-	    rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
-	    rhs = build3 (COND_EXPR, type, unshare_expr (cond), rhs, lhs);
-	    gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
-	    update_stmt (stmt);
-	  }
+	{
+	  stmt = gsi_stmt (gsi);
+	  if (gimple_assign_single_p (stmt)
+	      && gimple_vdef (stmt))
+	    {
+	      /* A[i] = x;  */
+	      tree ai = gimple_assign_lhs (stmt);
+
+	      /* pointer = cond ? &A[i] : scratch_pad;  */
+	      tree star_pointer = create_indirect_cond_expr (ai, cond,
+							     scratch_pad, &gsi);
+	      /* *pointer = x;  */
+	      gimple_assign_set_lhs (stmt, star_pointer);
+	      update_stmt (stmt);
+	    }
+	  else if (gimple_assign_single_p (stmt)
+		   && gimple_vuse (stmt))
+	    {
+	      /* x = A[i];  */
+	      tree ai = gimple_assign_rhs1 (stmt);
+
+	      /* pointer = cond ? &A[i] : scratch_pad;  */
+	      tree star_pointer = create_indirect_cond_expr (ai, cond,
+							     scratch_pad, &gsi);
+	      /* x = *pointer;  */
+	      gimple_assign_set_rhs1 (stmt, star_pointer);
+	      update_stmt (stmt);
+	    }
+	}
     }
 }
 
@@ -1552,7 +1516,7 @@  remove_conditions_and_labels (loop_p loop)
    blocks.  Replace PHI nodes with conditional modify expressions.  */
 
 static void
-combine_blocks (struct loop *loop)
+combine_blocks (struct loop *loop, tree *scratch_pad)
 {
   basic_block bb, exit_bb, merge_target_bb;
   unsigned int orig_loop_num_nodes = loop->num_nodes;
@@ -1565,7 +1529,7 @@  combine_blocks (struct loop *loop)
   predicate_all_scalar_phis (loop);
 
   if (flag_tree_loop_if_convert_stores)
-    predicate_mem_writes (loop);
+    predicate_mem_writes (loop, scratch_pad);
 
   /* Merge basic blocks: first remove all the edges in the loop,
      except for those from the exit block.  */
@@ -1654,7 +1618,7 @@  combine_blocks (struct loop *loop)
    profitability analysis.  Returns true when something changed.  */
 
 static bool
-tree_if_conversion (struct loop *loop)
+tree_if_conversion (struct loop *loop, tree *scratch_pad)
 {
   bool changed = false;
   ifc_bbs = NULL;
@@ -1666,7 +1630,7 @@  tree_if_conversion (struct loop *loop)
   /* Now all statements are if-convertible.  Combine all the basic
      blocks into one huge basic block doing the if-conversion
      on-the-fly.  */
-  combine_blocks (loop);
+  combine_blocks (loop, scratch_pad);
 
   if (flag_tree_loop_if_convert_stores)
     mark_sym_for_renaming (gimple_vop (cfun));
@@ -1697,12 +1661,13 @@  main_tree_if_conversion (void)
   struct loop *loop;
   bool changed = false;
   unsigned todo = 0;
+  tree scratch_pad = NULL_TREE;
 
   if (number_of_loops () <= 1)
     return 0;
 
   FOR_EACH_LOOP (li, loop, 0)
-    changed |= tree_if_conversion (loop);
+    changed |= tree_if_conversion (loop, &scratch_pad);
 
   if (changed)
     todo |= TODO_cleanup_cfg;