diff mbox

fix PR46029: reimplement if conversion of loads and stores [2nd submitted version of patch]

Message ID 5593142A.7020607@yahoo.com
State New
Headers show

Commit Message

Abe Skolnik June 30, 2015, 10:11 p.m. UTC
New relative to previous version of same patch:
   * addressed comments/suggestions from Alan and Richard.
   * disabled the test cases that are currently not if-converted correctly, pending improvements to the new if-converter

Passed regression testing and bootstrap on amd64-linux.

2015-06-12  Sebastian Pop  <s.pop@samsung.com>
             Abe Skolnik  <a.skolnik@samsung.com>

	PR tree-optimization/46029
	* tree-data-ref.c (struct data_ref_loc_d): Moved...
	(get_references_in_stmt): Exported.
	* tree-data-ref.h (struct data_ref_loc_d): ... here.
	(get_references_in_stmt): Declared.

	* doc/invoke.texi (-ftree-loop-if-convert-stores): Update description.
	* tree-if-conv.c (struct ifc_dr): Removed.
	(IFC_DR): Removed.
	(DR_WRITTEN_AT_LEAST_ONCE): Removed.
	(DR_RW_UNCONDITIONALLY): Removed.
	(memrefs_read_or_written_unconditionally): Removed.
	(write_memrefs_written_at_least_once): Removed.
	(ifcvt_could_trap_p): Does not take refs parameter anymore.
	(ifcvt_memrefs_wont_trap): Removed.
	(has_non_addressable_refs): New.
	(if_convertible_gimple_assign_stmt_p): Call has_non_addressable_refs.
	Removed use of refs.
	(if_convertible_stmt_p): Removed use of refs.
	(if_convertible_gimple_assign_stmt_p): Same.
	(if_convertible_loop_p_1): Removed use of refs.  Remove initialization
	of dr->aux, DR_WRITTEN_AT_LEAST_ONCE, and DR_RW_UNCONDITIONALLY.
	(insert_address_of): New.
	(create_scratchpad): New.
	(create_indirect_cond_expr): New.
	(predicate_mem_writes): Call create_indirect_cond_expr.  Take an extra
	parameter for scratch_pad.
	(combine_blocks): Same.
	(tree_if_conversion): Same.

	testsuite/
	* g++.dg/tree-ssa/ifc-pr46029.C: New.
	* gcc.dg/tree-ssa/ifc-5.c: Make it exactly like the FFmpeg kernel.
	* gcc.dg/tree-ssa/ifc-8.c: New.
	* gcc.dg/tree-ssa/ifc-9.c: New.
	* gcc.dg/tree-ssa/ifc-10.c: New.
	* gcc.dg/tree-ssa/ifc-11.c: New.
	* gcc.dg/tree-ssa/ifc-12.c: New.
	* gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c: Disabled.
	* gcc.dg/vect/if-cvt-stores-vect-ifcvt-19.c: New.
	* gcc.dg/vect/pr61194.c: Disabled.
	* gcc.dg/vect/vect-mask-load-1.c: Disabled.
	* gcc.dg/vect/vect-mask-loadstore-1.c: Disabled.
	* gcc.target/i386/avx2-gather-6.c: Disabled.
	* gcc.target/i386/avx2-vect-aggressive-1.c: Disabled.
	* gcc.target/i386/avx2-vect-aggressive.c: Disabled.
---
  gcc/ChangeLog                                      |  28 ++
  gcc/common.opt                                     |   2 +-
  gcc/doc/invoke.texi                                |  18 +-
  gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C        |  76 ++++
  gcc/testsuite/gcc.dg/tree-ssa/cunroll-10.c         |   2 +-
  gcc/testsuite/gcc.dg/tree-ssa/ifc-10.c             |  17 +
  gcc/testsuite/gcc.dg/tree-ssa/ifc-11.c             |  16 +
  gcc/testsuite/gcc.dg/tree-ssa/ifc-12.c             |  13 +
  gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c              |  19 +-
  gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c              |  29 ++
  gcc/testsuite/gcc.dg/tree-ssa/ifc-9.c              |  17 +
  .../gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c      |  10 +-
  .../gcc.dg/vect/if-cvt-stores-vect-ifcvt-19.c      |  46 +++
  gcc/testsuite/gcc.dg/vect/pr61194.c                |   4 +-
  gcc/testsuite/gcc.dg/vect/vect-mask-load-1.c       |   4 +-
  gcc/testsuite/gcc.dg/vect/vect-mask-loadstore-1.c  |   4 +-
  gcc/testsuite/gcc.target/i386/avx2-gather-6.c      |   4 +-
  .../gcc.target/i386/avx2-vect-aggressive-1.c       |   4 +-
  .../gcc.target/i386/avx2-vect-aggressive.c         |   4 +-
  gcc/tree-data-ref.c                                |  13 +-
  gcc/tree-data-ref.h                                |  14 +
  gcc/tree-if-conv.c                                 | 416 ++++++++++-----------
  22 files changed, 500 insertions(+), 260 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-10.c
  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-11.c
  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-12.c
  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-9.c
  create mode 100644 gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-19.c

Comments

Alan Lawrence July 2, 2015, 9:49 a.m. UTC | #1
Thanks, Abe. A couple comments below...

> @@ -883,7 +733,7 @@ if_convertible_gimple_assign_stmt_p (gimple stmt,
> 
>     if (flag_tree_loop_if_convert_stores)
>       {
> -      if (ifcvt_could_trap_p (stmt, refs))
> +      if (ifcvt_could_trap_p (stmt))
>         {
>           if (ifcvt_can_use_mask_load_store (stmt))
>             {
> @@ -892,9 +742,17 @@ if_convertible_gimple_assign_stmt_p (gimple stmt,
>               return true;
>             }
>           if (dump_file && (dump_flags & TDF_DETAILS))
> -           fprintf (dump_file, "tree could trap...\n");
> +           fprintf (dump_file, "tree could trap\n");
>           return false;
>         }
> +
> +      if (has_non_addressable_refs (stmt))
> +       {
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           fprintf (dump_file, "has non-addressable memory references\n");
> +         return false;
> +       }
> +
>         return true;
>       }

As before, I'm still confused here. This still returns false, i.e. bails out of
if-conversion, if the statement could trap. Doesn't the scratchpad let us handle
that? Or do we just not care because it won't be vectorizable anyway???

> @@ -1342,7 +1190,7 @@ if_convertible_loop_p_1 (struct loop *loop,
>         /* Check the if-convertibility of statements in predicated BBs.  */
>         if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
>         for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
> -         if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
> +         if (!if_convertible_stmt_p (gsi_stmt (itr),
>                                       any_mask_load_store))

Nit: as before - line no longer needs wrapping (a few other cases too)

> @@ -2063,12 +1997,14 @@ mask_exists (int size, vec<int> vec)
>      | end_bb_1
>      |
>      | bb_2
> +   |   cond = some_computation;

Nit: as before - thanks for fixing the example here, but...

>      |   if (cond) goto bb_3 else goto bb_4
>      | end_bb_2
>      |
>      | bb_3
>      |   cond = some_computation;

...I think you mean to remove this last too.

> @@ -2817,10 +2761,26 @@ public:
>   bool
>   pass_if_conversion::gate (function *fun)
>   {
> -  return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
> -          && flag_tree_loop_if_convert != 0)
> -         || flag_tree_loop_if_convert == 1
> -         || flag_tree_loop_if_convert_stores == 1);
> +  return  (
> +            (
> +              flag_tree_loop_vectorize
> +           || fun->has_force_vectorize_loops
> +            )
> +         && (
> +              (
> +                flag_tree_loop_if_convert        != 0
> +              )
> +           || (
> +                flag_tree_loop_if_convert_stores != 0
> +              )
> +            )
> +         )
> +         || (
> +              flag_tree_loop_if_convert        > 0
> +            )
> +         || (
> +              flag_tree_loop_if_convert_stores > 0
> +            );
>   }

That is quite complex. Where can I find info on what the different flag values 
mean? (I had thought they were booleans, clearly I'm wrong, but a quick scan 
through invoke.texi doesn't seem to help; both your testcases and your updates 
to invoke.texi say e.g. -ftree-loop-if-convert-stores not 
-ftree-loop-if-convert-stores=<value>)


Cheers, Alan
Abe Skolnik July 2, 2015, 8:17 p.m. UTC | #2
On 7/2/15 4:49 AM, Alan Lawrence wrote:

> Thanks, Abe.

You are welcome, sir!  :-)


> As before, I'm still confused here. This still returns false, i.e. bails out of
> if-conversion, if the statement could trap. Doesn't the scratchpad let us handle
> that? Or do we just not care because it won't be vectorizable anyway???

This seems like an opportunity for more optimization in the future.  However, at this time we do not see what kind of code would benefit from this optimization.
If you have some time to spare and wish to spend some of it on this issue, then please find/write a test case that would exercise this path, i.e. a snippet of code
that is not optimized due to the above concern, even though it _could_ be if-converted -- and, preferably, the resulting conversion _is_ vectorizer-friendly.

Of course, even if a test case can be written to trigger this missed opportunity,
that in and of itself does not yet tell us how much opportunity we are missing in _real-world_ code.


> Nit: as before [...]

Thanks for the reminder[s].


> Nit: as before - thanks for fixing the example here

You are welcome.


> Where can I find info on what the different flag values mean?
 > (I had thought they were booleans [...]

Sorry; I don`t know if that is documented anywhere yet.

In this case, (-1) simply means "defaulted": on if the vectorizer is on, and off if it is off.
(0) means "user specified no if conversion" and (1) means "user specified [yes] if conversion".

Regards,

Abe
Alan Lawrence July 6, 2015, 3:09 p.m. UTC | #3
Abe wrote:
> On 7/2/15 4:49 AM, Alan Lawrence wrote:
>> As before, I'm still confused here. This still returns false, i.e. bails out of
>> if-conversion, if the statement could trap. Doesn't the scratchpad let us handle
>> that? Or do we just not care because it won't be vectorizable anyway???
> 
> This seems like an opportunity for more optimization in the future....

==> we get enough benefit from the patch, even without my suggested extra 
change. Ok, fair enough! Thanks for the clarification.

>> Where can I find info on what the different flag values mean?
>  > (I had thought they were booleans [...]
> 
> Sorry; I don`t know if that is documented anywhere yet.
> 
> In this case, (-1) simply means "defaulted": on if the vectorizer is on, and off if it is off.
> (0) means "user specified no if conversion" and (1) means "user specified [yes] if conversion".

Ah, right, that makes sense now. Obviously I would like to see this written in 
doc/ .

Cheers, Alan
Abe Skolnik July 6, 2015, 5:46 p.m. UTC | #4
[Abe wrote:]
>> This seems like an opportunity for more optimization in the future....

[On 7/6/15 10:09 AM, Alan Lawrence wrote:]
> we get enough benefit from the patch, even without my suggested extra change. Ok, fair enough! Thanks for the clarification.

You are welcome, sir.


[Alan wrote:]
>>> Where can I find info on what the different flag values mean?
>>> (I had thought they were booleans [...]

[Abe wrote:]
>> Sorry; I don`t know if that is documented anywhere yet.
>> In this case, (-1) simply means "defaulted": on if the vectorizer is on, and off if it is off.
>> (0) means "user specified no if conversion" and (1) means "user specified [yes] if conversion".

[Alan wrote:]
> Ah, right, that makes sense now. Obviously I would like to see this written in doc/ .

Please consider it added to the work queue.  ;-)

Regards,

Abe
Abe Skolnik July 8, 2015, 3:30 p.m. UTC | #5
[Alan wrote:]

>>> Where can I find info on what the different flag values mean?
>>> (I had thought they were booleans [...]


[Abe wrote:]

>> Sorry; I don`t know if that is documented anywhere yet.

>> In this case, (-1) simply means "defaulted": on if the vectorizer is on, and off if it is off.
>> (0) means "user specified no if conversion" and (1) means "user specified [yes] if conversion".


[Alan wrote:]

> Ah, right, that makes sense now. Obviously I would like to see this written in doc/ .

Is the following sufficient?  From "doc/invoke.texi", I propose to replace:

   This is enabled by default if vectorization is enabled.

... with:

   This is enabled by default when vectorization is  enabled
   and    disabled by default when vectorization is disabled.


Regards,

Abe
Alan Lawrence July 8, 2015, 4:54 p.m. UTC | #6
Abe wrote:
> [Alan wrote:]
> 
>>>> Where can I find info on what the different flag values mean?
>>>> (I had thought they were booleans [...]
> 
> 
> [Abe wrote:]
> 
>>> Sorry; I don`t know if that is documented anywhere yet.
> 
>>> In this case, (-1) simply means "defaulted": on if the vectorizer is on, and off if it is off.
>>> (0) means "user specified no if conversion" and (1) means "user specified [yes] if conversion".
> 
> 
> [Alan wrote:]
> 
>> Ah, right, that makes sense now. Obviously I would like to see this written in doc/ .
> 
> Is the following sufficient?  From "doc/invoke.texi", I propose to replace:
> 
>    This is enabled by default if vectorization is enabled.
> 
> ... with:
> 
>    This is enabled by default when vectorization is  enabled
>    and    disabled by default when vectorization is disabled.
> 

That would be helpful - but has the syntax not changed? I was thinking it'd be 
useful to document somewher the meanings of 1/0/-1, as you explained them to me:

 > In this case, (-1) simply means "defaulted": on if the vectorizer is on, and 
off if it is off.
 > (0) means "user specified no if conversion" and (1) means "user specified 
[yes] if conversion".

Perhaps in the gate() function if not in the doc.

Thanks, Alan
Abe Skolnik July 8, 2015, 6:12 p.m. UTC | #7
[Abe wrote:]
>> Is the following sufficient?  From "doc/invoke.texi", I propose to replace:

>>    This is enabled by default if vectorization is enabled.

>> ... with:

>>    This is enabled by default when vectorization is  enabled
>>    and    disabled by default when vectorization is disabled.
>>


[Alan wrote:]

> That would be helpful - but has the syntax not changed?

AFAIK & IIRC, not yet.  We plan to change the flags later.


[Alan wrote:]
> I was thinking it'd be useful to document somewher the meanings of 1/0/-1, as you explained them to me:
[snip]
> Perhaps in the gate() function if not in the doc.

OK.  It seemed like the doc files was not the right place to document internal values that flags influence.

Regards,

Abe
diff mbox

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3dec6b1..70af07c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,31 @@ 
+2015-05-18  Sebastian Pop  <s.pop@samsung.com>
+
+	PR tree-optimization/46029
+	* doc/invoke.texi (-ftree-loop-if-convert-stores): Update description.
+	* tree-if-conv.c (has_unaligned_memory_refs): New.
+	(if_convertible_gimple_assign_stmt_p): Call has_unaligned_memory_refs.
+	(create_scratchpad): New.
+	(create_indirect_cond_expr): New.
+	(predicate_mem_writes): Call create_indirect_cond_expr.  Take an extra
+	parameter for scratch_pad.
+	(combine_blocks): Same.
+	(tree_if_conversion): Same.
+	(main_tree_if_conversion): Pass to tree_if_conversion a pointer to
+	scratch_pad.
+	(struct ifc_dr): Removed.
+	(IFC_DR): Removed.
+	(DR_WRITTEN_AT_LEAST_ONCE): Removed.
+	(DR_RW_UNCONDITIONALLY): Removed.
+	(memrefs_read_or_written_unconditionally): Removed.
+	(write_memrefs_written_at_least_once): Removed.
+	(ifcvt_memrefs_wont_trap): Removed.
+	(ifcvt_could_trap_p): Does not take refs parameter anymore.
+	(if_convertible_gimple_assign_stmt_p): Same.
+	(if_convertible_stmt_p): Same.
+	(if_convertible_loop_p_1): Remove initialization of dr->aux,
+	DR_WRITTEN_AT_LEAST_ONCE, and DR_RW_UNCONDITIONALLY.
+	(if_convertible_loop_p): Remove deallocation of the same.
+
  2015-05-15  Marc Glisse  <marc.glisse@inria.fr>

  	PR tree-optimization/64454
diff --git a/gcc/common.opt b/gcc/common.opt
index 859488f..6cce163 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1403,7 +1403,7 @@  Common Report Var(flag_tree_loop_if_convert) Init(-1) Optimization
  Convert conditional jumps in innermost loops to branchless equivalents

  ftree-loop-if-convert-stores
-Common Report Var(flag_tree_loop_if_convert_stores) Optimization
+Common Report Var(flag_tree_loop_if_convert_stores) Init(-1) Optimization
  Also if-convert conditional jumps containing memory writes

  ; -finhibit-size-directive inhibits output of .size for ELF.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 117b5d9..c0e27e8 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -8815,20 +8815,26 @@  if vectorization is enabled.
  @item -ftree-loop-if-convert-stores
  @opindex ftree-loop-if-convert-stores
  Attempt to also if-convert conditional jumps containing memory writes.
-This transformation can be unsafe for multi-threaded programs as it
-transforms conditional memory writes into unconditional memory writes.
  For example,
  @smallexample
  for (i = 0; i < N; i++)
    if (cond)
-    A[i] = expr;
+    A[i] = B[i] + 2;
  @end smallexample
  is transformed to
  @smallexample
-for (i = 0; i < N; i++)
-  A[i] = cond ? expr : A[i];
+void *scratchpad = alloca (64);
+for (i = 0; i < N; i++) @{
+  a = cond ? &A[i] : scratchpad;
+  b = cond ? &B[i] : scratchpad;
+  *a = *b + 2;
+@}
  @end smallexample
-potentially producing data races.
+The compiler allocates a scratchpad memory on the stack for each
+function in which the if-conversion of memory stores or reads
+happened.  This scratchpad memory is used during the part of the
+computation that is discarded, i.e., when the condition is evaluated
+to false.

  @item -ftree-loop-distribution
  @opindex ftree-loop-distribution
diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C b/gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
new file mode 100644
index 0000000..2a54bdb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/ifc-pr46029.C
@@ -0,0 +1,76 @@ 
+// { dg-do run }
+/* { dg-options "-O -ftree-loop-if-convert-stores" } */
+
+namespace
+{
+  struct rb_tree_node_
+  {
+    rb_tree_node_ ():m_p_left (0), m_p_parent (0), m_metadata (0)
+    {
+    }
+    unsigned &get_metadata ()
+    {
+      return m_metadata;
+    }
+    rb_tree_node_ *m_p_left;
+    rb_tree_node_ *m_p_parent;
+    unsigned m_metadata;
+  };
+
+  struct bin_search_tree_const_node_it_
+  {
+    bin_search_tree_const_node_it_ (rb_tree_node_ * p_nd):m_p_nd (p_nd)
+    {
+    }
+    unsigned &get_metadata ()
+    {
+      return m_p_nd->get_metadata ();
+    }
+    bin_search_tree_const_node_it_ get_l_child ()
+    {
+      return bin_search_tree_const_node_it_ (m_p_nd->m_p_left);
+    }
+
+    rb_tree_node_ *m_p_nd;
+  };
+
+  struct bin_search_tree_no_data_
+  {
+    typedef rb_tree_node_ *node_pointer;
+      bin_search_tree_no_data_ ():m_p_head (new rb_tree_node_ ())
+    {
+    }
+    void insert_imp_empty (int r_value)
+    {
+      rb_tree_node_ *p_new_node = new rb_tree_node_ ();
+      m_p_head->m_p_parent = p_new_node;
+      p_new_node->m_p_parent = m_p_head;
+      update_to_top (m_p_head->m_p_parent);
+    }
+    void apply_update (bin_search_tree_const_node_it_ nd_it)
+    {
+      unsigned
+	l_max_endpoint
+	=
+	(nd_it.get_l_child ().m_p_nd ==
+	 0) ? 0 : nd_it.get_l_child ().get_metadata ();
+      nd_it.get_metadata () = l_max_endpoint;
+    }
+    void update_to_top (node_pointer p_nd)
+    {
+      while (p_nd != m_p_head)
+	{
+	  apply_update (p_nd);
+	  p_nd = p_nd->m_p_parent;
+	}
+    }
+
+    rb_tree_node_ * m_p_head;
+  };
+}
+
+int main ()
+{
+  bin_search_tree_no_data_ ().insert_imp_empty (0);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-10.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-10.c
index f2cd904..f59b9ad 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-10.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-10.c
@@ -1,5 +1,5 @@ 
  /* { dg-do compile } */
-/* { dg-options "-O3 -Warray-bounds -fdump-tree-cunroll-details" } */
+/* { dg-options "-O3 -Warray-bounds -fdump-tree-cunroll-details -fno-tree-loop-if-convert-stores" } */
  int a[3];
  int b[4];
  int
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-10.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-10.c
new file mode 100644
index 0000000..3dcc202
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-10.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-c -O3 -ftree-loop-if-convert-stores -fdump-tree-ifcvt-details -fdump-tree-ifcvt-stats" { target *-*-* } } */
+
+/* middle-end/PR65947 */
+
+int a[32];
+
+int main(int argc, char **argv)
+{
+  int res = 3;
+  for (int i = 0; i < 32; i++)
+    if (a[i]) res = 7;
+  return res;
+}
+
+/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
+/* { dg-final { cleanup-tree-dump "ifcvt" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-11.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-11.c
new file mode 100644
index 0000000..2943c04
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-11.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-c -O3 -ftree-loop-if-convert-stores -fdump-tree-ifcvt-details -fdump-tree-ifcvt-stats" { target *-*-* } } */
+
+int read(int *r, int *c)
+{
+  int res = 42;
+  int i;
+  for (i = 0; i < 32; i++)
+    if (c[i])
+      res = *r;
+
+  return res;
+}
+
+/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
+/* { dg-final { cleanup-tree-dump "ifcvt" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-12.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-12.c
new file mode 100644
index 0000000..01acf0b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-12.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-c -O3 -ftree-loop-if-convert-stores -fdump-tree-ifcvt-details -fdump-tree-ifcvt-stats" { target *-*-* } } */
+
+void write(int *r, int *c)
+{
+  int i;
+  for (i = 0; i < 32; i++)
+    if (c[i])
+      *r = c[i];
+}
+
+/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
+/* { dg-final { cleanup-tree-dump "ifcvt" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
index a9cc816..4be2cdb 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c
@@ -1,5 +1,5 @@ 
  /* { dg-do compile } */
-/* { dg-options "-c -O2 -ftree-vectorize -fdump-tree-ifcvt-stats" { target *-*-* } } */
+/* { dg-options "-c -O2 -ftree-vectorize -ftree-loop-if-convert-stores -fdump-tree-ifcvt-stats" { target *-*-* } } */

  void
  dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs)
@@ -12,11 +12,18 @@  dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs)
    for (i = 0; i <= nCoeffs; i++)
      {
        level = block[i];
-      if (level < 0)
-	level = level * qmul - qadd;
-      else
-	level = level * qmul + qadd;
-      block[i] = level;
+      if (level)
+        {
+          if (level < 0)
+            {
+              level = level * qmul - qadd;
+            }
+          else
+            {
+              level = level * qmul + qadd;
+            }
+          block[i] = level;
+        }
      }
  }

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
new file mode 100644
index 0000000..d7cf279
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c
@@ -0,0 +1,29 @@ 
+/* { dg-do compile } */
+/* { dg-options "-c -O2 -ftree-vectorize" { target *-*-* } } */
+
+typedef union tree_node *tree;
+struct tree_common
+{
+  unsigned volatile_flag : 1;
+  unsigned unsigned_flag : 1;
+};
+struct tree_type
+{
+  tree next_variant;
+  tree main_variant;
+};
+union tree_node
+{
+  struct tree_common common;
+  struct tree_type type;
+};
+void finish_enum (tree enumtype)
+{
+  tree tem;
+  for (tem = ((enumtype)->type.main_variant); tem; tem = ((tem)->type.next_variant))
+    {
+      if (tem == enumtype)
+	continue;
+      ((tem)->common.unsigned_flag) = ((enumtype)->common.unsigned_flag);
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-9.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-9.c
new file mode 100644
index 0000000..6d82bb4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-9.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-c -O2 -ftree-vectorize -ftree-loop-if-convert-stores -fdump-tree-ifcvt-stats" { target *-*-* } } */
+
+/* middle-end/PR65947 */
+
+int a[32];
+
+int main(int argc, char **argv)
+{
+  int res = 3;
+  for (int i = 0; i < 32; i++)
+    if (a[i]) res = 7;
+  return res;
+}
+
+/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */
+/* { dg-final { cleanup-tree-dump "ifcvt" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c b/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c
index cdf687a..7bc652b 100644
--- a/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c
+++ b/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c
@@ -65,5 +65,13 @@  main (void)
    return 0;
  }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { xfail { { vect_no_align && { ! vect_hw_misalign } } || { ! vect_strided2 } } } } } */
+/* "foo()" is not vectorized FOR NOW because gather-load
+   cannot handle conditional gather loads as of June 2015.
+
+   The old way of if-converting loads was unsafe because
+   it resulted in thread-unsafe code where the as-written code was OK.
+   The old if conversion did not contain indirection in the loads,
+   so it was simpler, therefor the vectorizer was able to pick it up.  */
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect"  { xfail { { vect_no_align && { ! vect_hw_misalign } } || { ! vect_strided2 } } } } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-19.c b/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-19.c
new file mode 100644
index 0000000..6331d5a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-19.c
@@ -0,0 +1,46 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 50
+
+typedef struct {
+  short a;
+  short b;
+} data;
+
+data in1[N], in2[N], out[N];
+
+__attribute__ ((noinline)) void
+foo ()
+{
+  int i;
+  short c, d;
+
+  for (i = 0; i < N; i++)
+    {
+      c = in1[i].b;
+      d = in2[i].b;
+
+      if (c >= d)
+        {
+          out[i].b = 11;
+          out[i].a = d + 5;
+        }
+      else
+        {
+          out[i].b = d - 12;
+          out[i].a = d;
+        }
+    }
+}
+
+/* It is safe to if-convert and vectorize the above loop because
+   out[i].a and out[i].b are always written to.
+   This is the same as "if-cvt-stores-vect-ifcvt-18.c"
+   except there are no array loads inside the "if" here.  */
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { xfail { { vect_no_align && { ! vect_hw_misalign } } || { ! vect_strided2 } } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr61194.c b/gcc/testsuite/gcc.dg/vect/pr61194.c
index e10cff4..f9e3c51 100644
--- a/gcc/testsuite/gcc.dg/vect/pr61194.c
+++ b/gcc/testsuite/gcc.dg/vect/pr61194.c
@@ -39,5 +39,7 @@  int main()
    return 0;
  }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { d g - f i n a l { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/*** testing temporarily disabled pending fixes to the new-for-2015 if converter ***/
+
  /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-mask-load-1.c b/gcc/testsuite/gcc.dg/vect/vect-mask-load-1.c
index 3f73425..1340b7d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-mask-load-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-mask-load-1.c
@@ -47,5 +47,7 @@  main ()
    return 0;
  }

-/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 1 "vect" { target avx_runtime } } } */
+/* { d g - f i n a l { scan-tree-dump-times "note: vectorized 1 loops" 1 "vect" { target avx_runtime } } } */
+/*** testing temporarily disabled pending fixes to the new-for-2015 if converter ***/
+
  /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-mask-loadstore-1.c b/gcc/testsuite/gcc.dg/vect/vect-mask-loadstore-1.c
index 493a18d..a900f5d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-mask-loadstore-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-mask-loadstore-1.c
@@ -45,5 +45,7 @@  main ()
    return 0;
  }

-/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 1 "vect" { target avx_runtime } } } */
+/* { d g - f i n a l { scan-tree-dump-times "note: vectorized 1 loops" 1 "vect" { target avx_runtime } } } */
+/*** testing temporarily disabled pending fixes to the new-for-2015 if converter ***/
+
  /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
index 38e2009..8cde5f8 100644
--- a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
+++ b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
@@ -3,5 +3,7 @@ 

  #include "avx2-gather-5.c"

-/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 1 "vect" } } */
+/* { d g - f i n a l { scan-tree-dump-times "note: vectorized 1 loops in function" 1 "vect" } } */
+/*** testing temporarily disabled pending fixes to the new-for-2015 if converter ***/
+
  /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vect-aggressive-1.c b/gcc/testsuite/gcc.target/i386/avx2-vect-aggressive-1.c
index a9a88e1..90ab9ba 100644
--- a/gcc/testsuite/gcc.target/i386/avx2-vect-aggressive-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx2-vect-aggressive-1.c
@@ -29,5 +29,7 @@  void foo()
      }
  }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { d g - f i n a l { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/*** testing temporarily disabled pending fixes to the new-for-2015 if converter ***/
+
  /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vect-aggressive.c b/gcc/testsuite/gcc.target/i386/avx2-vect-aggressive.c
index 07f0821..897c9b8 100644
--- a/gcc/testsuite/gcc.target/i386/avx2-vect-aggressive.c
+++ b/gcc/testsuite/gcc.target/i386/avx2-vect-aggressive.c
@@ -44,6 +44,8 @@  avx2_test (void)
      abort ();
  }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { d g - f i n a l { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/*** testing temporarily disabled pending fixes to the new-for-2015 if converter ***/
+
  /* { dg-final { cleanup-tree-dump "vect" } } */

diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 3d1d5f8..eb1c559 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -4393,22 +4393,11 @@  compute_all_dependences (vec<data_reference_p> datarefs,
    return true;
  }

-/* Describes a location of a memory reference.  */
-
-typedef struct data_ref_loc_d
-{
-  /* The memory reference.  */
-  tree ref;
-
-  /* True if the memory reference is read.  */
-  bool is_read;
-} data_ref_loc;
-

  /* Stores the locations of memory references in STMT to REFERENCES. Returns
     true if STMT clobbers memory, false otherwise.  */

-static bool
+bool
  get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_heap> *references)
  {
    bool clobbers_memory = false;
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
index edb3b56..06a0f1a 100644
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
@@ -557,4 +557,18 @@  lambda_matrix_new (int m, int n, struct obstack *lambda_obstack)
    return mat;
  }

+/* Describes a location of a memory reference.  */
+
+typedef struct data_ref_loc_d
+{
+  /* The memory reference.  */
+  tree ref;
+
+  /* True if the memory reference is read.  */
+  bool is_read;
+} data_ref_loc;
+
+bool
+get_references_in_stmt (gimple /* stmt */, vec<data_ref_loc, va_heap>* /* references */);
+
  #endif  /* GCC_TREE_DATA_REF_H  */
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index 49ff458..2d54b62 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -19,7 +19,7 @@  along with GCC; see the file COPYING3.  If not see
  <http://www.gnu.org/licenses/>.  */

  /* This pass implements a tree level if-conversion of loops.  Its
-   initial goal is to help the vectorizer to vectorize loops with
+   initial goal is to help the vectorizer to vectorize loops with
     conditions.

     A short description of if-conversion:
@@ -606,188 +606,39 @@  if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi,
    return true;
  }

-/* Records the status of a data reference.  This struct is attached to
-   each DR->aux field.  */
-
-struct ifc_dr {
-  /* -1 when not initialized, 0 when false, 1 when true.  */
-  int written_at_least_once;
-
-  /* -1 when not initialized, 0 when false, 1 when true.  */
-  int rw_unconditionally;
-};
-
-#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
-#define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once)
-#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
-
-/* Returns true when the memory references of STMT are read or written
-   unconditionally.  In other words, this function returns true when
-   for every data reference A in STMT there exist other accesses to
-   a data reference with the same base with predicates that add up (OR-up) to
-   the true predicate: this ensures that the data reference A is touched
-   (read or written) on every iteration of the if-converted loop.  */
+/* Wrapper around gimple_could_trap_p refined for the needs of the
+   if-conversion.  */

  static bool
-memrefs_read_or_written_unconditionally (gimple stmt,
-					 vec<data_reference_p> drs)
+ifcvt_could_trap_p (gimple stmt)
  {
-  int i, j;
-  data_reference_p a, b;
-  tree ca = bb_predicate (gimple_bb (stmt));
-
-  for (i = 0; drs.iterate (i, &a); i++)
-    if (DR_STMT (a) == stmt)
-      {
-	bool found = false;
-	int x = DR_RW_UNCONDITIONALLY (a);
-
-	if (x == 0)
-	  return false;
-
-	if (x == 1)
-	  continue;
-
-	for (j = 0; drs.iterate (j, &b); j++)
-          {
-            tree ref_base_a = DR_REF (a);
-            tree ref_base_b = DR_REF (b);
-
-            if (DR_STMT (b) == stmt)
-              continue;
-
-            while (TREE_CODE (ref_base_a) == COMPONENT_REF
-                   || TREE_CODE (ref_base_a) == IMAGPART_EXPR
-                   || TREE_CODE (ref_base_a) == REALPART_EXPR)
-              ref_base_a = TREE_OPERAND (ref_base_a, 0);
-
-            while (TREE_CODE (ref_base_b) == COMPONENT_REF
-                   || TREE_CODE (ref_base_b) == IMAGPART_EXPR
-                   || TREE_CODE (ref_base_b) == REALPART_EXPR)
-              ref_base_b = TREE_OPERAND (ref_base_b, 0);
-
-  	    if (!operand_equal_p (ref_base_a, ref_base_b, 0))
-	      {
-	        tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
-
-	        if (DR_RW_UNCONDITIONALLY (b) == 1
-		    || is_true_predicate (cb)
-		    || is_true_predicate (ca
-                        = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
-		  {
-		    DR_RW_UNCONDITIONALLY (a) = 1;
-  		    DR_RW_UNCONDITIONALLY (b) = 1;
-		    found = true;
-		    break;
-		  }
-               }
-	    }
-
-	if (!found)
-	  {
-	    DR_RW_UNCONDITIONALLY (a) = 0;
-	    return false;
-	  }
-      }
+  if (gimple_vuse (stmt)
+      && !gimple_could_trap_p_1 (stmt, false, false))
+    return false;

-  return true;
+  return gimple_could_trap_p (stmt);
  }

-/* Returns true when the memory references of STMT are unconditionally
-   written.  In other words, this function returns true when for every
-   data reference A written in STMT, there exist other writes to the
-   same data reference with predicates that add up (OR-up) to the true
-   predicate: this ensures that the data reference A is written on
-   every iteration of the if-converted loop.  */
-
+/* Returns true when stmt contains a data reference.  */
  static bool
-write_memrefs_written_at_least_once (gimple stmt,
-				     vec<data_reference_p> drs)
+has_non_addressable_refs (gimple stmt)
  {
-  int i, j;
-  data_reference_p a, b;
-  tree ca = bb_predicate (gimple_bb (stmt));
-
-  for (i = 0; drs.iterate (i, &a); i++)
-    if (DR_STMT (a) == stmt
-	&& DR_IS_WRITE (a))
-      {
-	bool found = false;
-	int x = DR_WRITTEN_AT_LEAST_ONCE (a);
-
-	if (x == 0)
-	  return false;
-
-	if (x == 1)
-	  continue;
+  vec<data_ref_loc, va_heap>* refs;
+  vec_alloc (refs, 3);

-	for (j = 0; drs.iterate (j, &b); j++)
-	  if (DR_STMT (b) != stmt
-	      && DR_IS_WRITE (b)
-	      && same_data_refs_base_objects (a, b))
-	    {
-	      tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
+  bool res = get_references_in_stmt (stmt, refs);
+  unsigned i;
+  data_ref_loc *ref;

-	      if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1
-		  || is_true_predicate (cb)
-		  || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
-								 ca, cb)))
-		{
-		  DR_WRITTEN_AT_LEAST_ONCE (a) = 1;
-		  DR_WRITTEN_AT_LEAST_ONCE (b) = 1;
-		  found = true;
-		  break;
-		}
-	    }
-
-	if (!found)
-	  {
-	    DR_WRITTEN_AT_LEAST_ONCE (a) = 0;
-	    return false;
-	  }
+  FOR_EACH_VEC_ELT (*refs, i, ref)
+    if (may_be_nonaddressable_p (ref->ref))
+      {
+	res = true;
+	break;
        }

-  return true;
-}
-
-/* Return true when the memory references of STMT won't trap in the
-   if-converted code.  There are two things that we have to check for:
-
-   - writes to memory occur to writable memory: if-conversion of
-   memory writes transforms the conditional memory writes into
-   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
-   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
-   be executed at all in the original code, it may be a readonly
-   memory.  To check that A is not const-qualified, we check that
-   there exists at least an unconditional write to A in the current
-   function.
-
-   - reads or writes to memory are valid memory accesses for every
-   iteration.  To check that the memory accesses are correctly formed
-   and that we are allowed to read and write in these locations, we
-   check that the memory accesses to be if-converted occur at every
-   iteration unconditionally.  */
-
-static bool
-ifcvt_memrefs_wont_trap (gimple stmt, vec<data_reference_p> refs)
-{
-  return write_memrefs_written_at_least_once (stmt, refs)
-    && memrefs_read_or_written_unconditionally (stmt, refs);
-}
-
-/* Wrapper around gimple_could_trap_p refined for the needs of the
-   if-conversion.  Try to prove that the memory accesses of STMT could
-   not trap in the innermost loop containing STMT.  */
-
-static bool
-ifcvt_could_trap_p (gimple stmt, vec<data_reference_p> refs)
-{
-  if (gimple_vuse (stmt)
-      && !gimple_could_trap_p_1 (stmt, false, false)
-      && ifcvt_memrefs_wont_trap (stmt, refs))
-    return false;
-
-  return gimple_could_trap_p (stmt);
+  vec_free (refs);
+  return res;
  }

  /* Return true if STMT could be converted into a masked load or store
@@ -849,7 +700,6 @@  ifcvt_can_use_mask_load_store (gimple stmt)

  static bool
  if_convertible_gimple_assign_stmt_p (gimple stmt,
-				     vec<data_reference_p> refs,
  				     bool *any_mask_load_store)
  {
    tree lhs = gimple_assign_lhs (stmt);
@@ -883,7 +733,7 @@  if_convertible_gimple_assign_stmt_p (gimple stmt,

    if (flag_tree_loop_if_convert_stores)
      {
-      if (ifcvt_could_trap_p (stmt, refs))
+      if (ifcvt_could_trap_p (stmt))
  	{
  	  if (ifcvt_can_use_mask_load_store (stmt))
  	    {
@@ -892,9 +742,17 @@  if_convertible_gimple_assign_stmt_p (gimple stmt,
  	      return true;
  	    }
  	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "tree could trap...\n");
+	    fprintf (dump_file, "tree could trap\n");
  	  return false;
  	}
+
+      if (has_non_addressable_refs (stmt))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "has non-addressable memory references\n");
+	  return false;
+	}
+
        return true;
      }

@@ -942,7 +800,7 @@  if_convertible_gimple_assign_stmt_p (gimple stmt,
     - it is builtins call.  */

  static bool
-if_convertible_stmt_p (gimple stmt, vec<data_reference_p> refs,
+if_convertible_stmt_p (gimple stmt,
  		       bool *any_mask_load_store)
  {
    switch (gimple_code (stmt))
@@ -953,7 +811,7 @@  if_convertible_stmt_p (gimple stmt, vec<data_reference_p> refs,
        return true;

      case GIMPLE_ASSIGN:
-      return if_convertible_gimple_assign_stmt_p (stmt, refs,
+      return if_convertible_gimple_assign_stmt_p (stmt,
  						  any_mask_load_store);

      case GIMPLE_CALL:
@@ -1173,8 +1031,8 @@  get_loop_body_in_if_conv_order (const struct loop *loop)
     predicate_bbs first allocates the predicates of the basic blocks.
     These fields are then initialized with the tree expressions
     representing the predicates under which a basic block is executed
-   in the LOOP.  As the loop->header is executed at each iteration, it
-   has the "true" predicate.  Other statements executed under a
+   in the LOOP.  As the loop->header is executed at each iteration,
+   it has the "true" predicate.  Other statements executed under a
     condition are predicated with that condition, for example

     | if (x)
@@ -1322,17 +1180,7 @@  if_convertible_loop_p_1 (struct loop *loop,
      }

    if (flag_tree_loop_if_convert_stores)
-    {
-      data_reference_p dr;
-
-      for (i = 0; refs->iterate (i, &dr); i++)
-	{
-	  dr->aux = XNEW (struct ifc_dr);
-	  DR_WRITTEN_AT_LEAST_ONCE (dr) = -1;
-	  DR_RW_UNCONDITIONALLY (dr) = -1;
-	}
-      predicate_bbs (loop);
-    }
+    predicate_bbs (loop);

    for (i = 0; i < loop->num_nodes; i++)
      {
@@ -1342,7 +1190,7 @@  if_convertible_loop_p_1 (struct loop *loop,
        /* Check the if-convertibility of statements in predicated BBs.  */
        if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
-	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
+	  if (!if_convertible_stmt_p (gsi_stmt (itr),
  				      any_mask_load_store))
  	    return false;
      }
@@ -1988,6 +1836,80 @@  mask_exists (int size, vec<int> vec)
    return -1;
  }

+/* Inserts at position GSI a statement "ADDRESS_OF_AI = &AI;" and
+   returns the ADDRESS_OF_AI.  */
+
+static tree
+insert_address_of (tree ai, gimple_stmt_iterator *gsi)
+{
+  tree addr_expr = build_fold_addr_expr (ai);
+  tree address_of_ai = create_tmp_reg (TREE_TYPE (addr_expr), "_ifc_");
+  gimple addr_stmt = gimple_build_assign (address_of_ai, addr_expr);
+
+  address_of_ai = make_ssa_name (address_of_ai, addr_stmt);
+  gimple_assign_set_lhs (addr_stmt, address_of_ai);
+  SSA_NAME_DEF_STMT (address_of_ai) = addr_stmt;
+  update_stmt (addr_stmt);
+  gsi_insert_before (gsi, addr_stmt, GSI_SAME_STMT);
+
+  return address_of_ai;
+}
+
+/* Insert at the beginning of the first basic block of the current
+   function the allocation on the stack of N_BYTES of memory and
+   return a pointer to this scratchpad memory.  */
+
+static tree
+create_scratchpad (unsigned int n_bytes)
+{
+  basic_block bb = single_succ ( ENTRY_BLOCK_PTR_FOR_FN(cfun) );
+  gimple_stmt_iterator gsi = gsi_after_labels (bb);
+  tree x = build_int_cst (integer_type_node, n_bytes - 1);
+  tree elt_type = char_type_node;
+  tree array_type = build_array_type (elt_type, build_index_type (x));
+  tree base = create_tmp_reg (array_type, "scratch_pad");
+
+  return insert_address_of (base, &gsi);
+}
+
+/* Returns a memory reference to the pointer defined by the
+   conditional expression: pointer = cond ? &A[i] : scratch_pad; and
+   inserts this code at GSI.  */
+
+static tree
+create_indirect_cond_expr (tree ai, tree cond, tree *scratch_pad,
+			   gimple_stmt_iterator *gsi, bool swap)
+{
+  tree cond_expr;
+  tree pointer;
+  gimple pointer_stmt;
+
+  /* address_of_ai = &A[i];  */
+  tree address_of_ai = insert_address_of (ai, gsi);
+
+  /* Allocate the scratch pad only once per function.  */
+  if (!*scratch_pad)
+    *scratch_pad = create_scratchpad (64);
+
+  /* pointer = cond ? address_of_ai : scratch_pad;  */
+  pointer = create_tmp_reg (TREE_TYPE (address_of_ai), "_ifc_");
+
+  cond_expr = build3 (COND_EXPR, TREE_TYPE (address_of_ai),
+		      unshare_expr (cond),
+		      swap ? *scratch_pad  : address_of_ai,
+		      swap ? address_of_ai : *scratch_pad);
+
+  pointer_stmt = gimple_build_assign (pointer, cond_expr);
+  pointer = make_ssa_name (pointer, pointer_stmt);
+  gimple_assign_set_lhs (pointer_stmt, pointer);
+  SSA_NAME_DEF_STMT (pointer) = pointer_stmt;
+  update_stmt (pointer_stmt);
+  gsi_insert_before (gsi, pointer_stmt, GSI_SAME_STMT);
+
+  return build2 (MEM_REF, TREE_TYPE (ai), pointer,
+		 build_int_cst (reference_alias_ptr_type (ai), 0));
+}
+
  /* Predicate each write to memory in LOOP.

     This function transforms control flow constructs containing memory
@@ -1997,12 +1919,22 @@  mask_exists (int size, vec<int> vec)
     |   if (cond)
     |     A[i] = expr;

-   into the following form that does not contain control flow:
+   into the following form:

-   | for (i = 0; i < N; i++)
-   |   A[i] = cond ? expr : A[i];
+   | void *scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES); // FIX ME: "alloca" is not used anymore; we switched to a local array, which is faster

-   The original CFG looks like this:
+   |
+   | for (i = 0; i < N; i++) {
+   |   p = cond ? &A[i] : scratch_pad;
+   |   *p = expr;
+   | }
+
+   SCRATCH_PAD is allocated on the stack for each function once and it is
+   large enough to contain any kind of scalar assignment or read.  All
+   values read or written to SCRATCH_PAD are not used in the computation.
+
+   In a more detailed way, the if-conversion of memory writes works
+   like this, supposing that the original CFG looks like this:

     | bb_0
     |   i = 0
@@ -2052,10 +1984,12 @@  mask_exists (int size, vec<int> vec)
     |   goto bb_1
     | end_bb_4

-   predicate_mem_writes is then predicating the memory write as follows:
+   predicate_mem_writes is then allocating SCRATCH_PAD in the basic block
+   preceding the loop header, and is predicating the memory write:

     | bb_0
     |   i = 0
+   |   scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES); // FIX ME: "alloca" is not used anymore; we switched to a local array, which is faster
     | end_bb_0
     |
     | bb_1
@@ -2063,12 +1997,14 @@  mask_exists (int size, vec<int> vec)
     | end_bb_1
     |
     | bb_2
+   |   cond = some_computation;
     |   if (cond) goto bb_3 else goto bb_4
     | end_bb_2
     |
     | bb_3
     |   cond = some_computation;
-   |   A[i] = cond ? expr : A[i];
+   |   p = cond ? &A[i] : scratch_pad;
+   |   *p = expr;
     |   goto bb_4
     | end_bb_3
     |
@@ -2081,12 +2017,15 @@  mask_exists (int size, vec<int> vec)

     | bb_0
     |   i = 0
+   |   scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES); // FIX ME: "alloca" is not used anymore; we switched to a local array, which is faster
+
     |   if (i < N) goto bb_5 else goto bb_1
     | end_bb_0
     |
     | bb_1
     |   cond = some_computation;
-   |   A[i] = cond ? expr : A[i];
+   |   p = cond ? &A[i] : scratch_pad;
+   |   *p = expr;
     |   if (i < N) goto bb_5 else goto bb_4
     | end_bb_1
     |
@@ -2096,7 +2035,7 @@  mask_exists (int size, vec<int> vec)
  */

  static void
-predicate_mem_writes (loop_p loop)
+predicate_mem_writes (loop_p loop, tree *scratch_pad)
  {
    unsigned int i, orig_loop_num_nodes = loop->num_nodes;
    auto_vec<int, 1> vect_sizes;
@@ -2179,24 +2118,29 @@  predicate_mem_writes (loop_p loop)
  	  }
  	else if (gimple_vdef (stmt))
  	  {
-	    tree lhs = gimple_assign_lhs (stmt);
-	    tree rhs = gimple_assign_rhs1 (stmt);
-	    tree type = TREE_TYPE (lhs);

-	    lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
-	    rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
-	    if (swap)
-	      {
-		tree tem = lhs;
-		lhs = rhs;
-		rhs = tem;
-	      }
-	    cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
-					       is_gimple_condexpr, NULL_TREE,
-					       true, GSI_SAME_STMT);
-	    rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
-	    gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
+  	    /* A[i] = x;  */
+	    tree ai = gimple_assign_lhs (stmt);
+
+	    /* pointer = cond ? &A[i] : scratch_pad;  */
+	    tree star_pointer = create_indirect_cond_expr (ai, cond,
+			  scratch_pad, &gsi, swap);
+	    /* *pointer = x;  */
+	    gimple_assign_set_lhs (stmt, star_pointer);
  	    update_stmt (stmt);
+
+	  }
+	else if (gimple_vuse (stmt))
+	  {
+	      /* x = A[i];  */
+	      tree ai = gimple_assign_rhs1 (stmt);
+
+	      /* pointer = cond ? &A[i] : scratch_pad;  */
+	      tree star_pointer = create_indirect_cond_expr (ai, cond,
+					     scratch_pad, &gsi, swap);
+	      /* x = *pointer;  */
+	      gimple_assign_set_rhs1 (stmt, star_pointer);
+	      update_stmt (stmt);
  	  }
      }
  }
@@ -2247,7 +2191,7 @@  remove_conditions_and_labels (loop_p loop)
     blocks.  Replace PHI nodes with conditional modify expressions.  */

  static void
-combine_blocks (struct loop *loop, bool any_mask_load_store)
+combine_blocks (struct loop *loop, bool any_mask_load_store, tree *scratch_pad)
  {
    basic_block bb, exit_bb, merge_target_bb;
    unsigned int orig_loop_num_nodes = loop->num_nodes;
@@ -2261,7 +2205,7 @@  combine_blocks (struct loop *loop, bool any_mask_load_store)
    predicate_all_scalar_phis (loop);

    if (flag_tree_loop_if_convert_stores || any_mask_load_store)
-    predicate_mem_writes (loop);
+    predicate_mem_writes (loop, scratch_pad);

    /* Merge basic blocks: first remove all the edges in the loop,
       except for those from the exit block.  */
@@ -2523,7 +2467,7 @@  ifcvt_walk_pattern_tree (tree var, vec<gimple> *defuse_list,
    return;
  }

-/* Returns true if STMT can be a root of bool pattern apllied
+/* Returns true if STMT can be a root of bool pattern applied
     by vectorizer.  */

  static bool
@@ -2553,7 +2497,7 @@  stmt_is_root_of_bool_pattern (gimple stmt)
    return false;
  }

-/*  Traverse all statements in BB which correspondent to loop header to
+/*  Traverse all statements in BB which correspond to loop header to
      find out all statements which can start bool pattern applied by
      vectorizer and convert multiple uses in it to conform pattern
      restrictions.  Such case can occur if the same predicate is used both
@@ -2634,7 +2578,7 @@  ifcvt_local_dce (basic_block bb)
        gimple_set_plf (phi, GF_PLF_2, true);
        worklist.safe_push (phi);
      }
-  /* Consider load/store statemnts, CALL and COND as live.  */
+  /* Consider load/store statements, CALL and COND as live.  */
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        stmt = gsi_stmt (gsi);
@@ -2716,13 +2660,13 @@  ifcvt_local_dce (basic_block bb)
     changed.  */

  static unsigned int
-tree_if_conversion (struct loop *loop)
+tree_if_conversion (struct loop *loop, tree *scratch_pad)
  {
    unsigned int todo = 0;
    ifc_bbs = NULL;
    bool any_mask_load_store = false;

-  /* Set-up aggressive if-conversion for loops marked with simd pragma.  */
+  /* Set up aggressive if-conversion for loops marked with simd pragma.  */
    aggressive_if_conv = loop->force_vectorize;
    /* Check either outer loop was marked with simd pragma.  */
    if (!aggressive_if_conv)
@@ -2751,7 +2695,7 @@  tree_if_conversion (struct loop *loop)
    /* Now all statements are if-convertible.  Combine all the basic
       blocks into one huge basic block doing the if-conversion
       on-the-fly.  */
-  combine_blocks (loop, any_mask_load_store);
+  combine_blocks (loop, any_mask_load_store, scratch_pad);

    /* Delete dead predicate computations and repair tree correspondent
       to bool pattern to delete multiple uses of preidcates.  */
@@ -2817,10 +2761,26 @@  public:
  bool
  pass_if_conversion::gate (function *fun)
  {
-  return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
-	   && flag_tree_loop_if_convert != 0)
-	  || flag_tree_loop_if_convert == 1
-	  || flag_tree_loop_if_convert_stores == 1);
+  return  (
+	     (
+	       flag_tree_loop_vectorize
+	    || fun->has_force_vectorize_loops
+	     )
+	  && (
+	       (
+	         flag_tree_loop_if_convert        != 0
+	       )
+	    || (
+	         flag_tree_loop_if_convert_stores != 0
+	       )
+	     )
+	  )
+	  || (
+	       flag_tree_loop_if_convert        > 0
+	     )
+	  || (
+	       flag_tree_loop_if_convert_stores > 0
+	     );
  }

  unsigned int
@@ -2832,12 +2792,14 @@  pass_if_conversion::execute (function *fun)
    if (number_of_loops (fun) <= 1)
      return 0;

+  tree scratch_pad = NULL_TREE;
+
    FOR_EACH_LOOP (loop, 0)
      if (flag_tree_loop_if_convert == 1
  	|| flag_tree_loop_if_convert_stores == 1
  	|| ((flag_tree_loop_vectorize || loop->force_vectorize)
  	    && !loop->dont_vectorize))
-      todo |= tree_if_conversion (loop);
+      todo |= tree_if_conversion (loop, &scratch_pad);

  #ifdef ENABLE_CHECKING
    {