new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+
+extern "C" double pow(double, double);
+template <typename T>
+T pow(T x, unsigned int n)
+{
+ if (!n)
+ return 1;
+
+ T y = 1;
+ while (n > 1)
+ {
+ if (n%2)
+ y *= x;
+ x = x*x;
+ n /= 2;
+ }
+ return x*y;
+}
+
+void testVec(int* x)
+{
+ for (int i = 0; i < 8; ++i)
+ x[i] = pow(x[i], 10);
+}
+
+/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { vect_double && vect_hw_misalign } } } } */
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "tree-ssa-scopedtables.h"
#include "tree-ssa-threadedge.h"
+#include "tree-ssa-sccvn.h"
#include "params.h"
/* Duplicates headers of loops if they are small enough, so that the statements
@@ -297,12 +298,14 @@ ch_base::copy_headers (function *fun)
bool changed = false;
if (number_of_loops (fun) <= 1)
- return 0;
+ return 0;
bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
bbs_size = n_basic_blocks_for_fn (fun);
+ auto_vec<std::pair<edge, loop_p> > copied;
+
FOR_EACH_LOOP (loop, 0)
{
int initial_limit = PARAM_VALUE (PARAM_MAX_LOOP_HEADER_INSNS);
@@ -371,6 +374,7 @@ ch_base::copy_headers (function *fun)
fprintf (dump_file, "Duplication failed.\n");
continue;
}
+ copied.safe_push (std::make_pair (entry, loop));
/* If the loop has the form "for (i = j; i < j + 10; i++)" then
this copying can introduce a case where we rely on undefined
@@ -422,7 +426,28 @@ ch_base::copy_headers (function *fun)
}
if (changed)
- update_ssa (TODO_update_ssa);
+ {
+ update_ssa (TODO_update_ssa);
+ /* After updating SSA form perform CSE on the loop header
+ copies. This is esp. required for the pass before
+ vectorization since nothing cleans up copied exit tests
+ that can now be simplified. CSE from the entry of the
+ region we copied till all loop exit blocks but not
+ entering the loop itself. */
+ for (unsigned i = 0; i < copied.length (); ++i)
+ {
+ edge entry = copied[i].first;
+ loop_p loop = copied[i].second;
+ vec<edge> exit_edges = get_loop_exit_edges (loop);
+ bitmap exit_bbs = BITMAP_ALLOC (NULL);
+ for (unsigned j = 0; j < exit_edges.length (); ++j)
+ bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index);
+ bitmap_set_bit (exit_bbs, loop->header->index);
+ do_rpo_vn (cfun, entry, exit_bbs);
+ BITMAP_FREE (exit_bbs);
+ exit_edges.release ();
+ }
+ }
free (bbs);
free (copied_bbs);
@@ -473,24 +498,13 @@ pass_ch_vect::process_loop_p (struct loop *loop)
if (loop->dont_vectorize)
return false;
- if (!do_while_loop_p (loop))
- return true;
-
- /* The vectorizer won't handle anything with multiple exits, so skip. */
+ /* The vectorizer won't handle anything with multiple exits, so skip. */
edge exit = single_exit (loop);
if (!exit)
return false;
- /* Copy headers iff there looks to be code in the loop after the exit block,
- i.e. the exit block has an edge to another block (besides the latch,
- which should be empty). */
- edge_iterator ei;
- edge e;
- FOR_EACH_EDGE (e, ei, exit->src->succs)
- if (!loop_exit_edge_p (loop, e)
- && e->dest != loop->header
- && e->dest != loop->latch)
- return true;
+ if (!do_while_loop_p (loop))
+ return true;
return false;
}
@@ -6115,6 +6115,10 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
break;
}
+ /* For a nested cycle we might end up with an operation like
+ phi_result * phi_result. */
+ if (!vectype_in)
+ vectype_in = STMT_VINFO_VECTYPE (stmt_info);
gcc_assert (vectype_in);
if (slp_node)
The following fixes another instance of PR87914, this time it is DOM wrecking loop-header copying presenting vectorization with wrong loop form. The loop header copying pass right before vectorization isn't of great help since nothing cleans up after it so loop form is still bogus. I've tried to move loop-header copying after jump threading but the fallout is too large for the moment. So this resorts to instead run CSE on the copied blocks. I've took the liberty to cleanup the vec-ch predicate a bit as well. This then reveals another ICE in reduction handling which this patch fixes. Bootstrap & regtest running on x86_64-unknown-linux-gnu. Richard. From ea6b41e717b32c93ab3a27df55632f06fa1d71fc Mon Sep 17 00:00:00 2001 From: Richard Guenther <rguenther@suse.de> Date: Thu, 8 Nov 2018 11:23:12 +0100 Subject: [PATCH] fix-pr87621 PR tree-optimization/87621 * tree-vect-loop.c (vectorizable_reduction): Handle reduction op with only phi inputs. * tree-ssa-loop-ch.c: Include tree-ssa-sccvn.h. (ch_base::copy_headers): Run CSE on copied loop headers. (pass_ch_vect::process_loop_p): Simplify. * g++.dg/vect/pr87621.cc: New testcase.