@@ -168,6 +168,15 @@ struct GTY ((chain_next ("%h.next"))) lo
describes what is the state of the estimation. */
enum loop_estimation estimate_state;
+ /* If > 0, an integer, where the user asserted that for any
+ I in [ 0, nb_iterations ) and for any J in
+ [ I, min ( I + safelen, nb_iterations ) ), the Ith and Jth iterations
+ of the loop can be safely evaluated concurrently. */
+ int safelen;
+
+ /* True if we should try harder to vectorize this loop. */
+ bool force_vect;
+
/* Upper bound on number of iterations of a loop. */
struct nb_iter_bound *bounds;
@@ -641,6 +641,10 @@ struct GTY(()) function {
adjusts one of its arguments and forwards to another
function. */
unsigned int is_thunk : 1;
+
+ /* Nonzero if the current function contains any loops with
+ loop->force_vect set. */
+ unsigned int has_force_vect_loops : 1;
};
/* Add the decl D to the local_decls list of FUN. */
@@ -4960,6 +4960,8 @@ expand_omp_simd (struct omp_region *regi
edge e, ne;
tree *counts = NULL;
int i;
+ tree safelen = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
+ OMP_CLAUSE_SAFELEN);
type = TREE_TYPE (fd->loop.v);
entry_bb = region->entry;
@@ -5157,6 +5159,34 @@ expand_omp_simd (struct omp_region *regi
set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
set_immediate_dominator (CDI_DOMINATORS, l2_bb, l1_bb);
set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
+
+ if (!broken_loop)
+ {
+ struct loop *loop = alloc_loop ();
+ loop->header = l1_bb;
+ loop->latch = e->dest;
+ add_loop (loop, l1_bb->loop_father);
+ if (safelen == NULL_TREE)
+ loop->safelen = INT_MAX;
+ else
+ {
+ safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
+ if (!host_integerp (safelen, 1)
+ || (unsigned HOST_WIDE_INT) tree_low_cst (safelen, 1)
+ > INT_MAX)
+ loop->safelen = INT_MAX;
+ else
+ loop->safelen = tree_low_cst (safelen, 1);
+ }
+ /* If not -fno-tree-vectorize, hint that we want to vectorize
+ the loop. */
+ if (flag_tree_vectorize
+ || !global_options_set.x_flag_tree_vectorize)
+ {
+ loop->force_vect = true;
+ cfun->has_force_vect_loops = true;
+ }
+ }
}
@@ -101,7 +101,8 @@ vectorize_loops (void)
than all previously defined loops. This fact allows us to run
only over initial loops skipping newly generated ones. */
FOR_EACH_LOOP (li, loop, 0)
- if (optimize_loop_nest_for_speed_p (loop))
+ if ((flag_tree_vectorize && optimize_loop_nest_for_speed_p (loop))
+ || loop->force_vect)
{
loop_vec_info loop_vinfo;
vect_location = find_loop_location (loop);
@@ -122,6 +123,9 @@ vectorize_loops (void)
LOC_FILE (vect_location), LOC_LINE (vect_location));
vect_transform_loop (loop_vinfo);
num_vectorized_loops++;
+ /* Now that the loop has been vectorized, allow it to be unrolled
+ etc. */
+ loop->force_vect = false;
}
vect_location = UNKNOWN_LOC;
@@ -225,7 +225,7 @@ tree_vectorize (void)
static bool
gate_tree_vectorize (void)
{
- return flag_tree_vectorize;
+ return flag_tree_vectorize || cfun->has_force_vect_loops;
}
struct gimple_opt_pass pass_vectorize =
@@ -1123,6 +1123,11 @@ tree_unroll_loops_completely_1 (bool may
if (changed)
return true;
+ /* Don't unroll #pragma omp simd loops until the vectorizer
+ attempts to vectorize those. */
+ if (loop->force_vect)
+ return false;
+
/* Try to unroll this loop. */
loop_father = loop_outer (loop);
if (!loop_father)
@@ -255,6 +255,15 @@ vect_analyze_data_ref_dependence (struct
/* Unknown data dependence. */
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
{
+ /* If user asserted safelen consecutive iterations can be
+ executed concurrently, assume independence. */
+ if (loop->safelen >= 2)
+ {
+ if (loop->safelen < *max_vf)
+ *max_vf = loop->safelen;
+ return false;
+ }
+
if (STMT_VINFO_GATHER_P (stmtinfo_a)
|| STMT_VINFO_GATHER_P (stmtinfo_b))
{
@@ -291,6 +300,15 @@ vect_analyze_data_ref_dependence (struct
/* Known data dependence. */
if (DDR_NUM_DIST_VECTS (ddr) == 0)
{
+ /* If user asserted safelen consecutive iterations can be
+ executed concurrently, assume independence. */
+ if (loop->safelen >= 2)
+ {
+ if (loop->safelen < *max_vf)
+ *max_vf = loop->safelen;
+ return false;
+ }
+
if (STMT_VINFO_GATHER_P (stmtinfo_a)
|| STMT_VINFO_GATHER_P (stmtinfo_b))
{
@@ -1822,6 +1822,10 @@ main_tree_if_conversion (void)
return 0;
FOR_EACH_LOOP (li, loop, 0)
+ if (flag_tree_loop_if_convert == 1
+ || flag_tree_loop_if_convert_stores == 1
+ || flag_tree_vectorize
+ || loop->force_vect)
changed |= tree_if_conversion (loop);
if (changed)
@@ -1848,7 +1852,8 @@ main_tree_if_conversion (void)
static bool
gate_tree_if_conversion (void)
{
- return ((flag_tree_vectorize && flag_tree_loop_if_convert != 0)
+ return (((flag_tree_vectorize || cfun->has_force_vect_loops)
+ && flag_tree_loop_if_convert != 0)
|| flag_tree_loop_if_convert == 1
|| flag_tree_loop_if_convert_stores == 1);
}
On Tue, May 14, 2013 at 12:16:07PM +0200, Richard Biener wrote:
> Works for me.
...

Ok, here is what I've committed to gomp-4_0-branch.
tree-vect-data-refs.c was kept (almost) unchanged, as per IRC discussion;
that is something to add to the todo list for the future.

2013-05-14  Jakub Jelinek  <jakub@redhat.com>

	* cfgloop.h (struct loop): Add safelen and force_vect fields.
	* function.h (struct function): Add has_force_vect_loops field.
	* omp-low.c (expand_omp_simd): If !broken_loop, create loop for
	the simd region and set safelen and force_vect fields in it.
	* tree-vectorizer.c (vectorize_loops): If loop has force_vect set,
	vectorize it even if flag_tree_vectorize isn't set.  Clear
	loop->force_vect after vectorization.
	* tree-ssa-loop.c (gate_tree_vectorize): Return true even if
	cfun->has_force_vect_loops.
	* tree-ssa-loop-ivcanon.c (tree_unroll_loops_completely_1): Don't
	unroll loops with loop->force_vect.
	* tree-vect-data-refs.c (vect_analyze_data_ref_dependence): For
	unknown or bad data dependency, if loop->safelen is at least 2,
	just decrease *max_vf to loop->safelen if needed and return false.
	* tree-if-conv.c (main_tree_if_conversion): If-convert also loops with
	loop->force_vect.
	(gate_tree_if_conversion): Return true even if
	cfun->has_force_vect_loops.

	Jakub