@@ -568,6 +568,278 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
return opt_result::success ();
}
+/* This function tries to validate whether an early break vectorization
+   is possible for the current instruction sequence. Returns True if
+ possible, otherwise False.
+
+ Requirements:
+ - Any memory access must be to a fixed size buffer.
+ - There must not be any loads and stores to the same object.
+ - Multiple loads are allowed as long as they don't alias.
+
+ NOTE:
+   This implementation is very conservative. Any overlapping loads/stores
+   that take place before the early break statement get rejected aside from
+ WAR dependencies.
+
+ i.e.:
+
+ a[i] = 8
+ c = a[i]
+ if (b[i])
+ ...
+
+ is not allowed, but
+
+ c = a[i]
+ a[i] = 8
+ if (b[i])
+ ...
+
+   is allowed, which is the common case.
+
+ Arguments:
+ - LOOP_VINFO: loop information for the current loop.
+ - CHAIN: Currently detected sequence of instructions that need to be moved
+ if we are to vectorize this early break.
+ - FIXED: Sequences of SSA_NAMEs that must not be moved, they are reachable from
+ one or more cond conditions. If this set overlaps with CHAIN then FIXED
+ takes precedence. This deals with non-single use cases.
+ - LOADS: List of all loads found during traversal.
+ - BASES: List of all load data references found during traversal.
+ - GSTMT: Current position to inspect for validity. The sequence
+ will be moved upwards from this point.
+ - REACHING_VUSE: The dominating VUSE found so far.
+ - CURRENT_VDEF: The last VDEF we've seen. These are updated in
+ pre-order and updated in post-order after moving the
+ instruction. */
+
+static bool
+validate_early_exit_stmts (loop_vec_info loop_vinfo, hash_set<tree> *chain,
+ hash_set<tree> *fixed, vec<tree> *loads,
+ vec<data_reference *> *bases, tree *reaching_vuse,
+ tree *current_vdef, gimple_stmt_iterator *gstmt,
+ hash_map<tree, tree> *renames)
+{
+ if (gsi_end_p (*gstmt))
+ return true;
+
+ gimple *stmt = gsi_stmt (*gstmt);
+ if (gimple_has_ops (stmt))
+ {
+ tree dest = NULL_TREE;
+ /* Try to find the SSA_NAME being defined. For Statements with an LHS
+ use the LHS, if not, assume that the first argument of a call is the
+ value being defined. e.g. MASKED_LOAD etc. */
+ if (gimple_has_lhs (stmt))
+ {
+ if (is_gimple_assign (stmt))
+ dest = gimple_assign_lhs (stmt);
+ else if (const gcall *call = dyn_cast <const gcall *> (stmt))
+ dest = gimple_call_lhs (call);
+ }
+ else if (const gcall *call = dyn_cast <const gcall *> (stmt))
+ dest = gimple_arg (call, 0);
+ else if (const gcond *cond = dyn_cast <const gcond *> (stmt))
+ {
+ /* Operands of conds are ones we can't move. */
+ fixed->add (gimple_cond_lhs (cond));
+ fixed->add (gimple_cond_rhs (cond));
+ }
+
+ bool move = false;
+
+ stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
+ if (!stmt_vinfo)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "early breaks only supported. Unknown"
+ " statement: %G", stmt);
+ return false;
+ }
+
+ auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
+ if (dr_ref)
+ {
+	      /* We currently only support statically allocated objects due to
+ not having first-faulting loads support or peeling for alignment
+		 support. Compute the size of the referenced object (it could be
+ dynamically allocated). */
+ tree obj = DR_BASE_ADDRESS (dr_ref);
+ if (!obj || TREE_CODE (obj) != ADDR_EXPR)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "early breaks only supported on statically"
+ " allocated objects.\n");
+ return false;
+ }
+
+ tree refop = TREE_OPERAND (obj, 0);
+ tree refbase = get_base_address (refop);
+ if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
+ || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "early breaks only supported on statically"
+ " allocated objects.\n");
+ return false;
+ }
+
+ if (DR_IS_READ (dr_ref))
+ {
+ loads->safe_push (dest);
+ bases->safe_push (dr_ref);
+ }
+ else if (DR_IS_WRITE (dr_ref))
+ {
+ for (auto dr : bases)
+ if (same_data_refs_base_objects (dr, dr_ref))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+ vect_location,
+ "early breaks only supported,"
+ " overlapping loads and stores found"
+ " before the break statement.\n");
+ return false;
+ }
+	      /* Any write starts a new chain. */
+ move = true;
+ }
+ }
+
+      /* If a statement is live and escapes the loop through usage in the loop
+ epilogue then we can't move it since we need to maintain its
+ reachability through all exits. */
+ bool skip = false;
+ if (STMT_VINFO_LIVE_P (stmt_vinfo)
+ && !(dr_ref && DR_IS_WRITE (dr_ref)))
+ {
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, dest)
+ {
+ basic_block bb = gimple_bb (USE_STMT (use_p));
+ skip = bb == LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
+ if (skip)
+ break;
+ }
+ }
+
+      /* If we found the defining statement of something that's part of the
+ chain then expand the chain with the new SSA_VARs being used. */
+ if (!skip && (chain->contains (dest) || move))
+ {
+ move = true;
+ for (unsigned x = 0; x < gimple_num_args (stmt); x++)
+ {
+ tree var = gimple_arg (stmt, x);
+ if (TREE_CODE (var) == SSA_NAME)
+ {
+ if (fixed->contains (dest))
+ {
+ move = false;
+ fixed->add (var);
+ }
+ else
+ chain->add (var);
+ }
+ else
+ {
+ use_operand_p use_p;
+ ssa_op_iter iter;
+ FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
+ {
+ tree op = USE_FROM_PTR (use_p);
+ gcc_assert (TREE_CODE (op) == SSA_NAME);
+ if (fixed->contains (dest))
+ {
+ move = false;
+ fixed->add (op);
+ }
+ else
+ chain->add (op);
+ }
+ }
+ }
+
+ if (dump_enabled_p ())
+ {
+ if (move)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "found chain %G", stmt);
+ else
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "ignored chain %G, not single use", stmt);
+ }
+ }
+
+ if (move)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "==> recording stmt %G", stmt);
+
+ for (tree ref : loads)
+ if (stmt_may_clobber_ref_p (stmt, ref, true))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "early breaks not supported as memory used"
+ " may alias.\n");
+ return false;
+ }
+
+ /* This statement is to be moved. */
+ LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).safe_push (stmt);
+
+ /* If we've moved a VDEF, extract the defining MEM and update
+ usages of it. */
+ tree vdef;
+ if ((vdef = gimple_vdef (stmt)))
+ {
+ *current_vdef = vdef;
+ *reaching_vuse = gimple_vuse (stmt);
+ }
+ }
+ }
+
+ gsi_prev (gstmt);
+
+ if (!validate_early_exit_stmts (loop_vinfo, chain, fixed, loads, bases,
+ reaching_vuse, current_vdef, gstmt, renames))
+ return false;
+
+ if (gimple_vuse (stmt)
+ && reaching_vuse && *reaching_vuse
+ && gimple_vuse (stmt) == *current_vdef)
+ {
+ tree new_vuse = *reaching_vuse;
+ tree *renamed = renames->get (new_vuse);
+ if (renamed)
+ new_vuse = *renamed;
+ LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_push ({stmt, new_vuse});
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "current_use: %T, new_use: %T, mem_ref: %G",
+ *current_vdef, new_vuse, stmt);
+
+ if (!renamed)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "stored: %T -> %T\n", *current_vdef, new_vuse);
+
+ renames->put (*current_vdef, new_vuse);
+ }
+ }
+
+ return true;
+}
+
/* Function vect_analyze_data_ref_dependences.
Examine all the data references in the loop, and make sure there do not
@@ -612,6 +884,84 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
return res;
}
+ /* If we have early break statements in the loop, check to see if they
+     are of a form we can vectorize. */
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ {
+ hash_set<tree> chain, fixed;
+ auto_vec<tree> loads;
+ auto_vec<data_reference *> bases;
+ hash_map<tree, tree> renames;
+ basic_block dest_bb = NULL;
+ tree vdef = NULL;
+ tree vuse = NULL;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "loop contains multiple exits, analyzing"
+ " statement dependencies.\n");
+
+ for (gcond *c : LOOP_VINFO_LOOP_CONDS (loop_vinfo))
+ {
+ stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (c);
+ if (STMT_VINFO_TYPE (loop_cond_info) != loop_exit_ctrl_vec_info_type)
+ continue;
+
+ gimple *stmt = STMT_VINFO_STMT (loop_cond_info);
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+
+	  /* Initialize the vuse chain with the one at the early break. */
+ if (!vuse)
+ vuse = gimple_vuse (c);
+
+ if (!validate_early_exit_stmts (loop_vinfo, &chain, &fixed, &loads,
+ &bases, &vuse, &vdef, &gsi, &renames))
+ return opt_result::failure_at (stmt,
+ "can't safely apply code motion to "
+ "dependencies of %G to vectorize "
+ "the early exit.\n", stmt);
+
+	  /* Save destination as we go, BBs are visited in order and the last one
+ is where statements should be moved to. */
+ if (!dest_bb)
+ dest_bb = gimple_bb (c);
+ else
+ {
+ basic_block curr_bb = gimple_bb (c);
+ if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb))
+ dest_bb = curr_bb;
+ }
+ }
+
+ dest_bb = FALLTHRU_EDGE (dest_bb)->dest;
+ gcc_assert (dest_bb);
+ LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
+
+ /* Do some renaming to update the uses chain. */
+ for (unsigned i = 0; i < LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).length (); i++)
+ {
+ auto g = LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo)[i];
+ tree *tmp = renames.get (g.second);
+ if (tmp)
+ LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo)[i]
+ = std::make_pair (g.first, *tmp);
+ }
+
+ /* TODO: Remove? It's useful debug statement but may be too much. */
+ for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "overrode use: %T, mem_ref: %G",
+ g.second, g.first);
+ }
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "recorded statements to be moved to BB %d\n",
+ LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index);
+ }
+
return opt_result::success ();
}
@@ -11192,6 +11192,45 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
epilogue_vinfo->shared->save_datarefs ();
}
+/* When vectorizing early break statements, instructions that happen before
+   the early break in the current BB need to be moved to after the early
+   break.  This function deals with that and assumes that any validity
+   checks have already been performed.
+
+   While moving the instructions if it encounters a VUSE or VDEF it then
+   corrects the VUSES as it moves the statements along.  The statements are
+   moved to the destination BB recorded in LOOP_VINFO_EARLY_BRK_DEST_BB. */
+
+static void
+move_early_exit_stmts (loop_vec_info loop_vinfo)
+{
+ /* Move all stmts that need moving. */
+ basic_block dest_bb = LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo);
+ gimple_stmt_iterator dest_gsi = gsi_start_bb (dest_bb);
+
+ for (gimple *stmt : LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "moving stmt %G", stmt);
+
+ gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt);
+ gsi_move_before (&stmt_gsi, &dest_gsi);
+ gsi_prev (&dest_gsi);
+ update_stmt (stmt);
+ }
+
+ /* Update all the stmts with their new reaching VUSES. */
+ for (auto p : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "updating vuse %G", p.first);
+ unlink_stmt_vdef (p.first);
+ gimple_set_vuse (p.first, p.second);
+ update_stmt (p.first);
+ }
+}
+
/* Function vect_transform_loop.
The analysis phase has determined that the loop is vectorizable.
@@ -11330,6 +11369,11 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
}
+ /* Handle any code motion that we need to for early-break vectorization after
+ we've done peeling but just before we start vectorizing. */
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ move_early_exit_stmts (loop_vinfo);
+
/* FORNOW: the vectorizer supports only loops which body consist
of one basic block (header + empty latch). When the vectorizer will
support more involved loop forms, the order by which the BBs are
@@ -919,6 +919,19 @@ public:
analysis. */
vec<_loop_vec_info *> epilogue_vinfos;
+ /* Used to store the list of statements needing to be moved if doing early
+ break vectorization as they would violate the scalar loop semantics if
+ vectorized in their current location. */
+ auto_vec<gimple *> early_break_conflict;
+
+ /* The final basic block where to move statements to. In the case of
+ multiple exits this could be pretty far away. */
+ basic_block early_break_dest_bb;
+
+ /* Statements whose VUSES need updating if early break vectorization is to
+ happen. */
+ auto_vec<std::pair<gimple*, tree>> early_break_vuses;
+
} *loop_vec_info;
/* Access Functions. */
@@ -972,6 +985,9 @@ public:
#define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains
#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
#define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
+#define LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS(L) (L)->early_break_conflict
+#define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
+#define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
#define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
#define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond
#define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies