@@ -904,9 +904,9 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
/* Function vect_update_misalignment_for_peel.
- Sets DR's misalignment
+ Set DR's misalignment
- to 0 if it has the same alignment as DR_PEEL,
- - to the misalignment computed using NPEEL if DR's salignment is known,
+ - to the misalignment computed using NPEEL if DR's misalignment is known,
- to -1 (unknown) otherwise.
DR - the data reference whose misalignment is to be adjusted.
@@ -1293,7 +1293,7 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
{
vect_peel_info elem = *slot;
int dummy;
- unsigned int inside_cost = 0, outside_cost = 0, i;
+ unsigned int inside_cost = 0, outside_cost = 0;
gimple *stmt = DR_STMT (elem->dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@@ -1342,7 +1342,7 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
choosing an option with the lowest cost (if cost model is enabled) or the
option that aligns as many accesses as possible. */
-static struct data_reference *
+static struct _vect_peel_extended_info
vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
loop_vec_info loop_vinfo,
unsigned int *npeel,
@@ -1369,7 +1369,7 @@ vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_hta
*npeel = res.peel_info.npeel;
*body_cost_vec = res.body_cost_vec;
- return res.peel_info.dr;
+ return res;
}
/* Return true if the new peeling NPEEL is supported. */
@@ -1518,7 +1518,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum dr_alignment_support supportable_dr_alignment;
- struct data_reference *dr0 = NULL, *first_store = NULL;
+
+ struct data_reference *most_frequent_read = NULL;
+ unsigned int dr_read_count = 0;
+ struct data_reference *most_frequent_write = NULL;
+ unsigned int dr_write_count = 0;
struct data_reference *dr;
unsigned int i, j;
bool do_peeling = false;
@@ -1527,11 +1531,13 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
gimple *stmt;
stmt_vec_info stmt_info;
unsigned int npeel = 0;
- bool all_misalignments_unknown = true;
+ bool one_misalignment_known = false;
+ bool one_misalignment_unknown = false;
+
unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
unsigned possible_npeel_number = 1;
tree vectype;
- unsigned int nelements, mis, same_align_drs_max = 0;
+ unsigned int nelements, mis;
stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost ();
hash_table<peel_info_hasher> peeling_htab (1);
@@ -1652,57 +1658,67 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
npeel_tmp += nelements;
}
- all_misalignments_unknown = false;
- /* Data-ref that was chosen for the case that all the
- misalignments are unknown is not relevant anymore, since we
- have a data-ref with known alignment. */
- dr0 = NULL;
+ one_misalignment_known = true;
}
else
{
- /* If we don't know any misalignment values, we prefer
- peeling for data-ref that has the maximum number of data-refs
- with the same alignment, unless the target prefers to align
- stores over load. */
- if (all_misalignments_unknown)
- {
- unsigned same_align_drs
- = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
- if (!dr0
- || same_align_drs_max < same_align_drs)
- {
- same_align_drs_max = same_align_drs;
- dr0 = dr;
- }
- /* For data-refs with the same number of related
- accesses prefer the one where the misalign
- computation will be invariant in the outermost loop. */
- else if (same_align_drs_max == same_align_drs)
+ /* If we don't know any misalignment values, we prefer
+ peeling for data-ref that has the maximum number of data-refs
+ with the same alignment, unless the target prefers to align
+	     stores over loads.  */
+ unsigned same_align_dr_count
+ = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
+
+ /* For data-refs with the same number of related
+ accesses prefer the one where the misalign
+ computation will be invariant in the outermost loop. */
+ struct loop *ivloop_max_read = NULL, *ivloop_max_write = NULL,
+ *ivloop_dr = NULL;
+ if (most_frequent_read)
+ ivloop_max_read = outermost_invariant_loop_for_expr
+ (loop, DR_BASE_ADDRESS (most_frequent_read));
+ if (most_frequent_write)
+ ivloop_max_write = outermost_invariant_loop_for_expr
+ (loop, DR_BASE_ADDRESS (most_frequent_write));
+ ivloop_dr = outermost_invariant_loop_for_expr
+ (loop, DR_BASE_ADDRESS (dr));
+
+ if (DR_IS_READ (dr))
+ {
+ if (!most_frequent_read
+ || (same_align_dr_count > dr_read_count))
{
- struct loop *ivloop0, *ivloop;
- ivloop0 = outermost_invariant_loop_for_expr
- (loop, DR_BASE_ADDRESS (dr0));
- ivloop = outermost_invariant_loop_for_expr
- (loop, DR_BASE_ADDRESS (dr));
- if ((ivloop && !ivloop0)
- || (ivloop && ivloop0
- && flow_loop_nested_p (ivloop, ivloop0)))
- dr0 = dr;
+ most_frequent_read = dr;
+ dr_read_count = same_align_dr_count;
}
+ else if (same_align_dr_count == dr_read_count
+ && ((ivloop_dr && !ivloop_max_read)
+ || (ivloop_dr && ivloop_max_read
+ && flow_loop_nested_p
+ (ivloop_dr, ivloop_max_read))))
+ {
+ most_frequent_read = dr;
+ }
+ }
+ else if (DR_IS_WRITE (dr))
+ {
+ if (!most_frequent_write
+ || (same_align_dr_count > dr_write_count))
+ {
+ most_frequent_write = dr;
+ dr_write_count = same_align_dr_count;
+ }
+ else if (same_align_dr_count == dr_write_count
+ && ((ivloop_dr && !ivloop_max_write)
+ || (ivloop_dr && ivloop_max_write
+ && flow_loop_nested_p
+ (ivloop_dr, ivloop_max_write))))
+ {
+ most_frequent_write = dr;
+ }
+ }
- if (!first_store && DR_IS_WRITE (dr))
- first_store = dr;
- }
-
- /* If there are both known and unknown misaligned accesses in the
- loop, we choose peeling amount according to the known
- accesses. */
- if (!supportable_dr_alignment)
- {
- dr0 = dr;
- if (!first_store && DR_IS_WRITE (dr))
- first_store = dr;
- }
+ one_misalignment_unknown = true;
}
}
else
@@ -1723,111 +1739,207 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|| loop->inner)
do_peeling = false;
- if (do_peeling
- && all_misalignments_unknown
- && vect_supportable_dr_alignment (dr0, false))
+ struct _vect_peel_extended_info best_peel;
+ struct _vect_peel_extended_info peel_for_known_alignment;
+ peel_for_known_alignment.peel_info.dr = NULL;
+ struct _vect_peel_extended_info peel_for_unknown_alignment;
+ peel_for_unknown_alignment.peel_info.dr = NULL;
+
+ if (do_peeling && one_misalignment_known)
{
- /* Check if the target requires to prefer stores over loads, i.e., if
- misaligned stores are more expensive than misaligned loads (taking
- drs with same alignment into account). */
- if (first_store && DR_IS_READ (dr0))
- {
- unsigned int load_inside_cost = 0, load_outside_cost = 0;
- unsigned int store_inside_cost = 0, store_outside_cost = 0;
- unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
- unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
+ /* Peeling is possible, but there is no data access that is not supported
+ unless aligned. So we try to choose the best possible peeling. */
+
+ /* Choose the best peeling from the hash table. */
+ peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
+ (&peeling_htab, loop_vinfo, &npeel, &body_cost_vec);
+ }
+
+ if (do_peeling && one_misalignment_unknown)
+ {
+ /* Calculate the costs for aligning MOST_FREQUENT_READ, potentially
+ leaving everything else misaligned. */
+ unsigned int align_mf_read_inside_cost = 0;
+ unsigned int align_mf_read_outside_cost = 0;
+
+ if (most_frequent_read)
+ {
stmt_vector_for_cost dummy;
dummy.create (2);
+ vect_get_peeling_costs_all_drs (most_frequent_read,
+ &align_mf_read_inside_cost,
+ &align_mf_read_outside_cost,
+ &dummy, vf / 2, vf);
+	  dummy.release ();
+ }
+ else
+ {
+ align_mf_read_inside_cost = UINT_MAX;
+ align_mf_read_outside_cost = UINT_MAX;
+ }
- vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
- &dummy);
- vect_get_data_access_cost (first_store, &store_inside_cost,
- &store_outside_cost, &dummy);
+ /* Calculate the costs for aligning MOST_FREQUENT_WRITE, potentially
+ leaving everything else misaligned. */
+ unsigned int align_mf_write_inside_cost = 0;
+ unsigned int align_mf_write_outside_cost = 0;
+ if (most_frequent_write)
+ {
+ stmt_vector_for_cost dummy;
+ dummy.create (2);
+ vect_get_peeling_costs_all_drs (most_frequent_write,
+ &align_mf_write_inside_cost,
+ &align_mf_write_outside_cost,
+ &dummy, vf / 2, vf);
dummy.release ();
+ }
+ else
+ {
+ align_mf_write_inside_cost = UINT_MAX;
+ align_mf_write_outside_cost = UINT_MAX;
+ }
- /* Calculate the penalty for leaving FIRST_STORE unaligned (by
- aligning the load DR0). */
- load_inside_penalty = store_inside_cost;
- load_outside_penalty = store_outside_cost;
- for (i = 0;
- STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
- DR_STMT (first_store))).iterate (i, &dr);
- i++)
- if (DR_IS_READ (dr))
- {
- load_inside_penalty += load_inside_cost;
- load_outside_penalty += load_outside_cost;
- }
- else
- {
- load_inside_penalty += store_inside_cost;
- load_outside_penalty += store_outside_cost;
- }
-
- /* Calculate the penalty for leaving DR0 unaligned (by
- aligning the FIRST_STORE). */
- store_inside_penalty = load_inside_cost;
- store_outside_penalty = load_outside_cost;
- for (i = 0;
- STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
- DR_STMT (dr0))).iterate (i, &dr);
- i++)
- if (DR_IS_READ (dr))
- {
- store_inside_penalty += load_inside_cost;
- store_outside_penalty += load_outside_cost;
- }
- else
- {
- store_inside_penalty += store_inside_cost;
- store_outside_penalty += store_outside_cost;
- }
-
- if (load_inside_penalty > store_inside_penalty
- || (load_inside_penalty == store_inside_penalty
- && load_outside_penalty > store_outside_penalty))
- dr0 = first_store;
- }
+ /* Choose best peeling according to given load and store peeling
+ costs. */
+ if (align_mf_read_inside_cost > align_mf_write_inside_cost
+ || (align_mf_read_inside_cost == align_mf_write_inside_cost
+ && align_mf_read_outside_cost > align_mf_write_outside_cost))
+ {
+ peel_for_unknown_alignment.peel_info.dr = most_frequent_write;
+ peel_for_unknown_alignment.peel_info.count =
+ 1 + STMT_VINFO_SAME_ALIGN_REFS
+ (vinfo_for_stmt (DR_STMT (most_frequent_write))).length ();
+ peel_for_unknown_alignment.inside_cost = align_mf_write_inside_cost;
+ peel_for_unknown_alignment.outside_cost =
+ align_mf_write_outside_cost;
+ }
+ else
+ {
+ if (most_frequent_read)
+ {
+ peel_for_unknown_alignment.peel_info.dr = most_frequent_read;
+ peel_for_unknown_alignment.peel_info.count =
+ 1 + STMT_VINFO_SAME_ALIGN_REFS
+ (vinfo_for_stmt (DR_STMT (most_frequent_read))).length ();
+ }
+ else
+ {
+ peel_for_unknown_alignment.peel_info.dr = most_frequent_write;
+ peel_for_unknown_alignment.peel_info.count =
+ 1 + STMT_VINFO_SAME_ALIGN_REFS
+ (vinfo_for_stmt (DR_STMT (most_frequent_write))).length ();
+ }
+ peel_for_unknown_alignment.inside_cost = align_mf_read_inside_cost;
+ peel_for_unknown_alignment.outside_cost =
+ align_mf_read_outside_cost;
+ }
+
+ /* Add prologue and epilogue costs. */
+ stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
+ prologue_cost_vec.create (2);
+ epilogue_cost_vec.create (2);
+
+ int dummy;
+ peel_for_unknown_alignment.outside_cost
+ += vect_get_known_peeling_cost (loop_vinfo, vf / 2, &dummy,
+ &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
+ &prologue_cost_vec, &epilogue_cost_vec);
- /* In case there are only loads with different unknown misalignments, use
- peeling only if it may help to align other accesses in the loop or
- if it may help improving load bandwith when we'd end up using
- unaligned loads. */
- tree dr0_vt = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr0)));
- if (!first_store
- && !STMT_VINFO_SAME_ALIGN_REFS (
- vinfo_for_stmt (DR_STMT (dr0))).length ()
- && (vect_supportable_dr_alignment (dr0, false)
- != dr_unaligned_supported
- || (builtin_vectorization_cost (vector_load, dr0_vt, 0)
- == builtin_vectorization_cost (unaligned_load, dr0_vt, -1))))
- do_peeling = false;
+ prologue_cost_vec.release ();
+ epilogue_cost_vec.release ();
+
+ /* The code below expects npeel == 0 when we plan to peel vf/2
+ iterations, so do not set npeel = vf/2 here. */
+ peel_for_unknown_alignment.peel_info.npeel = 0;
}
- if (do_peeling && !dr0)
+ /* At this point, we have to choose between peeling for the datarefs with
+ known alignment and the ones with unknown alignment. Prefer the one
+ that aligns more datarefs in total. */
+ struct data_reference *dr0 = NULL;
+ if (do_peeling)
{
- /* Peeling is possible, but there is no data access that is not supported
- unless aligned. So we try to choose the best possible peeling. */
+ bool peel_for_unknown_alignment_valid =
+ peel_for_unknown_alignment.peel_info.dr != NULL;
+ bool peel_for_known_alignment_valid =
+ peel_for_known_alignment.peel_info.dr != NULL;
- /* We should get here only if there are drs with known misalignment. */
- gcc_assert (!all_misalignments_unknown);
+ gcc_assert (peel_for_known_alignment_valid
+ || peel_for_unknown_alignment_valid);
- /* Choose the best peeling from the hash table. */
- dr0 = vect_peeling_hash_choose_best_peeling (&peeling_htab,
- loop_vinfo, &npeel,
- &body_cost_vec);
- if (!dr0 || !npeel)
- do_peeling = false;
+ if (peel_for_known_alignment_valid && !peel_for_unknown_alignment_valid)
+ best_peel = peel_for_known_alignment;
+
+  else if (!peel_for_known_alignment_valid
+	   && peel_for_unknown_alignment_valid)
+    best_peel = peel_for_unknown_alignment;
+
+ else
+ {
+ /* Choose the best peeling for known and unknown alignment
+ according to the number of aligned datarefs. */
+ if (peel_for_unknown_alignment.peel_info.count
+ > peel_for_known_alignment.peel_info.count)
+ best_peel = peel_for_unknown_alignment;
+ else
+ best_peel = peel_for_known_alignment;
+ }
}
+ /* Calculate the penalty for no peeling, i.e. leaving everything
+ unaligned.
+ TODO: use something like an adapted vect_get_peeling_costs_all_drs. */
+ unsigned nopeel_inside_cost = 0;
+ unsigned nopeel_outside_cost = 0;
+
+ stmt_vector_for_cost dummy;
+ dummy.create (2);
+ FOR_EACH_VEC_ELT (datarefs, i, dr)
+ {
+ vect_get_data_access_cost (dr, &nopeel_inside_cost,
+ &nopeel_outside_cost, &dummy);
+ }
+ dummy.release ();
+
+ /* Add epilogue costs. As we do no peeling for alignment here, no prologue
+ costs will be recorded. */
+ stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
+ prologue_cost_vec.create (2);
+ epilogue_cost_vec.create (2);
+
+ int dummy2;
+ nopeel_outside_cost += vect_get_known_peeling_cost
+ (loop_vinfo, 0, &dummy2,
+ &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
+ &prologue_cost_vec, &epilogue_cost_vec);
+
+ prologue_cost_vec.release ();
+ epilogue_cost_vec.release ();
+
+
+  /* Check whether doing no peeling is no more expensive than the best
+     peeling found so far; BEST_PEEL is only initialized if DO_PEELING.  */
+  if (do_peeling && !unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
+      && ((nopeel_inside_cost < best_peel.inside_cost)
+	  || (nopeel_inside_cost == best_peel.inside_cost
+	      && nopeel_outside_cost <= best_peel.outside_cost)))
+ {
+ do_peeling = false;
+ npeel = 0;
+ }
+
+
if (do_peeling)
{
+ dr0 = best_peel.peel_info.dr;
+ npeel = best_peel.peel_info.npeel;
+
stmt = DR_STMT (dr0);
stmt_info = vinfo_for_stmt (stmt);
vectype = STMT_VINFO_VECTYPE (stmt_info);
nelements = TYPE_VECTOR_SUBPARTS (vectype);
+ /* Define a peeling if not already set and log it. */
if (known_alignment_for_access_p (dr0))
{
bool negative = DR_HAS_NEGATIVE_STEP (dr0);