===================================================================
@@ -5737,8 +5737,9 @@ preserved (e.g.@: used only by a reducti
@code{widen_mult_hi/lo} idioms will be used.
@end deftypefn
-@deftypefn {Target Hook} int TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST (enum vect_cost_for_stmt @var{type_of_cost})
-Returns cost of different scalar or vector statements for vectorization cost model.
+@deftypefn {Target Hook} int TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST (enum vect_cost_for_stmt @var{type_of_cost}, tree @var{vectype}, int @var{misalign})
+Returns cost of different scalar or vector statements for vectorization cost model.
+For vector memory operations the cost may depend on the type and misalignment value.
@end deftypefn
@deftypefn {Target Hook} bool TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE (const_tree @var{type}, bool @var{is_packed})
===================================================================
@@ -479,7 +479,9 @@ default_builtin_vectorized_conversion (u
/* Default vectorizer cost model values. */
int
-default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
+default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype ATTRIBUTE_UNUSED,
+ int misalign ATTRIBUTE_UNUSED)
{
switch (type_of_cost)
{
@@ -496,6 +498,7 @@ default_builtin_vectorization_cost (enum
return 1;
case unaligned_load:
+ case unaligned_store:
return 2;
case cond_branch_taken:
===================================================================
@@ -77,7 +77,7 @@ extern tree default_builtin_vectorized_f
extern tree default_builtin_vectorized_conversion (unsigned int, tree, tree);
-extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt);
+extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
extern tree default_builtin_reciprocal (unsigned int, bool, bool);
===================================================================
@@ -120,6 +120,7 @@ enum vect_cost_for_stmt
vector_load,
unaligned_load,
vector_store,
+ unaligned_store,
vec_to_scalar,
scalar_to_vec,
cond_branch_not_taken,
@@ -527,8 +528,9 @@ struct gcc_target
tree (* builtin_mul_widen_odd) (tree);
/* Cost of different vector/scalar statements in vectorization cost
- model. */
- int (* builtin_vectorization_cost) (enum vect_cost_for_stmt);
+ model. In case of misaligned vector loads and stores the cost depends
+ on the data type and misalignment value. */
+ int (* builtin_vectorization_cost) (enum vect_cost_for_stmt, tree, int);
/* Return true if vector alignment is reachable (by peeling N
iterations) for the given type. */
===================================================================
@@ -1976,25 +1976,18 @@ vect_gen_niters_for_prolog_loop (loop_ve
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
tree niters_type = TREE_TYPE (loop_niters);
- int step = 1;
- int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
- if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
- step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
-
pe = loop_preheader_edge (loop);
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{
- int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
- int elem_misalign = byte_misalign / element_size;
+ int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "known alignment = %d.", byte_misalign);
+ fprintf (vect_dump, "known peeling = %d.", npeel);
- iters = build_int_cst (niters_type,
- (((nelements - elem_misalign) & (nelements - 1)) / step));
+ iters = build_int_cst (niters_type, npeel);
}
else
{
@@ -2017,7 +2010,8 @@ vect_gen_niters_for_prolog_loop (loop_ve
/* Create: byte_misalign = addr & (vectype_size - 1) */
byte_misalign =
- fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
+ fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
+ vectype_size_minus_1);
/* Create: elem_misalign = byte_misalign / element_size */
elem_misalign =
@@ -2323,7 +2317,8 @@ vect_vfa_segment_size (struct data_refer
tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
DR_STEP (dr), vect_factor);
- if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
+ if (vect_supportable_dr_alignment (dr, false)
+ == dr_explicit_realign_optimized)
{
tree vector_size = TYPE_SIZE_UNIT
(STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
===================================================================
@@ -65,6 +65,7 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 3 "vect" { target vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target { { ! vector_alignment_reachable } && { ! vect_hw_misalign } } } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail { vect_no_align || { ! vector_alignment_reachable } } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || { ! vector_alignment_reachable } } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail { vect_no_align || { { ! vector_alignment_reachable } || vect_hw_misalign } } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { target vect_hw_misalign } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || { { ! vector_alignment_reachable } || vect_hw_misalign } } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
===================================================================
@@ -69,6 +69,8 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail { vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_hw_misalign } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
===================================================================
@@ -72,8 +72,8 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_hw_misalign } } } */
/* { dg-final { scan-tree-dump-times "not vectorized: unsupported unaligned store" 2 "vect" { xfail vect_hw_misalign } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 10 "vect" { target vect_hw_misalign } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { target vect_hw_misalign } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
===================================================================
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+int ib[N+5];
+
+__attribute__ ((noinline))
+int main1 ()
+{
+ int i;
+ int ia[N+1];
+
+  /* All the accesses are misaligned. With cost model disabled, we count
+     the number of aligned accesses for each peeling option, and in this case
+ we align the two loads if possible (i.e., if misaligned stores are
+ supported). */
+ for (i = 1; i <= N; i++)
+ {
+ ia[i] = ib[i+2] + ib[i+6];
+ }
+
+ /* check results: */
+ for (i = 1; i <= N; i++)
+ {
+ if (ia[i] != ib[i+2] + ib[i+6])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N+5; i++)
+ ib[i] = i;
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target vect_hw_misalign } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
===================================================================
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+/* Aligned store, misaligned loads.  */
+
+int ib[N+5];
+
+__attribute__ ((noinline))
+int main1 ()
+{
+ int i;
+ int ia[N+1];
+
+ /* The store is aligned and the loads are misaligned with the same
+ misalignment. Cost model is disabled. If misaligned stores are supported,
+ we peel according to the loads to align them. */
+ for (i = 0; i <= N; i++)
+ {
+ ia[i] = ib[i+2] + ib[i+6];
+ }
+
+ /* check results: */
+ for (i = 1; i <= N; i++)
+ {
+ if (ia[i] != ib[i+2] + ib[i+6])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N+5; i++)
+ ib[i] = i;
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target vect_hw_misalign } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target vect_hw_misalign } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
===================================================================
@@ -68,6 +68,8 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail { vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_hw_misalign } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
===================================================================
@@ -54,6 +54,6 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "slp" } } */
===================================================================
@@ -0,0 +1,55 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 128
+#define RES 21888
+
+/* Reduction over three arrays; 'ib' and 'ic' share the same misalignment.  */
+
+int ib[N+10];
+int ia[N+10];
+int ic[N+10];
+
+__attribute__ ((noinline))
+int main1 ()
+{
+ int i, suma = 0, sumb = 0, sumc = 0;
+
+ /* ib and ic have same misalignment, we peel to align them. */
+ for (i = 1; i <= N; i++)
+ {
+ suma += ia[i];
+ sumb += ib[i+6];
+ sumc += ic[i+2];
+ }
+
+ /* check results: */
+ if (suma + sumb + sumc != RES)
+ abort ();
+
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N+10; i++)
+ {
+ ib[i] = i;
+ ic[i] = i+2;
+ ia[i] = i/2;
+ }
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
===================================================================
@@ -0,0 +1,47 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+int ib[N+5];
+
+__attribute__ ((noinline))
+int main1 ()
+{
+ int i;
+ int ia[N+1];
+
+ /* Don't peel keeping one load and the store aligned. */
+ for (i = 0; i <= N; i++)
+ {
+ ia[i] = ib[i] + ib[i+6];
+ }
+
+ /* check results: */
+ for (i = 1; i <= N; i++)
+ {
+ if (ia[i] != ib[i] + ib[i+6])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N+5; i++)
+ ib[i] = i;
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
===================================================================
@@ -72,7 +72,7 @@ int main (void)
/* main && main1 together: */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 2 "vect" { target powerpc*-*-* i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { target { vect_no_align && {! vector_alignment_reachable} } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail { { vect_no_align } || {! vector_alignment_reachable} } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail { { vect_no_align } || { { ! vector_alignment_reachable} || vect_hw_misalign } } } } } */
/* in main1: */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target !powerpc*-*-* !i?86-*-* !x86_64-*-* } } } */
===================================================================
@@ -44,6 +44,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { {! vect_no_align} && vector_alignment_reachable } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { { vect_no_align } || {! vector_alignment_reachable} } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { { vect_no_align } || { { ! vector_alignment_reachable} || vect_hw_misalign } } } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target { vect_no_align || { {! vector_alignment_reachable} && {! vect_hw_misalign} } } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
===================================================================
@@ -27,13 +27,13 @@ __attribute__ ((noinline)) int main1 (in
for (i = 0; i < n; i++)
{
sa[i+7] = sb[i];
- ia[i+3] = ib[i];
+ ia[i+3] = ib[i+1];
}
/* check results: */
for (i = 0; i < n; i++)
{
- if (sa[i+7] != sb[i] || ia[i+3] != ib[i])
+ if (sa[i+7] != sb[i] || ia[i+3] != ib[i+1])
abort ();
}
@@ -44,7 +44,9 @@ __attribute__ ((noinline)) int main1 (in
access for peeling, and therefore will examine the option of
using a peeling factor = (V-3)%V = 1 for V=2,4.
This will not align the access 'sa[i+3]' (for which we need to
- peel 5 iterations), so the loop can not be vectorized. */
+ peel 5 iterations). However, 'ia[i+3]' also gets aligned if we peel 5
+ iterations, so the loop is vectorizable on all targets that support
+ unaligned loads. */
__attribute__ ((noinline)) int main2 (int n)
{
@@ -55,13 +57,13 @@ __attribute__ ((noinline)) int main2 (in
for (i = 0; i < n; i++)
{
ia[i+3] = ib[i];
- sa[i+3] = sb[i];
+ sa[i+3] = sb[i+1];
}
/* check results: */
for (i = 0; i < n; i++)
{
- if (sa[i+3] != sb[i] || ia[i+3] != ib[i])
+ if (sa[i+3] != sb[i+1] || ia[i+3] != ib[i])
abort ();
}
@@ -78,11 +80,8 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail {! vect_hw_misalign} } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail {! vect_hw_misalign} } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_align } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { vect_no_align } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail { vect_no_align } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
===================================================================
@@ -20,7 +20,9 @@ unsigned int ib[N] = {0,3,6,9,12,15,18,2
access for peeling, and therefore will examine the option of
using a peeling factor = VF-7%VF. This will result in a peeling factor 1,
which will also align the access to 'ia[i+3]', and the loop could be
- vectorized on all targets that support unaligned loads. */
+ vectorized on all targets that support unaligned loads.
+ Without cost model on targets that support misaligned stores, no peeling
+ will be applied since we want to keep the four loads aligned. */
__attribute__ ((noinline))
int main1 (int n)
@@ -50,7 +52,11 @@ int main1 (int n)
using a peeling factor = VF-3%VF. This will result in a peeling factor
1 if VF=4,2. This will not align the access to 'sa[i+3]', for which we
need to peel 5,1 iterations for VF=4,2 respectively, so the loop can not
- be vectorized. */
+ be vectorized. However, 'ia[i+3]' also gets aligned if we peel 5
+ iterations, so the loop is vectorizable on all targets that support
+ unaligned loads.
+ Without cost model on targets that support misaligned stores, no peeling
+ will be applied since we want to keep the four loads aligned. */
__attribute__ ((noinline))
int main2 (int n)
@@ -85,11 +91,10 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail {! vect_hw_misalign} } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail {! vect_hw_misalign} } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_align } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { target { vect_hw_misalign} } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { target { vect_hw_misalign } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
===================================================================
@@ -170,6 +170,21 @@ DEF_VEC_ALLOC_P(slp_instance, heap);
#define SLP_TREE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop
#define SLP_TREE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop
+
+typedef struct _vect_peel_info
+{
+ int npeel;
+ struct data_reference *dr;
+ unsigned int count;
+} *vect_peel_info;
+
+typedef struct _vect_peel_extended_info
+{
+ struct _vect_peel_info peel_info;
+ unsigned int inside_cost;
+ unsigned int outside_cost;
+} *vect_peel_extended_info;
+
/*-----------------------------------------------------------------*/
/* Info on vectorized loops. */
/*-----------------------------------------------------------------*/
@@ -245,6 +260,10 @@ typedef struct _loop_vec_info {
/* Reduction cycles detected in the loop. Used in loop-aware SLP. */
VEC (gimple, heap) *reductions;
+
+ /* Hash table used to choose the best peeling option. */
+ htab_t peeling_htab;
+
} *loop_vec_info;
/* Access Functions. */
@@ -270,6 +289,7 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
#define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
#define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
+#define LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
VEC_length (gimple, (L)->may_misalign_stmts) > 0
@@ -543,6 +563,8 @@ typedef struct _stmt_vec_info {
#define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp)
#define STMT_SLP_TYPE(S) (S)->slp_type
+#define VECT_MAX_COST 1000
+
/* The maximum number of intermediate steps required in multi-step type
conversion. */
#define MAX_INTERM_CVT_STEPS 3
@@ -743,11 +765,14 @@ extern void vect_remove_stores (gimple);
extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
tree, int);
+extern void vect_get_load_cost (struct data_reference *, int, bool,
+ unsigned int *, unsigned int *);
+extern void vect_get_store_cost (struct data_reference *, int, unsigned int *);
/* In tree-vect-data-refs.c. */
extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
extern enum dr_alignment_support vect_supportable_dr_alignment
- (struct data_reference *);
+ (struct data_reference *, bool);
extern tree vect_get_smallest_scalar_type (gimple, HOST_WIDE_INT *,
HOST_WIDE_INT *);
extern bool vect_analyze_data_ref_dependences (loop_vec_info, bb_vec_info,
@@ -795,7 +820,8 @@ extern bool vectorizable_induction (gimp
extern int vect_estimate_min_profitable_iters (loop_vec_info);
extern tree get_initial_def_for_reduction (gimple, tree, tree *);
extern int vect_min_worthwhile_factor (enum tree_code);
-
+extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, int);
+extern int vect_get_single_scalar_iteraion_cost (loop_vec_info);
/* In tree-vect-slp.c. */
extern void vect_free_slp_instance (slp_instance);
===================================================================
@@ -755,6 +755,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
+ LOOP_VINFO_PEELING_HTAB (res) = NULL;
return res;
}
@@ -845,6 +846,9 @@ destroy_loop_vec_info (loop_vec_info loo
VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
+ if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
+ htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
+
free (loop_vinfo);
loop->aux = NULL;
}
@@ -1122,7 +1126,11 @@ vect_analyze_loop_form (struct loop *loo
static inline
int vect_get_cost (enum vect_cost_for_stmt type_of_cost)
{
- return targetm.vectorize.builtin_vectorization_cost (type_of_cost);
+ tree dummy_type = NULL;
+ int dummy = 0;
+
+ return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
+ dummy_type, dummy);
}
@@ -1498,17 +1506,6 @@ vect_analyze_loop (struct loop *loop)
return NULL;
}
- /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
- ok = vect_analyze_slp (loop_vinfo, NULL);
- if (ok)
- {
- /* Decide which possible SLP instances to SLP. */
- vect_make_slp_decision (loop_vinfo);
-
- /* Find stmts that need to be both vectorized and SLPed. */
- vect_detect_hybrid_slp (loop_vinfo);
- }
-
/* This pass will decide on using loop versioning and/or loop peeling in
order to enhance the alignment of data references in the loop. */
@@ -1516,11 +1513,22 @@ vect_analyze_loop (struct loop *loop)
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "bad data alignment.");
+ fprintf (vect_dump, "bad data alignment.");
destroy_loop_vec_info (loop_vinfo, true);
return NULL;
}
+ /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
+ ok = vect_analyze_slp (loop_vinfo, NULL);
+ if (ok)
+ {
+ /* Decide which possible SLP instances to SLP. */
+ vect_make_slp_decision (loop_vinfo);
+
+ /* Find stmts that need to be both vectorized and SLPed. */
+ vect_detect_hybrid_slp (loop_vinfo);
+ }
+
/* Scan all the operations in the loop and make sure they are
vectorizable. */
@@ -2004,6 +2012,94 @@ vect_force_simple_reduction (loop_vec_in
double_reduc, true);
}
+/* Calculate the cost of one scalar iteration of the loop. */
+int
+vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+ int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
+ int innerloop_iters, i, stmt_cost;
+
+ /* Count statements in scalar loop. Using this as scalar cost for a single
+ iteration for now.
+
+ TODO: Add outer loop support.
+
+ TODO: Consider assigning different costs to different scalar
+ statements. */
+
+ /* FORNOW. */
+ if (loop->inner)
+ innerloop_iters = 50; /* FIXME */
+
+ for (i = 0; i < nbbs; i++)
+ {
+ gimple_stmt_iterator si;
+ basic_block bb = bbs[i];
+
+ if (bb->loop_father == loop->inner)
+ factor = innerloop_iters;
+ else
+ factor = 1;
+
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+
+ if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
+ continue;
+
+ if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
+ {
+ if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
+ stmt_cost = vect_get_cost (scalar_load);
+ else
+ stmt_cost = vect_get_cost (scalar_store);
+ }
+ else
+ stmt_cost = vect_get_cost (scalar_stmt);
+
+ scalar_single_iter_cost += stmt_cost * factor;
+ }
+ }
+ return scalar_single_iter_cost;
+}
+
+/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
+int
+vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
+ int *peel_iters_epilogue,
+ int scalar_single_iter_cost)
+{
+ int peel_guard_costs = 0;
+ int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ {
+ *peel_iters_epilogue = vf/2;
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "cost model: "
+ "epilogue peel iters set to vf/2 because "
+ "loop iterations are unknown .");
+
+ /* If peeled iterations are known but number of scalar loop
+ iterations are unknown, count a taken branch per peeled loop. */
+ peel_guard_costs = 2 * vect_get_cost (cond_branch_taken);
+ }
+ else
+ {
+ int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
+ peel_iters_prologue = niters < peel_iters_prologue ?
+ niters : peel_iters_prologue;
+ *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
+ }
+
+ return (peel_iters_prologue * scalar_single_iter_cost)
+ + (*peel_iters_epilogue * scalar_single_iter_cost)
+ + peel_guard_costs;
+}
+
/* Function vect_estimate_min_profitable_iters
Return the number of iterations required for the vector version of the
@@ -2028,7 +2124,7 @@ vect_estimate_min_profitable_iters (loop
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
- int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
int peel_guard_costs = 0;
int innerloop_iters = 0, factor;
VEC (slp_instance, heap) *slp_instances;
@@ -2099,7 +2195,6 @@ vect_estimate_min_profitable_iters (loop
&& (!STMT_VINFO_LIVE_P (stmt_info)
|| STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
continue;
- scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
/* FIXME: for stmts in the inner-loop in outer-loop vectorization,
some of the "outside" costs are generated inside the outer-loop. */
@@ -2107,6 +2202,8 @@ vect_estimate_min_profitable_iters (loop
}
}
+ scalar_single_iter_cost = vect_get_single_scalar_iteraion_cost (loop_vinfo);
+
/* Add additional cost for the peeled instructions in prologue and epilogue
loop.
@@ -2116,7 +2213,7 @@ vect_estimate_min_profitable_iters (loop
TODO: Build an expression that represents peel_iters for prologue and
epilogue to be used in a run-time test. */
- if (byte_misalign < 0)
+ if (npeel < 0)
{
peel_iters_prologue = vf/2;
if (vect_print_dump_info (REPORT_COST))
@@ -2137,46 +2234,18 @@ vect_estimate_min_profitable_iters (loop
not known. Hence guards remain the same. */
peel_guard_costs += 2 * (vect_get_cost (cond_branch_taken)
+ vect_get_cost (cond_branch_not_taken));
+ vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
+ + (peel_iters_epilogue * scalar_single_iter_cost)
+ + peel_guard_costs;
}
else
{
- if (byte_misalign)
- {
- struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
- int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
- tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
- int nelements = TYPE_VECTOR_SUBPARTS (vectype);
-
- peel_iters_prologue = nelements - (byte_misalign / element_size);
- }
- else
- peel_iters_prologue = 0;
-
- if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
- {
- peel_iters_epilogue = vf/2;
- if (vect_print_dump_info (REPORT_COST))
- fprintf (vect_dump, "cost model: "
- "epilogue peel iters set to vf/2 because "
- "loop iterations are unknown .");
-
- /* If peeled iterations are known but number of scalar loop
- iterations are unknown, count a taken branch per peeled loop. */
- peel_guard_costs += 2 * vect_get_cost (cond_branch_taken);
- }
- else
- {
- int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
- peel_iters_prologue = niters < peel_iters_prologue ?
- niters : peel_iters_prologue;
- peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
- }
+ peel_iters_prologue = npeel;
+ vec_outside_cost += vect_get_known_peeling_cost (loop_vinfo,
+ peel_iters_prologue, &peel_iters_epilogue,
+ scalar_single_iter_cost);
}
- vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
- + (peel_iters_epilogue * scalar_single_iter_cost)
- + peel_guard_costs;
-
/* FORNOW: The scalar outside cost is incremented in one of the
following ways:
===================================================================
@@ -810,7 +810,11 @@ vect_compute_data_ref_alignment (struct
NOTE: This is the only change to the code we make during
the analysis phase, before deciding to vectorize the loop. */
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "force alignment");
+ {
+ fprintf (vect_dump, "force alignment of ");
+ print_generic_expr (vect_dump, ref, TDF_SLIM);
+ }
+
DECL_ALIGN (base) = TYPE_ALIGN (vectype);
DECL_USER_ALIGN (base) = 1;
}
@@ -967,7 +971,7 @@ vect_verify_datarefs_alignment (loop_vec
|| !STMT_VINFO_VECTORIZABLE (stmt_info))
continue;
- supportable_dr_alignment = vect_supportable_dr_alignment (dr);
+ supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
if (!supportable_dr_alignment)
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
@@ -1061,6 +1065,189 @@ vector_alignment_reachable_p (struct dat
return true;
}
+
+/* Calculate the cost of the memory access represented by DR. */
+
+static void
+vect_get_data_access_cost (struct data_reference *dr,
+ unsigned int *inside_cost,
+ unsigned int *outside_cost)
+{
+ gimple stmt = DR_STMT (dr);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ int ncopies = vf / nunits;
+ bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
+
+ if (!supportable_dr_alignment)
+ *inside_cost = VECT_MAX_COST;
+ else
+ {
+ if (DR_IS_READ (dr))
+ vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
+ else
+ vect_get_store_cost (dr, ncopies, inside_cost);
+ }
+
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
+ "outside_cost = %d.", *inside_cost, *outside_cost);
+}
+
+
+static hashval_t
+vect_peeling_hash (const void *elem)
+{
+ const struct _vect_peel_info *peel_info;
+
+ peel_info = (const struct _vect_peel_info *) elem;
+ return (hashval_t) peel_info->npeel;
+}
+
+
+static int
+vect_peeling_hash_eq (const void *elem1, const void *elem2)
+{
+ const struct _vect_peel_info *a, *b;
+
+ a = (const struct _vect_peel_info *) elem1;
+ b = (const struct _vect_peel_info *) elem2;
+ return (a->npeel == b->npeel);
+}
+
+
+/* Insert DR into peeling hash table with NPEEL as key. */
+
+static void
+vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
+ int npeel)
+{
+ struct _vect_peel_info elem, *slot;
+ void **new_slot;
+ bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
+
+ elem.npeel = npeel;
+ slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
+ &elem);
+ if (slot)
+ slot->count++;
+ else
+ {
+ slot = XNEW (struct _vect_peel_info);
+ slot->npeel = npeel;
+ slot->dr = dr;
+ slot->count = 1;
+ new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
+ INSERT);
+ *new_slot = slot;
+ }
+
+ if (!supportable_dr_alignment && !flag_vect_cost_model)
+ slot->count += VECT_MAX_COST;
+}
+
+
+/* Traverse peeling hash table to find peeling option that aligns maximum
+ number of data accesses. */
+
+static int
+vect_peeling_hash_get_most_frequent (void **slot, void *data)
+{
+ vect_peel_info elem = (vect_peel_info) *slot;
+ vect_peel_extended_info max = (vect_peel_extended_info) data;
+
+ if (elem->count > max->peel_info.count)
+ {
+ max->peel_info.npeel = elem->npeel;
+ max->peel_info.count = elem->count;
+ max->peel_info.dr = elem->dr;
+ }
+
+ return 1;
+}
+
+
+/* Traverse peeling hash table and calculate cost for each peeling option. Find
+ one with the lowest cost. */
+
+static int
+vect_peeling_hash_get_lowest_cost (void **slot, void *data)
+{
+ vect_peel_info elem = (vect_peel_info) *slot;
+ vect_peel_extended_info min = (vect_peel_extended_info) data;
+ int save_misalignment, dummy;
+ unsigned int inside_cost = 0, outside_cost = 0, i;
+ gimple stmt = DR_STMT (elem->dr);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+ struct data_reference *dr;
+
+ for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
+ {
+ stmt = DR_STMT (dr);
+ stmt_info = vinfo_for_stmt (stmt);
+ /* For interleaving, only the alignment of the first access
+ matters. */
+ if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
+ && DR_GROUP_FIRST_DR (stmt_info) != stmt)
+ continue;
+
+ save_misalignment = DR_MISALIGNMENT (dr);
+ vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
+ vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
+ SET_DR_MISALIGNMENT (dr, save_misalignment);
+ }
+
+ outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
+ vect_get_single_scalar_iteraion_cost (loop_vinfo));
+
+ if (inside_cost < min->inside_cost
+ || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
+ {
+ min->inside_cost = inside_cost;
+ min->outside_cost = outside_cost;
+ min->peel_info.dr = elem->dr;
+ min->peel_info.npeel = elem->npeel;
+ }
+
+ return 1;
+}
+
+
+/* Choose best peeling option by traversing peeling hash table and either
+ choosing an option with the lowest cost (if cost model is enabled) or the
+ option that aligns as many accesses as possible. */
+
+static struct data_reference *
+vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
+ unsigned int *npeel)
+{
+ struct _vect_peel_extended_info res;
+
+ res.peel_info.dr = NULL;
+
+ if (flag_vect_cost_model)
+ {
+ res.inside_cost = INT_MAX;
+ res.outside_cost = INT_MAX;
+ htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
+ vect_peeling_hash_get_lowest_cost, &res);
+ }
+ else
+ {
+ res.peel_info.count = 0;
+ htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
+ vect_peeling_hash_get_most_frequent, &res);
+ }
+
+ *npeel = res.peel_info.npeel;
+ return res.peel_info.dr;
+}
+
+
/* Function vect_enhance_data_refs_alignment
This pass will use loop versioning and loop peeling in order to enhance
@@ -1158,15 +1345,21 @@ vect_enhance_data_refs_alignment (loop_v
VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum dr_alignment_support supportable_dr_alignment;
- struct data_reference *dr0 = NULL;
+ struct data_reference *dr0 = NULL, *first_store = NULL;
struct data_reference *dr;
- unsigned int i;
+ unsigned int i, j;
bool do_peeling = false;
bool do_versioning = false;
bool stat;
gimple stmt;
stmt_vec_info stmt_info;
int vect_versioning_for_alias_required;
+ unsigned int npeel = 0;
+ bool all_misalignments_unknown = true;
+ unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ unsigned possible_npeel_number = 1;
+ tree vectype;
+ unsigned int nelements, mis, same_align_drs_max = 0;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
@@ -1201,12 +1394,7 @@ vect_enhance_data_refs_alignment (loop_v
- How many accesses will become unaligned due to the peeling,
and the cost of misaligned accesses.
- The cost of peeling (the extra runtime checks, the increase
- in code size).
-
- The scheme we use FORNOW: peel to force the alignment of the first
- unsupported misaligned access in the loop.
-
- TODO: Use a cost model. */
+ in code size). */
for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
{
@@ -1219,15 +1407,108 @@ vect_enhance_data_refs_alignment (loop_v
&& DR_GROUP_FIRST_DR (stmt_info) != stmt)
continue;
- if (!DR_IS_READ (dr) && !aligned_access_p (dr))
+ supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
+ do_peeling = vector_alignment_reachable_p (dr);
+ if (do_peeling)
{
- do_peeling = vector_alignment_reachable_p (dr);
- if (do_peeling)
- dr0 = dr;
- if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "vector alignment may not be reachable");
- break;
- }
+ if (known_alignment_for_access_p (dr))
+ {
+ unsigned int npeel_tmp;
+
+ /* Save info about DR in the hash table. */
+ if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
+ LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
+ htab_create (1, vect_peeling_hash,
+ vect_peeling_hash_eq, free);
+
+ vectype = STMT_VINFO_VECTYPE (stmt_info);
+ nelements = TYPE_VECTOR_SUBPARTS (vectype);
+ mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
+ TREE_TYPE (DR_REF (dr))));
+ npeel_tmp = (nelements - mis) % vf;
+
+ /* For multiple types, it is possible that the bigger type access
+ will have more than one peeling option. E.g., a loop with two
+ types: one of size (vector size / 4), and the other one of
+ size (vector size / 8). Vectorization factor will be 8. If both
+ access are misaligned by 3, the first one needs one scalar
+ iteration to be aligned, and the second one needs 5. But
+ the first one will be aligned also by peeling 5 scalar
+ iterations, and in that case both accesses will be aligned.
+ Hence, except for the immediate peeling amount, we also want
+ to try to add full vector size, while we don't exceed
+ vectorization factor.
+ We do this automatically for cost model, since we calculate cost
+ for every peeling option. */
+ if (!flag_vect_cost_model)
+ possible_npeel_number = vf /nelements;
+
+ /* Handle the aligned case. We may decide to align some other
+ access, making DR unaligned. */
+ if (DR_MISALIGNMENT (dr) == 0)
+ {
+ npeel_tmp = 0;
+ if (!flag_vect_cost_model)
+ possible_npeel_number++;
+ }
+
+ for (j = 0; j < possible_npeel_number; j++)
+ {
+ gcc_assert (npeel_tmp <= vf);
+ vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
+ npeel_tmp += nelements;
+ }
+
+ all_misalignments_unknown = false;
+ /* Data-ref that was chosen for the case that all the
+ misalignments are unknown is not relevant anymore, since we
+ have a data-ref with known alignment. */
+ dr0 = NULL;
+ }
+ else
+ {
+ /* If we don't know all the misalignment values, we prefer
+ peeling for data-ref that has maximum number of data-refs
+ with the same alignment, unless the target prefers to align
+ stores over loads. */
+ if (all_misalignments_unknown)
+ {
+ if (same_align_drs_max < VEC_length (dr_p,
+ STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
+ || !dr0)
+ {
+ same_align_drs_max = VEC_length (dr_p,
+ STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
+ dr0 = dr;
+ }
+
+ if (!first_store && !DR_IS_READ (dr))
+ first_store = dr;
+ }
+
+ /* If there are both known and unknown misaligned accesses in the
+ loop, we choose peeling amount according to the known
+ accesses. */
+
+
+ if (!supportable_dr_alignment)
+ {
+ dr0 = dr;
+ if (!first_store && !DR_IS_READ (dr))
+ first_store = dr;
+ }
+ }
+ }
+ else
+ {
+ if (!aligned_access_p (dr))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vector alignment may not be reachable");
+
+ break;
+ }
+ }
}
vect_versioning_for_alias_required
@@ -1242,24 +1523,112 @@ vect_enhance_data_refs_alignment (loop_v
|| !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
do_peeling = false;
+ if (do_peeling && all_misalignments_unknown
+ && vect_supportable_dr_alignment (dr0, false))
+ {
+
+ /* Check if the target requires to prefer stores over loads, i.e., if
+ misaligned stores are more expensive than misaligned loads (taking
+ drs with same alignment into account). */
+ if (first_store && DR_IS_READ (dr0))
+ {
+ unsigned int load_inside_cost = 0, load_outside_cost = 0;
+ unsigned int store_inside_cost = 0, store_outside_cost = 0;
+ unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
+ unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
+
+ vect_get_data_access_cost (dr0, &load_inside_cost,
+ &load_outside_cost);
+ vect_get_data_access_cost (first_store, &store_inside_cost,
+ &store_outside_cost);
+
+ /* Calculate the penalty for leaving FIRST_STORE unaligned (by
+ aligning the load DR0). */
+ load_inside_penalty = store_inside_cost;
+ load_outside_penalty = store_outside_cost;
+ for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
+ (vinfo_for_stmt (DR_STMT (first_store))),
+ i, dr);
+ i++)
+ if (DR_IS_READ (dr))
+ {
+ load_inside_penalty += load_inside_cost;
+ load_outside_penalty += load_outside_cost;
+ }
+ else
+ {
+ load_inside_penalty += store_inside_cost;
+ load_outside_penalty += store_outside_cost;
+ }
+
+ /* Calculate the penalty for leaving DR0 unaligned (by
+ aligning the FIRST_STORE). */
+ store_inside_penalty = load_inside_cost;
+ store_outside_penalty = load_outside_cost;
+ for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
+ (vinfo_for_stmt (DR_STMT (dr0))),
+ i, dr);
+ i++)
+ if (DR_IS_READ (dr))
+ {
+ store_inside_penalty += load_inside_cost;
+ store_outside_penalty += load_outside_cost;
+ }
+ else
+ {
+ store_inside_penalty += store_inside_cost;
+ store_outside_penalty += store_outside_cost;
+ }
+
+ if (load_inside_penalty > store_inside_penalty
+ || (load_inside_penalty == store_inside_penalty
+ && load_outside_penalty > store_outside_penalty))
+ dr0 = first_store;
+ }
+
+ /* In case there are only loads with different unknown misalignments, use
+ peeling only if it may help to align other accesses in the loop. */
+ if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
+ (vinfo_for_stmt (DR_STMT (dr0))))
+ && vect_supportable_dr_alignment (dr0, false)
+ != dr_unaligned_supported)
+ do_peeling = false;
+ }
+
+ if (do_peeling && !dr0)
+ {
+ /* Peeling is possible, but there is no data access that is not supported
+ unless aligned. So we try to choose the best possible peeling. */
+
+ /* We should get here only if there are drs with known misalignment. */
+ gcc_assert (!all_misalignments_unknown);
+
+ /* Choose the best peeling from the hash table. */
+ dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
+ if (!dr0 || !npeel)
+ do_peeling = false;
+ }
+
if (do_peeling)
{
- int mis;
- int npeel = 0;
- gimple stmt = DR_STMT (dr0);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- int nelements = TYPE_VECTOR_SUBPARTS (vectype);
+ stmt = DR_STMT (dr0);
+ stmt_info = vinfo_for_stmt (stmt);
+ vectype = STMT_VINFO_VECTYPE (stmt_info);
+ nelements = TYPE_VECTOR_SUBPARTS (vectype);
if (known_alignment_for_access_p (dr0))
{
- /* Since it's known at compile time, compute the number of iterations
- in the peeled loop (the peeling factor) for use in updating
- DR_MISALIGNMENT values. The peeling factor is the vectorization
- factor minus the misalignment as an element count. */
- mis = DR_MISALIGNMENT (dr0);
- mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
- npeel = nelements - mis;
+ if (!npeel)
+ {
+ /* Since it's known at compile time, compute the number of
+ iterations in the peeled loop (the peeling factor) for use in
+ updating DR_MISALIGNMENT values. The peeling factor is the
+ vectorization factor minus the misalignment as an element
+ count. */
+ mis = DR_MISALIGNMENT (dr0);
+ mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
+ npeel = nelements - mis;
+ }
/* For interleaved data access every iteration accesses all the
members of the group, therefore we divide the number of iterations
@@ -1290,7 +1659,7 @@ vect_enhance_data_refs_alignment (loop_v
save_misalignment = DR_MISALIGNMENT (dr);
vect_update_misalignment_for_peel (dr, dr0, npeel);
- supportable_dr_alignment = vect_supportable_dr_alignment (dr);
+ supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
SET_DR_MISALIGNMENT (dr, save_misalignment);
if (!supportable_dr_alignment)
@@ -1300,6 +1669,15 @@ vect_enhance_data_refs_alignment (loop_v
}
}
+ if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
+ {
+ stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+ if (!stat)
+ do_peeling = false;
+ else
+ return stat;
+ }
+
if (do_peeling)
{
/* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
@@ -1314,7 +1692,10 @@ vect_enhance_data_refs_alignment (loop_v
vect_update_misalignment_for_peel (dr, dr0, npeel);
LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
- LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
+ if (npeel)
+ LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
+ else
+ LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
SET_DR_MISALIGNMENT (dr0, 0);
if (vect_print_dump_info (REPORT_ALIGNMENT))
fprintf (vect_dump, "Alignment of access forced using peeling.");
@@ -1358,7 +1739,7 @@ vect_enhance_data_refs_alignment (loop_v
&& DR_GROUP_FIRST_DR (stmt_info) != stmt))
continue;
- supportable_dr_alignment = vect_supportable_dr_alignment (dr);
+ supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
if (!supportable_dr_alignment)
{
@@ -1467,7 +1848,7 @@ vect_find_same_alignment_drs (struct dat
if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
return;
- if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
+ if (dra == drb)
return;
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
@@ -3561,13 +3942,16 @@ vect_can_force_dr_alignment_p (const_tre
return (alignment <= MAX_STACK_ALIGNMENT);
}
-/* Function vect_supportable_dr_alignment
- Return whether the data reference DR is supported with respect to its
+/* Return whether the data reference DR is supported with respect to its
+ alignment.
+ If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
+ if it is aligned, i.e., check if it is possible to vectorize it with different
alignment. */
enum dr_alignment_support
-vect_supportable_dr_alignment (struct data_reference *dr)
+vect_supportable_dr_alignment (struct data_reference *dr,
+ bool check_aligned_accesses)
{
gimple stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
@@ -3577,7 +3961,7 @@ vect_supportable_dr_alignment (struct da
struct loop *vect_loop = NULL;
bool nested_in_vect_loop = false;
- if (aligned_access_p (dr))
+ if (aligned_access_p (dr) && !check_aligned_accesses)
return dr_aligned;
if (!loop_vinfo)
===================================================================
@@ -545,6 +545,18 @@ vect_mark_stmts_to_be_vectorized (loop_v
}
+/* Get cost by calling cost target builtin. */
+
+static inline
+int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
+{
+ tree dummy_type = NULL;
+ int dummy = 0;
+
+ return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
+ dummy_type, dummy);
+}
+
int
cost_for_stmt (gimple stmt)
{
@@ -553,9 +565,9 @@ cost_for_stmt (gimple stmt)
switch (STMT_VINFO_TYPE (stmt_info))
{
case load_vec_info_type:
- return targetm.vectorize.builtin_vectorization_cost (scalar_load);
+ return vect_get_stmt_cost (scalar_load);
case store_vec_info_type:
- return targetm.vectorize.builtin_vectorization_cost (scalar_store);
+ return vect_get_stmt_cost (scalar_store);
case op_vec_info_type:
case condition_vec_info_type:
case assignment_vec_info_type:
@@ -565,7 +577,7 @@ cost_for_stmt (gimple stmt)
case type_demotion_vec_info_type:
case type_conversion_vec_info_type:
case call_vec_info_type:
- return targetm.vectorize.builtin_vectorization_cost (scalar_stmt);
+ return vect_get_stmt_cost (scalar_stmt);
case undef_vec_info_type:
default:
gcc_unreachable ();
@@ -589,15 +601,13 @@ vect_model_simple_cost (stmt_vec_info st
if (PURE_SLP_STMT (stmt_info))
return;
- inside_cost = ncopies
- * targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
/* FORNOW: Assuming maximum 2 args per stmts. */
for (i = 0; i < 2; i++)
{
if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
- outside_cost
- += targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ outside_cost += vect_get_stmt_cost (vector_stmt);
}
if (vect_print_dump_info (REPORT_COST))
@@ -638,22 +648,39 @@ vect_model_store_cost (stmt_vec_info stm
enum vect_def_type dt, slp_tree slp_node)
{
int group_size;
- int inside_cost = 0, outside_cost = 0;
+ unsigned int inside_cost = 0, outside_cost = 0;
+ struct data_reference *first_dr;
+ gimple first_stmt;
/* The SLP costs were already calculated during SLP tree build. */
if (PURE_SLP_STMT (stmt_info))
return;
if (dt == vect_constant_def || dt == vect_external_def)
- outside_cost
- = targetm.vectorize.builtin_vectorization_cost (scalar_to_vec);
+ outside_cost = vect_get_stmt_cost (scalar_to_vec);
/* Strided access? */
- if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
- group_size = vect_cost_strided_group_size (stmt_info);
+ if (DR_GROUP_FIRST_DR (stmt_info))
+ {
+ if (slp_node)
+ {
+ first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
+ group_size = 1;
+ }
+ else
+ {
+ first_stmt = DR_GROUP_FIRST_DR (stmt_info);
+ group_size = vect_cost_strided_group_size (stmt_info);
+ }
+
+ first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+ }
/* Not a strided access. */
else
- group_size = 1;
+ {
+ group_size = 1;
+ first_dr = STMT_VINFO_DATA_REF (stmt_info);
+ }
/* Is this an access in a group of stores, which provide strided access?
If so, add in the cost of the permutes. */
@@ -661,7 +688,7 @@ vect_model_store_cost (stmt_vec_info stm
{
/* Uses a high and low interleave operation for each needed permute. */
inside_cost = ncopies * exact_log2(group_size) * group_size
- * targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ * vect_get_stmt_cost (vector_stmt);
if (vect_print_dump_info (REPORT_COST))
fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
@@ -670,8 +697,7 @@ vect_model_store_cost (stmt_vec_info stm
}
/* Costs of the stores. */
- inside_cost += ncopies
- * targetm.vectorize.builtin_vectorization_cost (vector_store);
+ vect_get_store_cost (first_dr, ncopies, &inside_cost);
if (vect_print_dump_info (REPORT_COST))
fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
@@ -683,6 +709,49 @@ vect_model_store_cost (stmt_vec_info stm
}
+/* Calculate cost of DR's memory access. */
+void
+vect_get_store_cost (struct data_reference *dr, int ncopies,
+ unsigned int *inside_cost)
+{
+ int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+
+ switch (alignment_support_scheme)
+ {
+ case dr_aligned:
+ {
+ *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
+
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "vect_model_store_cost: aligned.");
+
+ break;
+ }
+
+ case dr_unaligned_supported:
+ {
+ gimple stmt = DR_STMT (dr);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+
+ /* Here, we assign an additional cost for the unaligned store. */
+ *inside_cost += ncopies
+ * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
+ vectype, DR_MISALIGNMENT (dr));
+
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
+ "hardware.");
+
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
/* Function vect_model_load_cost
Models cost for loads. In the case of strided accesses, the last access
@@ -695,10 +764,9 @@ vect_model_load_cost (stmt_vec_info stmt
{
int group_size;
- int alignment_support_cheme;
gimple first_stmt;
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
- int inside_cost = 0, outside_cost = 0;
+ unsigned int inside_cost = 0, outside_cost = 0;
/* The SLP costs were already calculated during SLP tree build. */
if (PURE_SLP_STMT (stmt_info))
@@ -718,29 +786,47 @@ vect_model_load_cost (stmt_vec_info stmt
first_dr = dr;
}
- alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
-
/* Is this an access in a group of loads providing strided access?
If so, add in the cost of the permutes. */
if (group_size > 1)
{
/* Uses an even and odd extract operations for each needed permute. */
inside_cost = ncopies * exact_log2(group_size) * group_size
- * targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ * vect_get_stmt_cost (vector_stmt);
if (vect_print_dump_info (REPORT_COST))
fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
group_size);
-
}
/* The loads themselves. */
- switch (alignment_support_cheme)
+ vect_get_load_cost (first_dr, ncopies,
+ ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node),
+ &inside_cost, &outside_cost);
+
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
+ "outside_cost = %d .", inside_cost, outside_cost);
+
+ /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
+ stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
+ stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
+}
+
+
+/* Calculate cost of DR's memory access. */
+void
+vect_get_load_cost (struct data_reference *dr, int ncopies,
+ bool add_realign_cost, unsigned int *inside_cost,
+ unsigned int *outside_cost)
+{
+ int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+
+ switch (alignment_support_scheme)
{
case dr_aligned:
{
- inside_cost += ncopies
- * targetm.vectorize.builtin_vectorization_cost (vector_load);
+ inside_cost += ncopies * vect_get_stmt_cost (vector_load);
if (vect_print_dump_info (REPORT_COST))
fprintf (vect_dump, "vect_model_load_cost: aligned.");
@@ -749,10 +835,14 @@ vect_model_load_cost (stmt_vec_info stmt
}
case dr_unaligned_supported:
{
- /* Here, we assign an additional cost for the unaligned load. */
- inside_cost += ncopies
- * targetm.vectorize.builtin_vectorization_cost (unaligned_load);
+ gimple stmt = DR_STMT (dr);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ /* Here, we assign an additional cost for the unaligned load. */
+ *inside_cost += ncopies
+ * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
+ vectype, DR_MISALIGNMENT (dr));
if (vect_print_dump_info (REPORT_COST))
fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
"hardware.");
@@ -761,16 +851,14 @@ vect_model_load_cost (stmt_vec_info stmt
}
case dr_explicit_realign:
{
- inside_cost += ncopies * (2
- * targetm.vectorize.builtin_vectorization_cost (vector_load)
- + targetm.vectorize.builtin_vectorization_cost (vector_stmt));
+ *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
+ + vect_get_stmt_cost (vector_stmt));
/* FIXME: If the misalignment remains fixed across the iterations of
the containing loop, the following cost should be added to the
outside costs. */
if (targetm.vectorize.builtin_mask_for_load)
- inside_cost
- += targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ *inside_cost += vect_get_stmt_cost (vector_stmt);
break;
}
@@ -787,32 +875,21 @@ vect_model_load_cost (stmt_vec_info stmt
access in the group. Inside the loop, there is a load op
and a realignment op. */
- if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
+ if (add_realign_cost)
{
- outside_cost = 2
- * targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
if (targetm.vectorize.builtin_mask_for_load)
- outside_cost
- += targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ *outside_cost += vect_get_stmt_cost (vector_stmt);
}
- inside_cost += ncopies
- * (targetm.vectorize.builtin_vectorization_cost (vector_load)
- + targetm.vectorize.builtin_vectorization_cost (vector_stmt));
+ *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
+ + vect_get_stmt_cost (vector_stmt));
break;
}
default:
gcc_unreachable ();
}
-
- if (vect_print_dump_info (REPORT_COST))
- fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
- "outside_cost = %d .", inside_cost, outside_cost);
-
- /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
- stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
- stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
@@ -3141,7 +3218,7 @@ vectorizable_store (gimple stmt, gimple_
dr_chain = VEC_alloc (tree, heap, group_size);
oprnds = VEC_alloc (tree, heap, group_size);
- alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
+ alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
gcc_assert (alignment_support_scheme);
/* In case the vectorization factor (VF) is bigger than the number
@@ -3505,7 +3582,7 @@ vectorizable_load (gimple stmt, gimple_s
group_size = vec_num = 1;
}
- alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
+ alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
gcc_assert (alignment_support_scheme);
/* In case the vectorization factor (VF) is bigger than the number
===================================================================
@@ -209,7 +209,7 @@ static rtx spu_addr_space_legitimize_add
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
-static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt);
+static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
static bool spu_vector_alignment_reachable (const_tree, bool);
static tree spu_builtin_vec_perm (tree, tree *);
static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
@@ -6695,7 +6695,9 @@ spu_builtin_mask_for_load (void)
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
-spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
+spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype ATTRIBUTE_UNUSED,
+ int misalign ATTRIBUTE_UNUSED)
{
switch (type_of_cost)
{
===================================================================
@@ -29325,7 +29325,9 @@ static const struct attribute_spec ix86_
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
-ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
+ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype ATTRIBUTE_UNUSED,
+ int misalign ATTRIBUTE_UNUSED)
{
switch (type_of_cost)
{
@@ -29354,6 +29356,7 @@ ix86_builtin_vectorization_cost (enum ve
return ix86_cost->scalar_to_vec_cost;
case unaligned_load:
+ case unaligned_store:
return ix86_cost->vec_unalign_load_cost;
case cond_branch_taken:
===================================================================
@@ -1056,6 +1056,8 @@ static bool rs6000_builtin_support_vecto
machine_mode,
const_tree,
int, bool);
+static int rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt,
+ tree, int);
static void def_builtin (int, const char *, tree, int);
static bool rs6000_vector_alignment_reachable (const_tree, bool);
@@ -1448,6 +1450,9 @@ static const struct attribute_spec rs600
rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ rs6000_builtin_vectorization_cost
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
@@ -3300,12 +3305,19 @@ rs6000_builtin_support_vector_misalignme
if (misalignment == -1)
{
- /* misalignment factor is unknown at compile time but we know
+ /* Misalignment factor is unknown at compile time but we know
it's word aligned. */
if (rs6000_vector_alignment_reachable (type, is_packed))
- return true;
+ {
+ int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
+
+ if (element_size == 64 || element_size == 32)
+ return true;
+ }
+
return false;
}
+
/* VSX supports word-aligned vector. */
if (misalignment % 4 == 0)
return true;
@@ -3371,6 +3383,106 @@ rs6000_builtin_vec_perm (tree type, tree
return d;
}
+
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+static int
+rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype, int misalign)
+{
+ unsigned elements;
+
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ case scalar_load:
+ case scalar_store:
+ case vector_stmt:
+ case vector_load:
+ case vector_store:
+ case vec_to_scalar:
+ case scalar_to_vec:
+ case cond_branch_not_taken:
+ case vec_perm:
+ return 1;
+
+ case cond_branch_taken:
+ return 3;
+
+ case unaligned_load:
+ if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
+ {
+ elements = TYPE_VECTOR_SUBPARTS (vectype);
+ if (elements == 2)
+ /* Double word aligned. */
+ return 2;
+
+ if (elements == 4)
+ {
+ switch (misalign)
+ {
+ case 8:
+ /* Double word aligned. */
+ return 2;
+
+ case -1:
+ /* Unknown misalignment. */
+ case 4:
+ case 12:
+ /* Word aligned. */
+ return 22;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+
+ if (TARGET_ALTIVEC)
+ /* Misaligned loads are not supported. */
+ gcc_unreachable ();
+
+ return 2;
+
+ case unaligned_store:
+ if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
+ {
+ elements = TYPE_VECTOR_SUBPARTS (vectype);
+ if (elements == 2)
+ /* Double word aligned. */
+ return 2;
+
+ if (elements == 4)
+ {
+ switch (misalign)
+ {
+ case 8:
+ /* Double word aligned. */
+ return 2;
+
+ case -1:
+ /* Unknown misalignment. */
+ case 4:
+ case 12:
+ /* Word aligned. */
+ return 23;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+
+ if (TARGET_ALTIVEC)
+ /* Misaligned stores are not supported. */
+ gcc_unreachable ();
+
+ return 2;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Handle generic options of the form -mfoo=yes/no.
NAME is the option name.
VALUE is the option value.
===================================================================
@@ -560,7 +560,7 @@ vect_build_slp_tree (loop_vec_info loop_
if (first_load == stmt)
{
first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
- if (vect_supportable_dr_alignment (first_dr)
+ if (vect_supportable_dr_alignment (first_dr, false)
== dr_unaligned_unsupported)
{
if (vect_print_dump_info (REPORT_SLP))
@@ -646,7 +646,7 @@ vect_build_slp_tree (loop_vec_info loop_
{
VEC_safe_push (slp_tree, heap, *loads, *node);
*inside_cost
- += targetm.vectorize.builtin_vectorization_cost (vec_perm)
+ += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
* group_size;
}