===================================================================
@@ -15,7 +15,8 @@ main1 (unsigned int x, unsigned int y, u
int i;
unsigned int a0, a1, a2, a3;
- /* pin and pout may alias. */
+ /* pin and pout may alias. But since all the loads are before the first
store
+ the basic block is vectorizable. */
a0 = *pin++ + 23;
a1 = *pin++ + 142;
a2 = *pin++ + 2;
@@ -26,6 +27,9 @@ main1 (unsigned int x, unsigned int y, u
*pout++ = a2 * x;
*pout++ = a3 * y;
+ if (i)
+ __asm__ volatile ("" : : : "memory");
+
/* Check results. */
if (out[0] != (in[0] + 23) * x
|| out[1] != (in[1] + 142) * y
@@ -45,6 +49,6 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0
"slp" } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1
"slp" { target vect_hw_misalign } } } */
/* { dg-final { cleanup-tree-dump "slp" } } */
===================================================================
@@ -0,0 +1,53 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int
*pout)
+{
+ int i;
+ unsigned int a0, a1, a2, a3;
+
+ /* pin and pout may alias, and loads and stores are mixed. The basic
block
+ cannot be vectorized. */
+ a0 = *pin++ + 23;
+ *pout++ = a0 * x;
+ a1 = *pin++ + 142;
+ *pout++ = a1 * y;
+ a2 = *pin++ + 2;
+ *pout++ = a2 * x;
+ a3 = *pin++ + 31;
+ *pout++ = a3 * y;
+
+ if (i)
+ __asm__ volatile ("" : : : "memory");
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3, &in[0], &out[0]);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0
"slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
===================================================================
@@ -0,0 +1,55 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int a0, a1, a2, a3;
+ unsigned int *pin = &in[0];
+ unsigned int *pout = &out[0];
+
+ /* pin and pout are different, so despite the fact that loads and stores
+ are mixed the basic block is vectorizable. */
+ a0 = *pin++ + 23;
+ *pout++ = a0 * x;
+ a1 = *pin++ + 142;
+ *pout++ = a1 * y;
+ a2 = *pin++ + 2;
+ *pout++ = a2 * x;
+ a3 = *pin++ + 31;
+ *pout++ = a3 * y;
+
+ if (i)
+ __asm__ volatile ("" : : : "memory");
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1
"slp" { target vect_hw_misalign } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
===================================================================
@@ -1,3 +1,11 @@
+2010-09-01 Ira Rosen <irar@il.ibm.com>
+
+ * gcc.dg/vect/bb-slp-8.c: Separate the interesting part and the
+ check into different basic blocks. Expect vectorization if
misaligned
+ stores are supported.
+ * gcc.dg/vect/bb-slp-8a.c: New test.
+ * gcc.dg/vect/bb-slp-8b.c: New test.
+
2010-09-01 Richard Guenther <rguenther@suse.de>
* gcc.dg/vect/vect-outer-fir.c: Adjust.
===================================================================
@@ -633,6 +633,32 @@ get_earlier_stmt (gimple stmt1, gimple s
return stmt2;
}
+static inline gimple
+get_later_stmt (gimple stmt1, gimple stmt2)
+{
+ unsigned int uid1, uid2;
+
+ if (stmt1 == NULL)
+ return stmt2;
+
+ if (stmt2 == NULL)
+ return stmt1;
+
+ uid1 = gimple_uid (stmt1);
+ uid2 = gimple_uid (stmt2);
+
+ if (uid1 == 0 || uid2 == 0)
+ return NULL;
+
+ gcc_assert (uid1 <= VEC_length (vec_void_p, stmt_vec_info_vec));
+ gcc_assert (uid2 <= VEC_length (vec_void_p, stmt_vec_info_vec));
+
+ if (uid1 > uid2)
+ return stmt1;
+ else
+ return stmt2;
+}
+
static inline bool
is_pattern_stmt_p (stmt_vec_info stmt_info)
{
@@ -776,7 +802,7 @@ extern enum dr_alignment_support vect_su
extern tree vect_get_smallest_scalar_type (gimple, HOST_WIDE_INT *,
HOST_WIDE_INT *);
extern bool vect_analyze_data_ref_dependences (loop_vec_info, bb_vec_info,
- int *);
+ int *, bool *);
extern bool vect_enhance_data_refs_alignment (loop_vec_info);
extern bool vect_analyze_data_refs_alignment (loop_vec_info, bb_vec_info);
extern bool vect_verify_datarefs_alignment (loop_vec_info, bb_vec_info);
===================================================================
@@ -1378,7 +1378,7 @@ vect_analyze_loop_operations (loop_vec_i
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
- bool ok;
+ bool ok, dummy;
loop_vec_info loop_vinfo;
int max_vf = MAX_VECTORIZATION_FACTOR;
int min_vf = 2;
@@ -1444,7 +1444,7 @@ vect_analyze_loop (struct loop *loop)
the dependences.
FORNOW: fail at the first data dependence that we encounter. */
- ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
+ ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf,
&dummy);
if (!ok
|| max_vf < min_vf)
{
===================================================================
@@ -322,6 +322,64 @@ vect_equal_offsets (tree offset1, tree o
}
+/* Check dependence between DRA and DRB for basic block vectorization. */
+
+static bool
+vect_drs_dependent_in_basic_block (struct data_reference *dra,
+ struct data_reference *drb)
+{
+ HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
+ gimple earlier_stmt;
+
+ /* We only call this function for pairs of loads and stores, but we
verify
+ it here. */
+ if (DR_IS_READ (dra) == DR_IS_READ (drb))
+ {
+ if (DR_IS_READ (dra))
+ return false;
+ else
+ return true;
+ }
+
+ /* Check that the data-refs have same bases and offsets. If not, we
can't
+ determine if they are dependent. */
+ if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
+ && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
+ || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
+ || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
+ != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
+ || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
+ return true;
+
+ /* Check the types. */
+ type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
(dra))));
+ type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
(drb))));
+
+ if (type_size_a != type_size_b
+ || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
+ TREE_TYPE (DR_REF (drb))))
+ return true;
+
+ init_a = TREE_INT_CST_LOW (DR_INIT (dra));
+ init_b = TREE_INT_CST_LOW (DR_INIT (drb));
+
+ /* Two different locations - no dependence. */
+ if (init_a != init_b)
+ return false;
+
+ /* We have a read-write dependence. Check that the load is before the
store.
+ When we vectorize basic blocks, vector load can be only before
+ corresponding scalar load, and vector store can be only after its
+ corresponding scalar store. So the order of the acceses is preserved
in
+ case the load is before the store. */
+ earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
+ if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
+ return false;
+
+ return true;
+}
+
+
/* Function vect_check_interleaving.
Check if DRA and DRB are a part of interleaving. In case they are,
insert
@@ -495,7 +553,8 @@ vect_mark_for_runtime_alias_test (ddr_p
static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
- loop_vec_info loop_vinfo, int *max_vf)
+ loop_vec_info loop_vinfo, int *max_vf,
+ bool *data_dependence_in_bb)
{
unsigned int i;
struct loop *loop = NULL;
@@ -554,10 +613,14 @@ vect_analyze_data_ref_dependence (struct
print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
}
- /* Mark the statements as unvectorizable. */
- STMT_VINFO_VECTORIZABLE (stmtinfo_a) = false;
- STMT_VINFO_VECTORIZABLE (stmtinfo_b) = false;
-
+ /* We do not vectorize basic blocks with write-write dependencies.
*/
+ if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
+ return true;
+
+ /* We deal with read-write dependencies in basic blocks later (by
+ verifying that all the loads in the basic block are before all
the
+ stores). */
+ *data_dependence_in_bb = true;
return false;
}
@@ -577,7 +640,12 @@ vect_analyze_data_ref_dependence (struct
print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
}
- return true;
+ /* Do not vectorize basic blcoks with write-write dependences. */
+ if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
+ return true;
+
+ /* Check if this dependence is allowed in basic block vectorization.
*/
+ return vect_drs_dependent_in_basic_block (dra, drb);
}
/* Loop-based vectorization and known data dependence. */
@@ -678,7 +746,8 @@ vect_analyze_data_ref_dependence (struct
bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
- bb_vec_info bb_vinfo, int *max_vf)
+ bb_vec_info bb_vinfo, int *max_vf,
+ bool *data_dependence_in_bb)
{
unsigned int i;
VEC (ddr_p, heap) *ddrs = NULL;
@@ -693,7 +762,8 @@ vect_analyze_data_ref_dependences (loop_
ddrs = BB_VINFO_DDRS (bb_vinfo);
FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
- if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
+ if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
+ data_dependence_in_bb))
return false;
return true;
===================================================================
@@ -1082,6 +1082,24 @@ vect_find_first_load_in_slp_instance (sl
}
+/* Find the last store in SLP INSTANCE. */
+static gimple
+vect_find_last_store_in_slp_instance (slp_instance instance)
+{
+ int i;
+ slp_tree node;
+ gimple last_store = NULL, store;
+
+ node = SLP_INSTANCE_TREE (instance);
+ for (i = 0;
+ VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
+ i++)
+ last_store = get_later_stmt (store, last_store);
+
+ return last_store;
+}
+
+
/* Analyze an SLP instance starting from a group of strided stores. Call
vect_build_slp_tree to build a tree of packed stmts if possible.
Return FALSE if it's impossible to SLP any stmt in the loop. */
@@ -1503,6 +1521,42 @@ vect_slp_analyze_operations (bb_vec_info
return true;
}
+/* Check if loads and stores are mixed in the basic block (in that
+ case if we are not sure that the accesses differ, we can't vectorize
the
+ basic block). Also return FALSE in case that there is statement marked
as
+ not vectorizable. */
+
+static bool
+vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
+{
+ basic_block bb = BB_VINFO_BB (bb_vinfo);
+ gimple_stmt_iterator si;
+ bool detected_store = false;
+ gimple stmt;
+ struct data_reference *dr;
+
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ stmt = gsi_stmt (si);
+
+ /* We can't allow not analyzed statements, since they may contain
data
+ accesses. */
+ if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
+ return false;
+
+ if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
+ continue;
+
+ dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+ if (DR_IS_READ (dr) && detected_store)
+ return false;
+
+ if (!DR_IS_READ (dr))
+ detected_store = true;
+ }
+
+ return true;
+}
Hi, This patch implements data dependence analysis for basic block vectorization. For data dependence testing in basic block vectorization it's enough to check that all the loads in the basic block are before all the stores. Since we create vector loads before the first load and vector stores after the last store, we don't change the order of memory accesses. Bootstrapped and tested on powerpc64-suse-linux. Committed. Ira ChangeLog: * tree-vectorizer.h (get_later_stmt): New function. (vect_analyze_data_ref_dependences): Add argument. * tree-vect-loop.c (vect_analyze_loop): Update call to vect_analyze_data_ref_dependences. * tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): New function. (vect_analyze_data_ref_dependence): Add argument for basic block dependencies. Check dependencies in basic block vectorization. (vect_analyze_data_ref_dependences): Add argument and update call to vect_analyze_data_ref_dependences. * tree-vect-slp.c (vect_find_last_store_in_slp_instance): New. (vect_bb_vectorizable_with_dependencies): New. (vect_slp_analyze_bb): Check dependencies in basic block. (vect_schedule_slp_instance): Insert stores before the last store in SLP instance. testsuite/ChangeLog: * gcc.dg/vect/bb-slp-8.c: Separate the interesting part and the check into different basic blocks. Expect vectorization if misaligned stores are supported. * gcc.dg/vect/bb-slp-8a.c: New test. * gcc.dg/vect/bb-slp-8b.c: New test. /* Check if vectorization of the basic block is profitable. */ @@ -1585,6 +1639,8 @@ vect_slp_analyze_bb (basic_block bb) gimple_stmt_iterator gsi; int min_vf = 2; int max_vf = MAX_VECTORIZATION_FACTOR; + bool data_dependence_in_bb = false; + if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); @@ -1632,8 +1688,11 @@ vect_slp_analyze_bb (basic_block bb) return NULL; } - if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf) - || min_vf > max_vf) + if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf, + &data_dependence_in_bb) + || min_vf > max_vf + || (data_dependence_in_bb + && !vect_bb_vectorizable_with_dependencies (bb_vinfo))) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) fprintf (vect_dump, "not vectorized: unhandled data dependence " @@ -2420,6 +2479,14 @@ vect_schedule_slp_instance (slp_tree nod else si = gsi_for_stmt (stmt); + /* Stores should be inserted just before the last store. */ + if (STMT_VINFO_STRIDED_ACCESS (stmt_info) + && REFERENCE_CLASS_P (gimple_get_lhs (stmt))) + { + gimple last_store = vect_find_last_store_in_slp_instance (instance); + si = gsi_for_stmt (last_store); + } + is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance); return is_store; }