Patchwork Data dependence analysis for basic block SLP

login
register
mail settings
Submitter Ira Rosen
Date Sept. 2, 2010, 6:05 a.m.
Message ID <OFDEE9A586.A5068D9D-ONC2257780.00419EA7-C2257792.00217644@il.ibm.com>
Download mbox | patch
Permalink /patch/63447/
State New
Headers show

Comments

Ira Rosen - Sept. 2, 2010, 6:05 a.m.
Hi,

This patch implements data dependence analysis for basic block
vectorization.

For data dependence testing in basic block vectorization it's enough to
check that all the loads in the basic block are before all the stores.
Since we create vector loads before the first load and vector stores after
the last store, we don't change the order of memory accesses.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

	* tree-vectorizer.h (get_later_stmt): New function.
	(vect_analyze_data_ref_dependences): Add argument.
	* tree-vect-loop.c (vect_analyze_loop): Update call to
	vect_analyze_data_ref_dependences.
	* tree-vect-data-refs.c (vect_drs_dependent_in_basic_block):
	New function.
	(vect_analyze_data_ref_dependence): Add argument for basic block
	dependencies. Check dependencies in basic block vectorization.
	(vect_analyze_data_ref_dependences): Add argument and update call to
	vect_analyze_data_ref_dependences.
	* tree-vect-slp.c (vect_find_last_store_in_slp_instance): New.
	(vect_bb_vectorizable_with_dependencies): New.
	(vect_slp_analyze_bb): Check dependencies in basic block.
	(vect_schedule_slp_instance): Insert stores before the last store in
	SLP instance.

testsuite/ChangeLog:

	* gcc.dg/vect/bb-slp-8.c: Separate the interesting part and the
	check into different basic blocks. Expect vectorization if misaligned
	stores are supported.
	* gcc.dg/vect/bb-slp-8a.c: New test.
	* gcc.dg/vect/bb-slp-8b.c: New test.


 /* Check if vectorization of the basic block is profitable.  */

@@ -1585,6 +1639,8 @@ vect_slp_analyze_bb (basic_block bb)
   gimple_stmt_iterator gsi;
   int min_vf = 2;
   int max_vf = MAX_VECTORIZATION_FACTOR;
+  bool data_dependence_in_bb = false;
+

   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
@@ -1632,8 +1688,11 @@ vect_slp_analyze_bb (basic_block bb)
       return NULL;
     }

-   if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
-       || min_vf > max_vf)
+   if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
+                                           &data_dependence_in_bb)
+       || min_vf > max_vf
+       || (data_dependence_in_bb
+           && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
      {
        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
         fprintf (vect_dump, "not vectorized: unhandled data dependence "
@@ -2420,6 +2479,14 @@ vect_schedule_slp_instance (slp_tree nod
   else
     si = gsi_for_stmt (stmt);

+  /* Stores should be inserted just before the last store.  */
+  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
+      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
+    {
+      gimple last_store = vect_find_last_store_in_slp_instance (instance);
+      si = gsi_for_stmt (last_store);
+    }
+
   is_store = vect_transform_stmt (stmt, &si, &strided_store, node,
instance);
   return is_store;
 }

Patch

Index: testsuite/gcc.dg/vect/bb-slp-8.c
===================================================================
--- testsuite/gcc.dg/vect/bb-slp-8.c    (revision 163732)
+++ testsuite/gcc.dg/vect/bb-slp-8.c    (working copy)
@@ -15,7 +15,8 @@  main1 (unsigned int x, unsigned int y, u
   int i;
   unsigned int a0, a1, a2, a3;

-  /* pin and pout may alias.  */
+  /* pin and pout may alias. But since all the loads are before the first
store
+     the basic block is vectorizable.  */
   a0 = *pin++ + 23;
   a1 = *pin++ + 142;
   a2 = *pin++ + 2;
@@ -26,6 +27,9 @@  main1 (unsigned int x, unsigned int y, u
   *pout++ = a2 * x;
   *pout++ = a3 * y;

+  if (i)
+    __asm__ volatile ("" : : : "memory");
+
   /* Check results.  */
   if (out[0] != (in[0] + 23) * x
       || out[1] != (in[1] + 142) * y
@@ -45,6 +49,6 @@  int main (void)
   return 0;
 }

-/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0
"slp" } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1
"slp"  { target vect_hw_misalign } } } */
 /* { dg-final { cleanup-tree-dump "slp" } } */
Index: testsuite/gcc.dg/vect/bb-slp-8a.c
===================================================================
--- testsuite/gcc.dg/vect/bb-slp-8a.c   (revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-8a.c   (revision 0)
@@ -0,0 +1,53 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int
*pout)
+{
+  int i;
+  unsigned int a0, a1, a2, a3;
+
+  /* pin and pout may alias, and loads and stores are mixed. The basic
block
+     cannot be vectorized.  */
+  a0 = *pin++ + 23;
+  *pout++ = a0 * x;
+  a1 = *pin++ + 142;
+  *pout++ = a1 * y;
+  a2 = *pin++ + 2;
+  *pout++ = a2 * x;
+  a3 = *pin++ + 31;
+  *pout++ = a3 * y;
+
+  if (i)
+    __asm__ volatile ("" : : : "memory");
+
+  /* Check results.  */
+  if (out[0] != (in[0] + 23) * x
+      || out[1] != (in[1] + 142) * y
+      || out[2] != (in[2] + 2) * x
+      || out[3] != (in[3] + 31) * y)
+    abort();
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 (2, 3, &in[0], &out[0]);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0
"slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
Index: testsuite/gcc.dg/vect/bb-slp-8b.c
===================================================================
--- testsuite/gcc.dg/vect/bb-slp-8b.c   (revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-8b.c   (revision 0)
@@ -0,0 +1,55 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+  int i;
+  unsigned int a0, a1, a2, a3;
+  unsigned int *pin = &in[0];
+  unsigned int *pout = &out[0];
+
+  /* pin and pout are different, so despite the fact that loads and stores
+     are mixed the basic block is vectorizable.  */
+  a0 = *pin++ + 23;
+  *pout++ = a0 * x;
+  a1 = *pin++ + 142;
+  *pout++ = a1 * y;
+  a2 = *pin++ + 2;
+  *pout++ = a2 * x;
+  a3 = *pin++ + 31;
+  *pout++ = a3 * y;
+
+  if (i)
+    __asm__ volatile ("" : : : "memory");
+
+  /* Check results.  */
+  if (out[0] != (in[0] + 23) * x
+      || out[1] != (in[1] + 142) * y
+      || out[2] != (in[2] + 2) * x
+      || out[3] != (in[3] + 31) * y)
+    abort();
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 (2, 3);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1
"slp"  { target vect_hw_misalign } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog (revision 163732)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,11 @@ 
+2010-09-01  Ira Rosen  <irar@il.ibm.com>
+
+       * gcc.dg/vect/bb-slp-8.c: Separate the interesting part and the
+       check into different basic blocks. Expect vectorization if
misaligned
+       stores are supported.
+       * gcc.dg/vect/bb-slp-8a.c: New test.
+       * gcc.dg/vect/bb-slp-8b.c: New test.
+
 2010-09-01  Richard Guenther  <rguenther@suse.de>

        * gcc.dg/vect/vect-outer-fir.c: Adjust.
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h   (revision 163732)
+++ tree-vectorizer.h   (working copy)
@@ -633,6 +633,32 @@  get_earlier_stmt (gimple stmt1, gimple s
     return stmt2;
 }

+static inline gimple
+get_later_stmt (gimple stmt1, gimple stmt2)
+{
+  unsigned int uid1, uid2;
+
+  if (stmt1 == NULL)
+    return stmt2;
+
+  if (stmt2 == NULL)
+    return stmt1;
+
+  uid1 = gimple_uid (stmt1);
+  uid2 = gimple_uid (stmt2);
+
+  if (uid1 == 0 || uid2 == 0)
+    return NULL;
+
+  gcc_assert (uid1 <= VEC_length (vec_void_p, stmt_vec_info_vec));
+  gcc_assert (uid2 <= VEC_length (vec_void_p, stmt_vec_info_vec));
+
+  if (uid1 > uid2)
+    return stmt1;
+  else
+    return stmt2;
+}
+
 static inline bool
 is_pattern_stmt_p (stmt_vec_info stmt_info)
 {
@@ -776,7 +802,7 @@  extern enum dr_alignment_support vect_su
 extern tree vect_get_smallest_scalar_type (gimple, HOST_WIDE_INT *,
                                            HOST_WIDE_INT *);
 extern bool vect_analyze_data_ref_dependences (loop_vec_info, bb_vec_info,
-                                              int *);
+                                              int *, bool *);
 extern bool vect_enhance_data_refs_alignment (loop_vec_info);
 extern bool vect_analyze_data_refs_alignment (loop_vec_info, bb_vec_info);
 extern bool vect_verify_datarefs_alignment (loop_vec_info, bb_vec_info);
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c    (revision 163732)
+++ tree-vect-loop.c    (working copy)
@@ -1378,7 +1378,7 @@  vect_analyze_loop_operations (loop_vec_i
 loop_vec_info
 vect_analyze_loop (struct loop *loop)
 {
-  bool ok;
+  bool ok, dummy;
   loop_vec_info loop_vinfo;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   int min_vf = 2;
@@ -1444,7 +1444,7 @@  vect_analyze_loop (struct loop *loop)
      the dependences.
      FORNOW: fail at the first data dependence that we encounter.  */

-  ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
+  ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf,
&dummy);
   if (!ok
       || max_vf < min_vf)
     {
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c       (revision 163732)
+++ tree-vect-data-refs.c       (working copy)
@@ -322,6 +322,64 @@  vect_equal_offsets (tree offset1, tree o
 }


+/* Check dependence between DRA and DRB for basic block vectorization.  */
+
+static bool
+vect_drs_dependent_in_basic_block (struct data_reference *dra,
+                                   struct data_reference *drb)
+{
+  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
+  gimple earlier_stmt;
+
+  /* We only call this function for pairs of loads and stores, but we
verify
+     it here.  */
+  if (DR_IS_READ (dra) == DR_IS_READ (drb))
+    {
+      if (DR_IS_READ (dra))
+        return false;
+      else
+        return true;
+    }
+
+  /* Check that the data-refs have same bases and offsets. If not, we
can't
+     determine if they are dependent.  */
+  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
+       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
+           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
+           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
+           != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
+      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
+    return true;
+
+  /* Check the types.  */
+  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
(dra))));
+  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
(drb))));
+
+  if (type_size_a != type_size_b
+      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
+                              TREE_TYPE (DR_REF (drb))))
+    return true;
+
+  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
+  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
+
+  /* Two different locations - no dependence.  */
+  if (init_a != init_b)
+    return false;
+
+  /* We have a read-write dependence. Check that the load is before the
store.
+     When we vectorize basic blocks, vector load can be only before
+     corresponding scalar load, and vector store can be only after its
+     corresponding scalar store. So the order of the acceses is preserved
in
+     case the load is before the store.  */
+  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
+  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
+    return false;
+
+  return true;
+}
+
+
 /* Function vect_check_interleaving.

    Check if DRA and DRB are a part of interleaving. In case they are,
insert
@@ -495,7 +553,8 @@  vect_mark_for_runtime_alias_test (ddr_p

 static bool
 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
-                                  loop_vec_info loop_vinfo, int *max_vf)
+                                  loop_vec_info loop_vinfo, int *max_vf,
+                                  bool *data_dependence_in_bb)
 {
   unsigned int i;
   struct loop *loop = NULL;
@@ -554,10 +613,14 @@  vect_analyze_data_ref_dependence (struct
           print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
         }

-      /* Mark the statements as unvectorizable.  */
-      STMT_VINFO_VECTORIZABLE (stmtinfo_a) = false;
-      STMT_VINFO_VECTORIZABLE (stmtinfo_b) = false;
-
+      /* We do not vectorize basic blocks with write-write dependencies.
*/
+      if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
+        return true;
+
+      /* We deal with read-write dependencies in basic blocks later (by
+         verifying that all the loads in the basic block are before all
the
+         stores).  */
+      *data_dependence_in_bb = true;
       return false;
     }

@@ -577,7 +640,12 @@  vect_analyze_data_ref_dependence (struct
           print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
         }

-      return true;
+      /* Do not vectorize basic blcoks with write-write dependences.  */
+      if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
+        return true;
+
+      /* Check if this dependence is allowed in basic block vectorization.
*/
+      return vect_drs_dependent_in_basic_block (dra, drb);
     }

   /* Loop-based vectorization and known data dependence.  */
@@ -678,7 +746,8 @@  vect_analyze_data_ref_dependence (struct

 bool
 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
-                                   bb_vec_info bb_vinfo, int *max_vf)
+                                   bb_vec_info bb_vinfo, int *max_vf,
+                                   bool *data_dependence_in_bb)
 {
   unsigned int i;
   VEC (ddr_p, heap) *ddrs = NULL;
@@ -693,7 +762,8 @@  vect_analyze_data_ref_dependences (loop_
     ddrs = BB_VINFO_DDRS (bb_vinfo);

   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
-    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
+    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
+                                         data_dependence_in_bb))
       return false;

   return true;
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c     (revision 163732)
+++ tree-vect-slp.c     (working copy)
@@ -1082,6 +1082,24 @@  vect_find_first_load_in_slp_instance (sl
 }


+/* Find the last store in SLP INSTANCE.  */
+static gimple
+vect_find_last_store_in_slp_instance (slp_instance instance)
+{
+  int i;
+  slp_tree node;
+  gimple last_store = NULL, store;
+
+  node = SLP_INSTANCE_TREE (instance);
+  for (i = 0;
+       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
+       i++)
+    last_store = get_later_stmt (store, last_store);
+
+  return last_store;
+}
+
+
 /* Analyze an SLP instance starting from a group of strided stores. Call
    vect_build_slp_tree to build a tree of packed stmts if possible.
    Return FALSE if it's impossible to SLP any stmt in the loop.  */
@@ -1503,6 +1521,42 @@  vect_slp_analyze_operations (bb_vec_info
   return true;
 }

+/* Check if loads and stores are mixed in the basic block (in that
+   case if we are not sure that the accesses differ, we can't vectorize
the
+   basic block). Also return FALSE in case that there is statement marked
as
+   not vectorizable.  */
+
+static bool
+vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
+{
+  basic_block bb = BB_VINFO_BB (bb_vinfo);
+  gimple_stmt_iterator si;
+  bool detected_store = false;
+  gimple stmt;
+  struct data_reference *dr;
+
+  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+    {
+      stmt = gsi_stmt (si);
+
+      /* We can't allow not analyzed statements, since they may contain
data
+         accesses.  */
+      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
+        return false;
+
+      if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
+        continue;
+
+      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+      if (DR_IS_READ (dr) && detected_store)
+        return false;
+
+      if (!DR_IS_READ (dr))
+        detected_store = true;
+    }
+
+  return true;
+}