Patchwork Fix PR43432, vectorize backwards stepping loads

login
register
mail settings
Submitter Michael Matz
Date Sept. 7, 2010, 12:58 p.m.
Message ID <Pine.LNX.4.64.1009071446530.29722@wotan.suse.de>
Download mbox | patch
Permalink /patch/64009/
State New
Headers show

Comments

Michael Matz - Sept. 7, 2010, 12:58 p.m.
Hi,

this implements handling of consecutive loads whose indices run backwards.  
I'm handling the easy cases only, namely when no explicit realignment 
instructions need to be generated and when we need to emit only one load 
per original load (i.e. no multiple type width in the loop).

This all requires that we be able to reverse the elements of a vector, 
hence two new helper functions for that are added.

Reversed stores aren't handled but should be reasonably easy to extend.

Regstrapped on x86_64-linux.  I've had to add x86_64-*-* to 
target-supports.exp/vect_perm recognition which in turn makes 
vect/slp-perm-8.c and vect/slp-perm-9.c fail then.  That's most probably 
because while x86_64 supports permutation for 4 and 8-sized elements it 
doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c).  I'm going 
to investigate this but seek feedback on the patch itself already now.


Ciao,
Michael.
Ira Rosen - Sept. 8, 2010, 6:09 a.m.
gcc-patches-owner@gcc.gnu.org wrote on 07/09/2010 03:58:56 PM:

>
> Hi,
>
> this implements handling of consecutive loads whose indices run
backwards.
> I'm handling the easy cases only, namely when no explicit realignment
> instructions need to be generated and when we need to emit only one load
> per original load (i.e. no multiple type width in the loop).
>
> This all requires that we be able to reverse the elements of a vector,
> hence two new helper functions for that are added.
>
> Reversed stores aren't handled but should be reasonably easy to extend.
>
> Regstrapped on x86_64-linux.  I've had to add x86_64-*-* to
> target-supports.exp/vect_perm recognition which in turn makes
> vect/slp-perm-8.c and vect/slp-perm-9.c fail then.  That's most probably
> because while x86_64 supports permutation for 4 and 8-sized elements it
> doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c).  I'm going

> to investigate this but seek feedback on the patch itself already now.
>

Looks good to me.

Ira

>
> Ciao,
> Michael.
> --
>    PR tree-optimization/43432
>    * tree-vect-data-refs.c (vect_analyze_data_ref_access):
>    Accept backwards consecutive accesses.
>    (vect_create_data_ref_ptr): If step is negative generate
>    decreasing IVs.
>    * tree-vect-stmts.c (vectorizable_store): Reject negative steps.
>    (perm_mask_for_reverse, reverse_vec_elements): New functions.
>    (vectorizable_load): Handle loads with negative steps when easily
>    possible.
>
> testsuite/
>    PR tree-optimization/43432
>    * gcc.dg/vect/pr43432.c: New test.
>
> Index: tree-vect-data-refs.c
> ===================================================================
> --- tree-vect-data-refs.c   (revision 163773)
> +++ tree-vect-data-refs.c   (working copy)
> @@ -2281,7 +2281,9 @@ vect_analyze_data_ref_access (struct dat
>      }
>
>    /* Consecutive?  */
> -  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
> +  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
> +      || (dr_step < 0
> +     && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
>      {
>        /* Mark that it is not interleaving.  */
>        DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
> @@ -2953,6 +2955,7 @@ vect_create_data_ref_ptr (gimple stmt, s
>    tree vptr;
>    gimple_stmt_iterator incr_gsi;
>    bool insert_after;
> +  bool negative;
>    tree indx_before_incr, indx_after_incr;
>    gimple incr;
>    tree step;
> @@ -2985,6 +2988,7 @@ vect_create_data_ref_ptr (gimple stmt, s
>      *inv_p = true;
>    else
>      *inv_p = false;
> +  negative = tree_int_cst_compare (step, size_zero_node) < 0;
>
>    /* Create an expression for the first address accessed by this load
>       in LOOP.  */
> @@ -3144,6 +3148,8 @@ vect_create_data_ref_ptr (gimple stmt, s
>      LOOP is zero. In this case the step here is also zero.  */
>        if (*inv_p)
>     step = size_zero_node;
> +      else if (negative)
> +   step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
>
>        standard_iv_increment_position (loop, &incr_gsi, &insert_after);
>
> Index: tree-vect-stmts.c
> ===================================================================
> --- tree-vect-stmts.c   (revision 163773)
> +++ tree-vect-stmts.c   (working copy)
> @@ -3134,6 +3134,13 @@ vectorizable_store (gimple stmt, gimple_
>    if (!STMT_VINFO_DATA_REF (stmt_info))
>      return false;
>
> +  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
> +    {
> +      if (vect_print_dump_info (REPORT_DETAILS))
> +        fprintf (vect_dump, "negative step for store.");
> +      return false;
> +    }
> +
>    if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
>      {
>        strided_store = true;
> @@ -3400,6 +3407,68 @@ vectorizable_store (gimple stmt, gimple_
>    return true;
>  }
>
> +/* Given a vector type VECTYPE returns a builtin DECL to be used
> +   for vector permutation and stores a mask into *MASK that implements
> +   reversal of the vector elements.  If that is impossible to do
> +   returns NULL (and *MASK is unchanged).  */
> +
> +static tree
> +perm_mask_for_reverse (tree vectype, tree *mask)
> +{
> +  tree builtin_decl;
> +  tree mask_element_type, mask_type;
> +  tree mask_vec = NULL;
> +  int i;
> +  int nunits;
> +  if (!targetm.vectorize.builtin_vec_perm)
> +    return NULL;
> +
> +  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
> +
&mask_element_type);
> +  if (!builtin_decl || !mask_element_type)
> +    return NULL;
> +
> +  mask_type = get_vectype_for_scalar_type (mask_element_type);
> +  nunits = TYPE_VECTOR_SUBPARTS (vectype);
> +  if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS
(mask_type))
> +    return NULL;
> +
> +  for (i = 0; i < nunits; i++)
> +    mask_vec = tree_cons (NULL, build_int_cst (mask_element_type,
> i), mask_vec);
> +  mask_vec = build_vector (mask_type, mask_vec);
> +
> +  if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
> +    return NULL;
> +  if (mask)
> +    *mask = mask_vec;
> +  return builtin_decl;
> +}
> +
> +/* Given a vector variable X, that was generated for the scalar LHS of
> +   STMT, generate instructions to reverse the vector elements of X,
> +   insert them a *GSI and return the permuted vector variable.  */
> +
> +static tree
> +reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
> +{
> +  tree vectype = TREE_TYPE (x);
> +  tree mask_vec, builtin_decl;
> +  tree perm_dest, data_ref;
> +  gimple perm_stmt;
> +
> +  builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
> +
> +  perm_dest = vect_create_destination_var (gimple_assign_lhs
> (stmt), vectype);
> +
> +  /* Generate the permute statement.  */
> +  perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
> +  data_ref = make_ssa_name (perm_dest, perm_stmt);
> +  gimple_call_set_lhs (perm_stmt, data_ref);
> +  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
> +
> +  return data_ref;
> +}
> +
>  /* vectorizable_load.
>
>     Check if STMT reads a non scalar data-ref (array/pointer/structure)
that
> @@ -3442,6 +3511,7 @@ vectorizable_load (gimple stmt, gimple_s
>    gimple first_stmt;
>    tree scalar_type;
>    bool inv_p;
> +  bool negative;
>    bool compute_in_loop = false;
>    struct loop *at_loop;
>    int vec_num;
> @@ -3504,6 +3574,14 @@ vectorizable_load (gimple stmt, gimple_s
>    if (!STMT_VINFO_DATA_REF (stmt_info))
>      return false;
>
> +  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
> +  if (negative && ncopies > 1)
> +    {
> +      if (vect_print_dump_info (REPORT_DETAILS))
> +        fprintf (vect_dump, "multiple types with negative step.");
> +      return false;
> +    }
> +
>    scalar_type = TREE_TYPE (DR_REF (dr));
>    mode = TYPE_MODE (vectype);
>
> @@ -3538,6 +3616,25 @@ vectorizable_load (gimple stmt, gimple_s
>     return false;
>      }
>
> +  if (negative)
> +    {
> +      gcc_assert (!strided_load);
> +      alignment_support_scheme = vect_supportable_dr_alignment (dr,
false);
> +      if (alignment_support_scheme != dr_aligned
> +     && alignment_support_scheme != dr_unaligned_supported)
> +   {
> +     if (vect_print_dump_info (REPORT_DETAILS))
> +       fprintf (vect_dump, "negative step but alignment required.");
> +     return false;
> +   }
> +      if (!perm_mask_for_reverse (vectype, NULL))
> +   {
> +     if (vect_print_dump_info (REPORT_DETAILS))
> +       fprintf (vect_dump, "negative step and reversing not
supported.");
> +     return false;
> +   }
> +    }
> +
>    if (!vec_stmt) /* transformation not required.  */
>      {
>        STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
> @@ -3712,6 +3809,9 @@ vectorizable_load (gimple stmt, gimple_s
>    else
>      at_loop = loop;
>
> +  if (negative)
> +    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
> +
>    prev_stmt_info = NULL;
>    for (j = 0; j < ncopies; j++)
>      {
> @@ -3885,6 +3985,12 @@ vectorizable_load (gimple stmt, gimple_s
>        gcc_unreachable (); /* FORNOW. */
>         }
>
> +     if (negative)
> +       {
> +         new_temp = reverse_vec_elements (new_temp, stmt, gsi);
> +         new_stmt = SSA_NAME_DEF_STMT (new_temp);
> +       }
> +
>       /* Collect vector loads and later create their permutation in
>          vect_transform_strided_load ().  */
>            if (strided_load || slp_perm)
> Index: testsuite/gcc.dg/vect/pr43432.c
> ===================================================================
> --- testsuite/gcc.dg/vect/pr43432.c   (revision 0)
> +++ testsuite/gcc.dg/vect/pr43432.c   (revision 0)
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_float } */
> +/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
> +
> +
> +void vector_fmul_reverse_c(float *dst, const float *src0, const float
*src1,
> +int len){
> +    int i;
> +    src1 += len-1;
> +    for(i=0; i<len; i++)
> +        dst[i] = src0[i] * src1[-i];
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"
> { target { vect_perm } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */

Patch

Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c	(revision 163773)
+++ tree-vect-data-refs.c	(working copy)
@@ -2281,7 +2281,9 @@  vect_analyze_data_ref_access (struct dat
     }
 
   /* Consecutive?  */
-  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
+      || (dr_step < 0
+	  && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
     {
       /* Mark that it is not interleaving.  */
       DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
@@ -2953,6 +2955,7 @@  vect_create_data_ref_ptr (gimple stmt, s
   tree vptr;
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
+  bool negative;
   tree indx_before_incr, indx_after_incr;
   gimple incr;
   tree step;
@@ -2985,6 +2988,7 @@  vect_create_data_ref_ptr (gimple stmt, s
     *inv_p = true;
   else
     *inv_p = false;
+  negative = tree_int_cst_compare (step, size_zero_node) < 0;
 
   /* Create an expression for the first address accessed by this load
      in LOOP.  */
@@ -3144,6 +3148,8 @@  vect_create_data_ref_ptr (gimple stmt, s
 	 LOOP is zero. In this case the step here is also zero.  */
       if (*inv_p)
 	step = size_zero_node;
+      else if (negative)
+	step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 163773)
+++ tree-vect-stmts.c	(working copy)
@@ -3134,6 +3134,13 @@  vectorizable_store (gimple stmt, gimple_
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "negative step for store.");
+      return false;
+    }
+
   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     {
       strided_store = true;
@@ -3400,6 +3407,68 @@  vectorizable_store (gimple stmt, gimple_
   return true;
 }
 
+/* Given a vector type VECTYPE returns a builtin DECL to be used
+   for vector permutation and stores a mask into *MASK that implements
+   reversal of the vector elements.  If that is impossible to do
+   returns NULL (and *MASK is unchanged).  */
+
+static tree
+perm_mask_for_reverse (tree vectype, tree *mask)
+{
+  tree builtin_decl;
+  tree mask_element_type, mask_type;
+  tree mask_vec = NULL;
+  int i;
+  int nunits;
+  if (!targetm.vectorize.builtin_vec_perm)
+    return NULL;
+
+  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
+                                                     &mask_element_type);
+  if (!builtin_decl || !mask_element_type)
+    return NULL;
+
+  mask_type = get_vectype_for_scalar_type (mask_element_type);
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
+    return NULL;
+
+  for (i = 0; i < nunits; i++)
+    mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
+  mask_vec = build_vector (mask_type, mask_vec);
+
+  if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
+    return NULL;
+  if (mask)
+    *mask = mask_vec;
+  return builtin_decl;
+}
+
+/* Given a vector variable X, that was generated for the scalar LHS of
+   STMT, generate instructions to reverse the vector elements of X,
+   insert them a *GSI and return the permuted vector variable.  */
+
+static tree
+reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
+{
+  tree vectype = TREE_TYPE (x);
+  tree mask_vec, builtin_decl;
+  tree perm_dest, data_ref;
+  gimple perm_stmt;
+
+  builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
+
+  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
+
+  /* Generate the permute statement.  */
+  perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
+  data_ref = make_ssa_name (perm_dest, perm_stmt);
+  gimple_call_set_lhs (perm_stmt, data_ref);
+  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+
+  return data_ref;
+}
+
 /* vectorizable_load.
 
    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
@@ -3442,6 +3511,7 @@  vectorizable_load (gimple stmt, gimple_s
   gimple first_stmt;
   tree scalar_type;
   bool inv_p;
+  bool negative;
   bool compute_in_loop = false;
   struct loop *at_loop;
   int vec_num;
@@ -3504,6 +3574,14 @@  vectorizable_load (gimple stmt, gimple_s
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+  if (negative && ncopies > 1)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "multiple types with negative step.");
+      return false;
+    }
+
   scalar_type = TREE_TYPE (DR_REF (dr));
   mode = TYPE_MODE (vectype);
 
@@ -3538,6 +3616,25 @@  vectorizable_load (gimple stmt, gimple_s
 	return false;
     }
 
+  if (negative)
+    {
+      gcc_assert (!strided_load);
+      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+      if (alignment_support_scheme != dr_aligned
+	  && alignment_support_scheme != dr_unaligned_supported)
+	{
+	  if (vect_print_dump_info (REPORT_DETAILS))
+	    fprintf (vect_dump, "negative step but alignment required.");
+	  return false;
+	}
+      if (!perm_mask_for_reverse (vectype, NULL))
+	{
+	  if (vect_print_dump_info (REPORT_DETAILS))
+	    fprintf (vect_dump, "negative step and reversing not supported.");
+	  return false;
+	}
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
@@ -3712,6 +3809,9 @@  vectorizable_load (gimple stmt, gimple_s
   else
     at_loop = loop;
 
+  if (negative)
+    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
+
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
@@ -3885,6 +3985,12 @@  vectorizable_load (gimple stmt, gimple_s
 		gcc_unreachable (); /* FORNOW. */
 	    }
 
+	  if (negative)
+	    {
+	      new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
+	    }
+
 	  /* Collect vector loads and later create their permutation in
 	     vect_transform_strided_load ().  */
           if (strided_load || slp_perm)
Index: testsuite/gcc.dg/vect/pr43432.c
===================================================================
--- testsuite/gcc.dg/vect/pr43432.c	(revision 0)
+++ testsuite/gcc.dg/vect/pr43432.c	(revision 0)
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
+
+
+void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1,
+int len){
+    int i;
+    src1 += len-1;
+    for(i=0; i<len; i++)
+        dst[i] = src0[i] * src1[-i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */