diff mbox

[v2] Simplify alias check code generation in vectorizer

Message ID VI1PR0802MB2176FC264E50FB7B376B078FE7F70@VI1PR0802MB2176.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Bin Cheng Sept. 20, 2016, 3:25 p.m. UTC
Hi,
I originally posted a patch improving code generation for alias check in vectorizer at https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00929.html.  Here it's the 2nd version (better) patch.  It detects data reference pair in which the two references are only different to each other wrto index.  In this case the patch generates intersect range checks based on index of address of reference, rather than the address of reference.  Take example from patch's comment, given below two data references:

                       DR_A                           DR_B
      data-ref         arr[i]                         arr[j]
      base_object      arr                            arr
      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop

The addresses and their index can be depicted as:

        |<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
        |   |   |   |   |          |   |   |   |   |
     ------------------------------------------------------->
        i_0 ...         i_0+4      j_0 ...         j_0+4

We can create expression based on index rather than address:  (i_0 + 4 < j_0 || j_0 + 4 < i_0).

Also take example from spec2k/200.sixtrack/DACOP, gcc needs to generate alias check for three pairs:
//pair 1
dr_a:
(Data Ref:
  bb: 8
  stmt: _92 = da.cc[_27];
  ref: da.cc[_27];
)
dr_b:
(Data Ref:
  bb: 8
  stmt: da.cc[_93] = _92;
  ref: da.cc[_93];
)
//pair 2
dr_a:
(Data Ref:
  bb: 8
  stmt: pretmp_29 = da.i2[_27];
  ref: da.i2[_27];
)
dr_b:
(Data Ref:
  bb: 8
  stmt: da.i2[_93] = pretmp_29;
  ref: da.i2[_93];
)
//pair 3
dr_a:
(Data Ref:
  bb: 8
  stmt: pretmp_28 = da.i1[_27];
  ref: da.i1[_27];
)
dr_b:
(Data Ref:
  bb: 8
  stmt: da.i1[_93] = pretmp_28;
  ref: da.i1[_93];
)

With this patch, code can be improved to:

  <bb 7>:
  _37 = (unsigned int) _6;
  _106 = (unsigned int) _52;
  _107 = _37 - _106;
  _108 = _107 > 7;
  _109 = (integer(kind=8)) _2;
  _110 = _109 + 3;
  _111 = (integer(kind=8)) _52;
  _112 = _111 + -1;
  _113 = _110 < _112;
  _114 = (integer(kind=8)) _52;
  _115 = _114 + 2;
  _116 = (integer(kind=8)) _2;
  _117 = _115 < _116;
  _118 = _113 | _117;
  _119 = _108 & _118;
  _120 = (integer(kind=8)) _2;
  _121 = _120 + 3;
  _122 = (integer(kind=8)) _52;
  _123 = _122 + -1;
  _124 = _121 < _123;
  _125 = (integer(kind=8)) _52;
  _126 = _125 + 2;
  _127 = (integer(kind=8)) _2;
  _128 = _126 < _127;
  _129 = _124 | _128;
  _130 = _119 & _129;
  _131 = (integer(kind=8)) _2;
  _132 = _131 + 3;
  _133 = (integer(kind=8)) _52;
  _134 = _133 + -1;
  _135 = _132 < _134;
  _136 = (integer(kind=8)) _52;
  _137 = _136 + 2;
  _138 = (integer(kind=8)) _2;
  _139 = _137 < _138;
  _140 = _135 | _139;
  _141 = _130 & _140;
  if (_141 != 0)
    goto <bb 8>;
  else
    goto <bb 20>;

Note this patch doesn't do local CSE, and common sub-expressions will be optimized by later passes.  I think this is OK because the redundancy is far away from loops thus won't have bad effect (there is an opposite example/PR).
Bootstrap and test on x86_64 and AArch64, is it OK?

Thanks,
bin

2016-09-19  Bin Cheng  <bin.cheng@arm.com>

	* tree-vect-loop-manip.c (create_intersect_range_checks_index): New.
	(create_intersect_range_checks): New.
	(vect_create_cond_for_alias_checks): Call above function.

Comments

Richard Biener Sept. 21, 2016, 2:01 p.m. UTC | #1
On Tue, Sep 20, 2016 at 5:25 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
> Hi,
> I originally posted a patch improving code generation for alias check in vectorizer at https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00929.html.  Here it's the 2nd version (better) patch.  It detects data reference pair in which the two references are only different to each other wrto index.  In this case the patch generates intersect range checks based on index of address of reference, rather than the address of reference.  Take example from patch's comment, given below two data references:
>
>                        DR_A                           DR_B
>       data-ref         arr[i]                         arr[j]
>       base_object      arr                            arr
>       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
>
> The addresses and their index can be depicted as:
>
>         |<- ADDR_A    ->|          |<- ADDR_B    ->|
>      ------------------------------------------------------->
>         |   |   |   |   |          |   |   |   |   |
>      ------------------------------------------------------->
>         i_0 ...         i_0+4      j_0 ...         j_0+4
>
> We can create expression based on index rather than address:  (i_0 + 4 < j_0 || j_0 + 4 < i_0).
>
> Also take example from spec2k/200.sixtrack/DACOP, gcc needs to generate alias check for three pairs:
> //pair 1
> dr_a:
> (Data Ref:
>   bb: 8
>   stmt: _92 = da.cc[_27];
>   ref: da.cc[_27];
> )
> dr_b:
> (Data Ref:
>   bb: 8
>   stmt: da.cc[_93] = _92;
>   ref: da.cc[_93];
> )
> //pair 2
> dr_a:
> (Data Ref:
>   bb: 8
>   stmt: pretmp_29 = da.i2[_27];
>   ref: da.i2[_27];
> )
> dr_b:
> (Data Ref:
>   bb: 8
>   stmt: da.i2[_93] = pretmp_29;
>   ref: da.i2[_93];
> )
> //pair 3
> dr_a:
> (Data Ref:
>   bb: 8
>   stmt: pretmp_28 = da.i1[_27];
>   ref: da.i1[_27];
> )
> dr_b:
> (Data Ref:
>   bb: 8
>   stmt: da.i1[_93] = pretmp_28;
>   ref: da.i1[_93];
> )
>
> With this patch, code can be improved to:
>
>   <bb 7>:
>   _37 = (unsigned int) _6;
>   _106 = (unsigned int) _52;
>   _107 = _37 - _106;
>   _108 = _107 > 7;
>   _109 = (integer(kind=8)) _2;
>   _110 = _109 + 3;
>   _111 = (integer(kind=8)) _52;
>   _112 = _111 + -1;
>   _113 = _110 < _112;
>   _114 = (integer(kind=8)) _52;
>   _115 = _114 + 2;
>   _116 = (integer(kind=8)) _2;
>   _117 = _115 < _116;
>   _118 = _113 | _117;
>   _119 = _108 & _118;
>   _120 = (integer(kind=8)) _2;
>   _121 = _120 + 3;
>   _122 = (integer(kind=8)) _52;
>   _123 = _122 + -1;
>   _124 = _121 < _123;
>   _125 = (integer(kind=8)) _52;
>   _126 = _125 + 2;
>   _127 = (integer(kind=8)) _2;
>   _128 = _126 < _127;
>   _129 = _124 | _128;
>   _130 = _119 & _129;
>   _131 = (integer(kind=8)) _2;
>   _132 = _131 + 3;
>   _133 = (integer(kind=8)) _52;
>   _134 = _133 + -1;
>   _135 = _132 < _134;
>   _136 = (integer(kind=8)) _52;
>   _137 = _136 + 2;
>   _138 = (integer(kind=8)) _2;
>   _139 = _137 < _138;
>   _140 = _135 | _139;
>   _141 = _130 & _140;
>   if (_141 != 0)
>     goto <bb 8>;
>   else
>     goto <bb 20>;
>
> Note this patch doesn't do local CSE, and common sub-expressions will be optimized by later passes.  I think this is OK because the redundancy is far away from loops thus won't have bad effect (there is an opposite example/PR).
> Bootstrap and test on x86_64 and AArch64, is it OK?

Seeing

+  /* Infer index length from segment length.  */
+  unsigned HOST_WIDE_INT idx_len1 = (seg_len1 + abs_step - 1) / abs_step;
+  unsigned HOST_WIDE_INT idx_len2 = (seg_len2 + abs_step - 1) / abs_step;

Does this really work for DR_STEP that is bigger than 1 in index space?  The
segment length is generally vectorization-factor * DR_STEP but index space
is -- if you are talking about array-ref indexes -- the segment length divided
by the array element size.  Note that what the index space refers to for
a given DR_ACCESS_FN depends on whether this is from an ARRAY_REF
(element size) or the base pointer evolution (bytes).  I suppose looking at
the access functions type might distinguish the pointer vs. array index case.

What I'm looking for is a big comment on why the above "translation" is
correct.

Thanks,
Richard.

> Thanks,
> bin
>
> 2016-09-19  Bin Cheng  <bin.cheng@arm.com>
>
>         * tree-vect-loop-manip.c (create_intersect_range_checks_index): New.
>         (create_intersect_range_checks): New.
>         (vect_create_cond_for_alias_checks): Call above function.
diff mbox

Patch

diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 01d6bb1..c7d130e 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -2263,6 +2263,179 @@  vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
     *cond_expr = part_cond_expr;
 }
 
+/* Given two data references and segment lengths described by DR_A and DR_B,
+   create expression checking if the two addresses ranges intersect with
+   each other based on index of the two addresses.  This can only be done
+   if DR_A and DR_B referring to the same (array) object and the index is
+   the only difference.  For example:
+
+                       DR_A                           DR_B
+      data-ref         arr[i]                         arr[j]
+      base_object      arr                            arr
+      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
+
+   The addresses and their index are like:
+
+        |<- ADDR_A    ->|          |<- ADDR_B    ->|
+     ------------------------------------------------------->
+        |   |   |   |   |          |   |   |   |   |
+     ------------------------------------------------------->
+        i_0 ...         i_0+4      j_0 ...         j_0+4
+
+   We can create expression based on index rather than address:
+     (i_0 + 4 < j_0 || j_0 + 4 < i_0).  */
+
+static bool
+create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
+				     const dr_with_seg_len& dr_a,
+				     const dr_with_seg_len& dr_b)
+{
+  if (integer_zerop (DR_STEP (dr_a.dr))
+      || integer_zerop (DR_STEP (dr_b.dr))
+      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
+    return false;
+
+  if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
+    return false;
+
+  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
+    return false;
+
+  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
+    return false;
+
+  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
+
+  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
+  unsigned HOST_WIDE_INT abs_step = tree_to_uhwi (DR_STEP (dr_a.dr));
+  if (neg_step)
+    abs_step = -abs_step;
+
+  unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
+  unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
+  /* Infer index length from segment length.  */
+  unsigned HOST_WIDE_INT idx_len1 = (seg_len1 + abs_step - 1) / abs_step;
+  unsigned HOST_WIDE_INT idx_len2 = (seg_len2 + abs_step - 1) / abs_step;
+
+  unsigned int i;
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
+    {
+      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
+      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
+      /* Two index must be the same if they are not scev.  */
+      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
+	  || TREE_CODE (access2) != POLYNOMIAL_CHREC)
+	{
+	  if (operand_equal_p (access1, access2, 0))
+	    continue;
+
+	  return false;
+	}
+      /* Two index must be the same if they are not scev wrto current loop.  */
+      if (CHREC_VARIABLE (access1) != (unsigned)loop->num
+	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
+	{
+	  if (operand_equal_p (access1, access2, 0))
+	    continue;
+
+	  return false;
+	}
+      /* Two index must have the same step.  */
+      if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
+	return false;
+
+      tree min1 = CHREC_LEFT (access1);
+      tree min2 = CHREC_LEFT (access2);
+      if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
+	return false;
+
+      tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1,
+			       build_int_cst (TREE_TYPE (min1), idx_len1));
+      tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2,
+			       build_int_cst (TREE_TYPE (min2), idx_len2));
+      /* Adjust ranges for negative step.  */
+      if (neg_step)
+	{
+	  min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1,
+			      build_int_cst (TREE_TYPE (min1), 1));
+	  max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1),
+			      CHREC_LEFT (access1),
+			      build_int_cst (TREE_TYPE (min1), 1));
+	  min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2,
+			      build_int_cst (TREE_TYPE (min2), 1));
+	  max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2),
+			      CHREC_LEFT (access2),
+			      build_int_cst (TREE_TYPE (min2), 1));
+	}
+      tree part_cond_expr
+	= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+	    fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
+	    fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
+      if (*cond_expr)
+	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+				  *cond_expr, part_cond_expr);
+      else
+	*cond_expr = part_cond_expr;
+    }
+  return true;
+}
+
+/* Given two data references and segment lengths described by DR_A and DR_B,
+   create expression checking if the two addresses ranges intersect with
+   each other:
+
+     ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
+     || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
+
+static void
+create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
+			       const dr_with_seg_len& dr_a,
+			       const dr_with_seg_len& dr_b)
+{
+  *cond_expr = NULL_TREE;
+  if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
+    return;
+
+  tree segment_length_a = dr_a.seg_len;
+  tree segment_length_b = dr_b.seg_len;
+  tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
+  tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
+  tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
+
+  offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
+			  offset_a, DR_INIT (dr_a.dr));
+  offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
+			  offset_b, DR_INIT (dr_b.dr));
+  addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
+  addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
+
+  tree seg_a_min = addr_base_a;
+  tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
+  /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
+     bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
+     [a, a+12) */
+  if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
+    {
+      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
+      seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
+      seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
+    }
+
+  tree seg_b_min = addr_base_b;
+  tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
+  if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
+    {
+      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
+      seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
+      seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
+    }
+  *cond_expr
+    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+	fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
+	fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
+}
+
 /* Function vect_create_cond_for_alias_checks.
 
    Create a conditional expression that represents the run-time checks for
@@ -2290,15 +2463,6 @@  vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
   tree part_cond_expr;
 
-  /* Create expression
-     ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
-     || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
-     &&
-     ...
-     &&
-     ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
-     || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
-
   if (comp_alias_ddrs.is_empty ())
     return;
 
@@ -2306,18 +2470,6 @@  vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
     {
       const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
       const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
-      tree segment_length_a = dr_a.seg_len;
-      tree segment_length_b = dr_b.seg_len;
-      tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
-      tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
-      tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
-
-      offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
-			      offset_a, DR_INIT (dr_a.dr));
-      offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
-			      offset_b, DR_INIT (dr_b.dr));
-      addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
-      addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
 
       if (dump_enabled_p ())
 	{
@@ -2329,32 +2481,8 @@  vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
 	  dump_printf (MSG_NOTE, "\n");
 	}
 
-      tree seg_a_min = addr_base_a;
-      tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
-      /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
-	 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
-	 [a, a+12) */
-      if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
-	{
-	  tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
-	  seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
-	  seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
-	}
-
-      tree seg_b_min = addr_base_b;
-      tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
-      if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
-	{
-	  tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
-	  seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
-	  seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
-	}
-
-      part_cond_expr =
-      	fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
-	  fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
-	  fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
-
+      /* Create condition expression for each pair data references.  */
+      create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
       if (*cond_expr)
 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
 				  *cond_expr, part_cond_expr);