[7/8] Use a single comparison for index-based alias checks
diff mbox series

Message ID mpt8somgsb9.fsf@arm.com
State New
Headers show
Series
  • Improve vector alias checks for WAR and WAW dependencies
Related show

Commit Message

Richard Sandiford Nov. 11, 2019, 6:51 p.m. UTC
This patch rewrites the index-based alias checks to use conditions
of the form:

  (unsigned T) (a - b + bias) <= limit

E.g. before the patch:

  struct s { int x[100]; };

  void
  f1 (struct s *s1, int a, int b)
  {
    for (int i = 0; i < 32; ++i)
      s1->x[i + a] += s1->x[i + b];
  }

used:

        add     w3, w1, 3
        cmp     w3, w2
        add     w3, w2, 3
        ccmp    w1, w3, 0, ge
        ble     .L2

whereas after the patch it uses:

        sub     w3, w1, w2
        add     w3, w3, 3
        cmp     w3, 6
        bls     .L2

The patch also fixes the seg_len1 and seg_len2 negation for cases in
which seg_len is a "negative unsigned" value narrower than 64 bits,
like it is for 32-bit targets.  Previously we'd end up with values
like 0xffffffff000000001 instead of 1.


2019-11-11  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-data-ref.c (create_intersect_range_checks_index): Rewrite
	the index tests to have the form (unsigned T) (B - A + bias) <= limit.

gcc/testsuite/
	* gcc.dg/vect/vect-alias-check-18.c: New test.
	* gcc.dg/vect/vect-alias-check-19.c: Likewise.
	* gcc.dg/vect/vect-alias-check-20.c: Likewise.

Comments

Richard Biener Nov. 15, 2019, 11:09 a.m. UTC | #1
On Mon, Nov 11, 2019 at 7:51 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> This patch rewrites the index-based alias checks to use conditions
> of the form:
>
>   (unsigned T) (a - b + bias) <= limit
>
> E.g. before the patch:
>
>   struct s { int x[100]; };
>
>   void
>   f1 (struct s *s1, int a, int b)
>   {
>     for (int i = 0; i < 32; ++i)
>       s1->x[i + a] += s1->x[i + b];
>   }
>
> used:
>
>         add     w3, w1, 3
>         cmp     w3, w2
>         add     w3, w2, 3
>         ccmp    w1, w3, 0, ge
>         ble     .L2
>
> whereas after the patch it uses:
>
>         sub     w3, w1, w2
>         add     w3, w3, 3
>         cmp     w3, 6
>         bls     .L2
>
> The patch also fixes the seg_len1 and seg_len2 negation for cases in
> which seg_len is a "negative unsigned" value narrower than 64 bits,
> like it is for 32-bit targets.  Previously we'd end up with values
> like 0xffffffff000000001 instead of 1.

OK.

>
> 2019-11-11  Richard Sandiford  <richard.sandiford@arm.com>
>
> gcc/
>         * tree-data-ref.c (create_intersect_range_checks_index): Rewrite
>         the index tests to have the form (unsigned T) (B - A + bias) <= limit.
>
> gcc/testsuite/
>         * gcc.dg/vect/vect-alias-check-18.c: New test.
>         * gcc.dg/vect/vect-alias-check-19.c: Likewise.
>         * gcc.dg/vect/vect-alias-check-20.c: Likewise.
>
> Index: gcc/tree-data-ref.c
> ===================================================================
> --- gcc/tree-data-ref.c 2019-11-11 18:31:01.263118514 +0000
> +++ gcc/tree-data-ref.c 2019-11-11 18:31:05.099091743 +0000
> @@ -1744,7 +1744,9 @@ prune_runtime_alias_test_list (vec<dr_wi
>
>     We can create expression based on index rather than address:
>
> -     (i_0 + 4 < j_0 || j_0 + 4 < i_0)
> +     (unsigned) (i_0 - j_0 + 3) <= 6
> +
> +   i.e. the indices are less than 4 apart.
>
>     Note evolution step of index needs to be considered in comparison.  */
>
> @@ -1781,15 +1783,8 @@ create_intersect_range_checks_index (cla
>    if (neg_step)
>      {
>        abs_step = -abs_step;
> -      seg_len1 = -seg_len1;
> -      seg_len2 = -seg_len2;
> -    }
> -  else
> -    {
> -      /* Include the access size in the length, so that we only have one
> -        tree addition below.  */
> -      seg_len1 += dr_a.access_size;
> -      seg_len2 += dr_b.access_size;
> +      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
> +      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
>      }
>
>    /* Infer the number of iterations with which the memory segment is accessed
> @@ -1803,16 +1798,13 @@ create_intersect_range_checks_index (cla
>        || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
>      return false;
>
> -  poly_uint64 niter_access1 = 0, niter_access2 = 0;
> -  if (neg_step)
> -    {
> -      /* Divide each access size by the byte step, rounding up.  */
> -      if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
> -                           abs_step, &niter_access1)
> -         || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
> -                              abs_step, &niter_access2))
> -       return false;
> -    }
> +  /* Divide each access size by the byte step, rounding up.  */
> +  poly_uint64 niter_access1, niter_access2;
> +  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
> +                       abs_step, &niter_access1)
> +      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
> +                          abs_step, &niter_access2))
> +    return false;
>
>    unsigned int i;
>    for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
> @@ -1852,38 +1844,87 @@ create_intersect_range_checks_index (cla
>          index of data reference.  Like segment length, index length is
>          linear function of the number of iterations with index_step as
>          the coefficient, i.e, niter_len * idx_step.  */
> -      tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
> -                                  build_int_cst (TREE_TYPE (min1),
> -                                                 niter_len1));
> -      tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
> -                                  build_int_cst (TREE_TYPE (min2),
> -                                                 niter_len2));
> -      tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
> -      tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
> -      /* Adjust ranges for negative step.  */
> +      offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
> +                                                 SIGNED);
>        if (neg_step)
> -       {
> -         /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
> -         std::swap (min1, max1);
> -         std::swap (min2, max2);
> -
> -         /* As with the lengths just calculated, we've measured the access
> -            sizes in iterations, so multiply them by the index step.  */
> -         tree idx_access1
> -           = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
> -                          build_int_cst (TREE_TYPE (min1), niter_access1));
> -         tree idx_access2
> -           = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
> -                          build_int_cst (TREE_TYPE (min2), niter_access2));
> -
> -         /* MINUS_EXPR because the above values are negative.  */
> -         max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
> -         max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
> -       }
> -      tree part_cond_expr
> -       = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
> -           fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
> -           fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
> +       abs_idx_step = -abs_idx_step;
> +      poly_offset_int idx_len1 = abs_idx_step * niter_len1;
> +      poly_offset_int idx_len2 = abs_idx_step * niter_len2;
> +      poly_offset_int idx_access1 = abs_idx_step * niter_access1;
> +      poly_offset_int idx_access2 = abs_idx_step * niter_access2;
> +
> +      gcc_assert (known_ge (idx_len1, 0)
> +                 && known_ge (idx_len2, 0)
> +                 && known_ge (idx_access1, 0)
> +                 && known_ge (idx_access2, 0));
> +
> +      /* Each access has the following pattern, with lengths measured
> +        in units of INDEX:
> +
> +             <-- idx_len -->
> +             <--- A: -ve step --->
> +             +-----+-------+-----+-------+-----+
> +             | n-1 | ..... |  0  | ..... | n-1 |
> +             +-----+-------+-----+-------+-----+
> +                           <--- B: +ve step --->
> +                           <-- idx_len -->
> +                           |
> +                          min
> +
> +        where "n" is the number of scalar iterations covered by the segment
> +        and where each access spans idx_access units.
> +
> +        A is the range of bytes accessed when the step is negative,
> +        B is the range when the step is positive.
> +
> +        When checking for general overlap, we need to test whether
> +        the range:
> +
> +          [min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1]
> +
> +        overlaps:
> +
> +          [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
> +
> +        where:
> +
> +           low_offsetN = +ve step ? 0 : -idx_lenN;
> +          high_offsetN = +ve step ? idx_lenN : 0;
> +
> +        This is equivalent to testing whether:
> +
> +          min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
> +          && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
> +
> +        Converting this into a single test, there is an overlap if:
> +
> +          0 <= min2 - min1 + bias <= limit
> +
> +        where  bias = high_offset2 + idx_access2 - 1 - low_offset1
> +              limit = (high_offset1 - low_offset1 + idx_access1 - 1)
> +                    + (high_offset2 - low_offset2 + idx_access2 - 1)
> +         i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
> +
> +        Combining the tests requires limit to be computable in an unsigned
> +        form of the index type; if it isn't, we fall back to the usual
> +        pointer-based checks.  */
> +      poly_offset_int limit = (idx_len1 + idx_access1 - 1
> +                              + idx_len2 + idx_access2 - 1);
> +      tree utype = unsigned_type_for (TREE_TYPE (min1));
> +      if (!wi::fits_to_tree_p (limit, utype))
> +       return false;
> +
> +      poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
> +      poly_offset_int high_offset2 = neg_step ? 0 : idx_len2;
> +      poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
> +
> +      tree subject = fold_build2 (MINUS_EXPR, utype,
> +                                 fold_convert (utype, min2),
> +                                 fold_convert (utype, min1));
> +      subject = fold_build2 (PLUS_EXPR, utype, subject,
> +                            wide_int_to_tree (utype, bias));
> +      tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
> +                                        wide_int_to_tree (utype, limit));
>        if (*cond_expr)
>         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
>                                   *cond_expr, part_cond_expr);
> Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
> ===================================================================
> --- /dev/null   2019-09-17 11:41:18.176664108 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c     2019-11-11 18:31:05.099091743 +0000
> @@ -0,0 +1,64 @@
> +#define N 200
> +#define DIST 32
> +
> +typedef signed char sc;
> +typedef unsigned char uc;
> +typedef signed short ss;
> +typedef unsigned short us;
> +typedef int si;
> +typedef unsigned int ui;
> +typedef signed long long sll;
> +typedef unsigned long long ull;
> +
> +#define FOR_EACH_TYPE(M) \
> +  M (sc) M (uc) \
> +  M (ss) M (us) \
> +  M (si) M (ui) \
> +  M (sll) M (ull) \
> +  M (float) M (double)
> +
> +#define TEST_VALUE(I) ((I) * 11 / 2)
> +
> +#define ADD_TEST(TYPE)                         \
> +  TYPE a_##TYPE[N * 2];                                \
> +  void __attribute__((noinline, noclone))      \
> +  test_##TYPE (int x, int y)                   \
> +  {                                            \
> +    for (int i = 0; i < N; ++i)                        \
> +      a_##TYPE[x - i] += a_##TYPE[y - i];      \
> +  }
> +
> +#define DO_TEST(TYPE)                                          \
> +  for (int i = 0; i < DIST * 2; ++i)                           \
> +    {                                                          \
> +      for (int j = 0; j < N + DIST * 2; ++j)                   \
> +       a_##TYPE[j] = TEST_VALUE (j);                           \
> +      test_##TYPE (i + N - 1, DIST + N - 1);                   \
> +      for (int j = 0; j < N + DIST * 2; ++j)                   \
> +       {                                                       \
> +         TYPE expected;                                        \
> +         if (j < i || j >= i + N)                              \
> +           expected = TEST_VALUE (j);                          \
> +         else if (i >= DIST)                                   \
> +           expected = ((TYPE) TEST_VALUE (j)                   \
> +                       + (TYPE) TEST_VALUE (j + DIST - i));    \
> +         else                                                  \
> +           expected = ((TYPE) TEST_VALUE (j)                   \
> +                       + a_##TYPE[j + DIST - i]);              \
> +         if (expected != a_##TYPE[j])                          \
> +           __builtin_abort ();                                 \
> +       }                                                       \
> +    }
> +
> +FOR_EACH_TYPE (ADD_TEST)
> +
> +int
> +main (void)
> +{
> +  FOR_EACH_TYPE (DO_TEST)
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
> +/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
> +/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
> Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
> ===================================================================
> --- /dev/null   2019-09-17 11:41:18.176664108 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c     2019-11-11 18:31:05.099091743 +0000
> @@ -0,0 +1,62 @@
> +#define N 200
> +#define DIST 32
> +
> +typedef signed char sc;
> +typedef unsigned char uc;
> +typedef signed short ss;
> +typedef unsigned short us;
> +typedef int si;
> +typedef unsigned int ui;
> +typedef signed long long sll;
> +typedef unsigned long long ull;
> +
> +#define FOR_EACH_TYPE(M) \
> +  M (sc) M (uc) \
> +  M (ss) M (us) \
> +  M (si) M (ui) \
> +  M (sll) M (ull) \
> +  M (float) M (double)
> +
> +#define ADD_TEST(TYPE)                         \
> +  TYPE a_##TYPE[N * 2];                                \
> +  void __attribute__((noinline, noclone))      \
> +  test_##TYPE (int x, int y)                   \
> +  {                                            \
> +    for (int i = 0; i < N; ++i)                        \
> +      {                                                \
> +       a_##TYPE[i + x] = i;                    \
> +       a_##TYPE[i + y] = 42 - i * 2;           \
> +      }                                                \
> +  }
> +
> +#define DO_TEST(TYPE)                                          \
> +  for (int i = 0; i < DIST * 2; ++i)                           \
> +    {                                                          \
> +      __builtin_memset (a_##TYPE, 0, sizeof (a_##TYPE));       \
> +      test_##TYPE (DIST, i);                                   \
> +      for (int j = 0; j < N + DIST * 2; ++j)                   \
> +       {                                                       \
> +         TYPE expected = 0;                                    \
> +         if (i > DIST && j >= i && j < i + N)                  \
> +           expected = 42 - (j - i) * 2;                        \
> +         if (j >= DIST && j < DIST + N)                        \
> +           expected = j - DIST;                                \
> +         if (i <= DIST && j >= i && j < i + N)                 \
> +           expected = 42 - (j - i) * 2;                        \
> +         if (expected != a_##TYPE[j])                          \
> +           __builtin_abort ();                                 \
> +       }                                                       \
> +    }
> +
> +FOR_EACH_TYPE (ADD_TEST)
> +
> +int
> +main (void)
> +{
> +  FOR_EACH_TYPE (DO_TEST)
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
> +/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
> +/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
> Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c
> ===================================================================
> --- /dev/null   2019-09-17 11:41:18.176664108 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c     2019-11-11 18:31:05.099091743 +0000
> @@ -0,0 +1,66 @@
> +#define N 200
> +#define DIST 32
> +
> +typedef signed char sc;
> +typedef unsigned char uc;
> +typedef signed short ss;
> +typedef unsigned short us;
> +typedef int si;
> +typedef unsigned int ui;
> +typedef signed long long sll;
> +typedef unsigned long long ull;
> +
> +#define FOR_EACH_TYPE(M) \
> +  M (sc) M (uc) \
> +  M (ss) M (us) \
> +  M (si) M (ui) \
> +  M (sll) M (ull) \
> +  M (float) M (double)
> +
> +#define TEST_VALUE(I) ((I) * 11 / 2)
> +
> +#define ADD_TEST(TYPE)                         \
> +  TYPE a_##TYPE[N * 2];                                \
> +  TYPE __attribute__((noinline, noclone))      \
> +  test_##TYPE (int x, int y)                   \
> +  {                                            \
> +    TYPE res = 0;                              \
> +    for (int i = 0; i < N; ++i)                        \
> +      {                                                \
> +       a_##TYPE[i + x] = i;                    \
> +       res += a_##TYPE[i + y];                 \
> +      }                                                \
> +    return res;                                        \
> +  }
> +
> +#define DO_TEST(TYPE)                                          \
> +  for (int i = 0; i < DIST * 2; ++i)                           \
> +    {                                                          \
> +      for (int j = 0; j < N + DIST * 2; ++j)                   \
> +       a_##TYPE[j] = TEST_VALUE (j);                           \
> +      TYPE res = test_##TYPE (DIST, i);                                \
> +      for (int j = 0; j < N; ++j)                              \
> +       if (a_##TYPE[j + DIST] != (TYPE) j)                     \
> +         __builtin_abort ();                                   \
> +      TYPE expected_res = 0;                                   \
> +      for (int j = i; j < i + N; ++j)                          \
> +       if (i <= DIST && j >= DIST && j < DIST + N)             \
> +         expected_res += j - DIST;                             \
> +       else                                                    \
> +         expected_res += TEST_VALUE (j);                       \
> +      if (expected_res != res)                                 \
> +       __builtin_abort ();                                     \
> +    }
> +
> +FOR_EACH_TYPE (ADD_TEST)
> +
> +int
> +main (void)
> +{
> +  FOR_EACH_TYPE (DO_TEST)
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */
> +/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
> +/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */

Patch
diff mbox series

Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c	2019-11-11 18:31:01.263118514 +0000
+++ gcc/tree-data-ref.c	2019-11-11 18:31:05.099091743 +0000
@@ -1744,7 +1744,9 @@  prune_runtime_alias_test_list (vec<dr_wi
 
    We can create expression based on index rather than address:
 
-     (i_0 + 4 < j_0 || j_0 + 4 < i_0)
+     (unsigned) (i_0 - j_0 + 3) <= 6
+
+   i.e. the indices are less than 4 apart.
 
    Note evolution step of index needs to be considered in comparison.  */
 
@@ -1781,15 +1783,8 @@  create_intersect_range_checks_index (cla
   if (neg_step)
     {
       abs_step = -abs_step;
-      seg_len1 = -seg_len1;
-      seg_len2 = -seg_len2;
-    }
-  else
-    {
-      /* Include the access size in the length, so that we only have one
-	 tree addition below.  */
-      seg_len1 += dr_a.access_size;
-      seg_len2 += dr_b.access_size;
+      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
+      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
     }
 
   /* Infer the number of iterations with which the memory segment is accessed
@@ -1803,16 +1798,13 @@  create_intersect_range_checks_index (cla
       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
     return false;
 
-  poly_uint64 niter_access1 = 0, niter_access2 = 0;
-  if (neg_step)
-    {
-      /* Divide each access size by the byte step, rounding up.  */
-      if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
-			    abs_step, &niter_access1)
-	  || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
-			       abs_step, &niter_access2))
-	return false;
-    }
+  /* Divide each access size by the byte step, rounding up.  */
+  poly_uint64 niter_access1, niter_access2;
+  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
+			abs_step, &niter_access1)
+      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
+			   abs_step, &niter_access2))
+    return false;
 
   unsigned int i;
   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
@@ -1852,38 +1844,87 @@  create_intersect_range_checks_index (cla
 	 index of data reference.  Like segment length, index length is
 	 linear function of the number of iterations with index_step as
 	 the coefficient, i.e, niter_len * idx_step.  */
-      tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
-				   build_int_cst (TREE_TYPE (min1),
-						  niter_len1));
-      tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
-				   build_int_cst (TREE_TYPE (min2),
-						  niter_len2));
-      tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
-      tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
-      /* Adjust ranges for negative step.  */
+      offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
+						  SIGNED);
       if (neg_step)
-	{
-	  /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
-	  std::swap (min1, max1);
-	  std::swap (min2, max2);
-
-	  /* As with the lengths just calculated, we've measured the access
-	     sizes in iterations, so multiply them by the index step.  */
-	  tree idx_access1
-	    = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
-			   build_int_cst (TREE_TYPE (min1), niter_access1));
-	  tree idx_access2
-	    = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
-			   build_int_cst (TREE_TYPE (min2), niter_access2));
-
-	  /* MINUS_EXPR because the above values are negative.  */
-	  max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
-	  max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
-	}
-      tree part_cond_expr
-	= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
-	    fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
-	    fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
+	abs_idx_step = -abs_idx_step;
+      poly_offset_int idx_len1 = abs_idx_step * niter_len1;
+      poly_offset_int idx_len2 = abs_idx_step * niter_len2;
+      poly_offset_int idx_access1 = abs_idx_step * niter_access1;
+      poly_offset_int idx_access2 = abs_idx_step * niter_access2;
+
+      gcc_assert (known_ge (idx_len1, 0)
+		  && known_ge (idx_len2, 0)
+		  && known_ge (idx_access1, 0)
+		  && known_ge (idx_access2, 0));
+
+      /* Each access has the following pattern, with lengths measured
+	 in units of INDEX:
+
+	      <-- idx_len -->
+	      <--- A: -ve step --->
+	      +-----+-------+-----+-------+-----+
+	      | n-1 | ..... |  0  | ..... | n-1 |
+	      +-----+-------+-----+-------+-----+
+			    <--- B: +ve step --->
+			    <-- idx_len -->
+			    |
+			   min
+
+	 where "n" is the number of scalar iterations covered by the segment
+	 and where each access spans idx_access units.
+
+	 A is the range of bytes accessed when the step is negative,
+	 B is the range when the step is positive.
+
+	 When checking for general overlap, we need to test whether
+	 the range:
+
+	   [min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1]
+
+	 overlaps:
+
+	   [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
+
+	 where:
+
+	    low_offsetN = +ve step ? 0 : -idx_lenN;
+	   high_offsetN = +ve step ? idx_lenN : 0;
+
+	 This is equivalent to testing whether:
+
+	   min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
+	   && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
+
+	 Converting this into a single test, there is an overlap if:
+
+	   0 <= min2 - min1 + bias <= limit
+
+	 where  bias = high_offset2 + idx_access2 - 1 - low_offset1
+	       limit = (high_offset1 - low_offset1 + idx_access1 - 1)
+		     + (high_offset2 - low_offset2 + idx_access2 - 1)
+	  i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
+
+	 Combining the tests requires limit to be computable in an unsigned
+	 form of the index type; if it isn't, we fall back to the usual
+	 pointer-based checks.  */
+      poly_offset_int limit = (idx_len1 + idx_access1 - 1
+			       + idx_len2 + idx_access2 - 1);
+      tree utype = unsigned_type_for (TREE_TYPE (min1));
+      if (!wi::fits_to_tree_p (limit, utype))
+	return false;
+
+      poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
+      poly_offset_int high_offset2 = neg_step ? 0 : idx_len2;
+      poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
+
+      tree subject = fold_build2 (MINUS_EXPR, utype,
+				  fold_convert (utype, min2),
+				  fold_convert (utype, min1));
+      subject = fold_build2 (PLUS_EXPR, utype, subject,
+			     wide_int_to_tree (utype, bias));
+      tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
+					 wide_int_to_tree (utype, limit));
       if (*cond_expr)
 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
 				  *cond_expr, part_cond_expr);
Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
===================================================================
--- /dev/null	2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c	2019-11-11 18:31:05.099091743 +0000
@@ -0,0 +1,64 @@ 
+#define N 200
+#define DIST 32
+
+typedef signed char sc;
+typedef unsigned char uc;
+typedef signed short ss;
+typedef unsigned short us;
+typedef int si;
+typedef unsigned int ui;
+typedef signed long long sll;
+typedef unsigned long long ull;
+
+#define FOR_EACH_TYPE(M) \
+  M (sc) M (uc) \
+  M (ss) M (us) \
+  M (si) M (ui) \
+  M (sll) M (ull) \
+  M (float) M (double)
+
+#define TEST_VALUE(I) ((I) * 11 / 2)
+
+#define ADD_TEST(TYPE)				\
+  TYPE a_##TYPE[N * 2];				\
+  void __attribute__((noinline, noclone))	\
+  test_##TYPE (int x, int y)			\
+  {						\
+    for (int i = 0; i < N; ++i)			\
+      a_##TYPE[x - i] += a_##TYPE[y - i];	\
+  }
+
+#define DO_TEST(TYPE)						\
+  for (int i = 0; i < DIST * 2; ++i)				\
+    {								\
+      for (int j = 0; j < N + DIST * 2; ++j)			\
+	a_##TYPE[j] = TEST_VALUE (j);				\
+      test_##TYPE (i + N - 1, DIST + N - 1);			\
+      for (int j = 0; j < N + DIST * 2; ++j)			\
+	{							\
+	  TYPE expected;					\
+	  if (j < i || j >= i + N)				\
+	    expected = TEST_VALUE (j);				\
+	  else if (i >= DIST)					\
+	    expected = ((TYPE) TEST_VALUE (j)			\
+			+ (TYPE) TEST_VALUE (j + DIST - i));	\
+	  else							\
+	    expected = ((TYPE) TEST_VALUE (j)			\
+			+ a_##TYPE[j + DIST - i]);		\
+	  if (expected != a_##TYPE[j])				\
+	    __builtin_abort ();					\
+	}							\
+    }
+
+FOR_EACH_TYPE (ADD_TEST)
+
+int
+main (void)
+{
+  FOR_EACH_TYPE (DO_TEST)
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
===================================================================
--- /dev/null	2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c	2019-11-11 18:31:05.099091743 +0000
@@ -0,0 +1,62 @@ 
+#define N 200
+#define DIST 32
+
+typedef signed char sc;
+typedef unsigned char uc;
+typedef signed short ss;
+typedef unsigned short us;
+typedef int si;
+typedef unsigned int ui;
+typedef signed long long sll;
+typedef unsigned long long ull;
+
+#define FOR_EACH_TYPE(M) \
+  M (sc) M (uc) \
+  M (ss) M (us) \
+  M (si) M (ui) \
+  M (sll) M (ull) \
+  M (float) M (double)
+
+#define ADD_TEST(TYPE)				\
+  TYPE a_##TYPE[N * 2];				\
+  void __attribute__((noinline, noclone))	\
+  test_##TYPE (int x, int y)			\
+  {						\
+    for (int i = 0; i < N; ++i)			\
+      {						\
+	a_##TYPE[i + x] = i;			\
+	a_##TYPE[i + y] = 42 - i * 2;		\
+      }						\
+  }
+
+#define DO_TEST(TYPE)						\
+  for (int i = 0; i < DIST * 2; ++i)				\
+    {								\
+      __builtin_memset (a_##TYPE, 0, sizeof (a_##TYPE));	\
+      test_##TYPE (DIST, i);					\
+      for (int j = 0; j < N + DIST * 2; ++j)			\
+	{							\
+	  TYPE expected = 0;					\
+	  if (i > DIST && j >= i && j < i + N)			\
+	    expected = 42 - (j - i) * 2;			\
+	  if (j >= DIST && j < DIST + N)			\
+	    expected = j - DIST;				\
+	  if (i <= DIST && j >= i && j < i + N)			\
+	    expected = 42 - (j - i) * 2;			\
+	  if (expected != a_##TYPE[j])				\
+	    __builtin_abort ();					\
+	}							\
+    }
+
+FOR_EACH_TYPE (ADD_TEST)
+
+int
+main (void)
+{
+  FOR_EACH_TYPE (DO_TEST)
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c
===================================================================
--- /dev/null	2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c	2019-11-11 18:31:05.099091743 +0000
@@ -0,0 +1,66 @@ 
+#define N 200
+#define DIST 32
+
+typedef signed char sc;
+typedef unsigned char uc;
+typedef signed short ss;
+typedef unsigned short us;
+typedef int si;
+typedef unsigned int ui;
+typedef signed long long sll;
+typedef unsigned long long ull;
+
+#define FOR_EACH_TYPE(M) \
+  M (sc) M (uc) \
+  M (ss) M (us) \
+  M (si) M (ui) \
+  M (sll) M (ull) \
+  M (float) M (double)
+
+#define TEST_VALUE(I) ((I) * 11 / 2)
+
+#define ADD_TEST(TYPE)				\
+  TYPE a_##TYPE[N * 2];				\
+  TYPE __attribute__((noinline, noclone))	\
+  test_##TYPE (int x, int y)			\
+  {						\
+    TYPE res = 0;				\
+    for (int i = 0; i < N; ++i)			\
+      {						\
+	a_##TYPE[i + x] = i;			\
+	res += a_##TYPE[i + y];			\
+      }						\
+    return res;					\
+  }
+
+#define DO_TEST(TYPE)						\
+  for (int i = 0; i < DIST * 2; ++i)				\
+    {								\
+      for (int j = 0; j < N + DIST * 2; ++j)			\
+	a_##TYPE[j] = TEST_VALUE (j);				\
+      TYPE res = test_##TYPE (DIST, i);				\
+      for (int j = 0; j < N; ++j)				\
+	if (a_##TYPE[j + DIST] != (TYPE) j)			\
+	  __builtin_abort ();					\
+      TYPE expected_res = 0;					\
+      for (int j = i; j < i + N; ++j)				\
+	if (i <= DIST && j >= DIST && j < DIST + N)		\
+	  expected_res += j - DIST;				\
+	else							\
+	  expected_res += TEST_VALUE (j);			\
+      if (expected_res != res)					\
+	__builtin_abort ();					\
+    }
+
+FOR_EACH_TYPE (ADD_TEST)
+
+int
+main (void)
+{
+  FOR_EACH_TYPE (DO_TEST)
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */