Adapt a couple of scalar comparison match.pd optimizations for vector comparisons against uniform vectors (PR target/88152)

Message ID 20181129075439.GD12380@tucnak
State: New
Series: Adapt a couple of scalar comparison match.pd optimizations for vector comparisons against uniform vectors (PR target/88152)

Commit Message

Jakub Jelinek Nov. 29, 2018, 7:54 a.m. UTC
Hi!

The following patch adapts a couple of optimizations for scalar comparisons
against INTEGER_CST to vector comparisons against a uniform_vector_p
VECTOR_CST.

The PR specifically asked for turning a > INT_MAX, a >= INT_MAX etc. into
(signed) a < 0; the first two hunks are prerequisites for that, though, so
that everything does not have to be duplicated for the boundary values +/- 1.

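For illustration only (not part of the patch), a minimal GNU C++ sketch of
the kind of source this affects; the vector typedefs and the function name
are just assumptions mirroring the new testcase:

  typedef unsigned int v4su __attribute__((vector_size (16)));
  typedef int v4si __attribute__((vector_size (16)));

  v4si
  f (v4su a)
  {
    // Compares a against the uniform vector { INT_MAX, INT_MAX, ... }.
    // With this patch, forwprop should fold it into a sign-bit test,
    // roughly (v4si) a < 0, instead of an unsigned vector comparison.
    return a > __INT_MAX__;
  }
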
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-11-29  Jakub Jelinek  <jakub@redhat.com>

	PR target/88152
	* match.pd: For lt/le/gt/ge against unifoprm vector VECTOR_CST,
	perform similar simplifications like for scalar comparisons.

	* g++.dg/tree-ssa/pr88152.C: New test.


	Jakub

Comments

Richard Biener Nov. 29, 2018, 9:22 a.m. UTC | #1
On Thu, 29 Nov 2018, Jakub Jelinek wrote:

> Hi!
> 
> The following patch adapts a couple of optimizations for scalar comparisons
> against INTEGER_CST to vector comparisons against a uniform_vector_p
> VECTOR_CST.
> 
> The PR specifically asked for turning a > INT_MAX, a >= INT_MAX etc. into
> (signed) a < 0; the first two hunks are prerequisites for that, though, so
> that everything does not have to be duplicated for the boundary values +/- 1.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.  I didn't spend a lot of time trying to see if we can merge the
scalar and vector variants, but I trust you did ;)

Richard.

> 2018-11-29  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR target/88152
> 	PR target/88152
> 	* match.pd: For lt/le/gt/ge against uniform vector VECTOR_CST,
> 	perform simplifications similar to those for scalar comparisons.
> 
> 	* g++.dg/tree-ssa/pr88152.C: New test.
> 
> --- gcc/match.pd.jj	2018-11-14 17:42:53.000000000 +0100
> +++ gcc/match.pd	2018-11-28 16:57:28.377978794 +0100
> @@ -3109,14 +3109,29 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (simplify
>    (cmp @0 INTEGER_CST@1)
>    (if (tree_int_cst_sgn (@1) == -1)
> -   (acmp @0 { wide_int_to_tree (TREE_TYPE (@1), wi::to_wide (@1) + 1); }))))
> +   (acmp @0 { wide_int_to_tree (TREE_TYPE (@1), wi::to_wide (@1) + 1); })))
> + (simplify
> +  (cmp @0 VECTOR_CST@1)
> +  (with { tree cst = uniform_vector_p (@1); }
> +   (if (cst && TREE_CODE (cst) == INTEGER_CST && tree_int_cst_sgn (cst) == -1)
> +    (acmp @0 { build_vector_from_val (TREE_TYPE (@1),
> +				      wide_int_to_tree (TREE_TYPE (cst),
> +							wi::to_wide (cst)
> +							+ 1)); })))))
>  (for cmp  (ge lt)
>       acmp (gt le)
>   (simplify
>    (cmp @0 INTEGER_CST@1)
>    (if (tree_int_cst_sgn (@1) == 1)
> -   (acmp @0 { wide_int_to_tree (TREE_TYPE (@1), wi::to_wide (@1) - 1); }))))
> -
> +   (acmp @0 { wide_int_to_tree (TREE_TYPE (@1), wi::to_wide (@1) - 1); })))
> + (simplify
> +  (cmp @0 VECTOR_CST@1)
> +  (with { tree cst = uniform_vector_p (@1); }
> +   (if (cst && TREE_CODE (cst) == INTEGER_CST && tree_int_cst_sgn (cst) == 1)
> +    (acmp @0 { build_vector_from_val (TREE_TYPE (@1),
> +				      wide_int_to_tree (TREE_TYPE (cst),
> +							wi::to_wide (cst)
> +							- 1)); })))))
>  
>  /* We can simplify a logical negation of a comparison to the
>     inverted comparison.  As we cannot compute an expression
> @@ -3993,7 +4008,84 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>         (with { tree st = signed_type_for (arg1_type); }
>          (if (cmp == LE_EXPR)
>  	 (ge (convert:st @0) { build_zero_cst (st); })
> -	 (lt (convert:st @0) { build_zero_cst (st); }))))))))))
> +	 (lt (convert:st @0) { build_zero_cst (st); })))))))))
> + /* And the same for vector comparisons against uniform vector csts.  */
> + (simplify
> +  (cmp (convert?@2 @0) VECTOR_CST@1)
> +  (if (VECTOR_TYPE_P (TREE_TYPE (@1))
> +       && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (@1)))
> +       && uniform_vector_p (@1)
> +       && tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE (@0)))
> +   (with
> +    {
> +      tree arg1_type = TREE_TYPE (TREE_TYPE (@1));
> +      tree cst = uniform_vector_p (@1);
> +      unsigned int prec = TYPE_PRECISION (arg1_type);
> +      wide_int max = wi::max_value (arg1_type);
> +      wide_int signed_max = wi::max_value (prec, SIGNED);
> +      wide_int min = wi::min_value (arg1_type);
> +    }
> +    (switch
> +     (if (wi::to_wide (cst) == max)
> +      (switch
> +       (if (cmp == GT_EXPR)
> +	{ constant_boolean_node (false, type); })
> +       (if (cmp == GE_EXPR)
> +	(eq @2 @1))
> +       (if (cmp == LE_EXPR)
> +	{ constant_boolean_node (true, type); })
> +       (if (cmp == LT_EXPR)
> +	(ne @2 @1))))
> +     (if (wi::to_wide (cst) == min)
> +      (switch
> +       (if (cmp == LT_EXPR)
> +        { constant_boolean_node (false, type); })
> +       (if (cmp == LE_EXPR)
> +        (eq @2 @1))
> +       (if (cmp == GE_EXPR)
> +        { constant_boolean_node (true, type); })
> +       (if (cmp == GT_EXPR)
> +        (ne @2 @1))))
> +     (if (wi::to_wide (cst) == max - 1)
> +      (switch
> +       (if (cmp == GT_EXPR)
> +	(eq @2 { build_vector_from_val (type,
> +					wide_int_to_tree (TREE_TYPE (cst),
> +							  wi::to_wide (cst)
> +							  + 1)); }))
> +       (if (cmp == LE_EXPR)
> +	(ne @2 { build_vector_from_val (type,
> +					wide_int_to_tree (TREE_TYPE (cst),
> +							  wi::to_wide (cst)
> +							  + 1)); }))))
> +     (if (wi::to_wide (cst) == min + 1)
> +      (switch
> +       (if (cmp == GE_EXPR)
> +        (ne @2 { build_vector_from_val (type,
> +					wide_int_to_tree (TREE_TYPE (cst),
> +							  wi::to_wide (cst)
> +							  - 1)); }))
> +       (if (cmp == LT_EXPR)
> +        (eq @2 { build_vector_from_val (type,
> +					wide_int_to_tree (TREE_TYPE (cst),
> +							  wi::to_wide (cst)
> +							  - 1)); }))))
> +     (if (wi::to_wide (cst) == signed_max
> +	  && TYPE_UNSIGNED (arg1_type)
> +	  /* We will flip the signedness of the comparison operator
> +	     associated with the mode of @1, so the sign bit is
> +	     specified by this mode.  Check that @1 is the signed
> +	     max associated with this sign bit.  */
> +	  && prec == GET_MODE_PRECISION (SCALAR_INT_TYPE_MODE (arg1_type))
> +	  /* signed_type does not work on pointer types.  */
> +	  && INTEGRAL_TYPE_P (arg1_type))
> +      /* The following case also applies to X < signed_max+1
> +	 and X >= signed_max+1 because previous transformations.  */
> +      (if (cmp == LE_EXPR || cmp == GT_EXPR)
> +       (with { tree st = signed_type_for (TREE_TYPE (@1)); }
> +        (if (cmp == LE_EXPR)
> +	 (ge (view_convert:st @0) { build_zero_cst (st); })
> +	 (lt (view_convert:st @0) { build_zero_cst (st); }))))))))))
>  
>  (for cmp (unordered ordered unlt unle ungt unge uneq ltgt)
>   /* If the second operand is NaN, the result is constant.  */
> --- gcc/testsuite/g++.dg/tree-ssa/pr88152.C.jj	2018-11-28 17:06:41.282815253 +0100
> +++ gcc/testsuite/g++.dg/tree-ssa/pr88152.C	2018-11-28 17:07:37.202886556 +0100
> @@ -0,0 +1,55 @@
> +// PR target/88152
> +// { dg-do compile }
> +// { dg-options "-O2 -std=c++14 -fdump-tree-forwprop1" }
> +// { dg-final { scan-tree-dump-times " \(?:<|>=\) \{ 0\[, ]" 120 "forwprop1" } }
> +
> +template <typename T, int N>
> +using V [[gnu::vector_size (sizeof (T) * N)]] = T;
> +
> +void *foo ();
> +
> +template <typename T, int N, T max, T maxp1>
> +__attribute__((noipa)) void
> +test_uns ()
> +{
> +  V<T, N> *x = (V<T, N> *) foo ();
> +  x[1] = x[0] > max;
> +  x[3] = x[2] >= maxp1;
> +  x[5] = x[4] <= max;
> +  x[7] = x[6] < maxp1;
> +}
> +
> +template <typename T, int N>
> +__attribute__((noipa)) void
> +test ()
> +{
> +  V<T, N> *x = (V<T, N> *) foo ();
> +  x[1] = x[0] >= 0;
> +  x[3] = x[2] > -1;
> +  x[5] = x[4] < 0;
> +  x[7] = x[6] <= -1;
> +}
> +
> +template <int N>
> +__attribute__((noipa)) void
> +tests ()
> +{
> +  test_uns<unsigned char, N, __SCHAR_MAX__, 1U + __SCHAR_MAX__> ();
> +  test<signed char, N> ();
> +  test_uns<unsigned short int, N, __SHRT_MAX__, 1U + __SHRT_MAX__> ();
> +  test<short int, N> ();
> +  test_uns<unsigned int, N, __INT_MAX__, 1U + __INT_MAX__> ();
> +  test<int, N> ();
> +  test_uns<unsigned long int, N, __LONG_MAX__, 1UL + __LONG_MAX__> ();
> +  test<long int, N> ();
> +  test_uns<unsigned long long int, N, __LONG_LONG_MAX__, 1ULL + __LONG_LONG_MAX__> ();
> +  test<long long int, N> ();
> +}
> +
> +void
> +all_tests ()
> +{
> +  tests<1> ();
> +  tests<2> ();
> +  tests<8> ();
> +}
> 
> 	Jakub
> 
>
Jakub Jelinek Nov. 29, 2018, 9:31 a.m. UTC | #2
On Thu, Nov 29, 2018 at 10:22:15AM +0100, Richard Biener wrote:
> OK.  I didn't spend a lot of time trying to see if we can merge the
> scalar and vector variants, but I trust you did ;)

Yeah, sadly it is too different, even if we were to introduce a predicate
for whether a tree is either an INTEGER_CST, or a VECTOR_CST whose
uniform_vector_p element is non-NULL and an INTEGER_CST, e.g. because we
need to use build_vector_from_val and take care of using sometimes the
vector type and sometimes the element type.

	Jakub

Patch

--- gcc/match.pd.jj	2018-11-14 17:42:53.000000000 +0100
+++ gcc/match.pd	2018-11-28 16:57:28.377978794 +0100
@@ -3109,14 +3109,29 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (simplify
   (cmp @0 INTEGER_CST@1)
   (if (tree_int_cst_sgn (@1) == -1)
-   (acmp @0 { wide_int_to_tree (TREE_TYPE (@1), wi::to_wide (@1) + 1); }))))
+   (acmp @0 { wide_int_to_tree (TREE_TYPE (@1), wi::to_wide (@1) + 1); })))
+ (simplify
+  (cmp @0 VECTOR_CST@1)
+  (with { tree cst = uniform_vector_p (@1); }
+   (if (cst && TREE_CODE (cst) == INTEGER_CST && tree_int_cst_sgn (cst) == -1)
+    (acmp @0 { build_vector_from_val (TREE_TYPE (@1),
+				      wide_int_to_tree (TREE_TYPE (cst),
+							wi::to_wide (cst)
+							+ 1)); })))))
 (for cmp  (ge lt)
      acmp (gt le)
  (simplify
   (cmp @0 INTEGER_CST@1)
   (if (tree_int_cst_sgn (@1) == 1)
-   (acmp @0 { wide_int_to_tree (TREE_TYPE (@1), wi::to_wide (@1) - 1); }))))
-
+   (acmp @0 { wide_int_to_tree (TREE_TYPE (@1), wi::to_wide (@1) - 1); })))
+ (simplify
+  (cmp @0 VECTOR_CST@1)
+  (with { tree cst = uniform_vector_p (@1); }
+   (if (cst && TREE_CODE (cst) == INTEGER_CST && tree_int_cst_sgn (cst) == 1)
+    (acmp @0 { build_vector_from_val (TREE_TYPE (@1),
+				      wide_int_to_tree (TREE_TYPE (cst),
+							wi::to_wide (cst)
+							- 1)); })))))
 
 /* We can simplify a logical negation of a comparison to the
    inverted comparison.  As we cannot compute an expression
@@ -3993,7 +4008,84 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        (with { tree st = signed_type_for (arg1_type); }
         (if (cmp == LE_EXPR)
 	 (ge (convert:st @0) { build_zero_cst (st); })
-	 (lt (convert:st @0) { build_zero_cst (st); }))))))))))
+	 (lt (convert:st @0) { build_zero_cst (st); })))))))))
+ /* And the same for vector comparisons against uniform vector csts.  */
+ (simplify
+  (cmp (convert?@2 @0) VECTOR_CST@1)
+  (if (VECTOR_TYPE_P (TREE_TYPE (@1))
+       && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (@1)))
+       && uniform_vector_p (@1)
+       && tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE (@0)))
+   (with
+    {
+      tree arg1_type = TREE_TYPE (TREE_TYPE (@1));
+      tree cst = uniform_vector_p (@1);
+      unsigned int prec = TYPE_PRECISION (arg1_type);
+      wide_int max = wi::max_value (arg1_type);
+      wide_int signed_max = wi::max_value (prec, SIGNED);
+      wide_int min = wi::min_value (arg1_type);
+    }
+    (switch
+     (if (wi::to_wide (cst) == max)
+      (switch
+       (if (cmp == GT_EXPR)
+	{ constant_boolean_node (false, type); })
+       (if (cmp == GE_EXPR)
+	(eq @2 @1))
+       (if (cmp == LE_EXPR)
+	{ constant_boolean_node (true, type); })
+       (if (cmp == LT_EXPR)
+	(ne @2 @1))))
+     (if (wi::to_wide (cst) == min)
+      (switch
+       (if (cmp == LT_EXPR)
+        { constant_boolean_node (false, type); })
+       (if (cmp == LE_EXPR)
+        (eq @2 @1))
+       (if (cmp == GE_EXPR)
+        { constant_boolean_node (true, type); })
+       (if (cmp == GT_EXPR)
+        (ne @2 @1))))
+     (if (wi::to_wide (cst) == max - 1)
+      (switch
+       (if (cmp == GT_EXPR)
+	(eq @2 { build_vector_from_val (type,
+					wide_int_to_tree (TREE_TYPE (cst),
+							  wi::to_wide (cst)
+							  + 1)); }))
+       (if (cmp == LE_EXPR)
+	(ne @2 { build_vector_from_val (type,
+					wide_int_to_tree (TREE_TYPE (cst),
+							  wi::to_wide (cst)
+							  + 1)); }))))
+     (if (wi::to_wide (cst) == min + 1)
+      (switch
+       (if (cmp == GE_EXPR)
+        (ne @2 { build_vector_from_val (type,
+					wide_int_to_tree (TREE_TYPE (cst),
+							  wi::to_wide (cst)
+							  - 1)); }))
+       (if (cmp == LT_EXPR)
+        (eq @2 { build_vector_from_val (type,
+					wide_int_to_tree (TREE_TYPE (cst),
+							  wi::to_wide (cst)
+							  - 1)); }))))
+     (if (wi::to_wide (cst) == signed_max
+	  && TYPE_UNSIGNED (arg1_type)
+	  /* We will flip the signedness of the comparison operator
+	     associated with the mode of @1, so the sign bit is
+	     specified by this mode.  Check that @1 is the signed
+	     max associated with this sign bit.  */
+	  && prec == GET_MODE_PRECISION (SCALAR_INT_TYPE_MODE (arg1_type))
+	  /* signed_type does not work on pointer types.  */
+	  && INTEGRAL_TYPE_P (arg1_type))
+      /* The following case also applies to X < signed_max+1
+	 and X >= signed_max+1 because previous transformations.  */
+      (if (cmp == LE_EXPR || cmp == GT_EXPR)
+       (with { tree st = signed_type_for (TREE_TYPE (@1)); }
+        (if (cmp == LE_EXPR)
+	 (ge (view_convert:st @0) { build_zero_cst (st); })
+	 (lt (view_convert:st @0) { build_zero_cst (st); }))))))))))
 
 (for cmp (unordered ordered unlt unle ungt unge uneq ltgt)
  /* If the second operand is NaN, the result is constant.  */
--- gcc/testsuite/g++.dg/tree-ssa/pr88152.C.jj	2018-11-28 17:06:41.282815253 +0100
+++ gcc/testsuite/g++.dg/tree-ssa/pr88152.C	2018-11-28 17:07:37.202886556 +0100
@@ -0,0 +1,55 @@ 
+// PR target/88152
+// { dg-do compile }
+// { dg-options "-O2 -std=c++14 -fdump-tree-forwprop1" }
+// { dg-final { scan-tree-dump-times " \(?:<|>=\) \{ 0\[, ]" 120 "forwprop1" } }
+
+template <typename T, int N>
+using V [[gnu::vector_size (sizeof (T) * N)]] = T;
+
+void *foo ();
+
+template <typename T, int N, T max, T maxp1>
+__attribute__((noipa)) void
+test_uns ()
+{
+  V<T, N> *x = (V<T, N> *) foo ();
+  x[1] = x[0] > max;
+  x[3] = x[2] >= maxp1;
+  x[5] = x[4] <= max;
+  x[7] = x[6] < maxp1;
+}
+
+template <typename T, int N>
+__attribute__((noipa)) void
+test ()
+{
+  V<T, N> *x = (V<T, N> *) foo ();
+  x[1] = x[0] >= 0;
+  x[3] = x[2] > -1;
+  x[5] = x[4] < 0;
+  x[7] = x[6] <= -1;
+}
+
+template <int N>
+__attribute__((noipa)) void
+tests ()
+{
+  test_uns<unsigned char, N, __SCHAR_MAX__, 1U + __SCHAR_MAX__> ();
+  test<signed char, N> ();
+  test_uns<unsigned short int, N, __SHRT_MAX__, 1U + __SHRT_MAX__> ();
+  test<short int, N> ();
+  test_uns<unsigned int, N, __INT_MAX__, 1U + __INT_MAX__> ();
+  test<int, N> ();
+  test_uns<unsigned long int, N, __LONG_MAX__, 1UL + __LONG_MAX__> ();
+  test<long int, N> ();
+  test_uns<unsigned long long int, N, __LONG_LONG_MAX__, 1ULL + __LONG_LONG_MAX__> ();
+  test<long long int, N> ();
+}
+
+void
+all_tests ()
+{
+  tests<1> ();
+  tests<2> ();
+  tests<8> ();
+}