diff mbox

Allow non-overflow ops in vect_is_simple_reduction_1

Message ID 55B770A1.6010308@mentor.com
State New
Headers show

Commit Message

Tom de Vries July 28, 2015, 12:08 p.m. UTC
On 28/07/15 09:59, Richard Biener wrote:
> On Fri, Jul 24, 2015 at 4:39 PM, Tom de Vries <Tom_deVries@mentor.com> wrote:
>> Hi,
>>
>> this patch allows parallelization and vectorization of reduction operators
>> that are guaranteed to not overflow (such as min and max operators),
>> independent of the overflow behaviour of the type.
>>
>> Bootstrapped and reg-tested on x86_64.
>>
>> OK for trunk?
>
> Hmm, I don't like that no_overflow_tree_code function.  We have a much more
> clear understanding which codes may overflow or trap.  Thus please add
> a operation specific variant of TYPE_OVERFLOW_{TRAPS,WRAPS,UNDEFINED} like
>

Done.

> bool
> operation_overflow_traps (tree type, enum tree_code code)
> {
>    if (!ANY_INTEGRAL_TYPE_P (type)

I've changed this test into a gcc_checking_assert.

>       || !TYPE_OVERFLOW_TRAPS (type))
>      return false;
>    switch (code)
>      {
>      case PLUS_EXPR:
>      case MINUS_EXPR:
>      case MULT_EXPR:
>      case LSHIFT_EXPR:
>         /* Can overflow in various ways */
>      case TRUNC_DIV_EXPR:
>      case EXACT_DIV_EXPR:
>      case FLOOR_DIV_EXPR:
>      case CEIL_DIV_EXPR:
>         /* For INT_MIN / -1 */
>      case NEGATE_EXPR:
>      case ABS_EXPR:
>         /* For -INT_MIN */
>         return true;
>      default:
>         return false;
>     }
> }
>
> and similar variants for _wraps and _undefined.  I think we decided at
> some point
> the compiler should not take advantage of the fact that lshift or
> *_div have undefined
> behavior on signed integer overflow, similar we only take advantage of
> integral-type
> overflow behavior, not vector or complex.  So we could reduce the
> number of cases
> the functions return true if we document that it returns true only for
> the cases where
> the compiler needs to / may assume wrapping behavior does not take place.
> As for _traps for example we only have optabs and libfuncs for
> plus,minus,mult,negate
> and abs.

I've tried to capture all of this in the three new functions:
- operation_overflows_and_traps
- operation_no_overflow_or_wraps
- operation_overflows_and_undefined (unused atm)

I've also added the graphite bit.

OK for trunk, if bootstrap and reg-test succeeds?

Thanks,
- Tom

Comments

Richard Biener July 29, 2015, 8:09 a.m. UTC | #1
On Tue, Jul 28, 2015 at 2:08 PM, Tom de Vries <Tom_deVries@mentor.com> wrote:
> On 28/07/15 09:59, Richard Biener wrote:
>>
>> On Fri, Jul 24, 2015 at 4:39 PM, Tom de Vries <Tom_deVries@mentor.com>
>> wrote:
>>>
>>> Hi,
>>>
>>> this patch allows parallelization and vectorization of reduction
>>> operators
>>> that are guaranteed to not overflow (such as min and max operators),
>>> independent of the overflow behaviour of the type.
>>>
>>> Bootstrapped and reg-tested on x86_64.
>>>
>>> OK for trunk?
>>
>>
>> Hmm, I don't like that no_overflow_tree_code function.  We have a much
>> more
>> clear understanding which codes may overflow or trap.  Thus please add
>> a operation specific variant of TYPE_OVERFLOW_{TRAPS,WRAPS,UNDEFINED} like
>>
>
> Done.
>
>> bool
>> operation_overflow_traps (tree type, enum tree_code code)
>> {
>>    if (!ANY_INTEGRAL_TYPE_P (type)
>
>
> I've changed this test into a gcc_checking_assert.
>
>
>>       || !TYPE_OVERFLOW_TRAPS (type))
>>      return false;
>>    switch (code)
>>      {
>>      case PLUS_EXPR:
>>      case MINUS_EXPR:
>>      case MULT_EXPR:
>>      case LSHIFT_EXPR:
>>         /* Can overflow in various ways */
>>      case TRUNC_DIV_EXPR:
>>      case EXACT_DIV_EXPR:
>>      case FLOOR_DIV_EXPR:
>>      case CEIL_DIV_EXPR:
>>         /* For INT_MIN / -1 */
>>      case NEGATE_EXPR:
>>      case ABS_EXPR:
>>         /* For -INT_MIN */
>>         return true;
>>      default:
>>         return false;
>>     }
>> }
>>
>> and similar variants for _wraps and _undefined.  I think we decided at
>> some point
>> the compiler should not take advantage of the fact that lshift or
>> *_div have undefined
>> behavior on signed integer overflow, similar we only take advantage of
>> integral-type
>> overflow behavior, not vector or complex.  So we could reduce the
>> number of cases
>> the functions return true if we document that it returns true only for
>> the cases where
>> the compiler needs to / may assume wrapping behavior does not take place.
>> As for _traps for example we only have optabs and libfuncs for
>> plus,minus,mult,negate
>> and abs.
>
>
> I've tried to capture all of this in the three new functions:
> - operation_overflows_and_traps
> - operation_no_overflow_or_wraps
> - operation_overflows_and_undefined (unused atm)
>
> I've also added the graphite bit.
>
> OK for trunk, if bootstrap and reg-test succeeds?

+/* Returns true if CODE operating on operands of type TYPE can overflow, and
+   fwrapv generates trapping insns for CODE.  */

ftrapv

+bool
+operation_overflows_and_traps (tree type, enum tree_code code)
+{

operation_overflow_traps

is better wording.  Meaning that when the operation overflows then it traps.

+  /* We don't take advantage of integral type overflow behaviour for
complex and
+     vector types.  */

We don't generate instructions that trap on overflow for complex or vector types

+  if (!INTEGRAL_TYPE_P (type))
+    return true;

+  switch (code)
+    {
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+    case MULT_EXPR:
+    case LSHIFT_EXPR:
+      /* Can overflow in various ways.  */

we don't have a trapping optab for lshift

+    case TRUNC_DIV_EXPR:
+    case EXACT_DIV_EXPR:
+    case FLOOR_DIV_EXPR:
+    case CEIL_DIV_EXPR:

nor division.  See optabs.c:optab_for_tree_code.  I suggest to only return true
for those.

+/* Returns true if CODE operating on operands of type TYPE cannot overflow, or
+   wraps on overflow.  */
+
+bool
+operation_no_overflow_or_wraps (tree type, enum tree_code code)
+{
+  gcc_checking_assert (ANY_INTEGRAL_TYPE_P (type));

operation_overflow_wraps ()

is still my preferred name.

+bool
+operation_overflow_and_undefined (tree type, enum tree_code code)
+{
+  gcc_checking_assert (ANY_INTEGRAL_TYPE_P (type));
+

operation_overflow_undefined ()

If you like to keep an explicit operation_can_overflow then there is the
opportunity to split out the switch statement from operation_overflow_wraps
and operation_overflow_undefined.

Thanks,
Richard.




> Thanks,
> - Tom
diff mbox

Patch

Allow non-overflow ops in vect_is_simple_reduction_1

2015-07-28  Tom de Vries  <tom@codesourcery.com>

	* tree.c (operation_overflows_and_traps, operation_no_overflow_or_wraps)
	(operation_overflows_and_undefined): New function.
	* tree.h (operation_overflows_and_traps, operation_no_overflow_or_wraps)
	(operation_overflows_and_undefined): Declare.
	* tree-vect-loop.c (vect_is_simple_reduction_1): Use
	operation_overflows_and_traps and operation_overflows_and_wraps.
	* graphite-sese-to-poly.c (is_reduction_operation_p): Same.

	* gcc.dg/autopar/reduc-2char.c (init_arrays): Mark with attribute
	optimize ("-ftree-parallelize-loops=0").
	Add successful scans for 2 detected reductions.	 Add xfail scans for 3
	detected reductions.
	* gcc.dg/autopar/reduc-2short.c: Same.
	* gcc.dg/autopar/reduc-8.c (init_arrays): Mark with attribute
	optimize ("-ftree-parallelize-loops=0").  Add successful scans for 2
	detected reductions.
	* gcc.dg/vect/trapv-vect-reduc-4.c: Update scan to match vectorized min
	and max reductions.
---
 gcc/graphite-sese-to-poly.c                    |   6 +-
 gcc/testsuite/gcc.dg/autopar/reduc-2char.c     |  10 +-
 gcc/testsuite/gcc.dg/autopar/reduc-2short.c    |  10 +-
 gcc/testsuite/gcc.dg/autopar/reduc-8.c         |   7 +-
 gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c |   2 +-
 gcc/tree-vect-loop.c                           |   5 +-
 gcc/tree.c                                     | 125 +++++++++++++++++++++++++
 gcc/tree.h                                     |   3 +
 8 files changed, 153 insertions(+), 15 deletions(-)

diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index c583f16..b57dc9c 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -2614,8 +2614,10 @@  is_reduction_operation_p (gimple stmt)
   if (FLOAT_TYPE_P (type))
     return flag_associative_math;
 
-  return (INTEGRAL_TYPE_P (type)
-	  && TYPE_OVERFLOW_WRAPS (type));
+  if (ANY_INTEGRAL_TYPE_P (type))
+    return operation_no_overflow_or_wraps (type, code);
+
+  return false;
 }
 
 /* Returns true when PHI contains an argument ARG.  */
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
index 14867f3..a2dad44 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
@@ -39,8 +39,9 @@  void main1 (signed char x, signed char max_result, signed char min_result)
     abort ();
 }
 
- __attribute__((noinline))
- void init_arrays ()
+void __attribute__((noinline))
+  __attribute__((optimize ("-ftree-parallelize-loops=0")))
+init_arrays ()
  {
    int i;
 
@@ -60,7 +61,10 @@  int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */
+
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
 
 
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
index 7c19cc5..a50e14f 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
@@ -38,8 +38,9 @@  void main1 (short x, short max_result, short min_result)
     abort ();
 }
 
- __attribute__((noinline))
- void init_arrays ()
+void __attribute__((noinline))
+  __attribute__((optimize ("-ftree-parallelize-loops=0")))
+init_arrays ()
  {
    int i;
 
@@ -58,7 +59,8 @@  int main (void)
   return 0;
 }
 
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
-
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-8.c b/gcc/testsuite/gcc.dg/autopar/reduc-8.c
index 1d05c48..18ba03d 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-8.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-8.c
@@ -40,7 +40,8 @@  testmin (const T *c, T init, T result)
     abort ();
 }
 
-int main (void)
+int __attribute__((optimize ("-ftree-parallelize-loops=0")))
+main (void)
 { 
   static signed char A[N] = {
     0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
@@ -84,5 +85,5 @@  int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
index 2129717..86f9b90 100644
--- a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
+++ b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
@@ -46,4 +46,4 @@  int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index c31bfbd..38eab77 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2615,7 +2615,7 @@  vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
     }
   else if (INTEGRAL_TYPE_P (type) && check_reduction)
     {
-      if (TYPE_OVERFLOW_TRAPS (type))
+      if (operation_overflows_and_traps (type, code))
 	{
 	  /* Changing the order of operations changes the semantics.  */
 	  if (dump_enabled_p ())
@@ -2624,7 +2624,8 @@  vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 			    " (overflow traps): ");
 	  return NULL;
 	}
-      if (need_wrapping_integral_overflow && !TYPE_OVERFLOW_WRAPS (type))
+      if (need_wrapping_integral_overflow
+	  && !operation_no_overflow_or_wraps (type, code))
 	{
 	  /* Changing the order of operations changes the semantics.  */
 	  if (dump_enabled_p ())
diff --git a/gcc/tree.c b/gcc/tree.c
index 94263af..8708727 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -7597,6 +7597,131 @@  commutative_ternary_tree_code (enum tree_code code)
   return false;
 }
 
+/* Returns true if CODE operating on operands of type TYPE can overflow, and
+   fwrapv generates trapping insns for CODE.  */
+
+bool
+operation_overflows_and_traps (tree type, enum tree_code code)
+{
+  gcc_checking_assert (ANY_INTEGRAL_TYPE_P (type));
+
+  /* We don't take advantage of integral type overflow behaviour for complex and
+     vector types.  */
+  if (!INTEGRAL_TYPE_P (type))
+    return true;
+
+  if (!TYPE_OVERFLOW_TRAPS (type))
+    return false;
+
+  switch (code)
+    {
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+    case MULT_EXPR:
+    case LSHIFT_EXPR:
+      /* Can overflow in various ways.  */
+      return true;
+    case TRUNC_DIV_EXPR:
+    case EXACT_DIV_EXPR:
+    case FLOOR_DIV_EXPR:
+    case CEIL_DIV_EXPR:
+      /* These operators can overflow, but -fwrapv only generates trapping code
+	 for addition, subtraction and multiplication operations.  */
+      return false;
+    case NEGATE_EXPR:
+    case ABS_EXPR:
+      /* For -INT_MIN.  */
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Returns true if CODE operating on operands of type TYPE cannot overflow, or
+   wraps on overflow.  */
+
+bool
+operation_no_overflow_or_wraps (tree type, enum tree_code code)
+{
+  gcc_checking_assert (ANY_INTEGRAL_TYPE_P (type));
+
+  /* We don't take advantage of integral type overflow behaviour for complex and
+     vector types.  */
+  if (!INTEGRAL_TYPE_P (type))
+    return false;
+
+  if (TYPE_OVERFLOW_WRAPS (type))
+    return true;
+
+  switch (code)
+    {
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+    case MULT_EXPR:
+    case LSHIFT_EXPR:
+      /* Can overflow in various ways.  */
+      return false;
+    case TRUNC_DIV_EXPR:
+    case EXACT_DIV_EXPR:
+    case FLOOR_DIV_EXPR:
+    case CEIL_DIV_EXPR:
+      /* For INT_MIN / -1.  */
+      return false;
+    case NEGATE_EXPR:
+    case ABS_EXPR:
+      /* For -INT_MIN.  */
+      return false;
+    default:
+      return true;
+    }
+}
+
+/* Returns true if CODE operating on operands of type TYPE can overflow, and
+   overflow is undefined.  */
+
+bool
+operation_overflow_and_undefined (tree type, enum tree_code code)
+{
+  gcc_checking_assert (ANY_INTEGRAL_TYPE_P (type));
+
+  /* We don't take advantage of integral type overflow behaviour for complex and
+     vector types.  */
+  if (!INTEGRAL_TYPE_P (type))
+    return false;
+
+  if (!TYPE_OVERFLOW_UNDEFINED (type))
+    return false;
+
+  switch (code)
+    {
+    case LSHIFT_EXPR:
+      /* LSHIFT_EXPR can overflow, but we don't take advantage of that:
+	 GCC manual, C Implementation-defined behavior, Integers implementation:
+	 GCC does not use the latitude given in C99 and C11 only to treat
+	 certain aspects of signed << as undefined, but this is subject to
+	 change.  */
+      return false;
+    case TRUNC_DIV_EXPR:
+    case EXACT_DIV_EXPR:
+    case FLOOR_DIV_EXPR:
+    case CEIL_DIV_EXPR:
+      /* These operators can overflow, but we don't take advantage of that.
+	 FIXME: where has this been documented?  */
+      return false;
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+    case MULT_EXPR:
+      /* Can overflow in various ways.  */
+      return true;
+    case NEGATE_EXPR:
+    case ABS_EXPR:
+      /* For -INT_MIN.  */
+      return true;
+    default:
+      return false;
+    }
+}
+
 namespace inchash
 {
 
diff --git a/gcc/tree.h b/gcc/tree.h
index 6df2217..1e44f55 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4369,6 +4369,9 @@  extern int type_num_arguments (const_tree);
 extern bool associative_tree_code (enum tree_code);
 extern bool commutative_tree_code (enum tree_code);
 extern bool commutative_ternary_tree_code (enum tree_code);
+extern bool operation_overflows_and_traps (tree, enum tree_code);
+extern bool operation_no_overflow_or_wraps (tree, enum tree_code);
+extern bool operation_overflows_and_undefined (tree, enum tree_code);
 extern tree upper_bound_in_type (tree, tree);
 extern tree lower_bound_in_type (tree, tree);
 extern int operand_equal_for_phi_arg_p (const_tree, const_tree);
-- 
1.9.1