diff mbox

Allow non-overflow ops in vect_is_simple_reduction_1

Message ID 55B24E1F.1070705@mentor.com
State New
Headers show

Commit Message

Tom de Vries July 24, 2015, 2:39 p.m. UTC
Hi,

this patch allows parallelization and vectorization of reduction 
operators that are guaranteed to not overflow (such as min and max 
operators), independent of the overflow behaviour of the type.

Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom

Comments

Tom de Vries July 28, 2015, 7:24 a.m. UTC | #1
On 24/07/15 16:39, Tom de Vries wrote:
> Hi,
>
> this patch allows parallelization and vectorization of reduction
> operators that are guaranteed to not overflow (such as min and max
> operators), independent of the overflow behaviour of the type.
>
> Bootstrapped and reg-tested on x86_64.
>
> OK for trunk?
>

Committed to gomp-4_0-branch.

Thanks,
- Tom

> 0002-Allow-non-overflow-ops-in-vect_is_simple_reduction_1.patch
>
>
> Allow non-overflow ops in vect_is_simple_reduction_1
>
> 2015-07-24  Tom de Vries<tom@codesourcery.com>
>
> 	* tree.c (no_overflow_tree_code): New function.
> 	* tree.h (no_overflow_tree_code): Declare.
> 	* tree-vect-loop.c (vect_is_simple_reduction_1): Use
> 	no_overflow_tree_code.
>
> 	* gcc.dg/autopar/reduc-2char.c (init_arrays): Mark with attribute
> 	optimize ("-ftree-parallelize-loops=0").
> 	Add successful scans for 2 detected reductions.	 Add xfail scans for 3
> 	detected reductions.
> 	* gcc.dg/autopar/reduc-2short.c: Same.
> 	* gcc.dg/autopar/reduc-8.c (main): Mark with attribute
> 	optimize ("-ftree-parallelize-loops=0").  Add successful scans for 2
> 	detected reductions.
> 	* gcc.dg/vect/trapv-vect-reduc-4.c: Expect successful reductions for min
> 	and max loops.
> ---
>   gcc/testsuite/gcc.dg/autopar/reduc-2char.c     | 10 +++++++---
>   gcc/testsuite/gcc.dg/autopar/reduc-2short.c    | 10 ++++++----
>   gcc/testsuite/gcc.dg/autopar/reduc-8.c         |  7 ++++---
>   gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c |  2 +-
>   gcc/tree-vect-loop.c                           |  3 ++-
>   gcc/tree.c                                     | 24 ++++++++++++++++++++++++
>   gcc/tree.h                                     |  1 +
>   7 files changed, 45 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
> index 14867f3..a2dad44 100644
> --- a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
> +++ b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
> @@ -39,8 +39,9 @@ void main1 (signed char x, signed char max_result, signed char min_result)
>       abort ();
>   }
>
> - __attribute__((noinline))
> - void init_arrays ()
> +void __attribute__((noinline))
> +  __attribute__((optimize ("-ftree-parallelize-loops=0")))
> +init_arrays ()
>    {
>      int i;
>
> @@ -60,7 +61,10 @@ int main (void)
>   }
>
>
> -/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
> +/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */
> +
> +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
>   /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
>
>
> diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
> index 7c19cc5..a50e14f 100644
> --- a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
> +++ b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
> @@ -38,8 +38,9 @@ void main1 (short x, short max_result, short min_result)
>       abort ();
>   }
>
> - __attribute__((noinline))
> - void init_arrays ()
> +void __attribute__((noinline))
> +  __attribute__((optimize ("-ftree-parallelize-loops=0")))
> +init_arrays ()
>    {
>      int i;
>
> @@ -58,7 +59,8 @@ int main (void)
>     return 0;
>   }
>
> +/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
> +/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */
>
> -/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
>   /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
> -
> diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-8.c b/gcc/testsuite/gcc.dg/autopar/reduc-8.c
> index 1d05c48..18ba03d 100644
> --- a/gcc/testsuite/gcc.dg/autopar/reduc-8.c
> +++ b/gcc/testsuite/gcc.dg/autopar/reduc-8.c
> @@ -40,7 +40,8 @@ testmin (const T *c, T init, T result)
>       abort ();
>   }
>
> -int main (void)
> +int __attribute__((optimize ("-ftree-parallelize-loops=0")))
> +main (void)
>   {
>     static signed char A[N] = {
>       0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
> @@ -84,5 +85,5 @@ int main (void)
>   }
>
>
> -/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
> -/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
> +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
> index 2129717..86f9b90 100644
> --- a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
> +++ b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
> @@ -46,4 +46,4 @@ int main (void)
>     return 0;
>   }
>
> -/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index c31bfbd..42ba5f8 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -2613,7 +2613,8 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
>   			"reduction: unsafe fp math optimization: ");
>         return NULL;
>       }
> -  else if (INTEGRAL_TYPE_P (type) && check_reduction)
> +  else if (INTEGRAL_TYPE_P (type) && check_reduction
> +	   && !no_overflow_tree_code (code, type))
>       {
>         if (TYPE_OVERFLOW_TRAPS (type))
>   	{
> diff --git a/gcc/tree.c b/gcc/tree.c
> index 94263af..5b8dd1a 100644
> --- a/gcc/tree.c
> +++ b/gcc/tree.c
> @@ -7541,6 +7541,30 @@ associative_tree_code (enum tree_code code)
>     return false;
>   }
>
> +/* Return true if CODE represents a tree code that cannot overflow, given
> +   operand type OP_TYPE.  Otherwise return false.  */
> +bool
> +no_overflow_tree_code (enum tree_code code, tree op_type)
> +{
> +  /* For now, just handle associative tree codes.  */
> +  switch (code)
> +    {
> +    case BIT_IOR_EXPR:
> +    case BIT_AND_EXPR:
> +    case BIT_XOR_EXPR:
> +      return true;
> +
> +    case MIN_EXPR:
> +    case MAX_EXPR:
> +      return (ANY_INTEGRAL_TYPE_P (op_type)
> +	      && TREE_CODE (op_type) != COMPLEX_TYPE);
> +
> +    default:
> +      break;
> +    }
> +  return false;
> +}
> +
>   /* Return true if CODE represents a commutative tree code.  Otherwise
>      return false.  */
>   bool
> diff --git a/gcc/tree.h b/gcc/tree.h
> index 6df2217..360d13e 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4367,6 +4367,7 @@ extern tree get_file_function_name (const char *);
>   extern tree get_callee_fndecl (const_tree);
>   extern int type_num_arguments (const_tree);
>   extern bool associative_tree_code (enum tree_code);
> +extern bool no_overflow_tree_code (enum tree_code, tree);
>   extern bool commutative_tree_code (enum tree_code);
>   extern bool commutative_ternary_tree_code (enum tree_code);
>   extern tree upper_bound_in_type (tree, tree);
> -- 1.9.1
>
Richard Biener July 28, 2015, 7:59 a.m. UTC | #2
On Fri, Jul 24, 2015 at 4:39 PM, Tom de Vries <Tom_deVries@mentor.com> wrote:
> Hi,
>
> this patch allows parallelization and vectorization of reduction operators
> that are guaranteed to not overflow (such as min and max operators),
> independent of the overflow behaviour of the type.
>
> Bootstrapped and reg-tested on x86_64.
>
> OK for trunk?

Hmm, I don't like that no_overflow_tree_code function.  We have a much
clearer understanding of which codes may overflow or trap.  Thus please add
an operation-specific variant of TYPE_OVERFLOW_{TRAPS,WRAPS,UNDEFINED}, like

/* Return true if an operation with tree code CODE, carried out on operands
   of type TYPE, traps on overflow.  The answer can only be true when TYPE
   satisfies ANY_INTEGRAL_TYPE_P and TYPE_OVERFLOW_TRAPS; for any other
   type, and for any tree code not listed below, the result is false.  */
bool
operation_overflow_traps (tree type, enum tree_code code)
{
  bool traps_p;

  /* Overflow trapping is a property of the type; without it no operation
     traps.  TYPE_OVERFLOW_TRAPS is only consulted for integral types.  */
  if (!(ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)))
    return false;

  switch (code)
    {
    /* Can overflow in various ways.  */
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case LSHIFT_EXPR:
    /* For INT_MIN / -1.  */
    case TRUNC_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case CEIL_DIV_EXPR:
    /* For -INT_MIN.  */
    case NEGATE_EXPR:
    case ABS_EXPR:
      traps_p = true;
      break;

    default:
      traps_p = false;
      break;
    }

  return traps_p;
}

and similar variants for _wraps and _undefined.  I think we decided at some
point that the compiler should not take advantage of the fact that lshift or
*_div have undefined behavior on signed integer overflow; similarly, we only
take advantage of integral-type overflow behavior, not vector or complex.
So we could reduce the number of cases for which the functions return true
if we document that they return true only for the cases where the compiler
needs to / may assume that wrapping behavior does not take place.  As for
_traps, for example, we only have optabs and libfuncs for plus, minus, mult,
negate and abs.

Thanks,
Richard.

> Thanks,
> - Tom
diff mbox

Patch

Allow non-overflow ops in vect_is_simple_reduction_1

2015-07-24  Tom de Vries  <tom@codesourcery.com>

	* tree.c (no_overflow_tree_code): New function.
	* tree.h (no_overflow_tree_code): Declare.
	* tree-vect-loop.c (vect_is_simple_reduction_1): Use
	no_overflow_tree_code.

	* gcc.dg/autopar/reduc-2char.c (init_arrays): Mark with attribute
	optimize ("-ftree-parallelize-loops=0").
	Add successful scans for 2 detected reductions.	 Add xfail scans for 3
	detected reductions.
	* gcc.dg/autopar/reduc-2short.c: Same.
	* gcc.dg/autopar/reduc-8.c (main): Mark with attribute
	optimize ("-ftree-parallelize-loops=0").  Add successful scans for 2
	detected reductions.
	* gcc.dg/vect/trapv-vect-reduc-4.c: Expect successful reductions for min
	and max loops.
---
 gcc/testsuite/gcc.dg/autopar/reduc-2char.c     | 10 +++++++---
 gcc/testsuite/gcc.dg/autopar/reduc-2short.c    | 10 ++++++----
 gcc/testsuite/gcc.dg/autopar/reduc-8.c         |  7 ++++---
 gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c |  2 +-
 gcc/tree-vect-loop.c                           |  3 ++-
 gcc/tree.c                                     | 24 ++++++++++++++++++++++++
 gcc/tree.h                                     |  1 +
 7 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
index 14867f3..a2dad44 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2char.c
@@ -39,8 +39,9 @@  void main1 (signed char x, signed char max_result, signed char min_result)
     abort ();
 }
 
- __attribute__((noinline))
- void init_arrays ()
+void __attribute__((noinline))
+  __attribute__((optimize ("-ftree-parallelize-loops=0")))
+init_arrays ()
  {
    int i;
 
@@ -60,7 +61,10 @@  int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */
+
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
 
 
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
index 7c19cc5..a50e14f 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-2short.c
@@ -38,8 +38,9 @@  void main1 (short x, short max_result, short min_result)
     abort ();
 }
 
- __attribute__((noinline))
- void init_arrays ()
+void __attribute__((noinline))
+  __attribute__((optimize ("-ftree-parallelize-loops=0")))
+init_arrays ()
  {
    int i;
 
@@ -58,7 +59,8 @@  int main (void)
   return 0;
 }
 
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
-
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-8.c b/gcc/testsuite/gcc.dg/autopar/reduc-8.c
index 1d05c48..18ba03d 100644
--- a/gcc/testsuite/gcc.dg/autopar/reduc-8.c
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-8.c
@@ -40,7 +40,8 @@  testmin (const T *c, T init, T result)
     abort ();
 }
 
-int main (void)
+int __attribute__((optimize ("-ftree-parallelize-loops=0")))
+main (void)
 { 
   static signed char A[N] = {
     0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
@@ -84,5 +85,5 @@  int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
index 2129717..86f9b90 100644
--- a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
+++ b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
@@ -46,4 +46,4 @@  int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index c31bfbd..42ba5f8 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2613,7 +2613,8 @@  vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 			"reduction: unsafe fp math optimization: ");
       return NULL;
     }
-  else if (INTEGRAL_TYPE_P (type) && check_reduction)
+  else if (INTEGRAL_TYPE_P (type) && check_reduction
+	   && !no_overflow_tree_code (code, type))
     {
       if (TYPE_OVERFLOW_TRAPS (type))
 	{
diff --git a/gcc/tree.c b/gcc/tree.c
index 94263af..5b8dd1a 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -7541,6 +7541,30 @@  associative_tree_code (enum tree_code code)
   return false;
 }
 
+/* Return true if CODE represents a tree code that cannot overflow, given
+   operand type OP_TYPE.  Otherwise return false.  */
+bool
+no_overflow_tree_code (enum tree_code code, tree op_type)
+{
+  /* For now, just handle associative tree codes.  */
+  switch (code)
+    {
+    case BIT_IOR_EXPR:
+    case BIT_AND_EXPR:
+    case BIT_XOR_EXPR:
+      return true;
+
+    case MIN_EXPR:
+    case MAX_EXPR:
+      return (ANY_INTEGRAL_TYPE_P (op_type)
+	      && TREE_CODE (op_type) != COMPLEX_TYPE);
+
+    default:
+      break;
+    }
+  return false;
+}
+
 /* Return true if CODE represents a commutative tree code.  Otherwise
    return false.  */
 bool
diff --git a/gcc/tree.h b/gcc/tree.h
index 6df2217..360d13e 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4367,6 +4367,7 @@  extern tree get_file_function_name (const char *);
 extern tree get_callee_fndecl (const_tree);
 extern int type_num_arguments (const_tree);
 extern bool associative_tree_code (enum tree_code);
+extern bool no_overflow_tree_code (enum tree_code, tree);
 extern bool commutative_tree_code (enum tree_code);
 extern bool commutative_ternary_tree_code (enum tree_code);
 extern tree upper_bound_in_type (tree, tree);
-- 
1.9.1