diff mbox series

tree-optimization/104112 - add check for vect epilogue reduc reuse

Message ID o726nnr0-n524-57n1-13o5-1353qs3235rn@fhfr.qr
State New
Headers show
Series tree-optimization/104112 - add check for vect epilogue reduc reuse | expand

Commit Message

Richard Biener Jan. 19, 2022, 1:43 p.m. UTC
This adds a missing check for the availability of intermediate vector
types required to re-use the accumulator of a vectorized reduction
in the vectorized epilogue.  For SVE and VNx2DF vs. V2DF with
-msve-vector-bits=512, for example, V4DF is not available.

In addition, we have to verify that the reduction operation is
supported; otherwise, on i?86 for example, we get vector code that
is later decomposed again by vector lowering when trying to use
a V2HI epilogue for a V8HI reduction on a target without
TARGET_MMX_WITH_SSE.

We might want to enable -Wvector-operation-performance for all vect.exp
tests, but that seems to run into existing regressions.

Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?

Thanks,
Richard.

2022-01-19  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/104112
	* tree-vect-loop.cc (vect_find_reusable_accumulator): Check
	for required intermediate vector types.

	* gcc.dg/vect/pr104112-1.c: New testcase.
	* gcc.dg/vect/pr104112-2.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr104112-1.c | 18 ++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr104112-2.c | 11 +++++++++++
 gcc/tree-vect-loop.cc                  | 15 ++++++++++++++-
 3 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104112-1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104112-2.c

Comments

Richard Sandiford Jan. 19, 2022, 1:47 p.m. UTC | #1
Richard Biener <rguenther@suse.de> writes:
> This adds a missing check for the availability of intermediate vector
> types required to re-use the accumulator of a vectorized reduction
> in the vectorized epilogue.  For SVE and VNx2DF vs V2DF with
> -msve-vector-bits=512 for example V4DF is not available.
>
> In addition to that we have to verify the reduction operation is
> supported, otherwise we for example on i?86 get vector code that's
> later decomposed again by vector lowering when trying to use
> a V2HI epilogue for a V8HI reduction with a target without
> TARGET_MMX_WITH_SSE.
>
> It might be we want -Wvector-operation-performance for all vect.exp
> tests but that seems to have existing regressions.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?

LGTM.  The earlier patch also passed testing on SVE FWIW.

Thanks,
Richard

>
> Thanks,
> Richard.
>
> 2022-01-19  Richard Biener  <rguenther@suse.de>
>
> 	PR tree-optimization/104112
> 	* tree-vect-loop.cc (vect_find_reusable_accumulator): Check
> 	for required intermediate vector types.
>
> 	* gcc.dg/vect/pr104112-1.c: New testcase.
> 	* gcc.dg/vect/pr104112-2.c: New testcase.
> ---
>  gcc/testsuite/gcc.dg/vect/pr104112-1.c | 18 ++++++++++++++++++
>  gcc/testsuite/gcc.dg/vect/pr104112-2.c | 11 +++++++++++
>  gcc/tree-vect-loop.cc                  | 15 ++++++++++++++-
>  3 files changed, 43 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr104112-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr104112-2.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/pr104112-1.c b/gcc/testsuite/gcc.dg/vect/pr104112-1.c
> new file mode 100644
> index 00000000000..84e69b85170
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr104112-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-Ofast" } */
> +/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=512" { target aarch64-*-* } } */
> +
> +void
> +boom(int n, double *a, double *x)
> +{
> +  int i, j;
> +  double temp;
> +
> +  for (j = n; j >= 1; --j)
> +    {
> +      temp = x[j];
> +      for (i = j - 1; i >= 1; --i)
> +	temp += a[i + j] * x[i];
> +      x[j] = temp;
> +    }
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/pr104112-2.c b/gcc/testsuite/gcc.dg/vect/pr104112-2.c
> new file mode 100644
> index 00000000000..7469b3c5d84
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr104112-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* Diagnose vector ops that are later decomposed.  */
> +/* { dg-additional-options "-Wvector-operation-performance" } */
> +
> +unsigned short foo (unsigned short *a, int n)
> +{
> +  unsigned short sum = 0;
> +  for (int i = 0; i < n; ++i)
> +    sum += a[i];
> +  return sum;
> +}
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 0fe3529b2d1..0b2785a5ed6 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -4979,9 +4979,22 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
>    /* Handle the case where we can reduce wider vectors to narrower ones.  */
>    tree vectype = STMT_VINFO_VECTYPE (reduc_info);
>    tree old_vectype = TREE_TYPE (accumulator->reduc_input);
> +  unsigned HOST_WIDE_INT m;
>    if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype),
> -			    TYPE_VECTOR_SUBPARTS (vectype)))
> +			    TYPE_VECTOR_SUBPARTS (vectype), &m))
>      return false;
> +  /* Check the intermediate vector types are available.  */
> +  while (m > 2)
> +    {
> +      m /= 2;
> +      tree intermediate_vectype = get_related_vectype_for_scalar_type
> +	(TYPE_MODE (vectype), TREE_TYPE (vectype),
> +	 exact_div (TYPE_VECTOR_SUBPARTS (old_vectype), m));
> +      if (!intermediate_vectype
> +	  || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info),
> +				    intermediate_vectype))
> +	return false;
> +    }
>  
>    /* Non-SLP reductions might apply an adjustment after the reduction
>       operation, in order to simplify the initialization of the accumulator.
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/pr104112-1.c b/gcc/testsuite/gcc.dg/vect/pr104112-1.c
new file mode 100644
index 00000000000..84e69b85170
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104112-1.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast" } */
+/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=512" { target aarch64-*-* } } */
+
+void
+boom(int n, double *a, double *x)
+{
+  int i, j;
+  double temp;
+
+  for (j = n; j >= 1; --j)
+    {
+      temp = x[j];
+      for (i = j - 1; i >= 1; --i)
+	temp += a[i + j] * x[i];
+      x[j] = temp;
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr104112-2.c b/gcc/testsuite/gcc.dg/vect/pr104112-2.c
new file mode 100644
index 00000000000..7469b3c5d84
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104112-2.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* Diagnose vector ops that are later decomposed.  */
+/* { dg-additional-options "-Wvector-operation-performance" } */
+
+unsigned short foo (unsigned short *a, int n)
+{
+  unsigned short sum = 0;
+  for (int i = 0; i < n; ++i)
+    sum += a[i];
+  return sum;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 0fe3529b2d1..0b2785a5ed6 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -4979,9 +4979,22 @@  vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
   /* Handle the case where we can reduce wider vectors to narrower ones.  */
   tree vectype = STMT_VINFO_VECTYPE (reduc_info);
   tree old_vectype = TREE_TYPE (accumulator->reduc_input);
+  unsigned HOST_WIDE_INT m;
   if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype),
-			    TYPE_VECTOR_SUBPARTS (vectype)))
+			    TYPE_VECTOR_SUBPARTS (vectype), &m))
     return false;
+  /* Check the intermediate vector types are available.  */
+  while (m > 2)
+    {
+      m /= 2;
+      tree intermediate_vectype = get_related_vectype_for_scalar_type
+	(TYPE_MODE (vectype), TREE_TYPE (vectype),
+	 exact_div (TYPE_VECTOR_SUBPARTS (old_vectype), m));
+      if (!intermediate_vectype
+	  || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info),
+				    intermediate_vectype))
+	return false;
+    }
 
   /* Non-SLP reductions might apply an adjustment after the reduction
      operation, in order to simplify the initialization of the accumulator.