diff mbox series

MATCH: Optimize `VEC_SHL_INSERT (dup (A), A)` to just `dup (A) [PR116075]

Message ID 20240725155516.3965022-1-quic_apinski@quicinc.com
State New
Headers show
Series MATCH: Optimize `VEC_SHL_INSERT (dup (A), A)` to just `dup (A) [PR116075] | expand

Commit Message

Andrew Pinski July 25, 2024, 3:55 p.m. UTC
It was noticed if we have `.VEC_SHL_INSERT ({ 0, ... }, 0)` it was not being
simplified to just `{ 0, ... }`. This was generated from the autovectorizer
(maybe even on accident, see PR tree-optmization/116081).

This adds a few SVE testcases to see if this is optimized since the
auto-vectorizer or intrinsics are the only two ways of getting this
produced.

Build and tested for aarch64-linux-gnu with no regressions.

	PR target/116075

gcc/ChangeLog:

	* match.pd (`VEC_SHL_INSERT (dup (A), A)`): New pattern.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/dup-insr-1.c: New test.
	* gcc.target/aarch64/sve/dup-insr-2.c: New test.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
 gcc/match.pd                                  | 17 ++++++++++++
 .../gcc.target/aarch64/sve/dup-insr-1.c       | 26 +++++++++++++++++++
 .../gcc.target/aarch64/sve/dup-insr-2.c       | 26 +++++++++++++++++++
 3 files changed, 69 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c

Comments

Richard Biener July 25, 2024, 5:19 p.m. UTC | #1
> Am 25.07.2024 um 17:56 schrieb Andrew Pinski <quic_apinski@quicinc.com>:
> 
> It was noticed if we have `.VEC_SHL_INSERT ({ 0, ... }, 0)` it was not being
> simplified to just `{ 0, ... }`. This was generated from the autovectorizer
> (maybe even on accident, see PR tree-optmization/116081).
> 
> This adds a few SVE testcases to see if this is optimized since the
> auto-vectorizer or intrinsics are the only two ways of getting this
> produced.
> 
> Build and tested for aarch64-linux-gnu with no regressions.

For the case in question implementing fold_const_call would be better.  Also …

>    PR target/116075
> 
> gcc/ChangeLog:
> 
>    * match.pd (`VEC_SHL_INSERT (dup (A), A)`): New pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>    * gcc.target/aarch64/sve/dup-insr-1.c: New test.
>    * gcc.target/aarch64/sve/dup-insr-2.c: New test.
> 
> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> ---
> gcc/match.pd                                  | 17 ++++++++++++
> .../gcc.target/aarch64/sve/dup-insr-1.c       | 26 +++++++++++++++++++
> .../gcc.target/aarch64/sve/dup-insr-2.c       | 26 +++++++++++++++++++
> 3 files changed, 69 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 680dfea523f..a3a64bd742e 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -10657,3 +10657,20 @@ and,
>       }
>       (if (full_perm_p)
>    (vec_perm (op@3 @0 @1) @3 @2))))))
> +
> +/* vec shift left insert (dup(A), A) -> dup(A) */
> +(simplify
> + (IFN_VEC_SHL_INSERT vec_same_elem_p@0 @1)
> +  (with {
> +    tree elem = uniform_vector_p (@0);
> +    if (!elem && TREE_CODE (@0) == SSA_NAME)
> +      {
> +        gimple *def = SSA_NAME_DEF_STMT (@0);
> +    if (gimple_assign_rhs_code (def) == CONSTRUCTOR)
> +      elem = uniform_vector_p (gimple_assign_rhs1 (def));
> +    else if (gimple_assign_rhs_code (def) == VEC_DUPLICATE_EXPR)
> +      elem = gimple_assign_rhs1 (def);
> +      }
> +   }
> +    (if (elem && operand_equal_p (@1, elem))

Ugh.  Two predicates involved and we still have to do this?

> +     @0)))
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
> new file mode 100644
> index 00000000000..41dcbba45cf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-optimized" } */
> +/* PR target/116075 */
> +
> +#include <arm_sve.h>
> +
> +svint8_t f(void)
> +{
> +  svint8_t tt;
> +  tt = svdup_s8 (0);
> +  tt = svinsr (tt, 0);
> +  return tt;
> +}
> +
> +svint8_t f1(int8_t t)
> +{
> +  svint8_t tt;
> +  tt = svdup_s8 (t);
> +  tt = svinsr (tt, t);
> +  return tt;
> +}
> +
> +/* The above 2 functions should have removed the VEC_SHL_INSERT. */
> +
> +/* { dg-final { scan-tree-dump-not ".VEC_SHL_INSERT " "optimized" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
> new file mode 100644
> index 00000000000..8eafe974624
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-optimized" } */
> +/* PR target/116075 */
> +
> +#include <arm_sve.h>
> +
> +svint8_t f(int8_t t)
> +{
> +  svint8_t tt;
> +  tt = svdup_s8 (0);
> +  tt = svinsr (tt, t);
> +  return tt;
> +}
> +
> +svint8_t f1(int8_t t)
> +{
> +  svint8_t tt;
> +  tt = svdup_s8 (t);
> +  tt = svinsr (tt, 0);
> +  return tt;
> +}
> +
> +/* The above 2 functions should not have removed the VEC_SHL_INSERT. */
> +
> +/* { dg-final { scan-tree-dump-times ".VEC_SHL_INSERT " 2 "optimized" } } */
> +
> --
> 2.43.0
>
Andrew Pinski July 25, 2024, 8:57 p.m. UTC | #2
On Thu, Jul 25, 2024 at 10:20 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
>
>
> > Am 25.07.2024 um 17:56 schrieb Andrew Pinski <quic_apinski@quicinc.com>:
> >
> > It was noticed if we have `.VEC_SHL_INSERT ({ 0, ... }, 0)` it was not being
> > simplified to just `{ 0, ... }`. This was generated from the autovectorizer
> > (maybe even on accident, see PR tree-optmization/116081).
> >
> > This adds a few SVE testcases to see if this is optimized since the
> > auto-vectorizer or intrinsics are the only two ways of getting this
> > produced.
> >
> > Build and tested for aarch64-linux-gnu with no regressions.
>
> For the case in question implementing fold_const_call would be better.  Also …

Makes sense, I have a patch which I am testing that does this.

>
> >    PR target/116075
> >
> > gcc/ChangeLog:
> >
> >    * match.pd (`VEC_SHL_INSERT (dup (A), A)`): New pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> >    * gcc.target/aarch64/sve/dup-insr-1.c: New test.
> >    * gcc.target/aarch64/sve/dup-insr-2.c: New test.
> >
> > Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> > ---
> > gcc/match.pd                                  | 17 ++++++++++++
> > .../gcc.target/aarch64/sve/dup-insr-1.c       | 26 +++++++++++++++++++
> > .../gcc.target/aarch64/sve/dup-insr-2.c       | 26 +++++++++++++++++++
> > 3 files changed, 69 insertions(+)
> > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
> > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 680dfea523f..a3a64bd742e 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -10657,3 +10657,20 @@ and,
> >       }
> >       (if (full_perm_p)
> >    (vec_perm (op@3 @0 @1) @3 @2))))))
> > +
> > +/* vec shift left insert (dup(A), A) -> dup(A) */
> > +(simplify
> > + (IFN_VEC_SHL_INSERT vec_same_elem_p@0 @1)
> > +  (with {
> > +    tree elem = uniform_vector_p (@0);
> > +    if (!elem && TREE_CODE (@0) == SSA_NAME)
> > +      {
> > +        gimple *def = SSA_NAME_DEF_STMT (@0);
> > +    if (gimple_assign_rhs_code (def) == CONSTRUCTOR)
> > +      elem = uniform_vector_p (gimple_assign_rhs1 (def));
> > +    else if (gimple_assign_rhs_code (def) == VEC_DUPLICATE_EXPR)
> > +      elem = gimple_assign_rhs1 (def);
> > +      }
> > +   }
> > +    (if (elem && operand_equal_p (@1, elem))
>
> Ugh.  Two predicates involved and we still have to do this?

vec_same_elem_p is not the best predicate and other uses it does the
same as above.
Anyways I have simplified this down to just supporting vec_duplicate
and it still fixes the f1 in dup-insr-1.c.
Once my tests are finished, I will post the patch.

Thanks,
Andrew

>
> > +     @0)))
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
> > new file mode 100644
> > index 00000000000..41dcbba45cf
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
> > @@ -0,0 +1,26 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O -fdump-tree-optimized" } */
> > +/* PR target/116075 */
> > +
> > +#include <arm_sve.h>
> > +
> > +svint8_t f(void)
> > +{
> > +  svint8_t tt;
> > +  tt = svdup_s8 (0);
> > +  tt = svinsr (tt, 0);
> > +  return tt;
> > +}
> > +
> > +svint8_t f1(int8_t t)
> > +{
> > +  svint8_t tt;
> > +  tt = svdup_s8 (t);
> > +  tt = svinsr (tt, t);
> > +  return tt;
> > +}
> > +
> > +/* The above 2 functions should have removed the VEC_SHL_INSERT. */
> > +
> > +/* { dg-final { scan-tree-dump-not ".VEC_SHL_INSERT " "optimized" } } */
> > +
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
> > new file mode 100644
> > index 00000000000..8eafe974624
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
> > @@ -0,0 +1,26 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O -fdump-tree-optimized" } */
> > +/* PR target/116075 */
> > +
> > +#include <arm_sve.h>
> > +
> > +svint8_t f(int8_t t)
> > +{
> > +  svint8_t tt;
> > +  tt = svdup_s8 (0);
> > +  tt = svinsr (tt, t);
> > +  return tt;
> > +}
> > +
> > +svint8_t f1(int8_t t)
> > +{
> > +  svint8_t tt;
> > +  tt = svdup_s8 (t);
> > +  tt = svinsr (tt, 0);
> > +  return tt;
> > +}
> > +
> > +/* The above 2 functions should not have removed the VEC_SHL_INSERT. */
> > +
> > +/* { dg-final { scan-tree-dump-times ".VEC_SHL_INSERT " 2 "optimized" } } */
> > +
> > --
> > 2.43.0
> >
Richard Biener July 26, 2024, 6:37 a.m. UTC | #3
On Thu, Jul 25, 2024 at 10:57 PM Andrew Pinski <pinskia@gmail.com> wrote:
>
> On Thu, Jul 25, 2024 at 10:20 AM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> >
> >
> > > Am 25.07.2024 um 17:56 schrieb Andrew Pinski <quic_apinski@quicinc.com>:
> > >
> > > It was noticed if we have `.VEC_SHL_INSERT ({ 0, ... }, 0)` it was not being
> > > simplified to just `{ 0, ... }`. This was generated from the autovectorizer
> > > (maybe even on accident, see PR tree-optmization/116081).
> > >
> > > This adds a few SVE testcases to see if this is optimized since the
> > > auto-vectorizer or intrinsics are the only two ways of getting this
> > > produced.
> > >
> > > Build and tested for aarch64-linux-gnu with no regressions.
> >
> > For the case in question implementing fold_const_call would be better.  Also …
>
> Makes sense, I have a patch which I am testing that does this.
>
> >
> > >    PR target/116075
> > >
> > > gcc/ChangeLog:
> > >
> > >    * match.pd (`VEC_SHL_INSERT (dup (A), A)`): New pattern.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >    * gcc.target/aarch64/sve/dup-insr-1.c: New test.
> > >    * gcc.target/aarch64/sve/dup-insr-2.c: New test.
> > >
> > > Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> > > ---
> > > gcc/match.pd                                  | 17 ++++++++++++
> > > .../gcc.target/aarch64/sve/dup-insr-1.c       | 26 +++++++++++++++++++
> > > .../gcc.target/aarch64/sve/dup-insr-2.c       | 26 +++++++++++++++++++
> > > 3 files changed, 69 insertions(+)
> > > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
> > > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index 680dfea523f..a3a64bd742e 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -10657,3 +10657,20 @@ and,
> > >       }
> > >       (if (full_perm_p)
> > >    (vec_perm (op@3 @0 @1) @3 @2))))))
> > > +
> > > +/* vec shift left insert (dup(A), A) -> dup(A) */
> > > +(simplify
> > > + (IFN_VEC_SHL_INSERT vec_same_elem_p@0 @1)
> > > +  (with {
> > > +    tree elem = uniform_vector_p (@0);
> > > +    if (!elem && TREE_CODE (@0) == SSA_NAME)
> > > +      {
> > > +        gimple *def = SSA_NAME_DEF_STMT (@0);
> > > +    if (gimple_assign_rhs_code (def) == CONSTRUCTOR)
> > > +      elem = uniform_vector_p (gimple_assign_rhs1 (def));
> > > +    else if (gimple_assign_rhs_code (def) == VEC_DUPLICATE_EXPR)
> > > +      elem = gimple_assign_rhs1 (def);
> > > +      }
> > > +   }
> > > +    (if (elem && operand_equal_p (@1, elem))
> >
> > Ugh.  Two predicates involved and we still have to do this?
>
> vec_same_elem_p is not the best predicate and other uses it does the
> same as above.

Yeah, there's a lack of a way to do (IFN_VEC_SHL_INSERT (vec_same_elem_p @1) @1)
aka give (match ...) predicates an argument - it only has results.  Or
alternatively to do

(match (vec_same_elem_p @0)
 @1
 (with
   {
      tree el = uniform_vector_p (@1);
      @0 = el;
   }
  (if (el)))

The above actually works, but only if you mention @0 in the match expression,
so s/@1/@0/ "works" but of course it's not really intended this way(?)

> Anyways I have simplified this down to just supporting vec_duplicate
> and it still fixes the f1 in dup-insr-1.c.
> Once my tests are finished, I will post the patch.
>
> Thanks,
> Andrew
>
> >
> > > +     @0)))
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
> > > new file mode 100644
> > > index 00000000000..41dcbba45cf
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
> > > @@ -0,0 +1,26 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O -fdump-tree-optimized" } */
> > > +/* PR target/116075 */
> > > +
> > > +#include <arm_sve.h>
> > > +
> > > +svint8_t f(void)
> > > +{
> > > +  svint8_t tt;
> > > +  tt = svdup_s8 (0);
> > > +  tt = svinsr (tt, 0);
> > > +  return tt;
> > > +}
> > > +
> > > +svint8_t f1(int8_t t)
> > > +{
> > > +  svint8_t tt;
> > > +  tt = svdup_s8 (t);
> > > +  tt = svinsr (tt, t);
> > > +  return tt;
> > > +}
> > > +
> > > +/* The above 2 functions should have removed the VEC_SHL_INSERT. */
> > > +
> > > +/* { dg-final { scan-tree-dump-not ".VEC_SHL_INSERT " "optimized" } } */
> > > +
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
> > > new file mode 100644
> > > index 00000000000..8eafe974624
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
> > > @@ -0,0 +1,26 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O -fdump-tree-optimized" } */
> > > +/* PR target/116075 */
> > > +
> > > +#include <arm_sve.h>
> > > +
> > > +svint8_t f(int8_t t)
> > > +{
> > > +  svint8_t tt;
> > > +  tt = svdup_s8 (0);
> > > +  tt = svinsr (tt, t);
> > > +  return tt;
> > > +}
> > > +
> > > +svint8_t f1(int8_t t)
> > > +{
> > > +  svint8_t tt;
> > > +  tt = svdup_s8 (t);
> > > +  tt = svinsr (tt, 0);
> > > +  return tt;
> > > +}
> > > +
> > > +/* The above 2 functions should not have removed the VEC_SHL_INSERT. */
> > > +
> > > +/* { dg-final { scan-tree-dump-times ".VEC_SHL_INSERT " 2 "optimized" } } */
> > > +
> > > --
> > > 2.43.0
> > >
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 680dfea523f..a3a64bd742e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10657,3 +10657,20 @@  and,
       }
       (if (full_perm_p)
 	(vec_perm (op@3 @0 @1) @3 @2))))))
+
+/* vec shift left insert (dup(A), A) -> dup(A) */
+(simplify
+ (IFN_VEC_SHL_INSERT vec_same_elem_p@0 @1)
+  (with {
+    tree elem = uniform_vector_p (@0);
+    if (!elem && TREE_CODE (@0) == SSA_NAME)
+      {
+        gimple *def = SSA_NAME_DEF_STMT (@0);
+	if (gimple_assign_rhs_code (def) == CONSTRUCTOR)
+	  elem = uniform_vector_p (gimple_assign_rhs1 (def));
+	else if (gimple_assign_rhs_code (def) == VEC_DUPLICATE_EXPR)
+	  elem = gimple_assign_rhs1 (def);
+      }
+   }
+    (if (elem && operand_equal_p (@1, elem))
+     @0)))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
new file mode 100644
index 00000000000..41dcbba45cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
@@ -0,0 +1,26 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+/* PR target/116075 */
+
+#include <arm_sve.h>
+
+svint8_t f(void)
+{
+  svint8_t tt;
+  tt = svdup_s8 (0);
+  tt = svinsr (tt, 0);
+  return tt;
+}
+
+svint8_t f1(int8_t t)
+{
+  svint8_t tt;
+  tt = svdup_s8 (t);
+  tt = svinsr (tt, t);
+  return tt;
+}
+
+/* The above 2 functions should have removed the VEC_SHL_INSERT. */
+
+/* { dg-final { scan-tree-dump-not ".VEC_SHL_INSERT " "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
new file mode 100644
index 00000000000..8eafe974624
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
@@ -0,0 +1,26 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+/* PR target/116075 */
+
+#include <arm_sve.h>
+
+svint8_t f(int8_t t)
+{
+  svint8_t tt;
+  tt = svdup_s8 (0);
+  tt = svinsr (tt, t);
+  return tt;
+}
+
+svint8_t f1(int8_t t)
+{
+  svint8_t tt;
+  tt = svdup_s8 (t);
+  tt = svinsr (tt, 0);
+  return tt;
+}
+
+/* The above 2 functions should not have removed the VEC_SHL_INSERT. */
+
+/* { dg-final { scan-tree-dump-times ".VEC_SHL_INSERT " 2 "optimized" } } */
+