diff mbox series

MATCH: Look through VIEW_CONVERT when folding VEC_PERM_EXPRs.

Message ID 20240522080734.2022728-1-manolis.tsamis@vrull.eu
State New
Headers show
Series MATCH: Look through VIEW_CONVERT when folding VEC_PERM_EXPRs. | expand

Commit Message

Manolis Tsamis May 22, 2024, 8:06 a.m. UTC
The match.pd patterns to merge two vector permutes into one fail when a
potentially no-op view convert expression is between the two permutes.
This change lifts this restriction.

gcc/ChangeLog:

	* match.pd: Allow no-op view_convert between permutes.

gcc/testsuite/ChangeLog:

	* gcc.dg/fold-perm-2.c: New test.

Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
---

 gcc/match.pd                       | 14 ++++++++------
 gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++
 2 files changed, 24 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c

Comments

Richard Biener May 24, 2024, 6:31 a.m. UTC | #1
On Wed, 22 May 2024, Manolis Tsamis wrote:

> The match.pd patterns to merge two vector permutes into one fail when a
> potentially no-op view convert expressions is between the two permutes.
> This change lifts this restriction.
> 
> gcc/ChangeLog:
> 
> 	* match.pd: Allow no-op view_convert between permutes.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/fold-perm-2.c: New test.
> 
> Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
> ---
> 
>  gcc/match.pd                       | 14 ++++++++------
>  gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++
>  2 files changed, 24 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 07e743ae464..cbb3c5d86e0 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -10039,19 +10039,21 @@ and,
>       d = VEC_PERM_EXPR <a, b, NEW_VCST>;  */
>  
>  (simplify
> - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
> + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5)
>   (if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
>    (with
>     {
>       machine_mode result_mode = TYPE_MODE (type);
> -     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
> +     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2));
>       int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
>       vec_perm_builder builder0;
>       vec_perm_builder builder1;
>       vec_perm_builder builder2 (nelts, nelts, 1);
>     }
> -   (if (tree_to_vec_perm_builder (&builder0, @3)
> -	&& tree_to_vec_perm_builder (&builder1, @4))
> +   (if (tree_to_vec_perm_builder (&builder0, @4)
> +	&& tree_to_vec_perm_builder (&builder1, @5)
> +	&& element_precision (TREE_TYPE (@0))
> +	   == element_precision (TREE_TYPE (@1)))

I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality
instead.

Otherwise OK.

Thanks,
Richard.

>      (with
>       {
>         vec_perm_indices sel0 (builder0, 2, nelts);
> @@ -10073,10 +10075,10 @@ and,
>  	       ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
>  		  || !can_vec_perm_const_p (result_mode, op_mode, sel1, false))
>  	       : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
> -	 op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
> +	 op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2);
>       }
>       (if (op0)
> -      (vec_perm @1 @2 { op0; })))))))
> +      (view_convert (vec_perm @2 @3 { op0; }))))))))
>  
>  /* Merge
>       c = VEC_PERM_EXPR <a, b, VCST0>;
> diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c
> new file mode 100644
> index 00000000000..1a4ab4065de
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-fre1" } */
> +
> +typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int))));
> +
> +void fun (veci *a, veci *b, veci *c)
> +{
> +  veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
> +  vecu r2 = __builtin_convertvector (r1, vecu);
> +  vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0);
> +  *c = __builtin_convertvector (r3, veci);
> +}
> +
> +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */
> +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */
>
Manolis Tsamis May 24, 2024, 7:41 a.m. UTC | #2
On Fri, May 24, 2024 at 9:31 AM Richard Biener <rguenther@suse.de> wrote:
>
> On Wed, 22 May 2024, Manolis Tsamis wrote:
>
> > The match.pd patterns to merge two vector permutes into one fail when a
> > potentially no-op view convert expressions is between the two permutes.
> > This change lifts this restriction.
> >
> > gcc/ChangeLog:
> >
> >       * match.pd: Allow no-op view_convert between permutes.
> >
> > gcc/testsuite/ChangeLog:
> >
> >       * gcc.dg/fold-perm-2.c: New test.
> >
> > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
> > ---
> >
> >  gcc/match.pd                       | 14 ++++++++------
> >  gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++
> >  2 files changed, 24 insertions(+), 6 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 07e743ae464..cbb3c5d86e0 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -10039,19 +10039,21 @@ and,
> >       d = VEC_PERM_EXPR <a, b, NEW_VCST>;  */
> >
> >  (simplify
> > - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
> > + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5)
> >   (if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
> >    (with
> >     {
> >       machine_mode result_mode = TYPE_MODE (type);
> > -     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
> > +     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2));
> >       int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
> >       vec_perm_builder builder0;
> >       vec_perm_builder builder1;
> >       vec_perm_builder builder2 (nelts, nelts, 1);
> >     }
> > -   (if (tree_to_vec_perm_builder (&builder0, @3)
> > -     && tree_to_vec_perm_builder (&builder1, @4))
> > +   (if (tree_to_vec_perm_builder (&builder0, @4)
> > +     && tree_to_vec_perm_builder (&builder1, @5)
> > +     && element_precision (TREE_TYPE (@0))
> > +        == element_precision (TREE_TYPE (@1)))
>
> I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality
> instead.
>

I think TYPE_SIZE is not enough as we need the vector elements to have
the same size, not just the vector as a whole.
For example, when using the TYPE_SIZE check instead, the following
testcase miscompiles:

typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
typedef double vecd __attribute__ ((vector_size (2 * sizeof (double))));

void fun (veci *a, veci *b, veci *c)
{
  char data[16];
  veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
  vecd r2;
  __builtin_memcpy(data, &r1, sizeof(veci));
  __builtin_memcpy(&r2, data, sizeof(vecd));
  vecd r3 = __builtin_shufflevector (r2, r2, 1, 0);
  __builtin_memcpy(data, &r3, sizeof(vecd));
  __builtin_memcpy(c, data, sizeof(veci));
}

To:

ldr     q31, [x0]
rev64   v31.4s, v31.4s
str     q31, [x2]
ret

> Otherwise OK.
>
> Thanks,
> Richard.
>
> >      (with
> >       {
> >         vec_perm_indices sel0 (builder0, 2, nelts);
> > @@ -10073,10 +10075,10 @@ and,
> >              ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
> >                 || !can_vec_perm_const_p (result_mode, op_mode, sel1, false))
> >              : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
> > -      op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
> > +      op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2);
> >       }
> >       (if (op0)
> > -      (vec_perm @1 @2 { op0; })))))))
> > +      (view_convert (vec_perm @2 @3 { op0; }))))))))
> >
> >  /* Merge
> >       c = VEC_PERM_EXPR <a, b, VCST0>;
> > diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c
> > new file mode 100644
> > index 00000000000..1a4ab4065de
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c
> > @@ -0,0 +1,16 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O -fdump-tree-fre1" } */
> > +
> > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> > +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int))));
> > +
> > +void fun (veci *a, veci *b, veci *c)
> > +{
> > +  veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
> > +  vecu r2 = __builtin_convertvector (r1, vecu);
> > +  vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0);
> > +  *c = __builtin_convertvector (r3, veci);
> > +}
> > +
> > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */
> > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */
> >
>
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
Richard Biener May 24, 2024, 7:45 a.m. UTC | #3
On Fri, 24 May 2024, Manolis Tsamis wrote:

> On Fri, May 24, 2024 at 9:31 AM Richard Biener <rguenther@suse.de> wrote:
> >
> > On Wed, 22 May 2024, Manolis Tsamis wrote:
> >
> > > The match.pd patterns to merge two vector permutes into one fail when a
> > > potentially no-op view convert expressions is between the two permutes.
> > > This change lifts this restriction.
> > >
> > > gcc/ChangeLog:
> > >
> > >       * match.pd: Allow no-op view_convert between permutes.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >       * gcc.dg/fold-perm-2.c: New test.
> > >
> > > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
> > > ---
> > >
> > >  gcc/match.pd                       | 14 ++++++++------
> > >  gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++
> > >  2 files changed, 24 insertions(+), 6 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index 07e743ae464..cbb3c5d86e0 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -10039,19 +10039,21 @@ and,
> > >       d = VEC_PERM_EXPR <a, b, NEW_VCST>;  */
> > >
> > >  (simplify
> > > - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
> > > + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5)
> > >   (if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
> > >    (with
> > >     {
> > >       machine_mode result_mode = TYPE_MODE (type);
> > > -     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
> > > +     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2));
> > >       int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
> > >       vec_perm_builder builder0;
> > >       vec_perm_builder builder1;
> > >       vec_perm_builder builder2 (nelts, nelts, 1);
> > >     }
> > > -   (if (tree_to_vec_perm_builder (&builder0, @3)
> > > -     && tree_to_vec_perm_builder (&builder1, @4))
> > > +   (if (tree_to_vec_perm_builder (&builder0, @4)
> > > +     && tree_to_vec_perm_builder (&builder1, @5)
> > > +     && element_precision (TREE_TYPE (@0))
> > > +        == element_precision (TREE_TYPE (@1)))
> >
> > I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality
> > instead.
> >
> 
> I think TYPE_SIZE is not enough as we need the vector elements to have
> the same size, not just the vector as a whole.

Err, yes - you want to check the element sizes of course.

> For example, when using the TYPE_SIZE check instead the following
> testcase miscompiles
> 
> typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> typedef double vecd __attribute__ ((vector_size (2 * sizeof (double))));
> 
> void fun (veci *a, veci *b, veci *c)
> {
>   char data[16];
>   veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
>   vecd r2;
>   __builtin_memcpy(data, &r1, sizeof(veci));
>   __builtin_memcpy(&r2, data, sizeof(vecd));
>   vecd r3 = __builtin_shufflevector (r2, r2, 1, 0);
>   __builtin_memcpy(data, &r3, sizeof(vecd));
>   __builtin_memcpy(c, data, sizeof(veci));
> }
> 
> To:
> 
> ldr     q31, [x0]
> rev64   v31.4s, v31.4s
> str     q31, [x2]
> ret
> 
> > Otherwise OK.
> >
> > Thanks,
> > Richard.
> >
> > >      (with
> > >       {
> > >         vec_perm_indices sel0 (builder0, 2, nelts);
> > > @@ -10073,10 +10075,10 @@ and,
> > >              ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
> > >                 || !can_vec_perm_const_p (result_mode, op_mode, sel1, false))
> > >              : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
> > > -      op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
> > > +      op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2);
> > >       }
> > >       (if (op0)
> > > -      (vec_perm @1 @2 { op0; })))))))
> > > +      (view_convert (vec_perm @2 @3 { op0; }))))))))
> > >
> > >  /* Merge
> > >       c = VEC_PERM_EXPR <a, b, VCST0>;
> > > diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c
> > > new file mode 100644
> > > index 00000000000..1a4ab4065de
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c
> > > @@ -0,0 +1,16 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O -fdump-tree-fre1" } */
> > > +
> > > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> > > +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int))));
> > > +
> > > +void fun (veci *a, veci *b, veci *c)
> > > +{
> > > +  veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
> > > +  vecu r2 = __builtin_convertvector (r1, vecu);
> > > +  vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0);
> > > +  *c = __builtin_convertvector (r3, veci);
> > > +}
> > > +
> > > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */
> > > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */
> > >
> >
> > --
> > Richard Biener <rguenther@suse.de>
> > SUSE Software Solutions Germany GmbH,
> > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
>
Manolis Tsamis May 24, 2024, 8:18 a.m. UTC | #4
On Fri, May 24, 2024 at 10:46 AM Richard Biener <rguenther@suse.de> wrote:
>
> On Fri, 24 May 2024, Manolis Tsamis wrote:
>
> > On Fri, May 24, 2024 at 9:31 AM Richard Biener <rguenther@suse.de> wrote:
> > >
> > > On Wed, 22 May 2024, Manolis Tsamis wrote:
> > >
> > > > The match.pd patterns to merge two vector permutes into one fail when a
> > > > potentially no-op view convert expressions is between the two permutes.
> > > > This change lifts this restriction.
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >       * match.pd: Allow no-op view_convert between permutes.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >       * gcc.dg/fold-perm-2.c: New test.
> > > >
> > > > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
> > > > ---
> > > >
> > > >  gcc/match.pd                       | 14 ++++++++------
> > > >  gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++
> > > >  2 files changed, 24 insertions(+), 6 deletions(-)
> > > >  create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c
> > > >
> > > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > > index 07e743ae464..cbb3c5d86e0 100644
> > > > --- a/gcc/match.pd
> > > > +++ b/gcc/match.pd
> > > > @@ -10039,19 +10039,21 @@ and,
> > > >       d = VEC_PERM_EXPR <a, b, NEW_VCST>;  */
> > > >
> > > >  (simplify
> > > > - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
> > > > + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5)
> > > >   (if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
> > > >    (with
> > > >     {
> > > >       machine_mode result_mode = TYPE_MODE (type);
> > > > -     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
> > > > +     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2));
> > > >       int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
> > > >       vec_perm_builder builder0;
> > > >       vec_perm_builder builder1;
> > > >       vec_perm_builder builder2 (nelts, nelts, 1);
> > > >     }
> > > > -   (if (tree_to_vec_perm_builder (&builder0, @3)
> > > > -     && tree_to_vec_perm_builder (&builder1, @4))
> > > > +   (if (tree_to_vec_perm_builder (&builder0, @4)
> > > > +     && tree_to_vec_perm_builder (&builder1, @5)
> > > > +     && element_precision (TREE_TYPE (@0))
> > > > +        == element_precision (TREE_TYPE (@1)))
> > >
> > > I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality
> > > instead.
> > >
> >
> > I think TYPE_SIZE is not enough as we need the vector elements to have
> > the same size, not just the vector as a whole.
>
> Err, yes - you want to check the element sizes of course.
>
From what I understand, checking the element size should be enough.
Otherwise we can check both TYPE_SIZE and element_precision to be
equal.
So OK to commit with just element_precision?

BTW I also noticed from these testcases that there is a gcc 13 -> 14
regression with weird XORs being introduced:

typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
void fun (veci *a, veci *b, veci *c) {
  *c = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
}

gcc 13.3:
  adrp x3, .LC0
  ldr q0, [x0]
  ldr q1, [x1]
  ldr q2, [x3, #:lo12:.LC0]
  tbl v0.16b, {v0.16b - v1.16b}, v2.16b
  str q0, [x2]

gcc 14.1:
  ldr q30, [x1]
  adrp x3, .LC0
  ldr q31, [x0]
  ldr q29, [x3, #:lo12:.LC0]
  eor v31.16b, v31.16b, v30.16b
  eor v30.16b, v31.16b, v30.16b
  eor v31.16b, v31.16b, v30.16b
  tbl v30.16b, {v30.16b - v31.16b}, v29.16b
  str q30, [x2]

Manolis

> > For example, when using the TYPE_SIZE check instead the following
> > testcase miscompiles
> >
> > typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> > typedef double vecd __attribute__ ((vector_size (2 * sizeof (double))));
> >
> > void fun (veci *a, veci *b, veci *c)
> > {
> >   char data[16];
> >   veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
> >   vecd r2;
> >   __builtin_memcpy(data, &r1, sizeof(veci));
> >   __builtin_memcpy(&r2, data, sizeof(vecd));
> >   vecd r3 = __builtin_shufflevector (r2, r2, 1, 0);
> >   __builtin_memcpy(data, &r3, sizeof(vecd));
> >   __builtin_memcpy(c, data, sizeof(veci));
> > }
> >
> > To:
> >
> > ldr     q31, [x0]
> > rev64   v31.4s, v31.4s
> > str     q31, [x2]
> > ret
> >
> > > Otherwise OK.
> > >
> > > Thanks,
> > > Richard.
> > >
> > > >      (with
> > > >       {
> > > >         vec_perm_indices sel0 (builder0, 2, nelts);
> > > > @@ -10073,10 +10075,10 @@ and,
> > > >              ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
> > > >                 || !can_vec_perm_const_p (result_mode, op_mode, sel1, false))
> > > >              : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
> > > > -      op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
> > > > +      op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2);
> > > >       }
> > > >       (if (op0)
> > > > -      (vec_perm @1 @2 { op0; })))))))
> > > > +      (view_convert (vec_perm @2 @3 { op0; }))))))))
> > > >
> > > >  /* Merge
> > > >       c = VEC_PERM_EXPR <a, b, VCST0>;
> > > > diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c
> > > > new file mode 100644
> > > > index 00000000000..1a4ab4065de
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c
> > > > @@ -0,0 +1,16 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O -fdump-tree-fre1" } */
> > > > +
> > > > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> > > > +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int))));
> > > > +
> > > > +void fun (veci *a, veci *b, veci *c)
> > > > +{
> > > > +  veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
> > > > +  vecu r2 = __builtin_convertvector (r1, vecu);
> > > > +  vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0);
> > > > +  *c = __builtin_convertvector (r3, veci);
> > > > +}
> > > > +
> > > > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */
> > > > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */
> > > >
> > >
> > > --
> > > Richard Biener <rguenther@suse.de>
> > > SUSE Software Solutions Germany GmbH,
> > > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
> >
>
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
Richard Biener May 24, 2024, 8:30 a.m. UTC | #5
On Fri, 24 May 2024, Manolis Tsamis wrote:

> On Fri, May 24, 2024 at 10:46 AM Richard Biener <rguenther@suse.de> wrote:
> >
> > On Fri, 24 May 2024, Manolis Tsamis wrote:
> >
> > > On Fri, May 24, 2024 at 9:31 AM Richard Biener <rguenther@suse.de> wrote:
> > > >
> > > > On Wed, 22 May 2024, Manolis Tsamis wrote:
> > > >
> > > > > The match.pd patterns to merge two vector permutes into one fail when a
> > > > > potentially no-op view convert expressions is between the two permutes.
> > > > > This change lifts this restriction.
> > > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > >       * match.pd: Allow no-op view_convert between permutes.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > >       * gcc.dg/fold-perm-2.c: New test.
> > > > >
> > > > > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
> > > > > ---
> > > > >
> > > > >  gcc/match.pd                       | 14 ++++++++------
> > > > >  gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++
> > > > >  2 files changed, 24 insertions(+), 6 deletions(-)
> > > > >  create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c
> > > > >
> > > > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > > > index 07e743ae464..cbb3c5d86e0 100644
> > > > > --- a/gcc/match.pd
> > > > > +++ b/gcc/match.pd
> > > > > @@ -10039,19 +10039,21 @@ and,
> > > > >       d = VEC_PERM_EXPR <a, b, NEW_VCST>;  */
> > > > >
> > > > >  (simplify
> > > > > - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
> > > > > + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5)
> > > > >   (if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
> > > > >    (with
> > > > >     {
> > > > >       machine_mode result_mode = TYPE_MODE (type);
> > > > > -     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
> > > > > +     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2));
> > > > >       int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
> > > > >       vec_perm_builder builder0;
> > > > >       vec_perm_builder builder1;
> > > > >       vec_perm_builder builder2 (nelts, nelts, 1);
> > > > >     }
> > > > > -   (if (tree_to_vec_perm_builder (&builder0, @3)
> > > > > -     && tree_to_vec_perm_builder (&builder1, @4))
> > > > > +   (if (tree_to_vec_perm_builder (&builder0, @4)
> > > > > +     && tree_to_vec_perm_builder (&builder1, @5)
> > > > > +     && element_precision (TREE_TYPE (@0))
> > > > > +        == element_precision (TREE_TYPE (@1)))
> > > >
> > > > I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality
> > > > instead.
> > > >
> > >
> > > I think TYPE_SIZE is not enough as we need the vector elements to have
> > > the same size, not just the vector as a whole.
> >
> > Err, yes - you want to check the element sizes of course.
> >
> From what I understand, checking the element size should be enough.
> Otherwise we can check both TYPE_SIZE and element_precision to be
> equal.
> So OK to commit with just element_precision?

Please just check the element size.  I'm always worried when
using TYPE_PRECISION on FP types and for shuffles it's really
only about size.

> BTW I also noticed from these testcases that there is a gcc 13 -> 14
> regression with weird XORs being introduced:
> 
> typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> void fun (veci *a, veci *b, veci *c) {
>   *c = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
> }
> 
> gcc 13.3:
>   adrp x3, .LC0
>   ldr q0, [x0]
>   ldr q1, [x1]
>   ldr q2, [x3, #:lo12:.LC0]
>   tbl v0.16b, {v0.16b - v1.16b}, v2.16b
>   str q0, [x2]
> 
> gcc 14.1:
>   ldr q30, [x1]
>   adrp x3, .LC0
>   ldr q31, [x0]
>   ldr q29, [x3, #:lo12:.LC0]
>   eor v31.16b, v31.16b, v30.16b
>   eor v30.16b, v31.16b, v30.16b
>   eor v31.16b, v31.16b, v30.16b
>   tbl v30.16b, {v30.16b - v31.16b}, v29.16b
>   str q30, [x2]

You'd need to bisect that but I'd guess we got some extra
match patterns triggering?

> Manolis
> 
> > > For example, when using the TYPE_SIZE check instead the following
> > > testcase miscompiles
> > >
> > > typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> > > typedef double vecd __attribute__ ((vector_size (2 * sizeof (double))));
> > >
> > > void fun (veci *a, veci *b, veci *c)
> > > {
> > >   char data[16];
> > >   veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
> > >   vecd r2;
> > >   __builtin_memcpy(data, &r1, sizeof(veci));
> > >   __builtin_memcpy(&r2, data, sizeof(vecd));
> > >   vecd r3 = __builtin_shufflevector (r2, r2, 1, 0);
> > >   __builtin_memcpy(data, &r3, sizeof(vecd));
> > >   __builtin_memcpy(c, data, sizeof(veci));
> > > }
> > >
> > > To:
> > >
> > > ldr     q31, [x0]
> > > rev64   v31.4s, v31.4s
> > > str     q31, [x2]
> > > ret
> > >
> > > > Otherwise OK.
> > > >
> > > > Thanks,
> > > > Richard.
> > > >
> > > > >      (with
> > > > >       {
> > > > >         vec_perm_indices sel0 (builder0, 2, nelts);
> > > > > @@ -10073,10 +10075,10 @@ and,
> > > > >              ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
> > > > >                 || !can_vec_perm_const_p (result_mode, op_mode, sel1, false))
> > > > >              : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
> > > > > -      op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
> > > > > +      op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2);
> > > > >       }
> > > > >       (if (op0)
> > > > > -      (vec_perm @1 @2 { op0; })))))))
> > > > > +      (view_convert (vec_perm @2 @3 { op0; }))))))))
> > > > >
> > > > >  /* Merge
> > > > >       c = VEC_PERM_EXPR <a, b, VCST0>;
> > > > > diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c
> > > > > new file mode 100644
> > > > > index 00000000000..1a4ab4065de
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c
> > > > > @@ -0,0 +1,16 @@
> > > > > +/* { dg-do compile } */
> > > > > +/* { dg-options "-O -fdump-tree-fre1" } */
> > > > > +
> > > > > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> > > > > +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int))));
> > > > > +
> > > > > +void fun (veci *a, veci *b, veci *c)
> > > > > +{
> > > > > +  veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
> > > > > +  vecu r2 = __builtin_convertvector (r1, vecu);
> > > > > +  vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0);
> > > > > +  *c = __builtin_convertvector (r3, veci);
> > > > > +}
> > > > > +
> > > > > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */
> > > > > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */
> > > > >
> > > >
> > > > --
> > > > Richard Biener <rguenther@suse.de>
> > > > SUSE Software Solutions Germany GmbH,
> > > > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
> > >
> >
> > --
> > Richard Biener <rguenther@suse.de>
> > SUSE Software Solutions Germany GmbH,
> > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
>
Manolis Tsamis May 24, 2024, 8:55 a.m. UTC | #6
The match.pd patterns to merge two vector permutes into one fail when a
potentially no-op view convert expression is between the two permutes.
This change lifts this restriction.

gcc/ChangeLog:

* match.pd: Allow no-op view_convert between permutes.

gcc/testsuite/ChangeLog:

* gcc.dg/fold-perm-2.c: New test.
---

gcc/match.pd | 14 ++++++++------
gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++
2 files changed, 24 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 07e743ae464..1f91b9857c8 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10039,19 +10039,21 @@ and,
d = VEC_PERM_EXPR <a, b, NEW_VCST>; */
(simplify
- (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
+ (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5)
(if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
(with
{
machine_mode result_mode = TYPE_MODE (type);
- machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
+ machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2));
int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
vec_perm_builder builder0;
vec_perm_builder builder1;
vec_perm_builder builder2 (nelts, nelts, 1);
}
- (if (tree_to_vec_perm_builder (&builder0, @3)
- && tree_to_vec_perm_builder (&builder1, @4))
+ (if (tree_to_vec_perm_builder (&builder0, @4)
+ && tree_to_vec_perm_builder (&builder1, @5)
+ && TYPE_SIZE (TREE_TYPE (TREE_TYPE (@0)))
+ == TYPE_SIZE (TREE_TYPE (TREE_TYPE (@1))))
(with
{
vec_perm_indices sel0 (builder0, 2, nelts);
@@ -10073,10 +10075,10 @@ and,
? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
|| !can_vec_perm_const_p (result_mode, op_mode, sel1, false))
: !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
- op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
+ op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2);
}
(if (op0)
- (vec_perm @1 @2 { op0; })))))))
+ (view_convert (vec_perm @2 @3 { op0; }))))))))
/* Merge
c = VEC_PERM_EXPR <a, b, VCST0>;
diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c
new file mode 100644
index 00000000000..1a4ab4065de
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-perm-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-fre1" } */
+
+typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
+typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int))));
+
+void fun (veci *a, veci *b, veci *c)
+{
+ veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
+ vecu r2 = __builtin_convertvector (r1, vecu);
+ vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0);
+ *c = __builtin_convertvector (r3, veci);
+}
+
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */
--
2.44.0

On Fri, May 24, 2024 at 11:30 AM Richard Biener <rguenther@suse.de> wrote:
>
> On Fri, 24 May 2024, Manolis Tsamis wrote:
>
> > On Fri, May 24, 2024 at 10:46 AM Richard Biener <rguenther@suse.de> wrote:
> > >
> > > On Fri, 24 May 2024, Manolis Tsamis wrote:
> > >
> > > > On Fri, May 24, 2024 at 9:31 AM Richard Biener <rguenther@suse.de> wrote:
> > > > >
> > > > > On Wed, 22 May 2024, Manolis Tsamis wrote:
> > > > >
> > > > > > The match.pd patterns to merge two vector permutes into one fail when a
> > > > > > potentially no-op view convert expressions is between the two permutes.
> > > > > > This change lifts this restriction.
> > > > > >
> > > > > > gcc/ChangeLog:
> > > > > >
> > > > > >       * match.pd: Allow no-op view_convert between permutes.
> > > > > >
> > > > > > gcc/testsuite/ChangeLog:
> > > > > >
> > > > > >       * gcc.dg/fold-perm-2.c: New test.
> > > > > >
> > > > > > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
> > > > > > ---
> > > > > >
> > > > > >  gcc/match.pd                       | 14 ++++++++------
> > > > > >  gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++
> > > > > >  2 files changed, 24 insertions(+), 6 deletions(-)
> > > > > >  create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c
> > > > > >
> > > > > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > > > > index 07e743ae464..cbb3c5d86e0 100644
> > > > > > --- a/gcc/match.pd
> > > > > > +++ b/gcc/match.pd
> > > > > > @@ -10039,19 +10039,21 @@ and,
> > > > > >       d = VEC_PERM_EXPR <a, b, NEW_VCST>;  */
> > > > > >
> > > > > >  (simplify
> > > > > > - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
> > > > > > + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5)
> > > > > >   (if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
> > > > > >    (with
> > > > > >     {
> > > > > >       machine_mode result_mode = TYPE_MODE (type);
> > > > > > -     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
> > > > > > +     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2));
> > > > > >       int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
> > > > > >       vec_perm_builder builder0;
> > > > > >       vec_perm_builder builder1;
> > > > > >       vec_perm_builder builder2 (nelts, nelts, 1);
> > > > > >     }
> > > > > > -   (if (tree_to_vec_perm_builder (&builder0, @3)
> > > > > > -     && tree_to_vec_perm_builder (&builder1, @4))
> > > > > > +   (if (tree_to_vec_perm_builder (&builder0, @4)
> > > > > > +     && tree_to_vec_perm_builder (&builder1, @5)
> > > > > > +     && element_precision (TREE_TYPE (@0))
> > > > > > +        == element_precision (TREE_TYPE (@1)))
> > > > >
> > > > > I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality
> > > > > instead.
> > > > >
> > > >
> > > > I think TYPE_SIZE is not enough as we need the vector elements to have
> > > > the same size, not just the vector as a whole.
> > >
> > > Err, yes - you want to check the element sizes of course.
> > >
> > From what I understand, checking the element size should be enough.
> > Otherwise we can check both TYPE_SIZE and element_precision to be
> > equal.
> > So OK to commit with just element_precision?
>
> Please just check the element size.  I'm always worried when
> using TYPE_PRECISION on FP types and for shuffles it's really
> only about size.
>
> > BTW I also noticed from these testcases that there is a gcc 13 -> 14
> > regression with weird XORs being introduced:
> >
> > typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> > void fun (veci *a, veci *b, veci *c) {
> >   *c = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
> > }
> >
> > gcc 13.3:
> >   adrp x3, .LC0
> >   ldr q0, [x0]
> >   ldr q1, [x1]
> >   ldr q2, [x3, #:lo12:.LC0]
> >   tbl v0.16b, {v0.16b - v1.16b}, v2.16b
> >   str q0, [x2]
> >
> > gcc 14.1:
> >   ldr q30, [x1]
> >   adrp x3, .LC0
> >   ldr q31, [x0]
> >   ldr q29, [x3, #:lo12:.LC0]
> >   eor v31.16b, v31.16b, v30.16b
> >   eor v30.16b, v31.16b, v30.16b
> >   eor v31.16b, v31.16b, v30.16b
> >   tbl v30.16b, {v30.16b - v31.16b}, v29.16b
> >   str q30, [x2]
>
> You'd need to bisect that but I'd guess we got some extra
> match patterns triggering?
>
> > Manolis
> >
> > > > For example, when using the TYPE_SIZE check instead the following
> > > > testcase miscompiles
> > > >
> > > > typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> > > > typedef double vecd __attribute__ ((vector_size (2 * sizeof (double))));
> > > >
> > > > void fun (veci *a, veci *b, veci *c)
> > > > {
> > > >   char data[16];
> > > >   veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
> > > >   vecd r2;
> > > >   __builtin_memcpy(data, &r1, sizeof(veci));
> > > >   __builtin_memcpy(&r2, data, sizeof(vecd));
> > > >   vecd r3 = __builtin_shufflevector (r2, r2, 1, 0);
> > > >   __builtin_memcpy(data, &r3, sizeof(vecd));
> > > >   __builtin_memcpy(c, data, sizeof(veci));
> > > > }
> > > >
> > > > To:
> > > >
> > > > ldr     q31, [x0]
> > > > rev64   v31.4s, v31.4s
> > > > str     q31, [x2]
> > > > ret
> > > >
> > > > > Otherwise OK.
> > > > >
> > > > > Thanks,
> > > > > Richard.
> > > > >
> > > > > >      (with
> > > > > >       {
> > > > > >         vec_perm_indices sel0 (builder0, 2, nelts);
> > > > > > @@ -10073,10 +10075,10 @@ and,
> > > > > >              ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
> > > > > >                 || !can_vec_perm_const_p (result_mode, op_mode, sel1, false))
> > > > > >              : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
> > > > > > -      op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
> > > > > > +      op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2);
> > > > > >       }
> > > > > >       (if (op0)
> > > > > > -      (vec_perm @1 @2 { op0; })))))))
> > > > > > +      (view_convert (vec_perm @2 @3 { op0; }))))))))
> > > > > >
> > > > > >  /* Merge
> > > > > >       c = VEC_PERM_EXPR <a, b, VCST0>;
> > > > > > diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c
> > > > > > new file mode 100644
> > > > > > index 00000000000..1a4ab4065de
> > > > > > --- /dev/null
> > > > > > +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c
> > > > > > @@ -0,0 +1,16 @@
> > > > > > +/* { dg-do compile } */
> > > > > > +/* { dg-options "-O -fdump-tree-fre1" } */
> > > > > > +
> > > > > > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> > > > > > +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int))));
> > > > > > +
> > > > > > +void fun (veci *a, veci *b, veci *c)
> > > > > > +{
> > > > > > +  veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
> > > > > > +  vecu r2 = __builtin_convertvector (r1, vecu);
> > > > > > +  vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0);
> > > > > > +  *c = __builtin_convertvector (r3, veci);
> > > > > > +}
> > > > > > +
> > > > > > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */
> > > > > > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */
> > > > > >
> > > > >
> > > > > --
> > > > > Richard Biener <rguenther@suse.de>
> > > > > SUSE Software Solutions Germany GmbH,
> > > > > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > > > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
> > > >
> > >
> > > --
> > > Richard Biener <rguenther@suse.de>
> > > SUSE Software Solutions Germany GmbH,
> > > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
> >
>
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 07e743ae464..cbb3c5d86e0 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10039,19 +10039,21 @@  and,
      d = VEC_PERM_EXPR <a, b, NEW_VCST>;  */
 
 (simplify
- (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
+ (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5)
  (if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
   (with
    {
      machine_mode result_mode = TYPE_MODE (type);
-     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
+     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2));
      int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
      vec_perm_builder builder0;
      vec_perm_builder builder1;
      vec_perm_builder builder2 (nelts, nelts, 1);
    }
-   (if (tree_to_vec_perm_builder (&builder0, @3)
-	&& tree_to_vec_perm_builder (&builder1, @4))
+   (if (tree_to_vec_perm_builder (&builder0, @4)
+	&& tree_to_vec_perm_builder (&builder1, @5)
+	&& element_precision (TREE_TYPE (@0))
+	   == element_precision (TREE_TYPE (@1)))
     (with
      {
        vec_perm_indices sel0 (builder0, 2, nelts);
@@ -10073,10 +10075,10 @@  and,
 	       ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
 		  || !can_vec_perm_const_p (result_mode, op_mode, sel1, false))
 	       : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
-	 op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
+	 op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2);
      }
      (if (op0)
-      (vec_perm @1 @2 { op0; })))))))
+      (view_convert (vec_perm @2 @3 { op0; }))))))))
 
 /* Merge
      c = VEC_PERM_EXPR <a, b, VCST0>;
diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c
new file mode 100644
index 00000000000..1a4ab4065de
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-perm-2.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-fre1" } */
+
+typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
+typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int))));
+
+void fun (veci *a, veci *b, veci *c)
+{
+  veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
+  vecu r2 = __builtin_convertvector (r1, vecu);
+  vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0);
+  *c = __builtin_convertvector (r3, veci);
+}
+
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */