Message ID | 20240522080734.2022728-1-manolis.tsamis@vrull.eu |
---|---|
State | New |
Headers | show |
Series | MATCH: Look through VIEW_CONVERT when folding VEC_PERM_EXPRs. | expand |
On Wed, 22 May 2024, Manolis Tsamis wrote: > The match.pd patterns to merge two vector permutes into one fail when a > potentially no-op view convert expressions is between the two permutes. > This change lifts this restriction. > > gcc/ChangeLog: > > * match.pd: Allow no-op view_convert between permutes. > > gcc/testsuite/ChangeLog: > > * gcc.dg/fold-perm-2.c: New test. > > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu> > --- > > gcc/match.pd | 14 ++++++++------ > gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++ > 2 files changed, 24 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index 07e743ae464..cbb3c5d86e0 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -10039,19 +10039,21 @@ and, > d = VEC_PERM_EXPR <a, b, NEW_VCST>; */ > > (simplify > - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4) > + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5) > (if (TYPE_VECTOR_SUBPARTS (type).is_constant ()) > (with > { > machine_mode result_mode = TYPE_MODE (type); > - machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1)); > + machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2)); > int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); > vec_perm_builder builder0; > vec_perm_builder builder1; > vec_perm_builder builder2 (nelts, nelts, 1); > } > - (if (tree_to_vec_perm_builder (&builder0, @3) > - && tree_to_vec_perm_builder (&builder1, @4)) > + (if (tree_to_vec_perm_builder (&builder0, @4) > + && tree_to_vec_perm_builder (&builder1, @5) > + && element_precision (TREE_TYPE (@0)) > + == element_precision (TREE_TYPE (@1))) I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality instead. Otherwise OK. Thanks, Richard. > (with > { > vec_perm_indices sel0 (builder0, 2, nelts); > @@ -10073,10 +10075,10 @@ and, > ? 
(!can_vec_perm_const_p (result_mode, op_mode, sel0, false) > || !can_vec_perm_const_p (result_mode, op_mode, sel1, false)) > : !can_vec_perm_const_p (result_mode, op_mode, sel1, false))) > - op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); > + op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2); > } > (if (op0) > - (vec_perm @1 @2 { op0; }))))))) > + (view_convert (vec_perm @2 @3 { op0; })))))))) > > /* Merge > c = VEC_PERM_EXPR <a, b, VCST0>; > diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c > new file mode 100644 > index 00000000000..1a4ab4065de > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O -fdump-tree-fre1" } */ > + > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int)))); > + > +void fun (veci *a, veci *b, veci *c) > +{ > + veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); > + vecu r2 = __builtin_convertvector (r1, vecu); > + vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0); > + *c = __builtin_convertvector (r3, veci); > +} > + > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */ > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */ >
On Fri, May 24, 2024 at 9:31 AM Richard Biener <rguenther@suse.de> wrote: > > On Wed, 22 May 2024, Manolis Tsamis wrote: > > > The match.pd patterns to merge two vector permutes into one fail when a > > potentially no-op view convert expressions is between the two permutes. > > This change lifts this restriction. > > > > gcc/ChangeLog: > > > > * match.pd: Allow no-op view_convert between permutes. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/fold-perm-2.c: New test. > > > > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu> > > --- > > > > gcc/match.pd | 14 ++++++++------ > > gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++ > > 2 files changed, 24 insertions(+), 6 deletions(-) > > create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > index 07e743ae464..cbb3c5d86e0 100644 > > --- a/gcc/match.pd > > +++ b/gcc/match.pd > > @@ -10039,19 +10039,21 @@ and, > > d = VEC_PERM_EXPR <a, b, NEW_VCST>; */ > > > > (simplify > > - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4) > > + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5) > > (if (TYPE_VECTOR_SUBPARTS (type).is_constant ()) > > (with > > { > > machine_mode result_mode = TYPE_MODE (type); > > - machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1)); > > + machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2)); > > int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); > > vec_perm_builder builder0; > > vec_perm_builder builder1; > > vec_perm_builder builder2 (nelts, nelts, 1); > > } > > - (if (tree_to_vec_perm_builder (&builder0, @3) > > - && tree_to_vec_perm_builder (&builder1, @4)) > > + (if (tree_to_vec_perm_builder (&builder0, @4) > > + && tree_to_vec_perm_builder (&builder1, @5) > > + && element_precision (TREE_TYPE (@0)) > > + == element_precision (TREE_TYPE (@1))) > > I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality > instead. 
> I think TYPE_SIZE is not enough as we need the vector elements to have the same size, not just the vector as a whole. For example, when using the TYPE_SIZE check instead, the following testcase is miscompiled: typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); typedef double vecd __attribute__ ((vector_size (2 * sizeof (double)))); void fun (veci *a, veci *b, veci *c) { char data[16]; veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); vecd r2; __builtin_memcpy(data, &r1, sizeof(veci)); __builtin_memcpy(&r2, data, sizeof(vecd)); vecd r3 = __builtin_shufflevector (r2, r2, 1, 0); __builtin_memcpy(data, &r3, sizeof(vecd)); __builtin_memcpy(c, data, sizeof(veci)); } It is miscompiled to: ldr q31, [x0] rev64 v31.4s, v31.4s str q31, [x2] ret > Otherwise OK. > > Thanks, > Richard. > > > (with > > { > > vec_perm_indices sel0 (builder0, 2, nelts); > > @@ -10073,10 +10075,10 @@ and, > > ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false) > > || !can_vec_perm_const_p (result_mode, op_mode, sel1, false)) > > : !can_vec_perm_const_p (result_mode, op_mode, sel1, false))) > > - op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); > > + op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2); > > } > > (if (op0) > > - (vec_perm @1 @2 { op0; }))))))) > > + (view_convert (vec_perm @2 @3 { op0; })))))))) > > > > /* Merge > > c = VEC_PERM_EXPR <a, b, VCST0>; > > diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c > > new file mode 100644 > > index 00000000000..1a4ab4065de > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c > > @@ -0,0 +1,16 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O -fdump-tree-fre1" } */ > > + > > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > > +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int)))); > > + > > +void fun (veci *a, veci *b, veci *c) > > +{ > > + veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); > > + vecu r2 =
__builtin_convertvector (r1, vecu); > > + vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0); > > + *c = __builtin_convertvector (r3, veci); > > +} > > + > > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */ > > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */ > > > > -- > Richard Biener <rguenther@suse.de> > SUSE Software Solutions Germany GmbH, > Frankenstrasse 146, 90461 Nuernberg, Germany; > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
On Fri, 24 May 2024, Manolis Tsamis wrote: > On Fri, May 24, 2024 at 9:31 AM Richard Biener <rguenther@suse.de> wrote: > > > > On Wed, 22 May 2024, Manolis Tsamis wrote: > > > > > The match.pd patterns to merge two vector permutes into one fail when a > > > potentially no-op view convert expressions is between the two permutes. > > > This change lifts this restriction. > > > > > > gcc/ChangeLog: > > > > > > * match.pd: Allow no-op view_convert between permutes. > > > > > > gcc/testsuite/ChangeLog: > > > > > > * gcc.dg/fold-perm-2.c: New test. > > > > > > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu> > > > --- > > > > > > gcc/match.pd | 14 ++++++++------ > > > gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++ > > > 2 files changed, 24 insertions(+), 6 deletions(-) > > > create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c > > > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > > index 07e743ae464..cbb3c5d86e0 100644 > > > --- a/gcc/match.pd > > > +++ b/gcc/match.pd > > > @@ -10039,19 +10039,21 @@ and, > > > d = VEC_PERM_EXPR <a, b, NEW_VCST>; */ > > > > > > (simplify > > > - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4) > > > + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5) > > > (if (TYPE_VECTOR_SUBPARTS (type).is_constant ()) > > > (with > > > { > > > machine_mode result_mode = TYPE_MODE (type); > > > - machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1)); > > > + machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2)); > > > int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); > > > vec_perm_builder builder0; > > > vec_perm_builder builder1; > > > vec_perm_builder builder2 (nelts, nelts, 1); > > > } > > > - (if (tree_to_vec_perm_builder (&builder0, @3) > > > - && tree_to_vec_perm_builder (&builder1, @4)) > > > + (if (tree_to_vec_perm_builder (&builder0, @4) > > > + && tree_to_vec_perm_builder (&builder1, @5) > > > + && element_precision (TREE_TYPE (@0)) > > > + == element_precision (TREE_TYPE (@1))) > > > > 
I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality > > instead. > > > > I think TYPE_SIZE is not enough as we need the vector elements to have > the same size, not just the vector as a whole. Err, yes - you want to check the element sizes of course. > For example, when using the TYPE_SIZE check instead the following > testcase miscompiles > > typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > typedef double vecd __attribute__ ((vector_size (2 * sizeof (double)))); > > void fun (veci *a, veci *b, veci *c) > { > char data[16]; > veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); > vecd r2; > __builtin_memcpy(data, &r1, sizeof(veci)); > __builtin_memcpy(&r2, data, sizeof(vecd)); > vecd r3 = __builtin_shufflevector (r2, r2, 1, 0); > __builtin_memcpy(data, &r3, sizeof(vecd)); > __builtin_memcpy(c, data, sizeof(veci)); > } > > To: > > ldr q31, [x0] > rev64 v31.4s, v31.4s > str q31, [x2] > ret > > > Otherwise OK. > > > > Thanks, > > Richard. > > > > > (with > > > { > > > vec_perm_indices sel0 (builder0, 2, nelts); > > > @@ -10073,10 +10075,10 @@ and, > > > ? 
(!can_vec_perm_const_p (result_mode, op_mode, sel0, false) > > > || !can_vec_perm_const_p (result_mode, op_mode, sel1, false)) > > > : !can_vec_perm_const_p (result_mode, op_mode, sel1, false))) > > > - op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); > > > + op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2); > > > } > > > (if (op0) > > > - (vec_perm @1 @2 { op0; }))))))) > > > + (view_convert (vec_perm @2 @3 { op0; })))))))) > > > > > > /* Merge > > > c = VEC_PERM_EXPR <a, b, VCST0>; > > > diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c > > > new file mode 100644 > > > index 00000000000..1a4ab4065de > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c > > > @@ -0,0 +1,16 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O -fdump-tree-fre1" } */ > > > + > > > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > > > +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int)))); > > > + > > > +void fun (veci *a, veci *b, veci *c) > > > +{ > > > + veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); > > > + vecu r2 = __builtin_convertvector (r1, vecu); > > > + vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0); > > > + *c = __builtin_convertvector (r3, veci); > > > +} > > > + > > > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */ > > > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */ > > > > > > > -- > > Richard Biener <rguenther@suse.de> > > SUSE Software Solutions Germany GmbH, > > Frankenstrasse 146, 90461 Nuernberg, Germany; > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg) >
On Fri, May 24, 2024 at 10:46 AM Richard Biener <rguenther@suse.de> wrote: > > On Fri, 24 May 2024, Manolis Tsamis wrote: > > > On Fri, May 24, 2024 at 9:31 AM Richard Biener <rguenther@suse.de> wrote: > > > > > > On Wed, 22 May 2024, Manolis Tsamis wrote: > > > > > > > The match.pd patterns to merge two vector permutes into one fail when a > > > > potentially no-op view convert expressions is between the two permutes. > > > > This change lifts this restriction. > > > > > > > > gcc/ChangeLog: > > > > > > > > * match.pd: Allow no-op view_convert between permutes. > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > * gcc.dg/fold-perm-2.c: New test. > > > > > > > > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu> > > > > --- > > > > > > > > gcc/match.pd | 14 ++++++++------ > > > > gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++ > > > > 2 files changed, 24 insertions(+), 6 deletions(-) > > > > create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c > > > > > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > > > index 07e743ae464..cbb3c5d86e0 100644 > > > > --- a/gcc/match.pd > > > > +++ b/gcc/match.pd > > > > @@ -10039,19 +10039,21 @@ and, > > > > d = VEC_PERM_EXPR <a, b, NEW_VCST>; */ > > > > > > > > (simplify > > > > - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4) > > > > + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5) > > > > (if (TYPE_VECTOR_SUBPARTS (type).is_constant ()) > > > > (with > > > > { > > > > machine_mode result_mode = TYPE_MODE (type); > > > > - machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1)); > > > > + machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2)); > > > > int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); > > > > vec_perm_builder builder0; > > > > vec_perm_builder builder1; > > > > vec_perm_builder builder2 (nelts, nelts, 1); > > > > } > > > > - (if (tree_to_vec_perm_builder (&builder0, @3) > > > > - && tree_to_vec_perm_builder (&builder1, @4)) > > > > + (if 
(tree_to_vec_perm_builder (&builder0, @4) > > > > + && tree_to_vec_perm_builder (&builder1, @5) > > > > + && element_precision (TREE_TYPE (@0)) > > > > + == element_precision (TREE_TYPE (@1))) > > > > > > I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality > > > instead. > > > > > > > I think TYPE_SIZE is not enough as we need the vector elements to have > > the same size, not just the vector as a whole. > > Err, yes - you want to check the element sizes of course. > From what I understand, checking the element size should be enough. Otherwise we can check both TYPE_SIZE and element_precision to be equal. So OK to commit with just element_precision? BTW I also noticed from these testcases that there is a gcc 13 -> 14 regression with weird XORs being introduced: typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); void fun (veci *a, veci *b, veci *c) { *c = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); } gcc 13.3: adrp x3, .LC0 ldr q0, [x0] ldr q1, [x1] ldr q2, [x3, #:lo12:.LC0] tbl v0.16b, {v0.16b - v1.16b}, v2.16b str q0, [x2] gcc 14.1: ldr q30, [x1] adrp x3, .LC0 ldr q31, [x0] ldr q29, [x3, #:lo12:.LC0] eor v31.16b, v31.16b, v30.16b eor v30.16b, v31.16b, v30.16b eor v31.16b, v31.16b, v30.16b tbl v30.16b, {v30.16b - v31.16b}, v29.16b str q30, [x2] Manolis > > For example, when using the TYPE_SIZE check instead the following > > testcase miscompiles > > > > typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > > typedef double vecd __attribute__ ((vector_size (2 * sizeof (double)))); > > > > void fun (veci *a, veci *b, veci *c) > > { > > char data[16]; > > veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); > > vecd r2; > > __builtin_memcpy(data, &r1, sizeof(veci)); > > __builtin_memcpy(&r2, data, sizeof(vecd)); > > vecd r3 = __builtin_shufflevector (r2, r2, 1, 0); > > __builtin_memcpy(data, &r3, sizeof(vecd)); > > __builtin_memcpy(c, data, sizeof(veci)); > > } > > > > To: > > > > ldr q31, [x0] > > rev64 v31.4s, 
v31.4s > > str q31, [x2] > > ret > > > > > Otherwise OK. > > > > > > Thanks, > > > Richard. > > > > > > > (with > > > > { > > > > vec_perm_indices sel0 (builder0, 2, nelts); > > > > @@ -10073,10 +10075,10 @@ and, > > > > ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false) > > > > || !can_vec_perm_const_p (result_mode, op_mode, sel1, false)) > > > > : !can_vec_perm_const_p (result_mode, op_mode, sel1, false))) > > > > - op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); > > > > + op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2); > > > > } > > > > (if (op0) > > > > - (vec_perm @1 @2 { op0; }))))))) > > > > + (view_convert (vec_perm @2 @3 { op0; })))))))) > > > > > > > > /* Merge > > > > c = VEC_PERM_EXPR <a, b, VCST0>; > > > > diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c > > > > new file mode 100644 > > > > index 00000000000..1a4ab4065de > > > > --- /dev/null > > > > +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c > > > > @@ -0,0 +1,16 @@ > > > > +/* { dg-do compile } */ > > > > +/* { dg-options "-O -fdump-tree-fre1" } */ > > > > + > > > > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > > > > +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int)))); > > > > + > > > > +void fun (veci *a, veci *b, veci *c) > > > > +{ > > > > + veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); > > > > + vecu r2 = __builtin_convertvector (r1, vecu); > > > > + vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0); > > > > + *c = __builtin_convertvector (r3, veci); > > > > +} > > > > + > > > > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */ > > > > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */ > > > > > > > > > > -- > > > Richard Biener <rguenther@suse.de> > > > SUSE Software Solutions Germany GmbH, > > > Frankenstrasse 146, 90461 Nuernberg, Germany; > > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg) 
> > > > -- > Richard Biener <rguenther@suse.de> > SUSE Software Solutions Germany GmbH, > Frankenstrasse 146, 90461 Nuernberg, Germany; > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
On Fri, 24 May 2024, Manolis Tsamis wrote: > On Fri, May 24, 2024 at 10:46 AM Richard Biener <rguenther@suse.de> wrote: > > > > On Fri, 24 May 2024, Manolis Tsamis wrote: > > > > > On Fri, May 24, 2024 at 9:31 AM Richard Biener <rguenther@suse.de> wrote: > > > > > > > > On Wed, 22 May 2024, Manolis Tsamis wrote: > > > > > > > > > The match.pd patterns to merge two vector permutes into one fail when a > > > > > potentially no-op view convert expressions is between the two permutes. > > > > > This change lifts this restriction. > > > > > > > > > > gcc/ChangeLog: > > > > > > > > > > * match.pd: Allow no-op view_convert between permutes. > > > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > > > * gcc.dg/fold-perm-2.c: New test. > > > > > > > > > > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu> > > > > > --- > > > > > > > > > > gcc/match.pd | 14 ++++++++------ > > > > > gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++ > > > > > 2 files changed, 24 insertions(+), 6 deletions(-) > > > > > create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c > > > > > > > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > > > > index 07e743ae464..cbb3c5d86e0 100644 > > > > > --- a/gcc/match.pd > > > > > +++ b/gcc/match.pd > > > > > @@ -10039,19 +10039,21 @@ and, > > > > > d = VEC_PERM_EXPR <a, b, NEW_VCST>; */ > > > > > > > > > > (simplify > > > > > - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4) > > > > > + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5) > > > > > (if (TYPE_VECTOR_SUBPARTS (type).is_constant ()) > > > > > (with > > > > > { > > > > > machine_mode result_mode = TYPE_MODE (type); > > > > > - machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1)); > > > > > + machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2)); > > > > > int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); > > > > > vec_perm_builder builder0; > > > > > vec_perm_builder builder1; > > > > > vec_perm_builder builder2 (nelts, nelts, 1); > > > > > } > 
> > > > - (if (tree_to_vec_perm_builder (&builder0, @3) > > > > > - && tree_to_vec_perm_builder (&builder1, @4)) > > > > > + (if (tree_to_vec_perm_builder (&builder0, @4) > > > > > + && tree_to_vec_perm_builder (&builder1, @5) > > > > > + && element_precision (TREE_TYPE (@0)) > > > > > + == element_precision (TREE_TYPE (@1))) > > > > > > > > I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality > > > > instead. > > > > > > > > > > I think TYPE_SIZE is not enough as we need the vector elements to have > > > the same size, not just the vector as a whole. > > > > Err, yes - you want to check the element sizes of course. > > > From what I understand, checking the element size should be enough. > Otherwise we can check both TYPE_SIZE and element_precision to be > equal. > So OK to commit with just element_precision? Please just check the element size. I'm always worried when using TYPE_PRECISION on FP types and for shuffles it's really only about size. > BTW I also noticed from these testcases that there is a gcc 13 -> 14 > regression with weird XORs being introduced: > > typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > void fun (veci *a, veci *b, veci *c) { > *c = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); > } > > gcc 13.3: > adrp x3, .LC0 > ldr q0, [x0] > ldr q1, [x1] > ldr q2, [x3, #:lo12:.LC0] > tbl v0.16b, {v0.16b - v1.16b}, v2.16b > str q0, [x2] > > gcc 14.1: > ldr q30, [x1] > adrp x3, .LC0 > ldr q31, [x0] > ldr q29, [x3, #:lo12:.LC0] > eor v31.16b, v31.16b, v30.16b > eor v30.16b, v31.16b, v30.16b > eor v31.16b, v31.16b, v30.16b > tbl v30.16b, {v30.16b - v31.16b}, v29.16b > str q30, [x2] You'd need to bisect that but I'd guess we got some extra match patterns triggering? 
> Manolis > > > > For example, when using the TYPE_SIZE check instead the following > > > testcase miscompiles > > > > > > typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > > > typedef double vecd __attribute__ ((vector_size (2 * sizeof (double)))); > > > > > > void fun (veci *a, veci *b, veci *c) > > > { > > > char data[16]; > > > veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); > > > vecd r2; > > > __builtin_memcpy(data, &r1, sizeof(veci)); > > > __builtin_memcpy(&r2, data, sizeof(vecd)); > > > vecd r3 = __builtin_shufflevector (r2, r2, 1, 0); > > > __builtin_memcpy(data, &r3, sizeof(vecd)); > > > __builtin_memcpy(c, data, sizeof(veci)); > > > } > > > > > > To: > > > > > > ldr q31, [x0] > > > rev64 v31.4s, v31.4s > > > str q31, [x2] > > > ret > > > > > > > Otherwise OK. > > > > > > > > Thanks, > > > > Richard. > > > > > > > > > (with > > > > > { > > > > > vec_perm_indices sel0 (builder0, 2, nelts); > > > > > @@ -10073,10 +10075,10 @@ and, > > > > > ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false) > > > > > || !can_vec_perm_const_p (result_mode, op_mode, sel1, false)) > > > > > : !can_vec_perm_const_p (result_mode, op_mode, sel1, false))) > > > > > - op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); > > > > > + op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2); > > > > > } > > > > > (if (op0) > > > > > - (vec_perm @1 @2 { op0; }))))))) > > > > > + (view_convert (vec_perm @2 @3 { op0; })))))))) > > > > > > > > > > /* Merge > > > > > c = VEC_PERM_EXPR <a, b, VCST0>; > > > > > diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c > > > > > new file mode 100644 > > > > > index 00000000000..1a4ab4065de > > > > > --- /dev/null > > > > > +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c > > > > > @@ -0,0 +1,16 @@ > > > > > +/* { dg-do compile } */ > > > > > +/* { dg-options "-O -fdump-tree-fre1" } */ > > > > > + > > > > > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > > > > > 
+typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int)))); > > > > > + > > > > > +void fun (veci *a, veci *b, veci *c) > > > > > +{ > > > > > + veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); > > > > > + vecu r2 = __builtin_convertvector (r1, vecu); > > > > > + vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0); > > > > > + *c = __builtin_convertvector (r3, veci); > > > > > +} > > > > > + > > > > > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */ > > > > > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */ > > > > > > > > > > > > > -- > > > > Richard Biener <rguenther@suse.de> > > > > SUSE Software Solutions Germany GmbH, > > > > Frankenstrasse 146, 90461 Nuernberg, Germany; > > > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg) > > > > > > > -- > > Richard Biener <rguenther@suse.de> > > SUSE Software Solutions Germany GmbH, > > Frankenstrasse 146, 90461 Nuernberg, Germany; > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg) >
The match.pd patterns to merge two vector permutes into one fail when a potentially no-op view convert expression is between the two permutes. This change lifts this restriction. gcc/ChangeLog: * match.pd: Allow no-op view_convert between permutes. gcc/testsuite/ChangeLog: * gcc.dg/fold-perm-2.c: New test. --- gcc/match.pd | 14 ++++++++------ gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c diff --git a/gcc/match.pd b/gcc/match.pd index 07e743ae464..1f91b9857c8 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -10039,19 +10039,21 @@ and, d = VEC_PERM_EXPR <a, b, NEW_VCST>; */ (simplify - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4) + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5) (if (TYPE_VECTOR_SUBPARTS (type).is_constant ()) (with { machine_mode result_mode = TYPE_MODE (type); - machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1)); + machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2)); int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); vec_perm_builder builder0; vec_perm_builder builder1; vec_perm_builder builder2 (nelts, nelts, 1); } - (if (tree_to_vec_perm_builder (&builder0, @3) - && tree_to_vec_perm_builder (&builder1, @4)) + (if (tree_to_vec_perm_builder (&builder0, @4) + && tree_to_vec_perm_builder (&builder1, @5) + && TYPE_SIZE (TREE_TYPE (TREE_TYPE (@0))) + == TYPE_SIZE (TREE_TYPE (TREE_TYPE (@1)))) (with { vec_perm_indices sel0 (builder0, 2, nelts); @@ -10073,10 +10075,10 @@ and, ? 
(!can_vec_perm_const_p (result_mode, op_mode, sel0, false) || !can_vec_perm_const_p (result_mode, op_mode, sel1, false)) : !can_vec_perm_const_p (result_mode, op_mode, sel1, false))) - op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); + op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2); } (if (op0) - (vec_perm @1 @2 { op0; }))))))) + (view_convert (vec_perm @2 @3 { op0; })))))))) /* Merge c = VEC_PERM_EXPR <a, b, VCST0>; diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c new file mode 100644 index 00000000000..1a4ab4065de --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1" } */ + +typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int)))); + +void fun (veci *a, veci *b, veci *c) +{ + veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); + vecu r2 = __builtin_convertvector (r1, vecu); + vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0); + *c = __builtin_convertvector (r3, veci); +} + +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */ -- 2.44.0 On Fri, May 24, 2024 at 11:30 AM Richard Biener <rguenther@suse.de> wrote: > > On Fri, 24 May 2024, Manolis Tsamis wrote: > > > On Fri, May 24, 2024 at 10:46 AM Richard Biener <rguenther@suse.de> wrote: > > > > > > On Fri, 24 May 2024, Manolis Tsamis wrote: > > > > > > > On Fri, May 24, 2024 at 9:31 AM Richard Biener <rguenther@suse.de> wrote: > > > > > > > > > > On Wed, 22 May 2024, Manolis Tsamis wrote: > > > > > > > > > > > The match.pd patterns to merge two vector permutes into one fail when a > > > > > > potentially no-op view convert expressions is between the two permutes. > > > > > > This change lifts this restriction. 
> > > > > > > > > > > > gcc/ChangeLog: > > > > > > > > > > > > * match.pd: Allow no-op view_convert between permutes. > > > > > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > > > > > * gcc.dg/fold-perm-2.c: New test. > > > > > > > > > > > > Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu> > > > > > > --- > > > > > > > > > > > > gcc/match.pd | 14 ++++++++------ > > > > > > gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++ > > > > > > 2 files changed, 24 insertions(+), 6 deletions(-) > > > > > > create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c > > > > > > > > > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > > > > > index 07e743ae464..cbb3c5d86e0 100644 > > > > > > --- a/gcc/match.pd > > > > > > +++ b/gcc/match.pd > > > > > > @@ -10039,19 +10039,21 @@ and, > > > > > > d = VEC_PERM_EXPR <a, b, NEW_VCST>; */ > > > > > > > > > > > > (simplify > > > > > > - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4) > > > > > > + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5) > > > > > > (if (TYPE_VECTOR_SUBPARTS (type).is_constant ()) > > > > > > (with > > > > > > { > > > > > > machine_mode result_mode = TYPE_MODE (type); > > > > > > - machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1)); > > > > > > + machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2)); > > > > > > int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); > > > > > > vec_perm_builder builder0; > > > > > > vec_perm_builder builder1; > > > > > > vec_perm_builder builder2 (nelts, nelts, 1); > > > > > > } > > > > > > - (if (tree_to_vec_perm_builder (&builder0, @3) > > > > > > - && tree_to_vec_perm_builder (&builder1, @4)) > > > > > > + (if (tree_to_vec_perm_builder (&builder0, @4) > > > > > > + && tree_to_vec_perm_builder (&builder1, @5) > > > > > > + && element_precision (TREE_TYPE (@0)) > > > > > > + == element_precision (TREE_TYPE (@1))) > > > > > > > > > > I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality > > > > > instead. 
> > > > > > > > > > > > > I think TYPE_SIZE is not enough as we need the vector elements to have > > > > the same size, not just the vector as a whole. > > > > > > Err, yes - you want to check the element sizes of course. > > > > > From what I understand, checking the element size should be enough. > > Otherwise we can check both TYPE_SIZE and element_precision to be > > equal. > > So OK to commit with just element_precision? > > Please just check the element size. I'm always worried when > using TYPE_PRECISION on FP types and for shuffles it's really > only about size. > > > BTW I also noticed from these testcases that there is a gcc 13 -> 14 > > regression with weird XORs being introduced: > > > > typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > > void fun (veci *a, veci *b, veci *c) { > > *c = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); > > } > > > > gcc 13.3: > > adrp x3, .LC0 > > ldr q0, [x0] > > ldr q1, [x1] > > ldr q2, [x3, #:lo12:.LC0] > > tbl v0.16b, {v0.16b - v1.16b}, v2.16b > > str q0, [x2] > > > > gcc 14.1: > > ldr q30, [x1] > > adrp x3, .LC0 > > ldr q31, [x0] > > ldr q29, [x3, #:lo12:.LC0] > > eor v31.16b, v31.16b, v30.16b > > eor v30.16b, v31.16b, v30.16b > > eor v31.16b, v31.16b, v30.16b > > tbl v30.16b, {v30.16b - v31.16b}, v29.16b > > str q30, [x2] > > You'd need to bisect that but I'd guess we got some extra > match patterns triggering? 
> > > Manolis > > > > > > For example, when using the TYPE_SIZE check instead the following > > > > testcase miscompiles > > > > > > > > typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > > > > typedef double vecd __attribute__ ((vector_size (2 * sizeof (double)))); > > > > > > > > void fun (veci *a, veci *b, veci *c) > > > > { > > > > char data[16]; > > > > veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); > > > > vecd r2; > > > > __builtin_memcpy(data, &r1, sizeof(veci)); > > > > __builtin_memcpy(&r2, data, sizeof(vecd)); > > > > vecd r3 = __builtin_shufflevector (r2, r2, 1, 0); > > > > __builtin_memcpy(data, &r3, sizeof(vecd)); > > > > __builtin_memcpy(c, data, sizeof(veci)); > > > > } > > > > > > > > To: > > > > > > > > ldr q31, [x0] > > > > rev64 v31.4s, v31.4s > > > > str q31, [x2] > > > > ret > > > > > > > > > Otherwise OK. > > > > > > > > > > Thanks, > > > > > Richard. > > > > > > > > > > > (with > > > > > > { > > > > > > vec_perm_indices sel0 (builder0, 2, nelts); > > > > > > @@ -10073,10 +10075,10 @@ and, > > > > > > ? 
(!can_vec_perm_const_p (result_mode, op_mode, sel0, false) > > > > > > || !can_vec_perm_const_p (result_mode, op_mode, sel1, false)) > > > > > > : !can_vec_perm_const_p (result_mode, op_mode, sel1, false))) > > > > > > - op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); > > > > > > + op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2); > > > > > > } > > > > > > (if (op0) > > > > > > - (vec_perm @1 @2 { op0; }))))))) > > > > > > + (view_convert (vec_perm @2 @3 { op0; })))))))) > > > > > > > > > > > > /* Merge > > > > > > c = VEC_PERM_EXPR <a, b, VCST0>; > > > > > > diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c > > > > > > new file mode 100644 > > > > > > index 00000000000..1a4ab4065de > > > > > > --- /dev/null > > > > > > +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c > > > > > > @@ -0,0 +1,16 @@ > > > > > > +/* { dg-do compile } */ > > > > > > +/* { dg-options "-O -fdump-tree-fre1" } */ > > > > > > + > > > > > > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > > > > > > +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int)))); > > > > > > + > > > > > > +void fun (veci *a, veci *b, veci *c) > > > > > > +{ > > > > > > + veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); > > > > > > + vecu r2 = __builtin_convertvector (r1, vecu); > > > > > > + vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0); > > > > > > + *c = __builtin_convertvector (r3, veci); > > > > > > +} > > > > > > + > > > > > > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */ > > > > > > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */ > > > > > > > > > > > > > > > > -- > > > > > Richard Biener <rguenther@suse.de> > > > > > SUSE Software Solutions Germany GmbH, > > > > > Frankenstrasse 146, 90461 Nuernberg, Germany; > > > > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg) > > > > > > > > > > -- > > > Richard Biener <rguenther@suse.de> 
> > > SUSE Software Solutions Germany GmbH, > > > Frankenstrasse 146, 90461 Nuernberg, Germany; > > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg) > > > > -- > Richard Biener <rguenther@suse.de> > SUSE Software Solutions Germany GmbH, > Frankenstrasse 146, 90461 Nuernberg, Germany; > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
diff --git a/gcc/match.pd b/gcc/match.pd index 07e743ae464..cbb3c5d86e0 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -10039,19 +10039,21 @@ and, d = VEC_PERM_EXPR <a, b, NEW_VCST>; */ (simplify - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4) + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5) (if (TYPE_VECTOR_SUBPARTS (type).is_constant ()) (with { machine_mode result_mode = TYPE_MODE (type); - machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1)); + machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2)); int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); vec_perm_builder builder0; vec_perm_builder builder1; vec_perm_builder builder2 (nelts, nelts, 1); } - (if (tree_to_vec_perm_builder (&builder0, @3) - && tree_to_vec_perm_builder (&builder1, @4)) + (if (tree_to_vec_perm_builder (&builder0, @4) + && tree_to_vec_perm_builder (&builder1, @5) + && element_precision (TREE_TYPE (@0)) + == element_precision (TREE_TYPE (@1))) (with { vec_perm_indices sel0 (builder0, 2, nelts); @@ -10073,10 +10075,10 @@ and, ? 
(!can_vec_perm_const_p (result_mode, op_mode, sel0, false) || !can_vec_perm_const_p (result_mode, op_mode, sel1, false)) : !can_vec_perm_const_p (result_mode, op_mode, sel1, false))) - op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); + op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2); } (if (op0) - (vec_perm @1 @2 { op0; }))))))) + (view_convert (vec_perm @2 @3 { op0; })))))))) /* Merge c = VEC_PERM_EXPR <a, b, VCST0>; diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c new file mode 100644 index 00000000000..1a4ab4065de --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1" } */ + +typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int)))); + +void fun (veci *a, veci *b, veci *c) +{ + veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); + vecu r2 = __builtin_convertvector (r1, vecu); + vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0); + *c = __builtin_convertvector (r3, veci); +} + +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */
The match.pd patterns to merge two vector permutes into one fail when a potentially no-op view convert expression is between the two permutes. This change lifts this restriction. gcc/ChangeLog: * match.pd: Allow no-op view_convert between permutes. gcc/testsuite/ChangeLog: * gcc.dg/fold-perm-2.c: New test. Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu> --- gcc/match.pd | 14 ++++++++------ gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c