Message ID | CAAgBjMmE5ohrwZMAjU+ju_pMcTbPMnYHGWixgdYUfx=abPn3nw@mail.gmail.com |
---|---|
State | New |
Headers | show |
Series | [match.pd,SVE] Add pattern to transform svrev(svrev(v)) --> v | expand |
On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Hi, > For the following test: > > svint32_t f(svint32_t v) > { > return svrev_s32 (svrev_s32 (v)); > } > > We generate 2 rev instructions instead of nop: > f: > rev z0.s, z0.s > rev z0.s, z0.s > ret > > The attached patch tries to fix that by trying to recognize the following > pattern in match.pd: > v1 = VEC_PERM_EXPR (v0, v0, mask) > v2 = VEC_PERM_EXPR (v1, v1, mask) > --> > v2 = v0 > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > > Code-gen with patch: > f: > ret > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress. > Does it look OK for stage-1 ? I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation should handle two consecutive permutes with the is_combined_permutation_identity which might need tweaking for VLA vectors Richard. > > Thanks, > Prathamesh
On Tue, 11 Apr 2023 at 14:17, Richard Biener <richard.guenther@gmail.com> wrote: > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > Hi, > > For the following test: > > > > svint32_t f(svint32_t v) > > { > > return svrev_s32 (svrev_s32 (v)); > > } > > > > We generate 2 rev instructions instead of nop: > > f: > > rev z0.s, z0.s > > rev z0.s, z0.s > > ret > > > > The attached patch tries to fix that by trying to recognize the following > > pattern in match.pd: > > v1 = VEC_PERM_EXPR (v0, v0, mask) > > v2 = VEC_PERM_EXPR (v1, v1, mask) > > --> > > v2 = v0 > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > > > > Code-gen with patch: > > f: > > ret > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress. > > Does it look OK for stage-1 ? > > I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation should > handle two consecutive permutes with the is_combined_permutation_identity > which might need tweaking for VLA vectors Hi Richard, Thanks for the suggestions. The attached patch modifies is_combined_permutation_identity to recognize the above pattern. Does it look OK ? Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. Thanks, Prathamesh > > Richard. > > > > > Thanks, > > Prathamesh gcc/ChangeLog: * tree-ssa-forwprop.cc (is_combined_permutation_identity): New parameter def_stmt. Try to simplify two successive VEC_PERM_EXPRs with single operand and same mask, where mask chooses elements in reverse order. gcc/testesuite/ChangeLog: * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c new file mode 100644 index 00000000000..e57ee67d716 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +#include <arm_sve.h> + +svint32_t f(svint32_t v) +{ + return svrev_s32 (svrev_s32 (v)); +} + +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 9b567440ba4..5cbee077d89 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -2532,7 +2532,7 @@ simplify_bitfield_ref (gimple_stmt_iterator *gsi) gives back one of the input. */ static int -is_combined_permutation_identity (tree mask1, tree mask2) +is_combined_permutation_identity (tree mask1, tree mask2, gimple *def_stmt = NULL) { tree mask; unsigned HOST_WIDE_INT nelts, i, j; @@ -2541,7 +2541,36 @@ is_combined_permutation_identity (tree mask1, tree mask2) gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST && TREE_CODE (mask2) == VECTOR_CST); + if (def_stmt) + gcc_checking_assert (is_gimple_assign (def_stmt) + && gimple_assign_rhs_code (def_stmt) == VEC_PERM_EXPR); + mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); + + /* For VLA masks, check for the following pattern: + v1 = VEC_PERM_EXPR (v0, v0, mask) + v2 = VEC_PERM_EXPR (v1, v1, mask) + --> + v2 = v0 + if mask is {nelts - 1, nelts - 2, ...}. */ + + if (operand_equal_p (mask1, mask2, 0) + && !VECTOR_CST_NELTS (mask1).is_constant () + && def_stmt + && operand_equal_p (gimple_assign_rhs1 (def_stmt), + gimple_assign_rhs2 (def_stmt), 0)) + { + vec_perm_builder builder; + if (tree_to_vec_perm_builder (&builder, mask1)) + { + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); + vec_perm_indices sel (builder, 1, nelts); + if (sel.series_p (0, 1, nelts - 1, -1)) + return 1; + } + return 0; + } + if (mask == NULL_TREE || TREE_CODE (mask) != VECTOR_CST) return 0; @@ -2629,7 +2658,7 @@ simplify_permutation (gimple_stmt_iterator *gsi) op3 = gimple_assign_rhs3 (def_stmt); if (TREE_CODE (op3) != VECTOR_CST) return 0; - ident = is_combined_permutation_identity (op3, op2); + ident = is_combined_permutation_identity (op3, op2, def_stmt); if (!ident) return 0; orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt)
On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> wrote: > > On Tue, 11 Apr 2023 at 14:17, Richard Biener <richard.guenther@gmail.com> wrote: > > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > Hi, > > > For the following test: > > > > > > svint32_t f(svint32_t v) > > > { > > > return svrev_s32 (svrev_s32 (v)); > > > } > > > > > > We generate 2 rev instructions instead of nop: > > > f: > > > rev z0.s, z0.s > > > rev z0.s, z0.s > > > ret > > > > > > The attached patch tries to fix that by trying to recognize the following > > > pattern in match.pd: > > > v1 = VEC_PERM_EXPR (v0, v0, mask) > > > v2 = VEC_PERM_EXPR (v1, v1, mask) > > > --> > > > v2 = v0 > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > > > > > > Code-gen with patch: > > > f: > > > ret > > > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress. > > > Does it look OK for stage-1 ? > > > > I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation should > > handle two consecutive permutes with the is_combined_permutation_identity > > which might need tweaking for VLA vectors > Hi Richard, > Thanks for the suggestions. The attached patch modifies > is_combined_permutation_identity > to recognize the above pattern. > Does it look OK ? > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. Hi, ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html Thanks, Prathamesh > > Thanks, > Prathamesh > > > > Richard. > > > > > > > > Thanks, > > > Prathamesh
On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> wrote: > > On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni > <prathamesh.kulkarni@linaro.org> wrote: > > > > On Tue, 11 Apr 2023 at 14:17, Richard Biener <richard.guenther@gmail.com> wrote: > > > > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches > > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > > > Hi, > > > > For the following test: > > > > > > > > svint32_t f(svint32_t v) > > > > { > > > > return svrev_s32 (svrev_s32 (v)); > > > > } > > > > > > > > We generate 2 rev instructions instead of nop: > > > > f: > > > > rev z0.s, z0.s > > > > rev z0.s, z0.s > > > > ret > > > > > > > > The attached patch tries to fix that by trying to recognize the following > > > > pattern in match.pd: > > > > v1 = VEC_PERM_EXPR (v0, v0, mask) > > > > v2 = VEC_PERM_EXPR (v1, v1, mask) > > > > --> > > > > v2 = v0 > > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > > > > > > > > Code-gen with patch: > > > > f: > > > > ret > > > > > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress. > > > > Does it look OK for stage-1 ? > > > > > > I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation should > > > handle two consecutive permutes with the is_combined_permutation_identity > > > which might need tweaking for VLA vectors > > Hi Richard, > > Thanks for the suggestions. The attached patch modifies > > is_combined_permutation_identity > > to recognize the above pattern. > > Does it look OK ? > > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. > Hi, > ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2 and amend the function comment accordingly, say, tem = VEC_PERM <op0, op1, MASK1>; res = VEC_PERM <tem, tem, MASK2>; SAME_P specifies whether op0 and op1 compare equal. */ + if (def_stmt) + gcc_checking_assert (is_gimple_assign (def_stmt) + && gimple_assign_rhs_code (def_stmt) == VEC_PERM_EXPR); this is then unnecessary mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); + + /* For VLA masks, check for the following pattern: + v1 = VEC_PERM_EXPR (v0, v0, mask) + v2 = VEC_PERM_EXPR (v1, v1, mask) + --> + v2 = v0 you are not using 'mask' so please defer fold_ternary until after your special-case. + if (operand_equal_p (mask1, mask2, 0) + && !VECTOR_CST_NELTS (mask1).is_constant () + && def_stmt + && operand_equal_p (gimple_assign_rhs1 (def_stmt), + gimple_assign_rhs2 (def_stmt), 0)) + { + vec_perm_builder builder; + if (tree_to_vec_perm_builder (&builder, mask1)) + { + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); + vec_perm_indices sel (builder, 1, nelts); + if (sel.series_p (0, 1, nelts - 1, -1)) + return 1; + } + return 0; I'm defering to Richard whether this is the correct way to check for a vector reversing mask (I wonder how constructing such mask is even possible) Richard. > Thanks, > Prathamesh > > > > Thanks, > > Prathamesh > > > > > > Richard. > > > > > > > > > > > Thanks, > > > > Prathamesh
On Wed, 19 Apr 2023 at 16:17, Richard Biener <richard.guenther@gmail.com> wrote: > > On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni > <prathamesh.kulkarni@linaro.org> wrote: > > > > On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni > > <prathamesh.kulkarni@linaro.org> wrote: > > > > > > On Tue, 11 Apr 2023 at 14:17, Richard Biener <richard.guenther@gmail.com> wrote: > > > > > > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches > > > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > > > > > Hi, > > > > > For the following test: > > > > > > > > > > svint32_t f(svint32_t v) > > > > > { > > > > > return svrev_s32 (svrev_s32 (v)); > > > > > } > > > > > > > > > > We generate 2 rev instructions instead of nop: > > > > > f: > > > > > rev z0.s, z0.s > > > > > rev z0.s, z0.s > > > > > ret > > > > > > > > > > The attached patch tries to fix that by trying to recognize the following > > > > > pattern in match.pd: > > > > > v1 = VEC_PERM_EXPR (v0, v0, mask) > > > > > v2 = VEC_PERM_EXPR (v1, v1, mask) > > > > > --> > > > > > v2 = v0 > > > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > > > > > > > > > > Code-gen with patch: > > > > > f: > > > > > ret > > > > > > > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress. > > > > > Does it look OK for stage-1 ? > > > > > > > > I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation should > > > > handle two consecutive permutes with the is_combined_permutation_identity > > > > which might need tweaking for VLA vectors > > > Hi Richard, > > > Thanks for the suggestions. The attached patch modifies > > > is_combined_permutation_identity > > > to recognize the above pattern. > > > Does it look OK ? > > > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. > > Hi, > > ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html > > Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2 > and amend the function comment accordingly, say, > > tem = VEC_PERM <op0, op1, MASK1>; > res = VEC_PERM <tem, tem, MASK2>; > > SAME_P specifies whether op0 and op1 compare equal. */ > > + if (def_stmt) > + gcc_checking_assert (is_gimple_assign (def_stmt) > + && gimple_assign_rhs_code (def_stmt) == VEC_PERM_EXPR); > this is then unnecessary > > mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); > + > + /* For VLA masks, check for the following pattern: > + v1 = VEC_PERM_EXPR (v0, v0, mask) > + v2 = VEC_PERM_EXPR (v1, v1, mask) > + --> > + v2 = v0 > > you are not using 'mask' so please defer fold_ternary until after your > special-case. > > + if (operand_equal_p (mask1, mask2, 0) > + && !VECTOR_CST_NELTS (mask1).is_constant () > + && def_stmt > + && operand_equal_p (gimple_assign_rhs1 (def_stmt), > + gimple_assign_rhs2 (def_stmt), 0)) > + { > + vec_perm_builder builder; > + if (tree_to_vec_perm_builder (&builder, mask1)) > + { > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); > + vec_perm_indices sel (builder, 1, nelts); > + if (sel.series_p (0, 1, nelts - 1, -1)) > + return 1; > + } > + return 0; > > I'm defering to Richard whether this is the correct way to check for a vector > reversing mask (I wonder how constructing such mask is even possible) Hi Richard, Thanks for the suggestions, I have updated the patch accordingly. The following hunk from svrev_impl::fold() constructs mask in reverse: /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); vec_perm_builder builder (nelts, 1, 3); for (int i = 0; i < 3; ++i) builder.quick_push (nelts - i - 1); return fold_permute (f, builder); To see if mask chooses elements in reverse, I borrowed it from function comment for series_p in vec-perm-indices.cc: /* Return true if index OUT_BASE + I * OUT_STEP selects input element IN_BASE + I * IN_STEP. For example, the call to test whether a permute reverses a vector of N elements would be: series_p (0, 1, N - 1, -1) which would return true for { N - 1, N - 2, N - 3, ... }. */ Thanks, Prathamesh > > Richard. > > > Thanks, > > Prathamesh > > > > > > Thanks, > > > Prathamesh > > > > > > > > Richard. > > > > > > > > > > > > > > Thanks, > > > > > Prathamesh gcc/ChangeLog: * tree-ssa-forwprop.cc (is_combined_permutation_identity): New parameter same_p. Try to simplify two successive VEC_PERM_EXPRs with single operand and same mask, where mask chooses elements in reverse order. gcc/testesuite/ChangeLog: * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c new file mode 100644 index 00000000000..e57ee67d716 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +#include <arm_sve.h> + +svint32_t f(svint32_t v) +{ + return svrev_s32 (svrev_s32 (v)); +} + +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 9b567440ba4..ebd4a368ae9 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -2528,11 +2528,16 @@ simplify_bitfield_ref (gimple_stmt_iterator *gsi) return true; } -/* Determine whether applying the 2 permutations (mask1 then mask2) - gives back one of the input. */ +/* For the following sequence: + tem = VEC_PERM_EXPR <op0, op1, mask1> + res = VEC_PERM_EXPR <tem, tem, mask2> + + Determine whether applying the 2 permutations (mask1 then mask2) + gives back one of the input. SAME_P specifies whether op0 + and op1 compare equal. */ static int -is_combined_permutation_identity (tree mask1, tree mask2) +is_combined_permutation_identity (tree mask1, tree mask2, bool same_p) { tree mask; unsigned HOST_WIDE_INT nelts, i, j; @@ -2541,6 +2546,29 @@ is_combined_permutation_identity (tree mask1, tree mask2) gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST && TREE_CODE (mask2) == VECTOR_CST); + + /* For VLA masks, check for the following pattern: + v1 = VEC_PERM_EXPR (v0, v0, mask1) + v2 = VEC_PERM_EXPR (v1, v1, mask2) + --> + v2 = v0 + if mask1 == mask2 == {nelts - 1, nelts - 2, ...}. */ + + if (operand_equal_p (mask1, mask2, 0) + && !VECTOR_CST_NELTS (mask1).is_constant () + && same_p) + { + vec_perm_builder builder; + if (tree_to_vec_perm_builder (&builder, mask1)) + { + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); + vec_perm_indices sel (builder, 1, nelts); + if (sel.series_p (0, 1, nelts - 1, -1)) + return 1; + } + return 0; + } + mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); if (mask == NULL_TREE || TREE_CODE (mask) != VECTOR_CST) return 0; @@ -2629,7 +2657,9 @@ simplify_permutation (gimple_stmt_iterator *gsi) op3 = gimple_assign_rhs3 (def_stmt); if (TREE_CODE (op3) != VECTOR_CST) return 0; - ident = is_combined_permutation_identity (op3, op2); + bool same_p = operand_equal_p (gimple_assign_rhs1 (def_stmt), + gimple_assign_rhs2 (def_stmt), 0); + ident = is_combined_permutation_identity (op3, op2, same_p); if (!ident) return 0; orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt)
On Wed, Apr 19, 2023 at 2:20 PM Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> wrote: > > On Wed, 19 Apr 2023 at 16:17, Richard Biener <richard.guenther@gmail.com> wrote: > > > > On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni > > <prathamesh.kulkarni@linaro.org> wrote: > > > > > > On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni > > > <prathamesh.kulkarni@linaro.org> wrote: > > > > > > > > On Tue, 11 Apr 2023 at 14:17, Richard Biener <richard.guenther@gmail.com> wrote: > > > > > > > > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches > > > > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > > > > > > > Hi, > > > > > > For the following test: > > > > > > > > > > > > svint32_t f(svint32_t v) > > > > > > { > > > > > > return svrev_s32 (svrev_s32 (v)); > > > > > > } > > > > > > > > > > > > We generate 2 rev instructions instead of nop: > > > > > > f: > > > > > > rev z0.s, z0.s > > > > > > rev z0.s, z0.s > > > > > > ret > > > > > > > > > > > > The attached patch tries to fix that by trying to recognize the following > > > > > > pattern in match.pd: > > > > > > v1 = VEC_PERM_EXPR (v0, v0, mask) > > > > > > v2 = VEC_PERM_EXPR (v1, v1, mask) > > > > > > --> > > > > > > v2 = v0 > > > > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > > > > > > > > > > > > Code-gen with patch: > > > > > > f: > > > > > > ret > > > > > > > > > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress. > > > > > > Does it look OK for stage-1 ? > > > > > > > > > > I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation should > > > > > handle two consecutive permutes with the is_combined_permutation_identity > > > > > which might need tweaking for VLA vectors > > > > Hi Richard, > > > > Thanks for the suggestions. The attached patch modifies > > > > is_combined_permutation_identity > > > > to recognize the above pattern. > > > > Does it look OK ? > > > > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. > > > Hi, > > > ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html > > > > Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2 > > and amend the function comment accordingly, say, > > > > tem = VEC_PERM <op0, op1, MASK1>; > > res = VEC_PERM <tem, tem, MASK2>; > > > > SAME_P specifies whether op0 and op1 compare equal. */ > > > > + if (def_stmt) > > + gcc_checking_assert (is_gimple_assign (def_stmt) > > + && gimple_assign_rhs_code (def_stmt) == VEC_PERM_EXPR); > > this is then unnecessary > > > > mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); > > + > > + /* For VLA masks, check for the following pattern: > > + v1 = VEC_PERM_EXPR (v0, v0, mask) > > + v2 = VEC_PERM_EXPR (v1, v1, mask) > > + --> > > + v2 = v0 > > > > you are not using 'mask' so please defer fold_ternary until after your > > special-case. > > > > + if (operand_equal_p (mask1, mask2, 0) > > + && !VECTOR_CST_NELTS (mask1).is_constant () > > + && def_stmt > > + && operand_equal_p (gimple_assign_rhs1 (def_stmt), > > + gimple_assign_rhs2 (def_stmt), 0)) > > + { > > + vec_perm_builder builder; > > + if (tree_to_vec_perm_builder (&builder, mask1)) > > + { > > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); > > + vec_perm_indices sel (builder, 1, nelts); > > + if (sel.series_p (0, 1, nelts - 1, -1)) > > + return 1; > > + } > > + return 0; > > > > I'm defering to Richard whether this is the correct way to check for a vector > > reversing mask (I wonder how constructing such mask is even possible) > Hi Richard, > Thanks for the suggestions, I have updated the patch accordingly. > > The following hunk from svrev_impl::fold() constructs mask in reverse: > /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ > poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); > vec_perm_builder builder (nelts, 1, 3); > for (int i = 0; i < 3; ++i) > builder.quick_push (nelts - i - 1); > return fold_permute (f, builder); > > To see if mask chooses elements in reverse, I borrowed it from function comment > for series_p in vec-perm-indices.cc: > /* Return true if index OUT_BASE + I * OUT_STEP selects input > element IN_BASE + I * IN_STEP. For example, the call to test > whether a permute reverses a vector of N elements would be: > > series_p (0, 1, N - 1, -1) > > which would return true for { N - 1, N - 2, N - 3, ... }. */ Looks good from my side now, but as said I defer to Richard for the check. Richard. > Thanks, > Prathamesh > > > > Richard. > > > > > Thanks, > > > Prathamesh > > > > > > > > Thanks, > > > > Prathamesh > > > > > > > > > > Richard. > > > > > > > > > > > > > > > > > Thanks, > > > > > > Prathamesh
Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes: > On Wed, 19 Apr 2023 at 16:17, Richard Biener <richard.guenther@gmail.com> wrote: >> >> On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni >> <prathamesh.kulkarni@linaro.org> wrote: >> > >> > On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni >> > <prathamesh.kulkarni@linaro.org> wrote: >> > > >> > > On Tue, 11 Apr 2023 at 14:17, Richard Biener <richard.guenther@gmail.com> wrote: >> > > > >> > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches >> > > > <gcc-patches@gcc.gnu.org> wrote: >> > > > > >> > > > > Hi, >> > > > > For the following test: >> > > > > >> > > > > svint32_t f(svint32_t v) >> > > > > { >> > > > > return svrev_s32 (svrev_s32 (v)); >> > > > > } >> > > > > >> > > > > We generate 2 rev instructions instead of nop: >> > > > > f: >> > > > > rev z0.s, z0.s >> > > > > rev z0.s, z0.s >> > > > > ret >> > > > > >> > > > > The attached patch tries to fix that by trying to recognize the following >> > > > > pattern in match.pd: >> > > > > v1 = VEC_PERM_EXPR (v0, v0, mask) >> > > > > v2 = VEC_PERM_EXPR (v1, v1, mask) >> > > > > --> >> > > > > v2 = v0 >> > > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } >> > > > > >> > > > > Code-gen with patch: >> > > > > f: >> > > > > ret >> > > > > >> > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress. >> > > > > Does it look OK for stage-1 ? >> > > > >> > > > I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation should >> > > > handle two consecutive permutes with the is_combined_permutation_identity >> > > > which might need tweaking for VLA vectors >> > > Hi Richard, >> > > Thanks for the suggestions. The attached patch modifies >> > > is_combined_permutation_identity >> > > to recognize the above pattern. >> > > Does it look OK ? >> > > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. >> > Hi, >> > ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html >> >> Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2 >> and amend the function comment accordingly, say, >> >> tem = VEC_PERM <op0, op1, MASK1>; >> res = VEC_PERM <tem, tem, MASK2>; >> >> SAME_P specifies whether op0 and op1 compare equal. */ >> >> + if (def_stmt) >> + gcc_checking_assert (is_gimple_assign (def_stmt) >> + && gimple_assign_rhs_code (def_stmt) == VEC_PERM_EXPR); >> this is then unnecessary >> >> mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); >> + >> + /* For VLA masks, check for the following pattern: >> + v1 = VEC_PERM_EXPR (v0, v0, mask) >> + v2 = VEC_PERM_EXPR (v1, v1, mask) >> + --> >> + v2 = v0 >> >> you are not using 'mask' so please defer fold_ternary until after your >> special-case. >> >> + if (operand_equal_p (mask1, mask2, 0) >> + && !VECTOR_CST_NELTS (mask1).is_constant () >> + && def_stmt >> + && operand_equal_p (gimple_assign_rhs1 (def_stmt), >> + gimple_assign_rhs2 (def_stmt), 0)) >> + { >> + vec_perm_builder builder; >> + if (tree_to_vec_perm_builder (&builder, mask1)) >> + { >> + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); >> + vec_perm_indices sel (builder, 1, nelts); >> + if (sel.series_p (0, 1, nelts - 1, -1)) >> + return 1; >> + } >> + return 0; >> >> I'm defering to Richard whether this is the correct way to check for a vector >> reversing mask (I wonder how constructing such mask is even possible) > Hi Richard, > Thanks for the suggestions, I have updated the patch accordingly. > > The following hunk from svrev_impl::fold() constructs mask in reverse: > /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ > poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); > vec_perm_builder builder (nelts, 1, 3); > for (int i = 0; i < 3; ++i) > builder.quick_push (nelts - i - 1); > return fold_permute (f, builder); > > To see if mask chooses elements in reverse, I borrowed it from function comment > for series_p in vec-perm-indices.cc: > /* Return true if index OUT_BASE + I * OUT_STEP selects input > element IN_BASE + I * IN_STEP. For example, the call to test > whether a permute reverses a vector of N elements would be: > > series_p (0, 1, N - 1, -1) > > which would return true for { N - 1, N - 2, N - 3, ... }. */ > > Thanks, > Prathamesh >> >> Richard. >> >> > Thanks, >> > Prathamesh >> > > >> > > Thanks, >> > > Prathamesh >> > > > >> > > > Richard. >> > > > >> > > > > >> > > > > Thanks, >> > > > > Prathamesh > > gcc/ChangeLog: > * tree-ssa-forwprop.cc (is_combined_permutation_identity): > New parameter same_p. > Try to simplify two successive VEC_PERM_EXPRs with single operand > and same mask, where mask chooses elements in reverse order. > > gcc/testesuite/ChangeLog: > * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > new file mode 100644 > index 00000000000..e57ee67d716 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > + > +#include <arm_sve.h> > + > +svint32_t f(svint32_t v) > +{ > + return svrev_s32 (svrev_s32 (v)); > +} > + > +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > index 9b567440ba4..ebd4a368ae9 100644 > --- a/gcc/tree-ssa-forwprop.cc > +++ b/gcc/tree-ssa-forwprop.cc > @@ -2528,11 +2528,16 @@ simplify_bitfield_ref (gimple_stmt_iterator *gsi) > return true; > } > > -/* Determine whether applying the 2 permutations (mask1 then mask2) > - gives back one of the input. */ > +/* For the following sequence: > + tem = VEC_PERM_EXPR <op0, op1, mask1> > + res = VEC_PERM_EXPR <tem, tem, mask2> > + > + Determine whether applying the 2 permutations (mask1 then mask2) > + gives back one of the input. SAME_P specifies whether op0 > + and op1 compare equal. */ > > static int > -is_combined_permutation_identity (tree mask1, tree mask2) > +is_combined_permutation_identity (tree mask1, tree mask2, bool same_p) > { > tree mask; > unsigned HOST_WIDE_INT nelts, i, j; > @@ -2541,6 +2546,29 @@ is_combined_permutation_identity (tree mask1, tree mask2) > > gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST > && TREE_CODE (mask2) == VECTOR_CST); > + > + /* For VLA masks, check for the following pattern: > + v1 = VEC_PERM_EXPR (v0, v0, mask1) > + v2 = VEC_PERM_EXPR (v1, v1, mask2) > + --> > + v2 = v0 > + if mask1 == mask2 == {nelts - 1, nelts - 2, ...}. */ > + > + if (operand_equal_p (mask1, mask2, 0) > + && !VECTOR_CST_NELTS (mask1).is_constant () > + && same_p) The same_p isn't needed, since a reverse mask only picks from the first operand. Canonicalisation rules should ensure that op0 and op1 end up being equal in that case, but this fold doesn't depend on that. > + { > + vec_perm_builder builder; > + if (tree_to_vec_perm_builder (&builder, mask1)) > + { > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); > + vec_perm_indices sel (builder, 1, nelts); > + if (sel.series_p (0, 1, nelts - 1, -1)) > + return 1; > + } I have conflicting feelings about this. (1) It seems quite special-purpose. I'm a bit worried that we'll end up with a list of highly specific folds (which could grow quite long) rather than something more general. (2) Constructing a vec_perm_indices seems quite heavyweight for this single check. We could go directly from the mask instead. But “fixing” (2) reduces the generality of the structure and so leans more heavily into (1). I agree the check is correct though, so let's go with the patch in this form (without the same_p thing). However: > + return 0; > + } > + I don't think we want this early exit. It won't be used for all VLA cases due the operand_equal_p check. And in future, more VLA-compatible stuff might be added further down. Thanks, Richard > mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); > if (mask == NULL_TREE || TREE_CODE (mask) != VECTOR_CST) > return 0; > @@ -2629,7 +2657,9 @@ simplify_permutation (gimple_stmt_iterator *gsi) > op3 = gimple_assign_rhs3 (def_stmt); > if (TREE_CODE (op3) != VECTOR_CST) > return 0; > - ident = is_combined_permutation_identity (op3, op2); > + bool same_p = operand_equal_p (gimple_assign_rhs1 (def_stmt), > + gimple_assign_rhs2 (def_stmt), 0); > + ident = is_combined_permutation_identity (op3, op2, same_p); > if (!ident) > return 0; > orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt)
On Fri, 21 Apr 2023 at 21:57, Richard Sandiford <richard.sandiford@arm.com> wrote: > > Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes: > > On Wed, 19 Apr 2023 at 16:17, Richard Biener <richard.guenther@gmail.com> wrote: > >> > >> On Wed, Apr 19, 2023 at 11:21 AM Prathamesh Kulkarni > >> <prathamesh.kulkarni@linaro.org> wrote: > >> > > >> > On Tue, 11 Apr 2023 at 19:36, Prathamesh Kulkarni > >> > <prathamesh.kulkarni@linaro.org> wrote: > >> > > > >> > > On Tue, 11 Apr 2023 at 14:17, Richard Biener <richard.guenther@gmail.com> wrote: > >> > > > > >> > > > On Wed, Apr 5, 2023 at 10:39 AM Prathamesh Kulkarni via Gcc-patches > >> > > > <gcc-patches@gcc.gnu.org> wrote: > >> > > > > > >> > > > > Hi, > >> > > > > For the following test: > >> > > > > > >> > > > > svint32_t f(svint32_t v) > >> > > > > { > >> > > > > return svrev_s32 (svrev_s32 (v)); > >> > > > > } > >> > > > > > >> > > > > We generate 2 rev instructions instead of nop: > >> > > > > f: > >> > > > > rev z0.s, z0.s > >> > > > > rev z0.s, z0.s > >> > > > > ret > >> > > > > > >> > > > > The attached patch tries to fix that by trying to recognize the following > >> > > > > pattern in match.pd: > >> > > > > v1 = VEC_PERM_EXPR (v0, v0, mask) > >> > > > > v2 = VEC_PERM_EXPR (v1, v1, mask) > >> > > > > --> > >> > > > > v2 = v0 > >> > > > > if mask is { nelts - 1, nelts - 2, nelts - 3, ... } > >> > > > > > >> > > > > Code-gen with patch: > >> > > > > f: > >> > > > > ret > >> > > > > > >> > > > > Bootstrap+test passes on aarch64-linux-gnu, and SVE bootstrap in progress. > >> > > > > Does it look OK for stage-1 ? > >> > > > > >> > > > I didn't look at the patch but tree-ssa-forwprop.cc:simplify_permutation should > >> > > > handle two consecutive permutes with the is_combined_permutation_identity > >> > > > which might need tweaking for VLA vectors > >> > > Hi Richard, > >> > > Thanks for the suggestions. The attached patch modifies > >> > > is_combined_permutation_identity > >> > > to recognize the above pattern. > >> > > Does it look OK ? > >> > > Bootstrap+test in progress on aarch64-linux-gnu and x86_64-linux-gnu. > >> > Hi, > >> > ping https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615502.html > >> > >> Can you instead of def_stmt pass in a bool whether rhs1 is equal to rhs2 > >> and amend the function comment accordingly, say, > >> > >> tem = VEC_PERM <op0, op1, MASK1>; > >> res = VEC_PERM <tem, tem, MASK2>; > >> > >> SAME_P specifies whether op0 and op1 compare equal. */ > >> > >> + if (def_stmt) > >> + gcc_checking_assert (is_gimple_assign (def_stmt) > >> + && gimple_assign_rhs_code (def_stmt) == VEC_PERM_EXPR); > >> this is then unnecessary > >> > >> mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); > >> + > >> + /* For VLA masks, check for the following pattern: > >> + v1 = VEC_PERM_EXPR (v0, v0, mask) > >> + v2 = VEC_PERM_EXPR (v1, v1, mask) > >> + --> > >> + v2 = v0 > >> > >> you are not using 'mask' so please defer fold_ternary until after your > >> special-case. > >> > >> + if (operand_equal_p (mask1, mask2, 0) > >> + && !VECTOR_CST_NELTS (mask1).is_constant () > >> + && def_stmt > >> + && operand_equal_p (gimple_assign_rhs1 (def_stmt), > >> + gimple_assign_rhs2 (def_stmt), 0)) > >> + { > >> + vec_perm_builder builder; > >> + if (tree_to_vec_perm_builder (&builder, mask1)) > >> + { > >> + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); > >> + vec_perm_indices sel (builder, 1, nelts); > >> + if (sel.series_p (0, 1, nelts - 1, -1)) > >> + return 1; > >> + } > >> + return 0; > >> > >> I'm defering to Richard whether this is the correct way to check for a vector > >> reversing mask (I wonder how constructing such mask is even possible) > > Hi Richard, > > Thanks for the suggestions, I have updated the patch accordingly. > > > > The following hunk from svrev_impl::fold() constructs mask in reverse: > > /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ > > poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); > > vec_perm_builder builder (nelts, 1, 3); > > for (int i = 0; i < 3; ++i) > > builder.quick_push (nelts - i - 1); > > return fold_permute (f, builder); > > > > To see if mask chooses elements in reverse, I borrowed it from function comment > > for series_p in vec-perm-indices.cc: > > /* Return true if index OUT_BASE + I * OUT_STEP selects input > > element IN_BASE + I * IN_STEP. For example, the call to test > > whether a permute reverses a vector of N elements would be: > > > > series_p (0, 1, N - 1, -1) > > > > which would return true for { N - 1, N - 2, N - 3, ... }. */ > > > > Thanks, > > Prathamesh > >> > >> Richard. > >> > >> > Thanks, > >> > Prathamesh > >> > > > >> > > Thanks, > >> > > Prathamesh > >> > > > > >> > > > Richard. > >> > > > > >> > > > > > >> > > > > Thanks, > >> > > > > Prathamesh > > > > gcc/ChangeLog: > > * tree-ssa-forwprop.cc (is_combined_permutation_identity): > > New parameter same_p. > > Try to simplify two successive VEC_PERM_EXPRs with single operand > > and same mask, where mask chooses elements in reverse order. > > > > gcc/testesuite/ChangeLog: > > * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. > > > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > > new file mode 100644 > > index 00000000000..e57ee67d716 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > > @@ -0,0 +1,12 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > > + > > +#include <arm_sve.h> > > + > > +svint32_t f(svint32_t v) > > +{ > > + return svrev_s32 (svrev_s32 (v)); > > +} > > + > > +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */ > > +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ > > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > > index 9b567440ba4..ebd4a368ae9 100644 > > --- a/gcc/tree-ssa-forwprop.cc > > +++ b/gcc/tree-ssa-forwprop.cc > > @@ -2528,11 +2528,16 @@ simplify_bitfield_ref (gimple_stmt_iterator *gsi) > > return true; > > } > > > > -/* Determine whether applying the 2 permutations (mask1 then mask2) > > - gives back one of the input. */ > > +/* For the following sequence: > > + tem = VEC_PERM_EXPR <op0, op1, mask1> > > + res = VEC_PERM_EXPR <tem, tem, mask2> > > + > > + Determine whether applying the 2 permutations (mask1 then mask2) > > + gives back one of the input. SAME_P specifies whether op0 > > + and op1 compare equal. */ > > > > static int > > -is_combined_permutation_identity (tree mask1, tree mask2) > > +is_combined_permutation_identity (tree mask1, tree mask2, bool same_p) > > { > > tree mask; > > unsigned HOST_WIDE_INT nelts, i, j; > > @@ -2541,6 +2546,29 @@ is_combined_permutation_identity (tree mask1, tree mask2) > > > > gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST > > && TREE_CODE (mask2) == VECTOR_CST); > > + > > + /* For VLA masks, check for the following pattern: > > + v1 = VEC_PERM_EXPR (v0, v0, mask1) > > + v2 = VEC_PERM_EXPR (v1, v1, mask2) > > + --> > > + v2 = v0 > > + if mask1 == mask2 == {nelts - 1, nelts - 2, ...}. */ > > + > > + if (operand_equal_p (mask1, mask2, 0) > > + && !VECTOR_CST_NELTS (mask1).is_constant () > > + && same_p) > > The same_p isn't needed, since a reverse mask only picks from the > first operand. Canonicalisation rules should ensure that op0 and op1 > end up being equal in that case, but this fold doesn't depend on that. > > > + { > > + vec_perm_builder builder; > > + if (tree_to_vec_perm_builder (&builder, mask1)) > > + { > > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); > > + vec_perm_indices sel (builder, 1, nelts); > > + if (sel.series_p (0, 1, nelts - 1, -1)) > > + return 1; > > + } > > I have conflicting feelings about this. > > (1) It seems quite special-purpose. I'm a bit worried that we'll end > up with a list of highly specific folds (which could grow quite long) > rather than something more general. > > (2) Constructing a vec_perm_indices seems quite heavyweight for this > single check. We could go directly from the mask instead. > > But “fixing” (2) reduces the generality of the structure and so leans > more heavily into (1). > > I agree the check is correct though, so let's go with the patch > in this form (without the same_p thing). However: > > > + return 0; > > + } > > + > > I don't think we want this early exit. It won't be used for all VLA cases > due the operand_equal_p check. And in future, more VLA-compatible stuff > might be added further down. Hi Richard, Thanks for the suggestions, does the attached patch look OK ? Bootstrapped+tested on aarch64-linux-gnu. Thanks, Prathamesh > > Thanks, > Richard > > > mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); > > if (mask == NULL_TREE || TREE_CODE (mask) != VECTOR_CST) > > return 0; > > @@ -2629,7 +2657,9 @@ simplify_permutation (gimple_stmt_iterator *gsi) > > op3 = gimple_assign_rhs3 (def_stmt); > > if (TREE_CODE (op3) != VECTOR_CST) > > return 0; > > - ident = is_combined_permutation_identity (op3, op2); > > + bool same_p = operand_equal_p (gimple_assign_rhs1 (def_stmt), > > + gimple_assign_rhs2 (def_stmt), 0); > > + ident = is_combined_permutation_identity (op3, op2, same_p); > > if (!ident) > > return 0; > > orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt) gcc/ChangeLog: * tree-ssa-forwprop.cc (is_combined_permutation_identity): Try to simplify two successive VEC_PERM_EXPRs with single operand and same mask, where mask chooses elements in reverse order. gcc/testesuite/ChangeLog: * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c new file mode 100644 index 00000000000..e57ee67d716 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +#include <arm_sve.h> + +svint32_t f(svint32_t v) +{ + return svrev_s32 (svrev_s32 (v)); +} + +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 9b567440ba4..61df7efe82c 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -2541,6 +2541,27 @@ is_combined_permutation_identity (tree mask1, tree mask2) gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST && TREE_CODE (mask2) == VECTOR_CST); + + /* For VLA masks, check for the following pattern: + v1 = VEC_PERM_EXPR (v0, v0, mask1) + v2 = VEC_PERM_EXPR (v1, v1, mask2) + --> + v2 = v0 + if mask1 == mask2 == {nelts - 1, nelts - 2, ...}. */ + + if (operand_equal_p (mask1, mask2, 0) + && !VECTOR_CST_NELTS (mask1).is_constant ()) + { + vec_perm_builder builder; + if (tree_to_vec_perm_builder (&builder, mask1)) + { + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); + vec_perm_indices sel (builder, 1, nelts); + if (sel.series_p (0, 1, nelts - 1, -1)) + return 1; + } + } + mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); if (mask == NULL_TREE || TREE_CODE (mask) != VECTOR_CST) return 0;
Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes: > gcc/ChangeLog: > * tree-ssa-forwprop.cc (is_combined_permutation_identity): Try to > simplify two successive VEC_PERM_EXPRs with single operand and same > mask, where mask chooses elements in reverse order. > > gcc/testesuite/ChangeLog: > * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > new file mode 100644 > index 00000000000..e57ee67d716 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > + > +#include <arm_sve.h> > + > +svint32_t f(svint32_t v) > +{ > + return svrev_s32 (svrev_s32 (v)); > +} > + > +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > index 9b567440ba4..61df7efe82c 100644 > --- a/gcc/tree-ssa-forwprop.cc > +++ b/gcc/tree-ssa-forwprop.cc > @@ -2541,6 +2541,27 @@ is_combined_permutation_identity (tree mask1, tree mask2) > > gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST > && TREE_CODE (mask2) == VECTOR_CST); > + > + /* For VLA masks, check for the following pattern: > + v1 = VEC_PERM_EXPR (v0, v0, mask1) > + v2 = VEC_PERM_EXPR (v1, v1, mask2) Maybe blank out the second operands using "...": v1 = VEC_PERM_EXPR (v0, ..., mask1) v2 = VEC_PERM_EXPR (v1, ..., mask2) to make it clear that they don't matter. OK with that change, thanks. Richard > + --> > + v2 = v0 > + if mask1 == mask2 == {nelts - 1, nelts - 2, ...}. */ > + > + if (operand_equal_p (mask1, mask2, 0) > + && !VECTOR_CST_NELTS (mask1).is_constant ()) > + { > + vec_perm_builder builder; > + if (tree_to_vec_perm_builder (&builder, mask1)) > + { > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); > + vec_perm_indices sel (builder, 1, nelts); > + if (sel.series_p (0, 1, nelts - 1, -1)) > + return 1; > + } > + } > + > mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); > if (mask == NULL_TREE || TREE_CODE (mask) != VECTOR_CST) > return 0;
On Mon, 24 Apr 2023 at 15:02, Richard Sandiford <richard.sandiford@arm.com> wrote: > > Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes: > > gcc/ChangeLog: > > * tree-ssa-forwprop.cc (is_combined_permutation_identity): Try to > > simplify two successive VEC_PERM_EXPRs with single operand and same > > mask, where mask chooses elements in reverse order. > > > > gcc/testesuite/ChangeLog: > > * gcc.target/aarch64/sve/acle/general/rev-1.c: New test. > > > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > > new file mode 100644 > > index 00000000000..e57ee67d716 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c > > @@ -0,0 +1,12 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > > + > > +#include <arm_sve.h> > > + > > +svint32_t f(svint32_t v) > > +{ > > + return svrev_s32 (svrev_s32 (v)); > > +} > > + > > +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */ > > +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ > > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > > index 9b567440ba4..61df7efe82c 100644 > > --- a/gcc/tree-ssa-forwprop.cc > > +++ b/gcc/tree-ssa-forwprop.cc > > @@ -2541,6 +2541,27 @@ is_combined_permutation_identity (tree mask1, tree mask2) > > > > gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST > > && TREE_CODE (mask2) == VECTOR_CST); > > + > > + /* For VLA masks, check for the following pattern: > > + v1 = VEC_PERM_EXPR (v0, v0, mask1) > > + v2 = VEC_PERM_EXPR (v1, v1, mask2) > > Maybe blank out the second operands using "...": > > v1 = VEC_PERM_EXPR (v0, ..., mask1) > v2 = VEC_PERM_EXPR (v1, ..., mask2) > > to make it clear that they don't matter. > > OK with that change, thanks. Thanks, committed in: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f0eabc52c9a2d3da0bfc201da7a5c1658b76e9a4 Thanks, Prathamesh > > Richard > > > + --> > > + v2 = v0 > > + if mask1 == mask2 == {nelts - 1, nelts - 2, ...}. */ > > + > > + if (operand_equal_p (mask1, mask2, 0) > > + && !VECTOR_CST_NELTS (mask1).is_constant ()) > > + { > > + vec_perm_builder builder; > > + if (tree_to_vec_perm_builder (&builder, mask1)) > > + { > > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1)); > > + vec_perm_indices sel (builder, 1, nelts); > > + if (sel.series_p (0, 1, nelts - 1, -1)) > > + return 1; > > + } > > + } > > + > > mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); > > if (mask == NULL_TREE || TREE_CODE (mask) != VECTOR_CST) > > return 0;
diff --git a/gcc/match.pd b/gcc/match.pd index b8d3538b809..19dfc8f3722 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -8456,3 +8456,27 @@ and, } (if (full_perm_p) (vec_perm (op@3 @0 @1) @3 @2)))))) + +/* Transform: + v1 = VEC_PERM_EXPR (v0, v0, mask) + v2 = VEC_PERM_EXPR (v1, v1, mask) + --> + v2 = v0 + if mask is {nelts - 1, nelts - 2, ...} */ + +(simplify + (vec_perm (vec_perm@2 @0 @0 VECTOR_CST@1) @2 @1) + (with + { + vec_perm_builder builder; + bool rev_p = false; + if (tree_to_vec_perm_builder (&builder, @1)) + { + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); + vec_perm_indices sel (builder, 1, nelts); + if (sel.series_p (0, 1, nelts - 1, -1)) + rev_p = true; + } + } + (if (rev_p) + @0))) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c new file mode 100644 index 00000000000..e57ee67d716 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +#include <arm_sve.h> + +svint32_t f(svint32_t v) +{ + return svrev_s32 (svrev_s32 (v)); +} + +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */