Message ID | 2034460.3Q4jJ9Mvva@twilight |
---|---|
State | New |
Headers | show |
Series | Use two source permute for vector initialization (PR 85692) | expand |
On Tue, May 8, 2018 at 12:37 PM, Allan Sandfeld Jensen <linux@carewolf.com> wrote: > I have tried to fix PR85692 that I opened. Please add a testcase as well. It also helps if you shortly tell what the patch does in your mail. Thanks, Richard. > 2018-05-08 Allan Sandfeld Jense <allan.jensen@qt.io> > > PR tree-optimization/85692 > * tree-ssa-forwprop.c (simplify_vector_constructor): Detect > two source permute operations as well.
On Dienstag, 8. Mai 2018 12:42:33 CEST Richard Biener wrote: > On Tue, May 8, 2018 at 12:37 PM, Allan Sandfeld Jensen > > <linux@carewolf.com> wrote: > > I have tried to fix PR85692 that I opened. > > Please add a testcase as well. It also helps if you shortly tell what > the patch does > in your mail. > Okay. I have updated the patch with a test-case based on my motivating examples. The patch just extends patching a vector construction to not just a single source permute instruction, but also a two source permute instruction. commit 15c0f6a933d60b085416a59221851b604b955958 Author: Allan Sandfeld Jensen <allan.jensen@qt.io> Date: Tue May 8 13:16:18 2018 +0200 Try two source permute for vector construction simplify_vector_constructor() was detecting when vector construction could be implemented as a single source permute, but was not detecting when it could be implemented as a double source permute. This patch adds the second case. 2018-05-08 Allan Sandfeld Jensen <allan.jensen@qt.io> gcc/ PR tree-optimization/85692 * tree-ssa-forwprop.c (simplify_vector_constructor): Try two source permute as well. gcc/testsuite * gcc.target/i386/pr85692.c: Test two simply constructions are detected as permute instructions. diff --git a/gcc/testsuite/gcc.target/i386/pr85692.c b/gcc/testsuite/gcc.target/i386/pr85692.c new file mode 100644 index 00000000000..322c1050161 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr85692.c @@ -0,0 +1,18 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msse4.1" } */ +/* { dg-final { scan-assembler "unpcklps" } } */ +/* { dg-final { scan-assembler "blendps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ +/* { dg-final { scan-assembler-not "unpckhps" } } */ + +typedef float v4sf __attribute__ ((vector_size (16))); + +v4sf unpcklps(v4sf a, v4sf b) +{ + return v4sf{a[0],b[0],a[1],b[1]}; +} + +v4sf blendps(v4sf a, v4sf b) +{ + return v4sf{a[0],b[1],a[2],b[3]}; +} diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index 58ec6b47a5b..fbee8064160 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -2004,7 +2004,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) { gimple *stmt = gsi_stmt (*gsi); gimple *def_stmt; - tree op, op2, orig, type, elem_type; + tree op, op2, orig1, orig2, type, elem_type; unsigned elem_size, i; unsigned HOST_WIDE_INT nelts; enum tree_code code, conv_code; @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) elem_type = TREE_TYPE (type); elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); - vec_perm_builder sel (nelts, nelts, 1); - orig = NULL; + vec_perm_builder sel (nelts, 2, nelts); + orig1 = NULL; + orig2 = NULL; conv_code = ERROR_MARK; maybe_ident = true; FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt) @@ -2063,10 +2064,26 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) return false; op1 = gimple_assign_rhs1 (def_stmt); ref = TREE_OPERAND (op1, 0); - if (orig) + if (orig1) { - if (ref != orig) - return false; + if (ref == orig1 || orig2) + { + if (ref != orig1 && ref != orig2) + return false; + } + else + { + if (TREE_CODE (ref) != SSA_NAME) + return false; + if (! VECTOR_TYPE_P (TREE_TYPE (ref)) + || ! useless_type_conversion_p (TREE_TYPE (op1), + TREE_TYPE (TREE_TYPE (ref)))) + return false; + if (TREE_TYPE (orig1) != TREE_TYPE (ref)) + return false; + orig2 = ref; + maybe_ident = false; + } } else { @@ -2076,12 +2093,14 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) || ! useless_type_conversion_p (TREE_TYPE (op1), TREE_TYPE (TREE_TYPE (ref)))) return false; - orig = ref; + orig1 = ref; } unsigned int elt; if (maybe_ne (bit_field_size (op1), elem_size) || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt)) return false; + if (orig2 && ref == orig2) + elt += nelts; if (elt != i) maybe_ident = false; sel.quick_push (elt); @@ -2089,14 +2108,17 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) if (i < nelts) return false; - if (! VECTOR_TYPE_P (TREE_TYPE (orig)) + if (! VECTOR_TYPE_P (TREE_TYPE (orig1)) || maybe_ne (TYPE_VECTOR_SUBPARTS (type), - TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig)))) + TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig1)))) return false; + if (!orig2) + orig2 = orig1; + tree tem; if (conv_code != ERROR_MARK - && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig), + && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig1), &tem, &conv_code) || conv_code == CALL_EXPR)) return false; @@ -2104,16 +2126,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) if (maybe_ident) { if (conv_code == ERROR_MARK) - gimple_assign_set_rhs_from_tree (gsi, orig); + gimple_assign_set_rhs_from_tree (gsi, orig1); else - gimple_assign_set_rhs_with_ops (gsi, conv_code, orig, + gimple_assign_set_rhs_with_ops (gsi, conv_code, orig1, NULL_TREE, NULL_TREE); } else { tree mask_type; - vec_perm_indices indices (sel, 1, nelts); + vec_perm_indices indices (sel, 2, nelts); if (!can_vec_perm_const_p (TYPE_MODE (type), indices)) return false; mask_type @@ -2125,15 +2147,14 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) return false; op2 = vec_perm_indices_to_tree (mask_type, indices); if (conv_code == ERROR_MARK) - gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig, orig, op2); + gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig1, orig2, op2); else { gimple *perm - = gimple_build_assign (make_ssa_name (TREE_TYPE (orig)), - VEC_PERM_EXPR, orig, orig, op2); - orig = gimple_assign_lhs (perm); + = gimple_build_assign (make_ssa_name (TREE_TYPE (orig1)), + VEC_PERM_EXPR, orig1, orig2, op2); gsi_insert_before (gsi, perm, GSI_SAME_STMT); - gimple_assign_set_rhs_with_ops (gsi, conv_code, orig, + gimple_assign_set_rhs_with_ops (gsi, conv_code, gimple_assign_lhs (perm), NULL_TREE, NULL_TREE); } }
On Tue, May 08, 2018 at 01:25:35PM +0200, Allan Sandfeld Jensen wrote: > 2018-05-08 Allan Sandfeld Jensen <allan.jensen@qt.io> 2 spaces between date and name and two spaces between name and email address. > gcc/ > > PR tree-optimization/85692 > * tree-ssa-forwprop.c (simplify_vector_constructor): Try two > source permute as well. > > gcc/testsuite > > * gcc.target/i386/pr85692.c: Test two simply constructions are > detected as permute instructions. Just * gcc.target/i386/pr85692.c: New test. > > diff --git a/gcc/testsuite/gcc.target/i386/pr85692.c b/gcc/testsuite/gcc.target/i386/pr85692.c > new file mode 100644 > index 00000000000..322c1050161 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr85692.c > @@ -0,0 +1,18 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -msse4.1" } */ > +/* { dg-final { scan-assembler "unpcklps" } } */ > +/* { dg-final { scan-assembler "blendps" } } */ > +/* { dg-final { scan-assembler-not "shufps" } } */ > +/* { dg-final { scan-assembler-not "unpckhps" } } */ > + > +typedef float v4sf __attribute__ ((vector_size (16))); > + > +v4sf unpcklps(v4sf a, v4sf b) > +{ > + return v4sf{a[0],b[0],a[1],b[1]}; Though, not really sure if this has been tested at all. The above is valid only in C++ (and only C++11 and above), while the test is compiled as C and thus has to fail. In C one should use e.g. return (v4sf){a[0],b[0],a[1],b[1]}; instead (i.e. a compound literal). > @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) > elem_type = TREE_TYPE (type); > elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); > > - vec_perm_builder sel (nelts, nelts, 1); > - orig = NULL; > + vec_perm_builder sel (nelts, 2, nelts); Why this change? I admit the vec_parm_builder arguments are confusing, but I think the second times third is the number of how many indices are being pushed into the vector, so I think (nelts, nelts, 1) is right. > @@ -2063,10 +2064,26 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) > return false; > op1 = gimple_assign_rhs1 (def_stmt); > ref = TREE_OPERAND (op1, 0); > - if (orig) > + if (orig1) > { > - if (ref != orig) > - return false; > + if (ref == orig1 || orig2) > + { > + if (ref != orig1 && ref != orig2) > + return false; > + } > + else > + { > + if (TREE_CODE (ref) != SSA_NAME) > + return false; > + if (! VECTOR_TYPE_P (TREE_TYPE (ref)) > + || ! useless_type_conversion_p (TREE_TYPE (op1), > + TREE_TYPE (TREE_TYPE (ref)))) > + return false; > + if (TREE_TYPE (orig1) != TREE_TYPE (ref)) > + return false; I think even different type is acceptable here, as long as its conversion to orig1's type is useless. Furthermore, I think the way you wrote the patch with 2 variables rather than an array of 2 elements means too much duplication, this else block is a duplication of the else block below. See the patch I've added to the PR (and sorry for missing your patch first, the PR wasn't ASSIGNED and there was no link to gcc-patches for it). > @@ -2125,15 +2147,14 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) > return false; > op2 = vec_perm_indices_to_tree (mask_type, indices); > if (conv_code == ERROR_MARK) > - gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig, orig, op2); > + gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig1, orig2, op2); > else > { > gimple *perm > - = gimple_build_assign (make_ssa_name (TREE_TYPE (orig)), > - VEC_PERM_EXPR, orig, orig, op2); > - orig = gimple_assign_lhs (perm); > + = gimple_build_assign (make_ssa_name (TREE_TYPE (orig1)), > + VEC_PERM_EXPR, orig1, orig2, op2); > gsi_insert_before (gsi, perm, GSI_SAME_STMT); > - gimple_assign_set_rhs_with_ops (gsi, conv_code, orig, > + gimple_assign_set_rhs_with_ops (gsi, conv_code, gimple_assign_lhs (perm), Too long line. Jakub
On Mittwoch, 9. Mai 2018 11:08:02 CEST Jakub Jelinek wrote: > On Tue, May 08, 2018 at 01:25:35PM +0200, Allan Sandfeld Jensen wrote: > > 2018-05-08 Allan Sandfeld Jensen <allan.jensen@qt.io> > > 2 spaces between date and name and two spaces between name and email > address. > > > gcc/ > > > > PR tree-optimization/85692 > > * tree-ssa-forwprop.c (simplify_vector_constructor): Try two > > source permute as well. > > > > gcc/testsuite > > > > * gcc.target/i386/pr85692.c: Test two simply constructions are > > detected as permute instructions. > > Just > * gcc.target/i386/pr85692.c: New test. > > > diff --git a/gcc/testsuite/gcc.target/i386/pr85692.c > > b/gcc/testsuite/gcc.target/i386/pr85692.c new file mode 100644 > > index 00000000000..322c1050161 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr85692.c > > @@ -0,0 +1,18 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -msse4.1" } */ > > +/* { dg-final { scan-assembler "unpcklps" } } */ > > +/* { dg-final { scan-assembler "blendps" } } */ > > +/* { dg-final { scan-assembler-not "shufps" } } */ > > +/* { dg-final { scan-assembler-not "unpckhps" } } */ > > + > > +typedef float v4sf __attribute__ ((vector_size (16))); > > + > > +v4sf unpcklps(v4sf a, v4sf b) > > +{ > > + return v4sf{a[0],b[0],a[1],b[1]}; > > Though, not really sure if this has been tested at all. > The above is valid only in C++ (and only C++11 and above), while the > test is compiled as C and thus has to fail. > Yes, I thought it had been tested, but it wasn't. It also needs to change the first line to be a compile and not run test. > > @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator > > *gsi)> > > elem_type = TREE_TYPE (type); > > elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); > > > > - vec_perm_builder sel (nelts, nelts, 1); > > - orig = NULL; > > + vec_perm_builder sel (nelts, 2, nelts); > > Why this change? I admit the vec_parm_builder arguments are confusing, but > I think the second times third is the number of how many indices are being > pushed into the vector, so I think (nelts, nelts, 1) is right. > I had the impression it was what was selected from. In any case, I changed it because without I get crash when vec_perm_indices is created later with a possible nparms of 2. > > @@ -2063,10 +2064,26 @@ simplify_vector_constructor (gimple_stmt_iterator > > *gsi)> > > return false; > > > > op1 = gimple_assign_rhs1 (def_stmt); > > ref = TREE_OPERAND (op1, 0); > > > > - if (orig) > > + if (orig1) > > > > { > > > > - if (ref != orig) > > - return false; > > + if (ref == orig1 || orig2) > > + { > > + if (ref != orig1 && ref != orig2) > > + return false; > > + } > > + else > > + { > > + if (TREE_CODE (ref) != SSA_NAME) > > + return false; > > + if (! VECTOR_TYPE_P (TREE_TYPE (ref)) > > + || ! useless_type_conversion_p (TREE_TYPE (op1), > > + TREE_TYPE (TREE_TYPE (ref)))) > > + return false; > > + if (TREE_TYPE (orig1) != TREE_TYPE (ref)) > > + return false; > > I think even different type is acceptable here, as long as its conversion to > orig1's type is useless. > > Furthermore, I think the way you wrote the patch with 2 variables rather > than an array of 2 elements means too much duplication, this else block > is a duplication of the else block below. See the patch I've added to the > PR It seemed to me it was clearer like this, but I can see your point. > (and sorry for missing your patch first, the PR wasn't ASSIGNED and there > was no link to gcc-patches for it). > It is okay. You are welcome to take it over. I am not a regular gcc contributor and thus not well-versed in the details, only the basic logic of how things work. 'Allan
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index 58ec6b47a5b..fbee8064160 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -2004,7 +2004,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) { gimple *stmt = gsi_stmt (*gsi); gimple *def_stmt; - tree op, op2, orig, type, elem_type; + tree op, op2, orig1, orig2, type, elem_type; unsigned elem_size, i; unsigned HOST_WIDE_INT nelts; enum tree_code code, conv_code; @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) elem_type = TREE_TYPE (type); elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); - vec_perm_builder sel (nelts, nelts, 1); - orig = NULL; + vec_perm_builder sel (nelts, 2, nelts); + orig1 = NULL; + orig2 = NULL; conv_code = ERROR_MARK; maybe_ident = true; FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt) @@ -2063,10 +2064,26 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) return false; op1 = gimple_assign_rhs1 (def_stmt); ref = TREE_OPERAND (op1, 0); - if (orig) + if (orig1) { - if (ref != orig) - return false; + if (ref == orig1 || orig2) + { + if (ref != orig1 && ref != orig2) + return false; + } + else + { + if (TREE_CODE (ref) != SSA_NAME) + return false; + if (! VECTOR_TYPE_P (TREE_TYPE (ref)) + || ! useless_type_conversion_p (TREE_TYPE (op1), + TREE_TYPE (TREE_TYPE (ref)))) + return false; + if (TREE_TYPE (orig1) != TREE_TYPE (ref)) + return false; + orig2 = ref; + maybe_ident = false; + } } else { @@ -2076,12 +2093,14 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) || ! useless_type_conversion_p (TREE_TYPE (op1), TREE_TYPE (TREE_TYPE (ref)))) return false; - orig = ref; + orig1 = ref; } unsigned int elt; if (maybe_ne (bit_field_size (op1), elem_size) || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt)) return false; + if (orig2 && ref == orig2) + elt += nelts; if (elt != i) maybe_ident = false; sel.quick_push (elt); @@ -2089,14 +2108,17 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) if (i < nelts) return false; - if (! VECTOR_TYPE_P (TREE_TYPE (orig)) + if (! VECTOR_TYPE_P (TREE_TYPE (orig1)) || maybe_ne (TYPE_VECTOR_SUBPARTS (type), - TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig)))) + TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig1)))) return false; + if (!orig2) + orig2 = orig1; + tree tem; if (conv_code != ERROR_MARK - && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig), + && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig1), &tem, &conv_code) || conv_code == CALL_EXPR)) return false; @@ -2104,16 +2126,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) if (maybe_ident) { if (conv_code == ERROR_MARK) - gimple_assign_set_rhs_from_tree (gsi, orig); + gimple_assign_set_rhs_from_tree (gsi, orig1); else - gimple_assign_set_rhs_with_ops (gsi, conv_code, orig, + gimple_assign_set_rhs_with_ops (gsi, conv_code, orig1, NULL_TREE, NULL_TREE); } else { tree mask_type; - vec_perm_indices indices (sel, 1, nelts); + vec_perm_indices indices (sel, 2, nelts); if (!can_vec_perm_const_p (TYPE_MODE (type), indices)) return false; mask_type @@ -2125,15 +2147,14 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) return false; op2 = vec_perm_indices_to_tree (mask_type, indices); if (conv_code == ERROR_MARK) - gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig, orig, op2); + gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig1, orig2, op2); else { gimple *perm - = gimple_build_assign (make_ssa_name (TREE_TYPE (orig)), - VEC_PERM_EXPR, orig, orig, op2); - orig = gimple_assign_lhs (perm); + = gimple_build_assign (make_ssa_name (TREE_TYPE (orig1)), + VEC_PERM_EXPR, orig1, orig2, op2); gsi_insert_before (gsi, perm, GSI_SAME_STMT); - gimple_assign_set_rhs_with_ops (gsi, conv_code, orig, + gimple_assign_set_rhs_with_ops (gsi, conv_code, gimple_assign_lhs (perm), NULL_TREE, NULL_TREE); } }