Message ID | 20120918122009.GY22619@tucnak.redhat.com |
---|---|
State | New |
Headers | show |
On Tue, Sep 18, 2012 at 2:20 PM, Jakub Jelinek <jakub@redhat.com> wrote: > Hi! > > vect_gen_perm_mask is not suitable for use outside of the vectorizer: > it uses the current vector size to determine the number of units of a vector, > which isn't something that should be used outside of the vectorizer. > > The following patch just constructs the mask inline; it is not that much > code. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Ok. Thanks, Richard. > 2012-09-18 Jakub Jelinek <jakub@redhat.com> > > PR tree-optimization/54610 > * tree-ssa-forwprop.c: Include optabs.h. > (simplify_vector_constructor): Don't use vect_gen_perm_mask, > instead create the mask constant here. > * Makefile.in (tree-ssa-forwprop.o): Depend on $(OPTABS_H). > > * gcc.target/i386/pr54610.c: New test. > > --- gcc/tree-ssa-forwprop.c.jj 2012-09-14 14:20:56.000000000 +0200 > +++ gcc/tree-ssa-forwprop.c 2012-09-18 10:17:40.627193548 +0200 > @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. > #include "expr.h" > #include "cfgloop.h" > #include "tree-vectorizer.h" > +#include "optabs.h" > > /* This pass propagates the RHS of assignment statements into use > sites of the LHS of the assignment. 
It's basically a specialized > @@ -2854,14 +2855,24 @@ simplify_vector_constructor (gimple_stmt > return false; > > if (maybe_ident) > - { > - gimple_assign_set_rhs_from_tree (gsi, orig); > - } > + gimple_assign_set_rhs_from_tree (gsi, orig); > else > { > - op2 = vect_gen_perm_mask (type, sel); > - if (!op2) > + tree mask_type, *mask_elts; > + > + if (!can_vec_perm_p (TYPE_MODE (type), false, sel)) > + return false; > + mask_type > + = build_vector_type (build_nonstandard_integer_type (elem_size, 1), > + nelts); > + if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT > + || GET_MODE_SIZE (TYPE_MODE (mask_type)) > + != GET_MODE_SIZE (TYPE_MODE (type))) > return false; > + mask_elts = XALLOCAVEC (tree, nelts); > + for (i = 0; i < nelts; i++) > + mask_elts[i] = build_int_cst (TREE_TYPE (mask_type), sel[i]); > + op2 = build_vector (mask_type, mask_elts); > gimple_assign_set_rhs_with_ops_1 (gsi, VEC_PERM_EXPR, orig, orig, op2); > } > update_stmt (gsi_stmt (*gsi)); > --- gcc/Makefile.in.jj 2012-09-13 07:54:44.000000000 +0200 > +++ gcc/Makefile.in 2012-09-18 10:18:05.717067056 +0200 > @@ -2245,7 +2245,7 @@ tree-ssa-forwprop.o : tree-ssa-forwprop. 
> $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \ > - $(TREE_VECTORIZER_H) > + $(TREE_VECTORIZER_H) $(OPTABS_H) > tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ > $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > --- gcc/testsuite/gcc.target/i386/pr54610.c.jj 2012-09-18 10:24:58.793981091 +0200 > +++ gcc/testsuite/gcc.target/i386/pr54610.c 2012-09-18 10:26:26.838535968 +0200 > @@ -0,0 +1,17 @@ > +/* PR tree-optimization/54610 */ > +/* { dg-do compile } */ > +/* { dg-options "-O -mavx -fdump-tree-optimized" } */ > + > +typedef double vec __attribute__((vector_size (2 * sizeof (double)))); > +void f (vec *px, vec *y, vec *z) > +{ > + vec x = *px; > + vec t1 = { x[1], x[0] }; > + vec t2 = { x[0], x[1] }; > + *y = t1; > + *z = t2; > +} > + > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */ > +/* { dg-final { cleanup-tree-dump "optimized" } } */ > > Jakub
--- gcc/tree-ssa-forwprop.c.jj 2012-09-14 14:20:56.000000000 +0200 +++ gcc/tree-ssa-forwprop.c 2012-09-18 10:17:40.627193548 +0200 @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. #include "expr.h" #include "cfgloop.h" #include "tree-vectorizer.h" +#include "optabs.h" /* This pass propagates the RHS of assignment statements into use sites of the LHS of the assignment. It's basically a specialized @@ -2854,14 +2855,24 @@ simplify_vector_constructor (gimple_stmt return false; if (maybe_ident) - { - gimple_assign_set_rhs_from_tree (gsi, orig); - } + gimple_assign_set_rhs_from_tree (gsi, orig); else { - op2 = vect_gen_perm_mask (type, sel); - if (!op2) + tree mask_type, *mask_elts; + + if (!can_vec_perm_p (TYPE_MODE (type), false, sel)) + return false; + mask_type + = build_vector_type (build_nonstandard_integer_type (elem_size, 1), + nelts); + if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT + || GET_MODE_SIZE (TYPE_MODE (mask_type)) + != GET_MODE_SIZE (TYPE_MODE (type))) return false; + mask_elts = XALLOCAVEC (tree, nelts); + for (i = 0; i < nelts; i++) + mask_elts[i] = build_int_cst (TREE_TYPE (mask_type), sel[i]); + op2 = build_vector (mask_type, mask_elts); gimple_assign_set_rhs_with_ops_1 (gsi, VEC_PERM_EXPR, orig, orig, op2); } update_stmt (gsi_stmt (*gsi)); --- gcc/Makefile.in.jj 2012-09-13 07:54:44.000000000 +0200 +++ gcc/Makefile.in 2012-09-18 10:18:05.717067056 +0200 @@ -2245,7 +2245,7 @@ tree-ssa-forwprop.o : tree-ssa-forwprop. 
$(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \ - $(TREE_VECTORIZER_H) + $(TREE_VECTORIZER_H) $(OPTABS_H) tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ --- gcc/testsuite/gcc.target/i386/pr54610.c.jj 2012-09-18 10:24:58.793981091 +0200 +++ gcc/testsuite/gcc.target/i386/pr54610.c 2012-09-18 10:26:26.838535968 +0200 @@ -0,0 +1,17 @@ +/* PR tree-optimization/54610 */ +/* { dg-do compile } */ +/* { dg-options "-O -mavx -fdump-tree-optimized" } */ + +typedef double vec __attribute__((vector_size (2 * sizeof (double)))); +void f (vec *px, vec *y, vec *z) +{ + vec x = *px; + vec t1 = { x[1], x[0] }; + vec t2 = { x[0], x[1] }; + *y = t1; + *z = t2; +} + +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */