Message ID | 1497304579.24125.144.camel@brimstone.rchland.ibm.com |
---|---|
State | New |
Headers | show |
On Mon, Jun 12, 2017 at 11:56 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: > Hi, > > [PATCH, rs6000] (v2) Fold vector shifts in GIMPLE > > Add support for early expansion of vector shifts. Including > vec_sl (shift left), vec_sr (shift right), > vec_sra (shift right algebraic), vec_rl (rotate left). > Part of this includes adding the vector shift right instructions to > the list of those instructions having an unsigned second argument. > > The VSR (vector shift right) folding is a bit more complex than > the others. This is due to requiring arg0 be unsigned before the > gimple RSHIFT_EXPR assignment is built, which is required for an > algebraic shift. > > [V2 update] Guard the folding of left shifts with TYPE_OVERFLOW_WRAPS. > Add -fwrapv test variations for the left shifts. > > Sniff-tests passed. full regtest still running. OK for trunk? > > Thanks, > -Will > > > > [gcc] > > 2017-06-12 Will Schmidt <will_schmidt@vnet.ibm.com> > > * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling > for early expansion of vector shifts (sl,sr,sra,rl). > (builtin_function_type): Add vector shift right instructions > to the unsigned argument list. > > [gcc/testsuite] > > 2017-06-12 Will Schmidt <will_schmidt@vnet.ibm.com> > > * testsuite/gcc.target/powerpc/fold-vec-shift-char.c: New. > * testsuite/gcc.target/powerpc/fold-vec-shift-int.c: New. > * testsuite/gcc.target/powerpc/fold-vec-shift-longlong.c: New. > * testsuite/gcc.target/powerpc/fold-vec-shift-short.c: New. > * testsuite/gcc.target/powerpc/fold-vec-shift-left.c: New. > * testsuite/gcc.target/powerpc/fold-vec-shift-left-fwrapv.c: New. > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index 63ca2d1..55592fb 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -16588,6 +16588,83 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > gsi_replace (gsi, g, true); > return true; > } > + /* Flavors of vec_rotate_left. 
*/ > + case ALTIVEC_BUILTIN_VRLB: > + case ALTIVEC_BUILTIN_VRLH: > + case ALTIVEC_BUILTIN_VRLW: > + case P8V_BUILTIN_VRLD: > + { > + arg0 = gimple_call_arg (stmt, 0); > + arg1 = gimple_call_arg (stmt, 1); > + lhs = gimple_call_lhs (stmt); > + gimple *g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1); > + gimple_set_location (g, gimple_location (stmt)); > + gsi_replace (gsi, g, true); > + return true; > + } > + /* Flavors of vector shift right algebraic. > + * vec_sra{b,h,w} -> vsra{b,h,w}. */ > + case ALTIVEC_BUILTIN_VSRAB: > + case ALTIVEC_BUILTIN_VSRAH: > + case ALTIVEC_BUILTIN_VSRAW: > + case P8V_BUILTIN_VSRAD: > + { > + arg0 = gimple_call_arg (stmt, 0); > + arg1 = gimple_call_arg (stmt, 1); > + lhs = gimple_call_lhs (stmt); > + gimple *g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1); > + gimple_set_location (g, gimple_location (stmt)); > + gsi_replace (gsi, g, true); > + return true; > + } > + /* Flavors of vector shift left. > + * builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */ > + case ALTIVEC_BUILTIN_VSLB: > + case ALTIVEC_BUILTIN_VSLH: > + case ALTIVEC_BUILTIN_VSLW: > + case P8V_BUILTIN_VSLD: > + { > + arg0 = gimple_call_arg (stmt, 0); > + if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0))) > + && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0)))) > + return false; > + arg1 = gimple_call_arg (stmt, 1); > + lhs = gimple_call_lhs (stmt); > + gimple *g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, arg1); > + gimple_set_location (g, gimple_location (stmt)); > + gsi_replace (gsi, g, true); > + return true; > + } > + /* Flavors of vector shift right. */ > + case ALTIVEC_BUILTIN_VSRB: > + case ALTIVEC_BUILTIN_VSRH: > + case ALTIVEC_BUILTIN_VSRW: > + case P8V_BUILTIN_VSRD: > + { > + arg0 = gimple_call_arg (stmt, 0); > + arg1 = gimple_call_arg (stmt, 1); > + lhs = gimple_call_lhs (stmt); > + gimple *g; > + /* convert arg0 to unsigned. */ > + arg0 = convert (unsigned_type_for (TREE_TYPE (arg0)), arg0); Please do not use 'convert', instead do ... 
> + tree arg0_uns = create_tmp_reg_or_ssa_name > + (unsigned_type_for (TREE_TYPE (arg0))); > + g = gimple_build_assign (arg0_uns, arg0); g = gimple_build_assign (arg0_uns, VIEW_CONVERT_EXPR, unsigned_type, arg0); You also want to avoid spitting out useless copies here if the arg/result is already unsigned, like via tree arg0_uns = arg0; if (! TYPE_UNSIGNED (TREE_TYPE (arg0_uns))) { ... } > + gimple_set_location (g, gimple_location (stmt)); > + gsi_insert_before (gsi, g, GSI_SAME_STMT); > + /* convert lhs to unsigned and do the shift. */ Just use lhs if it has the same sign as arg0_uns. > + tree lhs_uns = create_tmp_reg_or_ssa_name > + (unsigned_type_for (TREE_TYPE (lhs))); You can re-use the type of arg0_uns here. > + g = gimple_build_assign (lhs_uns, RSHIFT_EXPR, arg0_uns, arg1); > + gimple_set_location (g, gimple_location (stmt)); > + gsi_insert_before (gsi, g, GSI_SAME_STMT); > + /* convert lhs back to a signed type for the return. */ > + lhs_uns = convert (signed_type_for (TREE_TYPE (lhs)),lhs_uns); > + g = gimple_build_assign (lhs, lhs_uns); See above for how to perform the conversion. Note that you could use the gimple_build convenience to shorten the code sequence above to gimple_seq stmts = NULL; tree arg0_unsigned = gimple_build (&stmts, VIEW_CONVERT_EXPR, unsigned_type_for (...), arg0); tree res = gimple_build (&stmts, RSHIFT_EXPR, TREE_TYPE (arg0_uns), arg0_uns, arg1); res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res); gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); update_call_from_tree (gsi, res); The above gimple_build sequence will fold all the stmts thus remove useless conversions and apply constant folding, etc. Richard. > + gimple_set_location (g, gimple_location (stmt)); > + gsi_replace (gsi, g, true); > + return true; > + } > default: > break; > } > @@ -18090,6 +18167,14 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, > h.uns_p[2] = 1; > break; > > + /* unsigned second arguments (vector shift right). 
*/ > + case ALTIVEC_BUILTIN_VSRB: > + case ALTIVEC_BUILTIN_VSRH: > + case ALTIVEC_BUILTIN_VSRW: > + case P8V_BUILTIN_VSRD: > + h.uns_p[2] = 1; > + break; > + > default: > break; > } > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-char.c > new file mode 100644 > index 0000000..ebe91e7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-char.c > @@ -0,0 +1,66 @@ > +/* Verify that overloaded built-ins for vec_sl with char > + inputs produce the right results. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-maltivec -O2" } */ > + > +#include <altivec.h> > + > +//# vec_sl - shift left > +//# vec_sr - shift right > +//# vec_sra - shift right algebraic > +//# vec_rl - rotate left > + > +vector signed char > +testsl_signed (vector signed char x, vector unsigned char y) > +{ > + return vec_sl (x, y); > +} > + > +vector unsigned char > +testsl_unsigned (vector unsigned char x, vector unsigned char y) > +{ > + return vec_sl (x, y); > +} > + > +vector signed char > +testsr_signed (vector signed char x, vector unsigned char y) > +{ > + return vec_sr (x, y); > +} > + > +vector unsigned char > +testsr_unsigned (vector unsigned char x, vector unsigned char y) > +{ > + return vec_sr (x, y); > +} > + > +vector signed char > +testsra_signed (vector signed char x, vector unsigned char y) > +{ > + return vec_sra (x, y); > +} > + > +vector unsigned char > +testsra_unsigned (vector unsigned char x, vector unsigned char y) > +{ > + return vec_sra (x, y); > +} > + > +vector signed char > +testrl_signed (vector signed char x, vector unsigned char y) > +{ > + return vec_rl (x, y); > +} > + > +vector unsigned char > +testrl_unsigned (vector unsigned char x, vector unsigned char y) > +{ > + return vec_rl (x, y); > +} > + > +/* { dg-final { scan-assembler-times "vslb" 2 } } */ > +/* { dg-final { scan-assembler-times "vsrb" 2 } } */ > +/* 
{ dg-final { scan-assembler-times "vsrab" 2 } } */ > +/* { dg-final { scan-assembler-times "vrlb" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-int.c > new file mode 100644 > index 0000000..e9c5fe1 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-int.c > @@ -0,0 +1,61 @@ > +/* Verify that overloaded built-ins for vec_sl with int > + inputs produce the right results. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-maltivec -O2" } */ > + > +#include <altivec.h> > + > +vector signed int > +testsl_signed (vector signed int x, vector unsigned int y) > +{ > + return vec_sl (x, y); > +} > + > +vector unsigned int > +testsl_unsigned (vector unsigned int x, vector unsigned int y) > +{ > + return vec_sl (x, y); > +} > + > +vector signed int > +testsr_signed (vector signed int x, vector unsigned int y) > +{ > + return vec_sr (x, y); > +} > + > +vector unsigned int > +testsr_unsigned (vector unsigned int x, vector unsigned int y) > +{ > + return vec_sr (x, y); > +} > + > +vector signed int > +testsra_signed (vector signed int x, vector unsigned int y) > +{ > + return vec_sra (x, y); > +} > + > +vector unsigned int > +testsra_unsigned (vector unsigned int x, vector unsigned int y) > +{ > + return vec_sra (x, y); > +} > + > +vector signed int > +testrl_signed (vector signed int x, vector unsigned int y) > +{ > + return vec_rl (x, y); > +} > + > +vector unsigned int > +testrl_unsigned (vector unsigned int x, vector unsigned int y) > +{ > + return vec_rl (x, y); > +} > + > +/* { dg-final { scan-assembler-times "vslw" 2 } } */ > +/* { dg-final { scan-assembler-times "vsrw" 2 } } */ > +/* { dg-final { scan-assembler-times "vsraw" 2 } } */ > +/* { dg-final { scan-assembler-times "vrlw" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left-fwrapv.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left-fwrapv.c > new file mode 100644 > index 0000000..4e0dc66 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left-fwrapv.c > @@ -0,0 +1,61 @@ > +/* Verify that overloaded built-ins for vec_sl produce the right results. */ > +/* This test covers the shift left tests with the -fwrapv option. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-maltivec -O2 -fwrapv" } */ > + > +#include <altivec.h> > + > +vector signed char > +testsl_signed_char (vector signed char x, vector unsigned char y) > +{ > + return vec_sl (x, y); > +} > + > +vector unsigned char > +testsl_unsigned_char (vector unsigned char x, vector unsigned char y) > +{ > + return vec_sl (x, y); > +} > + > +vector signed short > +testsl_signed_short (vector signed short x, vector unsigned short y) > +{ > + return vec_sl (x, y); > +} > + > +vector unsigned short > +testsl_unsigned_short (vector unsigned short x, vector unsigned short y) > +{ > + return vec_sl (x, y); > +} > + > +vector signed int > +testsl_signed_int (vector signed int x, vector unsigned int y) > +{ > + return vec_sl (x, y); > +} > + > +vector unsigned int > +testsl_unsigned_int (vector unsigned int x, vector unsigned int y) > +{ > + return vec_sl (x, y); > +} > + > +vector signed long long > +testsl_signed_longlong (vector signed long long x, vector unsigned long long y) > +{ > + return vec_sl (x, y); > +} > + > +vector unsigned long long > +testsl_unsigned_longlong (vector unsigned long long x, vector unsigned long long y) > +{ > + return vec_sl (x, y); > +} > + > +/* { dg-final { scan-assembler-times "vslb" 2 } } */ > +/* { dg-final { scan-assembler-times "vslh" 2 } } */ > +/* { dg-final { scan-assembler-times "vslw" 2 } } */ > +/* { dg-final { scan-assembler-times "vsld" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left.c > new file mode 100644 > index 0000000..bbc3f01 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left.c > @@ -0,0 +1,61 @@ > +/* cross section of shift tests specific for shift-left. > + * This is a counterpart to the fold-vec-shift-left-frwapv test. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-maltivec -O2" } */ > + > +#include <altivec.h> > + > +vector signed char > +testsl_signed_char (vector signed char x, vector unsigned char y) > +{ > + return vec_sl (x, y); > +} > + > +vector unsigned char > +testsl_unsigned_char (vector unsigned char x, vector unsigned char y) > +{ > + return vec_sl (x, y); > +} > + > +vector signed short > +testsl_signed_short (vector signed short x, vector unsigned short y) > +{ > + return vec_sl (x, y); > +} > + > +vector unsigned short > +testsl_unsigned_short (vector unsigned short x, vector unsigned short y) > +{ > + return vec_sl (x, y); > +} > + > +vector signed int > +testsl_signed_int (vector signed int x, vector unsigned int y) > +{ > + return vec_sl (x, y); > +} > + > +vector unsigned int > +testsl_unsigned_int (vector unsigned int x, vector unsigned int y) > +{ > + return vec_sl (x, y); > +} > + > +vector signed long long > +testsl_signed_longlong (vector signed long long x, vector unsigned long long y) > +{ > + return vec_sl (x, y); > +} > + > +vector unsigned long long > +testsl_unsigned_longlong (vector unsigned long long x, vector unsigned long long y) > +{ > + return vec_sl (x, y); > +} > + > +/* { dg-final { scan-assembler-times "vslb" 2 } } */ > +/* { dg-final { scan-assembler-times "vslh" 2 } } */ > +/* { dg-final { scan-assembler-times "vslw" 2 } } */ > +/* { dg-final { scan-assembler-times "vsld" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-longlong.c > new file mode 100644 > index 
0000000..97b82cf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-longlong.c > @@ -0,0 +1,63 @@ > +/* Verify that overloaded built-ins for vec_sl with long long > + inputs produce the right results. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_p8vector_ok } */ > +/* { dg-options "-mpower8-vector -O2" } */ > + > +#include <altivec.h> > + > +vector signed long long > +testsl_signed (vector signed long long x, vector unsigned long long y) > +{ > + return vec_sl (x, y); > +} > + > +vector unsigned long long > +testsl_unsigned (vector unsigned long long x, vector unsigned long long y) > +{ > + return vec_sl (x, y); > +} > + > +vector signed long long > +testsr_signed (vector signed long long x, vector unsigned long long y) > +{ > + return vec_sr (x, y); > +} > + > +vector unsigned long long > +testsr_unsigned (vector unsigned long long x, vector unsigned long long y) > +{ > + return vec_sr (x, y); > +} > + > +vector signed long long > +testsra_signed (vector signed long long x, vector unsigned long long y) > +{ > + return vec_sra (x, y); > +} > + > +/* watch for PR 79544 here (vsrd / vsrad issue) */ > +vector unsigned long long > +testsra_unsigned (vector unsigned long long x, vector unsigned long long y) > +{ > + return vec_sra (x, y); > +} > + > +vector signed long long > +testrl_signed (vector signed long long x, vector unsigned long long y) > +{ > + return vec_rl (x, y); > +} > + > +vector unsigned long long > +testrl_unsigned (vector unsigned long long x, vector unsigned long long y) > +{ > + return vec_rl (x, y); > +} > + > +/* { dg-final { scan-assembler-times "vsld" 2 } } */ > +/* { dg-final { scan-assembler-times "vsrd" 2 } } */ > +/* { dg-final { scan-assembler-times "vsrad" 2 } } */ > +/* { dg-final { scan-assembler-times "vrld" 2 } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-short.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-short.c > new file mode 100644 > index 
0000000..4ca7c18 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-short.c > @@ -0,0 +1,61 @@ > +/* Verify that overloaded built-ins for vec_sl with short > + inputs produce the right results. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-maltivec -O2" } */ > + > +#include <altivec.h> > + > +vector signed short > +testsl_signed (vector signed short x, vector unsigned short y) > +{ > + return vec_sl (x, y); > +} > + > +vector unsigned short > +testsl_unsigned (vector unsigned short x, vector unsigned short y) > +{ > + return vec_sl (x, y); > +} > + > +vector signed short > +testsr_signed (vector signed short x, vector unsigned short y) > +{ > + return vec_sr (x, y); > +} > + > +vector unsigned short > +testsr_unsigned (vector unsigned short x, vector unsigned short y) > +{ > + return vec_sr (x, y); > +} > + > +vector signed short > +testsra_signed (vector signed short x, vector unsigned short y) > +{ > + return vec_sra (x, y); > +} > + > +vector unsigned short > +testsra_unsigned (vector unsigned short x, vector unsigned short y) > +{ > + return vec_sra (x, y); > +} > + > +vector signed short > +testrl_signed (vector signed short x, vector unsigned short y) > +{ > + return vec_rl (x, y); > +} > + > +vector unsigned short > +testrl_unsigned (vector unsigned short x, vector unsigned short y) > +{ > + return vec_rl (x, y); > +} > + > +/* { dg-final { scan-assembler-times "vslh" 2 } } */ > +/* { dg-final { scan-assembler-times "vsrh" 2 } } */ > +/* { dg-final { scan-assembler-times "vsrah" 2 } } */ > +/* { dg-final { scan-assembler-times "vrlh" 2 } } */ > >
On Tue, 2017-06-13 at 10:03 +0200, Richard Biener wrote: > On Mon, Jun 12, 2017 at 11:56 PM, Will Schmidt > <will_schmidt@vnet.ibm.com> wrote: > > Hi, > > > > > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > > index 63ca2d1..55592fb 100644 > > --- a/gcc/config/rs6000/rs6000.c > > +++ b/gcc/config/rs6000/rs6000.c > > @@ -16588,6 +16588,83 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > > gsi_replace (gsi, g, true); > > return true; > > } <snip> > > + /* Flavors of vector shift right. */ > > + case ALTIVEC_BUILTIN_VSRB: > > + case ALTIVEC_BUILTIN_VSRH: > > + case ALTIVEC_BUILTIN_VSRW: > > + case P8V_BUILTIN_VSRD: > > + { > > + arg0 = gimple_call_arg (stmt, 0); > > + arg1 = gimple_call_arg (stmt, 1); > > + lhs = gimple_call_lhs (stmt); > > + gimple *g; > > + /* convert arg0 to unsigned. */ > > + arg0 = convert (unsigned_type_for (TREE_TYPE (arg0)), arg0); > > Please do not use 'convert', instead do ... Hi Richard, V3 of this patch, using the gimple_build() convenience helper function has been posted, and is the direction I'm going for with this patch. I wanted to make sure I fully understood the other options though, so I have a question/clarification on the other suggestions: > > + tree arg0_uns = create_tmp_reg_or_ssa_name > > + (unsigned_type_for (TREE_TYPE (arg0))); > > + g = gimple_build_assign (arg0_uns, arg0); > > g = gimple_build_assign (arg0_uns, VIEW_CONVERT_EXPR, unsigned_type, arg0); I tried a few trivial variations of this: g = gimple_build_assign (arg0_uns, VIEW_CONVERT_EXPR, unsigned_type_for (TREE_TYPE(arg0_uns)), arg0); which looked good, but it asserts in gimple_build_assign_1(), on the check "if (op2) { gcc_assert (num_ops > 2); ... Trolling around the other code for references, i found and tried this, which uses the build1() helper, and appears to work. 
g = gimple_build_assign (arg0_uns, build1(VIEW_CONVERT_EXPR, unsigned_type_for (TREE_TYPE(arg0_uns)), arg0)); Thanks for the feedback, etc. :-) -Will > You also want to avoid spitting out useless copies here if the > arg/result is already unsigned, > like via > > tree arg0_uns = arg0; > if (! TYPE_UNSIGNED (TREE_TYPE (arg0_uns))) > { > ... > } > > > + gimple_set_location (g, gimple_location (stmt)); > > + gsi_insert_before (gsi, g, GSI_SAME_STMT); > > + /* convert lhs to unsigned and do the shift. */ > > Just use lhs if it has the same sign as arg0_uns. > > > + tree lhs_uns = create_tmp_reg_or_ssa_name > > + (unsigned_type_for (TREE_TYPE (lhs))); > > You can re-use the type of arg0_uns here. > > > + g = gimple_build_assign (lhs_uns, RSHIFT_EXPR, arg0_uns, arg1); > > + gimple_set_location (g, gimple_location (stmt)); > > + gsi_insert_before (gsi, g, GSI_SAME_STMT); > > + /* convert lhs back to a signed type for the return. */ > > + lhs_uns = convert (signed_type_for (TREE_TYPE (lhs)),lhs_uns); > > + g = gimple_build_assign (lhs, lhs_uns); > > See above for how to perform the conversion. > > Note that you could use the gimple_build convenience to shorten the code > sequence above to > > gimple_seq stmts = NULL; > tree arg0_unsigned = gimple_build (&stmts, VIEW_CONVERT_EXPR, > > unsigned_type_for (...), arg0); > tree res = gimple_build (&stmts, RSHIFT_EXPR, TREE_TYPE (arg0_uns), > arg0_uns, arg1); > res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res); > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > update_call_from_tree (gsi, res); > > The above gimple_build sequence will fold all the stmts thus remove > useless conversions and apply constant folding, etc. > > Richard. > > > + gimple_set_location (g, gimple_location (stmt)); > > + gsi_replace (gsi, g, true); > > + return true; > > + } > > default: > > break; > > }
On Wed, Jun 14, 2017 at 4:55 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: > On Tue, 2017-06-13 at 10:03 +0200, Richard Biener wrote: >> On Mon, Jun 12, 2017 at 11:56 PM, Will Schmidt >> <will_schmidt@vnet.ibm.com> wrote: >> > Hi, >> > >> > >> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c >> > index 63ca2d1..55592fb 100644 >> > --- a/gcc/config/rs6000/rs6000.c >> > +++ b/gcc/config/rs6000/rs6000.c >> > @@ -16588,6 +16588,83 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) >> > gsi_replace (gsi, g, true); >> > return true; >> > } > <snip> >> > + /* Flavors of vector shift right. */ >> > + case ALTIVEC_BUILTIN_VSRB: >> > + case ALTIVEC_BUILTIN_VSRH: >> > + case ALTIVEC_BUILTIN_VSRW: >> > + case P8V_BUILTIN_VSRD: >> > + { >> > + arg0 = gimple_call_arg (stmt, 0); >> > + arg1 = gimple_call_arg (stmt, 1); >> > + lhs = gimple_call_lhs (stmt); >> > + gimple *g; >> > + /* convert arg0 to unsigned. */ >> > + arg0 = convert (unsigned_type_for (TREE_TYPE (arg0)), arg0); >> >> Please do not use 'convert', instead do ... > > Hi Richard, > > V3 of this patch , using the gimple_build() convenience helper function > has been posted, and is the direction I'm going for with this patch. I > wanted to make sure I fully understood the other options though, so I > have a question/clarification on the other suggestions: > >> > + tree arg0_uns = create_tmp_reg_or_ssa_name >> > + (unsigned_type_for (TREE_TYPE (arg0))); >> > + g = gimple_build_assign (arg0_uns, arg0); >> >> g = gimple_build_assign (arg0_uns, VIEW_CONVERT_EXPR, usigned_type, arg0); > > I tried a few trivial variations of this: > g = gimple_build_assign (arg0_uns, VIEW_CONVERT_EXPR, > unsigned_type_for (TREE_TYPE(arg0_uns)), arg0); > > which lookd good, but it asserts in gimple_build_assign_1(), on the > check > "if (op2) > { > gcc_assert (num_ops > 2); > ... > > Trolling around the other code for references, i found and tried this, > which uses the build1() helper, and appears to work. 
Is this the gist > of what you suggested, or would there be another alternative? > > g = gimple_build_assign (arg0_uns, > build1(VIEW_CONVERT_EXPR, > unsigned_type_for (TREE_TYPE(arg0_uns)), arg0)); > > Thanks for the feedback, etc. :-) Yeah, sorry -- the gimple_build machinery handles this GIMPLE wart transparently but gimple_build_assign does not ... Richard. > -Will > > >> You also want to avoid spitting out useless copies here if the >> arg/result is already unsigned, >> like via >> >> tree arg0_uns = arg0; >> if (! TYPE_UNSIGNED (TREE_TYPE (arg0_uns))) >> { >> ... >> } >> >> > + gimple_set_location (g, gimple_location (stmt)); >> > + gsi_insert_before (gsi, g, GSI_SAME_STMT); >> > + /* convert lhs to unsigned and do the shift. */ >> >> Just use lhs if it has the same sign as arg0_uns. >> >> > + tree lhs_uns = create_tmp_reg_or_ssa_name >> > + (unsigned_type_for (TREE_TYPE (lhs))); >> >> You can re-use the type of arg0_uns here. >> >> > + g = gimple_build_assign (lhs_uns, RSHIFT_EXPR, arg0_uns, arg1); >> > + gimple_set_location (g, gimple_location (stmt)); >> > + gsi_insert_before (gsi, g, GSI_SAME_STMT); >> > + /* convert lhs back to a signed type for the return. */ >> > + lhs_uns = convert (signed_type_for (TREE_TYPE (lhs)),lhs_uns); >> > + g = gimple_build_assign (lhs, lhs_uns); >> >> See above for how to perform the conversion. >> >> Note that you could use the gimple_build convenience to shorten the code >> sequence above to >> >> gimple_seq stmts = NULL; >> tree arg0_unsigned = gimple_build (&stmts, VIEW_CONVERT_EXPR, >> >> unsigned_type_for (...), arg0); >> tree res = gimple_build (&stmts, RSHIFT_EXPR, TREE_TYPE (arg0_uns), >> arg0_uns, arg1); >> res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res); >> gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); >> update_call_from_tree (gsi, res); >> >> The above gimple_build sequence will fold all the stmts thus remove >> useless conversions and apply constant folding, etc. >> >> Richard. 
>> >> > + gimple_set_location (g, gimple_location (stmt)); >> > + gsi_replace (gsi, g, true); >> > + return true; >> > + } >> > default: >> > break; >> > } > > >
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 63ca2d1..55592fb 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -16588,6 +16588,83 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) gsi_replace (gsi, g, true); return true; } + /* Flavors of vec_rotate_left. */ + case ALTIVEC_BUILTIN_VRLB: + case ALTIVEC_BUILTIN_VRLH: + case ALTIVEC_BUILTIN_VRLW: + case P8V_BUILTIN_VRLD: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vector shift right algebraic. + * vec_sra{b,h,w} -> vsra{b,h,w}. */ + case ALTIVEC_BUILTIN_VSRAB: + case ALTIVEC_BUILTIN_VSRAH: + case ALTIVEC_BUILTIN_VSRAW: + case P8V_BUILTIN_VSRAD: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vector shift left. + * builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */ + case ALTIVEC_BUILTIN_VSLB: + case ALTIVEC_BUILTIN_VSLH: + case ALTIVEC_BUILTIN_VSLW: + case P8V_BUILTIN_VSLD: + { + arg0 = gimple_call_arg (stmt, 0); + if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0))) + && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0)))) + return false; + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vector shift right. 
*/ + case ALTIVEC_BUILTIN_VSRB: + case ALTIVEC_BUILTIN_VSRH: + case ALTIVEC_BUILTIN_VSRW: + case P8V_BUILTIN_VSRD: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g; + /* convert arg0 to unsigned. */ + arg0 = convert (unsigned_type_for (TREE_TYPE (arg0)), arg0); + tree arg0_uns = create_tmp_reg_or_ssa_name + (unsigned_type_for (TREE_TYPE (arg0))); + g = gimple_build_assign (arg0_uns, arg0); + gimple_set_location (g, gimple_location (stmt)); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + /* convert lhs to unsigned and do the shift. */ + tree lhs_uns = create_tmp_reg_or_ssa_name + (unsigned_type_for (TREE_TYPE (lhs))); + g = gimple_build_assign (lhs_uns, RSHIFT_EXPR, arg0_uns, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + /* convert lhs back to a signed type for the return. */ + lhs_uns = convert (signed_type_for (TREE_TYPE (lhs)),lhs_uns); + g = gimple_build_assign (lhs, lhs_uns); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } default: break; } @@ -18090,6 +18167,14 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, h.uns_p[2] = 1; break; + /* unsigned second arguments (vector shift right). */ + case ALTIVEC_BUILTIN_VSRB: + case ALTIVEC_BUILTIN_VSRH: + case ALTIVEC_BUILTIN_VSRW: + case P8V_BUILTIN_VSRD: + h.uns_p[2] = 1; + break; + default: break; } diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-char.c new file mode 100644 index 0000000..ebe91e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-char.c @@ -0,0 +1,66 @@ +/* Verify that overloaded built-ins for vec_sl with char + inputs produce the right results. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2" } */ + +#include <altivec.h> + +//# vec_sl - shift left +//# vec_sr - shift right +//# vec_sra - shift right algebraic +//# vec_rl - rotate left + +vector signed char +testsl_signed (vector signed char x, vector unsigned char y) +{ + return vec_sl (x, y); +} + +vector unsigned char +testsl_unsigned (vector unsigned char x, vector unsigned char y) +{ + return vec_sl (x, y); +} + +vector signed char +testsr_signed (vector signed char x, vector unsigned char y) +{ + return vec_sr (x, y); +} + +vector unsigned char +testsr_unsigned (vector unsigned char x, vector unsigned char y) +{ + return vec_sr (x, y); +} + +vector signed char +testsra_signed (vector signed char x, vector unsigned char y) +{ + return vec_sra (x, y); +} + +vector unsigned char +testsra_unsigned (vector unsigned char x, vector unsigned char y) +{ + return vec_sra (x, y); +} + +vector signed char +testrl_signed (vector signed char x, vector unsigned char y) +{ + return vec_rl (x, y); +} + +vector unsigned char +testrl_unsigned (vector unsigned char x, vector unsigned char y) +{ + return vec_rl (x, y); +} + +/* { dg-final { scan-assembler-times "vslb" 2 } } */ +/* { dg-final { scan-assembler-times "vsrb" 2 } } */ +/* { dg-final { scan-assembler-times "vsrab" 2 } } */ +/* { dg-final { scan-assembler-times "vrlb" 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-int.c new file mode 100644 index 0000000..e9c5fe1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-int.c @@ -0,0 +1,61 @@ +/* Verify that overloaded built-ins for vec_sl with int + inputs produce the right results. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2" } */ + +#include <altivec.h> + +vector signed int +testsl_signed (vector signed int x, vector unsigned int y) +{ + return vec_sl (x, y); +} + +vector unsigned int +testsl_unsigned (vector unsigned int x, vector unsigned int y) +{ + return vec_sl (x, y); +} + +vector signed int +testsr_signed (vector signed int x, vector unsigned int y) +{ + return vec_sr (x, y); +} + +vector unsigned int +testsr_unsigned (vector unsigned int x, vector unsigned int y) +{ + return vec_sr (x, y); +} + +vector signed int +testsra_signed (vector signed int x, vector unsigned int y) +{ + return vec_sra (x, y); +} + +vector unsigned int +testsra_unsigned (vector unsigned int x, vector unsigned int y) +{ + return vec_sra (x, y); +} + +vector signed int +testrl_signed (vector signed int x, vector unsigned int y) +{ + return vec_rl (x, y); +} + +vector unsigned int +testrl_unsigned (vector unsigned int x, vector unsigned int y) +{ + return vec_rl (x, y); +} + +/* { dg-final { scan-assembler-times "vslw" 2 } } */ +/* { dg-final { scan-assembler-times "vsrw" 2 } } */ +/* { dg-final { scan-assembler-times "vsraw" 2 } } */ +/* { dg-final { scan-assembler-times "vrlw" 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left-fwrapv.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left-fwrapv.c new file mode 100644 index 0000000..4e0dc66 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left-fwrapv.c @@ -0,0 +1,61 @@ +/* Verify that overloaded built-ins for vec_sl produce the right results. */ +/* This test covers the shift left tests with the -fwrapv option. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2 -fwrapv" } */ + +#include <altivec.h> + +vector signed char +testsl_signed_char (vector signed char x, vector unsigned char y) +{ + return vec_sl (x, y); +} + +vector unsigned char +testsl_unsigned_char (vector unsigned char x, vector unsigned char y) +{ + return vec_sl (x, y); +} + +vector signed short +testsl_signed_short (vector signed short x, vector unsigned short y) +{ + return vec_sl (x, y); +} + +vector unsigned short +testsl_unsigned_short (vector unsigned short x, vector unsigned short y) +{ + return vec_sl (x, y); +} + +vector signed int +testsl_signed_int (vector signed int x, vector unsigned int y) +{ + return vec_sl (x, y); +} + +vector unsigned int +testsl_unsigned_int (vector unsigned int x, vector unsigned int y) +{ + return vec_sl (x, y); +} + +vector signed long long +testsl_signed_longlong (vector signed long long x, vector unsigned long long y) +{ + return vec_sl (x, y); +} + +vector unsigned long long +testsl_unsigned_longlong (vector unsigned long long x, vector unsigned long long y) +{ + return vec_sl (x, y); +} + +/* { dg-final { scan-assembler-times "vslb" 2 } } */ +/* { dg-final { scan-assembler-times "vslh" 2 } } */ +/* { dg-final { scan-assembler-times "vslw" 2 } } */ +/* { dg-final { scan-assembler-times "vsld" 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left.c new file mode 100644 index 0000000..bbc3f01 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-left.c @@ -0,0 +1,61 @@ +/* cross section of shift tests specific for shift-left. + * This is a counterpart to the fold-vec-shift-left-fwrapv test.
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2" } */ + +#include <altivec.h> + +vector signed char +testsl_signed_char (vector signed char x, vector unsigned char y) +{ + return vec_sl (x, y); +} + +vector unsigned char +testsl_unsigned_char (vector unsigned char x, vector unsigned char y) +{ + return vec_sl (x, y); +} + +vector signed short +testsl_signed_short (vector signed short x, vector unsigned short y) +{ + return vec_sl (x, y); +} + +vector unsigned short +testsl_unsigned_short (vector unsigned short x, vector unsigned short y) +{ + return vec_sl (x, y); +} + +vector signed int +testsl_signed_int (vector signed int x, vector unsigned int y) +{ + return vec_sl (x, y); +} + +vector unsigned int +testsl_unsigned_int (vector unsigned int x, vector unsigned int y) +{ + return vec_sl (x, y); +} + +vector signed long long +testsl_signed_longlong (vector signed long long x, vector unsigned long long y) +{ + return vec_sl (x, y); +} + +vector unsigned long long +testsl_unsigned_longlong (vector unsigned long long x, vector unsigned long long y) +{ + return vec_sl (x, y); +} + +/* { dg-final { scan-assembler-times "vslb" 2 } } */ +/* { dg-final { scan-assembler-times "vslh" 2 } } */ +/* { dg-final { scan-assembler-times "vslw" 2 } } */ +/* { dg-final { scan-assembler-times "vsld" 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-longlong.c new file mode 100644 index 0000000..97b82cf --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-longlong.c @@ -0,0 +1,63 @@ +/* Verify that overloaded built-ins for vec_sl with long long + inputs produce the right results. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mpower8-vector -O2" } */ + +#include <altivec.h> + +vector signed long long +testsl_signed (vector signed long long x, vector unsigned long long y) +{ + return vec_sl (x, y); +} + +vector unsigned long long +testsl_unsigned (vector unsigned long long x, vector unsigned long long y) +{ + return vec_sl (x, y); +} + +vector signed long long +testsr_signed (vector signed long long x, vector unsigned long long y) +{ + return vec_sr (x, y); +} + +vector unsigned long long +testsr_unsigned (vector unsigned long long x, vector unsigned long long y) +{ + return vec_sr (x, y); +} + +vector signed long long +testsra_signed (vector signed long long x, vector unsigned long long y) +{ + return vec_sra (x, y); +} + +/* watch for PR 79544 here (vsrd / vsrad issue) */ +vector unsigned long long +testsra_unsigned (vector unsigned long long x, vector unsigned long long y) +{ + return vec_sra (x, y); +} + +vector signed long long +testrl_signed (vector signed long long x, vector unsigned long long y) +{ + return vec_rl (x, y); +} + +vector unsigned long long +testrl_unsigned (vector unsigned long long x, vector unsigned long long y) +{ + return vec_rl (x, y); +} + +/* { dg-final { scan-assembler-times "vsld" 2 } } */ +/* { dg-final { scan-assembler-times "vsrd" 2 } } */ +/* { dg-final { scan-assembler-times "vsrad" 2 } } */ +/* { dg-final { scan-assembler-times "vrld" 2 } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-short.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-short.c new file mode 100644 index 0000000..4ca7c18 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-shift-short.c @@ -0,0 +1,61 @@ +/* Verify that overloaded built-ins for vec_sl with short + inputs produce the right results. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2" } */ + +#include <altivec.h> + +vector signed short +testsl_signed (vector signed short x, vector unsigned short y) +{ + return vec_sl (x, y); +} + +vector unsigned short +testsl_unsigned (vector unsigned short x, vector unsigned short y) +{ + return vec_sl (x, y); +} + +vector signed short +testsr_signed (vector signed short x, vector unsigned short y) +{ + return vec_sr (x, y); +} + +vector unsigned short +testsr_unsigned (vector unsigned short x, vector unsigned short y) +{ + return vec_sr (x, y); +} + +vector signed short +testsra_signed (vector signed short x, vector unsigned short y) +{ + return vec_sra (x, y); +} + +vector unsigned short +testsra_unsigned (vector unsigned short x, vector unsigned short y) +{ + return vec_sra (x, y); +} + +vector signed short +testrl_signed (vector signed short x, vector unsigned short y) +{ + return vec_rl (x, y); +} + +vector unsigned short +testrl_unsigned (vector unsigned short x, vector unsigned short y) +{ + return vec_rl (x, y); +} + +/* { dg-final { scan-assembler-times "vslh" 2 } } */ +/* { dg-final { scan-assembler-times "vsrh" 2 } } */ +/* { dg-final { scan-assembler-times "vsrah" 2 } } */ +/* { dg-final { scan-assembler-times "vrlh" 2 } } */