Message ID | e319e90f-817f-55c0-4f10-c48e116212a3@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [rs6000] Optimization for vec_xl_sext | expand |
Hi Hao Chen, I don't understand. This patch was already approved and you committed it. :-) I know because I needed to make corresponding adjustments to the new builtins code. Thanks, Bill On 11/15/21 8:16 PM, HAO CHEN GUI wrote: > Hi, > > The patch optimizes the code generation for vec_xl_sext builtin. Now all the sign extensions are done on VSX registers directly. > > Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. > > ChangeLog > > 2021-11-16 Haochen Gui <guihaoc@linux.ibm.com> > > gcc/ > * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): Modify > the expansion for sign extension. All extensions are done on VSX > registers. > > gcc/testsuite/ > * gcc.target/powerpc/p10_vec_xl_sext.c: New test. > > patch.diff > > diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c > index b4e13af4dc6..587e9fa2a2a 100644 > --- a/gcc/config/rs6000/rs6000-call.c > +++ b/gcc/config/rs6000/rs6000-call.c > @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl > > if (sign_extend) > { > - rtx discratch = gen_reg_rtx (DImode); > + rtx discratch = gen_reg_rtx (V2DImode); > rtx tiscratch = gen_reg_rtx (TImode); > > /* Emit the lxvr*x insn. */ > @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl > return 0; > emit_insn (pat); > > - /* Emit a sign extension from QI,HI,WI to double (DI). */ > - rtx scratch = gen_lowpart (smode, tiscratch); > + /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. 
*/ > + rtx temp1, temp2; > if (icode == CODE_FOR_vsx_lxvrbx) > - emit_insn (gen_extendqidi2 (discratch, scratch)); > + { > + temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0); > + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1)); > + } > else if (icode == CODE_FOR_vsx_lxvrhx) > - emit_insn (gen_extendhidi2 (discratch, scratch)); > + { > + temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0); > + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1)); > + } > else if (icode == CODE_FOR_vsx_lxvrwx) > - emit_insn (gen_extendsidi2 (discratch, scratch)); > - /* Assign discratch directly if scratch is already DI. */ > - if (icode == CODE_FOR_vsx_lxvrdx) > - discratch = scratch; > + { > + temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0); > + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1)); > + } > + else if (icode == CODE_FOR_vsx_lxvrdx) > + discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0); > + else > + gcc_unreachable (); > > - /* Emit the sign extension from DI (double) to TI (quad). */ > - emit_insn (gen_extendditi2 (target, discratch)); > + /* Emit the sign extension from V2DI (double) to TI (quad). 
*/ > + temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0); > + emit_insn (gen_extendditi2_vector (target, temp2)); > > return target; > } > diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c > new file mode 100644 > index 00000000000..78e72ac5425 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c > @@ -0,0 +1,35 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-require-effective-target power10_ok } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +#include <altivec.h> > + > +vector signed __int128 > +foo1 (signed long a, signed char *b) > +{ > + return vec_xl_sext (a, b); > +} > + > +vector signed __int128 > +foo2 (signed long a, signed short *b) > +{ > + return vec_xl_sext (a, b); > +} > + > +vector signed __int128 > +foo3 (signed long a, signed int *b) > +{ > + return vec_xl_sext (a, b); > +} > + > +vector signed __int128 > +foo4 (signed long a, signed long *b) > +{ > + return vec_xl_sext (a, b); > +} > + > +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */ > +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */ >
Bill, Sorry, I mixed up the patches. There is one vec_reve patch which hasn't gotten approval for a long time. I will re-send it. Thanks a lot. On 16/11/2021 下午 9:10, Bill Schmidt wrote: > Hi Hao Chen, > > I don't understand. This patch was already approved and you committed it. :-) I know > because I needed to make corresponding adjustments to the new builtins code. > > Thanks, > Bill > > On 11/15/21 8:16 PM, HAO CHEN GUI wrote: >> Hi, >> >> The patch optimizes the code generation for vec_xl_sext builtin. Now all the sign extensions are done on VSX registers directly. >> >> Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. >> >> ChangeLog >> >> 2021-11-16 Haochen Gui <guihaoc@linux.ibm.com> >> >> gcc/ >> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): Modify >> the expansion for sign extension. All extensions are done on VSX >> registers. >> >> gcc/testsuite/ >> * gcc.target/powerpc/p10_vec_xl_sext.c: New test. >> >> patch.diff >> >> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c >> index b4e13af4dc6..587e9fa2a2a 100644 >> --- a/gcc/config/rs6000/rs6000-call.c >> +++ b/gcc/config/rs6000/rs6000-call.c >> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl >> >> if (sign_extend) >> { >> - rtx discratch = gen_reg_rtx (DImode); >> + rtx discratch = gen_reg_rtx (V2DImode); >> rtx tiscratch = gen_reg_rtx (TImode); >> >> /* Emit the lxvr*x insn. */ >> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl >> return 0; >> emit_insn (pat); >> >> - /* Emit a sign extension from QI,HI,WI to double (DI). */ >> - rtx scratch = gen_lowpart (smode, tiscratch); >> + /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. 
*/ >> + rtx temp1, temp2; >> if (icode == CODE_FOR_vsx_lxvrbx) >> - emit_insn (gen_extendqidi2 (discratch, scratch)); >> + { >> + temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0); >> + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1)); >> + } >> else if (icode == CODE_FOR_vsx_lxvrhx) >> - emit_insn (gen_extendhidi2 (discratch, scratch)); >> + { >> + temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0); >> + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1)); >> + } >> else if (icode == CODE_FOR_vsx_lxvrwx) >> - emit_insn (gen_extendsidi2 (discratch, scratch)); >> - /* Assign discratch directly if scratch is already DI. */ >> - if (icode == CODE_FOR_vsx_lxvrdx) >> - discratch = scratch; >> + { >> + temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0); >> + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1)); >> + } >> + else if (icode == CODE_FOR_vsx_lxvrdx) >> + discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0); >> + else >> + gcc_unreachable (); >> >> - /* Emit the sign extension from DI (double) to TI (quad). */ >> - emit_insn (gen_extendditi2 (target, discratch)); >> + /* Emit the sign extension from V2DI (double) to TI (quad). 
*/ >> + temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0); >> + emit_insn (gen_extendditi2_vector (target, temp2)); >> >> return target; >> } >> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c >> new file mode 100644 >> index 00000000000..78e72ac5425 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c >> @@ -0,0 +1,35 @@ >> +/* { dg-do compile } */ >> +/* { dg-require-effective-target int128 } */ >> +/* { dg-require-effective-target power10_ok } */ >> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ >> + >> +#include <altivec.h> >> + >> +vector signed __int128 >> +foo1 (signed long a, signed char *b) >> +{ >> + return vec_xl_sext (a, b); >> +} >> + >> +vector signed __int128 >> +foo2 (signed long a, signed short *b) >> +{ >> + return vec_xl_sext (a, b); >> +} >> + >> +vector signed __int128 >> +foo3 (signed long a, signed int *b) >> +{ >> + return vec_xl_sext (a, b); >> +} >> + >> +vector signed __int128 >> +foo4 (signed long a, signed long *b) >> +{ >> + return vec_xl_sext (a, b); >> +} >> + >> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */ >> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */ >> +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */ >> +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */ >>
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
 
   if (sign_extend)
     {
-      rtx discratch = gen_reg_rtx (DImode);
+      rtx discratch = gen_reg_rtx (V2DImode);
       rtx tiscratch = gen_reg_rtx (TImode);
 
       /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
         return 0;
       emit_insn (pat);
 
-      /* Emit a sign extension from QI,HI,WI to double (DI).  */
-      rtx scratch = gen_lowpart (smode, tiscratch);
+      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+      rtx temp1, temp2;
       if (icode == CODE_FOR_vsx_lxvrbx)
-        emit_insn (gen_extendqidi2 (discratch, scratch));
+        {
+          temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+          emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+        }
       else if (icode == CODE_FOR_vsx_lxvrhx)
-        emit_insn (gen_extendhidi2 (discratch, scratch));
+        {
+          temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+          emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+        }
       else if (icode == CODE_FOR_vsx_lxvrwx)
-        emit_insn (gen_extendsidi2 (discratch, scratch));
-      /* Assign discratch directly if scratch is already DI.  */
-      if (icode == CODE_FOR_vsx_lxvrdx)
-        discratch = scratch;
+        {
+          temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+          emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+        }
+      else if (icode == CODE_FOR_vsx_lxvrdx)
+        discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+      else
+        gcc_unreachable ();
 
-      /* Emit the sign extension from DI (double) to TI (quad).  */
-      emit_insn (gen_extendditi2 (target, discratch));
+      /* Emit the sign extension from V2DI (double) to TI (quad).  */
+      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+      emit_insn (gen_extendditi2_vector (target, temp2));
 
       return target;
     }
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 00000000000..78e72ac5425
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */