Message ID | 7ed95783-2b61-487f-93c2-89124674accb@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [v2] rs6000: Support doubleword swaps removal in rot64 load store [PR100085] | expand |
Hi! On Tue, Jun 08, 2021 at 09:11:33AM +0800, Xionghu Luo wrote: > On P8LE, extra rot64+rot64 load or store instructions are generated > in float128 to vector __int128 conversion. > > This patch teaches pass swaps to also handle such pattens to remove > extra swap instructions. > +/* Return 1 iff PAT is a rotate 64 bit expression; else return 0. */ > + > +static bool > +pattern_is_rotate64_p (rtx pat) You already have a verb in the name, don't use _p please (and preferably just don't use it at all, "pattern_is_rotate64" is much better than "pattern_rotate64_p"). > +{ > + rtx rot = SET_SRC (pat); So this is assuming PAT is a SINGLE_SET. Please say that in the function comment. /* Return 1 iff PAT (a SINGLE_SET) is a rotate 64 bit expression; else return 0. */ You can do an assert for that as well, but I wouldn't bother. > @@ -266,6 +280,9 @@ insn_is_load_p (rtx insn) (I do realise you just copied existing naming, don't worry :-) ) > @@ -392,7 +411,8 @@ quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn) > false. */ > rtx body = PATTERN (def_insn); > if (GET_CODE (body) != SET > - || GET_CODE (SET_SRC (body)) != VEC_SELECT > + || !(GET_CODE (SET_SRC (body)) == VEC_SELECT > + || pattern_is_rotate64_p (body)) Broken indentation: the || should align with "pattern...". > @@ -2223,9 +2246,9 @@ static void > recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete) > { > rtx body = PATTERN (insn); > - gcc_assert (GET_CODE (body) == SET > - && MEM_P (SET_DEST (body)) > - && GET_CODE (SET_SRC (body)) == VEC_SELECT); > + gcc_assert (GET_CODE (body) == SET && MEM_P (SET_DEST (body)) > + && (GET_CODE (SET_SRC (body)) == VEC_SELECT > + || pattern_is_rotate64_p (body))); Please start a new line for every "&&" here. The way it was was more readable. It often is nice to keep things one one line, if it fits on one line. If it does not, make a new line for every phrase. This is more readable because you can then just scan down the line of "&&" and see the start of every phrase without actually having to read it all. > diff --git a/gcc/testsuite/gcc.target/powerpc/float128-call.c b/gcc/testsuite/gcc.target/powerpc/float128-call.c > index 5895416e985..a1f09df8a57 100644 > --- a/gcc/testsuite/gcc.target/powerpc/float128-call.c > +++ b/gcc/testsuite/gcc.target/powerpc/float128-call.c > @@ -21,5 +21,5 @@ > TYPE one (void) { return ONE; } > void store (TYPE a, TYPE *p) { *p = a; } > > -/* { dg-final { scan-assembler "lxvd2x 34" } } */ > -/* { dg-final { scan-assembler "stxvd2x 34" } } */ > +/* { dg-final { scan-assembler "lvx 2" } } */ > +/* { dg-final { scan-assembler "stvx 2" } } */ Huh. Is that correct? Where did the other 32 loads and stores go? Are there now other insns generated that you should scan for? > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr100085.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_float128_sw_ok } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ If you use float128_ok you should use -mfloat128 (or this is very surprising and is worth an explanation itself :-) ) But, you do not need it, since you use -mcpu=power8 already (which implicitly sets this). So just remove that dg-require please. > +/* { dg-final { scan-assembler-not "xxpermdi" } } */ > +/* { dg-final { scan-assembler-not "stxvd2x" } } */ > +/* { dg-final { scan-assembler-not "lxvd2x" } } */ It is a good habit to use \m and \M in the scans where you can (those are the same as \< and \> are in some other regexp dialects). They aren't absolutely necessary here of course. Okay for trunk with those fixes. Thanks! Segher
On 2021/6/9 05:07, Segher Boessenkool wrote: > Hi! > > On Tue, Jun 08, 2021 at 09:11:33AM +0800, Xionghu Luo wrote: >> On P8LE, extra rot64+rot64 load or store instructions are generated >> in float128 to vector __int128 conversion. >> >> This patch teaches pass swaps to also handle such pattens to remove >> extra swap instructions. > >> +/* Return 1 iff PAT is a rotate 64 bit expression; else return 0. */ >> + >> +static bool >> +pattern_is_rotate64_p (rtx pat) > > You already have a verb in the name, don't use _p please (and preferably > just don't use it at all, "pattern_is_rotate64" is much better than > "pattern_rotate64_p"). > >> +{ >> + rtx rot = SET_SRC (pat); > > So this is assuming PAT is a SINGLE_SET. Please say that in the > function comment. > > /* Return 1 iff PAT (a SINGLE_SET) is a rotate 64 bit expression; else > return 0. */ > > You can do an assert for that as well, but I wouldn't bother. > >> @@ -266,6 +280,9 @@ insn_is_load_p (rtx insn) > > (I do realise you just copied existing naming, don't worry :-) ) > >> @@ -392,7 +411,8 @@ quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn) >> false. */ >> rtx body = PATTERN (def_insn); >> if (GET_CODE (body) != SET >> - || GET_CODE (SET_SRC (body)) != VEC_SELECT >> + || !(GET_CODE (SET_SRC (body)) == VEC_SELECT >> + || pattern_is_rotate64_p (body)) > > Broken indentation: the || should align with "pattern...". > >> @@ -2223,9 +2246,9 @@ static void >> recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete) >> { >> rtx body = PATTERN (insn); >> - gcc_assert (GET_CODE (body) == SET >> - && MEM_P (SET_DEST (body)) >> - && GET_CODE (SET_SRC (body)) == VEC_SELECT); >> + gcc_assert (GET_CODE (body) == SET && MEM_P (SET_DEST (body)) >> + && (GET_CODE (SET_SRC (body)) == VEC_SELECT >> + || pattern_is_rotate64_p (body))); > > Please start a new line for every "&&" here. The way it was was more > readable. > > It often is nice to keep things one one line, if it fits on one line. > If it does not, make a new line for every phrase. This is more readable > because you can then just scan down the line of "&&" and see the start > of every phrase without actually having to read it all. > >> diff --git a/gcc/testsuite/gcc.target/powerpc/float128-call.c b/gcc/testsuite/gcc.target/powerpc/float128-call.c >> index 5895416e985..a1f09df8a57 100644 >> --- a/gcc/testsuite/gcc.target/powerpc/float128-call.c >> +++ b/gcc/testsuite/gcc.target/powerpc/float128-call.c >> @@ -21,5 +21,5 @@ >> TYPE one (void) { return ONE; } >> void store (TYPE a, TYPE *p) { *p = a; } >> >> -/* { dg-final { scan-assembler "lxvd2x 34" } } */ >> -/* { dg-final { scan-assembler "stxvd2x 34" } } */ >> +/* { dg-final { scan-assembler "lvx 2" } } */ >> +/* { dg-final { scan-assembler "stvx 2" } } */ > > Huh. Is that correct? Where did the other 32 loads and stores go? Are > there now other insns generated that you should scan for? This is expected change. lxvd2x+xxpermdi is replaced by lvx. No need scan other instructions. Similarly for stvx. 34 and 2 are *vector register names* instead of counts. diff float128-call.trunk.s float128-call.patched.s 18,19c18 < lxvd2x 34,0,9 < xxpermdi 34,34,34,2 --- > lvx 2,0,9 33,34c32 < xxpermdi 34,34,34,2 < stxvd2x 34,0,5 --- > stvx 2,0,5 Thanks for all the other comments, updated and committed with r12-1316. BR, Xionghu
On Wed, Jun 09, 2021 at 11:06:31AM +0800, Xionghu Luo wrote: > On 2021/6/9 05:07, Segher Boessenkool wrote: > >> -/* { dg-final { scan-assembler "lxvd2x 34" } } */ > >> -/* { dg-final { scan-assembler "stxvd2x 34" } } */ > >> +/* { dg-final { scan-assembler "lvx 2" } } */ > >> +/* { dg-final { scan-assembler "stvx 2" } } */ > > > > Huh. Is that correct? Where did the other 32 loads and stores go? Are > > there now other insns generated that you should scan for? > > This is expected change. lxvd2x+xxpermdi is replaced by lvx. No need scan other > instructions. Similarly for stvx. 34 and 2 are *vector register names* instead of > counts. Oh! Oh my. I read that as "scan-assembler-times" for some reason, as you expected. > Thanks for all the other comments, updated and committed with r12-1316. Thank you! Segher
diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c index ec503ab742f..3b74e05e396 100644 --- a/gcc/config/rs6000/rs6000-p8swap.c +++ b/gcc/config/rs6000/rs6000-p8swap.c @@ -250,6 +250,20 @@ union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def) } } +/* Return 1 iff PAT is a rotate 64 bit expression; else return 0. */ + +static bool +pattern_is_rotate64_p (rtx pat) +{ + rtx rot = SET_SRC (pat); + + if (GET_CODE (rot) == ROTATE && CONST_INT_P (XEXP (rot, 1)) + && INTVAL (XEXP (rot, 1)) == 64) + return true; + + return false; +} + /* Return 1 iff INSN is a load insn, including permuting loads that represent an lvxd2x instruction; else return 0. */ static unsigned int @@ -266,6 +280,9 @@ insn_is_load_p (rtx insn) && MEM_P (XEXP (SET_SRC (body), 0))) return 1; + if (pattern_is_rotate64_p (body) && MEM_P (XEXP (SET_SRC (body), 0))) + return 1; + return 0; } @@ -305,6 +322,8 @@ insn_is_swap_p (rtx insn) if (GET_CODE (body) != SET) return 0; rtx rhs = SET_SRC (body); + if (pattern_is_rotate64_p (body)) + return 1; if (GET_CODE (rhs) != VEC_SELECT) return 0; rtx parallel = XEXP (rhs, 1); @@ -392,7 +411,8 @@ quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn) false. */ rtx body = PATTERN (def_insn); if (GET_CODE (body) != SET - || GET_CODE (SET_SRC (body)) != VEC_SELECT + || !(GET_CODE (SET_SRC (body)) == VEC_SELECT + || pattern_is_rotate64_p (body)) || !MEM_P (XEXP (SET_SRC (body), 0))) return false; @@ -531,7 +551,8 @@ const_load_sequence_p (swap_web_entry *insn_entry, rtx insn) false. */ rtx body = PATTERN (def_insn); if (GET_CODE (body) != SET - || GET_CODE (SET_SRC (body)) != VEC_SELECT + || !(GET_CODE (SET_SRC (body)) == VEC_SELECT + || pattern_is_rotate64_p (body)) || !MEM_P (XEXP (SET_SRC (body), 0))) return false; @@ -1730,7 +1751,8 @@ replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn) swap (indicated by code VEC_SELECT). */ rtx body = PATTERN (def_insn); gcc_assert ((GET_CODE (body) == SET) - && (GET_CODE (SET_SRC (body)) == VEC_SELECT) + && (GET_CODE (SET_SRC (body)) == VEC_SELECT + || pattern_is_rotate64_p (body)) && MEM_P (XEXP (SET_SRC (body), 0))); rtx src_exp = XEXP (SET_SRC (body), 0); @@ -2148,7 +2170,8 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete) { rtx body = PATTERN (insn); gcc_assert (GET_CODE (body) == SET - && GET_CODE (SET_SRC (body)) == VEC_SELECT + && (GET_CODE (SET_SRC (body)) == VEC_SELECT + || pattern_is_rotate64_p (body)) && MEM_P (XEXP (SET_SRC (body), 0))); rtx mem = XEXP (SET_SRC (body), 0); @@ -2223,9 +2246,9 @@ static void recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete) { rtx body = PATTERN (insn); - gcc_assert (GET_CODE (body) == SET - && MEM_P (SET_DEST (body)) - && GET_CODE (SET_SRC (body)) == VEC_SELECT); + gcc_assert (GET_CODE (body) == SET && MEM_P (SET_DEST (body)) + && (GET_CODE (SET_SRC (body)) == VEC_SELECT + || pattern_is_rotate64_p (body))); rtx mem = SET_DEST (body); rtx base_reg = XEXP (mem, 0); diff --git a/gcc/testsuite/gcc.target/powerpc/float128-call.c b/gcc/testsuite/gcc.target/powerpc/float128-call.c index 5895416e985..a1f09df8a57 100644 --- a/gcc/testsuite/gcc.target/powerpc/float128-call.c +++ b/gcc/testsuite/gcc.target/powerpc/float128-call.c @@ -21,5 +21,5 @@ TYPE one (void) { return ONE; } void store (TYPE a, TYPE *p) { *p = a; } -/* { dg-final { scan-assembler "lxvd2x 34" } } */ -/* { dg-final { scan-assembler "stxvd2x 34" } } */ +/* { dg-final { scan-assembler "lvx 2" } } */ +/* { dg-final { scan-assembler "stvx 2" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr100085.c b/gcc/testsuite/gcc.target/powerpc/pr100085.c new file mode 100644 index 00000000000..0a2e0feaf30 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr100085.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_float128_sw_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ + +typedef __vector unsigned __int128 vui128_t; + +typedef union +{ + __float128 vf1; + vui128_t vx1; +} __VF_128; + +vui128_t +vec_xfer_bin128_2_vui128t (__float128 f128) +{ + __VF_128 vunion; + vunion.vf1 = f128; + return (vunion.vx1); +} + +/* { dg-final { scan-assembler-not "xxpermdi" } } */ +/* { dg-final { scan-assembler-not "stxvd2x" } } */ +/* { dg-final { scan-assembler-not "lxvd2x" } } */ +