diff mbox series

[v2] rs6000: Support doubleword swaps removal in rot64 load store [PR100085]

Message ID 7ed95783-2b61-487f-93c2-89124674accb@linux.ibm.com
State New
Headers show
Series [v2] rs6000: Support doubleword swaps removal in rot64 load store [PR100085] | expand

Commit Message

Xionghu Luo June 8, 2021, 1:11 a.m. UTC
Update the patch according to the comments.  Thanks.


On P8LE, extra rot64+rot64 load or store instructions are generated
in float128 to vector __int128 conversion.

This patch teaches pass swaps to also handle such pattens to remove
extra swap instructions.

(insn 7 6 8 2 (set (subreg:V1TI (reg:KF 123) 0)
        (rotate:V1TI (mem/u/c:V1TI (reg/f:DI 121) [0  S16 A128])
	            (const_int 64 [0x40]))) {*vsx_le_permute_v1ti})
(insn 8 7 9 2 (set (subreg:V1TI (reg:KF 122) 0)
        (rotate:V1TI (subreg:V1TI (reg:KF 123) 0)
	            (const_int 64 [0x40])))  {*vsx_le_permute_v1ti})
=>
(insn 22 6 23 2 (set (subreg:V1TI (reg:KF 123) 0)
        (mem/u/c:V1TI (and:DI (reg/f:DI 121)
	          (const_int -16 [0xfffffffffffffff0])) [0  S16 A128])))
(insn 23 22 25 2 (set (subreg:V1TI (reg:KF 122) 0)
        (subreg:V1TI (reg:KF 123) 0)))

gcc/ChangeLog:

	* config/rs6000/rs6000-p8swap.c (pattern_is_rotate64_p): New.
	(insn_is_load_p): Use pattern_is_rotate64_p.
	(insn_is_swap_p): Likewise.
	(quad_aligned_load_p): Likewise.
	(const_load_sequence_p): Likewise.
	(replace_swapped_aligned_load): Likewise.
	(recombine_lvx_pattern): Likewise.
	(recombine_stvx_pattern): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/float128-call.c: Adjust.
	* gcc.target/powerpc/pr100085.c: New test.
---
 gcc/config/rs6000/rs6000-p8swap.c             | 37 +++++++++++++++----
 .../gcc.target/powerpc/float128-call.c        |  4 +-
 gcc/testsuite/gcc.target/powerpc/pr100085.c   | 24 ++++++++++++
 3 files changed, 56 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr100085.c

Comments

Segher Boessenkool June 8, 2021, 9:07 p.m. UTC | #1
Hi!

On Tue, Jun 08, 2021 at 09:11:33AM +0800, Xionghu Luo wrote:
> On P8LE, extra rot64+rot64 load or store instructions are generated
> in float128 to vector __int128 conversion.
> 
> This patch teaches pass swaps to also handle such pattens to remove
> extra swap instructions.

> +/* Return 1 iff PAT is a rotate 64 bit expression; else return 0.  */
> +
> +static bool
> +pattern_is_rotate64_p (rtx pat)

You already have a verb in the name, don't use _p please (and preferably
just don't use it at all, "pattern_is_rotate64" is much better than
"pattern_rotate64_p").

> +{
> +  rtx rot = SET_SRC (pat);

So this is assuming PAT is a SINGLE_SET.  Please say that in the
function comment.

/* Return 1 iff PAT (a SINGLE_SET) is a rotate 64 bit expression; else
   return 0.  */

You can do an assert for that as well, but I wouldn't bother.

> @@ -266,6 +280,9 @@ insn_is_load_p (rtx insn)

(I do realise you just copied existing naming, don't worry :-) )

> @@ -392,7 +411,8 @@ quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
>       false.  */
>    rtx body = PATTERN (def_insn);
>    if (GET_CODE (body) != SET
> -      || GET_CODE (SET_SRC (body)) != VEC_SELECT
> +      || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
> +	  || pattern_is_rotate64_p (body))

Broken indentation: the || should align with "pattern...".

> @@ -2223,9 +2246,9 @@ static void
>  recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
>  {
>    rtx body = PATTERN (insn);
> -  gcc_assert (GET_CODE (body) == SET
> -	      && MEM_P (SET_DEST (body))
> -	      && GET_CODE (SET_SRC (body)) == VEC_SELECT);
> +  gcc_assert (GET_CODE (body) == SET && MEM_P (SET_DEST (body))
> +	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
> +		  || pattern_is_rotate64_p (body)));

Please start a new line for every "&&" here.  The way it was was more
readable.

It often is nice to keep things one one line, if it fits on one line.
If it does not, make a new line for every phrase.  This is more readable
because you can then just scan down the line of "&&" and see the start
of every phrase without actually having to read it all.

> diff --git a/gcc/testsuite/gcc.target/powerpc/float128-call.c b/gcc/testsuite/gcc.target/powerpc/float128-call.c
> index 5895416e985..a1f09df8a57 100644
> --- a/gcc/testsuite/gcc.target/powerpc/float128-call.c
> +++ b/gcc/testsuite/gcc.target/powerpc/float128-call.c
> @@ -21,5 +21,5 @@
>  TYPE one (void) { return ONE; }
>  void store (TYPE a, TYPE *p) { *p = a; }
>  
> -/* { dg-final { scan-assembler "lxvd2x 34"  } } */
> -/* { dg-final { scan-assembler "stxvd2x 34" } } */
> +/* { dg-final { scan-assembler "lvx 2"  } } */
> +/* { dg-final { scan-assembler "stvx 2" } } */

Huh.  Is that correct?  Where did the other 32 loads and stores go?  Are
there now other insns generated that you should scan for?

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr100085.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_float128_sw_ok } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */

If you use float128_ok you should use -mfloat128 (or this is very
surprising and is worth an explanation itself :-) )

But, you do not need it, since you use -mcpu=power8 already (which
implicitly sets this).  So just remove that dg-require please.

> +/* { dg-final { scan-assembler-not "xxpermdi" } } */
> +/* { dg-final { scan-assembler-not "stxvd2x" } } */
> +/* { dg-final { scan-assembler-not "lxvd2x" } } */

It is a good habit to use \m and \M in the scans where you can (those
are the same as \< and \> are in some other regexp dialects).  They
aren't absolutely necessary here of course.


Okay for trunk with those fixes.  Thanks!


Segher
Xionghu Luo June 9, 2021, 3:06 a.m. UTC | #2
On 2021/6/9 05:07, Segher Boessenkool wrote:
> Hi!
> 
> On Tue, Jun 08, 2021 at 09:11:33AM +0800, Xionghu Luo wrote:
>> On P8LE, extra rot64+rot64 load or store instructions are generated
>> in float128 to vector __int128 conversion.
>>
>> This patch teaches pass swaps to also handle such pattens to remove
>> extra swap instructions.
> 
>> +/* Return 1 iff PAT is a rotate 64 bit expression; else return 0.  */
>> +
>> +static bool
>> +pattern_is_rotate64_p (rtx pat)
> 
> You already have a verb in the name, don't use _p please (and preferably
> just don't use it at all, "pattern_is_rotate64" is much better than
> "pattern_rotate64_p").
> 
>> +{
>> +  rtx rot = SET_SRC (pat);
> 
> So this is assuming PAT is a SINGLE_SET.  Please say that in the
> function comment.
> 
> /* Return 1 iff PAT (a SINGLE_SET) is a rotate 64 bit expression; else
>     return 0.  */
> 
> You can do an assert for that as well, but I wouldn't bother.
> 
>> @@ -266,6 +280,9 @@ insn_is_load_p (rtx insn)
> 
> (I do realise you just copied existing naming, don't worry :-) )
> 
>> @@ -392,7 +411,8 @@ quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
>>        false.  */
>>     rtx body = PATTERN (def_insn);
>>     if (GET_CODE (body) != SET
>> -      || GET_CODE (SET_SRC (body)) != VEC_SELECT
>> +      || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
>> +	  || pattern_is_rotate64_p (body))
> 
> Broken indentation: the || should align with "pattern...".
> 
>> @@ -2223,9 +2246,9 @@ static void
>>   recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
>>   {
>>     rtx body = PATTERN (insn);
>> -  gcc_assert (GET_CODE (body) == SET
>> -	      && MEM_P (SET_DEST (body))
>> -	      && GET_CODE (SET_SRC (body)) == VEC_SELECT);
>> +  gcc_assert (GET_CODE (body) == SET && MEM_P (SET_DEST (body))
>> +	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
>> +		  || pattern_is_rotate64_p (body)));
> 
> Please start a new line for every "&&" here.  The way it was was more
> readable.
> 
> It often is nice to keep things one one line, if it fits on one line.
> If it does not, make a new line for every phrase.  This is more readable
> because you can then just scan down the line of "&&" and see the start
> of every phrase without actually having to read it all.
> 
>> diff --git a/gcc/testsuite/gcc.target/powerpc/float128-call.c b/gcc/testsuite/gcc.target/powerpc/float128-call.c
>> index 5895416e985..a1f09df8a57 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/float128-call.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/float128-call.c
>> @@ -21,5 +21,5 @@
>>   TYPE one (void) { return ONE; }
>>   void store (TYPE a, TYPE *p) { *p = a; }
>>   
>> -/* { dg-final { scan-assembler "lxvd2x 34"  } } */
>> -/* { dg-final { scan-assembler "stxvd2x 34" } } */
>> +/* { dg-final { scan-assembler "lvx 2"  } } */
>> +/* { dg-final { scan-assembler "stvx 2" } } */
> 
> Huh.  Is that correct?  Where did the other 32 loads and stores go?  Are
> there now other insns generated that you should scan for?

This is expected change. lxvd2x+xxpermdi is replaced by lvx.  No need scan other
instructions. Similarly for stvx. 34 and 2 are *vector register names* instead of
counts.

diff float128-call.trunk.s float128-call.patched.s
18,19c18
<       lxvd2x 34,0,9
<       xxpermdi 34,34,34,2
---
>       lvx 2,0,9
33,34c32
<       xxpermdi 34,34,34,2
<       stxvd2x 34,0,5
---
>       stvx 2,0,5

Thanks for all the other comments, updated and committed with r12-1316.


BR,
Xionghu
Segher Boessenkool June 9, 2021, 4:47 p.m. UTC | #3
On Wed, Jun 09, 2021 at 11:06:31AM +0800, Xionghu Luo wrote:
> On 2021/6/9 05:07, Segher Boessenkool wrote:
> >> -/* { dg-final { scan-assembler "lxvd2x 34"  } } */
> >> -/* { dg-final { scan-assembler "stxvd2x 34" } } */
> >> +/* { dg-final { scan-assembler "lvx 2"  } } */
> >> +/* { dg-final { scan-assembler "stvx 2" } } */
> > 
> > Huh.  Is that correct?  Where did the other 32 loads and stores go?  Are
> > there now other insns generated that you should scan for?
> 
> This is expected change. lxvd2x+xxpermdi is replaced by lvx.  No need scan other
> instructions. Similarly for stvx. 34 and 2 are *vector register names* instead of
> counts.

Oh!  Oh my.  I read that as "scan-assembler-times" for some reason, as
you expected.

> Thanks for all the other comments, updated and committed with r12-1316.

Thank you!


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c
index ec503ab742f..3b74e05e396 100644
--- a/gcc/config/rs6000/rs6000-p8swap.c
+++ b/gcc/config/rs6000/rs6000-p8swap.c
@@ -250,6 +250,20 @@  union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
     }
 }
 
+/* Return 1 iff PAT is a rotate 64 bit expression; else return 0.  */
+
+static bool
+pattern_is_rotate64_p (rtx pat)
+{
+  rtx rot = SET_SRC (pat);
+
+  if (GET_CODE (rot) == ROTATE && CONST_INT_P (XEXP (rot, 1))
+      && INTVAL (XEXP (rot, 1)) == 64)
+    return true;
+
+  return false;
+}
+
 /* Return 1 iff INSN is a load insn, including permuting loads that
    represent an lvxd2x instruction; else return 0.  */
 static unsigned int
@@ -266,6 +280,9 @@  insn_is_load_p (rtx insn)
 	  && MEM_P (XEXP (SET_SRC (body), 0)))
 	return 1;
 
+      if (pattern_is_rotate64_p (body) && MEM_P (XEXP (SET_SRC (body), 0)))
+	return 1;
+
       return 0;
     }
 
@@ -305,6 +322,8 @@  insn_is_swap_p (rtx insn)
   if (GET_CODE (body) != SET)
     return 0;
   rtx rhs = SET_SRC (body);
+  if (pattern_is_rotate64_p (body))
+    return 1;
   if (GET_CODE (rhs) != VEC_SELECT)
     return 0;
   rtx parallel = XEXP (rhs, 1);
@@ -392,7 +411,8 @@  quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
      false.  */
   rtx body = PATTERN (def_insn);
   if (GET_CODE (body) != SET
-      || GET_CODE (SET_SRC (body)) != VEC_SELECT
+      || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
+	  || pattern_is_rotate64_p (body))
       || !MEM_P (XEXP (SET_SRC (body), 0)))
     return false;
 
@@ -531,7 +551,8 @@  const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
 	 false.  */
       rtx body = PATTERN (def_insn);
       if (GET_CODE (body) != SET
-	  || GET_CODE (SET_SRC (body)) != VEC_SELECT
+	  || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
+	       || pattern_is_rotate64_p (body))
 	  || !MEM_P (XEXP (SET_SRC (body), 0)))
 	return false;
 
@@ -1730,7 +1751,8 @@  replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
      swap (indicated by code VEC_SELECT).  */
   rtx body = PATTERN (def_insn);
   gcc_assert ((GET_CODE (body) == SET)
-	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT)
+	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
+		  || pattern_is_rotate64_p (body))
 	      && MEM_P (XEXP (SET_SRC (body), 0)));
 
   rtx src_exp = XEXP (SET_SRC (body), 0);
@@ -2148,7 +2170,8 @@  recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
 {
   rtx body = PATTERN (insn);
   gcc_assert (GET_CODE (body) == SET
-	      && GET_CODE (SET_SRC (body)) == VEC_SELECT
+	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
+		  || pattern_is_rotate64_p (body))
 	      && MEM_P (XEXP (SET_SRC (body), 0)));
 
   rtx mem = XEXP (SET_SRC (body), 0);
@@ -2223,9 +2246,9 @@  static void
 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
 {
   rtx body = PATTERN (insn);
-  gcc_assert (GET_CODE (body) == SET
-	      && MEM_P (SET_DEST (body))
-	      && GET_CODE (SET_SRC (body)) == VEC_SELECT);
+  gcc_assert (GET_CODE (body) == SET && MEM_P (SET_DEST (body))
+	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
+		  || pattern_is_rotate64_p (body)));
   rtx mem = SET_DEST (body);
   rtx base_reg = XEXP (mem, 0);
 
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-call.c b/gcc/testsuite/gcc.target/powerpc/float128-call.c
index 5895416e985..a1f09df8a57 100644
--- a/gcc/testsuite/gcc.target/powerpc/float128-call.c
+++ b/gcc/testsuite/gcc.target/powerpc/float128-call.c
@@ -21,5 +21,5 @@ 
 TYPE one (void) { return ONE; }
 void store (TYPE a, TYPE *p) { *p = a; }
 
-/* { dg-final { scan-assembler "lxvd2x 34"  } } */
-/* { dg-final { scan-assembler "stxvd2x 34" } } */
+/* { dg-final { scan-assembler "lvx 2"  } } */
+/* { dg-final { scan-assembler "stvx 2" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr100085.c b/gcc/testsuite/gcc.target/powerpc/pr100085.c
new file mode 100644
index 00000000000..0a2e0feaf30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr100085.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_float128_sw_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+typedef __vector unsigned __int128 vui128_t;
+
+typedef union
+{
+  __float128 vf1;
+  vui128_t vx1;
+} __VF_128;
+
+vui128_t
+vec_xfer_bin128_2_vui128t (__float128 f128)
+{
+  __VF_128 vunion;
+  vunion.vf1 = f128;
+  return (vunion.vx1);
+}
+
+/* { dg-final { scan-assembler-not "xxpermdi" } } */
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "lxvd2x" } } */
+