diff mbox series

rs6000: Disable optimizing multiple xxsetaccz instructions into one xxsetaccz

Message ID 8393a33f-50ab-6720-0017-3f012803b990@linux.ibm.com
State New
Headers show
Series rs6000: Disable optimizing multiple xxsetaccz instructions into one xxsetaccz | expand

Commit Message

Peter Bergner Aug. 27, 2021, 7:58 p.m. UTC
Fwprop will happily optimize two xxsetaccz instructions into one xxsetaccz
by propagating the results of the first to the uses of the second.
We really don't want that to happen given the late priming/depriming of
accumulators.  I fixed this by making the xxsetaccz source operand an
unspec volatile.  I also removed the mma_xxsetaccz define_expand and
define_insn_and_split and replaced it with a simple define_insn.
The expand and splitter patterns were leftovers from the pre opaque mode
code when the xxsetaccz code was part of the movpxi pattern, and we don't
need them now.

Rather than a new test case, I was able to just modify the current test case
to add another __builtin_mma_xxsetaccz call which shows the bad code gen
with unpatched compilers.

This passed bootstrap on powerpc64le-linux with no regressions.
Ok for trunk?  We'll need this for sure in GCC11.  Ok there too after
some trunk burn in time?

GCC10 suffers from the same issue, but since the code is different, I'll
have to determine a different solution which I'll post as a separate
patch.

Peter


gcc/
	* config/rs6000/mma.md (unspec): Delete UNSPEC_MMA_XXSETACCZ.
	(unspecv): Add UNSPECV_MMA_XXSETACCZ.
	(*mma_xxsetaccz): Delete.
	(mma_xxsetaccz): Change to define_insn.  Remove match_operand.
	Use UNSPECV_MMA_XXSETACCZ.
	* config/rs6000/rs6000.c (rs6000_rtx_costs): Use UNSPECV_MMA_XXSETACCZ.

gcc/testsuite/
	* gcc.target/powerpc/mma-builtin-6.c: Add second call to xxsetacc
	built-in.  Update instruction counts.

Comments

Li, Pan2 via Gcc-patches Sept. 12, 2021, 3:32 p.m. UTC | #1
Hi Peter,

This patch looks fine to me.  The approach to avoiding incorrect 
optimization is reasonable.  Maintainers?

Thanks for the patch!
Bill

On 8/27/21 2:58 PM, Peter Bergner via Gcc-patches wrote:
> Fwprop will happily optimize two xxsetaccz instructions into one xxsetaccz
> by propagating the results of the first to the uses of the second.
> We really don't want that to happen given the late priming/depriming of
> accumulators.  I fixed this by making the xxsetaccz source operand an
> unspec volatile.  I also removed the mma_xxsetaccz define_expand and
> define_insn_and_split and replaced it with a simple define_insn.
> The expand and splitter patterns were leftovers from the pre opaque mode
> code when the xxsetaccz code was part of the movpxi pattern, and we don't
> need them now.
>
> Rather than a new test case, I was able to just modify the current test case
> to add another __builtin_mma_xxsetaccz call which shows the bad code gen
> with unpatched compilers.
>
> This passed bootstrap on powerpc64le-linux with no regressions.
> Ok for trunk?  We'll need this for sure in GCC11.  Ok there too after
> some trunk burn in time?
>
> GCC10 suffers from the same issue, but since the code is different, I'll
> have to determine a different solution which I'll post as a separate
> patch.
>
> Peter
>
>
> gcc/
> 	* config/rs6000/mma.md (unspec): Delete UNSPEC_MMA_XXSETACCZ.
> 	(unspecv): Add UNSPECV_MMA_XXSETACCZ.
> 	(*mma_xxsetaccz): Delete.
> 	(mma_xxsetaccz): Change to define_insn.  Remove match_operand.
> 	Use UNSPECV_MMA_XXSETACCZ.
> 	* config/rs6000/rs6000.c (rs6000_rtx_costs): Use UNSPECV_MMA_XXSETACCZ.
>
> gcc/testsuite/
> 	* gcc.target/powerpc/mma-builtin-6.c: Add second call to xxsetacc
> 	built-in.  Update instruction counts.
>
> diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
> index 1f6fc03d2ac..b26ae7a5d04 100644
> --- a/gcc/config/rs6000/mma.md
> +++ b/gcc/config/rs6000/mma.md
> @@ -91,7 +91,10 @@ (define_c_enum "unspec"
>      UNSPEC_MMA_XVI8GER4SPP
>      UNSPEC_MMA_XXMFACC
>      UNSPEC_MMA_XXMTACC
> -   UNSPEC_MMA_XXSETACCZ
> +  ])
> +
> +(define_c_enum "unspecv"
> +  [UNSPECV_MMA_XXSETACCZ
>     ])
>
>   ;; MMA instructions with 1 accumulator argument
> @@ -469,26 +472,12 @@ (define_insn "mma_<acc>"
>
>   ;; We can't have integer constants in XOmode so we wrap this in an UNSPEC.
>
> -(define_expand "mma_xxsetaccz"
> -  [(set (match_operand:XO 0 "fpr_reg_operand")
> -	(const_int 0))]
> -  "TARGET_MMA"
> -{
> -  rtx xo0 = gen_rtx_UNSPEC (XOmode, gen_rtvec (1, const0_rtx),
> -			    UNSPEC_MMA_XXSETACCZ);
> -  emit_insn (gen_rtx_SET (operands[0], xo0));
> -  DONE;
> -})
> -
> -(define_insn_and_split "*mma_xxsetaccz"
> +(define_insn "mma_xxsetaccz"
>     [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
> -	(unspec:XO [(match_operand 1 "const_0_to_1_operand" "O")]
> -	 UNSPEC_MMA_XXSETACCZ))]
> +	(unspec_volatile:XO [(const_int 0)]
> +			    UNSPECV_MMA_XXSETACCZ))]
>     "TARGET_MMA"
>     "xxsetaccz %A0"
> -  "&& reload_completed"
> -  [(set (match_dup 0) (unspec:XO [(match_dup 1)] UNSPEC_MMA_XXSETACCZ))]
> -  ""
>     [(set_attr "type" "mma")
>      (set_attr "length" "4")])
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index e073b26b430..40dc71c8171 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -21919,7 +21919,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
>         break;
>
>       case UNSPEC:
> -      if (XINT (x, 1) == UNSPEC_MMA_XXSETACCZ)
> +      if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
>   	{
>   	  *total = 0;
>   	  return true;
> diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c
> index 0c6517211e3..715b28138e9 100644
> --- a/gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c
> +++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c
> @@ -5,14 +5,16 @@
>   void
>   foo (__vector_quad *dst)
>   {
> -  __vector_quad acc;
> -  __builtin_mma_xxsetaccz (&acc);
> -  *dst = acc;
> +  __vector_quad acc0, acc1;
> +  __builtin_mma_xxsetaccz (&acc0);
> +  __builtin_mma_xxsetaccz (&acc1);
> +  dst[0] = acc0;
> +  dst[1] = acc1;
>   }
>
>   /* { dg-final { scan-assembler-not {\mlxv\M} } } */
>   /* { dg-final { scan-assembler-not {\mlxvp\M} } } */
>   /* { dg-final { scan-assembler-not {\mxxmtacc\M} } } */
> -/* { dg-final { scan-assembler-times {\mxxsetaccz\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mxxmfacc\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mxxsetaccz\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */
Segher Boessenkool Sept. 12, 2021, 7:26 p.m. UTC | #2
Hi!

On Fri, Aug 27, 2021 at 02:58:05PM -0500, Peter Bergner wrote:
> Fwprop will happily optimize two xxsetaccz instructions into one xxsetaccz
> by propagating the results of the first to the uses of the second.
> We really don't want that to happen given the late priming/depriming of
> accumulators.  I fixed this by making the xxsetaccz source operand an
> unspec volatile.

Good good.

> I also removed the mma_xxsetaccz define_expand and
> define_insn_and_split and replaced it with a simple define_insn.

In the future pleaase do that in a separate patch.  That makes it *much*
easier to read and review this.

> GCC10 suffers from the same issue, but since the code is different, I'll
> have to determine a different solution which I'll post as a separate
> patch.

It doesn't currently have an unspec at all, so you cannot give it a
side effect.  It shouldn't be hard to make it use an unspec, just a lot
of mechanics (and testing :-/ )

> 	* config/rs6000/mma.md (unspec): Delete UNSPEC_MMA_XXSETACCZ.
> 	(unspecv): Add UNSPECV_MMA_XXSETACCZ.

Unrelated to this patch, but I have been wondering this for years:
should we have an unspecv enum at all?  It causes some churn, and you
can name the volatile ones UNSPECV_ in either case.

> 	(mma_xxsetaccz): Change to define_insn.  Remove match_operand.
> 	Use UNSPECV_MMA_XXSETACCZ.

It still has the match_operand.

>  ;; We can't have integer constants in XOmode so we wrap this in an UNSPEC.

Does the comment need updating?  It may help to point out here that itr
 needs to be volatile.

>     (set_attr "length" "4")])

Not new of course: the default length is 4, most insns have that, it
helps to be less verbose.

Okay for trunk with that changelog fix.  Thanks!


Segher
Peter Bergner Sept. 13, 2021, 10:10 p.m. UTC | #3
On 9/12/21 2:26 PM, Segher Boessenkool wrote:
>> I also removed the mma_xxsetaccz define_expand and
>> define_insn_and_split and replaced it with a simple define_insn.
> 
> In the future pleaase do that in a separate patch.  That makes it *much*
> easier to read and review this.

Will do.



>> 	* config/rs6000/mma.md (unspec): Delete UNSPEC_MMA_XXSETACCZ.
>> 	(unspecv): Add UNSPECV_MMA_XXSETACCZ.
> 
> Unrelated to this patch, but I have been wondering this for years:
> should we have an unspecv enum at all?  It causes some churn, and you
> can name the volatile ones UNSPECV_ in either case.

I assumed it was needed, but if not, yeah, one enum would seem to be
better than two.




>> 	(mma_xxsetaccz): Change to define_insn.  Remove match_operand.
>> 	Use UNSPECV_MMA_XXSETACCZ.
> 
> It still has the match_operand.

-(define_insn_and_split "*mma_xxsetaccz"
+(define_insn "mma_xxsetaccz"
   [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
-       (unspec:XO [(match_operand 1 "const_0_to_1_operand" "O")]
-        UNSPEC_MMA_XXSETACCZ))]
+       (unspec_volatile:XO [(const_int 0)]
+                           UNSPECV_MMA_XXSETACCZ))]


It still has "a" match_operand...for operand 0.  The match_operand
for operand 1 was what was removed.  Want me to reword that as
"Remove source match_operand." or "Remove match_operand 1." or ???



>>  ;; We can't have integer constants in XOmode so we wrap this in an UNSPEC.
> 
> Does the comment need updating?  It may help to point out here that itr
>  needs to be volatile.

I think the comment was referring to the unneeded operand which I have
now removed.  I could either remove the comment altogether or change it
to:

;; We can't have integer constants in XOmode so we wrap this in an
;; UNSPEC_VOLATILE.

...to refer to the dummy zero for the source.  Let me know what you want.



>>     (set_attr "length" "4")])
> 
> Not new of course: the default length is 4, most insns have that, it
> helps to be less verbose.

I'll remove that before pushing, thanks!


Peter
Segher Boessenkool Sept. 14, 2021, 12:17 a.m. UTC | #4
On Mon, Sep 13, 2021 at 05:10:42PM -0500, Peter Bergner wrote:
> >> 	* config/rs6000/mma.md (unspec): Delete UNSPEC_MMA_XXSETACCZ.
> >> 	(unspecv): Add UNSPECV_MMA_XXSETACCZ.
> > 
> > Unrelated to this patch, but I have been wondering this for years:
> > should we have an unspecv enum at all?  It causes some churn, and you
> > can name the volatile ones UNSPECV_ in either case.
> 
> I assumed it was needed, but if not, yeah, one enum would seem to be
> better than two.

These enums are not so very old (from 2010 only).  Before that we used
define_constant for all.  Some backends used separate numbering for
unspec and for unspec_volatile, some didn't.  The enum scheme supported
both the existing practices.

It is easier and prettier to have only one namespace for this (when you
want to look something up for example, or just reading stuff even).  I
did however think it quite useful to have the "V" in the name still.
But there is nothing forcing us to have any particular naming scheme for
the enumeration constants, so that is no blocker :-)

Since it is perfectly fine to have multiple define_enum's for the same
enumeration, too, converting to this will be pretty easy :-)

> >> 	(mma_xxsetaccz): Change to define_insn.  Remove match_operand.
> >> 	Use UNSPECV_MMA_XXSETACCZ.
> > 
> > It still has the match_operand.
> 
> -(define_insn_and_split "*mma_xxsetaccz"
> +(define_insn "mma_xxsetaccz"
>    [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
> -       (unspec:XO [(match_operand 1 "const_0_to_1_operand" "O")]
> -        UNSPEC_MMA_XXSETACCZ))]
> +       (unspec_volatile:XO [(const_int 0)]
> +                           UNSPECV_MMA_XXSETACCZ))]
> 
> 
> It still has "a" match_operand...for operand 0.  The match_operand
> for operand 1 was what was removed.  Want me to reword that as
> "Remove source match_operand." or "Remove match_operand 1." or ???

Ah I see, I looked with my eyes close apparently.  "Remove operand 1"?

> >>  ;; We can't have integer constants in XOmode so we wrap this in an UNSPEC.
> > 
> > Does the comment need updating?  It may help to point out here that itr
> >  needs to be volatile.
> 
> I think the comment was referring to the unneeded operand which I have
> now removed.  I could either remove the comment altogether or change it
> to:
> 
> ;; We can't have integer constants in XOmode so we wrap this in an
> ;; UNSPEC_VOLATILE.
> 
> ...to refer to the dummy zero for the source.  Let me know what you want.

No strong opinion, the existing comment looked out of place, that's all.

The latter option adds information, so if you think that is useful to
have here, let's go with that?

Cheers,


Segher
Peter Bergner Sept. 14, 2021, 3:58 p.m. UTC | #5
On 9/13/21 7:17 PM, Segher Boessenkool wrote:
> On Mon, Sep 13, 2021 at 05:10:42PM -0500, Peter Bergner wrote:
>> It still has "a" match_operand...for operand 0.  The match_operand
>> for operand 1 was what was removed.  Want me to reword that as
>> "Remove source match_operand." or "Remove match_operand 1." or ???
> 
> Ah I see, I looked with my eyes close apparently.  "Remove operand 1"?

Ok, I'll use that.




>> ;; We can't have integer constants in XOmode so we wrap this in an
>> ;; UNSPEC_VOLATILE.
>>
>> ...to refer to the dummy zero for the source.  Let me know what you want.
> 
> The latter option adds information, so if you think that is useful to
> have here, let's go with that?

Sure will do.


I'll make the above changes and the ones from my previous reply and do one
last build before pushing.  Thanks for the review!


Peter
diff mbox series

Patch

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 1f6fc03d2ac..b26ae7a5d04 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -91,7 +91,10 @@  (define_c_enum "unspec"
    UNSPEC_MMA_XVI8GER4SPP
    UNSPEC_MMA_XXMFACC
    UNSPEC_MMA_XXMTACC
-   UNSPEC_MMA_XXSETACCZ
+  ])
+
+(define_c_enum "unspecv"
+  [UNSPECV_MMA_XXSETACCZ
   ])
 
 ;; MMA instructions with 1 accumulator argument
@@ -469,26 +472,12 @@  (define_insn "mma_<acc>"
 
 ;; We can't have integer constants in XOmode so we wrap this in an UNSPEC.
 
-(define_expand "mma_xxsetaccz"
-  [(set (match_operand:XO 0 "fpr_reg_operand")
-	(const_int 0))]
-  "TARGET_MMA"
-{
-  rtx xo0 = gen_rtx_UNSPEC (XOmode, gen_rtvec (1, const0_rtx),
-			    UNSPEC_MMA_XXSETACCZ);
-  emit_insn (gen_rtx_SET (operands[0], xo0));
-  DONE;
-})
-
-(define_insn_and_split "*mma_xxsetaccz"
+(define_insn "mma_xxsetaccz"
   [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
-	(unspec:XO [(match_operand 1 "const_0_to_1_operand" "O")]
-	 UNSPEC_MMA_XXSETACCZ))]
+	(unspec_volatile:XO [(const_int 0)]
+			    UNSPECV_MMA_XXSETACCZ))]
   "TARGET_MMA"
   "xxsetaccz %A0"
-  "&& reload_completed"
-  [(set (match_dup 0) (unspec:XO [(match_dup 1)] UNSPEC_MMA_XXSETACCZ))]
-  ""
   [(set_attr "type" "mma")
    (set_attr "length" "4")])
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index e073b26b430..40dc71c8171 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -21919,7 +21919,7 @@  rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
       break;
 
     case UNSPEC:
-      if (XINT (x, 1) == UNSPEC_MMA_XXSETACCZ)
+      if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
 	{
 	  *total = 0;
 	  return true;
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c
index 0c6517211e3..715b28138e9 100644
--- a/gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c
@@ -5,14 +5,16 @@ 
 void
 foo (__vector_quad *dst)
 {
-  __vector_quad acc;
-  __builtin_mma_xxsetaccz (&acc);
-  *dst = acc;
+  __vector_quad acc0, acc1;
+  __builtin_mma_xxsetaccz (&acc0);
+  __builtin_mma_xxsetaccz (&acc1);
+  dst[0] = acc0;
+  dst[1] = acc1;
 }
 
 /* { dg-final { scan-assembler-not {\mlxv\M} } } */
 /* { dg-final { scan-assembler-not {\mlxvp\M} } } */
 /* { dg-final { scan-assembler-not {\mxxmtacc\M} } } */
-/* { dg-final { scan-assembler-times {\mxxsetaccz\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mxxmfacc\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxsetaccz\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */