diff mbox

[AArch64] Remove '*' from movsi/di/ti patterns

Message ID DB6PR0801MB20531CE5EB7BEE60F8B2D3B483B90@DB6PR0801MB2053.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Wilco Dijkstra July 26, 2017, 1:46 p.m. UTC
Remove the remaining uses of '*' from the movsi/di/ti patterns.
Using '*' in alternatives is typically incorrect at it tells the register
allocator to ignore those alternatives.  So remove these from all the
integer move patterns.  This removes unnecessary int to float moves, for
example gcc.target/aarch64/pr62178.c no longer generates a redundant fmov
since the w = m variant is now allowed.

Passes regress & bootstrap, OK for commit?

ChangeLog:
2017-07-26  Wilco Dijkstra  <wdijkstr@arm.com>

	* gcc/config/aarch64/aarch64.md (movsi_aarch64): Remove all '*'.
	(movdi_aarch64): Likewise.
	(movti_aarch64): Likewise.
--

Comments

Andrew Pinski Sept. 9, 2017, 6:58 a.m. UTC | #1
On Wed, Jul 26, 2017 at 6:46 AM, Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> Remove the remaining uses of '*' from the movsi/di/ti patterns.
> Using '*' in alternatives is typically incorrect at it tells the register
> allocator to ignore those alternatives.  So remove these from all the
> integer move patterns.  This removes unnecessary int to float moves, for
> example gcc.target/aarch64/pr62178.c no longer generates a redundant fmov
> since the w = m variant is now allowed.
>
> Passes regress & bootstrap, OK for commit?

I had looked into doing the similar thing but I did not have any way
of benchmarking the patch, I decided to drop the patch.
That is the patch looks good to me (though I cannot approve it).

Thanks,
Andrew

>
> ChangeLog:
> 2017-07-26  Wilco Dijkstra  <wdijkstr@arm.com>
>
>         * gcc/config/aarch64/aarch64.md (movsi_aarch64): Remove all '*'.
>         (movdi_aarch64): Likewise.
>         (movti_aarch64): Likewise.
> --
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 225b64e1daf1663d28bbe8c2d30ba373b4722176..97c5fb08a2fd5d2eee556e1fc20dbf65b089d84b 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -920,8 +920,8 @@ (define_expand "mov<mode>"
>  )
>
>  (define_insn_and_split "*movsi_aarch64"
> -  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r  ,*w,r,*w")
> -       (match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w"))]
> +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,w,m, m,  r,  r, w,r,w")
> +       (match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m,m,rZ,w,Usa,Ush,rZ,w,w"))]
>    "(register_operand (operands[0], SImode)
>      || aarch64_reg_or_zero (operands[1], SImode))"
>    "@
> @@ -952,8 +952,8 @@ (define_insn_and_split "*movsi_aarch64"
>  )
>
>  (define_insn_and_split "*movdi_aarch64"
> -  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r,  *w,r,*w,w")
> -       (match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))]
> +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,w, m,m,  r,  r, w,r,w,w")
> +       (match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m,m,rZ,w,Usa,Ush,rZ,w,w,Dd"))]
>    "(register_operand (operands[0], DImode)
>      || aarch64_reg_or_zero (operands[1], DImode))"
>    "@
> @@ -1008,9 +1008,9 @@ (define_expand "movti"
>
>  (define_insn "*movti_aarch64"
>    [(set (match_operand:TI 0
> -        "nonimmediate_operand"  "=r, *w,r ,*w,r,m,m,*w,m")
> +        "nonimmediate_operand"  "=r, w,r,w,r,m,m,w,m")
>         (match_operand:TI 1
> -        "aarch64_movti_operand" " rn,r ,*w,*w,m,r,Z, m,*w"))]
> +        "aarch64_movti_operand" " rn,r,w,w,m,r,Z,m,w"))]
>    "(register_operand (operands[0], TImode)
>      || aarch64_reg_or_zero (operands[1], TImode))"
>    "@
James Greenhalgh Sept. 12, 2017, 4:10 p.m. UTC | #2
On Wed, Jul 26, 2017 at 02:46:14PM +0100, Wilco Dijkstra wrote:
> Remove the remaining uses of '*' from the movsi/di/ti patterns.
> Using '*' in alternatives is typically incorrect at it tells the register
> allocator to ignore those alternatives.  So remove these from all the
> integer move patterns.  This removes unnecessary int to float moves, for
> example gcc.target/aarch64/pr62178.c no longer generates a redundant fmov
> since the w = m variant is now allowed.
> 
> Passes regress & bootstrap, OK for commit?

OK.

These attempts at register allocator costs are just trouble!

Thanks,
James

> 
> ChangeLog:
> 2017-07-26  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* gcc/config/aarch64/aarch64.md (movsi_aarch64): Remove all '*'.
> 	(movdi_aarch64): Likewise.
> 	(movti_aarch64): Likewise.
> --
>
Andrew Pinski Sept. 13, 2017, 2:10 a.m. UTC | #3
On Tue, Sep 12, 2017 at 9:10 AM, James Greenhalgh
<james.greenhalgh@arm.com> wrote:
> On Wed, Jul 26, 2017 at 02:46:14PM +0100, Wilco Dijkstra wrote:
>> Remove the remaining uses of '*' from the movsi/di/ti patterns.
>> Using '*' in alternatives is typically incorrect at it tells the register
>> allocator to ignore those alternatives.  So remove these from all the
>> integer move patterns.  This removes unnecessary int to float moves, for
>> example gcc.target/aarch64/pr62178.c no longer generates a redundant fmov
>> since the w = m variant is now allowed.
>>
>> Passes regress & bootstrap, OK for commit?
>
> OK.
>
> These attempts at register allocator costs are just trouble!

Note this caused a few testsuite failures:
gcc.target/aarch64/vmov_n_1.c scan-assembler-times dup\\tv[0-9]+.2d,
v[0-9]+.d\\[[0-9]+\\] 1
gcc.target/aarch64/vmov_n_1.c scan-assembler-times dup\\tv[0-9]+.2d, x[0-9]+ 2
gcc.target/aarch64/vmov_n_1.c scan-assembler-times dup\\tv[0-9]+.2s,
v[0-9]+.s\\[[0-9]+\\] 1
gcc.target/aarch64/vmov_n_1.c scan-assembler-times dup\\tv[0-9]+.2s, w[0-9]+ 2
gcc.target/aarch64/vmov_n_1.c scan-assembler-times dup\\tv[0-9]+.4s,
v[0-9]+.s\\[[0-9]+\\] 1
gcc.target/aarch64/vmov_n_1.c scan-assembler-times dup\\tv[0-9]+.4s, w[0-9]+ 2

I suspect we are using the d/s registers in these cases rather than
x/w register (I did not look into the code generation to check that
but if we are then this is better code anyways).

Thanks,
Andrew

>
> Thanks,
> James
>
>>
>> ChangeLog:
>> 2017-07-26  Wilco Dijkstra  <wdijkstr@arm.com>
>>
>>       * gcc/config/aarch64/aarch64.md (movsi_aarch64): Remove all '*'.
>>       (movdi_aarch64): Likewise.
>>       (movti_aarch64): Likewise.
>> --
>>
Wilco Dijkstra Sept. 13, 2017, 11:46 a.m. UTC | #4
Andrew Pinski wrote:
>
> Note this caused a few testsuite failures:

Confirmed. The diffs show the new sequence is always better. I've
committed this:


Update vmov_n_1.c now we are generating better code for dup:

        ldr     s0, [x0]
        dup     v0.2s, v0.s[0]
        ret

Instead of:

        ldr     w0, [x0]
        dup     v0.2s, w0
        ret

ChangeLog: 
2017-09-13  Wilco Dijkstra  <wdijkstr@arm.com>

	* gcc.target/aarch64/vmov_n_1.c: Update dup scan-assembler.
--

diff --git a/gcc/testsuite/gcc.target/aarch64/vmov_n_1.c b/gcc/testsuite/gcc.target/aarch64/vmov_n_1.c
index 485a1a970bbcd30cb45a2f69bbd9f62f8258d3df..d0c284296a5e256be5cf5f859b113cdd62f929ba 100644
--- a/gcc/testsuite/gcc.target/aarch64/vmov_n_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vmov_n_1.c
@@ -190,8 +190,9 @@ TESTFUNC_NAME (reg_len, data_type, data_len) ()	\
 
 OBSCURE_FUNC (64, 32, f)
 TESTFUNC (64, 32, f)
-/* "dup  Vd.2s, Rn" is less preferable then "dup  Vd.2s, Vn.s[lane]".  */
-/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 1 } } */
+/* "dup  Vd.2s, Rn" is less preferable than "dup  Vd.2s, Vn.s[lane]".  */
+/* { dg-final { scan-assembler-not "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 3 } } */
 
 OBSCURE_FUNC (64, 64, f)
 TESTFUNC (64, 64, f)
@@ -216,7 +217,9 @@ TESTFUNC (64, 16, s)
 
 OBSCURE_FUNC (64, 32, s)
 TESTFUNC (64, 32, s)
-/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */
+/* "dup  Vd.2s, Rn" is less preferable than "dup  Vd.2s, Vn.s[lane]".  */
+/* { dg-final { scan-assembler-not "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 3 } } */
 
 OBSCURE_FUNC (64, 64, s)
 TESTFUNC (64, 64, s)
@@ -242,13 +245,15 @@ TESTFUNC (64, 64, u)
 
 OBSCURE_FUNC (128, 32, f)
 TESTFUNC (128, 32, f)
-/* "dup  Vd.4s, Rn" is less preferable then "dup  Vd.4s, Vn.s[lane]".  */
-/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 1 } } */
+/* "dup  Vd.4s, Rn" is less preferable than "dup  Vd.4s, Vn.s[lane]".  */
+/* { dg-final { scan-assembler-not "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 3 } } */
 
 OBSCURE_FUNC (128, 64, f)
 TESTFUNC (128, 64, f)
-/* "dup  Vd.2d, Rn" is less preferable then "dup  Vd.2d, Vn.d[lane]".  */
-/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 1 } } */
+/* "dup  Vd.2d, Rn" is less preferable than "dup  Vd.2d, Vn.d[lane]".  */
+/* { dg-final { scan-assembler-not "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 } } */
 
 OBSCURE_FUNC (128, 8, p)
 TESTFUNC (128, 8, p)
@@ -268,11 +273,15 @@ TESTFUNC (128, 16, s)
 
 OBSCURE_FUNC (128, 32, s)
 TESTFUNC (128, 32, s)
-/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */
+/* "dup  Vd.4s, Rn" is less preferable than "dup  Vd.4s, Vn.s[lane]".  */
+/* { dg-final { scan-assembler-not "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 3 } } */
 
 OBSCURE_FUNC (128, 64, s)
 TESTFUNC (128, 64, s)
-/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */
+/* "dup  Vd.2d, Rn" is less preferable than "dup  Vd.2d, Vn.d[lane]".  */
+/* { dg-final { scan-assembler-not "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 } } */
 
 OBSCURE_FUNC (128, 8, u)
 TESTFUNC (128, 8, u)
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 225b64e1daf1663d28bbe8c2d30ba373b4722176..97c5fb08a2fd5d2eee556e1fc20dbf65b089d84b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -920,8 +920,8 @@  (define_expand "mov<mode>"
 )
 
 (define_insn_and_split "*movsi_aarch64"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r  ,*w,r,*w")
-	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,w,m, m,  r,  r, w,r,w")
+	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m,m,rZ,w,Usa,Ush,rZ,w,w"))]
   "(register_operand (operands[0], SImode)
     || aarch64_reg_or_zero (operands[1], SImode))"
   "@
@@ -952,8 +952,8 @@  (define_insn_and_split "*movsi_aarch64"
 )
 
 (define_insn_and_split "*movdi_aarch64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r,  *w,r,*w,w")
-	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,w, m,m,  r,  r, w,r,w,w")
+	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m,m,rZ,w,Usa,Ush,rZ,w,w,Dd"))]
   "(register_operand (operands[0], DImode)
     || aarch64_reg_or_zero (operands[1], DImode))"
   "@
@@ -1008,9 +1008,9 @@  (define_expand "movti"
 
 (define_insn "*movti_aarch64"
   [(set (match_operand:TI 0
-	 "nonimmediate_operand"  "=r, *w,r ,*w,r,m,m,*w,m")
+	 "nonimmediate_operand"  "=r, w,r,w,r,m,m,w,m")
 	(match_operand:TI 1
-	 "aarch64_movti_operand" " rn,r ,*w,*w,m,r,Z, m,*w"))]
+	 "aarch64_movti_operand" " rn,r,w,w,m,r,Z,m,w"))]
   "(register_operand (operands[0], TImode)
     || aarch64_reg_or_zero (operands[1], TImode))"
   "@