diff mbox

[AArch64] Replace insn to zero up DF register

Message ID 56E0972F.3060207@samsung.com
State New
Headers show

Commit Message

Evandro Menezes March 9, 2016, 9:35 p.m. UTC
On 03/01/16 13:08, Evandro Menezes wrote:
> On 03/01/16 13:02, Wilco Dijkstra wrote:
>> Evandro Menezes wrote:
>>> The meaning of these attributes are not clear to me.  Is there a
>>> reference somewhere about which insns are FP or SIMD or neither?
>> The meaning should be clear, "fp" is a floating point instruction, 
>> "simd" a SIMD one
>> as defined in ARM-ARM.
>>
>>> Indeed, I had to add the Y for the f_mcr insn to match it with nosimd.
>>> However, I didn't feel that it should be moved to the right, since it's
>>> already disparaged.  Am I missing something detail?
>> It might not matter for this specific case, but I have seen reload 
>> forcing the very
>> first alternative without looking at any costs or preferences - as 
>> long as it is legal.
>> This suggests we need to order alternatives from most preferred 
>> alternative to least
>> preferred one.
>>
>> I think it is good enough for commit, James?
>
> Methinks that my issue with those attributes is that I'm not as fluent 
> in AArch64 as I'd like to be.
>
> Please, feel free to edit the patch changing the order then.

    Replace insn to zero up SIMD registers

    gcc/
             * config/aarch64/aarch64.md
             (*movhf_aarch64): Add "movi %0, #0" to zero up register.
             (*movsf_aarch64): Likewise and add "simd" and "fp" attributes.
             (*movdf_aarch64): Likewise.

Swapped the order of the constraints to favor MOVI.

Just say the word...

Thank you,

Comments

James Greenhalgh March 10, 2016, 1:23 p.m. UTC | #1
On Wed, Mar 09, 2016 at 03:35:43PM -0600, Evandro Menezes wrote:
> On 03/01/16 13:08, Evandro Menezes wrote:
> >On 03/01/16 13:02, Wilco Dijkstra wrote:
> >>Evandro Menezes wrote:
> >>>The meaning of these attributes are not clear to me.  Is there a
> >>>reference somewhere about which insns are FP or SIMD or neither?
> >>The meaning should be clear, "fp" is a floating point
> >>instruction, "simd" a SIMD one
> >>as defined in ARM-ARM.
> >>
> >>>Indeed, I had to add the Y for the f_mcr insn to match it with nosimd.
> >>>However, I didn't feel that it should be moved to the right, since it's
> >>>already disparaged.  Am I missing something detail?
> >>It might not matter for this specific case, but I have seen
> >>reload forcing the very
> >>first alternative without looking at any costs or preferences -
> >>as long as it is legal.
> >>This suggests we need to order alternatives from most preferred
> >>alternative to least
> >>preferred one.
> >>
> >>I think it is good enough for commit, James?
> >
> >Methinks that my issue with those attributes is that I'm not as
> >fluent in AArch64 as I'd like to be.
> >
> >Please, feel free to edit the patch changing the order then.
> 
>    Replace insn to zero up SIMD registers
> 
>    gcc/
>             * config/aarch64/aarch64.md
>             (*movhf_aarch64): Add "movi %0, #0" to zero up register.
>             (*movsf_aarch64): Likewise and add "simd" and "fp" attributes.
>             (*movdf_aarch64): Likewise.
> 
> Swapped the order of the constraints to favor MOVI.
> 
> Just say the word...

I'm wondering whether this is appropriate for GCC6 now that we are so late
in the development cycle. Additionally, I have some comments on your patch:

> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 68676c9..4502a58 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1163,11 +1163,12 @@
>  )
>  
>  (define_insn "*movhf_aarch64"
> -  [(set (match_operand:HF 0 "nonimmediate_operand" "=w, ?r,w,w,m,r,m ,r")
> -	(match_operand:HF 1 "general_operand"      "?rY, w,w,m,w,m,rY,r"))]
> +  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w,m,r,m ,r")
> +	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,m,w,m,rY,r"))]
>    "TARGET_FLOAT && (register_operand (operands[0], HFmode)
>      || aarch64_reg_or_fp_zero (operands[1], HFmode))"
>    "@
> +   movi\\t%0.4h, #0
>     mov\\t%0.h[0], %w1
>     umov\\t%w0, %1.h[0]
>     mov\\t%0.h[0], %1.h[0]
> @@ -1176,18 +1177,19 @@
>     ldrh\\t%w0, %1
>     strh\\t%w1, %0
>     mov\\t%w0, %w1"
> -  [(set_attr "type" "neon_from_gp,neon_to_gp,neon_move,\
> +  [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
>                       f_loads,f_stores,load1,store1,mov_reg")
> -   (set_attr "simd" "yes,yes,yes,*,*,*,*,*")
> -   (set_attr "fp"   "*,*,*,yes,yes,*,*,*")]
> +   (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")
> +   (set_attr "fp"   "*,*,*,*,yes,yes,*,*,*")]
>  )
>  
>  (define_insn "*movsf_aarch64"
> -  [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
> -	(match_operand:SF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
> +  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w,m,r,m ,r")
> +	(match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
>    "TARGET_FLOAT && (register_operand (operands[0], SFmode)
>      || aarch64_reg_or_fp_zero (operands[1], SFmode))"
>    "@
> +   movi\\t%0.2s, #0
>     fmov\\t%s0, %w1
>     fmov\\t%w0, %s1
>     fmov\\t%s0, %s1
> @@ -1197,16 +1199,19 @@
>     ldr\\t%w0, %1
>     str\\t%w1, %0
>     mov\\t%w0, %w1"
> -  [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\
> -                     f_loads,f_stores,load1,store1,mov_reg")]
> +  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
> +                     f_loads,f_stores,load1,store1,mov_reg")
> +   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")
> +   (set_attr "fp"   "*,*,*,yes,yes,yes,yes,*,*,*")]
>  )

This fp attribute looks wrong to me. The two fmov instructions that move
between core and FP registers should be tagged "yes". However, this is
irrelevant as the whole pattern is guarded by TARGET_FLOAT.

It would be clearer to drop the FP attribute entirely, so as not to give
the erroneous impression that some alternatives in this insn are enabled
for !TARGET_FLOAT.

>  (define_insn "*movdf_aarch64"
> -  [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
> -	(match_operand:DF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
> +  [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w,m,r,m ,r")
> +	(match_operand:DF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
>    "TARGET_FLOAT && (register_operand (operands[0], DFmode)
>      || aarch64_reg_or_fp_zero (operands[1], DFmode))"
>    "@
> +   movi\\t%d0, #0
>     fmov\\t%d0, %x1
>     fmov\\t%x0, %d1
>     fmov\\t%d0, %d1
> @@ -1216,8 +1221,10 @@
>     ldr\\t%x0, %1
>     str\\t%x1, %0
>     mov\\t%x0, %x1"
> -  [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\
> -                     f_loadd,f_stored,load1,store1,mov_reg")]
> +  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
> +                     f_loadd,f_stored,load1,store1,mov_reg")
> +   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")
> +   (set_attr "fp"   "*,*,*,yes,yes,yes,yes,*,*,*")]

Likewise here.

As to whether this is OK for GCC6, this doesn't look like a regression to me,
we've generated FMOV since the dawn of the port. The patch has performance
implications (arguably trivial ones, but still performance implications)
for generic code generation, and release is coming soon - so I'd rather
keep this back until GCC 7 opens.

Remove the FP attributes, and this patch is OK for GCC 7.

I'm happy to be overruled on this by Marcus or Richard if they feel we
should take this patch now, but my opinion is that waiting is more in the
spirit of using Stage 4 to stabilise the compiler.

Thanks,
James
Evandro Menezes March 10, 2016, 4:27 p.m. UTC | #2
On 03/10/16 07:23, James Greenhalgh wrote:
> On Wed, Mar 09, 2016 at 03:35:43PM -0600, Evandro Menezes wrote:
>> On 03/01/16 13:08, Evandro Menezes wrote:
>>> On 03/01/16 13:02, Wilco Dijkstra wrote:
>>>> Evandro Menezes wrote:
>>>>> The meaning of these attributes are not clear to me.  Is there a
>>>>> reference somewhere about which insns are FP or SIMD or neither?
>>>> The meaning should be clear, "fp" is a floating point
>>>> instruction, "simd" a SIMD one
>>>> as defined in ARM-ARM.
>>>>
>>>>> Indeed, I had to add the Y for the f_mcr insn to match it with nosimd.
>>>>> However, I didn't feel that it should be moved to the right, since it's
>>>>> already disparaged.  Am I missing something detail?
>>>> It might not matter for this specific case, but I have seen
>>>> reload forcing the very
>>>> first alternative without looking at any costs or preferences -
>>>> as long as it is legal.
>>>> This suggests we need to order alternatives from most preferred
>>>> alternative to least
>>>> preferred one.
>>>>
>>>> I think it is good enough for commit, James?
>>> Methinks that my issue with those attributes is that I'm not as
>>> fluent in AArch64 as I'd like to be.
>>>
>>> Please, feel free to edit the patch changing the order then.
>>     Replace insn to zero up SIMD registers
>>
>>     gcc/
>>              * config/aarch64/aarch64.md
>>              (*movhf_aarch64): Add "movi %0, #0" to zero up register.
>>              (*movsf_aarch64): Likewise and add "simd" and "fp" attributes.
>>              (*movdf_aarch64): Likewise.
>>
>> Swapped the order of the constraints to favor MOVI.
>>
>> Just say the word...
> I'm wondering whether this is appropriate for GCC6 now that we are so late
> in the development cycle. Additionally, I have some comments on your patch:
>
>> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
>> index 68676c9..4502a58 100644
>> --- a/gcc/config/aarch64/aarch64.md
>> +++ b/gcc/config/aarch64/aarch64.md
>> @@ -1163,11 +1163,12 @@
>>   )
>>   
>>   (define_insn "*movhf_aarch64"
>> -  [(set (match_operand:HF 0 "nonimmediate_operand" "=w, ?r,w,w,m,r,m ,r")
>> -	(match_operand:HF 1 "general_operand"      "?rY, w,w,m,w,m,rY,r"))]
>> +  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w,m,r,m ,r")
>> +	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,m,w,m,rY,r"))]
>>     "TARGET_FLOAT && (register_operand (operands[0], HFmode)
>>       || aarch64_reg_or_fp_zero (operands[1], HFmode))"
>>     "@
>> +   movi\\t%0.4h, #0
>>      mov\\t%0.h[0], %w1
>>      umov\\t%w0, %1.h[0]
>>      mov\\t%0.h[0], %1.h[0]
>> @@ -1176,18 +1177,19 @@
>>      ldrh\\t%w0, %1
>>      strh\\t%w1, %0
>>      mov\\t%w0, %w1"
>> -  [(set_attr "type" "neon_from_gp,neon_to_gp,neon_move,\
>> +  [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
>>                        f_loads,f_stores,load1,store1,mov_reg")
>> -   (set_attr "simd" "yes,yes,yes,*,*,*,*,*")
>> -   (set_attr "fp"   "*,*,*,yes,yes,*,*,*")]
>> +   (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")
>> +   (set_attr "fp"   "*,*,*,*,yes,yes,*,*,*")]
>>   )
>>   
>>   (define_insn "*movsf_aarch64"
>> -  [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
>> -	(match_operand:SF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
>> +  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w,m,r,m ,r")
>> +	(match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
>>     "TARGET_FLOAT && (register_operand (operands[0], SFmode)
>>       || aarch64_reg_or_fp_zero (operands[1], SFmode))"
>>     "@
>> +   movi\\t%0.2s, #0
>>      fmov\\t%s0, %w1
>>      fmov\\t%w0, %s1
>>      fmov\\t%s0, %s1
>> @@ -1197,16 +1199,19 @@
>>      ldr\\t%w0, %1
>>      str\\t%w1, %0
>>      mov\\t%w0, %w1"
>> -  [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\
>> -                     f_loads,f_stores,load1,store1,mov_reg")]
>> +  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
>> +                     f_loads,f_stores,load1,store1,mov_reg")
>> +   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")
>> +   (set_attr "fp"   "*,*,*,yes,yes,yes,yes,*,*,*")]
>>   )
> This fp attribute looks wrong to me. The two fmov instructions that move
> between core and FP registers should be tagged "yes". However, this is
> irrelevant as the whole pattern is guarded by TARGET_FLOAT.
>
> It would be clearer to drop the FP attribute entirely, so as not to give
> the erroneous impression that some alternatives in this insn are enabled
> for !TARGET_FLOAT.

You mean to remove the FP attribute from all, HF, SF, DF, TF?

>>   (define_insn "*movdf_aarch64"
>> -  [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
>> -	(match_operand:DF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
>> +  [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w,m,r,m ,r")
>> +	(match_operand:DF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
>>     "TARGET_FLOAT && (register_operand (operands[0], DFmode)
>>       || aarch64_reg_or_fp_zero (operands[1], DFmode))"
>>     "@
>> +   movi\\t%d0, #0
>>      fmov\\t%d0, %x1
>>      fmov\\t%x0, %d1
>>      fmov\\t%d0, %d1
>> @@ -1216,8 +1221,10 @@
>>      ldr\\t%x0, %1
>>      str\\t%x1, %0
>>      mov\\t%x0, %x1"
>> -  [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\
>> -                     f_loadd,f_stored,load1,store1,mov_reg")]
>> +  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
>> +                     f_loadd,f_stored,load1,store1,mov_reg")
>> +   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")
>> +   (set_attr "fp"   "*,*,*,yes,yes,yes,yes,*,*,*")]
> Likewise here.
>
> As to whether this is OK for GCC6, this doesn't look like a regression to me,
> we've generated FMOV since the dawn of the port. The patch has performance
> implications (arguably trivial ones, but still performance implications)
> for generic code generation, and release is coming soon - so I'd rather
> keep this back until GCC 7 opens.
>
> Remove the FP attributes, and this patch is OK for GCC 7.
>
> I'm happy to be overruled on this by Marcus or Richard if they feel we
> should take this patch now, but my opinion is that waiting is more in the
> spirit of using Stage 4 to stabilise the compiler.

I agree to postpone until GCC 7.

         [AArch64] Replace insn to zero up SIMD registers

         gcc/
             * config/aarch64/aarch64.md
             (*movhf_aarch64): Add "movi %0, #0" to zero up register.
             (*movsf_aarch64): Likewise and add "simd" attributes.
             (*movdf_aarch64): Likewise.

This patch removes the FP attributes from the HF, SF, DF, TF moves.

Thank you,
diff mbox

Patch

From bcb76a4c864436930e1236e7ce35d9e689adf075 Mon Sep 17 00:00:00 2001
From: Evandro Menezes <e.menezes@samsung.com>
Date: Mon, 19 Oct 2015 18:31:48 -0500
Subject: [PATCH] Replace insn to zero up SIMD registers

gcc/
	* config/aarch64/aarch64.md
	(*movhf_aarch64): Add "movi %0, #0" to zero up register.
	(*movsf_aarch64): Likewise and add "simd" and "fp" attributes.
	(*movdf_aarch64): Likewise.
---
 gcc/config/aarch64/aarch64.md | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 68676c9..4502a58 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1163,11 +1163,12 @@ 
 )
 
 (define_insn "*movhf_aarch64"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=w, ?r,w,w,m,r,m ,r")
-	(match_operand:HF 1 "general_operand"      "?rY, w,w,m,w,m,rY,r"))]
+  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w,m,r,m ,r")
+	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], HFmode)
     || aarch64_reg_or_fp_zero (operands[1], HFmode))"
   "@
+   movi\\t%0.4h, #0
    mov\\t%0.h[0], %w1
    umov\\t%w0, %1.h[0]
    mov\\t%0.h[0], %1.h[0]
@@ -1176,18 +1177,19 @@ 
    ldrh\\t%w0, %1
    strh\\t%w1, %0
    mov\\t%w0, %w1"
-  [(set_attr "type" "neon_from_gp,neon_to_gp,neon_move,\
+  [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
                      f_loads,f_stores,load1,store1,mov_reg")
-   (set_attr "simd" "yes,yes,yes,*,*,*,*,*")
-   (set_attr "fp"   "*,*,*,yes,yes,*,*,*")]
+   (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")
+   (set_attr "fp"   "*,*,*,*,yes,yes,*,*,*")]
 )
 
 (define_insn "*movsf_aarch64"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
-	(match_operand:SF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w,m,r,m ,r")
+	(match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], SFmode)
     || aarch64_reg_or_fp_zero (operands[1], SFmode))"
   "@
+   movi\\t%0.2s, #0
    fmov\\t%s0, %w1
    fmov\\t%w0, %s1
    fmov\\t%s0, %s1
@@ -1197,16 +1199,19 @@ 
    ldr\\t%w0, %1
    str\\t%w1, %0
    mov\\t%w0, %w1"
-  [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\
-                     f_loads,f_stores,load1,store1,mov_reg")]
+  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
+                     f_loads,f_stores,load1,store1,mov_reg")
+   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")
+   (set_attr "fp"   "*,*,*,yes,yes,yes,yes,*,*,*")]
 )
 
 (define_insn "*movdf_aarch64"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
-	(match_operand:DF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w,m,r,m ,r")
+	(match_operand:DF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], DFmode)
     || aarch64_reg_or_fp_zero (operands[1], DFmode))"
   "@
+   movi\\t%d0, #0
    fmov\\t%d0, %x1
    fmov\\t%x0, %d1
    fmov\\t%d0, %d1
@@ -1216,8 +1221,10 @@ 
    ldr\\t%x0, %1
    str\\t%x1, %0
    mov\\t%x0, %x1"
-  [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\
-                     f_loadd,f_stored,load1,store1,mov_reg")]
+  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
+                     f_loadd,f_stored,load1,store1,mov_reg")
+   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")
+   (set_attr "fp"   "*,*,*,yes,yes,yes,yes,*,*,*")]
 )
 
 (define_insn "*movtf_aarch64"
-- 
1.9.1