diff mbox

[AArch64] Replace insn to zero up DF register

Message ID 56A94F35.8000000@samsung.com
State New
Headers show

Commit Message

Evandro Menezes Jan. 27, 2016, 11:13 p.m. UTC
On 01/22/16 07:52, Wilco Dijkstra wrote:
> On 12/16/2015 03:30 PM, Evandro Menezes wrote:
>>      On 10/30/2015 05:24 AM, Marcus Shawcroft wrote:
>>
>>          On 20 October 2015 at 00:40, Evandro Menezes <e.menezes@samsung.com> wrote:
>>
>>              In the existing targets, it seems that it's always faster to zero up a DF
>>
>>              register with "movi %d0, #0" instead of "fmov %d0, xzr".
>>
>>              This patch modifies the respective pattern.
>>
>>
>>          Hi Evandro,
>>
>>          This patch changes the generic, u architecture independent instruction
>>          selection. The ARM ARM (C3.5.3) makes a specific recommendation about
>>          the choice of instruction in this situation and the current
>>          implementation in GCC follows that recommendation.  Wilco has also
>>          picked up on this issue he has the same patch internal to ARM along
>>          with an ongoing discussion with ARM architecture folk regarding this
>>          recommendation.  I'm reluctant to take this patch right now on the
>>          basis that it runs contrary to ARM ARM recommendation pending the
>>          conclusion of Wilco's discussion with ARM architecture folk.
>>
>>
>>      Have you had a chance to discuss this internally further?
> Yes, it was decided to remove the recommendation from future ARM ARM's.
>
> Several review comments on your patch (https://patchwork.ozlabs.org/patch/532736):
>
> * This should be added to movhf, movsf and movdf - movtf already does this.
> * It is important to set the "fp" and "simd" attributes so that the movi variant can
>     only be selected if it is available.
>

Hi, Wilco.

    2016-01-27 Evandro Menezes <e.menezes@samsung.com>

         Replace insn to zero up SIMD registers

         gcc/
             * config/aarch64/aarch64.md
             (*movhf_aarch64): Add "movi %0, #0" to zero up register.
             (*movsf_aarch64): Likewise.
             (*movdf_aarch64): Likewise.

When this decision is final, methinks that this patch would be close to 
addressing this change.

I have a question though: is it necessary to add the "fp" and "simd" 
attributes to both movsf_aarch64 and movdf_aarch64 as well?

Thank you,

Comments

Wilco Dijkstra Feb. 26, 2016, 12:37 p.m. UTC | #1
Evandro Menezes <e.menezes@samsung.com> wrote:
>
> I have a question though: is it necessary to add the "fp" and "simd"
> attributes to both movsf_aarch64 and movdf_aarch64 as well?

You need at least the "simd" attribute, but providing "fp" as well is clearer
(in principle the TARGET_FLOAT check in the pattern condition is 
redundant as a result, but the movhf and movtf patterns already do both).

Also you want to use the smallest possible SIMD size as these are
scalar operations and some microarchitectures execute 64-bit operations
more efficiently than 128-bit ones, so:

    mov\\t%0.h[0], %w1
+   movi\\t%0.4h, #0
    umov\\t%w0, %1.h[0]

    fmov\\t%s0, %w1
+   movi\\t%0.2s, #0
    fmov\\t%w0, %s1

With those changes it should be ready for commit once you get the OK from James/Marcus.

Wilco
diff mbox

Patch

From b390d411b56cfcdedf4601d0487baf1ef84e79ab Mon Sep 17 00:00:00 2001
From: Evandro Menezes <e.menezes@samsung.com>
Date: Mon, 19 Oct 2015 18:31:48 -0500
Subject: [PATCH] Replace insn to zero up SIMD registers

gcc/
	* config/aarch64/aarch64.md
	(*movhf_aarch64): Add "movi %0, #0" to zero up register.
	(*movsf_aarch64): Likewise.
	(*movdf_aarch64): Likewise.
---
 gcc/config/aarch64/aarch64.md | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index f6c8eb1..43a3854 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1164,64 +1164,67 @@ 
       operands[1] = force_reg (<MODE>mode, operands[1]);
   }
 )
 
 (define_insn "*movhf_aarch64"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=w, ?r,w,w,m,r,m ,r")
-	(match_operand:HF 1 "general_operand"      "?rY, w,w,m,w,m,rY,r"))]
+  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w,?r,w,w,m,r,m ,r")
+	(match_operand:HF 1 "general_operand"      "?r,Y, w,w,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], HFmode)
     || aarch64_reg_or_fp_zero (operands[1], HFmode))"
   "@
    mov\\t%0.h[0], %w1
+   movi\\t%0.8h, #0
    umov\\t%w0, %1.h[0]
    mov\\t%0.h[0], %1.h[0]
    ldr\\t%h0, %1
    str\\t%h1, %0
    ldrh\\t%w0, %1
    strh\\t%w1, %0
    mov\\t%w0, %w1"
-  [(set_attr "type" "neon_from_gp,neon_to_gp,neon_move,\
+  [(set_attr "type" "neon_from_gp,neon_move,neon_to_gp,neon_move,\
                      f_loads,f_stores,load1,store1,mov_reg")
-   (set_attr "simd" "yes,yes,yes,*,*,*,*,*")
-   (set_attr "fp"   "*,*,*,yes,yes,*,*,*")]
+   (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")
+   (set_attr "fp"   "*,*,*,*,yes,yes,*,*,*")]
 )
 
 (define_insn "*movsf_aarch64"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
-	(match_operand:SF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w,?r,w,w  ,w,m,r,m ,r")
+	(match_operand:SF 1 "general_operand"      "?r,Y, w,w,Ufc,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], SFmode)
     || aarch64_reg_or_fp_zero (operands[1], SFmode))"
   "@
    fmov\\t%s0, %w1
+   movi\\t%0.4s, #0
    fmov\\t%w0, %s1
    fmov\\t%s0, %s1
    fmov\\t%s0, %1
    ldr\\t%s0, %1
    str\\t%s1, %0
    ldr\\t%w0, %1
    str\\t%w1, %0
    mov\\t%w0, %w1"
-  [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\
+  [(set_attr "type" "f_mcr,neon_move,f_mrc,fmov,fconsts,\
                      f_loads,f_stores,load1,store1,mov_reg")]
 )
 
 (define_insn "*movdf_aarch64"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
-	(match_operand:DF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w,?r,w,w  ,w,m,r,m ,r")
+	(match_operand:DF 1 "general_operand"      "?r,Y, w,w,Ufc,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], DFmode)
     || aarch64_reg_or_fp_zero (operands[1], DFmode))"
   "@
    fmov\\t%d0, %x1
+   movi\\t%d0, #0
    fmov\\t%x0, %d1
    fmov\\t%d0, %d1
    fmov\\t%d0, %1
    ldr\\t%d0, %1
    str\\t%d1, %0
    ldr\\t%x0, %1
    str\\t%x1, %0
    mov\\t%x0, %x1"
-  [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\
+  [(set_attr "type" "f_mcr,neon_move,f_mrc,fmov,fconstd,\
                      f_loadd,f_stored,load1,store1,mov_reg")]
 )
 
 (define_insn "*movtf_aarch64"
   [(set (match_operand:TF 0
-- 
2.6.3