diff mbox

[AArch64] PR 63521. define REG_ALLOC_ORDER/HONOR_REG_ALLOC_ORDER

Message ID n998ucjwgk1.fsf@arm.com
State New
Headers show

Commit Message

Jiong Wang May 20, 2015, 12:35 p.m. UTC
Current IRA still uses both target macros in a few places.

Tell IRA to use the order we defined rather than its own cost
calculation. Allocate caller-saved registers first, then callee-saved ones.

This is especially useful for LR/x30, as it's free to allocate and is
pure caller saved when used in a leaf function.

I haven't noticed any significant impact on benchmarks, but grepping for
keywords like "Spilling", "Push.*spill" etc. in the IRA RTL dump shows that
the number of matches is smaller.

OK for trunk?

2015-05-19  Jiong Wang  <jiong.wang@arm.com>

gcc/
  PR 63521
  * config/aarch64/aarch64.h (REG_ALLOC_ORDER): Define.
  (HONOR_REG_ALLOC_ORDER): Define.

Regards,
Jiong

Comments

Jiong Wang July 22, 2015, 10:30 a.m. UTC | #1
Jiong Wang writes:

> Current IRA still use both target macros in a few places.
>
> Tell IRA to use the order we defined rather than with it's own cost
> calculation. Allocate caller saved first, then callee saved.
>
> This is especially useful for LR/x30, as it's free to allocate and is
> pure caller saved when used in leaf function.
>
> Haven't noticed significant impact on benchmarks, but by grepping some
> keywords like "Spilling", "Push.*spill" etc in ira rtl dump, the number
> is smaller.
>
> OK for trunk?
>
> 2015-05-19  Jiong. Wang  <jiong.wang@arm.com>
>
> gcc/
>   PR 63521
>   * config/aarch64/aarch64.h (REG_ALLOC_ORDER): Define.
>   (HONOR_REG_ALLOC_ORDER): Define.
>
> Regards,
> Jiong

Ping.

I know it's hard to notice the register allocation improvements by this
hook as current IRA/LRA has improved register allocation quite a lot.

But given the example like below:

test.c
======

double dec (double, double);

int cal (int a, int b, double d, double e)
{
  double sum = dec (a , a + b);
  sum = dec (b, a - b);
  sum = dec (sum, a * b);
  return d + e + sum;
}

Although the number of instructions is the same before and after this patch,
the instruction scheduling looks better after this patch: because we
allocated w7 instead of w0, there are fewer instruction dependencies.

Before Patch (-O2)
======
cal:
        stp     x29, x30, [sp, -48]!
        add     x29, sp, 0
        stp     x19, x20, [sp, 16]
        stp     d8, d9, [sp, 32]
        mov     w19, w0
        add     w0, w0, w1
        fmov    d9, d1
        mov     w20, w1
        fmov    d8, d0
        scvtf   d1, w0
        scvtf   d0, w19
        bl      dec
        scvtf   d0, w20 
        sub     w0, w19, w20
        mul     w19, w19, w20
        scvtf   d1, w0
        bl      dec
        scvtf   d1, w19
        bl      dec
        fadd    d8, d8, d9
        ldp     x19, x20, [sp, 16]
        fadd    d0, d8, d0
        ldp     d8, d9, [sp, 32]
        ldp     x29, x30, [sp], 48
        fcvtzs  w0, d0
        ret

After Patch
===========
cal:    
        stp     x29, x30, [sp, -48]!
        add     w7, w0, w1
        add     x29, sp, 0
        stp     d8, d9, [sp, 32]
        fmov    d9, d1
        fmov    d8, d0
        scvtf   d1, w7
        scvtf   d0, w0
        stp     x19, x20, [sp, 16]
        mov     w20, w1 
        mov     w19, w0
        bl      dec
        scvtf   d0, w20
        sub     w7, w19, w20
        mul     w19, w19, w20
        scvtf   d1, w7
        bl      dec 
        scvtf   d1, w19
        bl      dec
        fadd    d8, d8, d9
        ldp     x19, x20, [sp, 16]
        fadd    d0, d8, d0
        ldp     d8, d9, [sp, 32]
        ldp     x29, x30, [sp], 48
        fcvtzs  w0, d0
        ret
James Greenhalgh July 22, 2015, 11:22 a.m. UTC | #2
On Wed, May 20, 2015 at 01:35:41PM +0100, Jiong Wang wrote:
> Current IRA still use both target macros in a few places.
> 
> Tell IRA to use the order we defined rather than with it's own cost
> calculation. Allocate caller saved first, then callee saved.
> 
> This is especially useful for LR/x30, as it's free to allocate and is
> pure caller saved when used in leaf function.
> 
> Haven't noticed significant impact on benchmarks, but by grepping some
> keywords like "Spilling", "Push.*spill" etc in ira rtl dump, the number
> is smaller.
> 
> OK for trunk?

OK, sorry for the delay.

It might be mail client mangling, but please check that the trailing backslashes
line up in the version that gets committed.

Thanks,
James

> 2015-05-19  Jiong. Wang  <jiong.wang@arm.com>
> 
> gcc/
>   PR 63521
>   * config/aarch64/aarch64.h (REG_ALLOC_ORDER): Define.
>   (HONOR_REG_ALLOC_ORDER): Define.
> 
 

> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index bf59e40..0acdf10 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -337,6 +337,31 @@ extern unsigned long aarch64_tune_flags;
>      V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31)  \
>    }
>  
> +#define REG_ALLOC_ORDER				\
> +{						\
> +  /* Reverse order for argument registers.  */	\
> +  7, 6, 5, 4, 3, 2, 1, 0,			\
> +  /* Other caller-saved registers.  */		\
> +  8, 9, 10, 11, 12, 13, 14, 15,			\
> +  16, 17, 18, 30,				\
> +  /* Callee-saved registers.  */		\
> +  19, 20, 21, 22, 23, 24, 25, 26,		\
> +  27, 28,					\
> +  /* All other registers.  */			\
> +  29, 31,					\
> +  /* Reverse order for argument vregisters.  */	\
> +  39, 38, 37, 36, 35, 34, 33, 32,		\
> +  /* Other caller-saved vregisters.  */		\
> +  48, 49, 50, 51, 52, 53, 54, 55,		\
> +  56, 57, 58, 59, 60, 61, 62, 63,		\
> +  /* Callee-saved vregisters.  */		\
> +  40, 41, 42, 43, 44, 45, 46, 47,		\
> +  /* Other pseudo registers.  */		\
> +  64, 65, 66					\
> +}
> +
> +#define HONOR_REG_ALLOC_ORDER 1
> +
>  /* Say that the epilogue uses the return address register.  Note that
>     in the case of sibcalls, the values "used by the epilogue" are
>     considered live at the start of the called function.  */
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index bf59e40..0acdf10 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -337,6 +337,31 @@  extern unsigned long aarch64_tune_flags;
     V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31)  \
   }
 
+#define REG_ALLOC_ORDER				\
+{						\
+  /* Reverse order for argument registers.  */	\
+  7, 6, 5, 4, 3, 2, 1, 0,			\
+  /* Other caller-saved registers.  */		\
+  8, 9, 10, 11, 12, 13, 14, 15,			\
+  16, 17, 18, 30,				\
+  /* Callee-saved registers.  */		\
+  19, 20, 21, 22, 23, 24, 25, 26,		\
+  27, 28,					\
+  /* All other registers.  */			\
+  29, 31,					\
+  /* Reverse order for argument vregisters.  */	\
+  39, 38, 37, 36, 35, 34, 33, 32,		\
+  /* Other caller-saved vregisters.  */		\
+  48, 49, 50, 51, 52, 53, 54, 55,		\
+  56, 57, 58, 59, 60, 61, 62, 63,		\
+  /* Callee-saved vregisters.  */		\
+  40, 41, 42, 43, 44, 45, 46, 47,		\
+  /* Other pseudo registers.  */		\
+  64, 65, 66					\
+}
+
+#define HONOR_REG_ALLOC_ORDER 1
+
 /* Say that the epilogue uses the return address register.  Note that
    in the case of sibcalls, the values "used by the epilogue" are
    considered live at the start of the called function.  */