[AArch64] Improve aarch64_legitimate_constant_p

Message ID AM5PR0802MB2610272348FB1C5DADA9665C83AA0@AM5PR0802MB2610.eurprd08.prod.outlook.com
State New

Commit Message

Wilco Dijkstra July 7, 2017, 11:28 a.m. UTC
This patch further improves aarch64_legitimate_constant_p.  Allow all
integer, floating point and vector constants.  Allow label references
and non-anchor symbols with an immediate offset.  This allows such
constants to be rematerialized, resulting in smaller code and fewer stack
spills.
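
As a minimal illustration (a hypothetical example, not part of the patch),
an address formed from a non-anchor symbol plus an immediate offset can now
be recomputed cheaply under register pressure instead of being spilled:

   extern char buf[64];
   void use (char *);

   void
   f (void)
   {
     char *p = buf + 16;   /* non-anchor symbol + immediate offset */
     use (p);
     use (p);   /* p can be rematerialized with adrp/add rather than
                   spilled to the stack across the calls.  */
   }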

SPEC2006 codesize reduces by 0.08%, SPEC2017 by 0.13%.

Bootstrap OK, OK for commit?

ChangeLog:
2017-07-07  Wilco Dijkstra  <wdijkstr@arm.com>

	* config/aarch64/aarch64.c (aarch64_legitimate_constant_p):
	Return true for more constants, symbols and label references.
	(aarch64_valid_floating_const): Remove unused function.

--

Comments

James Greenhalgh Oct. 26, 2017, 3:38 p.m. UTC | #1
On Fri, Jul 07, 2017 at 12:28:11PM +0100, Wilco Dijkstra wrote:
> This patch further improves aarch64_legitimate_constant_p.  Allow all
> integer, floating point and vector constants.  Allow label references
> and non-anchor symbols with an immediate offset.  This allows such
> constants to be rematerialized, resulting in smaller code and fewer stack
> spills.
> 
> SPEC2006 codesize reduces by 0.08%, SPEC2017 by 0.13%.
> 
> Bootstrap OK, OK for commit?

This is mostly OK, but I think you lose one case we previously permitted,
buried in aarch64_classify_address (the CONST case).

OK with that case handled too (assuming that passes a bootstrap and test).

Reviewed by: James Greenhalgh <james.greenhalgh@arm.com>

Thanks,
James

> 
> ChangeLog:
> 2017-07-07  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* config/aarch64/aarch64.c (aarch64_legitimate_constant_p):
> 	Return true for more constants, symbols and label references.
> 	(aarch64_valid_floating_const): Remove unused function.
>
Wilco Dijkstra Oct. 31, 2017, 6:50 p.m. UTC | #2
James Greenhalgh wrote:

> This is mostly OK, but I think you lose one case we previously permitted,
> buried in aarch64_classify_address (the CONST case).
> 
> OK with that case handled too (assuming that passes a bootstrap and test).

That case is already handled. The CONST case handles the constant addresses,
and the remaining ones are LABEL_REFs, which are also handled. So I don't see
what case could be missing...

I've tweaked the code to use split_const like elsewhere:
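
For reference, a minimal sketch of split_const's behavior (it lives in
rtlanal.c and stores the base and CONST_INT offset of a constant through
its last two arguments):

   rtx base, offset;

   /* If x is (const (plus (symbol_ref "s") (const_int 8))) ...  */
   split_const (x, &base, &offset);
   /* ... then base is (symbol_ref "s") and offset is (const_int 8);
      for a bare symbol_ref, offset would be (const_int 0).  */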

2017-10-31  Wilco Dijkstra  <wdijkstr@arm.com>

	* config/aarch64/aarch64.c (aarch64_legitimate_constant_p):
	Return true for more constants, symbols and label references.
	(aarch64_valid_floating_const): Remove unused function.

--
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6ab6092e1e518460e69d4c59ce5c1717f996f3db..1d7a5102d4f9f4dc9d4658df466231c47e3ca165 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -10396,51 +10396,49 @@ aarch64_legitimate_pic_operand_p (rtx x)
   return true;
 }
 
-/* Return true if X holds either a quarter-precision or
-     floating-point +0.0 constant.  */
-static bool
-aarch64_valid_floating_const (rtx x)
-{
-  if (!CONST_DOUBLE_P (x))
-    return false;
-
-  /* This call determines which constants can be used in mov<mode>
-     as integer moves instead of constant loads.  */
-  if (aarch64_float_const_rtx_p (x))
-    return true;
-
-  return aarch64_float_const_representable_p (x);
-}
+/* Implement TARGET_LEGITIMATE_CONSTANT_P hook.  Return true for constants
+   that should be rematerialized rather than spilled.  */
 
 static bool
 aarch64_legitimate_constant_p (machine_mode mode, rtx x)
 {
+  /* Support CSE and rematerialization of common constants.  */
+  if (CONST_INT_P (x) || CONST_DOUBLE_P (x) || GET_CODE (x) == CONST_VECTOR)
+    return true;
+
   /* Do not allow vector struct mode constants.  We could support
      0 and -1 easily, but they need support in aarch64-simd.md.  */
-  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
+  if (aarch64_vect_struct_mode_p (mode))
     return false;
 
-  /* For these cases we never want to use a literal load.
-     As such we have to prevent the compiler from forcing these
-     to memory.  */
-  if ((GET_CODE (x) == CONST_VECTOR
-       && aarch64_simd_valid_immediate (x, mode, false, NULL))
-      || CONST_INT_P (x)
-      || aarch64_valid_floating_const (x)
-      || aarch64_can_const_movi_rtx_p (x, mode)
-      || aarch64_float_const_rtx_p (x))
-	return !targetm.cannot_force_const_mem (mode, x);
+  /* Do not allow wide int constants - this requires support in movti.  */
+  if (CONST_WIDE_INT_P (x))
+    return false;
 
-  if (GET_CODE (x) == HIGH
-      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
-    return true;
+  /* Do not allow const (plus (anchor_symbol, const_int)).  */
+  if (GET_CODE (x) == CONST)
+    {
+      rtx offset;
+
+      split_const (x, &x, &offset);
+
+      if (SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
+	return false;
+    }
+
+  if (GET_CODE (x) == HIGH)
+    x = XEXP (x, 0);
 
   /* Treat symbols as constants.  Avoid TLS symbols as they are complex,
      so spilling them is better than rematerialization.  */
   if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
     return true;
 
-  return aarch64_constant_address_p (x);
+  /* Label references are always constant.  */
+  if (GET_CODE (x) == LABEL_REF)
+    return true;
+
+  return false;
 }
 
 rtx
Andreas Schwab Nov. 4, 2017, 5:31 p.m. UTC | #3
FAIL: gfortran.dg/class_array_1.f03   -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess errors)
Excess errors:
/opt/gcc/gcc-20171104/gcc/testsuite/gfortran.dg/class_array_1.f03:31:0: Error: could not split insn
(insn 527 1444 562 (set (reg:TI 0 x0 [847])
        (const_wide_int 0x10000000000000001)) "/opt/gcc/gcc-20171104/gcc/testsuite/gfortran.dg/class_array_1.f03":21 50 {*movti_aarch64}
     (nil))
during RTL pass: final
/opt/gcc/gcc-20171104/gcc/testsuite/gfortran.dg/class_array_1.f03:31:0: internal compiler error: in final_scan_insn, at final.c:3018
0x58be1b _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
        ../../gcc/rtl-error.c:108
0x8aeed7 final_scan_insn(rtx_insn*, _IO_FILE*, int, int, int*)
        ../../gcc/final.c:3018
0x8af12f final(rtx_insn*, _IO_FILE*, int)
        ../../gcc/final.c:2046
0x8af483 rest_of_handle_final
        ../../gcc/final.c:4477
0x8af483 execute
        ../../gcc/final.c:4551

Andreas.
Richard Sandiford Nov. 6, 2017, 10:50 a.m. UTC | #4
Andreas Schwab <schwab@linux-m68k.org> writes:
> FAIL: gfortran.dg/class_array_1.f03   -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess errors)
> Excess errors:
> /opt/gcc/gcc-20171104/gcc/testsuite/gfortran.dg/class_array_1.f03:31:0: Error: could not split insn
> (insn 527 1444 562 (set (reg:TI 0 x0 [847])
>         (const_wide_int 0x10000000000000001)) "/opt/gcc/gcc-20171104/gcc/testsuite/gfortran.dg/class_array_1.f03":21 50 {*movti_aarch64}
>      (nil))
> during RTL pass: final
> /opt/gcc/gcc-20171104/gcc/testsuite/gfortran.dg/class_array_1.f03:31:0: internal compiler error: in final_scan_insn, at final.c:3018
> 0x58be1b _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
>         ../../gcc/rtl-error.c:108
> 0x8aeed7 final_scan_insn(rtx_insn*, _IO_FILE*, int, int, int*)
>         ../../gcc/final.c:3018
> 0x8af12f final(rtx_insn*, _IO_FILE*, int)
>         ../../gcc/final.c:2046
> 0x8af483 rest_of_handle_final
>         ../../gcc/final.c:4477
> 0x8af483 execute
>         ../../gcc/final.c:4551

Yeah, I'd hit this too.  I think it's a latent bug that just
happened to be exposed by Wilco's patch: although the *movti_aarch64
predicate disallows const_wide_int, the constraints allow it via "n",
which means that the RA can rematerialise a const_wide_int that would
otherwise be spilled or forced to memory.

Maybe the best fix would be just to go ahead and add support for
const_wide_int, as with the patch below.

This means that we now implement:

   __int128
   t (void)
   {
     return (__int128)1 << 80;
   }

as:

	mov	x0, 0
	mov	x1, 65536

which probably defeats what pr78733.c and pr79041-2.c were trying
to test, but should be better than loading from memory.

Tested on aarch64-elf and aarch64-linux-gnu.  OK to install?

Thanks,
Richard


2017-11-06  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* config/aarch64/aarch64.c (aarch64_legitimate_constant_p): Allow
	CONST_WIDE_INTs.
	* config/aarch64/predicates.md (aarch64_movti_operand)
	(aarch64_reg_or_imm): Use const_scalar_int_operand rather than
	const_int_operand.

gcc/testsuite/
	* gcc.target/aarch64/pr78733.c: Expect immediate moves.
	* gcc.target/aarch64/pr79041-2.c: Likewise.

Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	2017-11-06 08:58:24.140007587 +0000
+++ gcc/config/aarch64/aarch64.c	2017-11-06 10:42:55.272329213 +0000
@@ -10378,7 +10378,9 @@ aarch64_legitimate_pic_operand_p (rtx x)
 aarch64_legitimate_constant_p (machine_mode mode, rtx x)
 {
   /* Support CSE and rematerialization of common constants.  */
-  if (CONST_INT_P (x) || CONST_DOUBLE_P (x) || GET_CODE (x) == CONST_VECTOR)
+  if (CONST_SCALAR_INT_P (x)
+      || CONST_DOUBLE_P (x)
+      || GET_CODE (x) == CONST_VECTOR)
     return true;
 
   /* Do not allow vector struct mode constants.  We could support
@@ -10386,10 +10388,6 @@ aarch64_legitimate_constant_p (machine_m
   if (aarch64_vect_struct_mode_p (mode))
     return false;
 
-  /* Do not allow wide int constants - this requires support in movti.  */
-  if (CONST_WIDE_INT_P (x))
-    return false;
-
   /* Do not allow const (plus (anchor_symbol, const_int)).  */
   if (GET_CODE (x) == CONST)
     {
Index: gcc/config/aarch64/predicates.md
===================================================================
--- gcc/config/aarch64/predicates.md	2017-11-06 08:56:19.855082230 +0000
+++ gcc/config/aarch64/predicates.md	2017-11-06 10:42:55.273229537 +0000
@@ -250,15 +250,13 @@ (define_predicate "aarch64_mov_operand"
 		 (match_test "aarch64_mov_operand_p (op, mode)")))))
 
 (define_predicate "aarch64_movti_operand"
-  (and (match_code "reg,subreg,mem,const_int")
-       (ior (match_operand 0 "register_operand")
-	    (ior (match_operand 0 "memory_operand")
-		 (match_operand 0 "const_int_operand")))))
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "memory_operand")
+       (match_operand 0 "const_scalar_int_operand")))
 
 (define_predicate "aarch64_reg_or_imm"
-  (and (match_code "reg,subreg,const_int")
-       (ior (match_operand 0 "register_operand")
-	    (match_operand 0 "const_int_operand"))))
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "const_scalar_int_operand")))
 
 ;; True for integer comparisons and for FP comparisons other than LTGT or UNEQ.
 (define_special_predicate "aarch64_comparison_operator"
Index: gcc/testsuite/gcc.target/aarch64/pr78733.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/pr78733.c	2017-02-23 19:54:05.000000000 +0000
+++ gcc/testsuite/gcc.target/aarch64/pr78733.c	2017-11-06 10:42:55.273229537 +0000
@@ -7,4 +7,5 @@ t (void)
   return (__int128)1 << 80;
 }
 
-/* { dg-final { scan-assembler "adr" } } */
+/* { dg-final { scan-assembler "\tmov\tx0, 0" } } */
+/* { dg-final { scan-assembler "\tmov\tx1, 65536" } } */
Index: gcc/testsuite/gcc.target/aarch64/pr79041-2.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/pr79041-2.c	2017-07-27 10:37:55.117035178 +0100
+++ gcc/testsuite/gcc.target/aarch64/pr79041-2.c	2017-11-06 10:42:55.273229537 +0000
@@ -8,5 +8,6 @@ t (void)
   return (__int128)1 << 80;
 }
 
-/* { dg-final { scan-assembler "adr" } } */
+/* { dg-final { scan-assembler "\tmov\tx0, 0" } } */
+/* { dg-final { scan-assembler "\tmov\tx1, 65536" } } */
 /* { dg-final { scan-assembler-not "adrp" } } */
Wilco Dijkstra Nov. 6, 2017, 1:50 p.m. UTC | #5
Richard Sandiford wrote:
>
> Yeah, I'd hit this too.  I think it's a latent bug that just
> happened to be exposed by Wilco's patch: although the *movti_aarch64
> predicate disallows const_wide_int, the constraints allow it via "n",
> which means that the RA can rematerialise a const_wide_int that would
> otherwise be spilled or forced to memory.

Yes, I explicitly disallowed const_wide_int because otherwise it failed
in Fortran code. Clearly there were more corner cases...

> Maybe the best fix would be just to go ahead and add support for
> const_wide_int, as with the patch below.

But then it always uses a MOV/MOVK expansion, no matter how complex the
constant is. That's inefficient since it can take up to 8 instructions.
It's best to load complex immediates from the literal pool, so we need
a cutoff (e.g. sum of aarch64_internal_mov_immediate of both halves <= 4),
and use a literal load otherwise, just like we do for floating point
constants.
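
A sketch of one way such a cutoff could look (an assumption, not committed
code; aarch64_internal_mov_immediate returns the number of instructions
required and emits nothing when its GENERATE argument is false):

   /* Hypothetical helper: return true if IMM, a 128-bit CONST_WIDE_INT,
      is cheap enough to build with MOV/MOVK rather than a literal load.  */
   static bool
   aarch64_mov128_immediate_cheap_p (rtx imm)
   {
     /* A TImode CONST_WIDE_INT has two 64-bit elements.  */
     rtx lo = GEN_INT (CONST_WIDE_INT_ELT (imm, 0));
     rtx hi = GEN_INT (CONST_WIDE_INT_ELT (imm, 1));

     /* Count instructions for each half without emitting anything.  */
     return (aarch64_internal_mov_immediate (NULL_RTX, lo, false, DImode)
	     + aarch64_internal_mov_immediate (NULL_RTX, hi, false, DImode))
	    <= 4;
   }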

Note those tests are there to test that literal pool accesses work as
expected, so we need to change them to ensure they continue to test that.

Wilco
Richard Sandiford Nov. 6, 2017, 2:44 p.m. UTC | #6
Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> Richard Sandiford wrote:
>>
>> Yeah, I'd hit this too.  I think it's a latent bug that just
>> happened to be exposed by Wilco's patch: although the *movti_aarch64
>> predicate disallows const_wide_int, the constraints allow it via "n",
>> which means that the RA can rematerialise a const_wide_int that would
>> otherwise be spilled or forced to memory.
>
> Yes, I explicitly disallowed const_wide_int because otherwise it failed
> in Fortran code. Clearly there were more corner cases...
>
>> Maybe the best fix would be just to go ahead and add support for
>> const_wide_int, as with the patch below.
>
> But then it always uses a MOV/MOVK expansion, no matter how complex the
> constant is. That's inefficient since it can take up to 8 instructions.
> It's best to load complex immediates from the literal pool, so we need
> a cutoff (e.g. sum of aarch64_internal_mov_immediate of both halves <= 4),
> and use a literal load otherwise, just like we do for floating point
> constants.
>
> Note those tests are there to test that literal pool accesses work as
> expected, so we need to change them to ensure they continue to test that.

OK.  Would you mind having a look at that?  I'm a bit swamped with SVE
stuff ATM :-)

Thanks,
Richard
Christophe Lyon Nov. 13, 2017, 1:54 p.m. UTC | #7
On 6 November 2017 at 15:44, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
>> Richard Sandiford wrote:
>>>
>>> Yeah, I'd hit this too.  I think it's a latent bug that just
>>> happened to be exposed by Wilco's patch: although the *movti_aarch64
>>> predicate disallows const_wide_int, the constraints allow it via "n",
>>> which means that the RA can rematerialise a const_wide_int that would
>>> otherwise be spilled or forced to memory.
>>
>> Yes, I explicitly disallowed const_wide_int because otherwise it failed
>> in Fortran code. Clearly there were more corner cases...
>>
>>> Maybe the best fix would be just to go ahead and add support for
>>> const_wide_int, as with the patch below.
>>
>> But then it always uses a MOV/MOVK expansion, no matter how complex the
>> constant is. That's inefficient since it can take up to 8 instructions.
>> It's best to load complex immediates from the literal pool, so we need
>> a cutoff (e.g. sum of aarch64_internal_mov_immediate of both halves <= 4),
>> and use a literal load otherwise, just like we do for floating point
>> constants.
>>
>> Note those tests are there to test that literal pool accesses work as
>> expected, so we need to change them to ensure they continue to test that.
>
> OK.  Would you mind having a look at that?  I'm a bit swamped with SVE
> stuff ATM :-)
>

Hi,
I've filed https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82964
to keep track of this.

Christophe

> Thanks,
> Richard
Jakub Jelinek Jan. 10, 2018, 1:55 p.m. UTC | #8
On Mon, Nov 06, 2017 at 01:50:07PM +0000, Wilco Dijkstra wrote:
> Richard Sandiford wrote:
> >
> > Yeah, I'd hit this too.  I think it's a latent bug that just
> > happened to be exposed by Wilco's patch: although the *movti_aarch64
> > predicate disallows const_wide_int, the constraints allow it via "n",
> > which means that the RA can rematerialise a const_wide_int that would
> > otherwise be spilled or forced to memory.
> 
> Yes, I explicitly disallowed const_wide_int because otherwise it failed
> in Fortran code. Clearly there were more corner cases...
> 
> > Maybe the best fix would be just to go ahead and add support for
> > const_wide_int, as with the patch below.
> 
> But then it always uses a MOV/MOVK expansion, no matter how complex the
> constant is. That's inefficient since it can take up to 8 instructions.
> It's best to load complex immediates from the literal pool, so we need
> a cutoff (e.g. sum of aarch64_internal_mov_immediate of both halves <= 4),
> and use a literal load otherwise, just like we do for floating point
> constants.

Can we apply Richard's patch and then perhaps incrementally improve things,
like selecting a subset of CONST_WIDE_INTs we want to accept for movti/movtf
and a corresponding constraint to be used instead of the "n" in
*movti_aarch64?

I mean, the trunk shouldn't remain so broken for months after a correct
patch has been posted.

For the new predicate/constraint, you can e.g. have a look at the i386
x86_64_hilo_int_operand predicate, which does a similar thing and is used
for similar purposes, namely a constant that is meant to be split where
each half needs to satisfy something.  If it is purely a performance/-Os
thing, you could just as well count the number of instructions needed to
construct it, or similar.  E.g. in the pr83726.C case, even when it is a
CONST_WIDE_INT, it is a rather cheap one: one half is 70, the other half 0.
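
Under the cutoff suggested earlier (sum of both halves <= 4), such a
constant would presumably be accepted: each half is a single instruction,
mov x0, 70 and mov x1, 0, for a total of 2.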

	Jakub

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a2eca64a9c13e44d223b5552c079ef4e09659e84..810c17416db01681e99a9eb8cc9f5af137ed2054 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -10173,49 +10173,46 @@  aarch64_legitimate_pic_operand_p (rtx x)
   return true;
 }
 
-/* Return true if X holds either a quarter-precision or
-     floating-point +0.0 constant.  */
-static bool
-aarch64_valid_floating_const (machine_mode mode, rtx x)
-{
-  if (!CONST_DOUBLE_P (x))
-    return false;
-
-  if (aarch64_float_const_zero_rtx_p (x))
-    return true;
-
-  /* We only handle moving 0.0 to a TFmode register.  */
-  if (!(mode == SFmode || mode == DFmode))
-    return false;
-
-  return aarch64_float_const_representable_p (x);
-}
+/* Implement TARGET_LEGITIMATE_CONSTANT_P hook.  Return true for constants
+   that should be rematerialized rather than spilled.  */
 
 static bool
 aarch64_legitimate_constant_p (machine_mode mode, rtx x)
 {
+  /* Support CSE and rematerialization of common constants.  */
+  if (CONST_INT_P (x) || CONST_DOUBLE_P (x) || GET_CODE (x) == CONST_VECTOR)
+    return true;
+
   /* Do not allow vector struct mode constants.  We could support
      0 and -1 easily, but they need support in aarch64-simd.md.  */
-  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
+  if (aarch64_vect_struct_mode_p (mode))
     return false;
 
-  /* This could probably go away because
-     we now decompose CONST_INTs according to expand_mov_immediate.  */
-  if ((GET_CODE (x) == CONST_VECTOR
-       && aarch64_simd_valid_immediate (x, mode, false, NULL))
-      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
-	return !targetm.cannot_force_const_mem (mode, x);
+  /* Do not allow wide int constants - this requires support in movti.  */
+  if (CONST_WIDE_INT_P (x))
+    return false;
 
-  if (GET_CODE (x) == HIGH
-      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
-    return true;
+  /* Do not allow const (plus (anchor_symbol, const_int)).  */
+  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
+  {
+    x = XEXP (XEXP (x, 0), 0);
+    if (SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
+      return false;
+  }
+
+  if (GET_CODE (x) == HIGH)
+    x = XEXP (x, 0);
 
   /* Treat symbols as constants.  Avoid TLS symbols as they are complex,
      so spilling them is better than rematerialization.  */
   if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
     return true;
 
-  return aarch64_constant_address_p (x);
+  /* Label references are always constant.  */
+  if (GET_CODE (x) == LABEL_REF)
+    return true;
+
+  return false;
 }
 
 rtx