diff mbox

[AArch64] Add legitimize_address_displacement hook

Message ID AM5PR0802MB2610DBDD0231C2837795E8C9831D0@AM5PR0802MB2610.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Wilco Dijkstra Aug. 10, 2016, 4:31 p.m. UTC
Richard Earnshaw wrote:
> OK.  But please enhance the comment with some explanation as to WHY
> you've chosen to use just two base pairings rather than separate bases
> for each access size.

OK, here is the updated patch, which also handles unaligned accesses and
thereby further improves the benefit:

This patch adds a legitimize_address_displacement hook so that stack accesses
with large offsets are split into a more efficient sequence.  Unaligned and
TI/TFmode accesses use a 256-byte range, byte and halfword accesses use a 4KB
range, and wider accesses use a 16KB range, to maximise the available
addressing range and increase opportunities to share the base address.

int f(int x)
{
  int arr[8192];
  arr[4096] = 0;
  arr[6000] = 0;
  arr[7000] = 0;
  arr[8191] = 0;
  return arr[x];
}

Now generates:

	sub	sp, sp, #32768
	add	x1, sp, 16384
	str	wzr, [x1]
	str	wzr, [x1, 7616]
	str	wzr, [x1, 11616]
	str	wzr, [x1, 16380]
	ldr	w0, [sp, w0, sxtw 2]
	add	sp, sp, 32768
	ret

instead of:

	sub	sp, sp, #32768
	mov	x2, 28000
	add	x1, sp, 16384
	mov	x3, 32764
	str	wzr, [x1]
	mov	x1, 24000
	add	x1, sp, x1
	str	wzr, [x1]
	add	x1, sp, x2
	str	wzr, [x1]
	add	x1, sp, x3
	str	wzr, [x1]
	ldr	w0, [sp, w0, sxtw 2]
	add	sp, sp, 32768
	ret

Bootstrap, GCC regression OK.

ChangeLog:
2016-08-10  Wilco Dijkstra  <wdijkstr@arm.com>

    gcc/
	* config/aarch64/aarch64.c (aarch64_legitimize_address_displacement):
	New function.
	(TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT): Define.
--

Comments

Richard Earnshaw (lists) Aug. 30, 2016, 1:31 p.m. UTC | #1
On 10/08/16 17:31, Wilco Dijkstra wrote:
> Richard Earnshaw wrote:
>> OK.  But please enhance the comment with some explanation as to WHY
>> you've chosen to use just two base pairings rather than separate bases
>> for each access size.
> 
> OK here is the updated patch which also handles unaligned accesses
> which further improves the benefit:
> 
> This patch adds legitimize_address_displacement hook so that stack accesses
> with large offsets are split into a more efficient sequence.  Unaligned and 
> TI/TFmode use a 256-byte range, byte and halfword accesses use a 4KB range,
> wider accesses use a 16KB range to maximise the available addressing range
> and increase opportunities to share the base address.
> 
> int f(int x)
> {
>   int arr[8192];
>   arr[4096] = 0;
>   arr[6000] = 0;
>   arr[7000] = 0;
>   arr[8191] = 0;
>   return arr[x];
> }
> 
> Now generates:
> 
> 	sub	sp, sp, #32768
> 	add	x1, sp, 16384
> 	str	wzr, [x1]
> 	str	wzr, [x1, 7616]
> 	str	wzr, [x1, 11616]
> 	str	wzr, [x1, 16380]
> 	ldr	w0, [sp, w0, sxtw 2]
> 	add	sp, sp, 32768
> 	ret
> 
> instead of:
> 
> 	sub	sp, sp, #32768
> 	mov	x2, 28000
> 	add	x1, sp, 16384
> 	mov	x3, 32764
> 	str	wzr, [x1]
> 	mov	x1, 24000
> 	add	x1, sp, x1
> 	str	wzr, [x1]
> 	add	x1, sp, x2
> 	str	wzr, [x1]
> 	add	x1, sp, x3
> 	str	wzr, [x1]
> 	ldr	w0, [sp, w0, sxtw 2]
> 	add	sp, sp, 32768
> 	ret
> 
> Bootstrap, GCC regression OK.
> 
> ChangeLog:
> 2016-08-10  Wilco Dijkstra  <wdijkstr@arm.com>
> 
>     gcc/
> 	* config/aarch64/aarch64.c (aarch64_legitimize_address_displacement):
> 	New function.
> 	(TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT): Define.

OK.

R.

> --
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 9a5fc199128b1326d0fb2afe0833aa6a5ce62ddf..b8536175a84b76f8c2939e61f1379ae279b20d43 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -4173,6 +4173,24 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
>    return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
>  }
>  
> +/* Split an out-of-range address displacement into a base and offset.
> +   Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
> +   to increase opportunities for sharing the base address of different sizes.
> +   For TI/TFmode and unaligned accesses use a 256-byte range.  */
> +static bool
> +aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
> +{
> +  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
> +
> +  if (mode == TImode || mode == TFmode ||
> +      (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
> +     mask = 0xff;
> +
> +  *off = GEN_INT (INTVAL (*disp) & ~mask);
> +  *disp = GEN_INT (INTVAL (*disp) & mask);
> +  return true;
> +}
> +
>  /* Return TRUE if rtx X is immediate constant 0.0 */
>  bool
>  aarch64_float_const_zero_rtx_p (rtx x)
> @@ -14137,6 +14155,10 @@ aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
>  #undef TARGET_LEGITIMATE_CONSTANT_P
>  #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
>  
> +#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
> +#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
> +  aarch64_legitimize_address_displacement
> +
>  #undef TARGET_LIBGCC_CMP_RETURN_MODE
>  #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
>  
>
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9a5fc199128b1326d0fb2afe0833aa6a5ce62ddf..b8536175a84b76f8c2939e61f1379ae279b20d43 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4173,6 +4173,24 @@  aarch64_legitimate_address_p (machine_mode mode, rtx x,
   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
 }
 
+/* Split an out-of-range address displacement into a base and offset.
+   Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
+   to increase opportunities for sharing the base address of different sizes.
+   For TI/TFmode and unaligned accesses use a 256-byte range.  */
+static bool
+aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
+{
+  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
+
+  if (mode == TImode || mode == TFmode ||
+      (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
+     mask = 0xff;
+
+  *off = GEN_INT (INTVAL (*disp) & ~mask);
+  *disp = GEN_INT (INTVAL (*disp) & mask);
+  return true;
+}
+
 /* Return TRUE if rtx X is immediate constant 0.0 */
 bool
 aarch64_float_const_zero_rtx_p (rtx x)
@@ -14137,6 +14155,10 @@  aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
 #undef TARGET_LEGITIMATE_CONSTANT_P
 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
 
+#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
+#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
+  aarch64_legitimize_address_displacement
+
 #undef TARGET_LIBGCC_CMP_RETURN_MODE
 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode