diff mbox

Improve alloca alignment

Message ID DB6PR0801MB2053EFC180A8360BC943BEB883B90@DB6PR0801MB2053.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Wilco Dijkstra July 26, 2017, 5:39 p.m. UTC
This patch improves alloca alignment.  Currently alloca reserves
too much space as it aligns twice, and generates unnecessary stack
alignment code.  For example alloca (16) generates:

	sub	sp, sp, #32   ???
	mov	x1, sp

Similarly alloca (x) generates:

	add	x0, x0, 30    ???
	and	x0, x0, -16
	sub	sp, sp, x0
	mov	x0, sp

__builtin_alloca_with_align (x, 256):
	add	x0, x0, 78    ???
	and	x0, x0, -16
	sub	sp, sp, x0
	add	x0, sp, 63
	and	x0, x0, -64

As can be seen the alignment adjustment value is incorrect.
When the requested alignment is lower than the stack alignment, no
extra alignment is needed.  If the requested alignment is higher,
we need to increase the size by the difference of the requested 
alignment and the stack alignment.  As a result, the alloca alignment
is exactly as expected:

alloca (16):
	sub	sp, sp, #16
	mov	x1, sp

alloca (x):
	add	x0, x0, 15
	and	x0, x0, -16
	sub	sp, sp, x0
	mov	x0, sp

__builtin_alloca_with_align (x, 256):
	add	x0, x0, 63
	and	x0, x0, -16
	sub	sp, sp, x0
	add	x0, sp, 63
	and	x0, x0, -64

ChangeLog:
2017-07-26  Wilco Dijkstra  <wdijkstr@arm.com>

	* explow.c (get_dynamic_stack_size): Improve dynamic alignment.

--

Comments

Jeff Law July 26, 2017, 7:12 p.m. UTC | #1
On 07/26/2017 11:39 AM, Wilco Dijkstra wrote:
> This patch improves alloca alignment.  Currently alloca reserves
> too much space as it aligns twice, and generates unnecessary stack
> alignment code.  For example alloca (16) generates:
> 
> 	sub	sp, sp, #32   ???
> 	mov	x1, sp
> 
> Similarly alloca (x) generates:
> 
> 	add	x0, x0, 30    ???
> 	and	x0, x0, -16
> 	sub	sp, sp, x0
> 	mov	x0, sp
> 
> __builtin_alloca_with_align (x, 256):
> 	add	x0, x0, 78    ???
> 	and	x0, x0, -16
> 	sub	sp, sp, x0
> 	add	x0, sp, 63
> 	and	x0, x0, -64
> 
> As can be seen the alignment adjustment value is incorrect.
> When the requested alignment is lower than the stack alignment, no
> extra alignment is needed.  If the requested alignment is higher,
> we need to increase the size by the difference of the requested 
> alignment and the stack alignment.  As a result, the alloca alignment
> is exactly as expected:
> 
> alloca (16):
> 	sub	sp, sp, #16
> 	mov	x1, sp
> 
> alloca (x):
> 	add	x0, x0, 15
> 	and	x0, x0, -16
> 	sub	sp, sp, x0
> 	mov	x0, sp
> 
> __builtin_alloca_with_align (x, 256):
> 	add	x0, x0, 63
> 	and	x0, x0, -16
> 	sub	sp, sp, x0
> 	add	x0, sp, 63
> 	and	x0, x0, -64
> 
> ChangeLog:
> 2017-07-26  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* explow.c (get_dynamic_stack_size): Improve dynamic alignment.
Hehe.  This stuff is a mess.  Dominik went round and round in this code
about a year ago, I'm not sure we ever got it right for the cases he
wanted to improve.

This is something I wanted to look at but had deferred (it makes writing
some stack-clash tests of this code more painful than it really needs to
be).


> 
> --
> diff --git a/gcc/explow.c b/gcc/explow.c
> index 50074e281edd5270c76d29feac6b7a92f598d11d..fbdda5fa1e303664e346f975270415b40aed252d 100644
> --- a/gcc/explow.c
> +++ b/gcc/explow.c
> @@ -1234,15 +1234,22 @@ get_dynamic_stack_size (rtx *psize, unsigned size_align,
>       example), so we must preventively align the value.  We leave space
>       in SIZE for the hole that might result from the alignment operation.  */
>  
> -  extra = (required_align - BITS_PER_UNIT) / BITS_PER_UNIT;
> -  size = plus_constant (Pmode, size, extra);
> -  size = force_operand (size, NULL_RTX);
> -
> -  if (flag_stack_usage_info && pstack_usage_size)
> -    *pstack_usage_size += extra;
> +  /* Since the stack is presumed to be aligned before this allocation,
> +     we only need to increase the size of the allocation if the required
> +     alignment is more than the stack alignment.
> +     Note size_align doesn't need to be updated - if it is larger than the
> +     stack alignment, size remains a multiple of the stack alignment, so
> +     we can skip rounding up to the stack alignment.  */
> +  if (required_align > MAX_SUPPORTED_STACK_ALIGNMENT)
> +    {
> +      extra = (required_align - MAX_SUPPORTED_STACK_ALIGNMENT)
> +	/ BITS_PER_UNIT;
> +      size = plus_constant (Pmode, size, extra);
> +      size = force_operand (size, NULL_RTX);
>  
> -  if (extra && size_align > BITS_PER_UNIT)
> -    size_align = BITS_PER_UNIT;
> +      if (flag_stack_usage_info && pstack_usage_size)
> +	*pstack_usage_size += extra;
> +    }
So it's unclear to me why you increase the size iff REQUIRED_ALIGN is
greater than MAX_SUPPORTED_STACK_ALIGNMENT.

ISTM the safe assumption we can make in this code is that the stack is
aligned to STACK_BOUNDARY.  If so, isn't the right test

(required_align > STACK_BOUNDARY)?

And once we decide to make an adjustment, isn't the right adjustment to
make (required_align - STACK_BOUNDARY) / BITS_PER_UNIT?

I feel like I must be missing something really important here.

jeff
Wilco Dijkstra July 26, 2017, 11:29 p.m. UTC | #2
Jeff Law wrote:

> +  if (required_align > MAX_SUPPORTED_STACK_ALIGNMENT)
> +    {
> +      extra = (required_align - MAX_SUPPORTED_STACK_ALIGNMENT)
> +     / BITS_PER_UNIT;
> +      size = plus_constant (Pmode, size, extra);
> +      size = force_operand (size, NULL_RTX);

> -  if (extra && size_align > BITS_PER_UNIT)
> -    size_align = BITS_PER_UNIT;
> +      if (flag_stack_usage_info && pstack_usage_size)
> +     *pstack_usage_size += extra;
> +    }

> So it's unclear to me why you increase the size iff REQUIRED_ALIGN is
> greater than MAX_SUPPORTED_STACK_ALIGNMENT.
>
> ISTM the safe assumption we can make in this code is that the stack is
> aligned to STACK_BOUNDARY.  If so, isn't the right test
>
> (required_align > STACK_BOUNDARY)?
>
> And once we decide to make an adjustment, isn't the right adjustment to
> make (required_align - STACK_BOUNDARY) / BITS_PER_UNIT?
>
> I feel like I must be missing something really important here.

Yes I think you're right that if STACK_BOUNDARY is the guaranteed stack
alignment, it should check against STACK_BOUNDARY indeed.

But then the check size_align % MAX_SUPPORTED_STACK_ALIGNMENT != 0
seems wrong too given that round_push uses a different alignment to align to. 

Wilco
Jeff Law July 27, 2017, 4:59 p.m. UTC | #3
On 07/26/2017 05:29 PM, Wilco Dijkstra wrote:
> Jeff Law wrote:
> 
>> +  if (required_align > MAX_SUPPORTED_STACK_ALIGNMENT)
>> +    {
>> +      extra = (required_align - MAX_SUPPORTED_STACK_ALIGNMENT)
>> +     / BITS_PER_UNIT;
>> +      size = plus_constant (Pmode, size, extra);
>> +      size = force_operand (size, NULL_RTX);
>>   
>> -  if (extra && size_align > BITS_PER_UNIT)
>> -    size_align = BITS_PER_UNIT;
>> +      if (flag_stack_usage_info && pstack_usage_size)
>> +     *pstack_usage_size += extra;
>> +    }
> 
>> So it's unclear to me why you increase the size iff REQUIRED_ALIGN is
>> greater than MAX_SUPPORTED_STACK_ALIGNMENT.
>>
>> ISTM the safe assumption we can make in this code is that the stack is
>> aligned to STACK_BOUNDARY.  If so, isn't the right test
>>
>> (required_align > STACK_BOUNDARY)?
>>
>> And once we decide to make an adjustment, isn't the right adjustment to
>> make (required_align - STACK_BOUNDARY) / BITS_PER_UNIT?
>>
>> I feel like I must be missing something really important here.
> 
> Yes I think you're right that if STACK_BOUNDARY is the guaranteed stack
> alignment, it should check against STACK_BOUNDARY indeed.
Seems that way to me.

> 
> But then the check size_align % MAX_SUPPORTED_STACK_ALIGNMENT != 0
> seems wrong too given that round_push uses a different alignment to align to. 
I had started to dig into the history of this code, but just didn't have
the time to do so fully before needing to leave for the day.  To some
degree I was hoping you knew the rationale behind the test against
MAX_SUPPORTED_STACK_ALIGNMENT and I wouldn't have to do a ton of digging :-)

Jeff
diff mbox

Patch

diff --git a/gcc/explow.c b/gcc/explow.c
index 50074e281edd5270c76d29feac6b7a92f598d11d..fbdda5fa1e303664e346f975270415b40aed252d 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -1234,15 +1234,22 @@  get_dynamic_stack_size (rtx *psize, unsigned size_align,
      example), so we must preventively align the value.  We leave space
      in SIZE for the hole that might result from the alignment operation.  */
 
-  extra = (required_align - BITS_PER_UNIT) / BITS_PER_UNIT;
-  size = plus_constant (Pmode, size, extra);
-  size = force_operand (size, NULL_RTX);
-
-  if (flag_stack_usage_info && pstack_usage_size)
-    *pstack_usage_size += extra;
+  /* Since the stack is presumed to be aligned before this allocation,
+     we only need to increase the size of the allocation if the required
+     alignment is more than the stack alignment.
+     Note size_align doesn't need to be updated - if it is larger than the
+     stack alignment, size remains a multiple of the stack alignment, so
+     we can skip rounding up to the stack alignment.  */
+  if (required_align > MAX_SUPPORTED_STACK_ALIGNMENT)
+    {
+      extra = (required_align - MAX_SUPPORTED_STACK_ALIGNMENT)
+	/ BITS_PER_UNIT;
+      size = plus_constant (Pmode, size, extra);
+      size = force_operand (size, NULL_RTX);
 
-  if (extra && size_align > BITS_PER_UNIT)
-    size_align = BITS_PER_UNIT;
+      if (flag_stack_usage_info && pstack_usage_size)
+	*pstack_usage_size += extra;
+    }
 
   /* Round the size to a multiple of the required stack alignment.
      Since the stack is presumed to be rounded before this allocation,