diff mbox series

x86: Don't use get_frame_size to finalize stack frame

Message ID 20181213173637.GA30946@intel.com
State New
Headers show
Series x86: Don't use get_frame_size to finalize stack frame | expand

Commit Message

H.J. Lu Dec. 13, 2018, 5:36 p.m. UTC
get_frame_size () returns used stack slots during compilation, which
may be optimized out later.  Since ix86_find_max_used_stack_alignment
is called by ix86_finalize_stack_frame_flags to check if stack frame
is required, there is no need to call get_frame_size () which may give
inaccurate final stack frame size.

Tested on AVX512 machine configured with

--with-arch=native --with-cpu=native

OK for trunk?


H.J.
---
gcc/

	PR target/88483
	* config/i386/i386.c (ix86_finalize_stack_frame_flags): Don't
	use get_frame_size ().

gcc/testsuite/

	PR target/88483
	* gcc.target/i386/stackalign/pr88483.c: New test.
---
 gcc/config/i386/i386.c                          |  1 -
 .../gcc.target/i386/stackalign/pr88483.c        | 17 +++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483.c

Comments

Uros Bizjak Dec. 14, 2018, 7:11 a.m. UTC | #1
On Thu, Dec 13, 2018 at 6:36 PM H.J. Lu <hongjiu.lu@intel.com> wrote:
>
> get_frame_size () returns used stack slots during compilation, which
> may be optimized out later.  Since ix86_find_max_used_stack_alignment
> is called by ix86_finalize_stack_frame_flags to check if stack frame
> is required, there is no need to call get_frame_size () which may give
> inaccurate final stack frame size.
>
> Tested on AVX512 machine configured with
>
> --with-arch=native --with-cpu=native
>
> OK for trunk?
>
>
> H.J.
> ---
> gcc/
>
>         PR target/88483
>         * config/i386/i386.c (ix86_finalize_stack_frame_flags): Don't
>         use get_frame_size ().
>
> gcc/testsuite/
>
>         PR target/88483
>         * gcc.target/i386/stackalign/pr88483.c: New test.

LGTM, but you know this part of the compiler better than I.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.c                          |  1 -
>  .../gcc.target/i386/stackalign/pr88483.c        | 17 +++++++++++++++++
>  2 files changed, 17 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index caa701fe242..edc8f4f092e 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -12876,7 +12876,6 @@ ix86_finalize_stack_frame_flags (void)
>            && flag_exceptions
>            && cfun->can_throw_non_call_exceptions)
>        && !ix86_frame_pointer_required ()
> -      && get_frame_size () == 0
>        && ix86_nsaved_sseregs () == 0
>        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
>      {
> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
> new file mode 100644
> index 00000000000..5aec8fd4cf6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-O2 -mavx2" } */
> +
> +struct B
> +{
> +  char a[12];
> +  int b;
> +};
> +
> +struct B
> +f2 (void)
> +{
> +  struct B x = {};
> +  return x;
> +}
> +
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
> --
> 2.19.2
>
H.J. Lu Dec. 14, 2018, 11:01 p.m. UTC | #2
On Thu, Dec 13, 2018 at 11:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Dec 13, 2018 at 6:36 PM H.J. Lu <hongjiu.lu@intel.com> wrote:
> >
> > get_frame_size () returns used stack slots during compilation, which
> > may be optimized out later.  Since ix86_find_max_used_stack_alignment
> > is called by ix86_finalize_stack_frame_flags to check if stack frame
> > is required, there is no need to call get_frame_size () which may give
> > inaccurate final stack frame size.
> >
> > Tested on AVX512 machine configured with
> >
> > --with-arch=native --with-cpu=native
> >
> > OK for trunk?
> >
> >
> > H.J.
> > ---
> > gcc/
> >
> >         PR target/88483
> >         * config/i386/i386.c (ix86_finalize_stack_frame_flags): Don't
> >         use get_frame_size ().
> >
> > gcc/testsuite/
> >
> >         PR target/88483
> >         * gcc.target/i386/stackalign/pr88483.c: New test.
>
> LGTM, but you know this part of the compiler better than I.
>
> Thanks,
> Uros.
>
> > ---
> >  gcc/config/i386/i386.c                          |  1 -
> >  .../gcc.target/i386/stackalign/pr88483.c        | 17 +++++++++++++++++
> >  2 files changed, 17 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index caa701fe242..edc8f4f092e 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -12876,7 +12876,6 @@ ix86_finalize_stack_frame_flags (void)
> >            && flag_exceptions
> >            && cfun->can_throw_non_call_exceptions)
> >        && !ix86_frame_pointer_required ()
> > -      && get_frame_size () == 0
> >        && ix86_nsaved_sseregs () == 0
> >        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
> >      {
> > diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
> > new file mode 100644
> > index 00000000000..5aec8fd4cf6
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> > +/* { dg-options "-O2 -mavx2" } */
> > +
> > +struct B
> > +{
> > +  char a[12];
> > +  int b;
> > +};
> > +
> > +struct B
> > +f2 (void)
> > +{
> > +  struct B x = {};
> > +  return x;
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
> > --
> > 2.19.2
> >

My fix triggered a latent bug in ix86_find_max_used_stack_alignment.
Here is the fix.  OK for trunk?

Thanks.
Jeff Law Dec. 14, 2018, 11:24 p.m. UTC | #3
On 12/14/18 4:01 PM, H.J. Lu wrote:
> On Thu, Dec 13, 2018 at 11:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Thu, Dec 13, 2018 at 6:36 PM H.J. Lu <hongjiu.lu@intel.com> wrote:
>>> get_frame_size () returns used stack slots during compilation, which
>>> may be optimized out later.  Since ix86_find_max_used_stack_alignment
>>> is called by ix86_finalize_stack_frame_flags to check if stack frame
>>> is required, there is no need to call get_frame_size () which may give
>>> inaccurate final stack frame size.
>>>
>>> Tested on AVX512 machine configured with
>>>
>>> --with-arch=native --with-cpu=native
>>>
>>> OK for trunk?
>>>
>>>
>>> H.J.
>>> ---
>>> gcc/
>>>
>>>         PR target/88483
>>>         * config/i386/i386.c (ix86_finalize_stack_frame_flags): Don't
>>>         use get_frame_size ().
>>>
>>> gcc/testsuite/
>>>
>>>         PR target/88483
>>>         * gcc.target/i386/stackalign/pr88483.c: New test.
>> LGTM, but you know this part of the compiler better than I.
>>
>> Thanks,
>> Uros.
>>
>>> ---
>>>  gcc/config/i386/i386.c                          |  1 -
>>>  .../gcc.target/i386/stackalign/pr88483.c        | 17 +++++++++++++++++
>>>  2 files changed, 17 insertions(+), 1 deletion(-)
>>>  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>>>
>>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>>> index caa701fe242..edc8f4f092e 100644
>>> --- a/gcc/config/i386/i386.c
>>> +++ b/gcc/config/i386/i386.c
>>> @@ -12876,7 +12876,6 @@ ix86_finalize_stack_frame_flags (void)
>>>            && flag_exceptions
>>>            && cfun->can_throw_non_call_exceptions)
>>>        && !ix86_frame_pointer_required ()
>>> -      && get_frame_size () == 0
>>>        && ix86_nsaved_sseregs () == 0
>>>        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
>>>      {
>>> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>>> new file mode 100644
>>> index 00000000000..5aec8fd4cf6
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>>> @@ -0,0 +1,17 @@
>>> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
>>> +/* { dg-options "-O2 -mavx2" } */
>>> +
>>> +struct B
>>> +{
>>> +  char a[12];
>>> +  int b;
>>> +};
>>> +
>>> +struct B
>>> +f2 (void)
>>> +{
>>> +  struct B x = {};
>>> +  return x;
>>> +}
>>> +
>>> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
>>> --
>>> 2.19.2
>>>
> My fix triggered a latent bug in ix86_find_max_used_stack_alignment.
> Here is the fix.  OK for trunk?
> 
> Thanks.
> 
> -- H.J.
> 
> 
> 0001-x86-Properly-check-stack-reference.patch
> 
> From 83f0b37f287ed198a3b50e2be6b0f7f5c154020e Mon Sep 17 00:00:00 2001
> From: "H.J. Lu" <hjl.tools@gmail.com>
> Date: Fri, 14 Dec 2018 12:21:02 -0800
> Subject: [PATCH] x86: Properly check stack reference
> 
> A latent bug in ix86_find_max_used_stack_alignment was uncovered by the
> fix for PR target/88483, which caused:
> 
> FAIL: gcc.target/i386/incoming-8.c scan-assembler andl[\\t ]*\\$-16,[\\t ]*%esp
> 
> on i386.  ix86_find_max_used_stack_alignment failed to notice stack
> reference via non-stack/frame registers and missed stack alignment
> requirement.  We should track all registers which may reference stack
> by checking registers set from stack referencing registers.
> 
> Tested on i686 and x86-64 with
> 
> --with-arch=native --with-cpu=native
> 
> on AVX512 machine.  Tested on i686 and x86-64 without
> 
> --with-arch=native --with-cpu=native
> 
> on x86-64 machine.
> 
> 	PR target/88483
> 	* config/i386/i386.c (ix86_stack_referenced_p): New function.
> 	(ix86_find_max_used_stack_alignment): Call ix86_stack_referenced_p
> 	to check if stack is referenced.
> ---
>  gcc/config/i386/i386.c | 43 ++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 39 insertions(+), 4 deletions(-)
> 
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 4599ca2a7d5..bf93ec3722f 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -12777,6 +12777,18 @@ output_probe_stack_range (rtx reg, rtx end)
>    return "";
>  }
>  
> +/* Return true if OP references stack frame though one of registers
> +   in STACK_REF_REGS.  */
> +
> +static bool
> +ix86_stack_referenced_p (const_rtx op, rtx *stack_ref_regs)
> +{
> +  for (int i = 0; i < LAST_REX_INT_REG; i++)
> +    if (stack_ref_regs[i] && reg_mentioned_p (stack_ref_regs[i], op))
> +      return true;
> +  return false;
> +}
> +
>  /* Return true if stack frame is required.  Update STACK_ALIGNMENT
>     to the largest alignment, in bits, of stack slot used if stack
>     frame is required and CHECK_STACK_SLOT is true.  */
> @@ -12801,6 +12813,12 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
>  
>    bool require_stack_frame = false;
>  
> +  /* Array of hard registers which reference stack frame.  */
> +  rtx stack_ref_regs[LAST_REX_INT_REG];
> +  memset (stack_ref_regs, 0, sizeof (stack_ref_regs));
> +  stack_ref_regs[STACK_POINTER_REGNUM] = stack_pointer_rtx;
> +  stack_ref_regs[FRAME_POINTER_REGNUM] = frame_pointer_rtx;
> +
>    FOR_EACH_BB_FN (bb, cfun)
>      {
>        rtx_insn *insn;
> @@ -12811,16 +12829,33 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
>  	  {
>  	    require_stack_frame = true;
>  
> +	    rtx body = PATTERN (insn);
> +	    if (GET_CODE (body) == SET)
> +	      {
> +		/* If a register is set from a stack referencing
> +		   register, it is a stack referencing register.  */
> +		rtx dest = SET_DEST (body);
> +		if (REG_P (dest) && !MEM_P (SET_SRC (body)))
> +		  {
> +		    int regno = REGNO (dest);
> +		    gcc_assert (regno < FIRST_PSEUDO_REGISTER);
> +		    if (GENERAL_REGNO_P (regno))
> +		      {
> +			add_to_hard_reg_set (&set_up_by_prologue, Pmode,
> +					     regno);
> +			stack_ref_regs[regno] = dest;
> +		      }
> +		  }
> +	      }
> +
>  	    if (check_stack_slot)
>  	      {
>  		/* Find the maximum stack alignment.  */
>  		subrtx_iterator::array_type array;
>  		FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
>  		  if (MEM_P (*iter)
> -		      && (reg_mentioned_p (stack_pointer_rtx,
> -					   *iter)
> -			  || reg_mentioned_p (frame_pointer_rtx,
> -					      *iter)))
> +		      && ix86_stack_referenced_p (*iter,
> +						  stack_ref_regs))
>  		    {
>  		      unsigned int alignment = MEM_ALIGN (*iter);
>  		      if (alignment > stack_alignment)
This just does a linear scan of the blocks and insns within the block
building up STACK_REF_REGS as we go.

The problem is there's no guarantee that we're visiting the blocks in
execution order.  So we might see an insn that indirectly references the
stack reg *before* we see the insn which had the copy from the stack
pointer.

Or am I missing something?

Jeff
H.J. Lu Dec. 15, 2018, 12:04 a.m. UTC | #4
On Fri, Dec 14, 2018 at 3:24 PM Jeff Law <law@redhat.com> wrote:
>
> On 12/14/18 4:01 PM, H.J. Lu wrote:
> > On Thu, Dec 13, 2018 at 11:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >> On Thu, Dec 13, 2018 at 6:36 PM H.J. Lu <hongjiu.lu@intel.com> wrote:
> >>> get_frame_size () returns used stack slots during compilation, which
> >>> may be optimized out later.  Since ix86_find_max_used_stack_alignment
> >>> is called by ix86_finalize_stack_frame_flags to check if stack frame
> >>> is required, there is no need to call get_frame_size () which may give
> >>> inaccurate final stack frame size.
> >>>
> >>> Tested on AVX512 machine configured with
> >>>
> >>> --with-arch=native --with-cpu=native
> >>>
> >>> OK for trunk?
> >>>
> >>>
> >>> H.J.
> >>> ---
> >>> gcc/
> >>>
> >>>         PR target/88483
> >>>         * config/i386/i386.c (ix86_finalize_stack_frame_flags): Don't
> >>>         use get_frame_size ().
> >>>
> >>> gcc/testsuite/
> >>>
> >>>         PR target/88483
> >>>         * gcc.target/i386/stackalign/pr88483.c: New test.
> >> LGTM, but you know this part of the compiler better than I.
> >>
> >> Thanks,
> >> Uros.
> >>
> >>> ---
> >>>  gcc/config/i386/i386.c                          |  1 -
> >>>  .../gcc.target/i386/stackalign/pr88483.c        | 17 +++++++++++++++++
> >>>  2 files changed, 17 insertions(+), 1 deletion(-)
> >>>  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
> >>>
> >>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> >>> index caa701fe242..edc8f4f092e 100644
> >>> --- a/gcc/config/i386/i386.c
> >>> +++ b/gcc/config/i386/i386.c
> >>> @@ -12876,7 +12876,6 @@ ix86_finalize_stack_frame_flags (void)
> >>>            && flag_exceptions
> >>>            && cfun->can_throw_non_call_exceptions)
> >>>        && !ix86_frame_pointer_required ()
> >>> -      && get_frame_size () == 0
> >>>        && ix86_nsaved_sseregs () == 0
> >>>        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
> >>>      {
> >>> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
> >>> new file mode 100644
> >>> index 00000000000..5aec8fd4cf6
> >>> --- /dev/null
> >>> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
> >>> @@ -0,0 +1,17 @@
> >>> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> >>> +/* { dg-options "-O2 -mavx2" } */
> >>> +
> >>> +struct B
> >>> +{
> >>> +  char a[12];
> >>> +  int b;
> >>> +};
> >>> +
> >>> +struct B
> >>> +f2 (void)
> >>> +{
> >>> +  struct B x = {};
> >>> +  return x;
> >>> +}
> >>> +
> >>> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
> >>> --
> >>> 2.19.2
> >>>
> > My fix triggered a latent bug in ix86_find_max_used_stack_alignment.
> > Here is the fix.  OK for trunk?
> >
> > Thanks.
> >
> > -- H.J.
> >
> >
> > 0001-x86-Properly-check-stack-reference.patch
> >
> > From 83f0b37f287ed198a3b50e2be6b0f7f5c154020e Mon Sep 17 00:00:00 2001
> > From: "H.J. Lu" <hjl.tools@gmail.com>
> > Date: Fri, 14 Dec 2018 12:21:02 -0800
> > Subject: [PATCH] x86: Properly check stack reference
> >
> > A latent bug in ix86_find_max_used_stack_alignment was uncovered by the
> > fix for PR target/88483, which caused:
> >
> > FAIL: gcc.target/i386/incoming-8.c scan-assembler andl[\\t ]*\\$-16,[\\t ]*%esp
> >
> > on i386.  ix86_find_max_used_stack_alignment failed to notice stack
> > reference via non-stack/frame registers and missed stack alignment
> > requirement.  We should track all registers which may reference stack
> > by checking registers set from stack referencing registers.
> >
> > Tested on i686 and x86-64 with
> >
> > --with-arch=native --with-cpu=native
> >
> > on AVX512 machine.  Tested on i686 and x86-64 without
> >
> > --with-arch=native --with-cpu=native
> >
> > on x86-64 machine.
> >
> >       PR target/88483
> >       * config/i386/i386.c (ix86_stack_referenced_p): New function.
> >       (ix86_find_max_used_stack_alignment): Call ix86_stack_referenced_p
> >       to check if stack is referenced.
> > ---
> >  gcc/config/i386/i386.c | 43 ++++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 39 insertions(+), 4 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index 4599ca2a7d5..bf93ec3722f 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -12777,6 +12777,18 @@ output_probe_stack_range (rtx reg, rtx end)
> >    return "";
> >  }
> >
> > +/* Return true if OP references stack frame though one of registers
> > +   in STACK_REF_REGS.  */
> > +
> > +static bool
> > +ix86_stack_referenced_p (const_rtx op, rtx *stack_ref_regs)
> > +{
> > +  for (int i = 0; i < LAST_REX_INT_REG; i++)
> > +    if (stack_ref_regs[i] && reg_mentioned_p (stack_ref_regs[i], op))
> > +      return true;
> > +  return false;
> > +}
> > +
> >  /* Return true if stack frame is required.  Update STACK_ALIGNMENT
> >     to the largest alignment, in bits, of stack slot used if stack
> >     frame is required and CHECK_STACK_SLOT is true.  */
> > @@ -12801,6 +12813,12 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> >
> >    bool require_stack_frame = false;
> >
> > +  /* Array of hard registers which reference stack frame.  */
> > +  rtx stack_ref_regs[LAST_REX_INT_REG];
> > +  memset (stack_ref_regs, 0, sizeof (stack_ref_regs));
> > +  stack_ref_regs[STACK_POINTER_REGNUM] = stack_pointer_rtx;
> > +  stack_ref_regs[FRAME_POINTER_REGNUM] = frame_pointer_rtx;
> > +
> >    FOR_EACH_BB_FN (bb, cfun)
> >      {
> >        rtx_insn *insn;
> > @@ -12811,16 +12829,33 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> >         {
> >           require_stack_frame = true;
> >
> > +         rtx body = PATTERN (insn);
> > +         if (GET_CODE (body) == SET)
> > +           {
> > +             /* If a register is set from a stack referencing
> > +                register, it is a stack referencing register.  */
> > +             rtx dest = SET_DEST (body);
> > +             if (REG_P (dest) && !MEM_P (SET_SRC (body)))
> > +               {
> > +                 int regno = REGNO (dest);
> > +                 gcc_assert (regno < FIRST_PSEUDO_REGISTER);
> > +                 if (GENERAL_REGNO_P (regno))
> > +                   {
> > +                     add_to_hard_reg_set (&set_up_by_prologue, Pmode,
> > +                                          regno);
> > +                     stack_ref_regs[regno] = dest;
> > +                   }
> > +               }
> > +           }
> > +
> >           if (check_stack_slot)
> >             {
> >               /* Find the maximum stack alignment.  */
> >               subrtx_iterator::array_type array;
> >               FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> >                 if (MEM_P (*iter)
> > -                   && (reg_mentioned_p (stack_pointer_rtx,
> > -                                        *iter)
> > -                       || reg_mentioned_p (frame_pointer_rtx,
> > -                                           *iter)))
> > +                   && ix86_stack_referenced_p (*iter,
> > +                                               stack_ref_regs))
> >                   {
> >                     unsigned int alignment = MEM_ALIGN (*iter);
> >                     if (alignment > stack_alignment)
> This just does a linear scan of the blocks and insns within the block
> building up STACK_REF_REGS as we go.
>
> The problem is there's no guarantee that we're visiting the blocks in
> execution order.  So we might see an insn that indirectly references the
> stack reg *before* we see the insn which had the copy from the stack
> pointer.
>
> Or am I missing something?
>

You are right.  We must be conservative in this case   Here is the
updated patch.  If there may be indirect stack references, stop and
restore the original stack alignment.

I am testing it now.
H.J. Lu Dec. 15, 2018, 7:19 p.m. UTC | #5
On Fri, Dec 14, 2018 at 04:04:02PM -0800, H.J. Lu wrote:
> On Fri, Dec 14, 2018 at 3:24 PM Jeff Law <law@redhat.com> wrote:
> >
> > On 12/14/18 4:01 PM, H.J. Lu wrote:
> > > On Thu, Dec 13, 2018 at 11:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >> On Thu, Dec 13, 2018 at 6:36 PM H.J. Lu <hongjiu.lu@intel.com> wrote:
> > >>> get_frame_size () returns used stack slots during compilation, which
> > >>> may be optimized out later.  Since ix86_find_max_used_stack_alignment
> > >>> is called by ix86_finalize_stack_frame_flags to check if stack frame
> > >>> is required, there is no need to call get_frame_size () which may give
> > >>> inaccurate final stack frame size.
> > >>>
> > >>> Tested on AVX512 machine configured with
> > >>>
> > >>> --with-arch=native --with-cpu=native
> > >>>
> > >>> OK for trunk?
> > >>>
> > >>>
> > >>> H.J.
> > >>> ---
> > >>> gcc/
> > >>>
> > >>>         PR target/88483
> > >>>         * config/i386/i386.c (ix86_finalize_stack_frame_flags): Don't
> > >>>         use get_frame_size ().
> > >>>
> > >>> gcc/testsuite/
> > >>>
> > >>>         PR target/88483
> > >>>         * gcc.target/i386/stackalign/pr88483.c: New test.
> > >> LGTM, but you know this part of the compiler better than I.
> > >>
> > >> Thanks,
> > >> Uros.
> > >>
> > >>> ---
> > >>>  gcc/config/i386/i386.c                          |  1 -
> > >>>  .../gcc.target/i386/stackalign/pr88483.c        | 17 +++++++++++++++++
> > >>>  2 files changed, 17 insertions(+), 1 deletion(-)
> > >>>  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
> > >>>
> > >>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > >>> index caa701fe242..edc8f4f092e 100644
> > >>> --- a/gcc/config/i386/i386.c
> > >>> +++ b/gcc/config/i386/i386.c
> > >>> @@ -12876,7 +12876,6 @@ ix86_finalize_stack_frame_flags (void)
> > >>>            && flag_exceptions
> > >>>            && cfun->can_throw_non_call_exceptions)
> > >>>        && !ix86_frame_pointer_required ()
> > >>> -      && get_frame_size () == 0
> > >>>        && ix86_nsaved_sseregs () == 0
> > >>>        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
> > >>>      {
> > >>> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
> > >>> new file mode 100644
> > >>> index 00000000000..5aec8fd4cf6
> > >>> --- /dev/null
> > >>> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
> > >>> @@ -0,0 +1,17 @@
> > >>> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> > >>> +/* { dg-options "-O2 -mavx2" } */
> > >>> +
> > >>> +struct B
> > >>> +{
> > >>> +  char a[12];
> > >>> +  int b;
> > >>> +};
> > >>> +
> > >>> +struct B
> > >>> +f2 (void)
> > >>> +{
> > >>> +  struct B x = {};
> > >>> +  return x;
> > >>> +}
> > >>> +
> > >>> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
> > >>> --
> > >>> 2.19.2
> > >>>
> > > My fix triggered a latent bug in ix86_find_max_used_stack_alignment.
> > > Here is the fix.  OK for trunk?
> > >
> > > Thanks.
> > >
> > > -- H.J.
> > >
> > >
> > > 0001-x86-Properly-check-stack-reference.patch
> > >
> > > From 83f0b37f287ed198a3b50e2be6b0f7f5c154020e Mon Sep 17 00:00:00 2001
> > > From: "H.J. Lu" <hjl.tools@gmail.com>
> > > Date: Fri, 14 Dec 2018 12:21:02 -0800
> > > Subject: [PATCH] x86: Properly check stack reference
> > >
> > > A latent bug in ix86_find_max_used_stack_alignment was uncovered by the
> > > fix for PR target/88483, which caused:
> > >
> > > FAIL: gcc.target/i386/incoming-8.c scan-assembler andl[\\t ]*\\$-16,[\\t ]*%esp
> > >
> > > on i386.  ix86_find_max_used_stack_alignment failed to notice stack
> > > reference via non-stack/frame registers and missed stack alignment
> > > requirement.  We should track all registers which may reference stack
> > > by checking registers set from stack referencing registers.
> > >
> > > Tested on i686 and x86-64 with
> > >
> > > --with-arch=native --with-cpu=native
> > >
> > > on AVX512 machine.  Tested on i686 and x86-64 without
> > >
> > > --with-arch=native --with-cpu=native
> > >
> > > on x86-64 machine.
> > >
> > >       PR target/88483
> > >       * config/i386/i386.c (ix86_stack_referenced_p): New function.
> > >       (ix86_find_max_used_stack_alignment): Call ix86_stack_referenced_p
> > >       to check if stack is referenced.
> > > ---
> > >  gcc/config/i386/i386.c | 43 ++++++++++++++++++++++++++++++++++++++----
> > >  1 file changed, 39 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > index 4599ca2a7d5..bf93ec3722f 100644
> > > --- a/gcc/config/i386/i386.c
> > > +++ b/gcc/config/i386/i386.c
> > > @@ -12777,6 +12777,18 @@ output_probe_stack_range (rtx reg, rtx end)
> > >    return "";
> > >  }
> > >
> > > +/* Return true if OP references stack frame though one of registers
> > > +   in STACK_REF_REGS.  */
> > > +
> > > +static bool
> > > +ix86_stack_referenced_p (const_rtx op, rtx *stack_ref_regs)
> > > +{
> > > +  for (int i = 0; i < LAST_REX_INT_REG; i++)
> > > +    if (stack_ref_regs[i] && reg_mentioned_p (stack_ref_regs[i], op))
> > > +      return true;
> > > +  return false;
> > > +}
> > > +
> > >  /* Return true if stack frame is required.  Update STACK_ALIGNMENT
> > >     to the largest alignment, in bits, of stack slot used if stack
> > >     frame is required and CHECK_STACK_SLOT is true.  */
> > > @@ -12801,6 +12813,12 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> > >
> > >    bool require_stack_frame = false;
> > >
> > > +  /* Array of hard registers which reference stack frame.  */
> > > +  rtx stack_ref_regs[LAST_REX_INT_REG];
> > > +  memset (stack_ref_regs, 0, sizeof (stack_ref_regs));
> > > +  stack_ref_regs[STACK_POINTER_REGNUM] = stack_pointer_rtx;
> > > +  stack_ref_regs[FRAME_POINTER_REGNUM] = frame_pointer_rtx;
> > > +
> > >    FOR_EACH_BB_FN (bb, cfun)
> > >      {
> > >        rtx_insn *insn;
> > > @@ -12811,16 +12829,33 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> > >         {
> > >           require_stack_frame = true;
> > >
> > > +         rtx body = PATTERN (insn);
> > > +         if (GET_CODE (body) == SET)
> > > +           {
> > > +             /* If a register is set from a stack referencing
> > > +                register, it is a stack referencing register.  */
> > > +             rtx dest = SET_DEST (body);
> > > +             if (REG_P (dest) && !MEM_P (SET_SRC (body)))
> > > +               {
> > > +                 int regno = REGNO (dest);
> > > +                 gcc_assert (regno < FIRST_PSEUDO_REGISTER);
> > > +                 if (GENERAL_REGNO_P (regno))
> > > +                   {
> > > +                     add_to_hard_reg_set (&set_up_by_prologue, Pmode,
> > > +                                          regno);
> > > +                     stack_ref_regs[regno] = dest;
> > > +                   }
> > > +               }
> > > +           }
> > > +
> > >           if (check_stack_slot)
> > >             {
> > >               /* Find the maximum stack alignment.  */
> > >               subrtx_iterator::array_type array;
> > >               FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> > >                 if (MEM_P (*iter)
> > > -                   && (reg_mentioned_p (stack_pointer_rtx,
> > > -                                        *iter)
> > > -                       || reg_mentioned_p (frame_pointer_rtx,
> > > -                                           *iter)))
> > > +                   && ix86_stack_referenced_p (*iter,
> > > +                                               stack_ref_regs))
> > >                   {
> > >                     unsigned int alignment = MEM_ALIGN (*iter);
> > >                     if (alignment > stack_alignment)
> > This just does a linear scan of the blocks and insns within the block
> > building up STACK_REF_REGS as we go.
> >
> > The problem is there's no guarantee that we're visiting the blocks in
> > execution order.  So we might see an insn that indirectly references the
> > stack reg *before* we see the insn which had the copy from the stack
> > pointer.
> >
> > Or am I missing something?
> >
> 
> You are right.  We must be conservative in this case   Here is the
> updated patch.  If there may be indirect stack references, stop and
> restore the original stack alignment.
> 
> I am testing it now.
> 

This is updated patch.  Tested on i686 and x86-64 with

--with-arch=native --with-cpu=native

on AVX512 machine.  Tested on i686 and x86-64 without

--with-arch=native --with-cpu=native

on x86-64 machine.

OK for trunk?

Thanks.


H.J.
----
A latent bug in ix86_find_max_used_stack_alignment was uncovered by the
fix for PR target/88483, which caused:

FAIL: gcc.target/i386/incoming-8.c scan-assembler andl[\\t ]*\\$-16,[\\t ]*%esp

on i386.  ix86_find_max_used_stack_alignment failed to notice stack
reference via non-stack/frame registers and missed stack alignment
requirement.  There's no guarantee that we're visiting the blocks in
execution order.  If there may be indirect stack references, stop and
restore the original stack alignment.  Update stack alignment only if
we check stack alignment.

	PR target/88483
	* config/i386/i386.c (ix86_stack_referenced_p): New function.
	(ix86_find_max_used_stack_alignment): If there may be indirect
	stack references, stop and restore the original stack alignment.
---
 gcc/config/i386/i386.c | 76 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b6dea0c061d..e28a8d50069 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12777,6 +12777,57 @@ output_probe_stack_range (rtx reg, rtx end)
   return "";
 }
 
+/* Return true if destination register of SET_BODY may reference stack.  */
+
+static bool
+ix86_stack_referenced_p (const_rtx set_body)
+{
+  rtx set_dest, set_src;
+  int i;
+
+  switch (GET_CODE (set_body))
+    {
+    case SET:
+      /* If a register is set from a stack stack address, it may be
+	 used to reference stack indirectly.  */
+      set_dest = SET_DEST (set_body);
+      if (SUBREG_P (set_dest))
+	set_dest = SUBREG_REG (set_dest);
+      if (!REG_P (set_dest) || !GENERAL_REG_P (set_dest))
+	return false;
+      set_src = SET_SRC (set_body);
+      if (address_operand (set_src, VOIDmode))
+	{
+	  struct ix86_address parts;
+	  if (!ix86_decompose_address (set_src, &parts))
+	    return false;
+	  if (parts.base)
+	    {
+	      if (SUBREG_P (parts.base))
+		parts.base = SUBREG_REG (parts.base);
+	      return (parts.base == stack_pointer_rtx
+		      || parts.base == frame_pointer_rtx);
+	    }
+	  if (parts.index)
+	    {
+	      if (SUBREG_P (parts.index))
+		parts.index = SUBREG_REG (parts.index);
+	      return (parts.index == stack_pointer_rtx
+		      || parts.index == frame_pointer_rtx);
+	    }
+	}
+      break;
+    case PARALLEL:
+      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
+	if (ix86_stack_referenced_p (XVECEXP (set_body, 0, i)))
+	  return true;
+      /* FALLTHROUGH */
+    default:
+      break;
+    }
+  return false;
+}
+
 /* Return true if stack frame is required.  Update STACK_ALIGNMENT
    to the largest alignment, in bits, of stack slot used if stack
    frame is required and CHECK_STACK_SLOT is true.  */
@@ -12795,8 +12846,13 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
   add_to_hard_reg_set (&set_up_by_prologue, Pmode,
 		       HARD_FRAME_POINTER_REGNUM);
 
-  /* The preferred stack alignment is the minimum stack alignment.  */
-  if (stack_alignment > crtl->preferred_stack_boundary)
+  /* Save the original stack alignment.  */
+  unsigned int original_stack_alignment = stack_alignment;
+
+  /* The preferred stack alignment is the minimum stack alignment.
+     Update stack alignment only if we check stack alignment.  */
+  if (check_stack_slot
+      && stack_alignment > crtl->preferred_stack_boundary)
     stack_alignment = crtl->preferred_stack_boundary;
 
   bool require_stack_frame = false;
@@ -12811,6 +12867,16 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
 	  {
 	    require_stack_frame = true;
 
+	    if (ix86_stack_referenced_p (PATTERN (insn)))
+	      {
+		/* There's no guarantee that we're visiting the
+		   blocks in execution order.  If there may be
+		   indirect stack references, stop and restore
+		   the original stack alignment.  */
+		stack_alignment = original_stack_alignment;
+		goto done;
+	      }
+
 	    if (check_stack_slot)
 	      {
 		/* Find the maximum stack alignment.  */
@@ -12827,9 +12893,15 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
 			stack_alignment = alignment;
 		    }
 	      }
+	    else
+	      {
+		/* We are done.  */
+		goto done;
+	      }
 	  }
     }
 
+done:
   return require_stack_frame;
 }
Uros Bizjak Dec. 16, 2018, 10:08 a.m. UTC | #6
On 12/15/18, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Fri, Dec 14, 2018 at 04:04:02PM -0800, H.J. Lu wrote:
>> On Fri, Dec 14, 2018 at 3:24 PM Jeff Law <law@redhat.com> wrote:
>> >
>> > On 12/14/18 4:01 PM, H.J. Lu wrote:
>> > > On Thu, Dec 13, 2018 at 11:11 PM Uros Bizjak <ubizjak@gmail.com>
>> > > wrote:
>> > >> On Thu, Dec 13, 2018 at 6:36 PM H.J. Lu <hongjiu.lu@intel.com>
>> > >> wrote:
>> > >>> get_frame_size () returns used stack slots during compilation,
>> > >>> which
>> > >>> may be optimized out later.  Since
>> > >>> ix86_find_max_used_stack_alignment
>> > >>> is called by ix86_finalize_stack_frame_flags to check if stack
>> > >>> frame
>> > >>> is required, there is no need to call get_frame_size () which may
>> > >>> give
>> > >>> inaccurate final stack frame size.
>> > >>>
>> > >>> Tested on AVX512 machine configured with
>> > >>>
>> > >>> --with-arch=native --with-cpu=native
>> > >>>
>> > >>> OK for trunk?
>> > >>>
>> > >>>
>> > >>> H.J.
>> > >>> ---
>> > >>> gcc/
>> > >>>
>> > >>>         PR target/88483
>> > >>>         * config/i386/i386.c (ix86_finalize_stack_frame_flags):
>> > >>> Don't
>> > >>>         use get_frame_size ().
>> > >>>
>> > >>> gcc/testsuite/
>> > >>>
>> > >>>         PR target/88483
>> > >>>         * gcc.target/i386/stackalign/pr88483.c: New test.
>> > >> LGTM, but you know this part of the compiler better than I.
>> > >>
>> > >> Thanks,
>> > >> Uros.
>> > >>
>> > >>> ---
>> > >>>  gcc/config/i386/i386.c                          |  1 -
>> > >>>  .../gcc.target/i386/stackalign/pr88483.c        | 17
>> > >>> +++++++++++++++++
>> > >>>  2 files changed, 17 insertions(+), 1 deletion(-)
>> > >>>  create mode 100644
>> > >>> gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>> > >>>
>> > >>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> > >>> index caa701fe242..edc8f4f092e 100644
>> > >>> --- a/gcc/config/i386/i386.c
>> > >>> +++ b/gcc/config/i386/i386.c
>> > >>> @@ -12876,7 +12876,6 @@ ix86_finalize_stack_frame_flags (void)
>> > >>>            && flag_exceptions
>> > >>>            && cfun->can_throw_non_call_exceptions)
>> > >>>        && !ix86_frame_pointer_required ()
>> > >>> -      && get_frame_size () == 0
>> > >>>        && ix86_nsaved_sseregs () == 0
>> > >>>        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
>> > >>>      {
>> > >>> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>> > >>> b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>> > >>> new file mode 100644
>> > >>> index 00000000000..5aec8fd4cf6
>> > >>> --- /dev/null
>> > >>> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>> > >>> @@ -0,0 +1,17 @@
>> > >>> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
>> > >>> +/* { dg-options "-O2 -mavx2" } */
>> > >>> +
>> > >>> +struct B
>> > >>> +{
>> > >>> +  char a[12];
>> > >>> +  int b;
>> > >>> +};
>> > >>> +
>> > >>> +struct B
>> > >>> +f2 (void)
>> > >>> +{
>> > >>> +  struct B x = {};
>> > >>> +  return x;
>> > >>> +}
>> > >>> +
>> > >>> +/* { dg-final { scan-assembler-not
>> > >>> "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
>> > >>> --
>> > >>> 2.19.2
>> > >>>
>> > > My fix triggered a latent bug in ix86_find_max_used_stack_alignment.
>> > > Here is the fix.  OK for trunk?
>> > >
>> > > Thanks.
>> > >
>> > > -- H.J.
>> > >
>> > >
>> > > 0001-x86-Properly-check-stack-reference.patch
>> > >
>> > > From 83f0b37f287ed198a3b50e2be6b0f7f5c154020e Mon Sep 17 00:00:00
>> > > 2001
>> > > From: "H.J. Lu" <hjl.tools@gmail.com>
>> > > Date: Fri, 14 Dec 2018 12:21:02 -0800
>> > > Subject: [PATCH] x86: Properly check stack reference
>> > >
>> > > A latent bug in ix86_find_max_used_stack_alignment was uncovered by
>> > > the
>> > > fix for PR target/88483, which caused:
>> > >
>> > > FAIL: gcc.target/i386/incoming-8.c scan-assembler andl[\\t
>> > > ]*\\$-16,[\\t ]*%esp
>> > >
>> > > on i386.  ix86_find_max_used_stack_alignment failed to notice stack
>> > > reference via non-stack/frame registers and missed stack alignment
>> > > requirement.  We should track all registers which may reference stack
>> > > by checking registers set from stack referencing registers.
>> > >
>> > > Tested on i686 and x86-64 with
>> > >
>> > > --with-arch=native --with-cpu=native
>> > >
>> > > on AVX512 machine.  Tested on i686 and x86-64 without
>> > >
>> > > --with-arch=native --with-cpu=native
>> > >
>> > > on x86-64 machine.
>> > >
>> > >       PR target/88483
>> > >       * config/i386/i386.c (ix86_stack_referenced_p): New function.
>> > >       (ix86_find_max_used_stack_alignment): Call
>> > > ix86_stack_referenced_p
>> > >       to check if stack is referenced.
>> > > ---
>> > >  gcc/config/i386/i386.c | 43
>> > > ++++++++++++++++++++++++++++++++++++++----
>> > >  1 file changed, 39 insertions(+), 4 deletions(-)
>> > >
>> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> > > index 4599ca2a7d5..bf93ec3722f 100644
>> > > --- a/gcc/config/i386/i386.c
>> > > +++ b/gcc/config/i386/i386.c
>> > > @@ -12777,6 +12777,18 @@ output_probe_stack_range (rtx reg, rtx end)
>> > >    return "";
>> > >  }
>> > >
>> > > +/* Return true if OP references stack frame though one of registers
>> > > +   in STACK_REF_REGS.  */
>> > > +
>> > > +static bool
>> > > +ix86_stack_referenced_p (const_rtx op, rtx *stack_ref_regs)
>> > > +{
>> > > +  for (int i = 0; i < LAST_REX_INT_REG; i++)
>> > > +    if (stack_ref_regs[i] && reg_mentioned_p (stack_ref_regs[i],
>> > > op))
>> > > +      return true;
>> > > +  return false;
>> > > +}
>> > > +
>> > >  /* Return true if stack frame is required.  Update STACK_ALIGNMENT
>> > >     to the largest alignment, in bits, of stack slot used if stack
>> > >     frame is required and CHECK_STACK_SLOT is true.  */
>> > > @@ -12801,6 +12813,12 @@ ix86_find_max_used_stack_alignment (unsigned
>> > > int &stack_alignment,
>> > >
>> > >    bool require_stack_frame = false;
>> > >
>> > > +  /* Array of hard registers which reference stack frame.  */
>> > > +  rtx stack_ref_regs[LAST_REX_INT_REG];
>> > > +  memset (stack_ref_regs, 0, sizeof (stack_ref_regs));
>> > > +  stack_ref_regs[STACK_POINTER_REGNUM] = stack_pointer_rtx;
>> > > +  stack_ref_regs[FRAME_POINTER_REGNUM] = frame_pointer_rtx;
>> > > +
>> > >    FOR_EACH_BB_FN (bb, cfun)
>> > >      {
>> > >        rtx_insn *insn;
>> > > @@ -12811,16 +12829,33 @@ ix86_find_max_used_stack_alignment (unsigned
>> > > int &stack_alignment,
>> > >         {
>> > >           require_stack_frame = true;
>> > >
>> > > +         rtx body = PATTERN (insn);
>> > > +         if (GET_CODE (body) == SET)
>> > > +           {
>> > > +             /* If a register is set from a stack referencing
>> > > +                register, it is a stack referencing register.  */
>> > > +             rtx dest = SET_DEST (body);
>> > > +             if (REG_P (dest) && !MEM_P (SET_SRC (body)))
>> > > +               {
>> > > +                 int regno = REGNO (dest);
>> > > +                 gcc_assert (regno < FIRST_PSEUDO_REGISTER);
>> > > +                 if (GENERAL_REGNO_P (regno))
>> > > +                   {
>> > > +                     add_to_hard_reg_set (&set_up_by_prologue,
>> > > Pmode,
>> > > +                                          regno);
>> > > +                     stack_ref_regs[regno] = dest;
>> > > +                   }
>> > > +               }
>> > > +           }
>> > > +
>> > >           if (check_stack_slot)
>> > >             {
>> > >               /* Find the maximum stack alignment.  */
>> > >               subrtx_iterator::array_type array;
>> > >               FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
>> > >                 if (MEM_P (*iter)
>> > > -                   && (reg_mentioned_p (stack_pointer_rtx,
>> > > -                                        *iter)
>> > > -                       || reg_mentioned_p (frame_pointer_rtx,
>> > > -                                           *iter)))
>> > > +                   && ix86_stack_referenced_p (*iter,
>> > > +                                               stack_ref_regs))
>> > >                   {
>> > >                     unsigned int alignment = MEM_ALIGN (*iter);
>> > >                     if (alignment > stack_alignment)
>> > This just does a linear scan of the blocks and insns within the block
>> > building up STACK_REF_REGS as we go.
>> >
>> > The problem is there's no guarantee that we're visiting the blocks in
>> > execution order.  So we might see an insn that indirectly references
>> > the
>> > stack reg *before* we see the insn which had the copy from the stack
>> > pointer.
>> >
>> > Or am I missing something?
>> >
>>
>> You are right.  We must be conservative in this case   Here is the
>> updated patch.  If there may be indirect stack references, stop and
>> restore the original stack alignment.
>>
>> I am testing it now.
>>
>
> This is updated patch.  Tested on i686 and x86-64 with
>
> --with-arch=native --with-cpu=native
>
> on AVX512 machine.  Tested on i686 and x86-64 without
>
> --with-arch=native --with-cpu=native
>
> on x86-64 machine.
>
> OK for trunk?

No. Please revert the original patch (it was not a regression), and
let's revisit this for gcc-10.

Uros.

> Thanks.
>
>
> H.J.
> ----
> A latent bug in ix86_find_max_used_stack_alignment was uncovered by the
> fix for PR target/88483, which caused:
>
> FAIL: gcc.target/i386/incoming-8.c scan-assembler andl[\\t ]*\\$-16,[\\t
> ]*%esp
>
> on i386.  ix86_find_max_used_stack_alignment failed to notice stack
> reference via non-stack/frame registers and missed stack alignment
> requirement.  There's no guarantee that we're visiting the blocks in
> execution order.  If there may be indirect stack references, stop and
> restore the original stack alignment.  Update stack alignment only if
> we check stack alignment.
>
> 	PR target/88483
> 	* config/i386/i386.c (ix86_stack_referenced_p): New function.
> 	(ix86_find_max_used_stack_alignment): If there may be indirect
> 	stack references, stop and restore the original stack alignment.
> ---
>  gcc/config/i386/i386.c | 76 ++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 74 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index b6dea0c061d..e28a8d50069 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -12777,6 +12777,57 @@ output_probe_stack_range (rtx reg, rtx end)
>    return "";
>  }
>
> +/* Return true if destination register of SET_BODY may reference stack.
> */
> +
> +static bool
> +ix86_stack_referenced_p (const_rtx set_body)
> +{
> +  rtx set_dest, set_src;
> +  int i;
> +
> +  switch (GET_CODE (set_body))
> +    {
> +    case SET:
> +      /* If a register is set from a stack stack address, it may be
> +	 used to reference stack indirectly.  */
> +      set_dest = SET_DEST (set_body);
> +      if (SUBREG_P (set_dest))
> +	set_dest = SUBREG_REG (set_dest);
> +      if (!REG_P (set_dest) || !GENERAL_REG_P (set_dest))
> +	return false;
> +      set_src = SET_SRC (set_body);
> +      if (address_operand (set_src, VOIDmode))
> +	{
> +	  struct ix86_address parts;
> +	  if (!ix86_decompose_address (set_src, &parts))
> +	    return false;
> +	  if (parts.base)
> +	    {
> +	      if (SUBREG_P (parts.base))
> +		parts.base = SUBREG_REG (parts.base);
> +	      return (parts.base == stack_pointer_rtx
> +		      || parts.base == frame_pointer_rtx);
> +	    }
> +	  if (parts.index)
> +	    {
> +	      if (SUBREG_P (parts.index))
> +		parts.index = SUBREG_REG (parts.index);
> +	      return (parts.index == stack_pointer_rtx
> +		      || parts.index == frame_pointer_rtx);
> +	    }
> +	}
> +      break;
> +    case PARALLEL:
> +      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
> +	if (ix86_stack_referenced_p (XVECEXP (set_body, 0, i)))
> +	  return true;
> +      /* FALLTHROUGH */
> +    default:
> +      break;
> +    }
> +  return false;
> +}
> +
>  /* Return true if stack frame is required.  Update STACK_ALIGNMENT
>     to the largest alignment, in bits, of stack slot used if stack
>     frame is required and CHECK_STACK_SLOT is true.  */
> @@ -12795,8 +12846,13 @@ ix86_find_max_used_stack_alignment (unsigned int
> &stack_alignment,
>    add_to_hard_reg_set (&set_up_by_prologue, Pmode,
>  		       HARD_FRAME_POINTER_REGNUM);
>
> -  /* The preferred stack alignment is the minimum stack alignment.  */
> -  if (stack_alignment > crtl->preferred_stack_boundary)
> +  /* Save the original stack alignment.  */
> +  unsigned int original_stack_alignment = stack_alignment;
> +
> +  /* The preferred stack alignment is the minimum stack alignment.
> +     Update stack alignment only if we check stack alignment.  */
> +  if (check_stack_slot
> +      && stack_alignment > crtl->preferred_stack_boundary)
>      stack_alignment = crtl->preferred_stack_boundary;
>
>    bool require_stack_frame = false;
> @@ -12811,6 +12867,16 @@ ix86_find_max_used_stack_alignment (unsigned int
> &stack_alignment,
>  	  {
>  	    require_stack_frame = true;
>
> +	    if (ix86_stack_referenced_p (PATTERN (insn)))
> +	      {
> +		/* There's no guarantee that we're visiting the
> +		   blocks in execution order.  If there may be
> +		   indirect stack references, stop and restore
> +		   the original stack alignment.  */
> +		stack_alignment = original_stack_alignment;
> +		goto done;
> +	      }
> +
>  	    if (check_stack_slot)
>  	      {
>  		/* Find the maximum stack alignment.  */
> @@ -12827,9 +12893,15 @@ ix86_find_max_used_stack_alignment (unsigned int
> &stack_alignment,
>  			stack_alignment = alignment;
>  		    }
>  	      }
> +	    else
> +	      {
> +		/* We are done.  */
> +		goto done;
> +	      }
>  	  }
>      }
>
> +done:
>    return require_stack_frame;
>  }
>
> --
> 2.19.2
>
>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index caa701fe242..edc8f4f092e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12876,7 +12876,6 @@  ix86_finalize_stack_frame_flags (void)
 	   && flag_exceptions
 	   && cfun->can_throw_non_call_exceptions)
       && !ix86_frame_pointer_required ()
-      && get_frame_size () == 0
       && ix86_nsaved_sseregs () == 0
       && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
     {
diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
new file mode 100644
index 00000000000..5aec8fd4cf6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mavx2" } */
+
+struct B
+{
+  char a[12];
+  int b;
+};
+
+struct B
+f2 (void)
+{
+  struct B x = {};
+  return x;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */