diff mbox series

x86: Don't allocate stack frame nor align stack if not needed

Message ID 20190521150157.2405-1-hjl.tools@gmail.com
State New
Headers show
Series x86: Don't allocate stack frame nor align stack if not needed | expand

Commit Message

H.J. Lu May 21, 2019, 3:01 p.m. UTC
get_frame_size () returns used stack slots during compilation, which
may be optimized out later.  This patch does the followings:

1. Add no_stack_frame to machine_function to indicate that the function
doesn't need a stack frame.
2. Change ix86_find_max_used_stack_alignment to set no_stack_frame.
3. Always call ix86_find_max_used_stack_alignment to check if stack
frame is needed.

Tested on i686 and x86-64 with

--with-arch=native --with-cpu=native

Tested on AVX512 machine configured with

--with-arch=native --with-cpu=native

gcc/

	PR target/88483
	* config/i386/i386.c (ix86_get_frame_size): New function.
	(ix86_frame_pointer_required): Replace get_frame_size with
	ix86_get_frame_size.
	(ix86_compute_frame_layout): Likewise.
	(ix86_find_max_used_stack_alignment): Changed to void.  Set
	no_stack_frame.
	(ix86_finalize_stack_frame_flags): Always call
	ix86_find_max_used_stack_alignment.  Replace get_frame_size with
	ix86_get_frame_size.
	* config/i386/i386.h (machine_function): Add no_stack_frame.

gcc/testsuite/

	PR target/88483
	* gcc.target/i386/stackalign/pr88483-1.c: New test.
	* gcc.target/i386/stackalign/pr88483-2.c: Likewise.
---
 gcc/config/i386/i386.c                        | 53 ++++++++++++-------
 gcc/config/i386/i386.h                        |  3 ++
 .../gcc.target/i386/stackalign/pr88483-1.c    | 18 +++++++
 .../gcc.target/i386/stackalign/pr88483-2.c    | 18 +++++++
 4 files changed, 74 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c

Comments

Uros Bizjak May 22, 2019, 7:31 a.m. UTC | #1
On Tue, May 21, 2019 at 5:01 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> get_frame_size () returns used stack slots during compilation, which
> may be optimized out later.  This patch does the followings:
>
> 1. Add no_stack_frame to machine_function to indicate that the function
> doesn't need a stack frame.

Can you please add "stack_frame_required" to machine_function with
inverted meaning instead? Every single usage of no_stack_frame is
inverted, and it is hard to follow this negative logic through the
code.

> 2. Change ix86_find_max_used_stack_alignment to set no_stack_frame.
> 3. Always call ix86_find_max_used_stack_alignment to check if stack
> frame is needed.
>
> Tested on i686 and x86-64 with
>
> --with-arch=native --with-cpu=native
>
> Tested on AVX512 machine configured with
>
> --with-arch=native --with-cpu=native
>
> gcc/
>
>         PR target/88483
>         * config/i386/i386.c (ix86_get_frame_size): New function.
>         (ix86_frame_pointer_required): Replace get_frame_size with
>         ix86_get_frame_size.
>         (ix86_compute_frame_layout): Likewise.
>         (ix86_find_max_used_stack_alignment): Changed to void.  Set
>         no_stack_frame.
>         (ix86_finalize_stack_frame_flags): Always call
>         ix86_find_max_used_stack_alignment.  Replace get_frame_size with
>         ix86_get_frame_size.
>         * config/i386/i386.h (machine_function): Add no_stack_frame.
>
> gcc/testsuite/
>
>         PR target/88483
>         * gcc.target/i386/stackalign/pr88483-1.c: New test.
>         * gcc.target/i386/stackalign/pr88483-2.c: Likewise.
> ---
>  gcc/config/i386/i386.c                        | 53 ++++++++++++-------
>  gcc/config/i386/i386.h                        |  3 ++
>  .../gcc.target/i386/stackalign/pr88483-1.c    | 18 +++++++
>  .../gcc.target/i386/stackalign/pr88483-2.c    | 18 +++++++
>  4 files changed, 74 insertions(+), 18 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 54607748b0b..d0b2a4f8b70 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -5012,6 +5012,19 @@ ix86_can_use_return_insn_p (void)
>           && (frame.nregs + frame.nsseregs) == 0);
>  }
>
> +/* Return stack frame size.  get_frame_size () returns used stack slots
> +   during compilation, which may be optimized out later.  no_stack_frame
> +   is set to true if stack frame isn't needed.  */
> +
> +static HOST_WIDE_INT
> +ix86_get_frame_size (void)
> +{
> +  if (cfun->machine->no_stack_frame)
> +    return 0;
> +  else
> +    return get_frame_size ();
> +}
> +
>  /* Value should be nonzero if functions must have frame pointers.
>     Zero means the frame pointer need not be set up (and parms may
>     be accessed via the stack pointer) in functions that seem suitable.  */
> @@ -5035,7 +5048,7 @@ ix86_frame_pointer_required (void)
>
>    /* Win64 SEH, very large frames need a frame-pointer as maximum stack
>       allocation is 4GB.  */
> -  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
> +  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
>      return true;
>
>    /* SSE saves require frame-pointer when stack is misaligned.  */
> @@ -5842,7 +5855,7 @@ ix86_compute_frame_layout (void)
>    unsigned HOST_WIDE_INT stack_alignment_needed;
>    HOST_WIDE_INT offset;
>    unsigned HOST_WIDE_INT preferred_alignment;
> -  HOST_WIDE_INT size = get_frame_size ();
> +  HOST_WIDE_INT size = ix86_get_frame_size ();
>    HOST_WIDE_INT to_allocate;
>
>    /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
> @@ -7436,11 +7449,11 @@ output_probe_stack_range (rtx reg, rtx end)
>    return "";
>  }
>
> -/* Return true if stack frame is required.  Update STACK_ALIGNMENT
> -   to the largest alignment, in bits, of stack slot used if stack
> -   frame is required and CHECK_STACK_SLOT is true.  */
> +/* Set no_stack_frame to true if stack frame isn't required.  Update
> +   STACK_ALIGNMENT to the largest alignment, in bits, of stack slot
> +   used if stack frame is required and CHECK_STACK_SLOT is true.  */

From the above comment, you can see the confusion caused by using
negative logic for no_stack_frame.

> -static bool
> +static void
>  ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
>                                     bool check_stack_slot)
>  {
> @@ -7489,7 +7502,7 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
>           }
>      }
>
> -  return require_stack_frame;
> +  cfun->machine->no_stack_frame = !require_stack_frame;
>  }
>
>  /* Finalize stack_realign_needed and frame_pointer_needed flags, which
> @@ -7519,6 +7532,14 @@ ix86_finalize_stack_frame_flags (void)
>        return;
>      }
>
> +  /* It is always safe to compute max_used_stack_alignment.  We
> +     compute it only if 128-bit aligned load/store may be generated
> +     on misaligned stack slot which will lead to segfault. */
> +  bool check_stack_slot
> +    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
> +  ix86_find_max_used_stack_alignment (stack_alignment,
> +                                     check_stack_slot);
> +
>    /* If the only reason for frame_pointer_needed is that we conservatively
>       assumed stack realignment might be needed or -fno-omit-frame-pointer
>       is used, but in the end nothing that needed the stack alignment had
> @@ -7538,12 +7559,11 @@ ix86_finalize_stack_frame_flags (void)
>            && flag_exceptions
>            && cfun->can_throw_non_call_exceptions)
>        && !ix86_frame_pointer_required ()
> -      && get_frame_size () == 0
> +      && ix86_get_frame_size () == 0
>        && ix86_nsaved_sseregs () == 0
>        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
>      {
> -      if (ix86_find_max_used_stack_alignment (stack_alignment,
> -                                             stack_realign))
> +      if (!cfun->machine->no_stack_frame)
>         {
>           /* Stack frame is required.  If stack alignment needed is less
>              than incoming stack boundary, don't realign stack.  */
> @@ -7631,17 +7651,14 @@ ix86_finalize_stack_frame_flags (void)
>           recompute_frame_layout_p = true;
>         }
>      }
> -  else if (crtl->max_used_stack_slot_alignment >= 128)
> +  else if (crtl->max_used_stack_slot_alignment >= 128
> +          && !cfun->machine->no_stack_frame)
>      {
>        /* We don't need to realign stack.  max_used_stack_alignment is
>          used to decide how stack frame should be aligned.  This is
> -        independent of any psABIs nor 32-bit vs 64-bit.  It is always
> -        safe to compute max_used_stack_alignment.  We compute it only
> -        if 128-bit aligned load/store may be generated on misaligned
> -        stack slot which will lead to segfault.   */
> -      if (ix86_find_max_used_stack_alignment (stack_alignment, true))
> -       cfun->machine->max_used_stack_alignment
> -         = stack_alignment / BITS_PER_UNIT;
> +        independent of any psABIs nor 32-bit vs 64-bit.  */
> +      cfun->machine->max_used_stack_alignment
> +       = stack_alignment / BITS_PER_UNIT;;

Double semicolon.

>      }
>
>    if (crtl->stack_realign_needed != stack_realign)
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index be1480fdcf8..6b4186d3dac 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2754,6 +2754,9 @@ struct GTY(()) machine_function {
>    /* If true, ENDBR is queued at function entrance.  */
>    BOOL_BITFIELD endbr_queued_at_entrance : 1;
>
> +  /* Nonzero if the function doesn't need a stack frame.  */
> +  BOOL_BITFIELD no_stack_frame : 1;
> +
>    /* The largest alignment, in bytes, of stack slot actually used.  */
>    unsigned int max_used_stack_alignment;
>
> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
> new file mode 100644
> index 00000000000..c8bb0832fe2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx2" } */
> +
> +struct B
> +{
> +  char a[12];
> +  int b;
> +};
> +
> +struct B
> +f2 (void)
> +{
> +  struct B x = {};
> +  return x;
> +}
> +
> +/* { dg-final { scan-assembler-not "(sub|add)(l|q)\[\\t \]*\\$\[0-9\]*,\[\\t \]*%\[re\]?sp" } } */
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
> new file mode 100644
> index 00000000000..e94fa1d18fa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512f" } */
> +
> +struct B
> +{
> +  char a[12];
> +  int b;
> +};
> +
> +struct B
> +f2 (void)
> +{
> +  struct B x = {};
> +  return x;
> +}
> +
> +/* { dg-final { scan-assembler-not "(sub|add)(l|q)\[\\t \]*\\$\[0-9\]*,\[\\t \]*%\[re\]?sp" } } */
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
> --
> 2.20.1
>
H.J. Lu May 22, 2019, 3:09 p.m. UTC | #2
On Wed, May 22, 2019 at 12:31 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Tue, May 21, 2019 at 5:01 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > get_frame_size () returns used stack slots during compilation, which
> > may be optimized out later.  This patch does the followings:
> >
> > 1. Add no_stack_frame to machine_function to indicate that the function
> > doesn't need a stack frame.
>
> Can you please add "stack_frame_required" to machine_function with
> inverted meaning instead? Every single usage of no_stack_frame is
> inverted, and it is hard to follow this negative logic through the
> code.

Fixed.

> > 2. Change ix86_find_max_used_stack_alignment to set no_stack_frame.
> > 3. Always call ix86_find_max_used_stack_alignment to check if stack
> > frame is needed.
> >
> > Tested on i686 and x86-64 with
> >
> > --with-arch=native --with-cpu=native
> >
> > Tested on AVX512 machine configured with
> >
> > --with-arch=native --with-cpu=native
> >
> > gcc/
> >
> >         PR target/88483
> >         * config/i386/i386.c (ix86_get_frame_size): New function.
> >         (ix86_frame_pointer_required): Replace get_frame_size with
> >         ix86_get_frame_size.
> >         (ix86_compute_frame_layout): Likewise.
> >         (ix86_find_max_used_stack_alignment): Changed to void.  Set
> >         no_stack_frame.
> >         (ix86_finalize_stack_frame_flags): Always call
> >         ix86_find_max_used_stack_alignment.  Replace get_frame_size with
> >         ix86_get_frame_size.
> >         * config/i386/i386.h (machine_function): Add no_stack_frame.
> >
> > gcc/testsuite/
> >
> >         PR target/88483
> >         * gcc.target/i386/stackalign/pr88483-1.c: New test.
> >         * gcc.target/i386/stackalign/pr88483-2.c: Likewise.
> > ---
> >  gcc/config/i386/i386.c                        | 53 ++++++++++++-------
> >  gcc/config/i386/i386.h                        |  3 ++
> >  .../gcc.target/i386/stackalign/pr88483-1.c    | 18 +++++++
> >  .../gcc.target/i386/stackalign/pr88483-2.c    | 18 +++++++
> >  4 files changed, 74 insertions(+), 18 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index 54607748b0b..d0b2a4f8b70 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -5012,6 +5012,19 @@ ix86_can_use_return_insn_p (void)
> >           && (frame.nregs + frame.nsseregs) == 0);
> >  }
> >
> > +/* Return stack frame size.  get_frame_size () returns used stack slots
> > +   during compilation, which may be optimized out later.  no_stack_frame
> > +   is set to true if stack frame isn't needed.  */
> > +
> > +static HOST_WIDE_INT
> > +ix86_get_frame_size (void)
> > +{
> > +  if (cfun->machine->no_stack_frame)
> > +    return 0;
> > +  else
> > +    return get_frame_size ();
> > +}
> > +
> >  /* Value should be nonzero if functions must have frame pointers.
> >     Zero means the frame pointer need not be set up (and parms may
> >     be accessed via the stack pointer) in functions that seem suitable.  */
> > @@ -5035,7 +5048,7 @@ ix86_frame_pointer_required (void)
> >
> >    /* Win64 SEH, very large frames need a frame-pointer as maximum stack
> >       allocation is 4GB.  */
> > -  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
> > +  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
> >      return true;
> >
> >    /* SSE saves require frame-pointer when stack is misaligned.  */
> > @@ -5842,7 +5855,7 @@ ix86_compute_frame_layout (void)
> >    unsigned HOST_WIDE_INT stack_alignment_needed;
> >    HOST_WIDE_INT offset;
> >    unsigned HOST_WIDE_INT preferred_alignment;
> > -  HOST_WIDE_INT size = get_frame_size ();
> > +  HOST_WIDE_INT size = ix86_get_frame_size ();
> >    HOST_WIDE_INT to_allocate;
> >
> >    /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
> > @@ -7436,11 +7449,11 @@ output_probe_stack_range (rtx reg, rtx end)
> >    return "";
> >  }
> >
> > -/* Return true if stack frame is required.  Update STACK_ALIGNMENT
> > -   to the largest alignment, in bits, of stack slot used if stack
> > -   frame is required and CHECK_STACK_SLOT is true.  */
> > +/* Set no_stack_frame to true if stack frame isn't required.  Update
> > +   STACK_ALIGNMENT to the largest alignment, in bits, of stack slot
> > +   used if stack frame is required and CHECK_STACK_SLOT is true.  */
>
> From the above comment, you can see the confusion caused by using
> negative logic for no_stack_frame.
>
> > -static bool
> > +static void
> >  ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> >                                     bool check_stack_slot)
> >  {
> > @@ -7489,7 +7502,7 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> >           }
> >      }
> >
> > -  return require_stack_frame;
> > +  cfun->machine->no_stack_frame = !require_stack_frame;
> >  }
> >
> >  /* Finalize stack_realign_needed and frame_pointer_needed flags, which
> > @@ -7519,6 +7532,14 @@ ix86_finalize_stack_frame_flags (void)
> >        return;
> >      }
> >
> > +  /* It is always safe to compute max_used_stack_alignment.  We
> > +     compute it only if 128-bit aligned load/store may be generated
> > +     on misaligned stack slot which will lead to segfault. */
> > +  bool check_stack_slot
> > +    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
> > +  ix86_find_max_used_stack_alignment (stack_alignment,
> > +                                     check_stack_slot);
> > +
> >    /* If the only reason for frame_pointer_needed is that we conservatively
> >       assumed stack realignment might be needed or -fno-omit-frame-pointer
> >       is used, but in the end nothing that needed the stack alignment had
> > @@ -7538,12 +7559,11 @@ ix86_finalize_stack_frame_flags (void)
> >            && flag_exceptions
> >            && cfun->can_throw_non_call_exceptions)
> >        && !ix86_frame_pointer_required ()
> > -      && get_frame_size () == 0
> > +      && ix86_get_frame_size () == 0
> >        && ix86_nsaved_sseregs () == 0
> >        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
> >      {
> > -      if (ix86_find_max_used_stack_alignment (stack_alignment,
> > -                                             stack_realign))
> > +      if (!cfun->machine->no_stack_frame)
> >         {
> >           /* Stack frame is required.  If stack alignment needed is less
> >              than incoming stack boundary, don't realign stack.  */
> > @@ -7631,17 +7651,14 @@ ix86_finalize_stack_frame_flags (void)
> >           recompute_frame_layout_p = true;
> >         }
> >      }
> > -  else if (crtl->max_used_stack_slot_alignment >= 128)
> > +  else if (crtl->max_used_stack_slot_alignment >= 128
> > +          && !cfun->machine->no_stack_frame)
> >      {
> >        /* We don't need to realign stack.  max_used_stack_alignment is
> >          used to decide how stack frame should be aligned.  This is
> > -        independent of any psABIs nor 32-bit vs 64-bit.  It is always
> > -        safe to compute max_used_stack_alignment.  We compute it only
> > -        if 128-bit aligned load/store may be generated on misaligned
> > -        stack slot which will lead to segfault.   */
> > -      if (ix86_find_max_used_stack_alignment (stack_alignment, true))
> > -       cfun->machine->max_used_stack_alignment
> > -         = stack_alignment / BITS_PER_UNIT;
> > +        independent of any psABIs nor 32-bit vs 64-bit.  */
> > +      cfun->machine->max_used_stack_alignment
> > +       = stack_alignment / BITS_PER_UNIT;;
>
> Double semicolon.
>

Fixed.

Here is the updated patch.   OK for trunk?

Thanks.
Uros Bizjak May 22, 2019, 6:36 p.m. UTC | #3
On Wed, May 22, 2019 at 5:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, May 22, 2019 at 12:31 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Tue, May 21, 2019 at 5:01 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > get_frame_size () returns used stack slots during compilation, which
> > > may be optimized out later.  This patch does the followings:
> > >
> > > 1. Add no_stack_frame to machine_function to indicate that the function
> > > doesn't need a stack frame.
> >
> > Can you please add "stack_frame_required" to machine_function with
> > inverted meaning instead? Every single usage of no_stack_frame is
> > inverted, and it is hard to follow this negative logic through the
> > code.
>
> Fixed.
>
> > > 2. Change ix86_find_max_used_stack_alignment to set no_stack_frame.
> > > 3. Always call ix86_find_max_used_stack_alignment to check if stack
> > > frame is needed.
> > >
> > > Tested on i686 and x86-64 with
> > >
> > > --with-arch=native --with-cpu=native
> > >
> > > Tested on AVX512 machine configured with
> > >
> > > --with-arch=native --with-cpu=native
> > >
> > > gcc/
> > >
> > >         PR target/88483
> > >         * config/i386/i386.c (ix86_get_frame_size): New function.
> > >         (ix86_frame_pointer_required): Replace get_frame_size with
> > >         ix86_get_frame_size.
> > >         (ix86_compute_frame_layout): Likewise.
> > >         (ix86_find_max_used_stack_alignment): Changed to void.  Set
> > >         no_stack_frame.
> > >         (ix86_finalize_stack_frame_flags): Always call
> > >         ix86_find_max_used_stack_alignment.  Replace get_frame_size with
> > >         ix86_get_frame_size.
> > >         * config/i386/i386.h (machine_function): Add no_stack_frame.
> > >
> > > gcc/testsuite/
> > >
> > >         PR target/88483
> > >         * gcc.target/i386/stackalign/pr88483-1.c: New test.
> > >         * gcc.target/i386/stackalign/pr88483-2.c: Likewise.
> > > ---
> > >  gcc/config/i386/i386.c                        | 53 ++++++++++++-------
> > >  gcc/config/i386/i386.h                        |  3 ++
> > >  .../gcc.target/i386/stackalign/pr88483-1.c    | 18 +++++++
> > >  .../gcc.target/i386/stackalign/pr88483-2.c    | 18 +++++++
> > >  4 files changed, 74 insertions(+), 18 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
> > >
> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > index 54607748b0b..d0b2a4f8b70 100644
> > > --- a/gcc/config/i386/i386.c
> > > +++ b/gcc/config/i386/i386.c
> > > @@ -5012,6 +5012,19 @@ ix86_can_use_return_insn_p (void)
> > >           && (frame.nregs + frame.nsseregs) == 0);
> > >  }
> > >
> > > +/* Return stack frame size.  get_frame_size () returns used stack slots
> > > +   during compilation, which may be optimized out later.  no_stack_frame
> > > +   is set to true if stack frame isn't needed.  */
> > > +
> > > +static HOST_WIDE_INT
> > > +ix86_get_frame_size (void)
> > > +{
> > > +  if (cfun->machine->no_stack_frame)
> > > +    return 0;
> > > +  else
> > > +    return get_frame_size ();
> > > +}
> > > +
> > >  /* Value should be nonzero if functions must have frame pointers.
> > >     Zero means the frame pointer need not be set up (and parms may
> > >     be accessed via the stack pointer) in functions that seem suitable.  */
> > > @@ -5035,7 +5048,7 @@ ix86_frame_pointer_required (void)
> > >
> > >    /* Win64 SEH, very large frames need a frame-pointer as maximum stack
> > >       allocation is 4GB.  */
> > > -  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
> > > +  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
> > >      return true;
> > >
> > >    /* SSE saves require frame-pointer when stack is misaligned.  */
> > > @@ -5842,7 +5855,7 @@ ix86_compute_frame_layout (void)
> > >    unsigned HOST_WIDE_INT stack_alignment_needed;
> > >    HOST_WIDE_INT offset;
> > >    unsigned HOST_WIDE_INT preferred_alignment;
> > > -  HOST_WIDE_INT size = get_frame_size ();
> > > +  HOST_WIDE_INT size = ix86_get_frame_size ();
> > >    HOST_WIDE_INT to_allocate;
> > >
> > >    /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
> > > @@ -7436,11 +7449,11 @@ output_probe_stack_range (rtx reg, rtx end)
> > >    return "";
> > >  }
> > >
> > > -/* Return true if stack frame is required.  Update STACK_ALIGNMENT
> > > -   to the largest alignment, in bits, of stack slot used if stack
> > > -   frame is required and CHECK_STACK_SLOT is true.  */
> > > +/* Set no_stack_frame to true if stack frame isn't required.  Update
> > > +   STACK_ALIGNMENT to the largest alignment, in bits, of stack slot
> > > +   used if stack frame is required and CHECK_STACK_SLOT is true.  */
> >
> > From the above comment, you can see the confusion caused by using
> > negative logic for no_stack_frame.
> >
> > > -static bool
> > > +static void
> > >  ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> > >                                     bool check_stack_slot)
> > >  {
> > > @@ -7489,7 +7502,7 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> > >           }
> > >      }
> > >
> > > -  return require_stack_frame;
> > > +  cfun->machine->no_stack_frame = !require_stack_frame;
> > >  }
> > >
> > >  /* Finalize stack_realign_needed and frame_pointer_needed flags, which
> > > @@ -7519,6 +7532,14 @@ ix86_finalize_stack_frame_flags (void)
> > >        return;
> > >      }
> > >
> > > +  /* It is always safe to compute max_used_stack_alignment.  We
> > > +     compute it only if 128-bit aligned load/store may be generated
> > > +     on misaligned stack slot which will lead to segfault. */
> > > +  bool check_stack_slot
> > > +    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
> > > +  ix86_find_max_used_stack_alignment (stack_alignment,
> > > +                                     check_stack_slot);
> > > +
> > >    /* If the only reason for frame_pointer_needed is that we conservatively
> > >       assumed stack realignment might be needed or -fno-omit-frame-pointer
> > >       is used, but in the end nothing that needed the stack alignment had
> > > @@ -7538,12 +7559,11 @@ ix86_finalize_stack_frame_flags (void)
> > >            && flag_exceptions
> > >            && cfun->can_throw_non_call_exceptions)
> > >        && !ix86_frame_pointer_required ()
> > > -      && get_frame_size () == 0
> > > +      && ix86_get_frame_size () == 0
> > >        && ix86_nsaved_sseregs () == 0
> > >        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
> > >      {
> > > -      if (ix86_find_max_used_stack_alignment (stack_alignment,
> > > -                                             stack_realign))
> > > +      if (!cfun->machine->no_stack_frame)
> > >         {
> > >           /* Stack frame is required.  If stack alignment needed is less
> > >              than incoming stack boundary, don't realign stack.  */
> > > @@ -7631,17 +7651,14 @@ ix86_finalize_stack_frame_flags (void)
> > >           recompute_frame_layout_p = true;
> > >         }
> > >      }
> > > -  else if (crtl->max_used_stack_slot_alignment >= 128)
> > > +  else if (crtl->max_used_stack_slot_alignment >= 128
> > > +          && !cfun->machine->no_stack_frame)
> > >      {
> > >        /* We don't need to realign stack.  max_used_stack_alignment is
> > >          used to decide how stack frame should be aligned.  This is
> > > -        independent of any psABIs nor 32-bit vs 64-bit.  It is always
> > > -        safe to compute max_used_stack_alignment.  We compute it only
> > > -        if 128-bit aligned load/store may be generated on misaligned
> > > -        stack slot which will lead to segfault.   */
> > > -      if (ix86_find_max_used_stack_alignment (stack_alignment, true))
> > > -       cfun->machine->max_used_stack_alignment
> > > -         = stack_alignment / BITS_PER_UNIT;
> > > +        independent of any psABIs nor 32-bit vs 64-bit.  */
> > > +      cfun->machine->max_used_stack_alignment
> > > +       = stack_alignment / BITS_PER_UNIT;;
> >
> > Double semicolon.
> >
>
> Fixed.
>
> Here is the updated patch.   OK for trunk?

LGTM.

Thanks,
Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 54607748b0b..d0b2a4f8b70 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5012,6 +5012,19 @@  ix86_can_use_return_insn_p (void)
 	  && (frame.nregs + frame.nsseregs) == 0);
 }
 
+/* Return stack frame size.  get_frame_size () returns used stack slots
+   during compilation, which may be optimized out later.  no_stack_frame
+   is set to true if stack frame isn't needed.  */
+
+static HOST_WIDE_INT
+ix86_get_frame_size (void)
+{
+  if (cfun->machine->no_stack_frame)
+    return 0;
+  else
+    return get_frame_size ();
+}
+
 /* Value should be nonzero if functions must have frame pointers.
    Zero means the frame pointer need not be set up (and parms may
    be accessed via the stack pointer) in functions that seem suitable.  */
@@ -5035,7 +5048,7 @@  ix86_frame_pointer_required (void)
 
   /* Win64 SEH, very large frames need a frame-pointer as maximum stack
      allocation is 4GB.  */
-  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
+  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
     return true;
 
   /* SSE saves require frame-pointer when stack is misaligned.  */
@@ -5842,7 +5855,7 @@  ix86_compute_frame_layout (void)
   unsigned HOST_WIDE_INT stack_alignment_needed;
   HOST_WIDE_INT offset;
   unsigned HOST_WIDE_INT preferred_alignment;
-  HOST_WIDE_INT size = get_frame_size ();
+  HOST_WIDE_INT size = ix86_get_frame_size ();
   HOST_WIDE_INT to_allocate;
 
   /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
@@ -7436,11 +7449,11 @@  output_probe_stack_range (rtx reg, rtx end)
   return "";
 }
 
-/* Return true if stack frame is required.  Update STACK_ALIGNMENT
-   to the largest alignment, in bits, of stack slot used if stack
-   frame is required and CHECK_STACK_SLOT is true.  */
+/* Set no_stack_frame to true if stack frame isn't required.  Update
+   STACK_ALIGNMENT to the largest alignment, in bits, of stack slot
+   used if stack frame is required and CHECK_STACK_SLOT is true.  */
 
-static bool
+static void
 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
 				    bool check_stack_slot)
 {
@@ -7489,7 +7502,7 @@  ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
 	  }
     }
 
-  return require_stack_frame;
+  cfun->machine->no_stack_frame = !require_stack_frame;
 }
 
 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
@@ -7519,6 +7532,14 @@  ix86_finalize_stack_frame_flags (void)
       return;
     }
 
+  /* It is always safe to compute max_used_stack_alignment.  We
+     compute it only if 128-bit aligned load/store may be generated
+     on misaligned stack slot which will lead to segfault. */
+  bool check_stack_slot
+    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
+  ix86_find_max_used_stack_alignment (stack_alignment,
+				      check_stack_slot);
+
   /* If the only reason for frame_pointer_needed is that we conservatively
      assumed stack realignment might be needed or -fno-omit-frame-pointer
      is used, but in the end nothing that needed the stack alignment had
@@ -7538,12 +7559,11 @@  ix86_finalize_stack_frame_flags (void)
 	   && flag_exceptions
 	   && cfun->can_throw_non_call_exceptions)
       && !ix86_frame_pointer_required ()
-      && get_frame_size () == 0
+      && ix86_get_frame_size () == 0
       && ix86_nsaved_sseregs () == 0
       && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
     {
-      if (ix86_find_max_used_stack_alignment (stack_alignment,
-					      stack_realign))
+      if (!cfun->machine->no_stack_frame)
 	{
 	  /* Stack frame is required.  If stack alignment needed is less
 	     than incoming stack boundary, don't realign stack.  */
@@ -7631,17 +7651,14 @@  ix86_finalize_stack_frame_flags (void)
 	  recompute_frame_layout_p = true;
 	}
     }
-  else if (crtl->max_used_stack_slot_alignment >= 128)
+  else if (crtl->max_used_stack_slot_alignment >= 128
+	   && !cfun->machine->no_stack_frame)
     {
       /* We don't need to realign stack.  max_used_stack_alignment is
 	 used to decide how stack frame should be aligned.  This is
-	 independent of any psABIs nor 32-bit vs 64-bit.  It is always
-	 safe to compute max_used_stack_alignment.  We compute it only
-	 if 128-bit aligned load/store may be generated on misaligned
-	 stack slot which will lead to segfault.   */
-      if (ix86_find_max_used_stack_alignment (stack_alignment, true))
-	cfun->machine->max_used_stack_alignment
-	  = stack_alignment / BITS_PER_UNIT;
+	 independent of any psABIs nor 32-bit vs 64-bit.  */
+      cfun->machine->max_used_stack_alignment
+	= stack_alignment / BITS_PER_UNIT;;
     }
 
   if (crtl->stack_realign_needed != stack_realign)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index be1480fdcf8..6b4186d3dac 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2754,6 +2754,9 @@  struct GTY(()) machine_function {
   /* If true, ENDBR is queued at function entrance.  */
   BOOL_BITFIELD endbr_queued_at_entrance : 1;
 
+  /* Nonzero if the function doesn't need a stack frame.  */
+  BOOL_BITFIELD no_stack_frame : 1;
+
   /* The largest alignment, in bytes, of stack slot actually used.  */
   unsigned int max_used_stack_alignment;
 
diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
new file mode 100644
index 00000000000..c8bb0832fe2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+
+struct B
+{
+  char a[12];
+  int b;
+};
+
+struct B
+f2 (void)
+{
+  struct B x = {};
+  return x;
+}
+
+/* { dg-final { scan-assembler-not "(sub|add)(l|q)\[\\t \]*\\$\[0-9\]*,\[\\t \]*%\[re\]?sp" } } */
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
new file mode 100644
index 00000000000..e94fa1d18fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+
+struct B
+{
+  char a[12];
+  int b;
+};
+
+struct B
+f2 (void)
+{
+  struct B x = {};
+  return x;
+}
+
+/* { dg-final { scan-assembler-not "(sub|add)(l|q)\[\\t \]*\\$\[0-9\]*,\[\\t \]*%\[re\]?sp" } } */
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */