diff mbox series

[v2] x86-64: Find a scratch register for large model profiling

Message ID 20240201230247.214664-1-hjl.tools@gmail.com
State New
Headers show
Series [v2] x86-64: Find a scratch register for large model profiling | expand

Commit Message

H.J. Lu Feb. 1, 2024, 11:02 p.m. UTC
Changes in v2:

1. Add int_parameter_registers to machine_function to track integer
registers used for parameter passing.
2. Update x86_64_select_profile_regnum to try %r10 first and use an
caller-saved register, which isn't used for parameter passing.

---
2 scratch registers, %r10 and %r11, are available at function entry for
large model profiling.  But %r10 may be used by stack realignment and we
can't use %r10 in this case.  Add x86_64_select_profile_regnum to find
an caller-saved register, which isn't used for parameter passing, for
large model profiling and sorry if we can't find one.

gcc/

	PR target/113689
	* config/i386/i386.cc (set_int_parameter_registers_bit): New.
	(test_int_parameter_registers_bit): Likewise.
	(x86_64_select_profile_regnum): New.
	(construct_container): Call set_int_parameter_registers_bit for
	integer register parameter passing.
	(function_arg_32): Likewise.
	(x86_function_profiler): Call x86_64_select_profile_regnum to
	get a scratch register for large model profiling.
	* config/i386/i386.h (machine_function): Add
	int_parameter_registers.

gcc/testsuite/

	PR target/113689
	* gcc.target/i386/pr113689-1.c: New file.
	* gcc.target/i386/pr113689-2.c: Likewise.
	* gcc.target/i386/pr113689-3.c: Likewise.
---
 gcc/config/i386/i386.cc                    | 122 ++++++++++++++++++---
 gcc/config/i386/i386.h                     |   5 +
 gcc/testsuite/gcc.target/i386/pr113689-1.c |  49 +++++++++
 gcc/testsuite/gcc.target/i386/pr113689-2.c |  41 +++++++
 gcc/testsuite/gcc.target/i386/pr113689-3.c |  24 ++++
 5 files changed, 225 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-3.c

Comments

Jakub Jelinek Feb. 2, 2024, 12:22 p.m. UTC | #1
On Thu, Feb 01, 2024 at 03:02:47PM -0800, H.J. Lu wrote:
> @@ -2763,6 +2789,8 @@ construct_container (machine_mode mode, machine_mode orig_mode,
>        {
>        case X86_64_INTEGER_CLASS:
>        case X86_64_INTEGERSI_CLASS:
> +	if (!in_return)
> +	  set_int_parameter_registers_bit (intreg[0]);
>  	return gen_rtx_REG (mode, intreg[0]);
>        case X86_64_SSE_CLASS:
>        case X86_64_SSEHF_CLASS:
> @@ -2821,6 +2849,11 @@ construct_container (machine_mode mode, machine_mode orig_mode,
>        if (mode == BLKmode)
>  	{
>  	  /* Use TImode for BLKmode values in 2 integer registers.  */
> +	  if (!in_return)
> +	    {
> +	      set_int_parameter_registers_bit (intreg[0]);
> +	      set_int_parameter_registers_bit (intreg[1]);
> +	    }
>  	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
>  				      gen_rtx_REG (TImode, intreg[0]),
>  				      GEN_INT (0));

Isn't the above (computed from just whether a function has such an argument)
already available from cum->nregs or similar plus the sequence of argument
registers?  Or df which certainly also needs to know what registers contain
arguments?
Though, the above means a function argument register which isn't used in a
function will be impossible to use for mcount.
We certainly can use it (although var-tracking will not know it got
clobbered).
So, wouldn't it be better to ask at the start of prologue generation df what
registers are live at the start of the function (i.e. at the point of the
NOTE_INSN_PROLOG_END because rest of prologue is emitted before that) and
remember a suitable register for the profiling there?
> @@ -22749,6 +22789,38 @@ current_fentry_section (const char **name)
>    return true;
>  }
>  
> +/* Return an caller-saved register, which isn't used for parameter
> +   passing, at entry for profile.  */
> +
> +static int
> +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> +{
> +  /* Use %r10 if it isn't used by DRAP.  */
> +  bool r10_ok = !crtl->drap_reg || REGNO (crtl->drap_reg) != R10_REG;
> +  if (r10_ok)
> +    return R10_REG;
> +
> +  int i;
> +  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> +    if (GENERAL_REGNO_P (i)
> +	&& (r10_ok || i != R10_REG)

r10_ok is false here, so just
  if (flag_fentry != 0 || !crtl->drap_reg || REGNO (crtl->drap_reg) != R10_REG)
    return R10_REG;
at the start and don't declare r10_ok.

> +#ifdef NO_PROFILE_COUNTERS
> +	&& (r11_ok || i != R11_REG)
> +#else
> +	&& i != R11_REG
> +#endif
> +	&& (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
> +	&& !fixed_regs[i]
> +	&& call_used_regs[i]
> +	&& !test_int_parameter_registers_bit (i))
> +      return i;
> +
> +  sorry ("no register available for profiling %<-mcmodel=large%s%>",
> +	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
> +
> +  return INVALID_REGNUM;
> +}
> +
>  /* Output assembler code to FILE to increment profiler label # LABELNO
>     for profiling a function entry.  */
>  void
> @@ -22783,42 +22855,60 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
>  	fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
>  #endif
>  
> +      int scratch;
> +      const char *reg_prefix;
> +      const char *reg;
> +
>        if (!TARGET_PECOFF)
>  	{
>  	  switch (ix86_cmodel)
>  	    {
>  	    case CM_LARGE:
> -	      /* NB: R10 is caller-saved.  Although it can be used as a
> -		 static chain register, it is preserved when calling
> -		 mcount for nested functions.  */
> +	      scratch = x86_64_select_profile_regnum (true);
> +	      reg = hi_reg_name[scratch];
> +	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
>  	      if (ASSEMBLER_DIALECT == ASM_INTEL)
> -		fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
> -			       "\tcall\tr10\n", mcount_name);
> +		fprintf (file,
> +			 "1:\tmovabs\t%s%s, OFFSET FLAT:%s\n"
> +			 "\tcall\t%s%s\n",
> +			 reg_prefix, reg, mcount_name, reg_prefix, reg);
>  	      else
> -		fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
> -			 mcount_name);
> +		fprintf (file,
> +			 "1:\tmovabsq\t$%s, %%%s%s\n\tcall\t*%%%s%s\n",
> +			 mcount_name, reg_prefix, reg, reg_prefix, reg);
>  	      break;
>  	    case CM_LARGE_PIC:
>  #ifdef NO_PROFILE_COUNTERS
> +	      scratch = x86_64_select_profile_regnum (false);
> +	      reg = hi_reg_name[scratch];
> +	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
>  	      if (ASSEMBLER_DIALECT == ASM_INTEL)
>  		{
>  		  fprintf (file, "1:movabs\tr11, "
>  				 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
> -		  fprintf (file, "\tlea\tr10, 1b[rip]\n");
> -		  fprintf (file, "\tadd\tr10, r11\n");
> +		  fprintf (file, "\tlea\t%s%s, 1b[rip]\n",
> +			   reg_prefix, reg);
> +		  fprintf (file, "\tadd\t%s%s, r11\n",
> +			   reg_prefix, reg);
>  		  fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
>  			   mcount_name);
> -		  fprintf (file, "\tadd\tr10, r11\n");
> -		  fprintf (file, "\tcall\tr10\n");
> +		  fprintf (file, "\tadd\t%s%s, r11\n",
> +			   reg_prefix, reg);
> +		  fprintf (file, "\tcall\t%s%s\n",
> +			   reg_prefix, reg);
>  		  break;
>  		}
>  	      fprintf (file,
>  		       "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
> -	      fprintf (file, "\tleaq\t1b(%%rip), %%r10\n");
> -	      fprintf (file, "\taddq\t%%r11, %%r10\n");
> +	      fprintf (file, "\tleaq\t1b(%%rip), %%%s%s\n",
> +		       reg_prefix, reg);
> +	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
> +		       reg_prefix, reg);
>  	      fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
> -	      fprintf (file, "\taddq\t%%r11, %%r10\n");
> -	      fprintf (file, "\tcall\t*%%r10\n");
> +	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
> +		       reg_prefix, reg);
> +	      fprintf (file, "\tcall\t*%%%s%s\n",
> +		       reg_prefix, reg);
>  #else
>  	      sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
>  #endif
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 35ce8b00d36..7967bc5196c 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2847,6 +2847,11 @@ struct GTY(()) machine_function {
>    /* True if red zone is used.  */
>    BOOL_BITFIELD red_zone_used : 1;
>  
> +  /* Bit mask for integer registers used for parameter passing.
> +     The lower 8 bits are for legacy registers and the upper
> +     8 bits are for r8-r15.  */
> +  unsigned int int_parameter_registers : 16;
> +
>    /* The largest alignment, in bytes, of stack slot actually used.  */
>    unsigned int max_used_stack_alignment;
>  
> diff --git a/gcc/testsuite/gcc.target/i386/pr113689-1.c b/gcc/testsuite/gcc.target/i386/pr113689-1.c
> new file mode 100644
> index 00000000000..c32445e0fc4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr113689-1.c
> @@ -0,0 +1,49 @@
> +/* { dg-do run { target { lp64 && fpic } } } */
> +/* { dg-options "-O2 -fno-pic -fprofile -mcmodel=large" } */
> +
> +#include <stdarg.h>
> +
> +__attribute__((noipa,noclone,noinline))
> +void
> +bar (int a1, int a2, int a3, int a4, int a5, int a6,
> +     char *x, char *y, int *z)
> +{
> +  if (a1 != 1)
> +    __builtin_abort ();
> +  if (a2 != 2)
> +    __builtin_abort ();
> +  if (a3 != 3)
> +    __builtin_abort ();
> +  if (a4 != 4)
> +    __builtin_abort ();
> +  if (a5 != 5)
> +    __builtin_abort ();
> +  if (a6 != 6)
> +    __builtin_abort ();
> +  x[0] = 42;
> +  y[0] = 42;
> +  if (z[0] != 16)
> +    __builtin_abort ();
> +}
> +
> +__attribute__((noipa,noclone,noinline))
> +void 
> +foo (int c, int d, int e, int f, int g, int h, int z, ...)
> +{
> +  typedef char B[32];
> +  B b __attribute__((aligned (32)));
> +  va_list ap;
> +  va_start (ap, z);
> +  int x = va_arg (ap, int);
> +  if (x != 38)
> +    __builtin_abort ();
> +  bar (c, d, e, f, g, h, &b[0], __builtin_alloca (z), &z);
> +  va_end (ap);
> +}
> +
> +int
> +main ()
> +{
> +  foo (1, 2, 3, 4, 5, 6, 16, 38);
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr113689-2.c b/gcc/testsuite/gcc.target/i386/pr113689-2.c
> new file mode 100644
> index 00000000000..fec5b171a39
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr113689-2.c
> @@ -0,0 +1,41 @@
> +/* { dg-do run { target { lp64 && fpic } } } */
> +/* { dg-options "-O2 -fpic -fprofile -mcmodel=large" } */
> +
> +__attribute__((noipa,noclone,noinline))
> +void
> +bar (int a1, int a2, int a3, int a4, int a5, int a6,
> +     char *x, char *y, int *z)
> +{
> +  if (a1 != 1)
> +    __builtin_abort ();
> +  if (a2 != 2)
> +    __builtin_abort ();
> +  if (a3 != 3)
> +    __builtin_abort ();
> +  if (a4 != 4)
> +    __builtin_abort ();
> +  if (a5 != 5)
> +    __builtin_abort ();
> +  if (a6 != 6)
> +    __builtin_abort ();
> +  x[0] = 42;
> +  y[0] = 42;
> +  if (z[0] != 16)
> +    __builtin_abort ();
> +}
> +
> +__attribute__((noipa,noclone,noinline))
> +void 
> +foo (int c, int d, int e, int f, int g, int h, int z)
> +{
> +  typedef char B[32];
> +  B b __attribute__((aligned (32)));
> +  bar (c, d, e, f, g, h, &b[0], __builtin_alloca (z), &z);
> +}
> +
> +int
> +main ()
> +{
> +  foo (1, 2, 3, 4, 5, 6, 16);
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr113689-3.c b/gcc/testsuite/gcc.target/i386/pr113689-3.c
> new file mode 100644
> index 00000000000..10099fc8d96
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr113689-3.c
> @@ -0,0 +1,24 @@
> +/* { dg-do run { target { lp64 && avx_runtime } } } */

I think this test isn't needed.
If you really want it, it should have bitint effective
target and guard with __BITINT_MAXWIDTH__ >= 511 to be
consistent with other bitint tests.

> +/* { dg-options "-O2 -fprofile -mcmodel=large -mavx" } */
> +
> +__attribute__((noipa,noclone,noinline))
> +_BitInt(511)
> +foo(_BitInt(7) a, _BitInt(511) b)
> +{
> +  ({
> +    volatile _BitInt(511) d = (-b / a);
> +    if (d != -1)
> +      __builtin_abort();
> +    d;
> +  });
> +  return b;
> +}
> +
> +int
> +main(void)
> +{
> +  _BitInt(511) x = foo(5, 5);
> +  if (x != 5)
> +    __builtin_abort();
> +  return 0;
> +}
> -- 
> 2.43.0

	Jakub
H.J. Lu Feb. 2, 2024, 3:44 p.m. UTC | #2
On Fri, Feb 2, 2024 at 4:22 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Thu, Feb 01, 2024 at 03:02:47PM -0800, H.J. Lu wrote:
> > @@ -2763,6 +2789,8 @@ construct_container (machine_mode mode, machine_mode orig_mode,
> >        {
> >        case X86_64_INTEGER_CLASS:
> >        case X86_64_INTEGERSI_CLASS:
> > +     if (!in_return)
> > +       set_int_parameter_registers_bit (intreg[0]);
> >       return gen_rtx_REG (mode, intreg[0]);
> >        case X86_64_SSE_CLASS:
> >        case X86_64_SSEHF_CLASS:
> > @@ -2821,6 +2849,11 @@ construct_container (machine_mode mode, machine_mode orig_mode,
> >        if (mode == BLKmode)
> >       {
> >         /* Use TImode for BLKmode values in 2 integer registers.  */
> > +       if (!in_return)
> > +         {
> > +           set_int_parameter_registers_bit (intreg[0]);
> > +           set_int_parameter_registers_bit (intreg[1]);
> > +         }
> >         exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
> >                                     gen_rtx_REG (TImode, intreg[0]),
> >                                     GEN_INT (0));
>
> Isn't the above (computed from just whether a function has such an argument)
> already available from cum->nregs or similar plus the sequence of argument
> registers?  Or df which certainly also needs to know what registers contain
> arguments?

Fixed in v4 to use df_get_live_out instead.

> Though, the above means a function argument register which isn't used in a
> function will be impossible to use for mcount.
> We certainly can use it (although var-tracking will not know it got
> clobbered).
> So, wouldn't it be better to ask at the start of prologue generation df what
> registers are live at the start of the function (i.e. at the point of the
> NOTE_INSN_PROLOG_END because rest of prologue is emitted before that) and
> remember a suitable register for the profiling there?
> > @@ -22749,6 +22789,38 @@ current_fentry_section (const char **name)
> >    return true;
> >  }
> >
> > +/* Return an caller-saved register, which isn't used for parameter
> > +   passing, at entry for profile.  */
> > +
> > +static int
> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> > +{
> > +  /* Use %r10 if it isn't used by DRAP.  */
> > +  bool r10_ok = !crtl->drap_reg || REGNO (crtl->drap_reg) != R10_REG;
> > +  if (r10_ok)
> > +    return R10_REG;
> > +
> > +  int i;
> > +  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> > +    if (GENERAL_REGNO_P (i)
> > +     && (r10_ok || i != R10_REG)
>
> r10_ok is false here, so just
>   if (flag_fentry != 0 || !crtl->drap_reg || REGNO (crtl->drap_reg) != R10_REG)
>     return R10_REG;
> at the start and don't declare r10_ok.
>
> > +#ifdef NO_PROFILE_COUNTERS
> > +     && (r11_ok || i != R11_REG)
> > +#else
> > +     && i != R11_REG
> > +#endif
> > +     && (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
> > +     && !fixed_regs[i]
> > +     && call_used_regs[i]
> > +     && !test_int_parameter_registers_bit (i))
> > +      return i;
> > +
> > +  sorry ("no register available for profiling %<-mcmodel=large%s%>",
> > +      ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
> > +
> > +  return INVALID_REGNUM;
> > +}
> > +
> >  /* Output assembler code to FILE to increment profiler label # LABELNO
> >     for profiling a function entry.  */
> >  void
> > @@ -22783,42 +22855,60 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
> >       fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
> >  #endif
> >
> > +      int scratch;
> > +      const char *reg_prefix;
> > +      const char *reg;
> > +
> >        if (!TARGET_PECOFF)
> >       {
> >         switch (ix86_cmodel)
> >           {
> >           case CM_LARGE:
> > -           /* NB: R10 is caller-saved.  Although it can be used as a
> > -              static chain register, it is preserved when calling
> > -              mcount for nested functions.  */
> > +           scratch = x86_64_select_profile_regnum (true);
> > +           reg = hi_reg_name[scratch];
> > +           reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
> >             if (ASSEMBLER_DIALECT == ASM_INTEL)
> > -             fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
> > -                            "\tcall\tr10\n", mcount_name);
> > +             fprintf (file,
> > +                      "1:\tmovabs\t%s%s, OFFSET FLAT:%s\n"
> > +                      "\tcall\t%s%s\n",
> > +                      reg_prefix, reg, mcount_name, reg_prefix, reg);
> >             else
> > -             fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
> > -                      mcount_name);
> > +             fprintf (file,
> > +                      "1:\tmovabsq\t$%s, %%%s%s\n\tcall\t*%%%s%s\n",
> > +                      mcount_name, reg_prefix, reg, reg_prefix, reg);
> >             break;
> >           case CM_LARGE_PIC:
> >  #ifdef NO_PROFILE_COUNTERS
> > +           scratch = x86_64_select_profile_regnum (false);
> > +           reg = hi_reg_name[scratch];
> > +           reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
> >             if (ASSEMBLER_DIALECT == ASM_INTEL)
> >               {
> >                 fprintf (file, "1:movabs\tr11, "
> >                                "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
> > -               fprintf (file, "\tlea\tr10, 1b[rip]\n");
> > -               fprintf (file, "\tadd\tr10, r11\n");
> > +               fprintf (file, "\tlea\t%s%s, 1b[rip]\n",
> > +                        reg_prefix, reg);
> > +               fprintf (file, "\tadd\t%s%s, r11\n",
> > +                        reg_prefix, reg);
> >                 fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
> >                          mcount_name);
> > -               fprintf (file, "\tadd\tr10, r11\n");
> > -               fprintf (file, "\tcall\tr10\n");
> > +               fprintf (file, "\tadd\t%s%s, r11\n",
> > +                        reg_prefix, reg);
> > +               fprintf (file, "\tcall\t%s%s\n",
> > +                        reg_prefix, reg);
> >                 break;
> >               }
> >             fprintf (file,
> >                      "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
> > -           fprintf (file, "\tleaq\t1b(%%rip), %%r10\n");
> > -           fprintf (file, "\taddq\t%%r11, %%r10\n");
> > +           fprintf (file, "\tleaq\t1b(%%rip), %%%s%s\n",
> > +                    reg_prefix, reg);
> > +           fprintf (file, "\taddq\t%%r11, %%%s%s\n",
> > +                    reg_prefix, reg);
> >             fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
> > -           fprintf (file, "\taddq\t%%r11, %%r10\n");
> > -           fprintf (file, "\tcall\t*%%r10\n");
> > +           fprintf (file, "\taddq\t%%r11, %%%s%s\n",
> > +                    reg_prefix, reg);
> > +           fprintf (file, "\tcall\t*%%%s%s\n",
> > +                    reg_prefix, reg);
> >  #else
> >             sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
> >  #endif
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index 35ce8b00d36..7967bc5196c 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -2847,6 +2847,11 @@ struct GTY(()) machine_function {
> >    /* True if red zone is used.  */
> >    BOOL_BITFIELD red_zone_used : 1;
> >
> > +  /* Bit mask for integer registers used for parameter passing.
> > +     The lower 8 bits are for legacy registers and the upper
> > +     8 bits are for r8-r15.  */
> > +  unsigned int int_parameter_registers : 16;
> > +
> >    /* The largest alignment, in bytes, of stack slot actually used.  */
> >    unsigned int max_used_stack_alignment;
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pr113689-1.c b/gcc/testsuite/gcc.target/i386/pr113689-1.c
> > new file mode 100644
> > index 00000000000..c32445e0fc4
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr113689-1.c
> > @@ -0,0 +1,49 @@
> > +/* { dg-do run { target { lp64 && fpic } } } */
> > +/* { dg-options "-O2 -fno-pic -fprofile -mcmodel=large" } */
> > +
> > +#include <stdarg.h>
> > +
> > +__attribute__((noipa,noclone,noinline))
> > +void
> > +bar (int a1, int a2, int a3, int a4, int a5, int a6,
> > +     char *x, char *y, int *z)
> > +{
> > +  if (a1 != 1)
> > +    __builtin_abort ();
> > +  if (a2 != 2)
> > +    __builtin_abort ();
> > +  if (a3 != 3)
> > +    __builtin_abort ();
> > +  if (a4 != 4)
> > +    __builtin_abort ();
> > +  if (a5 != 5)
> > +    __builtin_abort ();
> > +  if (a6 != 6)
> > +    __builtin_abort ();
> > +  x[0] = 42;
> > +  y[0] = 42;
> > +  if (z[0] != 16)
> > +    __builtin_abort ();
> > +}
> > +
> > +__attribute__((noipa,noclone,noinline))
> > +void
> > +foo (int c, int d, int e, int f, int g, int h, int z, ...)
> > +{
> > +  typedef char B[32];
> > +  B b __attribute__((aligned (32)));
> > +  va_list ap;
> > +  va_start (ap, z);
> > +  int x = va_arg (ap, int);
> > +  if (x != 38)
> > +    __builtin_abort ();
> > +  bar (c, d, e, f, g, h, &b[0], __builtin_alloca (z), &z);
> > +  va_end (ap);
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  foo (1, 2, 3, 4, 5, 6, 16, 38);
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr113689-2.c b/gcc/testsuite/gcc.target/i386/pr113689-2.c
> > new file mode 100644
> > index 00000000000..fec5b171a39
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr113689-2.c
> > @@ -0,0 +1,41 @@
> > +/* { dg-do run { target { lp64 && fpic } } } */
> > +/* { dg-options "-O2 -fpic -fprofile -mcmodel=large" } */
> > +
> > +__attribute__((noipa,noclone,noinline))
> > +void
> > +bar (int a1, int a2, int a3, int a4, int a5, int a6,
> > +     char *x, char *y, int *z)
> > +{
> > +  if (a1 != 1)
> > +    __builtin_abort ();
> > +  if (a2 != 2)
> > +    __builtin_abort ();
> > +  if (a3 != 3)
> > +    __builtin_abort ();
> > +  if (a4 != 4)
> > +    __builtin_abort ();
> > +  if (a5 != 5)
> > +    __builtin_abort ();
> > +  if (a6 != 6)
> > +    __builtin_abort ();
> > +  x[0] = 42;
> > +  y[0] = 42;
> > +  if (z[0] != 16)
> > +    __builtin_abort ();
> > +}
> > +
> > +__attribute__((noipa,noclone,noinline))
> > +void
> > +foo (int c, int d, int e, int f, int g, int h, int z)
> > +{
> > +  typedef char B[32];
> > +  B b __attribute__((aligned (32)));
> > +  bar (c, d, e, f, g, h, &b[0], __builtin_alloca (z), &z);
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  foo (1, 2, 3, 4, 5, 6, 16);
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr113689-3.c b/gcc/testsuite/gcc.target/i386/pr113689-3.c
> > new file mode 100644
> > index 00000000000..10099fc8d96
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr113689-3.c
> > @@ -0,0 +1,24 @@
> > +/* { dg-do run { target { lp64 && avx_runtime } } } */
>
> I think this test isn't needed.
> If you really want it, it should have bitint effective
> target and guard with __BITINT_MAXWIDTH__ >= 511 to be
> consistent with other bitint tests.

Removed in v4.

> > +/* { dg-options "-O2 -fprofile -mcmodel=large -mavx" } */
> > +
> > +__attribute__((noipa,noclone,noinline))
> > +_BitInt(511)
> > +foo(_BitInt(7) a, _BitInt(511) b)
> > +{
> > +  ({
> > +    volatile _BitInt(511) d = (-b / a);
> > +    if (d != -1)
> > +      __builtin_abort();
> > +    d;
> > +  });
> > +  return b;
> > +}
> > +
> > +int
> > +main(void)
> > +{
> > +  _BitInt(511) x = foo(5, 5);
> > +  if (x != 5)
> > +    __builtin_abort();
> > +  return 0;
> > +}
> > --
> > 2.43.0
>
>         Jakub
>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..d0538f138e9 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -2628,6 +2628,32 @@  classify_argument (machine_mode mode, const_tree type,
   return n;
 }
 
+/* Set the integer register REGNO bit in int_parameter_registers.  */
+
+static void
+set_int_parameter_registers_bit (int regno)
+{
+  if (LEGACY_INT_REGNO_P (regno))
+    cfun->machine->int_parameter_registers |= 1 << regno;
+  else
+    cfun->machine->int_parameter_registers
+      |= 1 << (regno - FIRST_REX_INT_REG + 8);
+}
+
+/* Return true if the integer register REGNO bit in
+   int_parameter_registers is set.  */
+
+static bool
+test_int_parameter_registers_bit (int regno)
+{
+  if (LEGACY_INT_REGNO_P (regno))
+    return (cfun->machine->int_parameter_registers
+	    & (1 << regno)) != 0;
+  else
+    return (cfun->machine->int_parameter_registers
+	    & (1 << (regno - FIRST_REX_INT_REG + 8))) != 0;
+}
+
 /* Examine the argument and return set number of register required in each
    class.  Return true iff parameter should be passed in memory.  */
 
@@ -2763,6 +2789,8 @@  construct_container (machine_mode mode, machine_mode orig_mode,
       {
       case X86_64_INTEGER_CLASS:
       case X86_64_INTEGERSI_CLASS:
+	if (!in_return)
+	  set_int_parameter_registers_bit (intreg[0]);
 	return gen_rtx_REG (mode, intreg[0]);
       case X86_64_SSE_CLASS:
       case X86_64_SSEHF_CLASS:
@@ -2821,6 +2849,11 @@  construct_container (machine_mode mode, machine_mode orig_mode,
       if (mode == BLKmode)
 	{
 	  /* Use TImode for BLKmode values in 2 integer registers.  */
+	  if (!in_return)
+	    {
+	      set_int_parameter_registers_bit (intreg[0]);
+	      set_int_parameter_registers_bit (intreg[1]);
+	    }
 	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
 				      gen_rtx_REG (TImode, intreg[0]),
 				      GEN_INT (0));
@@ -2829,7 +2862,11 @@  construct_container (machine_mode mode, machine_mode orig_mode,
 	  return ret;
 	}
       else
-	return gen_rtx_REG (mode, intreg[0]);
+	{
+	  if (!in_return)
+	    set_int_parameter_registers_bit (intreg[0]);
+	  return gen_rtx_REG (mode, intreg[0]);
+	}
     }
 
   /* Otherwise figure out the entries of the PARALLEL.  */
@@ -2860,6 +2897,8 @@  construct_container (machine_mode mode, machine_mode orig_mode,
 	      = gen_rtx_EXPR_LIST (VOIDmode,
 				   gen_rtx_REG (tmpmode, *intreg),
 				   GEN_INT (i*8));
+	    if (!in_return)
+	      set_int_parameter_registers_bit (*intreg);
 	    intreg++;
 	    break;
 	  case X86_64_SSEHF_CLASS:
@@ -3241,6 +3280,7 @@  pass_in_reg:
 	      if (regno == AX_REG)
 		regno = CX_REG;
 	    }
+	  set_int_parameter_registers_bit (regno);
 	  return gen_rtx_REG (mode, regno);
 	}
       break;
@@ -22749,6 +22789,38 @@  current_fentry_section (const char **name)
   return true;
 }
 
+/* Return an caller-saved register, which isn't used for parameter
+   passing, at entry for profile.  */
+
+static int
+x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
+{
+  /* Use %r10 if it isn't used by DRAP.  */
+  bool r10_ok = !crtl->drap_reg || REGNO (crtl->drap_reg) != R10_REG;
+  if (r10_ok)
+    return R10_REG;
+
+  int i;
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    if (GENERAL_REGNO_P (i)
+	&& (r10_ok || i != R10_REG)
+#ifdef NO_PROFILE_COUNTERS
+	&& (r11_ok || i != R11_REG)
+#else
+	&& i != R11_REG
+#endif
+	&& (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
+	&& !fixed_regs[i]
+	&& call_used_regs[i]
+	&& !test_int_parameter_registers_bit (i))
+      return i;
+
+  sorry ("no register available for profiling %<-mcmodel=large%s%>",
+	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
+
+  return INVALID_REGNUM;
+}
+
 /* Output assembler code to FILE to increment profiler label # LABELNO
    for profiling a function entry.  */
 void
@@ -22783,42 +22855,60 @@  x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
 	fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
 #endif
 
+      int scratch;
+      const char *reg_prefix;
+      const char *reg;
+
       if (!TARGET_PECOFF)
 	{
 	  switch (ix86_cmodel)
 	    {
 	    case CM_LARGE:
-	      /* NB: R10 is caller-saved.  Although it can be used as a
-		 static chain register, it is preserved when calling
-		 mcount for nested functions.  */
+	      scratch = x86_64_select_profile_regnum (true);
+	      reg = hi_reg_name[scratch];
+	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
-		fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
-			       "\tcall\tr10\n", mcount_name);
+		fprintf (file,
+			 "1:\tmovabs\t%s%s, OFFSET FLAT:%s\n"
+			 "\tcall\t%s%s\n",
+			 reg_prefix, reg, mcount_name, reg_prefix, reg);
 	      else
-		fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
-			 mcount_name);
+		fprintf (file,
+			 "1:\tmovabsq\t$%s, %%%s%s\n\tcall\t*%%%s%s\n",
+			 mcount_name, reg_prefix, reg, reg_prefix, reg);
 	      break;
 	    case CM_LARGE_PIC:
 #ifdef NO_PROFILE_COUNTERS
+	      scratch = x86_64_select_profile_regnum (false);
+	      reg = hi_reg_name[scratch];
+	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
 		{
 		  fprintf (file, "1:movabs\tr11, "
 				 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
-		  fprintf (file, "\tlea\tr10, 1b[rip]\n");
-		  fprintf (file, "\tadd\tr10, r11\n");
+		  fprintf (file, "\tlea\t%s%s, 1b[rip]\n",
+			   reg_prefix, reg);
+		  fprintf (file, "\tadd\t%s%s, r11\n",
+			   reg_prefix, reg);
 		  fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
 			   mcount_name);
-		  fprintf (file, "\tadd\tr10, r11\n");
-		  fprintf (file, "\tcall\tr10\n");
+		  fprintf (file, "\tadd\t%s%s, r11\n",
+			   reg_prefix, reg);
+		  fprintf (file, "\tcall\t%s%s\n",
+			   reg_prefix, reg);
 		  break;
 		}
 	      fprintf (file,
 		       "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
-	      fprintf (file, "\tleaq\t1b(%%rip), %%r10\n");
-	      fprintf (file, "\taddq\t%%r11, %%r10\n");
+	      fprintf (file, "\tleaq\t1b(%%rip), %%%s%s\n",
+		       reg_prefix, reg);
+	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
+		       reg_prefix, reg);
 	      fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
-	      fprintf (file, "\taddq\t%%r11, %%r10\n");
-	      fprintf (file, "\tcall\t*%%r10\n");
+	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
+		       reg_prefix, reg);
+	      fprintf (file, "\tcall\t*%%%s%s\n",
+		       reg_prefix, reg);
 #else
 	      sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
 #endif
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 35ce8b00d36..7967bc5196c 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2847,6 +2847,11 @@  struct GTY(()) machine_function {
   /* True if red zone is used.  */
   BOOL_BITFIELD red_zone_used : 1;
 
+  /* Bit mask for integer registers used for parameter passing.
+     The lower 8 bits are for legacy registers and the upper
+     8 bits are for r8-r15.  */
+  unsigned int int_parameter_registers : 16;
+
   /* The largest alignment, in bytes, of stack slot actually used.  */
   unsigned int max_used_stack_alignment;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr113689-1.c b/gcc/testsuite/gcc.target/i386/pr113689-1.c
new file mode 100644
index 00000000000..c32445e0fc4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113689-1.c
@@ -0,0 +1,49 @@ 
+/* { dg-do run { target { lp64 && fpic } } } */
+/* { dg-options "-O2 -fno-pic -fprofile -mcmodel=large" } */
+
+#include <stdarg.h>
+
+__attribute__((noipa,noclone,noinline))
+void
+bar (int a1, int a2, int a3, int a4, int a5, int a6,
+     char *x, char *y, int *z)
+{
+  if (a1 != 1)
+    __builtin_abort ();
+  if (a2 != 2)
+    __builtin_abort ();
+  if (a3 != 3)
+    __builtin_abort ();
+  if (a4 != 4)
+    __builtin_abort ();
+  if (a5 != 5)
+    __builtin_abort ();
+  if (a6 != 6)
+    __builtin_abort ();
+  x[0] = 42;
+  y[0] = 42;
+  if (z[0] != 16)
+    __builtin_abort ();
+}
+
+__attribute__((noipa,noclone,noinline))
+void 
+foo (int c, int d, int e, int f, int g, int h, int z, ...)
+{
+  typedef char B[32];
+  B b __attribute__((aligned (32)));
+  va_list ap;
+  va_start (ap, z);
+  int x = va_arg (ap, int);
+  if (x != 38)
+    __builtin_abort ();
+  bar (c, d, e, f, g, h, &b[0], __builtin_alloca (z), &z);
+  va_end (ap);
+}
+
+int
+main ()
+{
+  foo (1, 2, 3, 4, 5, 6, 16, 38);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr113689-2.c b/gcc/testsuite/gcc.target/i386/pr113689-2.c
new file mode 100644
index 00000000000..fec5b171a39
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113689-2.c
@@ -0,0 +1,41 @@ 
+/* { dg-do run { target { lp64 && fpic } } } */
+/* { dg-options "-O2 -fpic -fprofile -mcmodel=large" } */
+
+__attribute__((noipa,noclone,noinline))
+void
+bar (int a1, int a2, int a3, int a4, int a5, int a6,
+     char *x, char *y, int *z)
+{
+  if (a1 != 1)
+    __builtin_abort ();
+  if (a2 != 2)
+    __builtin_abort ();
+  if (a3 != 3)
+    __builtin_abort ();
+  if (a4 != 4)
+    __builtin_abort ();
+  if (a5 != 5)
+    __builtin_abort ();
+  if (a6 != 6)
+    __builtin_abort ();
+  x[0] = 42;
+  y[0] = 42;
+  if (z[0] != 16)
+    __builtin_abort ();
+}
+
+__attribute__((noipa,noclone,noinline))
+void 
+foo (int c, int d, int e, int f, int g, int h, int z)
+{
+  typedef char B[32];
+  B b __attribute__((aligned (32)));
+  bar (c, d, e, f, g, h, &b[0], __builtin_alloca (z), &z);
+}
+
+int
+main ()
+{
+  foo (1, 2, 3, 4, 5, 6, 16);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr113689-3.c b/gcc/testsuite/gcc.target/i386/pr113689-3.c
new file mode 100644
index 00000000000..10099fc8d96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113689-3.c
@@ -0,0 +1,24 @@ 
+/* { dg-do run { target { lp64 && avx_runtime } } } */
+/* { dg-options "-O2 -fprofile -mcmodel=large -mavx" } */
+
+__attribute__((noipa,noclone,noinline))
+_BitInt(511)
+foo(_BitInt(7) a, _BitInt(511) b)
+{
+  ({
+    volatile _BitInt(511) d = (-b / a);
+    if (d != -1)
+      __builtin_abort();
+    d;
+  });
+  return b;
+}
+
+int
+main(void)
+{
+  _BitInt(511) x = foo(5, 5);
+  if (x != 5)
+    __builtin_abort();
+  return 0;
+}