diff mbox series

x86-64: Find a scratch register for large model profiling

Message ID 20240201181530.99684-1-hjl.tools@gmail.com
State New
Headers show
Series x86-64: Find a scratch register for large model profiling | expand

Commit Message

H.J. Lu Feb. 1, 2024, 6:15 p.m. UTC
2 scratch registers, %r10 and %r11, are available at function entry for
large model profiling.  But %r10 may be used by stack realignment and we
can't use %r10 in this case.  Add x86_64_select_profile_regnum to find
a scratch register for large model profiling and sorry if we can't find
one.

gcc/

	PR target/113689
	* config/i386/i386.cc (x86_64_select_profile_regnum): New.
	(x86_function_profiler): Call x86_64_select_profile_regnum to
	get a scratch register for large model profiling.

gcc/testsuite/

	PR target/113689
	* gcc.target/i386/pr113689-1.c: New file.
	* gcc.target/i386/pr113689-2.c: Likewise.
	* gcc.target/i386/pr113689-3.c: Likewise.
	* gcc.target/i386/pr98482-1.c: Updated.
	* gcc.target/i386/pr98482-2.c: Likewise.
---
 gcc/config/i386/i386.cc                    | 73 +++++++++++++++++-----
 gcc/testsuite/gcc.target/i386/pr113689-1.c | 41 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr113689-2.c | 32 ++++++++++
 gcc/testsuite/gcc.target/i386/pr113689-3.c | 24 +++++++
 gcc/testsuite/gcc.target/i386/pr98482-1.c  |  4 +-
 gcc/testsuite/gcc.target/i386/pr98482-2.c  |  2 +-
 6 files changed, 158 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-3.c

Comments

Jakub Jelinek Feb. 1, 2024, 6:31 p.m. UTC | #1
On Thu, Feb 01, 2024 at 10:15:30AM -0800, H.J. Lu wrote:
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -22749,6 +22749,31 @@ current_fentry_section (const char **name)
>    return true;
>  }
>  
> +/* Return an unused caller-saved register at entry for profile.  */
> +
> +static int
> +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> +{
> +  int i;

Why not just return R10_REG here if flag_entry != 0 (i.e. keep existing
behavior unless emitting profiler after prologue)?

> +  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> +    if (GENERAL_REGNO_P (i)
> +#ifdef NO_PROFILE_COUNTERS
> +	&& (r11_ok || i != R11_REG)
> +#else
> +	&& i != R11_REG
> +#endif
> +	&& (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
> +	&& !fixed_regs[i]
> +	&& call_used_regs[i]
> +	&& !df_regs_ever_live_p (i))

Also, isn't this too restrictive?
I mean, all we care about is whether there is some register
which is not live across the NOTE_INSN_PROLOG_END note, no?
I.e. doesn't contain any of function's argument that are used later,
and isn't set in the prologue to be used later.  E.g. call used
register which is just saved to stack in the prologue might be just fine.

> +      return i;
> +
> +  sorry ("No register available for profiling %<-mcmodel=large%s%>",

Diagnostics shouldn't start with capital letter.

> +	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
> +
> +  return INVALID_REGNUM;
> +}
> +
>  /* Output assembler code to FILE to increment profiler label # LABELNO
>     for profiling a function entry.  */
>  void

	Jakub
H.J. Lu Feb. 1, 2024, 11:02 p.m. UTC | #2
On Thu, Feb 1, 2024 at 10:32 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Thu, Feb 01, 2024 at 10:15:30AM -0800, H.J. Lu wrote:
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -22749,6 +22749,31 @@ current_fentry_section (const char **name)
> >    return true;
> >  }
> >
> > +/* Return an unused caller-saved register at entry for profile.  */
> > +
> > +static int
> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> > +{
> > +  int i;
>
> Why not just return R10_REG here if flag_entry != 0 (i.e. keep existing
> behavior unless emitting profiler after prologue)?

Fixed in v2.

> > +  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> > +    if (GENERAL_REGNO_P (i)
> > +#ifdef NO_PROFILE_COUNTERS
> > +     && (r11_ok || i != R11_REG)
> > +#else
> > +     && i != R11_REG
> > +#endif
> > +     && (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
> > +     && !fixed_regs[i]
> > +     && call_used_regs[i]
> > +     && !df_regs_ever_live_p (i))
>
> Also, isn't this too restrictive?
> I mean, all we care about is whether there is some register
> which is not live across the NOTE_INSN_PROLOG_END note, no?
> I.e. doesn't contain any of function's argument that are used later,
> and isn't set in the prologue to be used later.  E.g. call used
> register which is just saved to stack in the prologue might be just fine.

Fixed in v2.

> > +      return i;
> > +
> > +  sorry ("No register available for profiling %<-mcmodel=large%s%>",
>
> Diagnostics shouldn't start with capital letter.
>
> > +      ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
> > +
> > +  return INVALID_REGNUM;
> > +}
> > +
> >  /* Output assembler code to FILE to increment profiler label # LABELNO
> >     for profiling a function entry.  */
> >  void
>
>         Jakub
>
Bernhard Reutner-Fischer Feb. 2, 2024, 12:07 p.m. UTC | #3
On 2 February 2024 00:02:54 CET, "H.J. Lu" <hjl.tools@gmail.com> wrote:
>On Thu, Feb 1, 2024 at 10:32 AM Jakub Jelinek <jakub@redhat.com> wrote:
>>
>> On Thu, Feb 01, 2024 at 10:15:30AM -0800, H.J. Lu wrote:
>> > --- a/gcc/config/i386/i386.cc
>> > +++ b/gcc/config/i386/i386.cc
>> > @@ -22749,6 +22749,31 @@ current_fentry_section (const char **name)
>> >    return true;
>> >  }
>> >
>> > +/* Return an unused caller-saved register at entry for profile.  */
>> > +
>> > +static int
>> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
>> > +{
>> > +  int i;
>>
>> Why not just return R10_REG here if flag_entry != 0 (i.e. keep existing
>> behavior unless emitting profiler after prologue)?
>
>Fixed in v2.

Nit: r10_ok is now superfluous, but lets wait for Jakub.
thanks
H.J. Lu Feb. 2, 2024, 12:22 p.m. UTC | #4
On Fri, Feb 2, 2024 at 4:07 AM <rep.dot.nop@gmail.com> wrote:
>
> On 2 February 2024 00:02:54 CET, "H.J. Lu" <hjl.tools@gmail.com> wrote:
> >On Thu, Feb 1, 2024 at 10:32 AM Jakub Jelinek <jakub@redhat.com> wrote:
> >>
> >> On Thu, Feb 01, 2024 at 10:15:30AM -0800, H.J. Lu wrote:
> >> > --- a/gcc/config/i386/i386.cc
> >> > +++ b/gcc/config/i386/i386.cc
> >> > @@ -22749,6 +22749,31 @@ current_fentry_section (const char **name)
> >> >    return true;
> >> >  }
> >> >
> >> > +/* Return an unused caller-saved register at entry for profile.  */
> >> > +
> >> > +static int
> >> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> >> > +{
> >> > +  int i;
> >>
> >> Why not just return R10_REG here if flag_entry != 0 (i.e. keep existing
> >> behavior unless emitting profiler after prologue)?
> >
> >Fixed in v2.
>
> Nit: r10_ok is now superfluous, but lets wait for Jakub.
> thanks

Fixed in v3.

Thanks.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..04f88a7162b 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22749,6 +22749,31 @@  current_fentry_section (const char **name)
   return true;
 }
 
+/* Return an unused caller-saved register at entry for profile.  */
+
+static int
+x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
+{
+  int i;
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    if (GENERAL_REGNO_P (i)
+#ifdef NO_PROFILE_COUNTERS
+	&& (r11_ok || i != R11_REG)
+#else
+	&& i != R11_REG
+#endif
+	&& (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
+	&& !fixed_regs[i]
+	&& call_used_regs[i]
+	&& !df_regs_ever_live_p (i))
+      return i;
+
+  sorry ("No register available for profiling %<-mcmodel=large%s%>",
+	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
+
+  return INVALID_REGNUM;
+}
+
 /* Output assembler code to FILE to increment profiler label # LABELNO
    for profiling a function entry.  */
 void
@@ -22783,42 +22808,60 @@  x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
 	fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
 #endif
 
+      int scratch;
+      const char *reg_prefix;
+      const char *reg;
+
       if (!TARGET_PECOFF)
 	{
 	  switch (ix86_cmodel)
 	    {
 	    case CM_LARGE:
-	      /* NB: R10 is caller-saved.  Although it can be used as a
-		 static chain register, it is preserved when calling
-		 mcount for nested functions.  */
+	      scratch = x86_64_select_profile_regnum (true);
+	      reg = hi_reg_name[scratch];
+	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
-		fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
-			       "\tcall\tr10\n", mcount_name);
+		fprintf (file,
+			 "1:\tmovabs\t%s%s, OFFSET FLAT:%s\n"
+			 "\tcall\t%s%s\n",
+			 reg_prefix, reg, mcount_name, reg_prefix, reg);
 	      else
-		fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
-			 mcount_name);
+		fprintf (file,
+			 "1:\tmovabsq\t$%s, %%%s%s\n\tcall\t*%%%s%s\n",
+			 mcount_name, reg_prefix, reg, reg_prefix, reg);
 	      break;
 	    case CM_LARGE_PIC:
 #ifdef NO_PROFILE_COUNTERS
+	      scratch = x86_64_select_profile_regnum (false);
+	      reg = hi_reg_name[scratch];
+	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
 		{
 		  fprintf (file, "1:movabs\tr11, "
 				 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
-		  fprintf (file, "\tlea\tr10, 1b[rip]\n");
-		  fprintf (file, "\tadd\tr10, r11\n");
+		  fprintf (file, "\tlea\t%s%s, 1b[rip]\n",
+			   reg_prefix, reg);
+		  fprintf (file, "\tadd\t%s%s, r11\n",
+			   reg_prefix, reg);
 		  fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
 			   mcount_name);
-		  fprintf (file, "\tadd\tr10, r11\n");
-		  fprintf (file, "\tcall\tr10\n");
+		  fprintf (file, "\tadd\t%s%s, r11\n",
+			   reg_prefix, reg);
+		  fprintf (file, "\tcall\t%s%s\n",
+			   reg_prefix, reg);
 		  break;
 		}
 	      fprintf (file,
 		       "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
-	      fprintf (file, "\tleaq\t1b(%%rip), %%r10\n");
-	      fprintf (file, "\taddq\t%%r11, %%r10\n");
+	      fprintf (file, "\tleaq\t1b(%%rip), %%%s%s\n",
+		       reg_prefix, reg);
+	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
+		       reg_prefix, reg);
 	      fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
-	      fprintf (file, "\taddq\t%%r11, %%r10\n");
-	      fprintf (file, "\tcall\t*%%r10\n");
+	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
+		       reg_prefix, reg);
+	      fprintf (file, "\tcall\t*%%%s%s\n",
+		       reg_prefix, reg);
 #else
 	      sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
 #endif
diff --git a/gcc/testsuite/gcc.target/i386/pr113689-1.c b/gcc/testsuite/gcc.target/i386/pr113689-1.c
new file mode 100644
index 00000000000..97c40d051db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113689-1.c
@@ -0,0 +1,41 @@ 
+/* { dg-do run { target { lp64 && fpic } } } */
+/* { dg-options "-O2 -fno-pic -fprofile -mcmodel=large" } */
+
+__attribute__((noipa,noclone,noinline))
+void
+bar (int a1, int a2, int a3, int a4, int a5, int a6,
+     char *x, char *y, int *z)
+{
+  if (a1 != 1)
+    __builtin_abort ();
+  if (a2 != 2)
+    __builtin_abort ();
+  if (a3 != 3)
+    __builtin_abort ();
+  if (a4 != 4)
+    __builtin_abort ();
+  if (a5 != 5)
+    __builtin_abort ();
+  if (a6 != 6)
+    __builtin_abort ();
+  x[0] = 42;
+  y[0] = 42;
+  if (z[0] != 16)
+    __builtin_abort ();
+}
+
+__attribute__((noipa,noclone,noinline))
+void 
+foo (int c, int d, int e, int f, int g, int h, int z)
+{
+  typedef char B[32];
+  B b __attribute__((aligned (32)));
+  bar (c, d, e, f, g, h, &b[0], __builtin_alloca (z), &z);
+}
+
+int
+main ()
+{
+  foo (1, 2, 3, 4, 5, 6, 16);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr113689-2.c b/gcc/testsuite/gcc.target/i386/pr113689-2.c
new file mode 100644
index 00000000000..b0663b9bfe6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113689-2.c
@@ -0,0 +1,32 @@ 
+/* { dg-do run { target { lp64 && fpic } } } */
+/* { dg-options "-O2 -fpic -fprofile -mcmodel=large" } */
+
+__attribute__((noipa,noclone,noinline))
+void
+bar (int a1, int a2, char *x, char *y, int *z)
+{
+  if (a1 != 1)
+    __builtin_abort ();
+  if (a2 != 2)
+    __builtin_abort ();
+  x[0] = 42;
+  y[0] = 42;
+  if (z[0] != 16)
+    __builtin_abort ();
+}
+
+__attribute__((noipa,noclone,noinline))
+void 
+foo (int c, int d, int e, int f, int g, int h, int z)
+{
+  typedef char B[32];
+  B b __attribute__((aligned (32)));
+  bar (c, d, &b[0], __builtin_alloca (z), &z);
+}
+
+int
+main ()
+{
+  foo (1, 2, 3, 4, 5, 6, 16);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr113689-3.c b/gcc/testsuite/gcc.target/i386/pr113689-3.c
new file mode 100644
index 00000000000..10099fc8d96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113689-3.c
@@ -0,0 +1,24 @@ 
+/* { dg-do run { target { lp64 && avx_runtime } } } */
+/* { dg-options "-O2 -fprofile -mcmodel=large -mavx" } */
+
+__attribute__((noipa,noclone,noinline))
+_BitInt(511)
+foo(_BitInt(7) a, _BitInt(511) b)
+{
+  ({
+    volatile _BitInt(511) d = (-b / a);
+    if (d != -1)
+      __builtin_abort();
+    d;
+  });
+  return b;
+}
+
+int
+main(void)
+{
+  _BitInt(511) x = foo(5, 5);
+  if (x != 5)
+    __builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr98482-1.c b/gcc/testsuite/gcc.target/i386/pr98482-1.c
index b1d9619275e..ccf068eef5b 100644
--- a/gcc/testsuite/gcc.target/i386/pr98482-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr98482-1.c
@@ -1,8 +1,8 @@ 
 /* { dg-do compile { target { *-*-linux* && lp64 } } } */
 /* { dg-require-effective-target mfentry } */
 /* { dg-options "-fprofile -mfentry -O2 -mcmodel=large" } */
-/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__, %r10\n\tcall\t\\*%r10" { target nonpic } } } */
-/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__@PLTOFF, %r11\n\taddq\t%r11, %r10\n\tcall\t\\*%r10" { target { ! nonpic } } } } */
+/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__, %rax\n\tcall\t\\*%rax" { target nonpic } } } */
+/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__@PLTOFF, %r11\n\taddq\t%r11, %rdx\n\tcall\t\\*%rax" { target { ! nonpic } } } } */
 
 void
 func (void)
diff --git a/gcc/testsuite/gcc.target/i386/pr98482-2.c b/gcc/testsuite/gcc.target/i386/pr98482-2.c
index 03c62a4b67b..7220445ce7f 100644
--- a/gcc/testsuite/gcc.target/i386/pr98482-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr98482-2.c
@@ -2,7 +2,7 @@ 
 /* { dg-require-effective-target mfentry } */
 /* { dg-require-effective-target fpic } */
 /* { dg-options "-fpic -fprofile -mfentry -O2 -mcmodel=large" } */
-/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__@PLTOFF, %r11\n\taddq\t%r11, %r10\n\tcall\t\\*%r10" } } */
+/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__@PLTOFF, %r11\n\taddq\t%r11, %rdx\n\tcall\t\\*%rdx" } } */
 
 void
 func (void)