diff mbox series

aarch64: Avoid duplicating bti j insns for jump tables [PR99988]

Message ID 20210413132945.rhhjcj6fcaeqxe5g@arm.com
State New
Headers show
Series aarch64: Avoid duplicating bti j insns for jump tables [PR99988] | expand

Commit Message

Alex Coplan April 13, 2021, 1:29 p.m. UTC
Hi all,

This patch fixes PR99988 which shows us generating large (> 250)
sequences of back-to-back bti j instructions.

The fix is simply to avoid inserting bti j instructions at the target of
a jump table if we've already inserted one for a given label.

Testing:
 * Bootstrapped and regtested on aarch64-linux-gnu (with and without
 -mbranch-protection=standard), no regressions.

Presumably this is stage 1 material since the bug isn't a regression? If
so, OK for GCC 12 stage 1?

Thanks,
Alex

gcc/ChangeLog:

	PR target/99988
	* config/aarch64/aarch64-bti-insert.c (aarch64_bti_j_insn_p): New.
	(rest_of_insert_bti): Avoid inserting duplicate bti j insns for
	jump table targets.

gcc/testsuite/ChangeLog:

	PR target/99988
	* gcc.target/aarch64/pr99988.c: New test.

Comments

Richard Sandiford April 15, 2021, 5:45 p.m. UTC | #1
Looks good in general, but like you say, it's GCC 12 material.

Alex Coplan <alex.coplan@arm.com> writes:
> diff --git a/gcc/config/aarch64/aarch64-bti-insert.c b/gcc/config/aarch64/aarch64-bti-insert.c
> index 936649769c7..943fa3c1097 100644
> --- a/gcc/config/aarch64/aarch64-bti-insert.c
> +++ b/gcc/config/aarch64/aarch64-bti-insert.c
> @@ -120,6 +120,13 @@ aarch64_pac_insn_p (rtx x)
>    return false;
>  }
>  
> +static bool
> +aarch64_bti_j_insn_p (rtx_insn *insn)
> +{
> +  rtx pat = PATTERN (insn);
> +  return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J;
> +}
> +

Nit, but even a simple function like this should have a comment. :-)

>  /* Insert the BTI instruction.  */
>  /* This is implemented as a late RTL pass that runs before branch
>     shortening and does the following.  */
> @@ -165,6 +172,9 @@ rest_of_insert_bti (void)
>  		  for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
>  		    {
>  		      label = as_a <rtx_insn *> (XEXP (RTVEC_ELT (vec, j), 0));
> +		      if (aarch64_bti_j_insn_p (next_nonnote_insn (label)))
> +			continue;
> +

This should be next_nonnote_nondebug_insn (quite the mouthful),
otherwise debug instructions could affect the choice.

The thing returned by next_nonnote_nondebug_insn isn't in general
guaranteed to be an insn (unlike next_real_nondebug_insn).  It might
also be null in very odd cases.  I think we should therefore check
for null and INSN_P before checking PATTERN.

Thanks,
Richard

>  		      bti_insn = gen_bti_j ();
>  		      emit_insn_after (bti_insn, label);
>  		    }
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c b/gcc/testsuite/gcc.target/aarch64/pr99988.c
> new file mode 100644
> index 00000000000..2d87f41a717
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c
> @@ -0,0 +1,66 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mbranch-protection=standard" } */
> +/* { dg-final { scan-assembler-times {bti j} 13 } } */
> +int a;
> +int c();
> +int d();
> +int e();
> +int f();
> +int g();
> +void h() {
> +  switch (a) {
> +  case 0:
> +  case 56:
> +  case 57:
> +    break;
> +  case 58:
> +  case 59:
> +  case 61:
> +  case 62:
> +    c();
> +  case 64:
> +  case 63:
> +    d();
> +  case 66:
> +  case 65:
> +    d();
> +  case 68:
> +  case 67:
> +    d();
> +  case 69:
> +  case 70:
> +    d();
> +  case 71:
> +  case 72:
> +  case 88:
> +  case 87:
> +    d();
> +  case 90:
> +  case 89:
> +    d();
> +  case 92:
> +  case 1:
> +    d();
> +  case 93:
> +  case 73:
> +  case 4:
> +    e();
> +  case 76:
> +  case 5:
> +    f();
> +  case 7:
> +  case 8:
> +  case 84:
> +  case 85:
> +    break;
> +  case 6:
> +  case 299:
> +  case 9:
> +  case 80:
> +  case 2:
> +  case 3:
> +    e();
> +  default:
> +    g();
> +  }
> +}
Alex Coplan April 21, 2021, 11:39 a.m. UTC | #2
Hi Richard,

On 15/04/2021 18:45, Richard Sandiford wrote:
> Looks good in general, but like you say, it's GCC 12 material.

Thanks for the review. The attached patch addresses these comments and
bootstraps/regtests OK on aarch64-linux-gnu. OK for trunk?

Thanks,
Alex

> 
> Alex Coplan <alex.coplan@arm.com> writes:
> > diff --git a/gcc/config/aarch64/aarch64-bti-insert.c b/gcc/config/aarch64/aarch64-bti-insert.c
> > index 936649769c7..943fa3c1097 100644
> > --- a/gcc/config/aarch64/aarch64-bti-insert.c
> > +++ b/gcc/config/aarch64/aarch64-bti-insert.c
> > @@ -120,6 +120,13 @@ aarch64_pac_insn_p (rtx x)
> >    return false;
> >  }
> >  
> > +static bool
> > +aarch64_bti_j_insn_p (rtx_insn *insn)
> > +{
> > +  rtx pat = PATTERN (insn);
> > +  return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J;
> > +}
> > +
> 
> Nit, but even a simple function like this should have a comment. :-)
> 
> >  /* Insert the BTI instruction.  */
> >  /* This is implemented as a late RTL pass that runs before branch
> >     shortening and does the following.  */
> > @@ -165,6 +172,9 @@ rest_of_insert_bti (void)
> >  		  for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
> >  		    {
> >  		      label = as_a <rtx_insn *> (XEXP (RTVEC_ELT (vec, j), 0));
> > +		      if (aarch64_bti_j_insn_p (next_nonnote_insn (label)))
> > +			continue;
> > +
> 
> This should be next_nonnote_nondebug_insn (quite the mouthful),
> otherwise debug instructions could affect the choice.
> 
> The thing returned by next_nonnote_nondebug_insn isn't in general
> guaranteed to be an insn (unlike next_real_nondebug_insn).  It might
> also be null in very odd cases.  I think we should therefore check
> for null and INSN_P before checking PATTERN.
> 
> Thanks,
> Richard
> 
> >  		      bti_insn = gen_bti_j ();
> >  		      emit_insn_after (bti_insn, label);
> >  		    }
diff --git a/gcc/config/aarch64/aarch64-bti-insert.c b/gcc/config/aarch64/aarch64-bti-insert.c
index 936649769c7..5d6bc169d6b 100644
--- a/gcc/config/aarch64/aarch64-bti-insert.c
+++ b/gcc/config/aarch64/aarch64-bti-insert.c
@@ -120,6 +120,17 @@ aarch64_pac_insn_p (rtx x)
   return false;
 }
 
+/* Check if INSN is a BTI J insn.  */
+static bool
+aarch64_bti_j_insn_p (rtx_insn *insn)
+{
+  if (!insn || !INSN_P (insn))
+    return false;
+
+  rtx pat = PATTERN (insn);
+  return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J;
+}
+
 /* Insert the BTI instruction.  */
 /* This is implemented as a late RTL pass that runs before branch
    shortening and does the following.  */
@@ -165,6 +176,10 @@ rest_of_insert_bti (void)
 		  for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
 		    {
 		      label = as_a <rtx_insn *> (XEXP (RTVEC_ELT (vec, j), 0));
+		      rtx_insn *next = next_nonnote_nondebug_insn (label);
+		      if (aarch64_bti_j_insn_p (next))
+			continue;
+
 		      bti_insn = gen_bti_j ();
 		      emit_insn_after (bti_insn, label);
 		    }
diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c b/gcc/testsuite/gcc.target/aarch64/pr99988.c
new file mode 100644
index 00000000000..2d87f41a717
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbranch-protection=standard" } */
+/* { dg-final { scan-assembler-times {bti j} 13 } } */
+int a;
+int c();
+int d();
+int e();
+int f();
+int g();
+void h() {
+  switch (a) {
+  case 0:
+  case 56:
+  case 57:
+    break;
+  case 58:
+  case 59:
+  case 61:
+  case 62:
+    c();
+  case 64:
+  case 63:
+    d();
+  case 66:
+  case 65:
+    d();
+  case 68:
+  case 67:
+    d();
+  case 69:
+  case 70:
+    d();
+  case 71:
+  case 72:
+  case 88:
+  case 87:
+    d();
+  case 90:
+  case 89:
+    d();
+  case 92:
+  case 1:
+    d();
+  case 93:
+  case 73:
+  case 4:
+    e();
+  case 76:
+  case 5:
+    f();
+  case 7:
+  case 8:
+  case 84:
+  case 85:
+    break;
+  case 6:
+  case 299:
+  case 9:
+  case 80:
+  case 2:
+  case 3:
+    e();
+  default:
+    g();
+  }
+}
Richard Sandiford April 21, 2021, 12:05 p.m. UTC | #3
Alex Coplan <alex.coplan@arm.com> writes:
> Hi Richard,
>
> On 15/04/2021 18:45, Richard Sandiford wrote:
>> Looks good in general, but like you say, it's GCC 12 material.
>
> Thanks for the review. The attached patch addresses these comments and
> bootstraps/regtests OK on aarch64-linux-gnu. OK for trunk?

OK, thanks.

Richard
Christophe Lyon April 22, 2021, 7:24 a.m. UTC | #4
On Wed, 21 Apr 2021 at 14:05, Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Alex Coplan <alex.coplan@arm.com> writes:
> > Hi Richard,
> >
> > On 15/04/2021 18:45, Richard Sandiford wrote:
> >> Looks good in general, but like you say, it's GCC 12 material.
> >
> > Thanks for the review. The attached patch addresses these comments and
> > bootstraps/regtests OK on aarch64-linux-gnu. OK for trunk?
>
> OK, thanks.
>

The new test fails with -mabi=ilp32:
sorry, unimplemented: return address signing is only supported for '-mabi=lp64'

Is that OK:
diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c
b/gcc/testsuite/gcc.target/aarch64/pr99988.c
index 2d87f41..7cca496 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr99988.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target lp64 } } */
 /* { dg-options "-O2 -mbranch-protection=standard" } */
 /* { dg-final { scan-assembler-times {bti j} 13 } } */
 int a;

Thanks,

Christophe
Richard Sandiford April 22, 2021, 11:32 a.m. UTC | #5
Christophe Lyon via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> On Wed, 21 Apr 2021 at 14:05, Richard Sandiford via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
>>
>> Alex Coplan <alex.coplan@arm.com> writes:
>> > Hi Richard,
>> >
>> > On 15/04/2021 18:45, Richard Sandiford wrote:
>> >> Looks good in general, but like you say, it's GCC 12 material.
>> >
>> > Thanks for the review. The attached patch addresses these comments and
>> > bootstraps/regtests OK on aarch64-linux-gnu. OK for trunk?
>>
>> OK, thanks.
>>
>
> The new test fails with -mabi=ilp32:
> sorry, unimplemented: return address signing is only supported for '-mabi=lp64'
>
> Is that OK:
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c
> b/gcc/testsuite/gcc.target/aarch64/pr99988.c
> index 2d87f41..7cca496 100644
> --- a/gcc/testsuite/gcc.target/aarch64/pr99988.c
> +++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c
> @@ -1,4 +1,4 @@
> -/* { dg-do compile } */
> +/* { dg-do compile { target lp64 } } */
>  /* { dg-options "-O2 -mbranch-protection=standard" } */
>  /* { dg-final { scan-assembler-times {bti j} 13 } } */
>  int a;

OK, thanks.

Richard
Alex Coplan April 30, 2021, 5:58 p.m. UTC | #6
Hi Richard,

On 21/04/2021 13:05, Richard Sandiford wrote:
> Alex Coplan <alex.coplan@arm.com> writes:
> > Hi Richard,
> >
> > On 15/04/2021 18:45, Richard Sandiford wrote:
> >> Looks good in general, but like you say, it's GCC 12 material.
> >
> > Thanks for the review. The attached patch addresses these comments and
> > bootstraps/regtests OK on aarch64-linux-gnu. OK for trunk?
> 
> OK, thanks.

The patch applies cleanly and bootstraps/regtests OK on the GCC 11
branch. OK to backport there and to 10 and 9 if the same is true for
those branches?

Thanks,
Alex
Richard Sandiford May 11, 2021, 10:28 a.m. UTC | #7
Alex Coplan <alex.coplan@arm.com> writes:
> Hi Richard,
>
> On 21/04/2021 13:05, Richard Sandiford wrote:
>> Alex Coplan <alex.coplan@arm.com> writes:
>> > Hi Richard,
>> >
>> > On 15/04/2021 18:45, Richard Sandiford wrote:
>> >> Looks good in general, but like you say, it's GCC 12 material.
>> >
>> > Thanks for the review. The attached patch addresses these comments and
>> > bootstraps/regtests OK on aarch64-linux-gnu. OK for trunk?
>> 
>> OK, thanks.
>
> The patch applies cleanly and bootstraps/regtests OK on the GCC 11
> branch. OK to backport there and to 10 and 9 if the same is true for
> those branches?

To summarise what we discussed off-list: this initially looked like it
was “just” a new DCE optimisation, which is why it seemed like GCC 12
material.  However, as Alex pointed out, the unpatched BTI support can
generate code whose size is quadratic in the number of switch cases,
which is a serious regression whichever way you cut it.

So this is OK for backports, and would have been OK during GCC 11
stage 4 too.

Thanks,
Richard
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-bti-insert.c b/gcc/config/aarch64/aarch64-bti-insert.c
index 936649769c7..943fa3c1097 100644
--- a/gcc/config/aarch64/aarch64-bti-insert.c
+++ b/gcc/config/aarch64/aarch64-bti-insert.c
@@ -120,6 +120,13 @@  aarch64_pac_insn_p (rtx x)
   return false;
 }
 
+static bool
+aarch64_bti_j_insn_p (rtx_insn *insn)
+{
+  rtx pat = PATTERN (insn);
+  return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J;
+}
+
 /* Insert the BTI instruction.  */
 /* This is implemented as a late RTL pass that runs before branch
    shortening and does the following.  */
@@ -165,6 +172,9 @@  rest_of_insert_bti (void)
 		  for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
 		    {
 		      label = as_a <rtx_insn *> (XEXP (RTVEC_ELT (vec, j), 0));
+		      if (aarch64_bti_j_insn_p (next_nonnote_insn (label)))
+			continue;
+
 		      bti_insn = gen_bti_j ();
 		      emit_insn_after (bti_insn, label);
 		    }
diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c b/gcc/testsuite/gcc.target/aarch64/pr99988.c
new file mode 100644
index 00000000000..2d87f41a717
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c
@@ -0,0 +1,66 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbranch-protection=standard" } */
+/* { dg-final { scan-assembler-times {bti j} 13 } } */
+int a;
+int c();
+int d();
+int e();
+int f();
+int g();
+void h() {
+  switch (a) {
+  case 0:
+  case 56:
+  case 57:
+    break;
+  case 58:
+  case 59:
+  case 61:
+  case 62:
+    c();
+  case 64:
+  case 63:
+    d();
+  case 66:
+  case 65:
+    d();
+  case 68:
+  case 67:
+    d();
+  case 69:
+  case 70:
+    d();
+  case 71:
+  case 72:
+  case 88:
+  case 87:
+    d();
+  case 90:
+  case 89:
+    d();
+  case 92:
+  case 1:
+    d();
+  case 93:
+  case 73:
+  case 4:
+    e();
+  case 76:
+  case 5:
+    f();
+  case 7:
+  case 8:
+  case 84:
+  case 85:
+    break;
+  case 6:
+  case 299:
+  case 9:
+  case 80:
+  case 2:
+  case 3:
+    e();
+  default:
+    g();
+  }
+}