diff mbox

[PATCH/AARCH64] Disable load/store pair peephole for volatile mem

Message ID CA+=Sn1mQrb--ieVZAbaA0if+wSj5x+uCV5=xefbyV6CL5XUM_g@mail.gmail.com
State New
Headers show

Commit Message

Andrew Pinski Dec. 10, 2014, 2:18 a.m. UTC
Hi,
  As mentioned in
https://gcc.gnu.org/ml/gcc-patches/2014-12/msg00609.html, the
load/store pair peepholes currently accept volatile mem which can
cause wrong code as the architecture does not define which part of the
pair happens first.

This patch disables the peephole for volatile mem and adds two
testcases so that volatile loads are not converted into load pair (I
could add the same for store pair if needed).  In the second testcase,
only f3 does not get converted to load pair, even though the order of
the loads is different.

OK?  Bootstrapped and tested on aarch64-linux-gnu without any regressions.

Thanks,
Andrew Pinski

ChangeLog:
* config/aarch64/aarch64.c (aarch64_operands_ok_for_ldpstp): Reject
volatile mems.
(aarch64_operands_adjust_ok_for_ldpstp): Likewise.

testsuite/ChangeLog:
* gcc.target/aarch64/volatileloadpair-1.c: New testcase.
* gcc.target/aarch64/volatileloadpair-2.c: New testcase.

Comments

Bin.Cheng Dec. 10, 2014, 5:35 a.m. UTC | #1
On Wed, Dec 10, 2014 at 10:18 AM, Andrew Pinski <pinskia@gmail.com> wrote:
> Hi,
>   As mentioned in
> https://gcc.gnu.org/ml/gcc-patches/2014-12/msg00609.html, the
> load/store pair peepholes currently accept volatile mem which can
> cause wrong code as the architecture does not define which part of the
> pair happens first.
>
> This patch disables the peephole for volatile mem and adds two
> testcases so that volatile loads are not converted into load pair (I
> could add the same for store pair if needed).  In the second testcase,
> only f3 does not get converted to load pair, even though the order of
> the loads are different.
>
> OK?  Bootstrapped and tested on aarch64-linux-gnu without any regressions.
>
> Thanks,
> Andrew Pinski
>
> ChangeLog:
> * config/aarch64/aarch64.c (aarch64_operands_ok_for_ldpstp): Reject
> volatile mems.
> (aarch64_operands_adjust_ok_for_ldpstp): Likewise.
>
> testsuite/ChangeLog:
> * gcc.target/aarch64/volatileloadpair-1.c: New testcase.
> * gcc.target/aarch64/volatileloadpair-2.c: New testcase.

> @@ -10702,6 +10706,11 @@ aarch64_operands_adjust_ok_for_ldpstp (r
>    if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
>     return false;
>
> +  /* The mems cannot be volatile.  */
> +  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
> +      || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
> +    return false;
> +

I realized that "!MEM_P (mem_1)" can't be true here.  Since we are
fixing this area anyway, could you please remove the MEM_P check and
put the volatile checks before the call to "aarch64_mem_pair_operand"?
The latter is the more expensive check.

Thanks,
bin
Andrew Pinski Jan. 13, 2015, 4:28 a.m. UTC | #2
On Tue, Dec 9, 2014 at 6:18 PM, Andrew Pinski <pinskia@gmail.com> wrote:
> Hi,
>   As mentioned in
> https://gcc.gnu.org/ml/gcc-patches/2014-12/msg00609.html, the
> load/store pair peepholes currently accept volatile mem which can
> cause wrong code as the architecture does not define which part of the
> pair happens first.
>
> This patch disables the peephole for volatile mem and adds two
> testcases so that volatile loads are not converted into load pair (I
> could add the same for store pair if needed).  In the second testcase,
> only f3 does not get converted to load pair, even though the order of
> the loads are different.
>
> OK?  Bootstrapped and tested on aarch64-linux-gnu without any regressions.

Ping.


>
> Thanks,
> Andrew Pinski
>
> ChangeLog:
> * config/aarch64/aarch64.c (aarch64_operands_ok_for_ldpstp): Reject
> volatile mems.
> (aarch64_operands_adjust_ok_for_ldpstp): Likewise.
>
> testsuite/ChangeLog:
> * gcc.target/aarch64/volatileloadpair-1.c: New testcase.
> * gcc.target/aarch64/volatileloadpair-2.c: New testcase.
Marcus Shawcroft Jan. 13, 2015, 12:27 p.m. UTC | #3
On 10 December 2014 at 02:18, Andrew Pinski <pinskia@gmail.com> wrote:
> Hi,
>   As mentioned in
> https://gcc.gnu.org/ml/gcc-patches/2014-12/msg00609.html, the
> load/store pair peepholes currently accept volatile mem which can
> cause wrong code as the architecture does not define which part of the
> pair happens first.
>
> This patch disables the peephole for volatile mem and adds two
> testcases so that volatile loads are not converted into load pair (I
> could add the same for store pair if needed).  In the second testcase,
> only f3 does not get converted to load pair, even though the order of
> the loads are different.
>
> OK?  Bootstrapped and tested on aarch64-linux-gnu without any regressions.
>
> Thanks,
> Andrew Pinski
>
> ChangeLog:
> * config/aarch64/aarch64.c (aarch64_operands_ok_for_ldpstp): Reject
> volatile mems.
> (aarch64_operands_adjust_ok_for_ldpstp): Likewise.
>
> testsuite/ChangeLog:
> * gcc.target/aarch64/volatileloadpair-1.c: New testcase.
> * gcc.target/aarch64/volatileloadpair-2.c: New testcase.


OK.

Bin, feel free to follow up with a patch to reorganize the MEM_P check.

/Marcus
diff mbox

Patch

Index: testsuite/gcc.target/aarch64/volatileloadpair-2.c
===================================================================
--- testsuite/gcc.target/aarch64/volatileloadpair-2.c	(revision 0)
+++ testsuite/gcc.target/aarch64/volatileloadpair-2.c	(revision 0)
@@ -0,0 +1,49 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* volatile references should not produce load pair. */
+/* { dg-final { scan-assembler-not "ldp\t" } } */
+
+int f0(volatile int *a)
+{
+  int b = a[0];
+  int c = a[1];
+  int d = a[2];
+  int e = a[3];
+  return b + c + d + e;
+}
+
+int f1(volatile int *a)
+{
+  int b = a[1];
+  int c = a[0];
+  int d = a[2];
+  int e = a[3];
+  return b + c + d + e;
+}
+
+int f2(volatile int *a)
+{
+  int b = a[1];
+  int c = a[0];
+  int d = a[3];
+  int e = a[2];
+  return b + c + d + e;
+}
+
+int f3(volatile int *a)
+{
+  int b = a[1];
+  int c = a[3];
+  int d = a[0];
+  int e = a[2];
+  return b + c + d + e;
+}
+
+int f4(volatile int *a)
+{
+  int b = a[1];
+  int c = a[3];
+  int d = a[2];
+  int e = a[0];
+  return b + c + d + e;
+}
Index: testsuite/gcc.target/aarch64/volatileloadpair-1.c
===================================================================
--- testsuite/gcc.target/aarch64/volatileloadpair-1.c	(revision 0)
+++ testsuite/gcc.target/aarch64/volatileloadpair-1.c	(revision 0)
@@ -0,0 +1,46 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* volatile references should not produce load pair. */
+/* { dg-final { scan-assembler-not "ldp\t" } } */
+
+int f0(volatile int *a)
+{
+  int b = a[0];
+  int c = a[1];
+  return b + c;
+}
+
+int f1(volatile int *a)
+{
+  int b = a[1];
+  int c = a[0];
+  return b + c;
+}
+
+int f2(volatile int *a)
+{
+  int b = a[1];
+  int c = a[2];
+  return b + c;
+}
+
+int f3(volatile int *a)
+{
+  int b = a[2];
+  int c = a[1];
+  return b + c;
+}
+
+int f4(volatile int *a)
+{
+  int b = a[2];
+  int c = a[3];
+  return b + c;
+}
+
+int f5(volatile int *a)
+{
+  int b = a[3];
+  int c = a[2];
+  return b + c;
+}
Index: config/aarch64/aarch64.c
===================================================================
--- config/aarch64/aarch64.c	(revision 218554)
+++ config/aarch64/aarch64.c	(working copy)
@@ -10595,6 +10595,10 @@  aarch64_operands_ok_for_ldpstp (rtx *ope
       reg_2 = operands[3];
     }
 
+  /* The mems cannot be volatile.  */
+  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
+    return false;
+
   /* Check if the addresses are in the form of [base+offset].  */
   extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
   if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
@@ -10702,6 +10706,11 @@  aarch64_operands_adjust_ok_for_ldpstp (r
   if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
     return false;
 
+  /* The mems cannot be volatile.  */
+  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
+      || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
+    return false;
+
   /* Check if the addresses are in the form of [base+offset].  */
   extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
   if (base_1 == NULL_RTX || offset_1 == NULL_RTX)