diff mbox series

[v2,1/5] or1k: Fix code quality for volatile memory loads

Message ID 20190703033351.11924-2-shorne@gmail.com
State New
Headers show
Series OpenRISC updates for 10 (fpu, fixes) | expand

Commit Message

Stafford Horne July 3, 2019, 3:33 a.m. UTC
Volatile memory does not match the memory_operand predicate.  This
causes extra extend/mask instructions instructions when reading
from volatile memory.  On OpenRISC loading volatile memory can be
treated the same as regular memory loads which supports combined
sign/zero extends.  Fixing this eliminates the need for extra
extend/mask instructions.

This also adds a test provided by Richard Selvaggi which uncovered the
issue while we were looking into another issue.

gcc/ChangeLog:

	PR target/90363
	* config/or1k/or1k.md (zero_extend<mode>si2): Update predicate.
	(extend<mode>si2): Update predicate.
	* gcc/config/or1k/predicates.md (volatile_mem_operand): New.
	(reg_or_mem_operand): New.

gcc/testsuite/ChangeLog:

	PR target/90363
	* gcc.target/or1k/swap-1.c: New test.
	* gcc.target/or1k/swap-2.c: New test.
---
Since v1:
 - Fixed typos with volatile

 gcc/config/or1k/or1k.md                |  6 +--
 gcc/config/or1k/predicates.md          | 18 +++++++
 gcc/testsuite/gcc.target/or1k/swap-1.c | 70 ++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/or1k/swap-2.c | 47 +++++++++++++++++
 4 files changed, 138 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/or1k/swap-1.c
 create mode 100644 gcc/testsuite/gcc.target/or1k/swap-2.c
diff mbox series

Patch

diff --git a/gcc/config/or1k/or1k.md b/gcc/config/or1k/or1k.md
index 2dad51cd46b..757d899c442 100644
--- a/gcc/config/or1k/or1k.md
+++ b/gcc/config/or1k/or1k.md
@@ -328,11 +328,11 @@ 
 ;; Sign Extending
 ;; -------------------------------------------------------------------------
 
-;; Zero extension can always be done with AND and an extending load.
+;; Zero extension can always be done with AND or an extending load.
 
 (define_insn "zero_extend<mode>si2"
   [(set (match_operand:SI 0 "register_operand"                     "=r,r")
-	(zero_extend:SI (match_operand:I12 1 "nonimmediate_operand" "r,m")))]
+	(zero_extend:SI (match_operand:I12 1 "reg_or_mem_operand" "r,m")))]
   ""
   "@
    l.andi\t%0, %1, <zext_andi>
@@ -344,7 +344,7 @@ 
 
 (define_insn "extend<mode>si2"
   [(set (match_operand:SI 0 "register_operand"                      "=r,r")
-	(sign_extend:SI (match_operand:I12 1 "nonimmediate_operand"  "r,m")))]
+	(sign_extend:SI (match_operand:I12 1 "reg_or_mem_operand"  "r,m")))]
   "TARGET_SEXT"
   "@
    l.ext<ldst>s\t%0, %1
diff --git a/gcc/config/or1k/predicates.md b/gcc/config/or1k/predicates.md
index 879236bca49..b895f1b4228 100644
--- a/gcc/config/or1k/predicates.md
+++ b/gcc/config/or1k/predicates.md
@@ -82,3 +82,21 @@ 
 
 (define_predicate "equality_comparison_operator"
   (match_code "ne,eq"))
+
+;; Borrowed from rs6000
+;  Return true if the operand is in volatile memory.  Note that during the
+;; RTL generation phase, memory_operand does not return TRUE for volatile
+;; memory references.  So this function allows us to recognize volatile
+;; references where it's safe.
+(define_predicate "volatile_mem_operand"
+  (and (match_code "mem")
+       (match_test "MEM_VOLATILE_P (op)")
+       (if_then_else (match_test "reload_completed")
+	 (match_operand 0 "memory_operand")
+	 (match_test "memory_address_p (mode, XEXP (op, 0))"))))
+
+;; Return true if the operand is a register or memory; including volatile
+;; memory.
+(define_predicate "reg_or_mem_operand"
+  (ior (match_operand 0 "nonimmediate_operand")
+       (match_operand 0 "volatile_mem_operand")))
diff --git a/gcc/testsuite/gcc.target/or1k/swap-1.c b/gcc/testsuite/gcc.target/or1k/swap-1.c
new file mode 100644
index 00000000000..4c179d1e430
--- /dev/null
+++ b/gcc/testsuite/gcc.target/or1k/swap-1.c
@@ -0,0 +1,70 @@ 
+/* { dg-do run } */
+/* { dg-options "-Os -mhard-mul -msoft-div -msoft-float" } */
+
+/* Notes:
+
+   This test failed on or1k GCC 7.2.0, and passes on or1k GCC 5.3.0
+   as well as the or1k port released in GCC 9.1.
+
+   The main program is organized as a loop structure so gcc does not
+   optimize-away the calls to swap_1().  Compiling with -O2 is still smart
+   enough to optimize-away the calls, but using -Os does not.
+   The bad code is only generated when compiled with -Os.
+
+   When the bad code is generated all code is okay except for the very last
+   instruction (a 'l.addc' in the l.jr delay slot).
+   Up to that point in execution, r11 and r12 contain the correct (expected)
+   values, but the execution of the final "l.addc" corrupts r11.
+
+   This test is added to ensure this does not come back.  */
+
+#include <stdint.h>
+
+volatile static uint8_t g_doswap = 1;
+
+uint64_t swap_1 (uint64_t u64) {
+  uint32_t u64_lo, u64_hi, u64_tmp;
+
+  u64_lo = u64 & 0xFFFFFFFF;
+  u64_hi = u64 >> 32;
+
+  if (g_doswap)
+    {
+      u64_tmp = u64_lo;
+      u64_lo  = u64_hi;
+      u64_hi  = u64_tmp;
+    }
+
+  u64 = u64_lo;
+  u64 += ((uint64_t) u64_hi << 32);
+
+  return u64;
+}
+
+int main () {
+  int ret;
+  int iter;
+  uint64_t  aa[2];   // inputs to swap function
+  uint64_t  ee[2];   // expected outputs of swap function
+  uint64_t  rr[2];   // actual results of swap function
+
+  g_doswap = 1;
+
+  // populate inputs, and expected outputs:
+  aa[0] = 0x123456789abcdef0;
+  aa[1] = 0x0123456789abcdef;
+
+  ee[0] = 0x9ABCDEF012345678;
+  ee[1] = 0x89ABCDEF01234567;
+
+  ret = 0;
+  for (iter = 0; iter < 2; iter++)
+    {
+      rr[iter] = swap_1(aa[iter]);
+      // early-out if there's a mis-match:
+      if (ee[iter] != rr[iter])
+        ret = 1;
+    }
+
+  return ret;
+}
diff --git a/gcc/testsuite/gcc.target/or1k/swap-2.c b/gcc/testsuite/gcc.target/or1k/swap-2.c
new file mode 100644
index 00000000000..3730b4ee2e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/or1k/swap-2.c
@@ -0,0 +1,47 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Os -mhard-mul -msoft-div -msoft-float" } */
+
+/* Notes:
+
+   This test failed on or1k GCC 7.2.0, and passes on or1k GCC 5.3.0
+   as well as the or1k port released in GCC 9.1.
+
+   The main program is organized as a loop structure so gcc does not
+   optimize-away the calls to swap_1().  Compiling with -O2 is still smart
+   enough to optimize-away the calls, but using -Os does not.
+   The bad code is only generated when compiled with -Os.
+
+   When the bad code is generated all code is okay except for the very last
+   instruction (a 'l.addc' in the l.jr delay slot).
+   Up to that point in execution, r11 and r12 contain the correct (expected)
+   values, but the execution of the final "l.addc" corrupts r11.
+
+   This test ensures an l.addc is not in the output.  Also, while confirming
+   this test we uncovered PR/90363, we use it to check for that as well.  */
+
+#include <stdint.h>
+
+volatile static uint8_t g_doswap = 1;
+
+uint64_t swap_1 (uint64_t u64) {
+  uint32_t u64_lo, u64_hi, u64_tmp;
+
+  u64_lo = u64 & 0xFFFFFFFF;
+  u64_hi = u64 >> 32;
+
+  if (g_doswap)
+    {
+      u64_tmp = u64_lo;
+      u64_lo  = u64_hi;
+      u64_hi  = u64_tmp;
+    }
+
+  u64 = u64_lo;
+  u64 += ((uint64_t) u64_hi << 32);
+
+  return u64;
+}
+
+/* Check to ensure the volatile load does not get zero extended.  */
+/* { dg-final { scan-assembler-not "0xff" } } */
+/* { dg-final { scan-assembler-not "l.addc" } } */