diff mbox

[i386] : Fix PR71245, atomic load/store bounces the data to the stack using fild/fistp

Message ID CAFULd4YgbUxgn00u1C5qaO7CnkUYxQvjBzKjnurKmpHkXjDxzA@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak May 29, 2016, 8:55 p.m. UTC
Hello!

As shown in the PR, when moving a DFmode value to/from an FP register, we
don't need to bounce it through an atomic DImode fild/fistp pair in case of
-march=pentium. A DFmode move is atomic by itself.

2016-05-29  Uros Bizjak  <ubizjak@gmail.com>

    PR target/71245
    * config/i386/sync.md (define_peephole2 atomic_storedi_fpu):
    New peepholes to remove unneeded fild/fistp pairs.
    (define_peephole2 atomic_loaddi_fpu): Ditto.

testsuite/ChangeLog:

2016-05-29  Uros Bizjak  <ubizjak@gmail.com>

    PR target/71245
    * gcc.target/i386/pr71245-1.c: New test.
    * gcc.target/i386/pr71245-2.c: Ditto.

Bootstrapped on x86_64-linux-gnu and regression tested with -m32 -march=pentium.

Committed to mainline SVN.

Uros.
diff mbox

Patch

Index: config/i386/sync.md
===================================================================
--- config/i386/sync.md	(revision 236861)
+++ config/i386/sync.md	(working copy)
@@ -210,6 +210,34 @@ 
   DONE;
 })
 
+(define_peephole2	; Collapse an atomic DImode fild/fistp bounce followed by a DFmode reload into one DFmode load.
+  [(set (match_operand:DF 0 "fp_register_operand")	; insn 0: FILD_ATOMIC of DImode memory operand 1 into FP register operand 0
+	(unspec:DF [(match_operand:DI 1 "memory_operand")]
+		   UNSPEC_FILD_ATOMIC))
+   (set (match_operand:DI 2 "memory_operand")	; insn 1: FIST_ATOMIC of operand 0 into DImode memory operand 2
+	(unspec:DI [(match_dup 0)]
+		   UNSPEC_FIST_ATOMIC))
+   (set (match_operand:DF 3 "fp_register_operand")	; insn 2: DFmode load of memory operand 4; the condition requires it to equal operand 2 viewed in DFmode
+	(match_operand:DF 4 "memory_operand"))]
+  "!TARGET_64BIT
+   && peep2_reg_dead_p (2, operands[0])
+   && rtx_equal_p (operands[4], adjust_address_nv (operands[2], DFmode, 0))"
+  [(set (match_dup 3) (match_dup 5))]	; replacement: a single DFmode load into operand 3.  NOTE(review): the store to operand 2 is dropped and nothing visible here checks that this memory is unused afterwards -- confirm.
+  "operands[5] = gen_lowpart (DFmode, operands[1]);")	; operand 5 is the original source memory (operand 1) re-expressed in DFmode
+
+(define_peephole2	; Same collapse as the fild/fistp form, but the DImode bounce goes through an SSE register.
+  [(set (match_operand:DI 0 "sse_reg_operand")	; insn 0: DImode load of memory operand 1 into SSE register operand 0
+	(match_operand:DI 1 "memory_operand"))
+   (set (match_operand:DI 2 "memory_operand")	; insn 1: DImode store of operand 0 into memory operand 2
+	(match_dup 0))
+   (set (match_operand:DF 3 "fp_register_operand")	; insn 2: DFmode load of memory operand 4; the condition requires it to equal operand 2 viewed in DFmode
+	(match_operand:DF 4 "memory_operand"))]
+  "!TARGET_64BIT
+   && peep2_reg_dead_p (2, operands[0])
+   && rtx_equal_p (operands[4], adjust_address_nv (operands[2], DFmode, 0))"
+  [(set (match_dup 3) (match_dup 5))]	; replacement: a single DFmode load into operand 3.  NOTE(review): the store to operand 2 is dropped and nothing visible here checks that this memory is unused afterwards -- confirm.
+  "operands[5] = gen_lowpart (DFmode, operands[1]);")	; operand 5 is the original source memory (operand 1) re-expressed in DFmode
+
 (define_expand "atomic_store<mode>"
   [(set (match_operand:ATOMIC 0 "memory_operand")
 	(unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand")
@@ -298,6 +326,34 @@ 
   DONE;
 })
 
+(define_peephole2	; Collapse a DFmode store followed by an atomic DImode fild/fistp bounce into one DFmode store.
+  [(set (match_operand:DF 0 "memory_operand")	; insn 0: DFmode store of FP register operand 1 into memory operand 0
+	(match_operand:DF 1 "fp_register_operand"))
+   (set (match_operand:DF 2 "fp_register_operand")	; insn 1: FILD_ATOMIC of DImode memory operand 3; the condition requires operand 0 to equal operand 3 viewed in DFmode
+	(unspec:DF [(match_operand:DI 3 "memory_operand")]
+		   UNSPEC_FILD_ATOMIC))
+   (set (match_operand:DI 4 "memory_operand")	; insn 2: FIST_ATOMIC of operand 2 into DImode memory operand 4
+	(unspec:DI [(match_dup 2)]
+		   UNSPEC_FIST_ATOMIC))]
+  "!TARGET_64BIT
+   && peep2_reg_dead_p (3, operands[2])
+   && rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))"
+  [(set (match_dup 5) (match_dup 1))]	; replacement: a single DFmode store of operand 1.  NOTE(review): the store to operand 0 is dropped and nothing visible here checks that this memory is unused afterwards -- confirm.
+  "operands[5] = gen_lowpart (DFmode, operands[4]);")	; operand 5 is the final destination memory (operand 4) re-expressed in DFmode
+
+(define_peephole2	; Same collapse as the fild/fistp store form, but the DImode bounce goes through an SSE register.
+  [(set (match_operand:DF 0 "memory_operand")	; insn 0: DFmode store of FP register operand 1 into memory operand 0
+	(match_operand:DF 1 "fp_register_operand"))
+   (set (match_operand:DI 2 "sse_reg_operand")	; insn 1: DImode load of memory operand 3 into SSE register operand 2; the condition requires operand 0 to equal operand 3 viewed in DFmode
+	(match_operand:DI 3 "memory_operand"))
+   (set (match_operand:DI 4 "memory_operand")	; insn 2: DImode store of operand 2 into memory operand 4
+	(match_dup 2))]
+  "!TARGET_64BIT
+   && peep2_reg_dead_p (3, operands[2])
+   && rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))"
+  [(set (match_dup 5) (match_dup 1))]	; replacement: a single DFmode store of operand 1.  NOTE(review): the store to operand 0 is dropped and nothing visible here checks that this memory is unused afterwards -- confirm.
+  "operands[5] = gen_lowpart (DFmode, operands[4]);")	; operand 5 is the final destination memory (operand 4) re-expressed in DFmode
+
 ;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
 ;; operations.  But the fix_trunc patterns want way more setup than we want
 ;; to provide.  Note that the scratch is DFmode instead of XFmode in order
Index: testsuite/gcc.target/i386/pr71245-1.c
===================================================================
--- testsuite/gcc.target/i386/pr71245-1.c	(nonexistent)
+++ testsuite/gcc.target/i386/pr71245-1.c	(working copy)
@@ -0,0 +1,22 @@ 
+/* PR target/71245 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -march=pentium -mno-sse -mfpmath=387" } */
+
+typedef union	/* Overlays an unsigned long long and a double on the same storage.  */
+{
+  unsigned long long ll;	/* Integer view, used for the atomic load and store.  */
+  double d;	/* Floating-point view, used for the arithmetic.  */
+} u_t;
+
+u_t d = { .d = 5.0 };	/* Global object the test loads from and stores to.  */
+
+void foo_d (void)	/* Atomically load d through .ll, add 1.0 through .d, atomically store back through .ll.  */
+{
+  u_t tmp;
+  
+  tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST);	/* 64-bit integer atomic load.  */
+  tmp.d += 1.0;	/* Same bits reinterpreted as a double via the union.  */
+  __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);	/* 64-bit integer atomic store of the updated bits.  */
+}
+
+/* { dg-final { scan-assembler-not "(fistp|fild)" } } */
Index: testsuite/gcc.target/i386/pr71245-2.c
===================================================================
--- testsuite/gcc.target/i386/pr71245-2.c	(nonexistent)
+++ testsuite/gcc.target/i386/pr71245-2.c	(working copy)
@@ -0,0 +1,22 @@ 
+/* PR target/71245 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -march=pentium -msse -mno-sse2 -mfpmath=387" } */
+
+typedef union	/* Overlays an unsigned long long and a double on the same storage.  */
+{
+  unsigned long long ll;	/* Integer view, used for the atomic load and store.  */
+  double d;	/* Floating-point view, used for the arithmetic.  */
+} u_t;
+
+u_t d = { .d = 5.0 };	/* Global object the test loads from and stores to.  */
+
+void foo_d (void)	/* Atomically load d through .ll, add 1.0 through .d, atomically store back through .ll.  */
+{
+  u_t tmp;
+  
+  tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST);	/* 64-bit integer atomic load.  */
+  tmp.d += 1.0;	/* Same bits reinterpreted as a double via the union.  */
+  __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);	/* 64-bit integer atomic store of the updated bits.  */
+}
+
+/* { dg-final { scan-assembler-not "movlps" } } */