diff mbox

[i386] : Fix PR 80706, prevent unwanted peephole2 matching

Message ID CAFULd4bS+=ytw7t2YVEEjjXE0GKQQaU6Oqibs2Nhfsqjoz6mGw@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak May 11, 2017, 6:20 p.m. UTC
Hello!

The attached patch prevents unwanted peephole2 matching. With SSE, we can
expand an atomic load through XMM registers using generic DImode move
patterns. It can happen that peephole2 then matches an unrelated insn
sequence and, as a result, removes a random FP store to memory.

The patch introduces specialized SSE move patterns for this purpose.

2017-05-11  Uros Bizjak  <ubizjak@gmail.com>

    PR target/80706
    * config/i386/sync.md (UNSPEC_LDX_ATOMIC): New unspec.
    (UNSPEC_STX_ATOMIC): Ditto.
    (loaddi_via_sse): New insn.
    (storedi_via_sse): Ditto.
    (atomic_loaddi_fpu): Emit loaddi_via_sse and storedi_via_sse.
    Update corresponding peephole2 patterns.
    (atomic_storedi_fpu): Ditto.

testsuite/ChangeLog:

2017-05-11  Uros Bizjak  <ubizjak@gmail.com>
        Jakub Jelinek  <jakub@redhat.com>

    PR target/80706
    * gcc.target/i386/pr80706.c: New test.

Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline, will be backported to gcc-7 branch.

Uros.
diff mbox

Patch

Index: config/i386/sync.md
===================================================================
--- config/i386/sync.md	(revision 247914)
+++ config/i386/sync.md	(working copy)
@@ -25,6 +25,9 @@ 
   UNSPEC_FILD_ATOMIC
   UNSPEC_FIST_ATOMIC
 
+  UNSPEC_LDX_ATOMIC
+  UNSPEC_STX_ATOMIC
+
   ;; __atomic support
   UNSPEC_LDA
   UNSPEC_STA
@@ -199,9 +202,8 @@ 
 	}
       else
 	{
-	  adjust_reg_mode (tmp, DImode);
-	  emit_move_insn (tmp, src);
-	  emit_move_insn (mem, tmp);
+	  emit_insn (gen_loaddi_via_sse (tmp, src));
+	  emit_insn (gen_storedi_via_sse (mem, tmp));
 	}
 
       if (mem != dst)
@@ -226,10 +228,12 @@ 
   "operands[5] = gen_lowpart (DFmode, operands[1]);")
 
 (define_peephole2
-  [(set (match_operand:DI 0 "sse_reg_operand")
-	(match_operand:DI 1 "memory_operand"))
+  [(set (match_operand:DF 0 "sse_reg_operand")
+	(unspec:DF [(match_operand:DI 1 "memory_operand")]
+		   UNSPEC_LDX_ATOMIC))
    (set (match_operand:DI 2 "memory_operand")
-	(match_dup 0))
+	(unspec:DI [(match_dup 0)]
+		   UNSPEC_STX_ATOMIC))
    (set (match_operand:DF 3 "fp_register_operand")
 	(match_operand:DF 4 "memory_operand"))]
   "!TARGET_64BIT
@@ -301,7 +305,9 @@ 
   rtx dst = operands[0], src = operands[1];
   rtx mem = operands[2], tmp = operands[3];
 
-  if (!SSE_REG_P (src))
+  if (SSE_REG_P (src))
+    emit_move_insn (dst, src);
+  else
     {
       if (REG_P (src))
 	{
@@ -313,16 +319,13 @@ 
 	{
 	  emit_insn (gen_loaddi_via_fpu (tmp, src));
 	  emit_insn (gen_storedi_via_fpu (dst, tmp));
-	  DONE;
 	}
       else
 	{
-	  adjust_reg_mode (tmp, DImode);
-	  emit_move_insn (tmp, src);
-	  src = tmp;
+	  emit_insn (gen_loaddi_via_sse (tmp, src));
+	  emit_insn (gen_storedi_via_sse (dst, tmp));
 	}
     }
-  emit_move_insn (dst, src);
   DONE;
 })
 
@@ -344,10 +347,12 @@ 
 (define_peephole2
   [(set (match_operand:DF 0 "memory_operand")
 	(match_operand:DF 1 "fp_register_operand"))
-   (set (match_operand:DI 2 "sse_reg_operand")
-	(match_operand:DI 3 "memory_operand"))
+   (set (match_operand:DF 2 "sse_reg_operand")
+	(unspec:DF [(match_operand:DI 3 "memory_operand")]
+		   UNSPEC_LDX_ATOMIC))
    (set (match_operand:DI 4 "memory_operand")
-	(match_dup 2))]
+	(unspec:DI [(match_dup 2)]
+		   UNSPEC_STX_ATOMIC))]
   "!TARGET_64BIT
    && peep2_reg_dead_p (3, operands[2])
    && rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))"
@@ -382,6 +387,32 @@ 
   [(set_attr "type" "fmov")
    (set_attr "mode" "DI")])
 
+(define_insn "loaddi_via_sse"	;; Dedicated SSE pattern for the load half of an atomic DImode access.
+  [(set (match_operand:DF 0 "register_operand" "=x")
+	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
+		   UNSPEC_LDX_ATOMIC))]	;; The unspec keeps this insn distinct from a generic DImode move, so peephole2 cannot pick up unrelated moves.
+  "TARGET_SSE"	;; Available whenever SSE is enabled; the template below picks the instruction.
+{
+  if (TARGET_SSE2)	/* movq is emitted only with SSE2; otherwise movlps is used.  */
+    return "%vmovq\t{%1, %0|%0, %1}";
+  return "movlps\t{%1, %0|%0, %1}";
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "DI")])
+
+(define_insn "storedi_via_sse"	;; Dedicated SSE pattern for the store half of an atomic DImode access.
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(unspec:DI [(match_operand:DF 1 "register_operand" "x")]
+		   UNSPEC_STX_ATOMIC))]	;; The unspec keeps this insn distinct from a generic DImode move, so peephole2 cannot pick up unrelated moves.
+  "TARGET_SSE"	;; Available whenever SSE is enabled; the template below picks the instruction.
+{
+  if (TARGET_SSE2)	/* movq is emitted only with SSE2; otherwise movlps is used.  */
+    return "%vmovq\t{%1, %0|%0, %1}";
+  return "movlps\t{%1, %0|%0, %1}";
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "DI")])
+
 (define_expand "atomic_compare_and_swap<mode>"
   [(match_operand:QI 0 "register_operand")	;; bool success output
    (match_operand:SWI124 1 "register_operand")	;; oldval output
Index: testsuite/gcc.target/i386/pr80706.c
===================================================================
--- testsuite/gcc.target/i386/pr80706.c	(nonexistent)
+++ testsuite/gcc.target/i386/pr80706.c	(working copy)
@@ -0,0 +1,30 @@ 
+/* PR target/80706 */
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2" } */
+
+union U { double value; struct S { int lsw; int msw; } parts; };	/* Overlays a double with two ints so bar can store to one int of its representation.  */
+
+__attribute__((noinline, noclone)) double	/* Kept out of line so bar really calls it.  */
+foo (void)
+{
+  __asm volatile ("" : : : "memory");	/* Empty asm with a "memory" clobber: emits no code of its own.  */
+  return 2.0;	/* The value main's expected result is derived from.  */
+}
+
+__attribute__((noinline, noclone)) double	/* Kept out of line so main really calls it.  */
+bar (void)
+{
+  double s = foo ();
+  union U z;
+  z.value = s;	/* FP store of the double into the union; presumably the store the PR saw removed -- confirm against the PR.  */
+  z.parts.lsw = 0;	/* Integer store into the first int overlaying the double just written.  */
+  return z.value * z.value + s * s;	/* main expects 8.0, so each product must contribute 4.0.  */
+}
+
+int
+main ()
+{
+  if (bar () != 8.0)	/* foo returns 2.0, so 2.0 * 2.0 + 2.0 * 2.0 is expected.  */
+    __builtin_abort ();	/* Any other result fails the run test.  */
+  return 0;
+}