diff mbox

[i386] : Fix PR 80425, Extra inter-unit register move with zero-extension

Message ID CAFULd4avHGJUWn+UyCYfD3+OKY=idZDyszn3icXC2UEz-nO7OA@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak May 15, 2017, 7:13 p.m. UTC
Hello!

The attached patch introduces a peephole2 pattern to avoid an intermediate
DImode register in the inter-unit zero-extend sequence.

However, it looks like there is still a slight problem with the register
allocator (RA). Without -mtune=intel, direct GR->XMM inter-unit moves are
disabled, but the pr80425-2.c testcase compiles to:

        movl    a(%rip), %eax
        movq    %rax, -56(%rbp)
        vmovq   -56(%rbp), %xmm1

The compiler could emit a direct mem->XMM zero-extending move, without
an intermediate stack slot.

2017-05-15  Uros Bizjak  <ubizjak@gmail.com>

    * config/i386/i386.md (*zero_extendsidi2): Do not penalize
    non-interunit SSE move alternatives with '?'.
    (zero-extendsidi peephole2): New peephole to skip intermediate
    general register in SSE zero-extend sequence.

testsuite/ChangeLog:

2017-05-15  Uros Bizjak  <ubizjak@gmail.com>

    * gcc.target/i386/pr80425-1.c: New test.
    * gcc.target/i386/pr80425-2.c: Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
diff mbox

Patch

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 248065)
+++ config/i386/i386.md	(working copy)
@@ -3762,10 +3762,10 @@ 
 
 (define_insn "*zero_extendsidi2"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-		"=r,?r,?o,r   ,o,?*Ym,?!*y,?r ,?r,?*Yi,?*x,?*x,?*v,*r")
+		"=r,?r,?o,r   ,o,?*Ym,?!*y,?r ,?r,?*Yi,*x,*x,*v,*r")
 	(zero_extend:DI
 	 (match_operand:SI 1 "x86_64_zext_operand"
-	        "0 ,rm,r ,rmWz,0,r   ,m   ,*Yj,*x,r   ,m  , *x, *v,*k")))]
+	        "0 ,rm,r ,rmWz,0,r   ,m   ,*Yj,*x,r   ,m ,*x,*v,*k")))]
   ""
 {
   switch (get_attr_type (insn))
@@ -3885,6 +3885,15 @@ 
    (set (match_dup 4) (const_int 0))]
   "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
 
+(define_peephole2
+  [(set (match_operand:DI 0 "general_reg_operand")
+	(zero_extend:DI (match_operand:SI 1 "nonimmediate_gr_operand")))
+   (set (match_operand:DI 2 "sse_reg_operand") (match_dup 0))]
+  "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC
+   && peep2_reg_dead_p (2, operands[0])"
+  [(set (match_dup 2)
+	(zero_extend:DI (match_dup 1)))])
+
 (define_mode_attr kmov_isa
   [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
 
Index: testsuite/gcc.target/i386/pr80425-1.c
===================================================================
--- testsuite/gcc.target/i386/pr80425-1.c	(nonexistent)
+++ testsuite/gcc.target/i386/pr80425-1.c	(working copy)
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=intel" } */
+
+#include <x86intrin.h>
+
+__m512i
+f1 (__m512i x, int a)
+{
+  return _mm512_srai_epi32 (x, a);
+}
+
+/* { dg-final { scan-assembler-times "movd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
Index: testsuite/gcc.target/i386/pr80425-2.c
===================================================================
--- testsuite/gcc.target/i386/pr80425-2.c	(nonexistent)
+++ testsuite/gcc.target/i386/pr80425-2.c	(working copy)
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=intel" } */
+
+#include <x86intrin.h>
+
+extern int a;
+
+__m512i
+f1 (__m512i x)
+{
+  return _mm512_srai_epi32 (x, a);
+}
+
+/* { dg-final { scan-assembler-times "movd\[ \\t\]+\[^\n\]*%xmm" 1 } } */