Patchwork [i386] : Fix PR46098; ICE in extract_insn with __builtin_ia32_loadupd(); in a better way.

login
register
mail settings
Submitter Uros Bizjak
Date May 14, 2012, 7:23 p.m.
Message ID <CAFULd4YahzCyN2qtaaZH-66oshsWpHBf2-3yUqy+mQc1y4sc6g@mail.gmail.com>
Download mbox | patch
Permalink /patch/159121/
State New
Headers show

Comments

Uros Bizjak - May 14, 2012, 7:23 p.m.
Hello!

As mentioned in the PR [1], comment #9, the fix for the ICE is not
optimal (if not wrong). The problem with mem/mem operands was fixed as
a fixup in the expander, where operands[1] was pulled into a register.
Unfortunately, unaligned moves should not be handled in the same way
as aligned moves, since one of the operands is unaligned. As it turns
out, for the testcase from the PR, the expander pulls the wrong
operand:

--cut here--
/* { dg-do compile } */
/* { dg-options "-msse2 -ffloat-store" } */

typedef double v2df __attribute__((vector_size (16)));

v2df foo (double *d)
{
  return __builtin_ia32_loadupd (d);
}
--cut here--

        movsd   (%rax), %xmm0
        movhpd  8(%rax), %xmm0
        movupd  %xmm0, -16(%rbp)
        movapd  -16(%rbp), %xmm0

So, we move (unaligned!) memory to a register, and then use movupd to
store to aligned stack slot. Luckily, gcc figures that the load is
from unaligned memory and generates movsd/movhpd combo.

The intention was to generate:

        movupd  (%rax), %xmm0
        movapd  %xmm0, -16(%rbp)
        movapd  -16(%rbp), %xmm0

So, we don't want a fixup in the expander, but we should always load
to a register for the "load" builtin class.

2012-05-14  Uros Bizjak  <ubizjak@gmail.com>

	PR target/46098
	* config/i386/i386.c (ix86_expand_special_args_builtin): Always
	generate target register for "load" class builtins.

	Revert:
	2010-10-22  Uros Bizjak  <ubizjak@gmail.com>

	PR target/46098
	* config/i386/sse.md (*avx_movu<ssemodesuffix><avxmodesuffix>):
	Rename from avx_movu<ssemodesuffix><avxmodesuffix>.
	(avx_movu<ssemodesuffix><avxmodesuffix>): New expander.
	(*<sse>_movu<ssemodesuffix>): Rename from <sse>_movu<ssemodesuffix>.
	(<sse>_movu<ssemodesuffix>): New expander.
	(*avx_movdqu<avxmodesuffix>): Rename from avx_movdqu<avxmodesuffix>.
	(avx_movdqu<avxmodesuffix>): New expander.
	(*sse2_movdqu): Rename from sse2_movdqu.
	(sse2_movdqu): New expander.

testsuite/ChangeLog:

2012-05-14  Uros Bizjak  <ubizjak@gmail.com>

	* gcc.target/i386/avx256-unaligned-load-[1234].c: Update scan strings.
	* gcc.target/i386/avx256-unaligned-store-[1234].c: Ditto.

Attached patch was bootstrapped and regression tested on
x86_64-pc-linux-gnu {,-m32}. Patch will be committed to SVN and all
release branches.

[1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46098

Uros.

Patch

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 187465)
+++ config/i386/i386.c	(working copy)
@@ -29472,8 +29472,8 @@  ix86_expand_special_args_builtin (const struct bui
       arg_adjust = 0;
       if (optimize
 	  || target == 0
-	  || GET_MODE (target) != tmode
-	  || !insn_p->operand[0].predicate (target, tmode))
+	  || !register_operand (target, tmode)
+	  || GET_MODE (target) != tmode)
 	target = gen_reg_rtx (tmode);
     }
 
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 187465)
+++ config/i386/sse.md	(working copy)
@@ -588,18 +588,7 @@ 
   DONE;
 })
 
-(define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
-  [(set (match_operand:VF 0 "nonimmediate_operand")
-	(unspec:VF
-	  [(match_operand:VF 1 "nonimmediate_operand")]
-	  UNSPEC_MOVU))]
-  "TARGET_SSE"
-{
-  if (MEM_P (operands[0]) && MEM_P (operands[1]))
-    operands[1] = force_reg (<MODE>mode, operands[1]);
-})
-
-(define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
+(define_insn "<sse>_movu<ssemodesuffix><avxsizesuffix>"
   [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
 	(unspec:VF
 	  [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
@@ -631,17 +620,7 @@ 
 	      ]
 	      (const_string "<MODE>")))])
 
-(define_expand "<sse2>_movdqu<avxsizesuffix>"
-  [(set (match_operand:VI1 0 "nonimmediate_operand")
-	(unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand")]
-		    UNSPEC_MOVU))]
-  "TARGET_SSE2"
-{
-  if (MEM_P (operands[0]) && MEM_P (operands[1]))
-    operands[1] = force_reg (<MODE>mode, operands[1]);
-})
-
-(define_insn "*<sse2>_movdqu<avxsizesuffix>"
+(define_insn "<sse2>_movdqu<avxsizesuffix>"
   [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
 	(unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
 		    UNSPEC_MOVU))]
Index: testsuite/gcc.target/i386/avx256-unaligned-store-2.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-store-2.c	(revision 187465)
+++ testsuite/gcc.target/i386/avx256-unaligned-store-2.c	(working copy)
@@ -24,6 +24,6 @@  avx_test (void)
     }
 }
 
-/* { dg-final { scan-assembler-not "\\*avx_movdqu256/2" } } */
+/* { dg-final { scan-assembler-not "avx_movdqu256/2" } } */
 /* { dg-final { scan-assembler "vmovdqu.*\\*movv16qi_internal/3" } } */
 /* { dg-final { scan-assembler "vextract.128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-load-3.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-load-3.c	(revision 187465)
+++ testsuite/gcc.target/i386/avx256-unaligned-load-3.c	(working copy)
@@ -14,6 +14,6 @@  avx_test (void)
     c[i] = a[i] * b[i+3];
 }
 
-/* { dg-final { scan-assembler-not "\\*avx_movupd256/1" } } */
-/* { dg-final { scan-assembler "\\*sse2_movupd/1" } } */
+/* { dg-final { scan-assembler-not "avx_movupd256/1" } } */
+/* { dg-final { scan-assembler "sse2_movupd/1" } } */
 /* { dg-final { scan-assembler "vinsertf128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-store-3.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-store-3.c	(revision 187465)
+++ testsuite/gcc.target/i386/avx256-unaligned-store-3.c	(working copy)
@@ -17,6 +17,6 @@  avx_test (void)
     d[i] = c[i] * 20.0;
 }
 
-/* { dg-final { scan-assembler-not "\\*avx_movupd256/2" } } */
+/* { dg-final { scan-assembler-not "avx_movupd256/2" } } */
 /* { dg-final { scan-assembler "vmovupd.*\\*movv2df_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-load-4.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-load-4.c	(revision 187465)
+++ testsuite/gcc.target/i386/avx256-unaligned-load-4.c	(working copy)
@@ -14,6 +14,6 @@  avx_test (void)
     b[i] = a[i+3] * 2;
 }
 
-/* { dg-final { scan-assembler "\\*avx_movups256/1" } } */
-/* { dg-final { scan-assembler-not "\\*avx_movups/1" } } */
+/* { dg-final { scan-assembler "avx_movups256/1" } } */
+/* { dg-final { scan-assembler-not "avx_movups/1" } } */
 /* { dg-final { scan-assembler-not "vinsertf128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-store-4.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-store-4.c	(revision 187465)
+++ testsuite/gcc.target/i386/avx256-unaligned-store-4.c	(working copy)
@@ -14,7 +14,7 @@  avx_test (void)
     b[i+3] = a[i] * c[i];
 }
 
-/* { dg-final { scan-assembler "\\*avx_movups256/2" } } */
-/* { dg-final { scan-assembler-not "\\*avx_movups/2" } } */
+/* { dg-final { scan-assembler "avx_movups256/2" } } */
+/* { dg-final { scan-assembler-not "avx_movups/2" } } */
 /* { dg-final { scan-assembler-not "\\*avx_movv4sf_internal/3" } } */
 /* { dg-final { scan-assembler-not "vextractf128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-load-1.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-load-1.c	(revision 187465)
+++ testsuite/gcc.target/i386/avx256-unaligned-load-1.c	(working copy)
@@ -14,6 +14,6 @@  avx_test (void)
     c[i] = a[i] * b[i+3];
 }
 
-/* { dg-final { scan-assembler-not "\\*avx_movups256/1" } } */
-/* { dg-final { scan-assembler "\\*sse_movups/1" } } */
+/* { dg-final { scan-assembler-not "avx_movups256/1" } } */
+/* { dg-final { scan-assembler "sse_movups/1" } } */
 /* { dg-final { scan-assembler "vinsertf128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-store-1.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-store-1.c	(revision 187465)
+++ testsuite/gcc.target/i386/avx256-unaligned-store-1.c	(working copy)
@@ -17,6 +17,6 @@  avx_test (void)
     d[i] = c[i] * 20.0;
 }
 
-/* { dg-final { scan-assembler-not "\\*avx_movups256/2" } } */
+/* { dg-final { scan-assembler-not "avx_movups256/2" } } */
 /* { dg-final { scan-assembler "vmovups.*\\*movv4sf_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */
Index: testsuite/gcc.target/i386/avx256-unaligned-load-2.c
===================================================================
--- testsuite/gcc.target/i386/avx256-unaligned-load-2.c	(revision 187465)
+++ testsuite/gcc.target/i386/avx256-unaligned-load-2.c	(working copy)
@@ -24,6 +24,6 @@  avx_test (void)
     }
 }
 
-/* { dg-final { scan-assembler-not "\\*avx_movdqu256/1" } } */
-/* { dg-final { scan-assembler "\\*sse2_movdqu/1" } } */
+/* { dg-final { scan-assembler-not "avx_movdqu256/1" } } */
+/* { dg-final { scan-assembler "sse2_movdqu/1" } } */
 /* { dg-final { scan-assembler "vinsert.128" } } */