Patchwork [i386] : Fix PR target/46051: [4.6 Regression] ICE: in extract_insn, at recog.c:2110 with -mavx -ftree-vectorize

login
register
mail settings
Submitter Uros Bizjak
Date Oct. 17, 2010, 8:42 p.m.
Message ID <AANLkTimO_N5R=c_4fGQv8PXpsMPyR3Vh=z5wHdFEOFJH@mail.gmail.com>
Download mbox | patch
Permalink /patch/68094/
State New
Headers show

Comments

Uros Bizjak - Oct. 17, 2010, 8:42 p.m.
On Sun, Oct 17, 2010 at 6:36 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>> Please fix the expanders that generate wrong patterns instead, i.e.
>>> vec_interleave_lowv4df.
>>
>> Something like attached, but there are probably more instances to fix.
>
> It works.  Thanks.

There were similar problems with the v8sf vec_interleave expanders.
The attached patch fixes them all.

2010-10-17  Uros Bizjak  <ubizjak@gmail.com>

	PR target/46051
	* config/i386/sse.md (vec_interleave_highv4df): Fix third RTX of
	generated sequence to match *avx_vperm2f128<mode>_nozero.
	(vec_interleave_lowv4df): Ditto.
	(vec_interleave_highv8sf): Ditto.
	(vec_interleave_lowv8sf): Ditto.

testsuite/ChangeLog:

2010-10-17  Uros Bizjak  <ubizjak@gmail.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	PR target/46051
	* gcc.target/i386/pr46051.c: New test.

The patch was tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN.

Uros.

Patch

Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 165590)
+++ config/i386/sse.md	(working copy)
@@ -3574,15 +3574,14 @@ 
 		     (const_int 6) (const_int 14)
 		     (const_int 7) (const_int 15)])))
    (set (match_operand:V8SF 0 "register_operand" "")
-	(vec_concat:V8SF
-	  (vec_select:V4SF
+	(vec_select:V8SF
+	  (vec_concat:V16SF
 	    (match_dup 3)
-	    (parallel [(const_int 4) (const_int 5)
-		       (const_int 6) (const_int 7)]))
-	  (vec_select:V4SF
-	    (match_dup 4)
-	    (parallel [(const_int 4) (const_int 5)
-		       (const_int 6) (const_int 7)]))))]
+	    (match_dup 4))
+	  (parallel [(const_int 4) (const_int 5)
+		     (const_int 6) (const_int 7)
+		     (const_int 12) (const_int 13)
+		     (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
 {
   operands[3] = gen_reg_rtx (V8SFmode);
@@ -3653,15 +3652,14 @@ 
 		     (const_int 6) (const_int 14)
 		     (const_int 7) (const_int 15)])))
    (set (match_operand:V8SF 0 "register_operand" "")
-	(vec_concat:V8SF
-	  (vec_select:V4SF
+	(vec_select:V8SF
+	  (vec_concat:V16SF
 	    (match_dup 3)
-	    (parallel [(const_int 0) (const_int 1)
-		       (const_int 2) (const_int 3)]))
-	  (vec_select:V4SF
-	    (match_dup 4)
-	    (parallel [(const_int 0) (const_int 1)
-		       (const_int 2) (const_int 3)]))))]
+	    (match_dup 4))
+	  (parallel [(const_int 0) (const_int 1)
+		     (const_int 2) (const_int 3)
+		     (const_int 8) (const_int 9)
+		     (const_int 10) (const_int 11)])))]
  "TARGET_AVX"
 {
   operands[3] = gen_reg_rtx (V8SFmode);
@@ -4583,13 +4581,12 @@ 
 	  (parallel [(const_int 1) (const_int 5)
 		     (const_int 3) (const_int 7)])))
    (set (match_operand:V4DF 0 "register_operand" "")
-	(vec_concat:V4DF
-	  (vec_select:V2DF
+	(vec_select:V4DF
+	  (vec_concat:V8DF
 	    (match_dup 3)
-	    (parallel [(const_int 2) (const_int 3)]))
-	  (vec_select:V2DF
-	    (match_dup 4)
-	    (parallel [(const_int 2) (const_int 3)]))))]
+	    (match_dup 4))
+	  (parallel [(const_int 2) (const_int 3)
+		     (const_int 6) (const_int 7)])))]
  "TARGET_AVX"
 {
   operands[3] = gen_reg_rtx (V4DFmode);
@@ -4718,13 +4715,12 @@ 
 	  (parallel [(const_int 1) (const_int 5)
 		     (const_int 3) (const_int 7)])))
    (set (match_operand:V4DF 0 "register_operand" "")
-	(vec_concat:V4DF
-	  (vec_select:V2DF
+	(vec_select:V4DF
+	  (vec_concat:V8DF
 	    (match_dup 3)
-	    (parallel [(const_int 0) (const_int 1)]))
-	  (vec_select:V2DF
-	    (match_dup 4)
-	    (parallel [(const_int 0) (const_int 1)]))))]
+	    (match_dup 4))
+	  (parallel [(const_int 0) (const_int 1)
+	  	     (const_int 4) (const_int 5)])))]
  "TARGET_AVX"
 {
   operands[3] = gen_reg_rtx (V4DFmode);
Index: testsuite/gcc.target/i386/pr46051.c
===================================================================
--- testsuite/gcc.target/i386/pr46051.c	(revision 0)
+++ testsuite/gcc.target/i386/pr46051.c	(revision 0)
@@ -0,0 +1,34 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx -mtune=generic" } */
+
+double val1[4][2], val2[4][2], chk[4][2];
+
+void
+foo (void)
+{
+  int i, j;
+  for (i = 0; i < 4; i++)
+    {
+      double tmp = 0;
+      for (j = 0; j < 2; j++)
+       tmp += val1[i][j] * val2[i][j];
+      for (j = 0; j < 2; j++)
+       chk[i][j] = tmp;
+    }
+}
+
+float val1f[8][2], val2f[8][2], chkf[8][2];
+
+void
+foof (void)
+{
+  int i, j;
+  for (i = 0; i < 8; i++)
+    {
+      float tmp = 0;
+      for (j = 0; j < 2; j++)
+       tmp += val1f[i][j] * val2f[i][j];
+      for (j = 0; j < 2; j++)
+       chkf[i][j] = tmp;
+    }
+}