Patchwork [i386]: Backport fix for PR 52908 - xop-mul-1:f9 miscompiled on bulldozer (-mxop) to 4.7

login
register
mail settings
Submitter venkataramanan.kumar@amd.com
Date June 7, 2012, 11:07 a.m.
Message ID <20120607110718.19739.91036.sendpatchset@adcelk01.amd.com>
Download mbox | patch
Permalink /patch/163605/
State New
Headers show

Comments

venkataramanan.kumar@amd.com - June 7, 2012, 11:07 a.m.
Hi Maintainers,

Please find the patch below that backports PR target/52908 to GCC 4.7.

The patch passed bootstrap and regression test.

Ok to commit?

regards,
Venkat.
Jakub Jelinek - June 7, 2012, 11:33 a.m.
On Thu, Jun 07, 2012 at 06:07:18AM -0500, venkataramanan.kumar@amd.com wrote:
> Please find the patch below that backports PR target/52908 to GCC 4.7.
> 
> The patch passed bootstrap and regression test.
> 
> Ok to commit?

Please wait with it until 4.7.1 is released.

> +2012-06-07  Venkataramanan Kumar <venkataramanan.kumar@amd.com>
> +

The common way is to use:
	Backport from mainline
	2012-05-09  Uros Bizjak  <ubizjak@gmail.com>
instead of the following line:
> +	Backport from  2012-05-09 mainline r187354
> +
> +	PR target/52908
> +	* config/i386/sse.md (vec_widen_smult_hi_v4si): Expand using
> +	xop_pmacsdqh insn pattern instead of xop_mulv2div2di3_high.
> +	(vec_widen_smult_lo_v4si): Expand using xop_pmacsdql insn pattern
> +	instead of xop_mulv2div2di3_low.
> +	(xop_p<macs>dql): Fix vec_select selector.
> +	(xop_p<macs>dqh): Ditto.
> +	(xop_mulv2div2di3_low): Remove insn_and_split pattern.
> +	(xop_mulv2div2di3_high): Ditto.

	Jakub
Uros Bizjak - June 14, 2012, 9:03 a.m.
On Thu, Jun 7, 2012 at 1:33 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Jun 07, 2012 at 06:07:18AM -0500, venkataramanan.kumar@amd.com wrote:
>> Please find the patch below that backports PR target/52908 to GCC 4.7.
>>
>> The patch passed bootstrap and regression test.
>>
>> Ok to commit?
>
> Please wait with it until 4.7.1 is released.
>
>> +2012-06-07  Venkataramanan Kumar <venkataramanan.kumar@amd.com>
>> +
>
> The common way is to use:
>        Backport from mainline
>        2012-05-09  Uros Bizjak  <ubizjak@gmail.com>
> instead of the following line:
>> +     Backport from  2012-05-09 mainline r187354
>> +
>> +     PR target/52908
>> +     * config/i386/sse.md (vec_widen_smult_hi_v4si): Expand using
>> +     xop_pmacsdqh insn pattern instead of xop_mulv2div2di3_high.
>> +     (vec_widen_smult_lo_v4si): Expand using xop_pmacsdql insn pattern
>> +     instead of xop_mulv2div2di3_low.
>> +     (xop_p<macs>dql): Fix vec_select selector.
>> +     (xop_p<macs>dqh): Ditto.
>> +     (xop_mulv2div2di3_low): Remove insn_and_split pattern.
>> +     (xop_mulv2div2di3_high): Ditto.

OK with the above change.

Thanks,
Uros.

Patch

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 187449)
+++ ChangeLog	(working copy)
@@ -1,3 +1,17 @@ 
+2012-06-07  Venkataramanan Kumar <venkataramanan.kumar@amd.com>
+
+	Backport from  2012-05-09 mainline r187354
+
+	PR target/52908
+	* config/i386/sse.md (vec_widen_smult_hi_v4si): Expand using
+	xop_pmacsdqh insn pattern instead of xop_mulv2div2di3_high.
+	(vec_widen_smult_lo_v4si): Expand using xop_pmacsdql insn pattern
+	instead of xop_mulv2div2di3_low.
+	(xop_p<macs>dql): Fix vec_select selector.
+	(xop_p<macs>dqh): Ditto.
+	(xop_mulv2div2di3_low): Remove insn_and_split pattern.
+	(xop_mulv2div2di3_high): Ditto.
+
 2012-05-13  Uros Bizjak  <ubizjak@gmail.com>
 
 	Backport from mainline
Index: testsuite/gcc.target/i386/xop-imul32widen-vector.c
===================================================================
--- testsuite/gcc.target/i386/xop-imul32widen-vector.c	(revision 187449)
+++ testsuite/gcc.target/i386/xop-imul32widen-vector.c	(working copy)
@@ -32,5 +32,5 @@ 
   exit (0);
 }
 
-/* { dg-final { scan-assembler "vpmacsdql" } } */
+/* { dg-final { scan-assembler "vpmuldq" } } */
 /* { dg-final { scan-assembler "vpmacsdqh" } } */
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog	(revision 187449)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,11 @@ 
+2012-06-07  Venkataramanan Kumar  <venkataramanan.kumar@amd.com>
+
+	Backport from 2012-05-09 mainline r187354
+
+	PR target/52908
+	* gcc.target/i386/xop-imul32widen-vector.c: Update scan-assembler
+	directive to scan for vpmuldq, not vpmacsdql.
+
 2012-05-12  Eric Botcazou  <ebotcazou@adacore.com>
 
 	* gnat.dg/null_pointer_deref3.adb: New test.
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 187449)
+++ config/i386/sse.md	(working copy)
@@ -5743,11 +5743,15 @@ 
 
   if (TARGET_XOP)
     {
+      rtx t3 = gen_reg_rtx (V2DImode);
+
       emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
 				    GEN_INT (1), GEN_INT (3)));
       emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
 				    GEN_INT (1), GEN_INT (3)));
-      emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
+      emit_move_insn (t3, CONST0_RTX (V2DImode));
+
+      emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
       DONE;
     }
 
@@ -5772,11 +5776,15 @@ 
 
   if (TARGET_XOP)
     {
+      rtx t3 = gen_reg_rtx (V2DImode);
+
       emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
 				    GEN_INT (1), GEN_INT (3)));
       emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
 				    GEN_INT (1), GEN_INT (3)));
-      emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
+      emit_move_insn (t3, CONST0_RTX (V2DImode));
+
+      emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
       DONE;
     }
 
@@ -10443,12 +10451,12 @@ 
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
 	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
-	    (parallel [(const_int 1)
-		       (const_int 3)])))
-	  (vec_select:V2SI
+            (parallel [(const_int 0)
+                       (const_int 2)])))
+	   (vec_select:V2SI
 	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-	   (parallel [(const_int 1)
-		      (const_int 3)])))
+	   (parallel [(const_int 0)
+		      (const_int 2)])))
 	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
@@ -10462,13 +10470,13 @@ 
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
 	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
-	    (parallel [(const_int 0)
-		       (const_int 2)])))
+	    (parallel [(const_int 1)
+		       (const_int 3)])))
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
 	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-	    (parallel [(const_int 0)
-		       (const_int 2)]))))
+	    (parallel [(const_int 1)
+		       (const_int 3)]))))
 	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
@@ -10482,61 +10490,19 @@ 
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
 	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
-	    (parallel [(const_int 1)
-		       (const_int 3)])))
+	    (parallel [(const_int 0)
+		       (const_int 2)])))
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
 	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-	    (parallel [(const_int 1)
-		       (const_int 3)]))))
+	    (parallel [(const_int 0)
+		       (const_int 2)]))))
 	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
-;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
-;; fake it with a multiply/add.  In general, we expect the define_split to
-;; occur before register allocation, so we have to handle the corner case where
-;; the target is the same as operands 1/2
-(define_insn_and_split "xop_mulv2div2di3_low"
-  [(set (match_operand:V2DI 0 "register_operand" "=&x")
-	(mult:V2DI
-	  (sign_extend:V2DI
-	    (vec_select:V2SI
-	      (match_operand:V4SI 1 "register_operand" "%x")
-	      (parallel [(const_int 1)
-			 (const_int 3)])))
-	  (sign_extend:V2DI
-	    (vec_select:V2SI
-	      (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-	      (parallel [(const_int 1)
-			 (const_int 3)])))))]
-  "TARGET_XOP"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-	(match_dup 3))
-   (set (match_dup 0)
-	(plus:V2DI
-	 (mult:V2DI
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_dup 1)
-	    (parallel [(const_int 1)
-		       (const_int 3)])))
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_dup 2)
-	    (parallel [(const_int 1)
-		       (const_int 3)]))))
-	 (match_dup 0)))]
-{
-  operands[3] = CONST0_RTX (V2DImode);
-}
-  [(set_attr "type" "ssemul")
-   (set_attr "mode" "TI")])
-
 (define_insn "xop_pmacsdqh"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
 	(plus:V2DI
@@ -10544,61 +10510,19 @@ 
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
 	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
-	    (parallel [(const_int 0)
-		       (const_int 2)])))
+	    (parallel [(const_int 1)
+		       (const_int 3)])))
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
 	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-	    (parallel [(const_int 0)
-		       (const_int 2)]))))
+	    (parallel [(const_int 1)
+		       (const_int 3)]))))
 	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
-;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
-;; fake it with a multiply/add.  In general, we expect the define_split to
-;; occur before register allocation, so we have to handle the corner case where
-;; the target is the same as either operands[1] or operands[2]
-(define_insn_and_split "xop_mulv2div2di3_high"
-  [(set (match_operand:V2DI 0 "register_operand" "=&x")
-	(mult:V2DI
-	  (sign_extend:V2DI
-	    (vec_select:V2SI
-	      (match_operand:V4SI 1 "register_operand" "%x")
-	      (parallel [(const_int 0)
-			 (const_int 2)])))
-	  (sign_extend:V2DI
-	    (vec_select:V2SI
-	      (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-	      (parallel [(const_int 0)
-			 (const_int 2)])))))]
-  "TARGET_XOP"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-	(match_dup 3))
-   (set (match_dup 0)
-	(plus:V2DI
-	 (mult:V2DI
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_dup 1)
-	    (parallel [(const_int 0)
-		       (const_int 2)])))
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_dup 2)
-	    (parallel [(const_int 0)
-		       (const_int 2)]))))
-	 (match_dup 0)))]
-{
-  operands[3] = CONST0_RTX (V2DImode);
-}
-  [(set_attr "type" "ssemul")
-   (set_attr "mode" "TI")])
-
 ;; XOP parallel integer multiply/add instructions for the intrinisics
 (define_insn "xop_pmacsswd"
   [(set (match_operand:V4SI 0 "register_operand" "=x")