diff mbox

[i386] recognize haddpd

Message ID alpine.DEB.2.02.1210081619430.1112@stedding.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse Oct. 8, 2012, 2:40 p.m. UTC
On Fri, 28 Sep 2012, Uros Bizjak wrote:

>>>> 2) {v[0]-v[1], v[0]-v[1]} is not recognized as a hsubpd because
>>>> vec_duplicate doesn't match vec_concat. Do we really need to duplicate (no
>>>> pun intended) the pattern?
>
> You can add this transformation to simplify-rtx.c. Probably vec_concat
> with two equal operands can be canonicalized as vec_duplicate.

Actually, it is replacing vec_duplicate with vec_concat that would help. 
Well, I'll see about that later.

Here is what I came up with, trying to follow your other advice (thanks a 
lot!).

Passes bootstrap+testsuite.

2012-10-08  Marc Glisse  <marc.glisse@inria.fr>

gcc/
 	PR target/54400
 	* config/i386/i386.md (type attribute): Add sseadd1.
 	(unit attribute): Add support for sseadd1.
 	* config/i386/sse.md (sse3_h<plusminus_insn>v2df3): split into...
 	(sse3_haddv2df3): ... expander.
 	(*sse3_haddv2df3): ... define_insn. Accept permuted operands.
 	(sse3_hsubv2df3): ... define_insn.
 	(*sse3_haddv2df3_low): New define_insn.
 	(*sse3_hsubv2df3_low): New define_insn.

gcc/testsuite/
 	PR target/54400
 	* gcc.target/i386/pr54400.c: New testcase.

Comments

Uros Bizjak Oct. 8, 2012, 4:08 p.m. UTC | #1
On Mon, Oct 8, 2012 at 4:40 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Fri, 28 Sep 2012, Uros Bizjak wrote:
>
>>>>> 2) {v[0]-v[1], v[0]-v[1]} is not recognized as a hsubpd because
>>>>> vec_duplicate doesn't match vec_concat. Do we really need to duplicate
>>>>> (no
>>>>> pun intended) the pattern?
>>
>>
>> You can add this transformation to simplify-rtx.c. Probably vec_concat
>> with two equal operands can be canonicalized as vec_duplicate.
>
>
> Actually, it is replacing vec_duplicate with vec_concat that would help.
> Well, I'll see about that later.
>
> Here is what I came up with, trying to follow your other advice (thanks a
> lot!).
>
> Passes bootstrap+testsuite.
>
> 2012-10-08  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>         PR target/54400
>         * config/i386/i386.md (type attribute): Add sseadd1.
>         (unit attribute): Add support for sseadd1.
>         * config/i386/sse.md (sse3_h<plusminus_insn>v2df3): split into...
>         (sse3_haddv2df3): ... expander.
>         (*sse3_haddv2df3): ... define_insn. Accept permuted operands.
>         (sse3_hsubv2df3): ... define_insn.
>         (*sse3_haddv2df3_low): New define_insn.
>         (*sse3_hsubv2df3_low): New define_insn.
>
> gcc/testsuite/
>         PR target/54400
>
>         * gcc.target/i386/pr54400.c: New testcase.
>
> --
> Marc Glisse
>
> Index: gcc/testsuite/gcc.target/i386/pr54400.c
> ===================================================================
> --- gcc/testsuite/gcc.target/i386/pr54400.c     (revision 0)
> +++ gcc/testsuite/gcc.target/i386/pr54400.c     (revision 0)
> @@ -0,0 +1,53 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
> +
> +#include <x86intrin.h>
> +
> +double f (__m128d p)
> +{
> +  return p[0] - p[1];
> +}
> +
> +double g1 (__m128d p)
> +{
> +  return p[0] + p[1];
> +}
> +
> +double g2 (__m128d p)
> +{
> +  return p[1] + p[0];
> +}
> +
> +__m128d h (__m128d p, __m128d q)
> +{
> +  __m128d r = { p[0] - p[1], q[0] - q[1] };
> +  return r;
> +}
> +
> +__m128d i1 (__m128d p, __m128d q)
> +{
> +  __m128d r = { p[0] + p[1], q[0] + q[1] };
> +  return r;
> +}
> +
> +__m128d i2 (__m128d p, __m128d q)
> +{
> +  __m128d r = { p[0] + p[1], q[1] + q[0] };
> +  return r;
> +}
> +
> +__m128d i3 (__m128d p, __m128d q)
> +{
> +  __m128d r = { p[1] + p[0], q[0] + q[1] };
> +  return r;
> +}
> +
> +__m128d i4 (__m128d p, __m128d q)
> +{
> +  __m128d r = { p[1] + p[0], q[1] + q[0] };
> +  return r;
> +}
> +
> +/* { dg-final { scan-assembler-times "hsubpd" 2 } } */
> +/* { dg-final { scan-assembler-times "haddpd" 6 } } */
> +/* { dg-final { scan-assembler-not "unpck" } } */
>
> Property changes on: gcc/testsuite/gcc.target/i386/pr54400.c
> ___________________________________________________________________
> Added: svn:keywords
>    + Author Date Id Revision URL
> Added: svn:eol-style
>    + native
>
> Index: gcc/config/i386/i386.md
> ===================================================================
> --- gcc/config/i386/i386.md     (revision 192206)
> +++ gcc/config/i386/i386.md     (working copy)
> @@ -320,36 +320,36 @@
>  ;; provided in other attributes.
>  (define_attr "type"
>    "other,multi,
>     alu,alu1,negnot,imov,imovx,lea,
>     incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,imul,imulx,idiv,
>     icmp,test,ibr,setcc,icmov,
>     push,pop,call,callv,leave,
>     str,bitmanip,
>     fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
>     sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
> -
> sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
> -   ssemuladd,sse4arg,lwp,
> +   sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
> +   ssediv,sseins,ssemuladd,sse4arg,lwp,
>     mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
>    (const_string "other"))
>
>  ;; Main data type used by the insn
>  (define_attr "mode"
>
> "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF"
>    (const_string "unknown"))
>
>  ;; The CPU unit operations uses.
>  (define_attr "unit" "integer,i387,sse,mmx,unknown"
>    (cond [(eq_attr "type"
> "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
>            (const_string "i387")
>          (eq_attr "type"
> "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
> -                         sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
> +
> sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,
>                           ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
>            (const_string "sse")
>          (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
>            (const_string "mmx")
>          (eq_attr "type" "other")
>            (const_string "unknown")]
>          (const_string "integer")))

You missed the most important sseadd1 addition, the one that prevents
checking of operand2 when calculating "memory" attribute:

	 (and (eq_attr "type"
		 "!alu1,negnot,ishift1,
		   imov,imovx,icmp,test,bitmanip,
		   fmov,fcmp,fsgn,
		   sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1,
		   sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt")
	      (match_operand 2 "memory_operand"))

Please note "!" in the above expression.

>
>  ;; The (bounding maximum) length of an instruction immediate.
>  (define_attr "length_immediate" ""
> Index: gcc/config/i386/sse.md
> ===================================================================
> --- gcc/config/i386/sse.md      (revision 192206)
> +++ gcc/config/i386/sse.md      (working copy)
> @@ -1209,42 +1209,120 @@
>               (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
>             (plusminus:DF
>               (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
>               (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
>    "TARGET_AVX"
>    "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
>    [(set_attr "type" "sseadd")
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "V4DF")])
>
> -(define_insn "sse3_h<plusminus_insn>v2df3"
> +(define_expand "sse3_haddv2df3"
> +  [(set (match_operand:V2DF 0 "register_operand")
> +       (vec_concat:V2DF
> +         (plus:DF
> +           (vec_select:DF
> +             (match_operand:V2DF 1 "register_operand")
> +             (parallel [(const_int 0)]))
> +           (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
> +         (plus:DF
> +           (vec_select:DF
> +             (match_operand:V2DF 2 "nonimmediate_operand")
> +             (parallel [(const_int 0)]))
> +           (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
> +  "TARGET_SSE3")
> +
> +(define_insn "*sse3_haddv2df3"
>    [(set (match_operand:V2DF 0 "register_operand" "=x,x")
>         (vec_concat:V2DF
> -         (plusminus:DF
> +         (plus:DF
> +           (vec_select:DF
> +             (match_operand:V2DF 1 "register_operand" "0,x")
> +             (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
> +           (vec_select:DF
> +             (match_dup 1)
> +             (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
> +         (plus:DF
> +           (vec_select:DF
> +             (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
> +             (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
> +           (vec_select:DF
> +             (match_dup 2)
> +             (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
> +  "TARGET_SSE3 && INTVAL (operands[3]) != INTVAL (operands[4])
> +   && INTVAL (operands[5]) != INTVAL (operands[6])"
> +  "@
> +   haddpd\t{%2, %0|%0, %2}
> +   vhaddpd\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "isa" "noavx,avx")
> +   (set_attr "type" "sseadd")
> +   (set_attr "prefix" "orig,vex")
> +   (set_attr "mode" "V2DF")])

Please use (match_dup 3) in place of (match_operand 5) and (match_dup
4) in place of (match_operand 6) predicates. These should be the same.

Also note that you have to add handling of sseadd1 attribute in other
(scheduler) *.md files. Simply grep for sseadd and add ",sseadd1"
everywhere.

Uros.
Uros Bizjak Oct. 8, 2012, 4:11 p.m. UTC | #2
On Mon, Oct 8, 2012 at 6:08 PM, Uros Bizjak <ubizjak@gmail.com> wrote:

>> +(define_insn "*sse3_haddv2df3"
>>    [(set (match_operand:V2DF 0 "register_operand" "=x,x")
>>         (vec_concat:V2DF
>> -         (plusminus:DF
>> +         (plus:DF
>> +           (vec_select:DF
>> +             (match_operand:V2DF 1 "register_operand" "0,x")
>> +             (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
>> +           (vec_select:DF
>> +             (match_dup 1)
>> +             (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
>> +         (plus:DF
>> +           (vec_select:DF
>> +             (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
>> +             (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
>> +           (vec_select:DF
>> +             (match_dup 2)
>> +             (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
>> +  "TARGET_SSE3 && INTVAL (operands[3]) != INTVAL (operands[4])
>> +   && INTVAL (operands[5]) != INTVAL (operands[6])"
>> +  "@
>> +   haddpd\t{%2, %0|%0, %2}
>> +   vhaddpd\t{%2, %1, %0|%0, %1, %2}"
>> +  [(set_attr "isa" "noavx,avx")
>> +   (set_attr "type" "sseadd")
>> +   (set_attr "prefix" "orig,vex")
>> +   (set_attr "mode" "V2DF")])
>
> Please use (match_dup 3) in place of (match_operand 5) and (match_dup
> 4) in place of (match_operand 6) predicates. These should be the same.

Oh, I was too quick with this part. The code above is OK, since we can
permute every part independently.

Uros.
diff mbox

Patch

Index: gcc/testsuite/gcc.target/i386/pr54400.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr54400.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr54400.c	(revision 0)
@@ -0,0 +1,53 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
+
+#include <x86intrin.h>
+
+double f (__m128d p)
+{
+  return p[0] - p[1];
+}
+
+double g1 (__m128d p)
+{
+  return p[0] + p[1];
+}
+
+double g2 (__m128d p)
+{
+  return p[1] + p[0];
+}
+
+__m128d h (__m128d p, __m128d q)
+{
+  __m128d r = { p[0] - p[1], q[0] - q[1] };
+  return r;
+}
+
+__m128d i1 (__m128d p, __m128d q)
+{
+  __m128d r = { p[0] + p[1], q[0] + q[1] };
+  return r;
+}
+
+__m128d i2 (__m128d p, __m128d q)
+{
+  __m128d r = { p[0] + p[1], q[1] + q[0] };
+  return r;
+}
+
+__m128d i3 (__m128d p, __m128d q)
+{
+  __m128d r = { p[1] + p[0], q[0] + q[1] };
+  return r;
+}
+
+__m128d i4 (__m128d p, __m128d q)
+{
+  __m128d r = { p[1] + p[0], q[1] + q[0] };
+  return r;
+}
+
+/* { dg-final { scan-assembler-times "hsubpd" 2 } } */
+/* { dg-final { scan-assembler-times "haddpd" 6 } } */
+/* { dg-final { scan-assembler-not "unpck" } } */

Property changes on: gcc/testsuite/gcc.target/i386/pr54400.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md	(revision 192206)
+++ gcc/config/i386/i386.md	(working copy)
@@ -320,36 +320,36 @@ 
 ;; provided in other attributes.
 (define_attr "type"
   "other,multi,
    alu,alu1,negnot,imov,imovx,lea,
    incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,imul,imulx,idiv,
    icmp,test,ibr,setcc,icmov,
    push,pop,call,callv,leave,
    str,bitmanip,
    fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
    sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
-   sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
-   ssemuladd,sse4arg,lwp,
+   sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
+   ssediv,sseins,ssemuladd,sse4arg,lwp,
    mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
   (const_string "other"))
 
 ;; Main data type used by the insn
 (define_attr "mode"
   "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF"
   (const_string "unknown"))
 
 ;; The CPU unit operations uses.
 (define_attr "unit" "integer,i387,sse,mmx,unknown"
   (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
 	   (const_string "i387")
 	 (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
-			  sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
+			  sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,
 			  ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
 	   (const_string "sse")
 	 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
 	   (const_string "mmx")
 	 (eq_attr "type" "other")
 	   (const_string "unknown")]
 	 (const_string "integer")))
 
 ;; The (bounding maximum) length of an instruction immediate.
 (define_attr "length_immediate" ""
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md	(revision 192206)
+++ gcc/config/i386/sse.md	(working copy)
@@ -1209,42 +1209,120 @@ 
 	      (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
 	    (plusminus:DF
 	      (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
 	      (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
   "TARGET_AVX"
   "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseadd")
    (set_attr "prefix" "vex")
    (set_attr "mode" "V4DF")])
 
-(define_insn "sse3_h<plusminus_insn>v2df3"
+(define_expand "sse3_haddv2df3"
+  [(set (match_operand:V2DF 0 "register_operand")
+	(vec_concat:V2DF
+	  (plus:DF
+	    (vec_select:DF
+	      (match_operand:V2DF 1 "register_operand")
+	      (parallel [(const_int 0)]))
+	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+	  (plus:DF
+	    (vec_select:DF
+	      (match_operand:V2DF 2 "nonimmediate_operand")
+	      (parallel [(const_int 0)]))
+	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_SSE3")
+
+(define_insn "*sse3_haddv2df3"
   [(set (match_operand:V2DF 0 "register_operand" "=x,x")
 	(vec_concat:V2DF
-	  (plusminus:DF
+	  (plus:DF
+	    (vec_select:DF
+	      (match_operand:V2DF 1 "register_operand" "0,x")
+	      (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
+	    (vec_select:DF
+	      (match_dup 1)
+	      (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
+	  (plus:DF
+	    (vec_select:DF
+	      (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
+	      (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
+	    (vec_select:DF
+	      (match_dup 2)
+	      (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
+  "TARGET_SSE3 && INTVAL (operands[3]) != INTVAL (operands[4])
+   && INTVAL (operands[5]) != INTVAL (operands[6])"
+  "@
+   haddpd\t{%2, %0|%0, %2}
+   vhaddpd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseadd")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "sse3_hsubv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+	(vec_concat:V2DF
+	  (minus:DF
 	    (vec_select:DF
 	      (match_operand:V2DF 1 "register_operand" "0,x")
 	      (parallel [(const_int 0)]))
 	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
-	  (plusminus:DF
+	  (minus:DF
 	    (vec_select:DF
 	      (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
 	      (parallel [(const_int 0)]))
 	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
   "TARGET_SSE3"
   "@
-   h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
-   vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
+   hsubpd\t{%2, %0|%0, %2}
+   vhsubpd\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseadd")
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "V2DF")])
 
+(define_insn "*sse3_haddv2df3_low"
+  [(set (match_operand:DF 0 "register_operand" "=x,x")
+	(plus:DF
+	  (vec_select:DF
+	    (match_operand:V2DF 1 "register_operand" "0,x")
+	    (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
+	  (vec_select:DF
+	    (match_dup 1)
+	    (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
+  "TARGET_SSE3 && INTVAL (operands[2]) != INTVAL (operands[3])"
+  "@
+   haddpd\t{%0, %0|%0, %0}
+   vhaddpd\t{%1, %1, %0|%0, %1, %1}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseadd1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "*sse3_hsubv2df3_low"
+  [(set (match_operand:DF 0 "register_operand" "=x,x")
+	(minus:DF
+	  (vec_select:DF
+	    (match_operand:V2DF 1 "register_operand" "0,x")
+	    (parallel [(const_int 0)]))
+	  (vec_select:DF
+	    (match_dup 1)
+	    (parallel [(const_int 1)]))))]
+  "TARGET_SSE3"
+  "@
+   hsubpd\t{%0, %0|%0, %0}
+   vhsubpd\t{%1, %1, %0|%0, %1, %1}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseadd1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "V2DF")])
+
 (define_insn "avx_h<plusminus_insn>v8sf3"
   [(set (match_operand:V8SF 0 "register_operand" "=x")
 	(vec_concat:V8SF
 	  (vec_concat:V4SF
 	    (vec_concat:V2SF
 	      (plusminus:SF
 		(vec_select:SF
 		  (match_operand:V8SF 1 "register_operand" "x")
 		  (parallel [(const_int 0)]))
 		(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))