Patchwork [6/n,i386] : Add zero-extended vec_extract patterns

login
register
mail settings
Submitter Uros Bizjak
Date May 9, 2013, 7:44 p.m.
Message ID <CAFULd4ZCWu5XB8TD2HLSWjA0J28h19wKa=Svaw-ePMMVoPRaZA@mail.gmail.com>
Download mbox | patch
Permalink /patch/242818/
State New
Headers show

Comments

Uros Bizjak - May 9, 2013, 7:44 p.m.
On Thu, May 9, 2013 at 9:43 PM, Uros Bizjak <ubizjak@gmail.com> wrote:

> movd from an xmm register to an integer register also zero-extends the
> value in the integer register. Also, the patch adds a couple of missing
> alternatives to *vec_extractv4si, since we can "extract" the values
> using 128-bit SSE shifts.
>
> 2013-05-09  Uros Bizjak  <ubizjak@gmail.com>
>
>     * config/i386/sse.md (*vec_extractv4si_0_zext): New pattern.
>     (*vec_extractv4si_zext_mem): Ditto.
>     (*vec_extractv4si): Add 0->x and x->x alternatives.
>     * config/i386/mmx.md (*vec_extractv2si_zext_mem): New pattern.
>     * config/i386/i386.md (*zero_extendsidi2): Add *Yj->?r alternative.
>
> Patch was tested on x86_64-pc-linux-gnu and committed to mainline SVN.

Now with a patch.

Uros.

Patch

Index: i386.md
===================================================================
--- i386.md	(revision 198747)
+++ i386.md	(working copy)
@@ -3088,10 +3088,10 @@ 
 
 (define_insn "*zero_extendsidi2"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-			"=r,?r,?o,r   ,o,?*Ym,?!*y,?*Yi,?*x")
+			"=r,?r,?o,r   ,o,?*Ym,?!*y,?r ,?*Yi,?*x")
 	(zero_extend:DI
 	 (match_operand:SI 1 "x86_64_zext_operand"
-	        	"0 ,rm,r ,rmWz,0,r   ,m   ,r   ,m")))]
+	        	"0 ,rm,r ,rmWz,0,r   ,m   ,*Yj,r   ,m")))]
   ""
 {
   switch (get_attr_type (insn))
@@ -3109,6 +3109,9 @@ 
       return "movd\t{%1, %0|%0, %1}";
 
     case TYPE_SSEMOV:
+      if (GENERAL_REG_P (operands[0]))
+	return "%vmovd\t{%1, %k0|%k0, %1}";
+
       return "%vmovd\t{%1, %0|%0, %1}";
 
     default:
@@ -3118,9 +3121,9 @@ 
   [(set (attr "isa")
      (cond [(eq_attr "alternative" "0,1,2")
 	      (const_string "nox64")
-	    (eq_attr "alternative" "3")
+	    (eq_attr "alternative" "3,7")
 	      (const_string "x64")
-	    (eq_attr "alternative" "8")
+	    (eq_attr "alternative" "9")
 	      (const_string "sse2")
 	   ]
 	   (const_string "*")))
@@ -3129,7 +3132,7 @@ 
 	      (const_string "multi")
 	    (eq_attr "alternative" "5,6")
 	      (const_string "mmxmov")
-	    (eq_attr "alternative" "7,8")
+	    (eq_attr "alternative" "7,8,9")
 	      (const_string "ssemov")
 	   ]
 	   (const_string "imovx")))
@@ -3144,7 +3147,7 @@ 
    (set (attr "mode")
      (cond [(eq_attr "alternative" "5,6")
 	      (const_string "DI")
-	    (eq_attr "alternative" "7,8")
+	    (eq_attr "alternative" "7,8,9")
 	      (const_string "TI")
 	   ]
 	   (const_string "SI")))])
Index: mmx.md
===================================================================
--- mmx.md	(revision 198747)
+++ mmx.md	(working copy)
@@ -1323,6 +1323,20 @@ 
   [(set (match_dup 0) (match_dup 1))]
   "operands[1] = adjust_address (operands[1], SImode, 4);")
 
+(define_insn_and_split "*vec_extractv2si_zext_mem"
+  [(set (match_operand:DI 0 "register_operand" "=y,x,r")
+	(zero_extend:DI
+	  (vec_select:SI
+	    (match_operand:V2SI 1 "memory_operand" "o,o,o")
+	    (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))]
+  "TARGET_64BIT && TARGET_MMX"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
+{
+  operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
+})
+
 (define_expand "vec_extractv2si"
   [(match_operand:SI 0 "register_operand")
    (match_operand:V2SI 1 "register_operand")
Index: sse.md
===================================================================
--- sse.md	(revision 198747)
+++ sse.md	(working copy)
@@ -7331,6 +7331,18 @@ 
   "#"
   [(set_attr "isa" "*,sse4,*,*")])
 
+(define_insn_and_split "*vec_extractv4si_0_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (vec_select:SI
+	    (match_operand:V4SI 1 "register_operand" "x")
+	    (parallel [(const_int 0)]))))]
+  "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
+  "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
+
 (define_insn "*vec_extractv2di_0_sse"
   [(set (match_operand:DI 0 "nonimmediate_operand"     "=x,m")
 	(vec_select:DI
@@ -7350,16 +7362,35 @@ 
   "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
 
 (define_insn "*vec_extractv4si"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
 	(vec_select:SI
-	  (match_operand:V4SI 1 "register_operand" "x")
+	  (match_operand:V4SI 1 "register_operand" "x,0,x")
 	  (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
   "TARGET_SSE4_1"
-  "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sselog1")
-   (set_attr "prefix_extra" "1")
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
+
+    case 1:
+      operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
+      return "psrldq\t{%2, %0|%0, %2}";
+
+    case 2:
+      operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
+      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "*,noavx,avx")
+   (set_attr "type" "sselog1,sseishft1,sseishft1")
+   (set_attr "memory" "*,none,none")
+   (set_attr "prefix_extra" "1,*,*")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix" "maybe_vex")
+   (set_attr "prefix" "maybe_vex,orig,vex")
    (set_attr "mode" "TI")])
 
 (define_insn "*vec_extractv4si_zext"
@@ -7384,6 +7415,20 @@ 
   "TARGET_SSE"
   "#")
 
+(define_insn_and_split "*vec_extractv4si_zext_mem"
+  [(set (match_operand:DI 0 "register_operand" "=x,r")
+	(zero_extend:DI
+	  (vec_select:SI
+	    (match_operand:V4SI 1 "memory_operand" "o,o")
+	    (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
+  "TARGET_64BIT && TARGET_SSE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
+{
+  operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
+})
+
 (define_insn "*vec_extractv2di_1"
   [(set (match_operand:DI 0 "nonimmediate_operand"     "=rm,m,x,x,x,x,r")
 	(vec_select:DI