Patchwork [2/n,i386] : Merge *sse4_1_pextrq and *sse4_1_pextrd with base vec_select patterns.

login
register
mail settings
Submitter Uros Bizjak
Date May 6, 2013, 7:52 p.m.
Message ID <CAFULd4bx7mU30TozK629+Ro=5pdi2eOyXi4STQ8j46D7HeK5vg@mail.gmail.com>
Download mbox | patch
Permalink /patch/241763/
State New
Headers show

Comments

Uros Bizjak - May 6, 2013, 7:52 p.m.
Hello!

The attached patch merges *sse4_1_pextrq and *sse4_1_pextrd with the base
vec_select patterns. The patch splits instructions with a zero selector
into plain movdi and movsi patterns. Please note that pextr $0,...
should only be generated for !TARGET_INTER_UNIT_MOVES_FROM_VEC targets
when -msse4 is used. For TARGET_INTER_UNIT_MOVES_FROM_VEC targets, we
should always generate the corresponding plain movq or movd inter-unit move.

2013-05-06  Uros Bizjak  <ubizjak@gmail.com>

    * config/i386/i386.md (isa): Add x64_sse4 member.
    (enabled): Handle x64_sse4.
    (*movdi_internal): Add *x->?r alternative to emit pextrq $0,%xmm,%reg
    instruction for 64bit SSE4_1 targets.  Update insn attributes.
    (*movsi_internal): Add *x->?r alternative to emit pextrd $0,%xmm,%reg
    instruction for SSE4_1 targets.  Update insn attributes.
    * config/i386/sse.md (*vec_extract<ssevecmodelower>_0): Merge
    with *sse4_1_pextrd and *sse4_1_pextrq having const_0 selector.
    (*vec_extractv2di_1): Merge with *sse4_1_pextrq having
    const_1 selector.
    (*vec_extractv4si): Rename from *sse4_1_pextrd.
    (*vec_extractv4si_zext): Rename from *sse4_1_pextrd_zext.
    (*vec_extract<ssevecmodelower>_0 splitters): Merge splitters together.

The patch was tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN.

Uros.

Patch

Index: i386.md
===================================================================
--- i386.md	(revision 198611)
+++ i386.md	(working copy)
@@ -658,12 +658,15 @@ 
 (define_attr "movu" "0,1" (const_string "0"))
 
 ;; Used to control the "enabled" attribute on a per-instruction basis.
-(define_attr "isa" "base,x64,x64_sse4_noavx,x64_avx,nox64,sse2,sse2_noavx,
-		    sse3,sse4,sse4_noavx,avx,noavx,avx2,noavx2,bmi2,fma4,fma"
+(define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
+		    sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
+		    avx2,noavx2,bmi2,fma4,fma"
   (const_string "base"))
 
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
+	 (eq_attr "isa" "x64_sse4")
+	   (symbol_ref "TARGET_64BIT && TARGET_SSE4_1")
 	 (eq_attr "isa" "x64_sse4_noavx")
 	   (symbol_ref "TARGET_64BIT && TARGET_SSE4_1 && !TARGET_AVX")
 	 (eq_attr "isa" "x64_avx")
@@ -1850,9 +1853,9 @@ 
 
 (define_insn "*movdi_internal"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*x,*x,*x,m ,?r ,?*Yi,?*Ym,?*Yi")
+    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*x,*x,*x,m ,?r ,?r,?*Yi,?*Ym,?*Yi")
 	(match_operand:DI 1 "general_operand"
-    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*Yn,r   ,C ,*x,m ,*x,*Yj,r   ,*Yj ,*Yn"))]
+    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*Yn,r   ,C ,*x,m ,*x,*Yj,*x,r   ,*Yj ,*Yn"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -1872,6 +1875,9 @@ 
       return "movq\t{%1, %0|%0, %1}";
 
     case TYPE_SSELOG1:
+      if (GENERAL_REG_P (operands[0]))
+	return "%vpextrq\t{$0, %1, %0|%0, %1, 0}";
+
       return standard_sse_constant_opcode (insn, operands[1]);
 
     case TYPE_SSEMOV:
@@ -1924,8 +1930,10 @@ 
   [(set (attr "isa")
      (cond [(eq_attr "alternative" "0,1")
 	      (const_string "nox64")
-	    (eq_attr "alternative" "2,3,4,5,10,11,16,17")
+	    (eq_attr "alternative" "2,3,4,5,10,11,16,18")
 	      (const_string "x64")
+	    (eq_attr "alternative" "17")
+	      (const_string "x64_sse4")
 	   ]
 	   (const_string "*")))
    (set (attr "type")
@@ -1935,13 +1943,13 @@ 
 	      (const_string "mmx")
 	    (eq_attr "alternative" "7,8,9,10,11")
 	      (const_string "mmxmov")
-	    (eq_attr "alternative" "12")
+	    (eq_attr "alternative" "12,17")
 	      (const_string "sselog1")
-	    (eq_attr "alternative" "13,14,15,16,17")
+	    (eq_attr "alternative" "13,14,15,16,18")
 	      (const_string "ssemov")
-	    (eq_attr "alternative" "18,19")
+	    (eq_attr "alternative" "19,20")
 	      (const_string "ssecvt")
- 	    (match_operand 1 "pic_32bit_operand")
+	    (match_operand 1 "pic_32bit_operand")
 	      (const_string "lea")
 	   ]
 	   (const_string "imov")))
@@ -1951,14 +1959,20 @@ 
 	 (const_string "0")
 	 (const_string "*")))
    (set (attr "length_immediate")
-     (if_then_else
-       (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
-	 (const_string "8")
-	 (const_string "*")))
+     (cond [(and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
+	      (const_string "8")
+	    (eq_attr "alternative" "17")
+	      (const_string "1")
+	   ]
+	   (const_string "*")))
    (set (attr "prefix_rex")
-     (if_then_else (eq_attr "alternative" "10,11,16,17")
+     (if_then_else (eq_attr "alternative" "10,11,16,17,18")
        (const_string "1")
        (const_string "*")))
+   (set (attr "prefix_extra")
+     (if_then_else (eq_attr "alternative" "17")
+       (const_string "1")
+       (const_string "*")))
    (set (attr "prefix")
      (if_then_else (eq_attr "type" "sselog1,ssemov")
        (const_string "maybe_vex")
@@ -1984,6 +1998,8 @@ 
 	    (and (eq_attr "alternative" "14,15")
 		 (not (match_test "TARGET_SSE2")))
 	      (const_string "V2SF")
+	    (eq_attr "alternative" "17")
+	      (const_string "TI")
 	   ]
 	   (const_string "DI")))])
 
@@ -1998,14 +2014,17 @@ 
 
 (define_insn "*movsi_internal"
   [(set (match_operand:SI 0 "nonimmediate_operand"
-			"=r,m ,*y,*y,?rm,?*y,*x,*x,*x,m ,?r ,?*Yi")
+			"=r,m ,*y,*y,?rm,?*y,*x,*x,*x,m ,?r ,?r,?*Yi")
 	(match_operand:SI 1 "general_operand"
-			"g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yj,r"))]
+			"g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yj,*x,r"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_SSELOG1:
+      if (GENERAL_REG_P (operands[0]))
+	return "%vpextrd\t{$0, %1, %0|%0, %1, 0}";
+
       return standard_sse_constant_opcode (insn, operands[1]);
 
     case TYPE_SSEMOV:
@@ -2056,19 +2075,31 @@ 
       gcc_unreachable ();
     }
 }
-  [(set (attr "type")
+  [(set (attr "isa")
+     (if_then_else (eq_attr "alternative" "11")
+       (const_string "sse4")
+       (const_string "*")))
+   (set (attr "type")
      (cond [(eq_attr "alternative" "2")
 	      (const_string "mmx")
 	    (eq_attr "alternative" "3,4,5")
 	      (const_string "mmxmov")
-	    (eq_attr "alternative" "6")
+	    (eq_attr "alternative" "6,11")
 	      (const_string "sselog1")
-	    (eq_attr "alternative" "7,8,9,10,11")
+	    (eq_attr "alternative" "7,8,9,10,12")
 	      (const_string "ssemov")
  	    (match_operand 1 "pic_32bit_operand")
 	      (const_string "lea")
 	   ]
 	   (const_string "imov")))
+   (set (attr "length_immediate")
+     (if_then_else (eq_attr "alternative" "11")
+       (const_string "1")
+       (const_string "*")))
+   (set (attr "prefix_extra")
+     (if_then_else (eq_attr "alternative" "11")
+       (const_string "1")
+       (const_string "*")))
    (set (attr "prefix")
      (if_then_else (eq_attr "type" "sselog1,ssemov")
        (const_string "maybe_vex")
@@ -2094,6 +2125,8 @@ 
 	    (and (eq_attr "alternative" "8,9")
 	         (not (match_test "TARGET_SSE2")))
 	      (const_string "SF")
+	    (eq_attr "alternative" "11")
+	      (const_string "TI")
 	   ]
 	   (const_string "SI")))])
 
Index: sse.md
===================================================================
--- sse.md	(revision 198611)
+++ sse.md	(working copy)
@@ -6987,48 +6987,6 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
-(define_insn "*sse4_1_pextrd"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
-	(vec_select:SI
-	  (match_operand:V4SI 1 "register_operand" "x")
-	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
-  "TARGET_SSE4_1"
-  "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix_extra" "1")
-   (set_attr "length_immediate" "1")
-   (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "TI")])
-
-(define_insn "*sse4_1_pextrd_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI
-	  (vec_select:SI
-	    (match_operand:V4SI 1 "register_operand" "x")
-	    (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
-  "TARGET_64BIT && TARGET_SSE4_1"
-  "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix_extra" "1")
-   (set_attr "length_immediate" "1")
-   (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "TI")])
-
-;; It must come before *vec_extractv2di_1 since it is preferred.
-(define_insn "*sse4_1_pextrq"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-	(vec_select:DI
-	  (match_operand:V2DI 1 "register_operand" "x")
-	  (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
-  "TARGET_SSE4_1 && TARGET_64BIT"
-  "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix_rex" "1")
-   (set_attr "prefix_extra" "1")
-   (set_attr "length_immediate" "1")
-   (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "TI")])
-
 (define_expand "avx2_pshufdv3"
   [(match_operand:V8SI 0 "register_operand")
    (match_operand:V8SI 1 "nonimmediate_operand")
@@ -7358,12 +7316,13 @@ 
    (set_attr "mode" "TI,TI,V4SF,SF,SF")])
 
 (define_insn "*vec_extract<ssevecmodelower>_0"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand"	       "=x,m,r ,r")
+  [(set (match_operand:SWI48 0 "nonimmediate_operand"	       "=r,r,x ,m,r")
 	(vec_select:SWI48
-	  (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm,x,Yj,m")
+	  (match_operand:<ssevecmode> 1 "nonimmediate_operand" "Yj,x,xm,x,m")
 	  (parallel [(const_int 0)])))]
   "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "#")
+  "#"
+  [(set_attr "isa" "*,sse4,*,*,*")])
 
 (define_insn "*vec_extractv2di_0_sse"
   [(set (match_operand:DI 0 "nonimmediate_operand"     "=x,m")
@@ -7375,25 +7334,46 @@ 
   "#")
 
 (define_split
-  [(set (match_operand:SWI48x 0 "register_operand")
+  [(set (match_operand:SWI48x 0 "nonimmediate_operand")
 	(vec_select:SWI48x
-	  (match_operand:<ssevecmode> 1 "memory_operand")
+	  (match_operand:<ssevecmode> 1 "nonimmediate_operand")
 	  (parallel [(const_int 0)])))]
   "TARGET_SSE && reload_completed"
   [(set (match_dup 0) (match_dup 1))]
-  "operands[1] = adjust_address (operands[1], <MODE>mode, 0);")
+{
+  if (REG_P (operands[1]))
+    operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
+  else
+    operands[1] = adjust_address (operands[1], <MODE>mode, 0);
+})
 
-(define_split
-  [(set (match_operand:SWI48x 0 "nonimmediate_operand")
-	(vec_select:SWI48x
-	  (match_operand:<ssevecmode> 1 "register_operand")
-	  (parallel [(const_int 0)])))]
-  "TARGET_SSE && reload_completed
-   && (TARGET_INTER_UNIT_MOVES_FROM_VEC
-       || !GENERAL_REG_P (operands [0]))"
-  [(set (match_dup 0) (match_dup 1))]
-  "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
+(define_insn "*vec_extractv4si"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(vec_select:SI
+	  (match_operand:V4SI 1 "register_operand" "x")
+	  (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
+  "TARGET_SSE4_1"
+  "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "TI")])
 
+(define_insn "*vec_extractv4si_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (vec_select:SI
+	    (match_operand:V4SI 1 "register_operand" "x")
+	    (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
+  "TARGET_64BIT && TARGET_SSE4_1"
+  "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "TI")])
+
 (define_insn_and_split "*vec_extractv4si_mem"
   [(set (match_operand:SI 0 "register_operand" "=x,r")
 	(vec_select:SI
@@ -7408,24 +7388,27 @@ 
 })
 
 (define_insn "*vec_extractv2di_1"
-  [(set (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,x,r")
+  [(set (match_operand:DI 0 "nonimmediate_operand"     "=rm,m,x,x,x,x,r")
 	(vec_select:DI
-	  (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,x,o,o")
+	  (match_operand:V2DI 1 "nonimmediate_operand"  "x ,x,0,x,x,o,o")
 	  (parallel [(const_int 1)])))]
   "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
+   %vpextrq\t{$1, %1, %0|%0, %1, 1}
    %vmovhps\t{%1, %0|%0, %1}
    psrldq\t{$8, %0|%0, 8}
    vpsrldq\t{$8, %1, %0|%0, %1, 8}
    movhlps\t{%1, %0|%0, %1}
    #
    #"
-  [(set_attr "isa" "*,sse2_noavx,avx,noavx,*,x64")
-   (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
-   (set_attr "length_immediate" "*,1,1,*,*,*")
-   (set_attr "memory" "*,none,none,*,*,*")
-   (set_attr "prefix" "maybe_vex,orig,vex,orig,*,*")
-   (set_attr "mode" "V2SF,TI,TI,V4SF,DI,DI")])
+  [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
+   (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
+   (set_attr "length_immediate" "1,*,1,1,*,*,*")
+   (set_attr "memory" "*,*,none,none,*,*,*")
+   (set_attr "prefix_rex" "1,*,*,*,*,*,*")
+   (set_attr "prefix_extra" "1,*,*,*,*,*,*")
+   (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
+   (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
 
 (define_split
   [(set (match_operand:DI 0 "register_operand")