Patchwork [i386] : Macroize and simplify vector integer pack/unpack patterns.

login
register
mail settings
Submitter Uros Bizjak
Date April 16, 2011, 12:53 p.m.
Message ID <BANLkTi=2nxnKNuUXUoo-ohoY-FcSWOpaPQ@mail.gmail.com>
Download mbox | patch
Permalink /patch/91485/
State New
Headers show

Comments

Uros Bizjak - April 16, 2011, 12:53 p.m.
Hello!

2011-04-16  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/sse.md (sseunpackmode): New mode attribute.
	(ssepackmode): Ditto.
	(vec_pack_trunc_<mode>): Macroize expander from
	vec_pack_trunc_{v8hi,v4si,v2di} using VI248_128 mode iterator.
	(vec_unpacks_lo_<mode>): Macroize expander from
	vec_unpacks_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
	(vec_unpacks_hi_<mode>): Macroize expander from
	vec_unpacks_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
	(vec_unpacku_lo_<mode>): Macroize expander from
	vec_unpacku_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
	(vec_unpacku_hi_<mode>): Macroize expander from
	vec_unpacks_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
	* config/i386/i386.c (ix86_expand_sse_unpack): Merge with
	ix86_expand_sse4_unpack.
	* config/i386/i386-protos.h (ix86_expand_sse4_unpack): Remove.

Bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}.
Committed to SVN mainline.

Uros.

Patch

Index: sse.md
===================================================================
--- sse.md	(revision 172580)
+++ sse.md	(working copy)
@@ -70,7 +70,32 @@ 
 (define_mode_iterator VI24_128 [V8HI V4SI])
 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
 
+;; Mapping from float mode to required SSE level
+(define_mode_attr sse
+  [(SF "sse") (DF "sse2")
+   (V4SF "sse") (V2DF "sse2")
+   (V8SF "avx") (V4DF "avx")])
 
+(define_mode_attr sse2
+  [(V16QI "sse2") (V32QI "avx")
+   (V2DI "sse2") (V4DI "avx")])
+
+(define_mode_attr sse3
+  [(V16QI "sse3") (V32QI "avx")])
+
+(define_mode_attr sse4_1
+  [(V4SF "sse4_1") (V2DF "sse4_1")
+   (V8SF "avx") (V4DF "avx")])
+
+;; Pack/unpack vector modes
+(define_mode_attr sseunpackmode
+  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
+
+(define_mode_attr ssepackmode
+  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")])
+
+
+
 ;; Instruction suffix for sign and zero extensions.
 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
 
@@ -126,23 +151,6 @@ 
    (V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
    (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
 
-;; Mapping from float mode to required SSE level
-(define_mode_attr sse
-  [(SF "sse") (DF "sse2")
-   (V4SF "sse") (V2DF "sse2")
-   (V8SF "avx") (V4DF "avx")])
-
-(define_mode_attr sse2
-  [(V16QI "sse2") (V32QI "avx")
-   (V2DI "sse2") (V4DI "avx")])
-
-(define_mode_attr sse3
-  [(V16QI "sse3") (V32QI "avx")])
-
-(define_mode_attr sse4_1
-  [(V4SF "sse4_1") (V2DF "sse4_1")
-   (V8SF "avx") (V4DF "avx")])
-
 ;; Mapping from integer vector mode to mnemonic suffix
 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
 
@@ -5856,42 +5864,18 @@ 
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_expand "vec_pack_trunc_v8hi"
-  [(match_operand:V16QI 0 "register_operand" "")
-   (match_operand:V8HI 1 "register_operand" "")
-   (match_operand:V8HI 2 "register_operand" "")]
+(define_expand "vec_pack_trunc_<mode>"
+  [(match_operand:<ssepackmode> 0 "register_operand" "")
+   (match_operand:VI248_128 1 "register_operand" "")
+   (match_operand:VI248_128 2 "register_operand" "")]
   "TARGET_SSE2"
 {
-  rtx op1 = gen_lowpart (V16QImode, operands[1]);
-  rtx op2 = gen_lowpart (V16QImode, operands[2]);
+  rtx op1 = gen_lowpart (<MODE>mode, operands[1]);
+  rtx op2 = gen_lowpart (<MODE>mode, operands[2]);
   ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
   DONE;
 })
 
-(define_expand "vec_pack_trunc_v4si"
-  [(match_operand:V8HI 0 "register_operand" "")
-   (match_operand:V4SI 1 "register_operand" "")
-   (match_operand:V4SI 2 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  rtx op1 = gen_lowpart (V8HImode, operands[1]);
-  rtx op2 = gen_lowpart (V8HImode, operands[2]);
-  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
-  DONE;
-})
-
-(define_expand "vec_pack_trunc_v2di"
-  [(match_operand:V4SI 0 "register_operand" "")
-   (match_operand:V2DI 1 "register_operand" "")
-   (match_operand:V2DI 2 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  rtx op1 = gen_lowpart (V4SImode, operands[1]);
-  rtx op2 = gen_lowpart (V4SImode, operands[2]);
-  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
-  DONE;
-})
-
 (define_insn "sse2_packsswb"
   [(set (match_operand:V16QI 0 "register_operand" "=x,x")
 	(vec_concat:V16QI
@@ -6767,150 +6751,30 @@ 
    (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
    (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
 
-(define_expand "vec_unpacku_hi_v16qi"
-  [(match_operand:V8HI 0 "register_operand" "")
-   (match_operand:V16QI 1 "register_operand" "")]
+(define_expand "vec_unpacku_hi_<mode>"
+  [(match_operand:<sseunpackmode> 0 "register_operand" "")
+   (match_operand:VI124_128 1 "register_operand" "")]
   "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, true, true);
-  else
-    ix86_expand_sse_unpack (operands, true, true);
-  DONE;
-})
+  "ix86_expand_sse_unpack (operands, true, true); DONE;")
 
-(define_expand "vec_unpacks_hi_v16qi"
-  [(match_operand:V8HI 0 "register_operand" "")
-   (match_operand:V16QI 1 "register_operand" "")]
+(define_expand "vec_unpacks_hi_<mode>"
+  [(match_operand:<sseunpackmode> 0 "register_operand" "")
+   (match_operand:VI124_128 1 "register_operand" "")]
   "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, false, true);
-  else
-    ix86_expand_sse_unpack (operands, false, true);
-  DONE;
-})
+  "ix86_expand_sse_unpack (operands, false, true); DONE;")
 
-(define_expand "vec_unpacku_lo_v16qi"
-  [(match_operand:V8HI 0 "register_operand" "")
-   (match_operand:V16QI 1 "register_operand" "")]
+(define_expand "vec_unpacku_lo_<mode>"
+  [(match_operand:<sseunpackmode> 0 "register_operand" "")
+   (match_operand:VI124_128 1 "register_operand" "")]
   "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, true, false);
-  else
-    ix86_expand_sse_unpack (operands, true, false);
-  DONE;
-})
+  "ix86_expand_sse_unpack (operands, true, false); DONE;")
 
-(define_expand "vec_unpacks_lo_v16qi"
-  [(match_operand:V8HI 0 "register_operand" "")
-   (match_operand:V16QI 1 "register_operand" "")]
+(define_expand "vec_unpacks_lo_<mode>"
+  [(match_operand:<sseunpackmode> 0 "register_operand" "")
+   (match_operand:VI124_128 1 "register_operand" "")]
   "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, false, false);
-  else
-    ix86_expand_sse_unpack (operands, false, false);
-  DONE;
-})
+  "ix86_expand_sse_unpack (operands, false, false); DONE;")
 
-(define_expand "vec_unpacku_hi_v8hi"
-  [(match_operand:V4SI 0 "register_operand" "")
-   (match_operand:V8HI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, true, true);
-  else
-    ix86_expand_sse_unpack (operands, true, true);
-  DONE;
-})
-
-(define_expand "vec_unpacks_hi_v8hi"
-  [(match_operand:V4SI 0 "register_operand" "")
-   (match_operand:V8HI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, false, true);
-  else
-    ix86_expand_sse_unpack (operands, false, true);
-  DONE;
-})
-
-(define_expand "vec_unpacku_lo_v8hi"
-  [(match_operand:V4SI 0 "register_operand" "")
-   (match_operand:V8HI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, true, false);
-  else
-    ix86_expand_sse_unpack (operands, true, false);
-  DONE;
-})
-
-(define_expand "vec_unpacks_lo_v8hi"
-  [(match_operand:V4SI 0 "register_operand" "")
-   (match_operand:V8HI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, false, false);
-  else
-    ix86_expand_sse_unpack (operands, false, false);
-  DONE;
-})
-
-(define_expand "vec_unpacku_hi_v4si"
-  [(match_operand:V2DI 0 "register_operand" "")
-   (match_operand:V4SI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, true, true);
-  else
-    ix86_expand_sse_unpack (operands, true, true);
-  DONE;
-})
-
-(define_expand "vec_unpacks_hi_v4si"
-  [(match_operand:V2DI 0 "register_operand" "")
-   (match_operand:V4SI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, false, true);
-  else
-    ix86_expand_sse_unpack (operands, false, true);
-  DONE;
-})
-
-(define_expand "vec_unpacku_lo_v4si"
-  [(match_operand:V2DI 0 "register_operand" "")
-   (match_operand:V4SI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, true, false);
-  else
-    ix86_expand_sse_unpack (operands, true, false);
-  DONE;
-})
-
-(define_expand "vec_unpacks_lo_v4si"
-  [(match_operand:V2DI 0 "register_operand" "")
-   (match_operand:V4SI 1 "register_operand" "")]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_expand_sse4_unpack (operands, false, false);
-  else
-    ix86_expand_sse_unpack (operands, false, false);
-  DONE;
-})
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Miscellaneous
@@ -10062,7 +9926,7 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
-(define_insn_and_split "vec_dup<mode>"
+(define_insn "vec_dup<mode>"
   [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
 	(vec_duplicate:AVX256MODE24P
 	  (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
@@ -10070,15 +9934,20 @@ 
   "@
    vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
    #"
-  "&& reload_completed && REG_P (operands[1])"
-  [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
-   (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
-  "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
   [(set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
+(define_split
+  [(set (match_operand:AVX256MODE24P 0 "register_operand" "")
+	(vec_duplicate:AVX256MODE24P
+	  (match_operand:<avxscalarmode> 1 "register_operand" "")))]
+  "TARGET_AVX && reload_completed"
+  [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
+   (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
+  "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));")
+
 (define_insn "avx_vbroadcastf128_<mode>"
   [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
 	(vec_concat:AVX256MODE
Index: i386-protos.h
===================================================================
--- i386-protos.h	(revision 172580)
+++ i386-protos.h	(working copy)
@@ -114,7 +114,6 @@ 
 extern bool ix86_expand_fp_vcond (rtx[]);
 extern bool ix86_expand_int_vcond (rtx[]);
 extern void ix86_expand_sse_unpack (rtx[], bool, bool);
-extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
 extern bool ix86_expand_int_addcc (rtx[]);
 extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
 extern void ix86_split_call_vzeroupper (rtx, rtx);
Index: i386.c
===================================================================
--- i386.c	(revision 172580)
+++ i386.c	(working copy)
@@ -19100,91 +19100,87 @@ 
 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
 {
   enum machine_mode imode = GET_MODE (operands[1]);
-  rtx (*unpack)(rtx, rtx, rtx);
-  rtx se, dest;
+  rtx tmp, dest;
 
-  switch (imode)
+  if (TARGET_SSE4_1)
     {
-    case V16QImode:
+      rtx (*unpack)(rtx, rtx);
+
+      switch (imode)
+	{
+	case V16QImode:
+	  if (unsigned_p)
+	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+	  else
+	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
+	  break;
+	case V8HImode:
+	  if (unsigned_p)
+	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
+	  else
+	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
+	  break;
+	case V4SImode:
+	  if (unsigned_p)
+	    unpack = gen_sse4_1_zero_extendv2siv2di2;
+	  else
+	    unpack = gen_sse4_1_sign_extendv2siv2di2;
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+
       if (high_p)
-        unpack = gen_vec_interleave_highv16qi;
+	{
+	  /* Shift higher 8 bytes to lower 8 bytes.  */
+	  tmp = gen_reg_rtx (imode);
+	  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
+					 gen_lowpart (V1TImode, operands[1]),
+					 GEN_INT (64)));
+	}
       else
-        unpack = gen_vec_interleave_lowv16qi;
-      break;
-    case V8HImode:
-      if (high_p)
-        unpack = gen_vec_interleave_highv8hi;
-      else
-        unpack = gen_vec_interleave_lowv8hi;
-      break;
-    case V4SImode:
-      if (high_p)
-        unpack = gen_vec_interleave_highv4si;
-      else
-        unpack = gen_vec_interleave_lowv4si;
-      break;
-    default:
-      gcc_unreachable ();
-    }
+	tmp = operands[1];
 
-  dest = gen_lowpart (imode, operands[0]);
-
-  if (unsigned_p)
-    se = force_reg (imode, CONST0_RTX (imode));
+      emit_insn (unpack (operands[0], tmp));
+    }
   else
-    se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
-                              operands[1], pc_rtx, pc_rtx);
+    {
+      rtx (*unpack)(rtx, rtx, rtx);
 
-  emit_insn (unpack (dest, operands[1], se));
-}
+      switch (imode)
+	{
+	case V16QImode:
+	  if (high_p)
+	    unpack = gen_vec_interleave_highv16qi;
+	  else
+	    unpack = gen_vec_interleave_lowv16qi;
+	  break;
+	case V8HImode:
+	  if (high_p)
+	    unpack = gen_vec_interleave_highv8hi;
+	  else
+	    unpack = gen_vec_interleave_lowv8hi;
+	  break;
+	case V4SImode:
+	  if (high_p)
+	    unpack = gen_vec_interleave_highv4si;
+	  else
+	    unpack = gen_vec_interleave_lowv4si;
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
 
-/* This function performs the same task as ix86_expand_sse_unpack,
-   but with SSE4.1 instructions.  */
+      dest = gen_lowpart (imode, operands[0]);
 
-void
-ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
-{
-  enum machine_mode imode = GET_MODE (operands[1]);
-  rtx (*unpack)(rtx, rtx);
-  rtx src, dest;
-
-  switch (imode)
-    {
-    case V16QImode:
       if (unsigned_p)
-	unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+	tmp = force_reg (imode, CONST0_RTX (imode));
       else
-	unpack = gen_sse4_1_sign_extendv8qiv8hi2;
-      break;
-    case V8HImode:
-      if (unsigned_p)
-	unpack = gen_sse4_1_zero_extendv4hiv4si2;
-      else
-	unpack = gen_sse4_1_sign_extendv4hiv4si2;
-      break;
-    case V4SImode:
-      if (unsigned_p)
-	unpack = gen_sse4_1_zero_extendv2siv2di2;
-      else
-	unpack = gen_sse4_1_sign_extendv2siv2di2;
-      break;
-    default:
-      gcc_unreachable ();
-    }
+	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
+				   operands[1], pc_rtx, pc_rtx);
 
-  dest = operands[0];
-  if (high_p)
-    {
-      /* Shift higher 8 bytes to lower 8 bytes.  */
-      src = gen_reg_rtx (imode);
-      emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
-				     gen_lowpart (V1TImode, operands[1]),
-				     GEN_INT (64)));
+      emit_insn (unpack (dest, operands[1], tmp));
     }
-  else
-    src = operands[1];
-
-  emit_insn (unpack (dest, src));
 }
 
 /* Expand conditional increment or decrement using adb/sbb instructions.