Patchwork [i386] : Macroize movmsk/maskmov insns

login
register
mail settings
Submitter Uros Bizjak
Date April 18, 2011, 7:52 p.m.
Message ID <BANLkTimBVYPhhvuEztJ+4y_zPA8OgH_hDQ@mail.gmail.com>
Download mbox | patch
Permalink /patch/91837/
State New
Headers show

Comments

Uros Bizjak - April 18, 2011, 7:52 p.m.
Hello!

The attached patch macroizes the movmsk and maskmov instructions.  As an
added bonus, it also adds maskmovdqu tests, so the "@@@ check ordering
of operands" comment can be removed.

2011-04-18  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.h (SSE_VEC_FLOAT_MODE_P): Remove.
	(AVX_FLOAT_MODE_P): Ditto.
	(AVX128_VEC_FLOAT_MODE_P): Ditto.
	(AVX256_VEC_FLOAT_MODE_P): Ditto.
	(AVX_VEC_FLOAT_MODE_P): Ditto.
	* config/i386/i386.md (UNSPEC_MASKLOAD): Remove.
	(UNSPEC_MASKSTORE): Ditto.
	* config/i386/sse.md (<sse>_movmsk<ssemodesuffix><avxmodesuffix>):
	Merge from <sse>_movmsk<ssemodesuffix> and
	avx_movmsk<ssemodesuffix>256.  Use VF mode iterator.
	(*sse2_maskmovdqu): Merge with *sse2_maskmovdqu_rex64.  Use P mode
	iterator.
	(avx_maskload<ssemodesuffix><avxmodesuffix>): New expander.
	(avx_maskstore<ssemodesuffix><avxmodesuffix>): Ditto.
	(*avx_maskmov<ssemodesuffix><avxmodesuffix>): New insn.

testsuite/ChangeLog:

2011-04-18  Uros Bizjak  <ubizjak@gmail.com>

	* gcc.target/i386/sse2-maskmovdqu.c: New test.
	* gcc.target/i386/avx-vmaskmovdqu.c: Ditto.

The patch was bootstrapped and regression tested on an
x86_64-pc-linux-gnu {,-m32} AVX target, and was committed to mainline.

Uros.

Patch

Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 172652)
+++ config/i386/i386.h	(working copy)
@@ -1328,22 +1328,6 @@  enum reg_class
 #define SSE_FLOAT_MODE_P(MODE) \
   ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
 
-#define SSE_VEC_FLOAT_MODE_P(MODE) \
-  ((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode))
-
-#define AVX_FLOAT_MODE_P(MODE) \
-  (TARGET_AVX && ((MODE) == SFmode || (MODE) == DFmode))
-
-#define AVX128_VEC_FLOAT_MODE_P(MODE) \
-  (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode))
-
-#define AVX256_VEC_FLOAT_MODE_P(MODE) \
-  (TARGET_AVX && ((MODE) == V8SFmode || (MODE) == V4DFmode))
-
-#define AVX_VEC_FLOAT_MODE_P(MODE) \
-  (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode \
-		  || (MODE) == V8SFmode || (MODE) == V4DFmode))
-
 #define FMA4_VEC_FLOAT_MODE_P(MODE) \
   (TARGET_FMA4 && ((MODE) == V4SFmode || (MODE) == V2DFmode \
 		  || (MODE) == V8SFmode || (MODE) == V4DFmode))
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 172652)
+++ config/i386/i386.md	(working copy)
@@ -224,8 +224,6 @@ 
   UNSPEC_VPERMIL
   UNSPEC_VPERMIL2
   UNSPEC_VPERMIL2F128
-  UNSPEC_MASKLOAD
-  UNSPEC_MASKSTORE
   UNSPEC_CAST
   UNSPEC_VTESTP
   UNSPEC_VCVTPH2PS
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 172652)
+++ config/i386/sse.md	(working copy)
@@ -6893,23 +6893,12 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
-(define_insn "avx_movmsk<ssemodesuffix>256"
+(define_insn "<sse>_movmsk<ssemodesuffix><avxmodesuffix>"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
-	  [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
+	  [(match_operand:VF 1 "register_operand" "x")]
 	  UNSPEC_MOVMSK))]
-  "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
-  "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "<sse>_movmsk<ssemodesuffix>"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(unspec:SI
-	  [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
-	  UNSPEC_MOVMSK))]
-  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  ""
   "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "maybe_vex")
@@ -6935,35 +6924,18 @@ 
   "TARGET_SSE2")
 
 (define_insn "*sse2_maskmovdqu"
-  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
+  [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
 		       (match_operand:V16QI 2 "register_operand" "x")
 		       (mem:V16QI (match_dup 0))]
 		      UNSPEC_MASKMOV))]
-  "TARGET_SSE2 && !TARGET_64BIT"
-  ;; @@@ check ordering of operands in intel/nonintel syntax
-  "%vmaskmovdqu\t{%2, %1|%1, %2}"
-  [(set_attr "type" "ssemov")
-   (set_attr "prefix_data16" "1")
-   ;; The implicit %rdi operand confuses default length_vex computation.
-   (set_attr "length_vex" "3")
-   (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "TI")])
-
-(define_insn "*sse2_maskmovdqu_rex64"
-  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
-	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
-		       (match_operand:V16QI 2 "register_operand" "x")
-		       (mem:V16QI (match_dup 0))]
-		      UNSPEC_MASKMOV))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  ;; @@@ check ordering of operands in intel/nonintel syntax
+  "TARGET_SSE2"
   "%vmaskmovdqu\t{%2, %1|%1, %2}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix_data16" "1")
    ;; The implicit %rdi operand confuses default length_vex computation.
    (set (attr "length_vex")
-     (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
+     (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
@@ -10349,28 +10321,33 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
-(define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
-  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
-	(unspec:AVXMODEF2P
-	  [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
-	   (match_operand:<avxpermvecmode> 2 "register_operand" "x")
+(define_expand "avx_maskload<ssemodesuffix><avxmodesuffix>"
+  [(set (match_operand:VF 0 "register_operand" "")
+	(unspec:VF
+	  [(match_operand:<avxpermvecmode> 2 "register_operand" "")
+	   (match_operand:VF 1 "memory_operand" "")
 	   (match_dup 0)]
-	  UNSPEC_MASKLOAD))]
-  "TARGET_AVX"
-  "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
-  [(set_attr "type" "sselog1")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "<MODE>")])
+	  UNSPEC_MASKMOV))]
+  "TARGET_AVX")
 
-(define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
-  [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
-	(unspec:AVXMODEF2P
-	  [(match_operand:<avxpermvecmode> 1 "register_operand" "x")
-	   (match_operand:AVXMODEF2P 2 "register_operand" "x")
+(define_expand "avx_maskstore<ssemodesuffix><avxmodesuffix>"
+  [(set (match_operand:VF 0 "memory_operand" "")
+	(unspec:VF
+	  [(match_operand:<avxpermvecmode> 1 "register_operand" "")
+	   (match_operand:VF 2 "register_operand" "")
 	   (match_dup 0)]
-	  UNSPEC_MASKSTORE))]
-  "TARGET_AVX"
+	  UNSPEC_MASKMOV))]
+  "TARGET_AVX")
+
+(define_insn "*avx_maskmov<ssemodesuffix><avxmodesuffix>"
+  [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
+	(unspec:VF
+	  [(match_operand:<avxpermvecmode> 1 "register_operand" "x,x")
+	   (match_operand:VF 2 "nonimmediate_operand" "m,x")
+	   (match_dup 0)]
+	  UNSPEC_MASKMOV))]
+  "TARGET_AVX
+   && (REG_P (operands[0]) == MEM_P (operands[2]))"
   "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
Index: testsuite/gcc.target/i386/sse2-maskmovdqu.c
===================================================================
--- testsuite/gcc.target/i386/sse2-maskmovdqu.c	(revision 0)
+++ testsuite/gcc.target/i386/sse2-maskmovdqu.c	(revision 0)
@@ -0,0 +1,44 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-options "-O2 -msse2" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+#include <emmintrin.h>
+
+#ifndef MASK
+#define MASK 0x7986
+#endif
+
+#define mask_v(pos) (((MASK & (0x1 << (pos))) >> (pos)) << 7)
+
+void static
+TEST (void)
+{
+  __m128i src, mask;
+  char s[16] = { 1,-2,3,-4,5,-6,7,-8,9,-10,11,-12,13,-14,15,-16 };
+  char m[16];
+
+  char u[20] = { 0 };
+  int i;
+
+  for (i = 0; i < 16; i++)
+    m[i] = mask_v (i);
+
+  src = _mm_loadu_si128 ((__m128i *)s);
+  mask = _mm_loadu_si128 ((__m128i *)m);
+
+  _mm_maskmoveu_si128 (src, mask, u+3);
+
+  for (i = 0; i < 16; i++)
+    if (u[i+3] != (m[i] ? s[i] : 0))
+      abort ();
+}
Index: testsuite/gcc.target/i386/avx-vmaskmovdqu.c
===================================================================
--- testsuite/gcc.target/i386/avx-vmaskmovdqu.c	(revision 0)
+++ testsuite/gcc.target/i386/avx-vmaskmovdqu.c	(revision 0)
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include "sse2-maskmovdqu.c"