Patchwork [i386] : Expand insv pattern to pinsr{b,w,d,q} insn

login
register
mail settings
Submitter Uros Bizjak
Date April 20, 2011, 7:54 p.m.
Message ID <BANLkTik48h4soJ-gPHpdsepG90-t8Mfqvw@mail.gmail.com>
Download mbox | patch
Permalink /patch/92301/
State New
Headers show

Comments

Uros Bizjak - April 20, 2011, 7:54 p.m.
Hello!

The attached patch enhances the fix for PR target/48678 to generate
a pinsr{b,w,d,q} insn when a value is inserted into a vector register.

2011-04-20  Uros Bizjak  <ubizjak@gmail.com>

	PR target/48678
	* config/i386/i386.md (insv): Change operand 0 predicate to
	"register_operand".  Change operand 1 and 2 predicates to
	"const_int_operand".  Expand to pinsr{b,w,d,q} when appropriate.
	* config/i386/sse.md (sse4_1_pinsrb): Export.
	(sse2_pinsrw): Ditto.
	(sse4_1_pinsrd): Ditto.
	(sse4_1_pinsrq): Ditto.
	* config/i386/i386-protos.h (ix86_expand_pinsr): Add prototype.
	* config/i386/i386.c (ix86_expand_pinsr): New.

testsuite/ChangeLog:

2011-04-20  Uros Bizjak  <ubizjak@gmail.com>

	PR target/48678
	* gcc.target/i386/sse2-pinsrw.c: New test.
	* gcc.target/i386/avx-vpinsrw-1.c: Ditto.
	* gcc.target/i386/sse4_1-insvqi.c: Ditto.
	* gcc.target/i386/sse2-insvhi.c: Ditto.
	* gcc.target/i386/sse4_1-insvsi.c: Ditto.
	* gcc.target/i386/sse4_1-insvdi.c: Ditto.

Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{-m32}.  Committed to mainline SVN.

Uros.

Patch

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 172780)
+++ config/i386/i386.md	(working copy)
@@ -10393,14 +10393,17 @@ 
 })
 
 (define_expand "insv"
-  [(set (zero_extract (match_operand 0 "ext_register_operand" "")
-		      (match_operand 1 "const8_operand" "")
-		      (match_operand 2 "const8_operand" ""))
+  [(set (zero_extract (match_operand 0 "register_operand" "")
+		      (match_operand 1 "const_int_operand" "")
+		      (match_operand 2 "const_int_operand" ""))
         (match_operand 3 "register_operand" ""))]
   ""
 {
   rtx (*gen_mov_insv_1) (rtx, rtx);
 
+  if (ix86_expand_pinsr (operands))
+    DONE;
+
   /* Handle insertions to %ah et al.  */
   if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
     FAIL;
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 172780)
+++ config/i386/sse.md	(working copy)
@@ -6051,7 +6051,7 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
-(define_insn "*sse4_1_pinsrb"
+(define_insn "sse4_1_pinsrb"
   [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
 	(vec_merge:V16QI
 	  (vec_duplicate:V16QI
@@ -6083,7 +6083,7 @@ 
    (set_attr "prefix" "orig,orig,vex,vex")
    (set_attr "mode" "TI")])
 
-(define_insn "*sse2_pinsrw"
+(define_insn "sse2_pinsrw"
   [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
 	(vec_merge:V8HI
 	  (vec_duplicate:V8HI
@@ -6117,7 +6117,7 @@ 
    (set_attr "mode" "TI")])
 
 ;; It must come before sse2_loadld since it is preferred.
-(define_insn "*sse4_1_pinsrd"
+(define_insn "sse4_1_pinsrd"
   [(set (match_operand:V4SI 0 "register_operand" "=x,x")
 	(vec_merge:V4SI
 	  (vec_duplicate:V4SI
@@ -6145,7 +6145,7 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
-(define_insn "*sse4_1_pinsrq"
+(define_insn "sse4_1_pinsrq"
   [(set (match_operand:V2DI 0 "register_operand" "=x,x")
 	(vec_merge:V2DI
 	  (vec_duplicate:V2DI
Index: config/i386/i386-protos.h
===================================================================
--- config/i386/i386-protos.h	(revision 172780)
+++ config/i386/i386-protos.h	(working copy)
@@ -203,6 +203,7 @@  extern void ix86_expand_vector_extract (
 extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
 
 extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
+extern bool ix86_expand_pinsr (rtx *);
 
 /* In i386-c.c  */
 extern void ix86_target_macros (void);
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 172780)
+++ config/i386/i386.c	(working copy)
@@ -34106,6 +34106,88 @@  ix86_expand_vec_extract_even_odd (rtx ta
   /* ... or we use the special-case patterns.  */
   expand_vec_perm_even_odd_1 (&d, odd);
 }
+
+/* Expand an insert into a vector register through pinsr insn.
+   Return true if successful.  */
+
+bool
+ix86_expand_pinsr (rtx *operands)
+{
+  rtx dst = operands[0];
+  rtx src = operands[3];
+
+  unsigned int size = INTVAL (operands[1]);
+  unsigned int pos = INTVAL (operands[2]);
+
+  if (GET_CODE (dst) == SUBREG)
+    {
+      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
+      dst = SUBREG_REG (dst);
+    }
+
+  if (GET_CODE (src) == SUBREG)
+    src = SUBREG_REG (src);
+
+  switch (GET_MODE (dst))
+    {
+    case V16QImode:
+    case V8HImode:
+    case V4SImode:
+    case V2DImode:
+      {
+	enum machine_mode srcmode, dstmode;
+	rtx (*pinsr)(rtx, rtx, rtx, rtx);
+
+	srcmode = mode_for_size (size, MODE_INT, 0);
+
+	switch (srcmode)
+	  {
+	  case QImode:
+	    if (!TARGET_SSE4_1)
+	      return false;
+	    dstmode = V16QImode;
+	    pinsr = gen_sse4_1_pinsrb;
+	    break;
+
+	  case HImode:
+	    if (!TARGET_SSE2)
+	      return false;
+	    dstmode = V8HImode;
+	    pinsr = gen_sse2_pinsrw;
+	    break;
+
+	  case SImode:
+	    if (!TARGET_SSE4_1)
+	      return false;
+	    dstmode = V4SImode;
+	    pinsr = gen_sse4_1_pinsrd;
+	    break;
+
+	  case DImode:
+	    gcc_assert (TARGET_64BIT);
+	    if (!TARGET_SSE4_1)
+	      return false;
+	    dstmode = V2DImode;
+	    pinsr = gen_sse4_1_pinsrq;
+	    break;
+
+	  default:
+	    return false;
+	  }
+
+	dst = gen_lowpart (dstmode, dst);
+	src = gen_lowpart (srcmode, src);
+
+	pos /= size;
+
+	emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
+	return true;
+      }
+
+    default:
+      return false;
+    }
+}
 
 /* This function returns the calling abi specific va_list type node.
    It returns  the FNDECL specific va_list type.  */
Index: testsuite/gcc.target/i386/sse2-pinsrw.c
===================================================================
--- testsuite/gcc.target/i386/sse2-pinsrw.c	(revision 0)
+++ testsuite/gcc.target/i386/sse2-pinsrw.c	(revision 0)
@@ -0,0 +1,86 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-options "-O2 -msse2" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+#include <emmintrin.h>
+#include <string.h>
+
+#define msk0 0x00
+#define msk1 0x01
+#define msk2 0x02
+#define msk3 0x03
+#define msk4 0x04
+#define msk5 0x05
+#define msk6 0x06
+#define msk7 0x07
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x;
+      unsigned int i[4];
+      unsigned short s[8];
+    } res [8], val, tmp;
+  int masks[8];
+  unsigned short ins[4] = { 3, 4, 5, 6 };
+  int i;
+
+  val.i[0] = 0x35251505;
+  val.i[1] = 0x75655545;
+  val.i[2] = 0xB5A59585;
+  val.i[3] = 0xF5E5D5C5;
+
+  /* Check pinsrw imm8, r32, xmm.  */
+  res[0].x = _mm_insert_epi16 (val.x, ins[0], msk0);
+  res[1].x = _mm_insert_epi16 (val.x, ins[0], msk1);
+  res[2].x = _mm_insert_epi16 (val.x, ins[0], msk2);
+  res[3].x = _mm_insert_epi16 (val.x, ins[0], msk3);
+  res[4].x = _mm_insert_epi16 (val.x, ins[0], msk4);
+  res[5].x = _mm_insert_epi16 (val.x, ins[0], msk5);
+  res[6].x = _mm_insert_epi16 (val.x, ins[0], msk6);
+  res[7].x = _mm_insert_epi16 (val.x, ins[0], msk7);
+
+  masks[0] = msk0;
+  masks[1] = msk1;
+  masks[2] = msk2;
+  masks[3] = msk3;
+  masks[4] = msk4;
+  masks[5] = msk5;
+  masks[6] = msk6;
+  masks[7] = msk7;
+
+  for (i = 0; i < 8; i++)
+    {
+      tmp.x = val.x;
+      tmp.s[masks[i]] = ins[0];
+      if (memcmp (&tmp, &res[i], sizeof (tmp)))
+	abort ();
+    }
+    
+  /* Check pinsrw imm8, m16, xmm.  */
+  for (i = 0; i < 8; i++)
+    {
+      res[i].x = _mm_insert_epi16 (val.x, ins[i % 2], msk0);
+      masks[i] = msk0;
+    }
+
+  for (i = 0; i < 8; i++)
+    {
+      tmp.x = val.x;
+      tmp.s[masks[i]] = ins[i % 2];
+      if (memcmp (&tmp, &res[i], sizeof (tmp)))
+	abort ();
+    }
+}
Index: testsuite/gcc.target/i386/sse4_1-insvdi.c
===================================================================
--- testsuite/gcc.target/i386/sse4_1-insvdi.c	(revision 0)
+++ testsuite/gcc.target/i386/sse4_1-insvdi.c	(revision 0)
@@ -0,0 +1,55 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+#include <string.h>
+
+typedef long T __attribute__((may_alias));
+struct S { __m128i d; };
+
+__m128i
+__attribute__((noinline))
+foo (__m128i y, long x)
+{
+  struct S s;
+
+  s.d = y;
+  ((T *) &s.d)[1] = x;
+  return s.d;
+}
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x;
+      unsigned int i[4];
+      unsigned long l[2];
+    } res, val, tmp;
+  unsigned long ins[4] = { 3, 4, 5, 6 };
+
+  val.i[0] = 0x35251505;
+  val.i[1] = 0x75655545;
+  val.i[2] = 0xB5A59585;
+  val.i[3] = 0xF5E5D5C5;
+
+  res.x = foo (val.x, ins[3]);
+
+  tmp.x = val.x;
+  tmp.l[1] = ins[3];
+  if (memcmp (&tmp, &res, sizeof (tmp)))
+    abort ();
+}
Index: testsuite/gcc.target/i386/sse4_1-insvqi.c
===================================================================
--- testsuite/gcc.target/i386/sse4_1-insvqi.c	(revision 0)
+++ testsuite/gcc.target/i386/sse4_1-insvqi.c	(revision 0)
@@ -0,0 +1,54 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+#include <string.h>
+
+typedef char T __attribute__((may_alias));
+struct S { __m128i d; };
+
+__m128i
+__attribute__((noinline))
+foo (__m128i y, char x)
+{
+  struct S s;
+
+  s.d = y;
+  ((T *) &s.d)[1] = x;
+  return s.d;
+}
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x;
+      unsigned int i[4];
+      unsigned char c[16];
+    } res, val, tmp;
+  unsigned char ins[4] = { 3, 4, 5, 6 };
+
+  val.i[0] = 0x35251505;
+  val.i[1] = 0x75655545;
+  val.i[2] = 0xB5A59585;
+  val.i[3] = 0xF5E5D5C5;
+
+  res.x = foo (val.x, ins[3]);
+
+  tmp.x = val.x;
+  tmp.c[1] = ins[3];
+  if (memcmp (&tmp, &res, sizeof (tmp)))
+    abort ();
+}
Index: testsuite/gcc.target/i386/sse2-insvhi.c
===================================================================
--- testsuite/gcc.target/i386/sse2-insvhi.c	(revision 0)
+++ testsuite/gcc.target/i386/sse2-insvhi.c	(revision 0)
@@ -0,0 +1,54 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-options "-O2 -msse2" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+#include <emmintrin.h>
+#include <string.h>
+
+typedef short T __attribute__((may_alias));
+struct S { __m128i d; };
+
+__m128i
+__attribute__((noinline))
+foo (__m128i y, short x)
+{
+  struct S s;
+
+  s.d = y;
+  ((T *) &s.d)[1] = x;
+  return s.d;
+}
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x;
+      unsigned int i[4];
+      unsigned short s[8];
+    } res, val, tmp;
+  unsigned short ins[4] = { 3, 4, 5, 6 };
+
+  val.i[0] = 0x35251505;
+  val.i[1] = 0x75655545;
+  val.i[2] = 0xB5A59585;
+  val.i[3] = 0xF5E5D5C5;
+
+  res.x = foo (val.x, ins[3]);
+
+  tmp.x = val.x;
+  tmp.s[1] = ins[3];
+  if (memcmp (&tmp, &res, sizeof (tmp)))
+    abort ();
+}
Index: testsuite/gcc.target/i386/avx-vpinsrw-1.c
===================================================================
--- testsuite/gcc.target/i386/avx-vpinsrw-1.c	(revision 0)
+++ testsuite/gcc.target/i386/avx-vpinsrw-1.c	(revision 0)
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include "sse2-pinsrw.c"
Index: testsuite/gcc.target/i386/sse4_1-insvsi.c
===================================================================
--- testsuite/gcc.target/i386/sse4_1-insvsi.c	(revision 0)
+++ testsuite/gcc.target/i386/sse4_1-insvsi.c	(revision 0)
@@ -0,0 +1,53 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+#include <string.h>
+
+typedef int T __attribute__((may_alias));
+struct S { __m128i d; };
+
+__m128i
+__attribute__((noinline))
+foo (__m128i y, int x)
+{
+  struct S s;
+
+  s.d = y;
+  ((T *) &s.d)[1] = x;
+  return s.d;
+}
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x;
+      unsigned int i[4];
+    } res, val, tmp;
+  unsigned int ins[4] = { 3, 4, 5, 6 };
+
+  val.i[0] = 0x35251505;
+  val.i[1] = 0x75655545;
+  val.i[2] = 0xB5A59585;
+  val.i[3] = 0xF5E5D5C5;
+
+  res.x = foo (val.x, ins[3]);
+
+  tmp.x = val.x;
+  tmp.i[1] = ins[3];
+  if (memcmp (&tmp, &res, sizeof (tmp)))
+    abort ();
+}