diff mbox

Add VEC_UNPACK_{HI,LO}_EXPR support for V{32QI,16HI,8SI} with AVX2

Message ID 20111012212850.GI2210@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Oct. 12, 2011, 9:28 p.m. UTC
Hi!

This patch allows vectorizing
char a[1024], c[1024];
long long b[1024];
void
foo (void)
{
  int i;
  for (i = 0; i < 1024; i++)
    b[i] = a[i] + 3 * c[i];
}
using 32-byte vectors with -mavx2.  Bootstrapped/regtested
on x86_64-linux and i686-linux.  OK for trunk?

2011-10-12  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (vec_unpacks_lo_<mode>,
	vec_unpacks_hi_<mode>, vec_unpacku_lo_<mode>,
	vec_unpacku_hi_<mode>): Change VI124_128 mode to
	VI124_AVX2.
	* config/i386/i386.c (ix86_expand_sse_unpack): Handle
	V32QImode, V16HImode and V8SImode for TARGET_AVX2.


	Jakub

Comments

Richard Henderson Oct. 12, 2011, 9:50 p.m. UTC | #1
On 10/12/2011 02:28 PM, Jakub Jelinek wrote:
> 2011-10-12  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* config/i386/sse.md (vec_unpacks_lo_<mode>,
> 	vec_unpacks_hi_<mode>, vec_unpacku_lo_<mode>,
> 	vec_unpacku_hi_<mode>): Change VI124_128 mode to
> 	VI124_AVX2.
> 	* config/i386/i386.c (ix86_expand_sse_unpack): Handle
> 	V32QImode, V16HImode and V8SImode for TARGET_AVX2.

Ok.


r~
diff mbox

Patch

--- gcc/config/i386/sse.md.jj	2011-10-12 15:42:12.000000000 +0200
+++ gcc/config/i386/sse.md	2011-10-12 16:16:49.000000000 +0200
@@ -7536,25 +7536,25 @@  (define_insn "vec_concatv2di"
 
 (define_expand "vec_unpacks_lo_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand" "")
-   (match_operand:VI124_128 1 "register_operand" "")]
+   (match_operand:VI124_AVX2 1 "register_operand" "")]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands, false, false); DONE;")
 
 (define_expand "vec_unpacks_hi_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand" "")
-   (match_operand:VI124_128 1 "register_operand" "")]
+   (match_operand:VI124_AVX2 1 "register_operand" "")]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands, false, true); DONE;")
 
 (define_expand "vec_unpacku_lo_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand" "")
-   (match_operand:VI124_128 1 "register_operand" "")]
+   (match_operand:VI124_AVX2 1 "register_operand" "")]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands, true, false); DONE;")
 
 (define_expand "vec_unpacku_hi_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand" "")
-   (match_operand:VI124_128 1 "register_operand" "")]
+   (match_operand:VI124_AVX2 1 "register_operand" "")]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands, true, true); DONE;")
 
--- gcc/config/i386/i386.c.jj	2011-10-12 14:19:26.000000000 +0200
+++ gcc/config/i386/i386.c	2011-10-12 16:15:50.000000000 +0200
@@ -19658,9 +19658,38 @@  ix86_expand_sse_unpack (rtx operands[2],
   if (TARGET_SSE4_1)
     {
       rtx (*unpack)(rtx, rtx);
+      rtx (*extract)(rtx, rtx) = NULL;
+      enum machine_mode halfmode = BLKmode;
 
       switch (imode)
 	{
+	case V32QImode:
+	  if (unsigned_p)
+	    unpack = gen_avx2_zero_extendv16qiv16hi2;
+	  else
+	    unpack = gen_avx2_sign_extendv16qiv16hi2;
+	  halfmode = V16QImode;
+	  extract
+	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
+	  break;
+	case V16HImode:
+	  if (unsigned_p)
+	    unpack = gen_avx2_zero_extendv8hiv8si2;
+	  else
+	    unpack = gen_avx2_sign_extendv8hiv8si2;
+	  halfmode = V8HImode;
+	  extract
+	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
+	  break;
+	case V8SImode:
+	  if (unsigned_p)
+	    unpack = gen_avx2_zero_extendv4siv4di2;
+	  else
+	    unpack = gen_avx2_sign_extendv4siv4di2;
+	  halfmode = V4SImode;
+	  extract
+	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
+	  break;
 	case V16QImode:
 	  if (unsigned_p)
 	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
@@ -19683,7 +19712,12 @@  ix86_expand_sse_unpack (rtx operands[2],
 	  gcc_unreachable ();
 	}
 
-      if (high_p)
+      if (GET_MODE_SIZE (imode) == 32)
+	{
+	  tmp = gen_reg_rtx (halfmode);
+	  emit_insn (extract (tmp, operands[1]));
+	}
+      else if (high_p)
 	{
 	  /* Shift higher 8 bytes to lower 8 bytes.  */
 	  tmp = gen_reg_rtx (imode);