Patchwork [i386] : Add AVX modes to ix86_modes_tieable_p

login
register
mail settings
Submitter Uros Bizjak
Date March 28, 2012, 9:34 p.m.
Message ID <CAFULd4auLiY0fE9EFCNs8J4r3f2deSrfygUGS5PACxKeXXjVOg@mail.gmail.com>
Download mbox | patch
Permalink /patch/149310/
State New
Headers show

Comments

Uros Bizjak - March 28, 2012, 9:34 p.m.
Hello!

The attached patch adds AVX modes to ix86_modes_tieable_p, handling them
in the same way as the existing SSE and MMX modes.

Additionally, the patch removes unneeded gen_lowpart calls from
ix86_expand_vector_move_misalign. The function's mode argument merely
duplicates the mode of the operands for convenience, so converting the
operands to that same mode is a no-op.

2012-03-28  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.c (ix86_modes_tieable_p): Handle 32-byte AVX modes.
	(ix86_expand_vector_move_misalign): Remove unneeded gen_lowpart calls.

Tested on x86_64-pc-linux-gnu {,-m32} with and without -mfpmath=avx.

Committed.

Uros.

Patch

Index: i386.c
===================================================================
--- i386.c	(revision 185918)
+++ i386.c	(working copy)
@@ -15831,17 +15831,18 @@  ix86_expand_vector_move_misalign (enum machine_mod
 	  switch (GET_MODE_SIZE (mode))
 	    {
 	    case 16:
-	      /*  If we're optimizing for size, movups is the smallest.  */
 	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
 		{
 		  op0 = gen_lowpart (V4SFmode, op0);
 		  op1 = gen_lowpart (V4SFmode, op1);
 		  emit_insn (gen_sse_movups (op0, op1));
-		  return;
 		}
-	      op0 = gen_lowpart (V16QImode, op0);
-	      op1 = gen_lowpart (V16QImode, op1);
-	      emit_insn (gen_sse2_movdqu (op0, op1));
+	      else
+		{
+		  op0 = gen_lowpart (V16QImode, op0);
+		  op1 = gen_lowpart (V16QImode, op1);
+		  emit_insn (gen_sse2_movdqu (op0, op1));
+		}
 	      break;
 	    case 32:
 	      op0 = gen_lowpart (V32QImode, op0);
@@ -15853,27 +15854,22 @@  ix86_expand_vector_move_misalign (enum machine_mod
 	    }
 	  break;
 	case MODE_VECTOR_FLOAT:
-	  op0 = gen_lowpart (mode, op0);
-	  op1 = gen_lowpart (mode, op1);
-
 	  switch (mode)
 	    {
 	    case V4SFmode:
 	      emit_insn (gen_sse_movups (op0, op1));
 	      break;
-	    case V8SFmode:
-	      ix86_avx256_split_vector_move_misalign (op0, op1);
-	      break;
 	    case V2DFmode:
 	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
 		{
 		  op0 = gen_lowpart (V4SFmode, op0);
 		  op1 = gen_lowpart (V4SFmode, op1);
 		  emit_insn (gen_sse_movups (op0, op1));
-		  return;
 		}
-	      emit_insn (gen_sse2_movupd (op0, op1));
+	      else
+		emit_insn (gen_sse2_movupd (op0, op1));
 	      break;
+	    case V8SFmode:
 	    case V4DFmode:
 	      ix86_avx256_split_vector_move_misalign (op0, op1);
 	      break;
@@ -15918,8 +15914,6 @@  ix86_expand_vector_move_misalign (enum machine_mod
 
 	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
 	    {
-	      op0 = gen_lowpart (V2DFmode, op0);
-	      op1 = gen_lowpart (V2DFmode, op1);
 	      emit_insn (gen_sse2_movupd (op0, op1));
 	      return;
 	    }
@@ -15984,8 +15978,8 @@  ix86_expand_vector_move_misalign (enum machine_mod
 	  return;
 	}
 
-      /* ??? Similar to above, only less clear because of quote
-	 typeless stores unquote.  */
+      /* ??? Similar to above, only less clear
+	 because of typeless stores.  */
       if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
 	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
         {
@@ -15998,11 +15992,7 @@  ix86_expand_vector_move_misalign (enum machine_mod
       if (TARGET_SSE2 && mode == V2DFmode)
 	{
 	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
-	    {
-	      op0 = gen_lowpart (V2DFmode, op0);
-	      op1 = gen_lowpart (V2DFmode, op1);
-	      emit_insn (gen_sse2_movupd (op0, op1));
-	    }
+	    emit_insn (gen_sse2_movupd (op0, op1));
 	  else
 	    {
 	      m = adjust_address (op0, DFmode, 0);
@@ -31399,6 +31389,10 @@  ix86_modes_tieable_p (enum machine_mode mode1, enu
 
   /* If MODE2 is only appropriate for an SSE register, then tie with
      any other mode acceptable to SSE registers.  */
+  if (GET_MODE_SIZE (mode2) == 32
+      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
+    return (GET_MODE_SIZE (mode1) == 32
+	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
   if (GET_MODE_SIZE (mode2) == 16
       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
     return (GET_MODE_SIZE (mode1) == 16