Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 187286)
+++ config/i386/sse.md	(working copy)
@@ -449,8 +449,6 @@
 	      && (misaligned_operand (operands[0], <MODE>mode)
 		  || misaligned_operand (operands[1], <MODE>mode)))
 	    return "vmovupd\t{%1, %0|%0, %1}";
-	  else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	    return "%vmovaps\t{%1, %0|%0, %1}";
 	  else
 	    return "%vmovapd\t{%1, %0|%0, %1}";
 
@@ -460,8 +458,6 @@
 	      && (misaligned_operand (operands[0], <MODE>mode)
 		  || misaligned_operand (operands[1], <MODE>mode)))
 	    return "vmovdqu\t{%1, %0|%0, %1}";
-	  else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	    return "%vmovaps\t{%1, %0|%0, %1}";
 	  else
 	    return "%vmovdqa\t{%1, %0|%0, %1}";
 
@@ -475,19 +471,21 @@
   [(set_attr "type" "sselog1,ssemov,ssemov")
    (set_attr "prefix" "maybe_vex")
    (set (attr "mode")
-	(cond [(match_test "TARGET_AVX")
+	(cond [(and (eq_attr "alternative" "1,2")
+	      	    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
+		 (if_then_else
+		    (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
+		    (const_string "V8SF")
+		    (const_string "V4SF"))
+	       (match_test "TARGET_AVX")
 		 (const_string "<sseinsnmode>")
-	       (ior (ior (match_test "optimize_function_for_size_p (cfun)")
-			 (not (match_test "TARGET_SSE2")))
+	       (ior (and (eq_attr "alternative" "1,2")
+			 (match_test "optimize_function_for_size_p (cfun)"))
 		    (and (eq_attr "alternative" "2")
 			 (match_test "TARGET_SSE_TYPELESS_STORES")))
 		 (const_string "V4SF")
-	       (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
-		 (const_string "V4SF")
-	       (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
-		 (const_string "V2DF")
 	      ]
-	  (const_string "TI")))])
+	  (const_string "<sseinsnmode>")))])
 
 (define_insn "sse2_movq128"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
@@ -597,11 +595,33 @@
 	  [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
 	  UNSPEC_MOVU))]
   "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
+{
+  switch (get_attr_mode (insn))
+    {
+    case MODE_V8SF:
+    case MODE_V4SF:
+      return "%vmovups\t{%1, %0|%0, %1}";
+    default:
+      return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+    }
+}
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "<MODE>")])
+   (set (attr "mode")
+	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+		 (if_then_else
+		    (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
+		    (const_string "V8SF")
+		    (const_string "V4SF"))
+	       (match_test "TARGET_AVX")
+		 (const_string "<MODE>")
+	       (ior (match_test "optimize_function_for_size_p (cfun)")
+		    (and (eq_attr "alternative" "1")
+			 (match_test "TARGET_SSE_TYPELESS_STORES")))
+	         (const_string "V4SF")
+	      ]
+	(const_string "<MODE>")))])
 
 (define_expand "<sse2>_movdqu<avxsizesuffix>"
   [(set (match_operand:VI1 0 "nonimmediate_operand")
@@ -618,7 +638,16 @@
 	(unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
 		    UNSPEC_MOVU))]
   "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "%vmovdqu\t{%1, %0|%0, %1}"
+{
+  switch (get_attr_mode (insn))
+    {
+    case MODE_V8SF:
+    case MODE_V4SF:
+      return "%vmovups\t{%1, %0|%0, %1}";
+    default:
+      return "%vmovdqu\t{%1, %0|%0, %1}";
+    }
+}
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set (attr "prefix_data16")
@@ -627,7 +656,20 @@
      (const_string "*")
      (const_string "1")))
    (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "<sseinsnmode>")])
+   (set (attr "mode")
+	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+		 (if_then_else
+		    (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
+		    (const_string "V8SF")
+		    (const_string "V4SF"))
+	       (match_test "TARGET_AVX")
+		 (const_string "<sseinsnmode>")
+	       (ior (match_test "optimize_function_for_size_p (cfun)")
+		    (and (eq_attr "alternative" "1")
+			 (match_test "TARGET_SSE_TYPELESS_STORES")))
+	         (const_string "V4SF")
+	      ]
+	(const_string "<sseinsnmode>")))])
 
 (define_insn "<sse3>_lddqu<avxsizesuffix>"
   [(set (match_operand:VI1 0 "register_operand" "=x")
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 187289)
+++ config/i386/i386.c	(working copy)
@@ -15907,60 +15907,19 @@ ix86_expand_vector_move_misalign (enum machine_mod
   op0 = operands[0];
   op1 = operands[1];
 
-  if (TARGET_AVX)
+  if (TARGET_AVX
+      && GET_MODE_SIZE (mode) == 32)
     {
       switch (GET_MODE_CLASS (mode))
 	{
 	case MODE_VECTOR_INT:
 	case MODE_INT:
-	  switch (GET_MODE_SIZE (mode))
-	    {
-	    case 16:
-	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-		{
-		  op0 = gen_lowpart (V4SFmode, op0);
-		  op1 = gen_lowpart (V4SFmode, op1);
-		  emit_insn (gen_sse_movups (op0, op1));
-		}
-	      else
-		{
-		  op0 = gen_lowpart (V16QImode, op0);
-		  op1 = gen_lowpart (V16QImode, op1);
-		  emit_insn (gen_sse2_movdqu (op0, op1));
-		}
-	      break;
-	    case 32:
-	      op0 = gen_lowpart (V32QImode, op0);
-	      op1 = gen_lowpart (V32QImode, op1);
-	      ix86_avx256_split_vector_move_misalign (op0, op1);
-	      break;
-	    default:
-	      gcc_unreachable ();
-	    }
-	  break;
+	  op0 = gen_lowpart (V32QImode, op0);
+	  op1 = gen_lowpart (V32QImode, op1);
+	  /* FALLTHRU */
+
 	case MODE_VECTOR_FLOAT:
-	  switch (mode)
-	    {
-	    case V4SFmode:
-	      emit_insn (gen_sse_movups (op0, op1));
-	      break;
-	    case V2DFmode:
-	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-		{
-		  op0 = gen_lowpart (V4SFmode, op0);
-		  op1 = gen_lowpart (V4SFmode, op1);
-		  emit_insn (gen_sse_movups (op0, op1));
-		}
-	      else
-		emit_insn (gen_sse2_movupd (op0, op1));
-	      break;
-	    case V8SFmode:
-	    case V4DFmode:
-	      ix86_avx256_split_vector_move_misalign (op0, op1);
-	      break;
-	    default:
-	      gcc_unreachable ();
-	    }
+	  ix86_avx256_split_vector_move_misalign (op0, op1);
 	  break;
 
 	default:
@@ -15972,16 +15931,6 @@ ix86_expand_vector_move_misalign (enum machine_mod
 
   if (MEM_P (op1))
     {
-      /* If we're optimizing for size, movups is the smallest.  */
-      if (optimize_insn_for_size_p ()
-	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	{
-	  op0 = gen_lowpart (V4SFmode, op0);
-	  op1 = gen_lowpart (V4SFmode, op1);
-	  emit_insn (gen_sse_movups (op0, op1));
-	  return;
-	}
-
       /* ??? If we have typed data, then it would appear that using
 	 movdqu is the only way to get unaligned data loaded with
 	 integer type.  */
@@ -15989,16 +15938,19 @@ ix86_expand_vector_move_misalign (enum machine_mod
 	{
 	  op0 = gen_lowpart (V16QImode, op0);
 	  op1 = gen_lowpart (V16QImode, op1);
+	  /* We will eventually emit movups based on insn attributes.  */
 	  emit_insn (gen_sse2_movdqu (op0, op1));
-	  return;
 	}
-
-      if (TARGET_SSE2 && mode == V2DFmode)
+      else if (TARGET_SSE2 && mode == V2DFmode)
         {
           rtx zero;
 
-	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
+	  if (TARGET_AVX
+	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+	      || optimize_function_for_size_p (cfun))
 	    {
+	      /* We will eventually emit movups based on insn attributes.  */
 	      emit_insn (gen_sse2_movupd (op0, op1));
 	      return;
 	    }
@@ -16030,7 +15982,10 @@ ix86_expand_vector_move_misalign (enum machine_mod
 	}
       else
         {
-	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
+	  if (TARGET_AVX
+	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+	      || optimize_function_for_size_p (cfun))
 	    {
 	      op0 = gen_lowpart (V4SFmode, op0);
 	      op1 = gen_lowpart (V4SFmode, op1);
@@ -16045,6 +16000,7 @@ ix86_expand_vector_move_misalign (enum machine_mod
 
 	  if (mode != V4SFmode)
 	    op0 = gen_lowpart (V4SFmode, op0);
+
 	  m = adjust_address (op1, V2SFmode, 0);
 	  emit_insn (gen_sse_loadlps (op0, op0, m));
 	  m = adjust_address (op1, V2SFmode, 8);
@@ -16053,30 +16009,20 @@ ix86_expand_vector_move_misalign (enum machine_mod
     }
   else if (MEM_P (op0))
     {
-      /* If we're optimizing for size, movups is the smallest.  */
-      if (optimize_insn_for_size_p ()
-	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	{
-	  op0 = gen_lowpart (V4SFmode, op0);
-	  op1 = gen_lowpart (V4SFmode, op1);
-	  emit_insn (gen_sse_movups (op0, op1));
-	  return;
-	}
-
-      /* ??? Similar to above, only less clear
-	 because of typeless stores.  */
-      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
-	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
         {
 	  op0 = gen_lowpart (V16QImode, op0);
 	  op1 = gen_lowpart (V16QImode, op1);
+	  /* We will eventually emit movups based on insn attributes.  */
 	  emit_insn (gen_sse2_movdqu (op0, op1));
-	  return;
 	}
-
-      if (TARGET_SSE2 && mode == V2DFmode)
+      else if (TARGET_SSE2 && mode == V2DFmode)
 	{
-	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+	  if (TARGET_AVX
+	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+	      || optimize_function_for_size_p (cfun))
+	    /* We will eventually emit movups based on insn attributes.  */
 	    emit_insn (gen_sse2_movupd (op0, op1));
 	  else
 	    {
@@ -16091,7 +16037,10 @@ ix86_expand_vector_move_misalign (enum machine_mod
 	  if (mode != V4SFmode)
 	    op1 = gen_lowpart (V4SFmode, op1);
 
-	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+	  if (TARGET_AVX
+	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+	      || optimize_function_for_size_p (cfun))
 	    {
 	      op0 = gen_lowpart (V4SFmode, op0);
 	      emit_insn (gen_sse_movups (op0, op1));
