diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index bb0b890..d9dc571 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -7062,11 +7062,8 @@ static void
 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
 {
   rtx save_area, mem;
-  rtx label;
-  rtx tmp_reg;
-  rtx nsse_reg;
   alias_set_type set;
-  int i;
+  int i, max;
 
   /* GPR size of varargs save area.  */
   if (cfun->va_list_gpr_size)
@@ -7087,10 +7084,11 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
   save_area = frame_pointer_rtx;
   set = get_varargs_alias_set ();
 
-  for (i = cum->regno;
-       i < X86_64_REGPARM_MAX
-       && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
-       i++)
+  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
+  if (max > X86_64_REGPARM_MAX)
+    max = X86_64_REGPARM_MAX;
+
+  for (i = cum->regno; i < max; i++)
     {
       mem = gen_rtx_MEM (Pmode,
 			 plus_constant (save_area, i * UNITS_PER_WORD));
@@ -7102,33 +7100,41 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
 
   if (ix86_varargs_fpr_size)
     {
+      enum machine_mode smode;
+      rtx label, test;
+
       /* Now emit code to save SSE registers.  The AX parameter contains number
-	 of SSE parameter registers used to call this function.  We use
-	 sse_prologue_save insn template that produces computed jump across
-	 SSE saves.  We need some preparation work to get this working.  */
+	 of SSE parameter registers used to call this function, though all we
+	 actually check here is the zero/non-zero status.  */
 
       label = gen_label_rtx ();
+      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
+      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
+				      label));
+
+      /* If we've determined that we're only loading scalars (and not
+	 vector data) then we can store doubles instead.  */
+      if (crtl->stack_alignment_needed < 128)
+	smode = DFmode;
+      else
+	smode = V4SFmode;
 
-      nsse_reg = gen_reg_rtx (Pmode);
-      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
-
-      /* Compute address of memory block we save into.  We always use pointer
-	 pointing 127 bytes after first byte to store - this is needed to keep
-	 instruction size limited by 4 bytes (5 bytes for AVX) with one
-	 byte displacement.  */
-      tmp_reg = gen_reg_rtx (Pmode);
-      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
-			      plus_constant (save_area,
-					     ix86_varargs_gpr_size + 127)));
-      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
-      MEM_NOTRAP_P (mem) = 1;
-      set_mem_alias_set (mem, set);
-      set_mem_align (mem, 64);
+      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
+      if (max > X86_64_SSE_REGPARM_MAX)
+	max = X86_64_SSE_REGPARM_MAX;
 
-      /* And finally do the dirty job!  */
-      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
-					GEN_INT (cum->sse_regno), label,
-					gen_reg_rtx (Pmode)));
+      for (i = cum->sse_regno; i < max; ++i)
+	{
+	  mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
+	  mem = gen_rtx_MEM (smode, mem);
+	  MEM_NOTRAP_P (mem) = 1;
+	  set_mem_alias_set (mem, set);
+	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
+
+	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
+	}
+
+      emit_label (label);
     }
 }
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 88b4029..6616da2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -79,13 +79,11 @@
   ;; Prologue support
   UNSPEC_STACK_ALLOC
   UNSPEC_SET_GOT
-  UNSPEC_SSE_PROLOGUE_SAVE
   UNSPEC_REG_SAVE
   UNSPEC_DEF_CFA
   UNSPEC_SET_RIP
   UNSPEC_SET_GOT_OFFSET
   UNSPEC_MEMORY_BLOCKAGE
-  UNSPEC_SSE_PROLOGUE_SAVE_LOW
 
   ;; TLS support
   UNSPEC_TP
@@ -17825,179 +17823,6 @@
   { return ASM_SHORT "0x0b0f"; }
   [(set_attr "length" "2")])
 
-(define_expand "sse_prologue_save"
-  [(parallel [(set (match_operand:BLK 0 "" "")
-		   (unspec:BLK [(reg:DI XMM0_REG)
-				(reg:DI XMM1_REG)
-				(reg:DI XMM2_REG)
-				(reg:DI XMM3_REG)
-				(reg:DI XMM4_REG)
-				(reg:DI XMM5_REG)
-				(reg:DI XMM6_REG)
-				(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
-	      (clobber (reg:CC FLAGS_REG))
-	      (clobber (match_operand:DI 1 "register_operand" ""))
-	      (use (match_operand:DI 2 "immediate_operand" ""))
-	      (use (label_ref:DI (match_operand 3 "" "")))
-	      (clobber (match_operand:DI 4 "register_operand" ""))
-	      (use (match_dup 1))])]
-  "TARGET_64BIT"
-  "")
-
-;; Pre-reload version of prologue save.  Until after prologue generation we don't know
-;; what the size of save instruction will be.
-;; Operand 0+operand 6 is the memory save area
-;; Operand 1 is number of registers to save (will get overwritten to operand 5)
-;; Operand 2 is number of non-vaargs SSE arguments
-;; Operand 3 is label starting the save block
-;; Operand 4 is used for temporary computation of jump address
-(define_insn "*sse_prologue_save_insn1"
-  [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
-			  (match_operand:DI 6 "const_int_operand" "n")))
-	(unspec:BLK [(reg:DI XMM0_REG)
-		     (reg:DI XMM1_REG)
-		     (reg:DI XMM2_REG)
-		     (reg:DI XMM3_REG)
-		     (reg:DI XMM4_REG)
-		     (reg:DI XMM5_REG)
-		     (reg:DI XMM6_REG)
-		     (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
-   (clobber (reg:CC FLAGS_REG))
-   (clobber (match_operand:DI 1 "register_operand" "=r"))
-   (use (match_operand:DI 2 "const_int_operand" "i"))
-   (use (label_ref:DI (match_operand 3 "" "X")))
-   (clobber (match_operand:DI 4 "register_operand" "=&r"))
-   (use (match_operand:DI 5 "register_operand" "1"))]
-  "TARGET_64BIT
-   && INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
-   && INTVAL (operands[6]) + INTVAL (operands[2]) * 16 >= -128"
-  "#"
-  [(set_attr "type" "other")
-   (set_attr "memory" "store")
-   (set_attr "mode" "DI")])
-
-;; We know size of save instruction; expand the computation of jump address
-;; in the jumptable.
-(define_split
-  [(parallel [(set (match_operand:BLK 0 "" "")
-		    (unspec:BLK [(reg:DI XMM0_REG)
-				 (reg:DI XMM1_REG)
-				 (reg:DI XMM2_REG)
-				 (reg:DI XMM3_REG)
-				 (reg:DI XMM4_REG)
-				 (reg:DI XMM5_REG)
-				 (reg:DI XMM6_REG)
-				 (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
-	       (clobber (reg:CC FLAGS_REG))
-	       (clobber (match_operand:DI 1 "register_operand" ""))
-	       (use (match_operand:DI 2 "const_int_operand" ""))
-	       (use (match_operand 3 "" ""))
-	       (clobber (match_operand:DI 4 "register_operand" ""))
-	       (use (match_operand:DI 5 "register_operand" ""))])]
-  "reload_completed"
-  [(parallel [(set (match_dup 0)
-		   (unspec:BLK [(reg:DI XMM0_REG)
-				(reg:DI XMM1_REG)
-				(reg:DI XMM2_REG)
-				(reg:DI XMM3_REG)
-				(reg:DI XMM4_REG)
-				(reg:DI XMM5_REG)
-				(reg:DI XMM6_REG)
-				(reg:DI XMM7_REG)]
-			       UNSPEC_SSE_PROLOGUE_SAVE_LOW))
-	      (use (match_dup 1))
-	      (use (match_dup 2))
-	      (use (match_dup 3))
-	      (use (match_dup 5))])]
-{
-  /* Movaps is 4 bytes, AVX and movsd is 5 bytes.  */
-  int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128);
-
-  /* Compute address to jump to:
-     label - eax*size + nnamed_sse_arguments*size. */
-  if (size == 5)
-    emit_insn (gen_rtx_SET (VOIDmode, operands[4],
-			    gen_rtx_PLUS
-			      (Pmode,
-			       gen_rtx_MULT (Pmode, operands[1],
-					     GEN_INT (4)),
-			       operands[1])));
-  else  if (size == 4)
-    emit_insn (gen_rtx_SET (VOIDmode, operands[4],
-			    gen_rtx_MULT (Pmode, operands[1],
-					  GEN_INT (4))));
-  else
-    gcc_unreachable ();
-  if (INTVAL (operands[2]))
-    emit_move_insn
-      (operands[1],
-       gen_rtx_CONST (DImode,
-		      gen_rtx_PLUS (DImode,
-				    operands[3],
-				    GEN_INT (INTVAL (operands[2])
-					     * size))));
-  else
-    emit_move_insn (operands[1], operands[3]);
-  emit_insn (gen_subdi3 (operands[1], operands[1], operands[4]));
-  operands[5] = GEN_INT (size);
-})
-
-(define_insn "sse_prologue_save_insn"
-  [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
-			  (match_operand:DI 4 "const_int_operand" "n")))
-	(unspec:BLK [(reg:DI XMM0_REG)
-		     (reg:DI XMM1_REG)
-		     (reg:DI XMM2_REG)
-		     (reg:DI XMM3_REG)
-		     (reg:DI XMM4_REG)
-		     (reg:DI XMM5_REG)
-		     (reg:DI XMM6_REG)
-		     (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
-   (use (match_operand:DI 1 "register_operand" "r"))
-   (use (match_operand:DI 2 "const_int_operand" "i"))
-   (use (label_ref:DI (match_operand 3 "" "X")))
-   (use (match_operand:DI 5 "const_int_operand" "i"))]
-  "TARGET_64BIT
-   && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
-   && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
-{
-  int i;
-  operands[0] = gen_rtx_MEM (Pmode,
-			     gen_rtx_PLUS (Pmode, operands[0], operands[4]));
-  /* VEX instruction with a REX prefix will #UD.  */
-  if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS)
-    gcc_unreachable ();
-
-  output_asm_insn ("jmp\t%A1", operands);
-  for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
-    {
-      operands[4] = adjust_address (operands[0], DImode, i*16);
-      operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
-      PUT_MODE (operands[4], TImode);
-      if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
-        output_asm_insn ("rex", operands);
-      if (crtl->stack_alignment_needed < 128)
-        output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands);
-      else
-        output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
-    }
-  targetm.asm_out.internal_label (asm_out_file, "L",
-				  CODE_LABEL_NUMBER (operands[3]));
-  return "";
-}
-  [(set_attr "type" "other")
-   (set_attr "length_immediate" "0")
-   (set_attr "length_address" "0")
-   ;; 2 bytes for jump and opernds[4] bytes for each save.
-   (set (attr "length")
-     (plus (const_int 2)
-	   (mult (symbol_ref ("INTVAL (operands[5])"))
-		 (symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])")))))
-   (set_attr "memory" "store")
-   (set_attr "modrm" "0")
-   (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "DI")])
-
 (define_expand "prefetch"
   [(prefetch (match_operand 0 "address_operand" "")
 	     (match_operand:SI 1 "const_int_operand" "")
