Index: gcc/config/rs6000/vector.md
===================================================================
--- gcc/config/rs6000/vector.md	(revision 195557)
+++ gcc/config/rs6000/vector.md	(working copy)
@@ -54,7 +54,7 @@ (define_mode_iterator VEC_E [V16QI V8HI 
 (define_mode_iterator VEC_64 [V2DI V2DF])
 
 ;; Vector reload iterator
-(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF DF TI])
+(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF SF SD SI DF DD DI TI])
 
 ;; Base type from vector mode
 (define_mode_attr VEC_base [(V16QI "QI")
Index: gcc/config/rs6000/constraints.md
===================================================================
--- gcc/config/rs6000/constraints.md	(revision 195557)
+++ gcc/config/rs6000/constraints.md	(working copy)
@@ -68,6 +68,16 @@ (define_register_constraint "ws" "rs6000
 (define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
   "@internal")
 
+;; Register constraints to simplify move patterns
+(define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]"
+  "Floating point register if the LFIWAX instruction is enabled or NO_REGS.")
+
+(define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]"
+  "Floating point register if the STFIWX instruction is enabled or NO_REGS.")
+
+(define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]"
+  "Floating point register if the LFIWZX instruction is enabled or NO_REGS.")
+
 ;; Altivec style load/store that ignores the bottom bits of the address
 (define_memory_constraint "wZ"
   "Indexed or indirect memory operand, ignoring the bottom 4 bits"
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 195557)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -1737,14 +1737,21 @@ rs6000_debug_reg_global (void)
 	   "wa reg_class = %s\n"
 	   "wd reg_class = %s\n"
 	   "wf reg_class = %s\n"
-	   "ws reg_class = %s\n\n",
+	   "wl reg_class = %s\n"
+	   "ws reg_class = %s\n"
+	   "wx reg_class = %s\n"
+	   "wz reg_class = %s\n"
+	   "\n",
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
-	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]]);
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
+	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
 
   for (m = 0; m < NUM_MACHINE_MODES; ++m)
     if (rs6000_vector_unit[m] || rs6000_vector_mem[m])
@@ -2108,9 +2115,20 @@ rs6000_init_hard_regno_mode_ok (bool glo
 						  : FLOAT_REGS);
     }
 
+  /* Add conditional constraints based on various options, to allow us to
+     collapse multiple insn patterns.  */
   if (TARGET_ALTIVEC)
     rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
 
+  if (TARGET_LFIWAX)
+    rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;
+
+  if (TARGET_STFIWX)
+    rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;
+
+  if (TARGET_LFIWZX)
+    rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;
+
   /* Set up the reload helper functions.  */
   if (TARGET_VSX || TARGET_ALTIVEC)
     {
@@ -5176,6 +5194,13 @@ reg_offset_addressing_ok_p (enum machine
         return false;
       break;
 
+    case SDmode:
+      /* If we can do direct load/stores of SDmode, restrict it to reg+reg
+	 addressing for the LFIWZX and STFIWX instructions.  */
+      if (TARGET_NO_SDMODE_STACK)
+	return false;
+      break;
+
     default:
       break;
     }
@@ -7143,6 +7168,7 @@ rs6000_emit_move (rtx dest, rtx source, 
 
   if (reload_in_progress
       && mode == SDmode
+      && cfun->machine->sdmode_stack_slot != NULL_RTX
       && MEM_P (operands[0])
       && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
       && REG_P (operands[1]))
@@ -7167,6 +7193,7 @@ rs6000_emit_move (rtx dest, rtx source, 
       && mode == SDmode
       && REG_P (operands[0])
       && MEM_P (operands[1])
+      && cfun->machine->sdmode_stack_slot != NULL_RTX
       && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
     {
       if (FP_REGNO_P (REGNO (operands[0])))
@@ -13622,7 +13649,7 @@ rs6000_secondary_memory_needed_rtx (enum
   static bool eliminated = false;
   rtx ret;
 
-  if (mode != SDmode)
+  if (mode != SDmode || TARGET_NO_SDMODE_STACK)
     ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
   else
     {
@@ -14209,8 +14236,10 @@ rs6000_secondary_reload_gpr (rtx reg, rt
   return;
 }
 
-/* Allocate a 64-bit stack slot to be used for copying SDmode
-   values through if this function has any SDmode references.  */
+/* Allocate a 64-bit stack slot to be used for copying SDmode values through if
+   this function has any SDmode references.  If we are on a power7 or later, we
+   don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
+   can load/store the value.  */
 
 static void
 rs6000_alloc_sdmode_stack_slot (void)
@@ -14221,6 +14250,9 @@ rs6000_alloc_sdmode_stack_slot (void)
 
   gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
 
+  if (TARGET_NO_SDMODE_STACK)
+    return;
+
   FOR_EACH_BB (bb)
     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
       {
@@ -14281,8 +14313,7 @@ rs6000_preferred_reload_class (rtx x, en
 {
   enum machine_mode mode = GET_MODE (x);
 
-  if (VECTOR_UNIT_VSX_P (mode)
-      && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
+  if (TARGET_VSX && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
     return rclass;
 
   if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h	(revision 195557)
+++ gcc/config/rs6000/rs6000.h	(working copy)
@@ -473,6 +473,11 @@ extern int rs6000_vector_align[];
 #define TARGET_FCTIDUZ	TARGET_POPCNTD
 #define TARGET_FCTIWUZ	TARGET_POPCNTD
 
+/* Power7 has both 32-bit load and store integer for the FPRs, so we don't need
+   to allocate the SDmode stack slot to get the value into the proper location
+   in the register.  */
+#define TARGET_NO_SDMODE_STACK	(TARGET_LFIWZX && TARGET_STFIWX && TARGET_DFP)
+
 /* In switching from using target_flags to using rs6000_isa_flags, the options
    machinery creates OPTION_MASK_<xxx> instead of MASK_<xxx>.  For now map
    OPTION_MASK_<xxx> back into MASK_<xxx>.  */
@@ -1320,7 +1325,10 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_wa,		/* Any VSX register */
   RS6000_CONSTRAINT_wd,		/* VSX register for V2DF */
   RS6000_CONSTRAINT_wf,		/* VSX register for V4SF */
+  RS6000_CONSTRAINT_wl,		/* FPR register for LFIWAX */
   RS6000_CONSTRAINT_ws,		/* VSX register for DF */
+  RS6000_CONSTRAINT_wx,		/* FPR register for STFIWX */
+  RS6000_CONSTRAINT_wz,		/* FPR register for LFIWZX */
   RS6000_CONSTRAINT_MAX
 };
 
Index: gcc/config/rs6000/dfp.md
===================================================================
--- gcc/config/rs6000/dfp.md	(revision 195557)
+++ gcc/config/rs6000/dfp.md	(working copy)
@@ -29,77 +29,6 @@ (define_c_enum "unspec"
   ])
 
 
-(define_expand "movsd"
-  [(set (match_operand:SD 0 "nonimmediate_operand" "")
-	(match_operand:SD 1 "any_operand" ""))]
-  "TARGET_HARD_FLOAT && TARGET_FPRS"
-  "{ rs6000_emit_move (operands[0], operands[1], SDmode); DONE; }")
-
-(define_split
-  [(set (match_operand:SD 0 "gpc_reg_operand" "")
-	(match_operand:SD 1 "const_double_operand" ""))]
-  "reload_completed
-   && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
-       || (GET_CODE (operands[0]) == SUBREG
-	   && GET_CODE (SUBREG_REG (operands[0])) == REG
-	   && REGNO (SUBREG_REG (operands[0])) <= 31))"
-  [(set (match_dup 2) (match_dup 3))]
-  "
-{
-  long l;
-  REAL_VALUE_TYPE rv;
-
-  REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
-  REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
-
-  if (! TARGET_POWERPC64)
-    operands[2] = operand_subword (operands[0], 0, 0, SDmode);
-  else
-    operands[2] = gen_lowpart (SImode, operands[0]);
-
-  operands[3] = gen_int_mode (l, SImode);
-}")
-
-(define_insn "movsd_hardfloat"
-  [(set (match_operand:SD 0 "nonimmediate_operand" "=r,r,m,f,*c*l,!r,*h,!r,!r")
-	(match_operand:SD 1 "input_operand"        "r,m,r,f,r,h,0,G,Fn"))]
-  "(gpc_reg_operand (operands[0], SDmode)
-   || gpc_reg_operand (operands[1], SDmode))
-   && (TARGET_HARD_FLOAT && TARGET_FPRS)"
-  "@
-   mr %0,%1
-   lwz%U1%X1 %0,%1
-   stw%U0%X0 %1,%0
-   fmr %0,%1
-   mt%0 %1
-   mf%1 %0
-   nop
-   #
-   #"
-  [(set_attr "type" "*,load,store,fp,mtjmpr,mfjmpr,*,*,*")
-   (set_attr "length" "4,4,4,4,4,4,4,4,8")])
-
-(define_insn "movsd_softfloat"
-  [(set (match_operand:SD 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,r,*h")
-	(match_operand:SD 1 "input_operand" "r,r,h,m,r,I,L,R,G,Fn,0"))]
-  "(gpc_reg_operand (operands[0], SDmode)
-   || gpc_reg_operand (operands[1], SDmode))
-   && (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
-  "@
-   mr %0,%1
-   mt%0 %1
-   mf%1 %0
-   lwz%U1%X1 %0,%1
-   stw%U0%X0 %1,%0
-   li %0,%1
-   lis %0,%v1
-   la %0,%a1
-   #
-   #
-   nop"
-  [(set_attr "type" "*,mtjmpr,mfjmpr,load,store,*,*,*,*,*,*")
-   (set_attr "length" "4,4,4,4,4,4,4,4,4,8,4")])
-
 (define_insn "movsd_store"
   [(set (match_operand:DD 0 "nonimmediate_operand" "=m")
 	(unspec:DD [(match_operand:SD 1 "input_operand" "d")]
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 195557)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -253,6 +253,31 @@ (define_mode_iterator FMA_F [
   (V2DF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V2DFmode)")
   ])
 
+; Floating point move iterators to combine binary and decimal moves
+(define_mode_iterator FMOVE32 [SF SD])
+
+; Whether a floating point move is ok, don't allow SD without hardware FP
+(define_mode_attr fmove_ok [(SF "")
+			    (DF "")
+			    (SD "TARGET_HARD_FLOAT && TARGET_FPRS")
+			    (DD "")])
+
+; Convert REAL_VALUE to the appropriate bits
+(define_mode_attr real_value_to_target [(SF "REAL_VALUE_TO_TARGET_SINGLE")
+					(DF "REAL_VALUE_TO_TARGET_DOUBLE")
+					(SD "REAL_VALUE_TO_TARGET_DECIMAL32")
+					(DD "REAL_VALUE_TO_TARGET_DECIMAL64")])
+
+; Definitions for load to 32-bit fpr register
+(define_mode_attr f32_lr [(SF "f")		 (SD "wz")])
+(define_mode_attr f32_lm [(SF "m")		 (SD "Z")])
+(define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
+
+; Definitions for store from 32-bit fpr register
+(define_mode_attr f32_sr [(SF "f")		  (SD "wx")])
+(define_mode_attr f32_sm [(SF "m")		  (SD "Z")])
+(define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
+
 ; These modes do not fit in integer registers in 32-bit mode.
 ; but on e500v2, the gpr are 64 bit registers
 (define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD])
@@ -7853,15 +7878,17 @@ (define_insn "*movcc_internal1"
 ;; can produce floating-point values in fixed-point registers.  Unless the
 ;; value is a simple constant or already in memory, we deal with this by
 ;; allocating memory and copying the value explicitly via that memory location.
-(define_expand "movsf"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "")
-	(match_operand:SF 1 "any_operand" ""))]
-  ""
-  "{ rs6000_emit_move (operands[0], operands[1], SFmode); DONE; }")
+
+;; Move 32-bit binary/decimal floating point
+(define_expand "mov<mode>"
+  [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "")
+	(match_operand:FMOVE32 1 "any_operand" ""))]
+  "<fmove_ok>"
+  "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }")
 
 (define_split
-  [(set (match_operand:SF 0 "gpc_reg_operand" "")
-	(match_operand:SF 1 "const_double_operand" ""))]
+  [(set (match_operand:FMOVE32 0 "gpc_reg_operand" "")
+	(match_operand:FMOVE32 1 "const_double_operand" ""))]
   "reload_completed
    && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
        || (GET_CODE (operands[0]) == SUBREG
@@ -7874,42 +7901,44 @@ (define_split
   REAL_VALUE_TYPE rv;
 
   REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
-  REAL_VALUE_TO_TARGET_SINGLE (rv, l);
+  <real_value_to_target> (rv, l);
 
   if (! TARGET_POWERPC64)
-    operands[2] = operand_subword (operands[0], 0, 0, SFmode);
+    operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode);
   else
     operands[2] = gen_lowpart (SImode, operands[0]);
 
   operands[3] = gen_int_mode (l, SImode);
 }")
 
-(define_insn "*movsf_hardfloat"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=!r,!r,m,f,f,m,*c*l,!r,*h,!r,!r")
-	(match_operand:SF 1 "input_operand" "r,m,r,f,m,f,r,h,0,G,Fn"))]
-  "(gpc_reg_operand (operands[0], SFmode)
-   || gpc_reg_operand (operands[1], SFmode))
+(define_insn "mov<mode>_hardfloat"
+  [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,*c*l,!r,*h,!r,!r")
+	(match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,r,h,0,G,Fn"))]
+  "(gpc_reg_operand (operands[0], <MODE>mode)
+   || gpc_reg_operand (operands[1], <MODE>mode))
    && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
   "@
    mr %0,%1
    lwz%U1%X1 %0,%1
    stw%U0%X0 %1,%0
    fmr %0,%1
-   lfs%U1%X1 %0,%1
-   stfs%U0%X0 %1,%0
+   xxlor %x0,%x1,%x1
+   xxlxor %x0,%x0,%x0
+   <f32_li>
+   <f32_si>
    mt%0 %1
    mf%1 %0
    nop
    #
    #"
-  [(set_attr "type" "*,load,store,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*")
-   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8")])
+  [(set_attr "type"   "*,load,store,fp,vecsimple,vecsimple,fpload,fpstore,mtjmpr,mfjmpr,*,*,*")
+   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8")])
 
-(define_insn "*movsf_softfloat"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
-	(match_operand:SF 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))]
-  "(gpc_reg_operand (operands[0], SFmode)
-   || gpc_reg_operand (operands[1], SFmode))
+(define_insn "*mov<mode>_softfloat"
+  [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
+	(match_operand:FMOVE32 1 "input_operand" "r, r,h,m,r,I,L,G,Fn,0"))]
+  "(gpc_reg_operand (operands[0], <MODE>mode)
+   || gpc_reg_operand (operands[1], <MODE>mode))
    && (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
   "@
    mr %0,%1
Index: gcc/doc/md.texi
===================================================================
--- gcc/doc/md.texi	(revision 195557)
+++ gcc/doc/md.texi	(working copy)
@@ -2075,9 +2075,18 @@ VSX vector register to hold vector doubl
 @item wf
 VSX vector register to hold vector float data
 
+@item wl
+If the LFIWAX instruction is enabled, a floating point register
+
 @item ws
 VSX vector register to hold scalar float data
 
+@item wx
+If the STFIWX instruction is enabled, a floating point register
+
+@item wz
+If the LFIWZX instruction is enabled, a floating point register
+
 @item wa
 Any VSX register
 
Index: gcc/testsuite/gcc.target/powerpc/sd-vsx.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/sd-vsx.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/sd-vsx.c	(revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power7 -mhard-dfp" } */
+/* { dg-final { scan-assembler-times "lfiwzx" 2 } } */
+/* { dg-final { scan-assembler-times "stfiwx" 1 } } */
+/* { dg-final { scan-assembler-not   "lfd"      } } */
+/* { dg-final { scan-assembler-not   "stfd"     } } */
+/* { dg-final { scan-assembler-times "dctdp"  2 } } */
+/* { dg-final { scan-assembler-times "dadd"   1 } } */
+/* { dg-final { scan-assembler-times "drsp"   1 } } */
+
+/* Test that power7 can directly load/store SDmode variables without using a
+   bounce buffer.  */
+_Decimal32 a;
+
+void inc_dec32 (void)
+{
+  a += (_Decimal32) 1.0;
+}
Index: gcc/testsuite/gcc.target/powerpc/sd-pwr6.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/sd-pwr6.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/sd-pwr6.c	(revision 0)
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power6 -mhard-dfp" } */
+/* { dg-final { scan-assembler-not   "lfiwzx"   } } */
+/* { dg-final { scan-assembler-times "lfd"    2 } } */
+/* { dg-final { scan-assembler-times "dctdp"  2 } } */
+/* { dg-final { scan-assembler-times "dadd"   1 } } */
+/* { dg-final { scan-assembler-times "drsp"   1 } } */
+
+/* Test that for power6 we need to use a bounce buffer on the stack to load
+   SDmode variables because the power6 does not have a way to directly load
+   32-bit values from memory.  */
+_Decimal32 a;
+
+void inc_dec32 (void)
+{
+  a += (_Decimal32) 1.0;
+}
Index: gcc/testsuite/gcc.target/powerpc/vsx-float0.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vsx-float0.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vsx-float0.c	(revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power7" } */
+/* { dg-final { scan-assembler "xxlxor" } } */
+
+/* Test that we generate xxlxor to clear a SFmode register.  */
+
+float sum (float *p, unsigned long n)
+{
+  float sum = 0.0f;	/* generate xxlxor instead of load */
+  while (n-- > 0)
+    sum += *p++;
+
+  return sum;
+}
