diff mbox

PR53914, rs6000 constraints and reload queries

Message ID 20120724060718.GG3182@bubble.grove.modra.org
State New
Headers show

Commit Message

Alan Modra July 24, 2012, 6:07 a.m. UTC
On Mon, Jul 23, 2012 at 07:30:23PM -0400, David Edelsohn wrote:
> This is okay

Committed revision 189801.

	PR target/53914
	PR target/54009
	* config/rs6000/constraints.md (Y): Use mem_operand_gpr.
	* config/rs6000/predicates.md (word_offset_memref_operand): Delete.
	Adjust all rs6000_legitimate_offset_address_p calls.
	* config/rs6000/rs6000-protos.h (mem_operand_gpr): Declare.
	(rs6000_secondary_reload_gpr): Declare.
	(rs6000_legitimate_offset_address_p): Update prototype.
	(rs6000_offsettable_memref_p): Delete.
	(rs6000_secondary_reload_ppc64): Delete.
	* config/rs6000/rs6000.c (address_offset): New function.
	(mem_operand_gpr): Likewise.
	(rs6000_legitimate_offset_address_p): Add worst_case param.  When
	not worst_case assume class of regs with least restrictive offsets.
	Adjust all calls.
	(legitimate_lo_sum_address_p): Simplify register mode tests.
	(rs6000_legitimize_address): Likewise.  Assume best case offset
	addressing.  Combine ELF and MACHO lo_sum code.
	(rs6000_mode_dependent_address): Correct offset addressing limits.
	(rs6000_offsettable_memref_p): Make static, add reg_mode param.
	Use reg_mode to help rs6000_legitimate_offset_address_p.
	(rs6000_secondary_reload): Use address_offset.  Handle 32-bit multi
	gpr load/store when offset too large.
	(rs6000_secondary_reload_gpr): Renamed rs6000_secondary_reload_ppc64.
	(rs6000_split_multireg_move): Adjust rs6000_offsettable_memref_p calls.
	* config/rs6000/rs6000.md (movdf_hardfloat32): Use 'Y' constraint
	for gpr load/store.  Order alternatives as r->Y,Y->r,r->r and
	d->m,m->d,d->d.  Correct size of gpr load/store.
	(movdf_softfloat32): Use 'Y' constraint for gpr load/store.  Order
	alternatives.
	(movti_ppc64): Likewise.
	(movdi_internal32): Likewise.  Also disparage fprs.
	(movdi_mfpgpr, movdi_internal64): Likewise.
	(movtf_internal): Use 'm' for fpr load/store.  Order alternatives.
	(movtf_softfloat): Order alternatives.
	(extenddftf2_internal): Use 'm' and 'Y' for store.
	(movti_power, movti_string): Use 'Y' for gpr load/store.  Order.
	(stack_protect_setdi, stack_protect_testdi): Likewise.
	(movdf_hardfloat64_mfpgpr, movdf_hardfloat64): Order alternatives.
	(movdf_softfloat64): Likewise.
	(reload_<mode>_store): Adjust reload_di_store to provide
	reload_si_store as well.
	(reload_<mode>_load): Likewise.
diff mbox

Patch

Index: gcc/config/rs6000/constraints.md
===================================================================
--- gcc/config/rs6000/constraints.md	(revision 189800)
+++ gcc/config/rs6000/constraints.md	(working copy)
@@ -150,8 +150,9 @@  to use @samp{m} or @samp{es} in @code{asm} stateme
        (match_test "GET_CODE (XEXP (op, 0)) == REG")))
 
 (define_memory_constraint "Y"
-  "Indexed or word-aligned displacement memory operand"
-  (match_operand 0 "word_offset_memref_operand"))
+  "memory operand for 8 byte and 16 byte gpr load/store"
+  (and (match_code "mem")
+       (match_operand 0 "mem_operand_gpr")))
 
 (define_memory_constraint "Z"
   "Memory operand that is an indexed or indirect from a register (it is
Index: gcc/config/rs6000/predicates.md
===================================================================
--- gcc/config/rs6000/predicates.md	(revision 189800)
+++ gcc/config/rs6000/predicates.md	(working copy)
@@ -432,29 +432,6 @@ 
   (and (match_operand 0 "memory_operand")
        (match_test "offsettable_nonstrict_memref_p (op)")))
 
-;; Return 1 if the operand is a memory operand with an address divisible by 4
-(define_predicate "word_offset_memref_operand"
-  (match_operand 0 "memory_operand")
-{
-  /* Address inside MEM.  */
-  op = XEXP (op, 0);
-
-  /* Extract address from auto-inc/dec.  */
-  if (GET_CODE (op) == PRE_INC
-      || GET_CODE (op) == PRE_DEC)
-    op = XEXP (op, 0);
-  else if (GET_CODE (op) == PRE_MODIFY)
-    op = XEXP (op, 1);
-  else if (GET_CODE (op) == LO_SUM
-	   && GET_CODE (XEXP (op, 0)) == REG
-	   && GET_CODE (XEXP (op, 1)) == CONST)
-    op = XEXP (XEXP (op, 1), 0);
-
-  return (GET_CODE (op) != PLUS
-	  || GET_CODE (XEXP (op, 1)) != CONST_INT
-	  || INTVAL (XEXP (op, 1)) % 4 == 0);
-})
-
 ;; Return 1 if the operand is an indexed or indirect memory operand.
 (define_predicate "indexed_or_indirect_operand"
   (match_code "mem")
@@ -892,7 +869,8 @@ 
   return input_operand (op, mode);
 })
 
-;; Return true if OP is an invalid SUBREG operation on the e500.
+;; Return true if OP is a non-immediate operand and not an invalid
+;; SUBREG operation on the e500.
 (define_predicate "rs6000_nonimmediate_operand"
   (match_code "reg,subreg,mem")
 {
@@ -1325,7 +1303,7 @@ 
       if (base_regno == 0)
 	return 0;
     }
-  else if (rs6000_legitimate_offset_address_p (SImode, src_addr, 0))
+  else if (rs6000_legitimate_offset_address_p (SImode, src_addr, false, false))
     {
       offset = INTVAL (XEXP (src_addr, 1));
       base_regno = REGNO (XEXP (src_addr, 0));
@@ -1353,7 +1331,7 @@ 
 	  newoffset = 0;
 	  addr_reg = newaddr;
 	}
-      else if (rs6000_legitimate_offset_address_p (SImode, newaddr, 0))
+      else if (rs6000_legitimate_offset_address_p (SImode, newaddr, false, false))
 	{
 	  addr_reg = XEXP (newaddr, 0);
 	  newoffset = INTVAL (XEXP (newaddr, 1));
@@ -1400,7 +1378,7 @@ 
       if (base_regno == 0)
 	return 0;
     }
-  else if (rs6000_legitimate_offset_address_p (SImode, dest_addr, 0))
+  else if (rs6000_legitimate_offset_address_p (SImode, dest_addr, false, false))
     {
       offset = INTVAL (XEXP (dest_addr, 1));
       base_regno = REGNO (XEXP (dest_addr, 0));
@@ -1428,7 +1406,7 @@ 
 	  newoffset = 0;
 	  addr_reg = newaddr;
 	}
-      else if (rs6000_legitimate_offset_address_p (SImode, newaddr, 0))
+      else if (rs6000_legitimate_offset_address_p (SImode, newaddr, false, false))
 	{
 	  addr_reg = XEXP (newaddr, 0);
 	  newoffset = INTVAL (XEXP (newaddr, 1));
Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h	(revision 189800)
+++ gcc/config/rs6000/rs6000-protos.h	(working copy)
@@ -38,6 +38,7 @@  extern bool macho_lo_sum_memory_operand (rtx, enum
 extern int num_insns_constant (rtx, enum machine_mode);
 extern int num_insns_constant_wide (HOST_WIDE_INT);
 extern int small_data_operand (rtx, enum machine_mode);
+extern bool mem_operand_gpr (rtx, enum machine_mode);
 extern bool toc_relative_expr_p (const_rtx, bool);
 extern bool invalid_e500_subreg (rtx, enum machine_mode);
 extern void validate_condition_mode (enum rtx_code, enum machine_mode);
@@ -83,7 +84,7 @@  extern bool (*rs6000_cannot_change_mode_class_ptr)
 						    enum machine_mode,
 						    enum reg_class);
 extern void rs6000_secondary_reload_inner (rtx, rtx, rtx, bool);
-extern void rs6000_secondary_reload_ppc64 (rtx, rtx, rtx, bool);
+extern void rs6000_secondary_reload_gpr (rtx, rtx, rtx, bool);
 extern int paired_emit_vector_cond_expr (rtx, rtx, rtx,
                                          rtx, rtx, rtx);
 extern void paired_expand_vector_move (rtx operands[]);
@@ -121,9 +122,9 @@  extern void rs6000_emit_move (rtx, rtx, enum machi
 extern rtx rs6000_secondary_memory_needed_rtx (enum machine_mode);
 extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode,
 						    int, int, int, int *);
-extern bool rs6000_legitimate_offset_address_p (enum machine_mode, rtx, int);
+extern bool rs6000_legitimate_offset_address_p (enum machine_mode, rtx,
+						bool, bool);
 extern rtx rs6000_find_base_term (rtx);
-extern bool rs6000_offsettable_memref_p (rtx);
 extern rtx rs6000_return_addr (int, rtx);
 extern void rs6000_output_symbol_ref (FILE*, rtx);
 extern HOST_WIDE_INT rs6000_initial_elimination_offset (int, int);
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 189800)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -5017,6 +5017,64 @@  gpr_or_gpr_p (rtx op0, rtx op1)
 	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
 }
 
+/* Given an address, return a constant offset term if one exists.  */
+
+static rtx
+address_offset (rtx op)
+{
+  if (GET_CODE (op) == PRE_INC
+      || GET_CODE (op) == PRE_DEC)
+    op = XEXP (op, 0);
+  else if (GET_CODE (op) == PRE_MODIFY
+	   || GET_CODE (op) == LO_SUM)
+    op = XEXP (op, 1);
+
+  if (GET_CODE (op) == CONST)
+    op = XEXP (op, 0);
+
+  if (GET_CODE (op) == PLUS)
+    op = XEXP (op, 1);
+
+  if (CONST_INT_P (op))
+    return op;
+
+  return NULL_RTX;
+}
+
+/* Return true if the MEM operand is a memory operand suitable for use
+   with a (full width, possibly multiple) gpr load/store.  On
+   powerpc64 this means the offset must be divisible by 4.
+   Implements 'Y' constraint.
+
+   Accept direct, indexed, offset, lo_sum and tocref.  Since this is
+   a constraint function we know the operand has satisfied a suitable
+   memory predicate.  Also accept some odd rtl generated by reload
+   (see rs6000_legitimize_reload_address for various forms).  It is
+   important that reload rtl be accepted by appropriate constraints
+   but not by the operand predicate.
+
+   Offsetting a lo_sum should not be allowed, except where we know by
+   alignment that a 32k boundary is not crossed, but see the ???
+   comment in rs6000_legitimize_reload_address.  */
+
+bool
+mem_operand_gpr (rtx op, enum machine_mode mode)
+{
+  unsigned HOST_WIDE_INT offset;
+  int extra;
+
+  op = address_offset (XEXP (op, 0));
+  if (op == NULL_RTX)
+    return true;
+
+  offset = INTVAL (op);
+  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
+  if (extra < 0)
+    extra = 0;
+  else if (TARGET_POWERPC64 && (offset & 3) != 0)
+    return false;
+  return offset + 0x8000 < 0x10000u - extra;
+}
 
 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p.  */
 
@@ -5234,13 +5292,15 @@  legitimate_small_data_p (enum machine_mode mode, r
 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
 
 bool
-rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x, int strict)
+rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x,
+				    bool strict, bool worst_case)
 {
-  unsigned HOST_WIDE_INT offset, extra;
+  unsigned HOST_WIDE_INT offset;
+  unsigned int extra;
 
   if (GET_CODE (x) != PLUS)
     return false;
-  if (GET_CODE (XEXP (x, 0)) != REG)
+  if (!REG_P (XEXP (x, 0)))
     return false;
   if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
     return false;
@@ -5263,14 +5323,6 @@  bool
       return SPE_CONST_OFFSET_OK (offset);
 
     case DFmode:
-      if (TARGET_E500_DOUBLE)
-	return SPE_CONST_OFFSET_OK (offset);
-
-      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
-	 addressing.  */
-      if (VECTOR_MEM_VSX_P (DFmode))
-	return false;
-
     case DDmode:
     case DImode:
       /* On e500v2, we may have:
@@ -5281,25 +5333,33 @@  bool
       if (TARGET_E500_DOUBLE)
 	return SPE_CONST_OFFSET_OK (offset);
 
-      if (mode == DFmode || mode == DDmode || !TARGET_POWERPC64)
+      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
+	 addressing.  */
+      if (mode == DFmode && VECTOR_MEM_VSX_P (DFmode))
+	return false;
+
+      if (!worst_case)
+	break;
+      if (!TARGET_POWERPC64)
 	extra = 4;
       else if (offset & 3)
 	return false;
       break;
 
     case TFmode:
+    case TDmode:
+    case TImode:
       if (TARGET_E500_DOUBLE)
 	return (SPE_CONST_OFFSET_OK (offset)
 		&& SPE_CONST_OFFSET_OK (offset + 8));
 
-    case TDmode:
-    case TImode:
-      if (mode == TFmode || mode == TDmode || !TARGET_POWERPC64)
+      extra = 8;
+      if (!worst_case)
+	break;
+      if (!TARGET_POWERPC64)
 	extra = 12;
       else if (offset & 3)
 	return false;
-      else
-	extra = 8;
       break;
 
     default:
@@ -5379,9 +5439,7 @@  legitimate_lo_sum_address_p (enum machine_mode mod
   if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
     return false;
   /* Restrict addressing for DI because of our SUBREG hackery.  */
-  if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
-			     || mode == DDmode || mode == TDmode
-			     || mode == DImode))
+  if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
     return false;
   x = XEXP (x, 1);
 
@@ -5393,10 +5451,10 @@  legitimate_lo_sum_address_p (enum machine_mode mod
 	return false;
       if (GET_MODE_NUNITS (mode) != 1)
 	return false;
-      if (GET_MODE_BITSIZE (mode) > 64
-	  || (GET_MODE_BITSIZE (mode) > 32 && !TARGET_POWERPC64
-	      && !(TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
-		   && (mode == DFmode || mode == DDmode))))
+      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
+	  && !(/* ??? Assume floating point reg based on mode?  */
+	       TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+	       && (mode == DFmode || mode == DDmode)))
 	return false;
 
       return CONSTANT_P (x);
@@ -5431,7 +5489,7 @@  static rtx
 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
 			   enum machine_mode mode)
 {
-  unsigned int extra = 0;
+  unsigned int extra;
 
   if (!reg_offset_addressing_ok_p (mode))
     {
@@ -5458,22 +5516,18 @@  rs6000_legitimize_address (rtx x, rtx oldx ATTRIBU
 	return rs6000_legitimize_tls_address (x, model);
     }
 
+  extra = 0;
   switch (mode)
     {
-    case DFmode:
-    case DDmode:
-      extra = 4;
-      break;
-    case DImode:
-      if (!TARGET_POWERPC64)
-	extra = 4;
-      break;
     case TFmode:
     case TDmode:
-      extra = 12;
-      break;
     case TImode:
-      extra = TARGET_POWERPC64 ? 8 : 12;
+      /* As in legitimate_offset_address_p we do not assume
+	 worst-case.  The mode here is just a hint as to the registers
+	 used.  A TImode is usually in gprs, but may actually be in
+	 fprs.  Leave worst-case scenario for reload to handle via
+	 insn constraints.  */
+      extra = 8;
       break;
     default:
       break;
@@ -5484,13 +5538,8 @@  rs6000_legitimize_address (rtx x, rtx oldx ATTRIBU
       && GET_CODE (XEXP (x, 1)) == CONST_INT
       && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
 	  >= 0x10000 - extra)
-      && !((TARGET_POWERPC64
-	    && (mode == DImode || mode == TImode)
-	    && (INTVAL (XEXP (x, 1)) & 3) != 0)
-	   || SPE_VECTOR_MODE (mode)
-	   || (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
-				      || mode == DImode || mode == DDmode
-				      || mode == TDmode))))
+      && !(SPE_VECTOR_MODE (mode)
+	   || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
     {
       HOST_WIDE_INT high_int, low_int;
       rtx sum;
@@ -5506,23 +5555,17 @@  rs6000_legitimize_address (rtx x, rtx oldx ATTRIBU
 	   && GET_CODE (XEXP (x, 0)) == REG
 	   && GET_CODE (XEXP (x, 1)) != CONST_INT
 	   && GET_MODE_NUNITS (mode) == 1
-	   && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
-	       || TARGET_POWERPC64
-	       || ((mode != DImode && mode != DFmode && mode != DDmode)
-		   || (TARGET_E500_DOUBLE && mode != DDmode)))
-	   && (TARGET_POWERPC64 || mode != DImode)
-	   && !avoiding_indexed_address_p (mode)
-	   && mode != TImode
-	   && mode != TFmode
-	   && mode != TDmode)
+	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
+	       || (/* ??? Assume floating point reg based on mode?  */
+		   (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
+		   && (mode == DFmode || mode == DDmode)))
+	   && !avoiding_indexed_address_p (mode))
     {
       return gen_rtx_PLUS (Pmode, XEXP (x, 0),
 			   force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
     }
   else if (SPE_VECTOR_MODE (mode)
-	   || (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
-				      || mode == DDmode || mode == TDmode
-				      || mode == DImode)))
+	   || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
     {
       if (mode == DImode)
 	return x;
@@ -5555,7 +5598,11 @@  rs6000_legitimize_address (rtx x, rtx oldx ATTRIBU
 
       return force_reg (Pmode, x);
     }
-  else if (TARGET_ELF
+  else if ((TARGET_ELF
+#if TARGET_MACHO
+	    || !MACHO_DYNAMIC_NO_PIC_P
+#endif
+	    )
 	   && TARGET_32BIT
 	   && TARGET_NO_TOC
 	   && ! flag_pic
@@ -5563,32 +5610,18 @@  rs6000_legitimize_address (rtx x, rtx oldx ATTRIBU
 	   && GET_CODE (x) != CONST_DOUBLE
 	   && CONSTANT_P (x)
 	   && GET_MODE_NUNITS (mode) == 1
-	   && (GET_MODE_BITSIZE (mode) <= 32
-	       || ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
+	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
+	       || (/* ??? Assume floating point reg based on mode?  */
+		   (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
 		   && (mode == DFmode || mode == DDmode))))
     {
       rtx reg = gen_reg_rtx (Pmode);
-      emit_insn (gen_elf_high (reg, x));
+      if (TARGET_ELF)
+	emit_insn (gen_elf_high (reg, x));
+      else
+	emit_insn (gen_macho_high (reg, x));
       return gen_rtx_LO_SUM (Pmode, reg, x);
     }
-  else if (TARGET_MACHO && TARGET_32BIT && TARGET_NO_TOC
-	   && ! flag_pic
-#if TARGET_MACHO
-	   && ! MACHO_DYNAMIC_NO_PIC_P
-#endif
-	   && GET_CODE (x) != CONST_INT
-	   && GET_CODE (x) != CONST_DOUBLE
-	   && CONSTANT_P (x)
-	   && GET_MODE_NUNITS (mode) == 1
-	   && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
-	       || (mode != DFmode && mode != DDmode))
-	   && mode != DImode
-	   && mode != TImode)
-    {
-      rtx reg = gen_reg_rtx (Pmode);
-      emit_insn (gen_macho_high (reg, x));
-      return gen_rtx_LO_SUM (Pmode, reg, x);
-    }
   else if (TARGET_TOC
 	   && GET_CODE (x) == SYMBOL_REF
 	   && constant_pool_expr_p (x)
@@ -6177,7 +6210,18 @@  rs6000_legitimize_reload_address (rtx x, enum mach
 #endif
       /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
 	 The same goes for DImode without 64-bit gprs and DFmode and DDmode
-	 without fprs.  */
+	 without fprs.
+	 ??? Assume floating point reg based on mode?  This assumption is
+	 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
+	 where reload ends up doing a DFmode load of a constant from
+	 mem using two gprs.  Unfortunately, at this point reload
+	 hasn't yet selected regs so poking around in reload data
+	 won't help and even if we could figure out the regs reliably,
+	 we'd still want to allow this transformation when the mem is
+	 naturally aligned.  Since we say the address is good here, we
+	 can't disable offsets from LO_SUMs in mem_operand_gpr.
+	 FIXME: Allow offset from lo_sum for other modes too, when
+	 mem is sufficiently aligned.  */
       && mode != TFmode
       && mode != TDmode
       && (mode != DImode || TARGET_POWERPC64)
@@ -6327,7 +6371,7 @@  rs6000_legitimate_address_p (enum machine_mode mod
 	  || XEXP (x, 0) == arg_pointer_rtx)
       && GET_CODE (XEXP (x, 1)) == CONST_INT)
     return 1;
-  if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict))
+  if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
     return 1;
   if (mode != TImode
       && mode != TFmode
@@ -6355,7 +6399,8 @@  rs6000_legitimate_address_p (enum machine_mode mod
 	   && (mode == DFmode || mode == DDmode || mode == DImode))
       && TARGET_UPDATE
       && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
-      && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1), reg_ok_strict)
+      && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
+					      reg_ok_strict, false)
 	  || (!avoiding_indexed_address_p (mode)
 	      && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
       && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
@@ -6419,7 +6464,7 @@  rs6000_mode_dependent_address (const_rtx addr)
 	  && GET_CODE (XEXP (addr, 1)) == CONST_INT)
 	{
 	  unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
-	  return val + 12 + 0x8000 >= 0x10000;
+	  return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
 	}
       break;
 
@@ -6489,14 +6534,16 @@  rs6000_find_base_term (rtx op)
 
    in 32-bit mode, that the recog predicate rejects.  */
 
-bool
-rs6000_offsettable_memref_p (rtx op)
+static bool
+rs6000_offsettable_memref_p (rtx op, enum machine_mode reg_mode)
 {
+  bool worst_case;
+
   if (!MEM_P (op))
     return false;
 
   /* First mimic offsettable_memref_p.  */
-  if (offsettable_address_p (1, GET_MODE (op), XEXP (op, 0)))
+  if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
     return true;
 
   /* offsettable_address_p invokes rs6000_mode_dependent_address, but
@@ -6504,8 +6551,13 @@  rs6000_find_base_term (rtx op)
      reference and, therefore, assumes that it is the largest supported
      mode (TFmode).  As a consequence, legitimate offsettable memory
      references are rejected.  rs6000_legitimate_offset_address_p contains
-     the correct logic for the PLUS case of rs6000_mode_dependent_address.  */
-  return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0), 1);
+     the correct logic for the PLUS case of rs6000_mode_dependent_address,
+     at least with a little bit of help here given that we know the
+     actual registers used.  */
+  worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
+		|| GET_MODE_SIZE (reg_mode) == 4);
+  return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
+					     true, worst_case);
 }
 
 /* Change register usage conditional on target flags.  */
@@ -13516,7 +13568,8 @@  rs6000_secondary_reload (bool in_p,
 	  if (rclass == GENERAL_REGS || rclass == BASE_REGS)
 	    {
 	      if (!legitimate_indirect_address_p (addr, false)
-		  && !rs6000_legitimate_offset_address_p (TImode, addr, false))
+		  && !rs6000_legitimate_offset_address_p (TImode, addr,
+							  false, true))
 		{
 		  sri->icode = icode;
 		  /* account for splitting the loads, and converting the
@@ -13589,18 +13642,9 @@  rs6000_secondary_reload (bool in_p,
 	   && MEM_P (x)
 	   && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
     {
-      rtx addr = XEXP (x, 0);
+      rtx off = address_offset (XEXP (x, 0));
 
-      if (GET_CODE (addr) == PRE_MODIFY)
-	addr = XEXP (addr, 1);
-      else if (GET_CODE (addr) == LO_SUM
-	       && GET_CODE (XEXP (addr, 0)) == REG
-	       && GET_CODE (XEXP (addr, 1)) == CONST)
-	addr = XEXP (XEXP (addr, 1), 0);
-
-      if (GET_CODE (addr) == PLUS
-	  && GET_CODE (XEXP (addr, 1)) == CONST_INT
-	  && (INTVAL (XEXP (addr, 1)) & 3) != 0)
+      if (off != NULL_RTX && (INTVAL (off) & 3) != 0)
 	{
 	  if (in_p)
 	    sri->icode = CODE_FOR_reload_di_load;
@@ -13612,6 +13656,27 @@  rs6000_secondary_reload (bool in_p,
       else
 	default_p = true;
     }
+  else if (!TARGET_POWERPC64
+	   && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+	   && MEM_P (x)
+	   && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
+    {
+      rtx off = address_offset (XEXP (x, 0));
+
+      if (off != NULL_RTX
+	  && ((unsigned HOST_WIDE_INT) INTVAL (off) + 0x8000
+	      >= 0x1000u - (GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD)))
+	{
+	  if (in_p)
+	    sri->icode = CODE_FOR_reload_si_load;
+	  else
+	    sri->icode = CODE_FOR_reload_si_store;
+	  sri->extra_cost = 2;
+	  ret = NO_REGS;
+	}
+      else
+	default_p = true;
+    }
   else
     default_p = true;
 
@@ -13700,8 +13765,9 @@  rs6000_secondary_reload_inner (rtx reg, rtx mem, r
 	}
 
       if (GET_CODE (addr) == PLUS
-	  && (!rs6000_legitimate_offset_address_p (TImode, addr, false)
-	      || and_op2 != NULL_RTX))
+	  && (and_op2 != NULL_RTX
+	      || !rs6000_legitimate_offset_address_p (TImode, addr,
+						      false, true)))
 	{
 	  addr_op1 = XEXP (addr, 0);
 	  addr_op2 = XEXP (addr, 1);
@@ -13733,7 +13799,8 @@  rs6000_secondary_reload_inner (rtx reg, rtx mem, r
 	  scratch_or_premodify = scratch;
 	}
       else if (!legitimate_indirect_address_p (addr, false)
-	       && !rs6000_legitimate_offset_address_p (TImode, addr, false))
+	       && !rs6000_legitimate_offset_address_p (TImode, addr,
+						       false, true))
 	{
 	  if (TARGET_DEBUG_ADDR)
 	    {
@@ -13792,7 +13859,7 @@  rs6000_secondary_reload_inner (rtx reg, rtx mem, r
 	      && GET_MODE_SIZE (mode) == 8
 	      && and_op2 == NULL_RTX
 	      && scratch_or_premodify == scratch
-	      && rs6000_legitimate_offset_address_p (mode, addr, false)))
+	      && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
 	;
 
       else if (GET_CODE (addr) == PLUS)
@@ -13901,10 +13968,11 @@  rs6000_secondary_reload_inner (rtx reg, rtx mem, r
 }
 
 /* Convert reloads involving 64-bit gprs and misaligned offset
-   addressing to use indirect addressing.  */
+   addressing, or multiple 32-bit gprs and offsets that are too large,
+   to use indirect addressing.  */
 
 void
-rs6000_secondary_reload_ppc64 (rtx reg, rtx mem, rtx scratch, bool store_p)
+rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
 {
   int regno = true_regnum (reg);
   enum reg_class rclass;
@@ -13913,7 +13981,7 @@  void
 
   if (TARGET_DEBUG_ADDR)
     {
-      fprintf (stderr, "\nrs6000_secondary_reload_ppc64, type = %s\n",
+      fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
 	       store_p ? "store" : "load");
       fprintf (stderr, "reg:\n");
       debug_rtx (reg);
@@ -16993,7 +17061,7 @@  rs6000_split_multireg_move (rtx dst, rtx src)
 	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
 	      src = replace_equiv_address (src, breg);
 	    }
-	  else if (! rs6000_offsettable_memref_p (src))
+	  else if (! rs6000_offsettable_memref_p (src, reg_mode))
 	    {
 	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
 		{
@@ -17059,7 +17127,7 @@  rs6000_split_multireg_move (rtx dst, rtx src)
 		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
 	      dst = replace_equiv_address (dst, breg);
 	    }
-	  else if (!rs6000_offsettable_memref_p (dst)
+	  else if (!rs6000_offsettable_memref_p (dst, reg_mode)
 		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
 	    {
 	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
@@ -17097,7 +17165,7 @@  rs6000_split_multireg_move (rtx dst, rtx src)
 		}
 	    }
 	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
-	    gcc_assert (rs6000_offsettable_memref_p (dst));
+	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
 	}
 
       for (i = 0; i < nregs; i++)
@@ -27935,7 +28003,7 @@  rs6000_allocate_stack_temp (enum machine_mode mode
   if (!legitimate_indirect_address_p (addr, strict_p))
     {
       if (offsettable_p
-	  && !rs6000_legitimate_offset_address_p (mode, addr, strict_p))
+	  && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
 	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
 
       else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 189800)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -9659,15 +9659,12 @@ 
 #endif
 }")
 
-;; Don't have reload use general registers to load a constant.  First,
-;; it might not work if the output operand is the equivalent of
-;; a non-offsettable memref, but also it is less efficient than loading
-;; the constant into an FP register, since it will probably be used there.
-;; The "??" is a kludge until we can figure out a more reasonable way
-;; of handling these non-offsettable values.
+;; Don't have reload use general registers to load a constant.  It is
+;; less efficient than loading the constant into an FP register, since
+;; it will probably be used there.
 (define_insn "*movdf_hardfloat32"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=!r,??r,m,ws,?wa,ws,?wa,Z,?Z,d,d,m,wa,!r,!r,!r")
-	(match_operand:DF 1 "input_operand" "r,m,r,ws,wa,Z,Z,ws,wa,d,m,d,j,G,H,F"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,m,d,d,wa,!r,!r,!r")
+	(match_operand:DF 1 "input_operand" "r,Y,r,ws,wa,Z,Z,ws,wa,d,m,d,j,G,H,F"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT 
    && (gpc_reg_operand (operands[0], DFmode)
        || gpc_reg_operand (operands[1], DFmode))"
@@ -9691,11 +9688,11 @@ 
     case 8:
       return \"stxsd%U0x %x1,%y0\";
     case 9:
-      return \"fmr %0,%1\";
+      return \"stfd%U0%X0 %1,%0\";
     case 10:
       return \"lfd%U1%X1 %0,%1\";
     case 11:
-      return \"stfd%U0%X0 %1,%0\";
+      return \"fmr %0,%1\";
     case 12:
       return \"xxlxor %x0,%x0,%x0\";
     case 13:
@@ -9704,46 +9701,47 @@ 
       return \"#\";
     }
 }"
-  [(set_attr "type" "two,load,store,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,*,*,*")
-   (set_attr "length" "8,16,16,4,4,4,4,4,4,4,4,4,4,8,12,16")])
+  [(set_attr "type" "store,load,two,fp,fp,fpload,fpload,fpstore,fpstore,fpstore,fpload,fp,vecsimple,*,*,*")
+   (set_attr "length" "8,8,8,4,4,4,4,4,4,4,4,4,4,8,12,16")])
 
 (define_insn "*movdf_softfloat32"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m,r,r,r")
-	(match_operand:DF 1 "input_operand" "r,m,r,G,H,F"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,r,r,r,r")
+	(match_operand:DF 1 "input_operand" "r,Y,r,G,H,F"))]
   "! TARGET_POWERPC64 
    && ((TARGET_FPRS && TARGET_SINGLE_FLOAT) 
        || TARGET_SOFT_FLOAT || TARGET_E500_SINGLE)
    && (gpc_reg_operand (operands[0], DFmode)
        || gpc_reg_operand (operands[1], DFmode))"
   "#"
-  [(set_attr "type" "two,load,store,*,*,*")
+  [(set_attr "type" "store,load,two,*,*,*")
    (set_attr "length" "8,8,8,8,12,16")])
 
 ;; Reload patterns to support gpr load/store with misaligned mem.
-(define_expand "reload_di_store"
+;; and multiple gpr load/store at offset >= 0xfffc
+(define_expand "reload_<mode>_store"
   [(parallel [(match_operand 0 "memory_operand" "=m")
               (match_operand 1 "gpc_reg_operand" "r")
-              (match_operand:DI 2 "register_operand" "=&b")])]
-  "TARGET_POWERPC64"
+              (match_operand:GPR 2 "register_operand" "=&b")])]
+  ""
 {
-  rs6000_secondary_reload_ppc64 (operands[1], operands[0], operands[2], true);
+  rs6000_secondary_reload_gpr (operands[1], operands[0], operands[2], true);
   DONE;
 })
 
-(define_expand "reload_di_load"
+(define_expand "reload_<mode>_load"
   [(parallel [(match_operand 0 "gpc_reg_operand" "=r")
               (match_operand 1 "memory_operand" "m")
-              (match_operand:DI 2 "register_operand" "=b")])]
-  "TARGET_POWERPC64"
+              (match_operand:GPR 2 "register_operand" "=b")])]
+  ""
 {
-  rs6000_secondary_reload_ppc64 (operands[0], operands[1], operands[2], false);
+  rs6000_secondary_reload_gpr (operands[0], operands[1], operands[2], false);
   DONE;
 })
 
 ; ld/std require word-aligned displacements -> 'Y' constraint.
 ; List Y->r and r->Y before r->r for reload.
 (define_insn "*movdf_hardfloat64_mfpgpr"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,d,d,m,wa,*c*l,!r,*h,!r,!r,!r,r,d")
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,m,d,d,wa,*c*l,!r,*h,!r,!r,!r,r,d")
 	(match_operand:DF 1 "input_operand" "r,Y,r,ws,?wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F,d,r"))]
   "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS 
    && TARGET_DOUBLE_FLOAT
@@ -9759,9 +9757,9 @@ 
    lxsd%U1x %x0,%y1
    stxsd%U0x %x1,%y0
    stxsd%U0x %x1,%y0
+   stfd%U0%X0 %1,%0
+   lfd%U1%X1 %0,%1
    fmr %0,%1
-   lfd%U1%X1 %0,%1
-   stfd%U0%X0 %1,%0
    xxlxor %x0,%x0,%x0
    mt%0 %1
    mf%1 %0
@@ -9771,13 +9769,13 @@ 
    #
    mftgpr %0,%1
    mffgpr %0,%1"
-  [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
+  [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fpstore,fpload,fp,vecsimple,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
    (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
 
 ; ld/std require word-aligned displacements -> 'Y' constraint.
 ; List Y->r and r->Y before r->r for reload.
 (define_insn "*movdf_hardfloat64"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,d,d,m,wa,*c*l,!r,*h,!r,!r,!r")
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,m,d,d,wa,*c*l,!r,*h,!r,!r,!r")
 	(match_operand:DF 1 "input_operand" "r,Y,r,ws,wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F"))]
   "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS 
    && TARGET_DOUBLE_FLOAT
@@ -9793,9 +9791,9 @@ 
    lxsd%U1x %x0,%y1
    stxsd%U0x %x1,%y0
    stxsd%U0x %x1,%y0
+   stfd%U0%X0 %1,%0
+   lfd%U1%X1 %0,%1
    fmr %0,%1
-   lfd%U1%X1 %0,%1
-   stfd%U0%X0 %1,%0
    xxlxor %x0,%x0,%x0
    mt%0 %1
    mf%1 %0
@@ -9803,18 +9801,18 @@ 
    #
    #
    #"
-  [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,mtjmpr,mfjmpr,*,*,*,*")
+  [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fpstore,fpload,fp,vecsimple,mtjmpr,mfjmpr,*,*,*,*")
    (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16")])
 
 (define_insn "*movdf_softfloat64"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h")
-	(match_operand:DF 1 "input_operand" "Y,r,r,r,h,G,H,F,0"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h")
+	(match_operand:DF 1 "input_operand" "r,Y,r,r,h,G,H,F,0"))]
   "TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
    && (gpc_reg_operand (operands[0], DFmode)
        || gpc_reg_operand (operands[1], DFmode))"
   "@
+   std%U0%X0 %1,%0
    ld%U1%X1 %0,%1
-   std%U0%X0 %1,%0
    mr %0,%1
    mt%0 %1
    mf%1 %0
@@ -9822,7 +9820,7 @@ 
    #
    #
    {cror 0,0,0|nop}"
-  [(set_attr "type" "load,store,*,mtjmpr,mfjmpr,*,*,*,*")
+  [(set_attr "type" "store,load,*,mtjmpr,mfjmpr,*,*,*,*")
    (set_attr "length" "4,4,4,4,4,8,12,16,4")])
 
 (define_expand "movtf"
@@ -9831,12 +9829,12 @@ 
   "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
   "{ rs6000_emit_move (operands[0], operands[1], TFmode); DONE; }")
 
-; It's important to list the o->f and f->o moves before f->f because
-; otherwise reload, given m->f, will try to pick f->f and reload it,
-; which doesn't make progress.  Likewise r->Y must be before r->r.
+;; It's important to list Y->r and r->Y before r->r because otherwise
+;; reload, given m->r, will try to pick r->r and reload it, which
+;; doesn't make progress.
 (define_insn_and_split "*movtf_internal"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=o,d,d,r,Y,r")
-	(match_operand:TF 1 "input_operand"         "d,o,d,YGHF,r,r"))]
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
+	(match_operand:TF 1 "input_operand" "d,m,d,r,YGHF,r"))]
   "!TARGET_IEEEQUAD
    && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128
    && (gpc_reg_operand (operands[0], TFmode)
@@ -9848,8 +9846,8 @@ 
   [(set_attr "length" "8,8,8,20,20,16")])
 
 (define_insn_and_split "*movtf_softfloat"
-  [(set (match_operand:TF 0 "rs6000_nonimmediate_operand" "=r,Y,r")
-	(match_operand:TF 1 "input_operand"         "YGHF,r,r"))]
+  [(set (match_operand:TF 0 "rs6000_nonimmediate_operand" "=Y,r,r")
+	(match_operand:TF 1 "input_operand"         "r,YGHF,r"))]
   "!TARGET_IEEEQUAD
    && (TARGET_SOFT_FLOAT || !TARGET_FPRS) && TARGET_LONG_DOUBLE_128
    && (gpc_reg_operand (operands[0], TFmode)
@@ -9890,9 +9888,9 @@ 
 })
 
 (define_insn_and_split "*extenddftf2_internal"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=o,d,&d,r")
-       (float_extend:TF (match_operand:DF 1 "input_operand" "dr,md,md,rmGHF")))
-   (use (match_operand:DF 2 "zero_reg_mem_operand" "rd,m,d,n"))]
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=m,Y,d,&d,r")
+       (float_extend:TF (match_operand:DF 1 "input_operand" "d,r,md,md,rmGHF")))
+   (use (match_operand:DF 2 "zero_reg_mem_operand" "d,r,m,d,n"))]
   "!TARGET_IEEEQUAD
    && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT 
    && TARGET_LONG_DOUBLE_128"
@@ -10145,11 +10143,13 @@ 
 ;; Next come the multi-word integer load and store and the load and store
 ;; multiple insns.
 
-; List r->r after r->"o<>", otherwise reload will try to reload a
-; non-offsettable address by using r->r which won't make progress.
+;; List r->r after r->Y, otherwise reload will try to reload a
+;; non-offsettable address by using r->r which won't make progress.
+;; Use of fprs is disparaged slightly otherwise reload prefers to reload
+;; a gpr into a fpr instead of reloading an invalid 'Y' address
 (define_insn "*movdi_internal32"
-  [(set (match_operand:DI 0 "rs6000_nonimmediate_operand" "=o<>,r,r,*d,*d,m,r,?wa")
-	(match_operand:DI 1 "input_operand" "r,r,m,d,m,d,IJKnGHF,O"))]
+  [(set (match_operand:DI 0 "rs6000_nonimmediate_operand" "=Y,r,r,?m,?*d,?*d,r,?wa")
+	(match_operand:DI 1 "input_operand" "r,Y,r,d,m,d,IJKnGHF,O"))]
   "! TARGET_POWERPC64
    && (gpc_reg_operand (operands[0], DImode)
        || gpc_reg_operand (operands[1], DImode))"
@@ -10157,12 +10157,12 @@ 
    #
    #
    #
+   stfd%U0%X0 %1,%0
+   lfd%U1%X1 %0,%1
    fmr %0,%1
-   lfd%U1%X1 %0,%1
-   stfd%U0%X0 %1,%0
    #
    xxlxor %x0,%x0,%x0"
-  [(set_attr "type" "load,*,store,fp,fpload,fpstore,*,vecsimple")])
+  [(set_attr "type" "store,load,*,fpstore,fpload,fp,*,vecsimple")])
 
 (define_split
   [(set (match_operand:DI 0 "gpc_reg_operand" "")
@@ -10195,50 +10195,50 @@ 
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
 
 (define_insn "*movdi_mfpgpr"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m,r,r,r,*d,*d,m,r,*h,*h,r,*d")
-	(match_operand:DI 1 "input_operand" "r,m,r,I,L,nF,d,m,d,*h,r,0,*d,r"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,r,?*d")
+	(match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,*d,r"))]
   "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
    && (gpc_reg_operand (operands[0], DImode)
        || gpc_reg_operand (operands[1], DImode))"
   "@
+   std%U0%X0 %1,%0
+   ld%U1%X1 %0,%1
    mr %0,%1
-   ld%U1%X1 %0,%1
-   std%U0%X0 %1,%0
    li %0,%1
    lis %0,%v1
    #
+   stfd%U0%X0 %1,%0
+   lfd%U1%X1 %0,%1
    fmr %0,%1
-   lfd%U1%X1 %0,%1
-   stfd%U0%X0 %1,%0
    mf%1 %0
    mt%0 %1
    {cror 0,0,0|nop}
    mftgpr %0,%1
    mffgpr %0,%1"
-  [(set_attr "type" "*,load,store,*,*,*,fp,fpload,fpstore,mfjmpr,mtjmpr,*,mftgpr,mffgpr")
+  [(set_attr "type" "store,load,*,*,*,*,fpstore,fpload,fp,mfjmpr,mtjmpr,*,mftgpr,mffgpr")
    (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4")])
 
 (define_insn "*movdi_internal64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m,r,r,r,*d,*d,m,r,*h,*h,?wa")
-	(match_operand:DI 1 "input_operand" "r,m,r,I,L,nF,d,m,d,*h,r,0,O"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,?wa")
+	(match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,O"))]
   "TARGET_POWERPC64 && (!TARGET_MFPGPR || !TARGET_HARD_FLOAT || !TARGET_FPRS)
    && (gpc_reg_operand (operands[0], DImode)
        || gpc_reg_operand (operands[1], DImode))"
   "@
+   std%U0%X0 %1,%0
+   ld%U1%X1 %0,%1
    mr %0,%1
-   ld%U1%X1 %0,%1
-   std%U0%X0 %1,%0
    li %0,%1
    lis %0,%v1
    #
+   stfd%U0%X0 %1,%0
+   lfd%U1%X1 %0,%1
    fmr %0,%1
-   lfd%U1%X1 %0,%1
-   stfd%U0%X0 %1,%0
    mf%1 %0
    mt%0 %1
    {cror 0,0,0|nop}
    xxlxor %x0,%x0,%x0"
-  [(set_attr "type" "*,load,store,*,*,*,fp,fpload,fpstore,mfjmpr,mtjmpr,*,vecsimple")
+  [(set_attr "type" "store,load,*,*,*,*,fpstore,fpload,fp,mfjmpr,mtjmpr,*,vecsimple")
    (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4")])
 
 ;; immediate value valid for a single instruction hiding in a const_double
@@ -10313,8 +10313,8 @@ 
 ;; giving the SCRATCH mq.
 
 (define_insn "*movti_power"
-  [(set (match_operand:TI 0 "reg_or_mem_operand" "=Q,m,????r,????r,????r,r")
-	(match_operand:TI 1 "input_operand" "r,r,r,Q,m,n"))
+  [(set (match_operand:TI 0 "reg_or_mem_operand" "=Q,Y,????r,????r,????r,r")
+	(match_operand:TI 1 "input_operand" "r,r,Q,Y,r,n"))
    (clobber (match_scratch:SI 2 "=q,q#X,X,X,X,X"))]
   "TARGET_POWER && ! TARGET_POWERPC64
    && (gpc_reg_operand (operands[0], TImode) || gpc_reg_operand (operands[1], TImode))"
@@ -10329,25 +10329,25 @@ 
       if (TARGET_STRING)
         return \"{stsi|stswi} %1,%P0,16\";
     case 1:
+      return \"#\";
     case 2:
-      return \"#\";
-    case 3:
       /* If the address is not used in the output, we can use lsi.  Otherwise,
 	 fall through to generating four loads.  */
       if (TARGET_STRING
 	  && ! reg_overlap_mentioned_p (operands[0], operands[1]))
 	return \"{lsi|lswi} %0,%P1,16\";
       /* ... fall through ...  */
+    case 3:
     case 4:
     case 5:
       return \"#\";
     }
 }"
-  [(set_attr "type" "store,store,*,load,load,*")])
+  [(set_attr "type" "store,store,load,load,*,*")])
 
 (define_insn "*movti_string"
-  [(set (match_operand:TI 0 "reg_or_mem_operand" "=Q,o<>,????r,????r,????r,r")
-	(match_operand:TI 1 "input_operand" "r,r,r,Q,m,n"))]
+  [(set (match_operand:TI 0 "reg_or_mem_operand" "=Q,Y,????r,????r,????r,r")
+	(match_operand:TI 1 "input_operand" "r,r,Q,Y,r,n"))]
   "! TARGET_POWER && ! TARGET_POWERPC64
    && (gpc_reg_operand (operands[0], TImode) || gpc_reg_operand (operands[1], TImode))"
   "*
@@ -10360,33 +10360,33 @@ 
       if (TARGET_STRING)
         return \"{stsi|stswi} %1,%P0,16\";
     case 1:
+      return \"#\";
     case 2:
-      return \"#\";
-    case 3:
       /* If the address is not used in the output, we can use lsi.  Otherwise,
 	 fall through to generating four loads.  */
       if (TARGET_STRING
           && ! reg_overlap_mentioned_p (operands[0], operands[1]))
 	return \"{lsi|lswi} %0,%P1,16\";
       /* ... fall through ...  */
+    case 3:
     case 4:
     case 5:
       return \"#\";
     }
 }"
-  [(set_attr "type" "store_ux,store_ux,*,load_ux,load_ux,*")
+  [(set_attr "type" "store_ux,store_ux,load_ux,load_ux,*,*")
    (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
    			                  (const_string "always")
 					  (const_string "conditional")))])
 
 (define_insn "*movti_ppc64"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o<>,r")
-	(match_operand:TI 1 "input_operand" "r,r,m"))]
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=Y,r,r")
+	(match_operand:TI 1 "input_operand" "r,Y,r"))]
   "(TARGET_POWERPC64 && (gpc_reg_operand (operands[0], TImode)
     || gpc_reg_operand (operands[1], TImode)))
    && VECTOR_MEM_NONE_P (TImode)"
   "#"
-  [(set_attr "type" "*,store,load")])
+  [(set_attr "type" "store,load,*")])
 
 (define_split
   [(set (match_operand:TI 0 "gpc_reg_operand" "")
@@ -13215,8 +13215,8 @@ 
    (set_attr "length" "12")])
 
 (define_insn "stack_protect_setdi"
-  [(set (match_operand:DI 0 "memory_operand" "=m")
-	(unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+  [(set (match_operand:DI 0 "memory_operand" "=Y")
+	(unspec:DI [(match_operand:DI 1 "memory_operand" "Y")] UNSPEC_SP_SET))
    (set (match_scratch:DI 2 "=&r") (const_int 0))]
   "TARGET_64BIT"
   "ld%U1%X1 %2,%1\;std%U0%X0 %2,%0\;{lil|li} %2,0"
@@ -13257,8 +13257,8 @@ 
 
 (define_insn "stack_protect_testdi"
   [(set (match_operand:CCEQ 0 "cc_reg_operand" "=x,?y")
-        (unspec:CCEQ [(match_operand:DI 1 "memory_operand" "m,m")
-		      (match_operand:DI 2 "memory_operand" "m,m")]
+        (unspec:CCEQ [(match_operand:DI 1 "memory_operand" "Y,Y")
+		      (match_operand:DI 2 "memory_operand" "Y,Y")]
 		     UNSPEC_SP_TEST))
    (set (match_scratch:DI 4 "=r,r") (const_int 0))
    (clobber (match_scratch:DI 3 "=&r,&r"))]