diff mbox

rs6000: Revamp rotate-and-mask and insert

Message ID f75f7e14d91da792267d71ccbac579335409f742.1436707789.git.segher@kernel.crashing.org
State New
Headers show

Commit Message

Segher Boessenkool July 12, 2015, 5:08 p.m. UTC
This rewrites all the rotate-and-mask and insert patterns.

The goals are to have simpler, shorter, less error-prone code (with much
fewer machine description patterns), as well as to get better optimised
machine code.

All "mask only" insns are now handled by a single pattern; all rotate (or
shift) and mask by another pattern; and all insert insns by just a few
patterns as well.  Also, more patterns that can be done by a combination
of two of these (or a load immediate and one) are now done that way.

This removes a few constraints and output modifiers.  I don't think any
inline assembler code uses those (they really shouldn't), but if in fact
it is used, it is easy to put back simpler definitions just for
compatibility.  We can use a few free letters for both constraints and
modifiers.

Bootstrapped and tested on powerpc64-linux with the usual options
(-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); no regressions.

This reduces the number of generated instructions for 32-bit by about
1 in 2000, and for 64-bit by about 1 in 700.  It always does that by
decreasing path length so it should never make performance worse either.

There are two FIXMEs for pretty harmless stuff, but the rtx_costs one
at least probably wants fixing soonish.

Oh, and this patch should be applied after the HAVE_insv patch I posted
earlier today, for good results.


Is this okay for trunk?


2015-07-12  Segher Boessenkool  <segher@kernel.crashing.org>

	PR target/66217
	* config/rs6000/constraints.md ("S", "T", "t"): Delete.
	* config/rs6000/predicates.md (mask_operand, mask_operand_wrap,
	mask64_operand, mask64_2_operand, any_mask_operand, and64_2_operand,
	and_2rld_operand):  Delete.
	(and_operand): Adjust.
	(rotate_mask_operator): New.
	* config/rs6000/rs6000-protos.h (build_mask64_2_operands,
	includes_lshift_p, includes_rshift_p, includes_rldic_lshift_p,
	includes_rldicr_lshift_p, insvdi_rshift_rlwimi_p, extract_MB,
	extract_ME): Delete.
	(rs6000_is_valid_mask, rs6000_is_valid_and_mask,
	rs6000_is_valid_shift_mask, rs6000_is_valid_insert_mask,
	rs6000_insn_for_and_mask, rs6000_insn_for_shift_mask,
	rs6000_insn_for_insert_mask, rs6000_is_valid_2insn_and,
	rs6000_emit_2insn_and): New.
	* config/rs6000/rs6000.c (num_insns_constant): Adjust.
	(build_mask64_2_operands, includes_lshift_p, includes_rshift_p,
	includes_rldic_lshift_p, includes_rldicr_lshift_p,
	insvdi_rshift_rlwimi_p, extract_MB, extract_ME): Delete.
	(rs6000_is_valid_mask, rs6000_is_valid_and_mask,
	rs6000_insn_for_and_mask, rs6000_is_valid_shift_mask,
	rs6000_insn_for_shift_mask, rs6000_is_valid_insert_mask,
	rs6000_insn_for_insert_mask, rs6000_is_valid_2insn_and,
	rs6000_emit_2insn_and): New.
	(print_operand) <'b', 'B', 'm', 'M', 's', 'S', 'W'>: Delete.
	(rs6000_rtx_costs) <CONST_INT>: Delete mask_operand and mask64_operand
	handling.
	<NOT>: Don't fall through to next case.
	<AND>: Handle the various rotate-and-mask cases directly.
	<IOR>: Always cost as one insn.
	* config/rs6000/rs6000.md (splitter for bswap:SI): Adjust.
	(and<mode>3): Adjust expander for the new patterns.
	(and<mode>3_imm, and<mode>3_imm_dot, and<mode>3_imm_dot2,
	and<mode>3_imm_mask_dot, and<mode>3_imm_mask_dot2): Adjust condition.
	(*and<mode>3_imm_dot_shifted): New.
	(*and<mode>3_mask): Delete, rewrite as ...
	(and<mode>3_mask): ... New.
	(*and<mode>3_mask_dot, *and<mode>3_mask_dot2): Rewrite.
	(andsi3_internal0_nomc): Delete.
	(*andsi3_internal6): Delete.
	(*and<mode>3_2insn): New.
	(insv, insvsi_internal, *insvsi_internal1, *insvsi_internal2,
	*insvsi_internal3, *insvsi_internal4, *insvsi_internal5,
	*insvsi_internal6, insvdi_internal, *insvdi_internal2,
	*insvdi_internal3): Delete.
	(*rotl<mode>3_mask, *rotl<mode>3_mask_dot, *rotl<mode>3_mask_dot2,
	*rotl<mode>3_insert, *rotl<mode>3_insert_2, *rotl<mode>3_insert_3,
	*rotl<mode>3_insert_4, two splitters for multi-precision shifts,
	*ior<mode>_mask): New.
	(extzv, extzvdi_internal, *extzvdi_internal1, *extzvdi_internal2,
	*rotlsi3_mask, *rotlsi3_mask_dot, *rotlsi3_mask_dot2,
	*ashlsi3_imm_mask, *ashlsi3_imm_mask_dot, *ashlsi3_imm_mask_dot2,
	*lshrsi3_imm_mask, *lshrsi3_imm_mask_dot, *lshrsi3_imm_mask_dot2):
	Delete.
	(ashr<mode>3): Delete expander.
	(*ashr<mode>3): Rename to ...
	(ashr<mode>3): ... This.
	(ashrdi3_no_power, *ashrdisi3_noppc64be): Delete.
	(*rotldi3_internal4, *rotldi3_internal5 and split,
	*rotldi3_internal6 and split, *ashldi3_internal4, ashldi3_internal5
	and split, *ashldi3_internal6 and split, *ashldi3_internal7,
	ashldi3_internal8 and split, *ashldi3_internal9 and split): Delete.
	(*anddi3_2rld, *anddi3_2rld_dot, *anddi3_2rld_dot2): Delete.
	(splitter for loading a mask): Adjust.

---
 gcc/config/rs6000/constraints.md  |   18 -
 gcc/config/rs6000/predicates.md   |  170 +----
 gcc/config/rs6000/rs6000-protos.h |   17 +-
 gcc/config/rs6000/rs6000.c        |  796 ++++++++++++++--------
 gcc/config/rs6000/rs6000.md       | 1345 ++++++++++---------------------------
 5 files changed, 870 insertions(+), 1476 deletions(-)

Comments

David Edelsohn July 12, 2015, 8:18 p.m. UTC | #1
On Sun, Jul 12, 2015 at 1:08 PM, Segher Boessenkool
<segher@kernel.crashing.org> wrote:
> This rewrites all the rotate-and-mask and insert patterns.
>
> The goals are to have simpler, shorter, less error-prone code (with much
> fewer machine description patterns), as well as to get better optimised
> machine code.
>
> All "mask only" insns are now handled by a single pattern; all rotate (or
> shift) and mask by another pattern; and all insert insns by just a few
> patterns as well.  Also, more patterns that can be done by a combination
> of two of these (or a load immediate and one) are now done that way.
>
> This removes a few constraints and output modifiers.  I don't think any
> inline assembler code uses those (they really shouldn't), but if in fact
> it is used, it is easy to put back simpler definitions just for
> compatibility.  We can use a few free letters for both constraints and
> modifiers.
>
> Bootstrapped and tested on powerpc64-linux with the usual options
> (-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); no regressions.
>
> This reduces the number of generated instructions for 32-bit by about
> 1 in 2000, and for 64-bit by about 1 in 700.  It always does that by
> decreasing path length so it should never make performance worse either.
>
> There are two FIXMEs for pretty harmless stuff, but the rtx_costs one
> at least probably wants fixing soonish.
>
> Oh, and this patch should be applied after the HAVE_insv patch I posted
> earlier today, for good results.
>
>
> Is this okay for trunk?
>
>
> 2015-07-12  Segher Boessenkool  <segher@kernel.crashing.org>
>
>         PR target/66217
>         * config/rs6000/constraints.md ("S", "T", "t"): Delete.
>         * config/rs6000/predicates.md (mask_operand, mask_operand_wrap,
>         mask64_operand, mask64_2_operand, any_mask_operand, and64_2_operand,
>         and_2rld_operand):  Delete.
>         (and_operand): Adjust.
>         (rotate_mask_operator): New.
>         * config/rs6000/rs6000-protos.h (build_mask64_2_operands,
>         includes_lshift_p, includes_rshift_p, includes_rldic_lshift_p,
>         includes_rldicr_lshift_p, insvdi_rshift_rlwimi_p, extract_MB,
>         extract_ME): Delete.
>         (rs6000_is_valid_mask, rs6000_is_valid_and_mask,
>         rs6000_is_valid_shift_mask, rs6000_is_valid_insert_mask,
>         rs6000_insn_for_and_mask, rs6000_insn_for_shift_mask,
>         rs6000_insn_for_insert_mask, rs6000_is_valid_2insn_and,
>         rs6000_emit_2insn_and): New.
>         * config/rs6000/rs6000.c (num_insns_constant): Adjust.
>         (build_mask64_2_operands, includes_lshift_p, includes_rshift_p,
>         includes_rldic_lshift_p, includes_rldicr_lshift_p,
>         insvdi_rshift_rlwimi_p, extract_MB, extract_ME): Delete.
>         (rs6000_is_valid_mask, rs6000_is_valid_and_mask,
>         rs6000_insn_for_and_mask, rs6000_is_valid_shift_mask,
>         s6000_insn_for_shift_mask, rs6000_is_valid_insert_mask,
>         rs6000_insn_for_insert_mask, rs6000_is_valid_2insn_and,
>         rs6000_emit_2insn_and): New.
>         (print_operand) <'b', 'B', 'm', 'M', 's', 'S', 'W'>: Delete.
>         (rs6000_rtx_costs) <CONST_INT>: Delete mask_operand and mask64_operand
>         handling.
>         <NOT>: Don't fall through to next case.
>         <AND>: Handle the various rotate-and-mask cases directly.
>         <IOR>: Always cost as one insn.
>         * config/rs6000/rs6000.md (splitter for bswap:SI): Adjust.
>         (and<mode>3): Adjust expander for the new patterns.
>         (and<mode>3_imm, and<mode>3_imm_dot, and<mode>3_imm_dot2,
>         and<mode>3_imm_mask_dot, and<mode>3_imm_mask_dot2): Adjust condition.
>         (*and<mode>3_imm_dot_shifted): New.
>         (*and<mode>3_mask): Delete, rewrite as ...
>         (and<mode>3_mask): ... New.
>         (*and<mode>3_mask_dot, *and<mode>3_mask_dot): Rewrite.
>         (andsi3_internal0_nomc): Delete.
>         (*andsi3_internal6): Delete.
>         (*and<mode>3_2insn): New.
>         (insv, insvsi_internal, *insvsi_internal1, *insvsi_internal2,
>         *insvsi_internal3, *insvsi_internal4, *insvsi_internal5,
>         *insvsi_internal6, insvdi_internal, *insvdi_internal2,
>         *insvdi_internal3): Delete.
>         (*rotl<mode>3_mask, *rotl<mode>3_mask_dot, *rotl<mode>3_mask_dot2,
>         *rotl<mode>3_insert, *rotl<mode>3_insert_2, *rotl<mode>3_insert_3,
>         *rotl<mode>3_insert_4, two splitters for multi-precision shifts,
>         *ior<mode>_mask): New.
>         (extzv, extzvdi_internal, *extzvdi_internal1, *extzvdi_internal2,
>         *rotlsi3_mask, *rotlsi3_mask_dot, *rotlsi3_mask_dot2,
>         *ashlsi3_imm_mask, *ashlsi3_imm_mask_dot, *ashlsi3_imm_mask_dot2,
>         *lshrsi3_imm_mask, *lshrsi3_imm_mask_dot, *lshrsi3_imm_mask_dot2):
>         Delete.
>         (ashr<mode>3): Delete expander.
>         (*ashr<mode>3): Rename to ...
>         (ashr<mode>3): ... This.
>         (ashrdi3_no_power, *ashrdisi3_noppc64be): Delete.
>         (*rotldi3_internal4, *rotldi3_internal5 and split,
>         *rotldi3_internal6 and split, *ashldi3_internal4, ashldi3_internal5
>         and split, *ashldi3_internal6 and split, *ashldi3_internal7,
>         ashldi3_internal8 and split, *ashldi3_internal9 and split): Delete.
>         (*anddi3_2rld, *anddi3_2rld_dot, *anddi3_2rld_dot2): Delete.
>         (splitter for loading a mask): Adjust.

This is great!  I'm glad that you completed this feature.

We need to combine this with Alan's earlier patches.  I know that this
deletes some of the patterns that Alan was changing.  Alan's patches
also corrected a few predicate mistakes.  We need to figure out which
of Alan's patches to merge and which are superseded.

Thanks, David
Alan Modra July 15, 2015, 2:40 a.m. UTC | #2
On Sun, Jul 12, 2015 at 04:18:31PM -0400, David Edelsohn wrote:
> On Sun, Jul 12, 2015 at 1:08 PM, Segher Boessenkool
> <segher@kernel.crashing.org> wrote:
> > This rewrites all the rotate-and-mask and insert patterns.
> 
> This is great!  I'm glad that you completed this feature.

Compared to mainline the results do look good, and some of the bugs I
found in the old patterns have disappeared.  I particularly like the
fact that the "S" and "T" operand constraints are no longer needed.

There are one or two regressions related to a TODO that Segher added.
The following produces poorer code than mainline.

extern void lfoo (long);
void mask2_cond1 (long x)
{
  if ((x & 0x00fff000000fffffL) > 0)
    lfoo (0);
}
void mask2_cond2 (long x)
{
  if ((x & 0x00fff000000fffffL) > 0)
    lfoo (x & 0x00fff000000fffffL);
}

Also, rs6000.md patterns use SImode for the rotate/shift count.
Segher has added some new insns that use DImode when 64-bit.  I think
that inconsistency ought to be fixed.

(I haven't completely analysed this) but won't

(define_insn_and_split "*and<mode>3_imm_dot_shifted"
[snip]
	    (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r")
			  (match_operand:GPR 4 "const_int_operand" "n,n"))
					 ^^^this
fail to match combined patterns generated from other rs6000.md
patterns like

(define_insn "lshr<mode>3"
  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
	(lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
		      (match_operand:SI 2 "reg_or_cint_operand" "rn")))]
		      		     ^^this?
Segher Boessenkool July 15, 2015, 4:53 p.m. UTC | #3
On Wed, Jul 15, 2015 at 12:10:51PM +0930, Alan Modra wrote:
> There are one or two regressions related to a TODO that Segher added.
> The following produces poorer code than mainline.

Yes, that's why it says TODO :-)

> Also, rs6000.md patterns uses SImode for the rotate/shift count.
> Segher has added some new insns that use DImode when 64-bit.

"Some"?  Looks like all even.  &($^&*()%$&.  Would be nice if gen*
could warn, hrm.

> I think that inconsistency ought to be fixed.

Yeah; patch in progress.  Thanks for spotting it.

> (I haven't completely analysed this) but won't
> 
> (define_insn_and_split "*and<mode>3_imm_dot_shifted"
> [snip]
> 	    (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r")
> 			  (match_operand:GPR 4 "const_int_operand" "n,n"))
> 					 ^^^this
> fail to match combined patterns generated from other rs6000.md
> patterns like
> 
> (define_insn "lshr<mode>3"

This pattern isn't generated from that; it's generated from and_imm.
Combine thinks many "dot1" patterns should look different than the
"base" and dot2 patterns; this is an important case (it happens when
testing a single bit).


Segher
David Edelsohn July 20, 2015, 4:18 p.m. UTC | #4
On Sun, Jul 12, 2015 at 1:08 PM, Segher Boessenkool
<segher@kernel.crashing.org> wrote:
> This rewrites all the rotate-and-mask and insert patterns.
>
> The goals are to have simpler, shorter, less error-prone code (with much
> fewer machine description patterns), as well as to get better optimised
> machine code.
>
> All "mask only" insns are now handled by a single pattern; all rotate (or
> shift) and mask by another pattern; and all insert insns by just a few
> patterns as well.  Also, more patterns that can be done by a combination
> of two of these (or a load immediate and one) are now done that way.
>
> This removes a few constraints and output modifiers.  I don't think any
> inline assembler code uses those (they really shouldn't), but if in fact
> it is used, it is easy to put back simpler definitions just for
> compatibility.  We can use a few free letters for both constraints and
> modifiers.
>
> Bootstrapped and tested on powerpc64-linux with the usual options
> (-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); no regressions.
>
> This reduces the number of generated instructions for 32-bit by about
> 1 in 2000, and for 64-bit by about 1 in 700.  It always does that by
> decreasing path length so it should never make performance worse either.
>
> There are two FIXMEs for pretty harmless stuff, but the rtx_costs one
> at least probably wants fixing soonish.
>
> Oh, and this patch should be applied after the HAVE_insv patch I posted
> earlier today, for good results.
>
>
> Is this okay for trunk?
>
>
> 2015-07-12  Segher Boessenkool  <segher@kernel.crashing.org>
>
>         PR target/66217
>         * config/rs6000/constraints.md ("S", "T", "t"): Delete.
>         * config/rs6000/predicates.md (mask_operand, mask_operand_wrap,
>         mask64_operand, mask64_2_operand, any_mask_operand, and64_2_operand,
>         and_2rld_operand):  Delete.
>         (and_operand): Adjust.
>         (rotate_mask_operator): New.
>         * config/rs6000/rs6000-protos.h (build_mask64_2_operands,
>         includes_lshift_p, includes_rshift_p, includes_rldic_lshift_p,
>         includes_rldicr_lshift_p, insvdi_rshift_rlwimi_p, extract_MB,
>         extract_ME): Delete.
>         (rs6000_is_valid_mask, rs6000_is_valid_and_mask,
>         rs6000_is_valid_shift_mask, rs6000_is_valid_insert_mask,
>         rs6000_insn_for_and_mask, rs6000_insn_for_shift_mask,
>         rs6000_insn_for_insert_mask, rs6000_is_valid_2insn_and,
>         rs6000_emit_2insn_and): New.
>         * config/rs6000/rs6000.c (num_insns_constant): Adjust.
>         (build_mask64_2_operands, includes_lshift_p, includes_rshift_p,
>         includes_rldic_lshift_p, includes_rldicr_lshift_p,
>         insvdi_rshift_rlwimi_p, extract_MB, extract_ME): Delete.
>         (rs6000_is_valid_mask, rs6000_is_valid_and_mask,
>         rs6000_insn_for_and_mask, rs6000_is_valid_shift_mask,
>         s6000_insn_for_shift_mask, rs6000_is_valid_insert_mask,
>         rs6000_insn_for_insert_mask, rs6000_is_valid_2insn_and,
>         rs6000_emit_2insn_and): New.
>         (print_operand) <'b', 'B', 'm', 'M', 's', 'S', 'W'>: Delete.
>         (rs6000_rtx_costs) <CONST_INT>: Delete mask_operand and mask64_operand
>         handling.
>         <NOT>: Don't fall through to next case.
>         <AND>: Handle the various rotate-and-mask cases directly.
>         <IOR>: Always cost as one insn.
>         * config/rs6000/rs6000.md (splitter for bswap:SI): Adjust.
>         (and<mode>3): Adjust expander for the new patterns.
>         (and<mode>3_imm, and<mode>3_imm_dot, and<mode>3_imm_dot2,
>         and<mode>3_imm_mask_dot, and<mode>3_imm_mask_dot2): Adjust condition.
>         (*and<mode>3_imm_dot_shifted): New.
>         (*and<mode>3_mask): Delete, rewrite as ...
>         (and<mode>3_mask): ... New.
>         (*and<mode>3_mask_dot, *and<mode>3_mask_dot): Rewrite.
>         (andsi3_internal0_nomc): Delete.
>         (*andsi3_internal6): Delete.
>         (*and<mode>3_2insn): New.
>         (insv, insvsi_internal, *insvsi_internal1, *insvsi_internal2,
>         *insvsi_internal3, *insvsi_internal4, *insvsi_internal5,
>         *insvsi_internal6, insvdi_internal, *insvdi_internal2,
>         *insvdi_internal3): Delete.
>         (*rotl<mode>3_mask, *rotl<mode>3_mask_dot, *rotl<mode>3_mask_dot2,
>         *rotl<mode>3_insert, *rotl<mode>3_insert_2, *rotl<mode>3_insert_3,
>         *rotl<mode>3_insert_4, two splitters for multi-precision shifts,
>         *ior<mode>_mask): New.
>         (extzv, extzvdi_internal, *extzvdi_internal1, *extzvdi_internal2,
>         *rotlsi3_mask, *rotlsi3_mask_dot, *rotlsi3_mask_dot2,
>         *ashlsi3_imm_mask, *ashlsi3_imm_mask_dot, *ashlsi3_imm_mask_dot2,
>         *lshrsi3_imm_mask, *lshrsi3_imm_mask_dot, *lshrsi3_imm_mask_dot2):
>         Delete.
>         (ashr<mode>3): Delete expander.
>         (*ashr<mode>3): Rename to ...
>         (ashr<mode>3): ... This.
>         (ashrdi3_no_power, *ashrdisi3_noppc64be): Delete.
>         (*rotldi3_internal4, *rotldi3_internal5 and split,
>         *rotldi3_internal6 and split, *ashldi3_internal4, ashldi3_internal5
>         and split, *ashldi3_internal6 and split, *ashldi3_internal7,
>         ashldi3_internal8 and split, *ashldi3_internal9 and split): Delete.
>         (*anddi3_2rld, *anddi3_2rld_dot, *anddi3_2rld_dot2): Delete.
>         (splitter for loading a mask): Adjust.

This patch is okay with the follow-up revisions.

Thanks, David
diff mbox

Patch

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 7fc635b..d70b817 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -230,29 +230,11 @@  (define_constraint "R"
 
 ;; General constraints
 
-(define_constraint "S"
-  "Constant that can be placed into a 64-bit mask operand"
-  (and (match_test "TARGET_POWERPC64")
-       (match_operand 0 "mask64_operand")))
-
-(define_constraint "T"
-  "Constant that can be placed into a 32-bit mask operand"
-  (match_operand 0 "mask_operand"))
-
 (define_constraint "U"
   "V.4 small data reference"
   (and (match_test "DEFAULT_ABI == ABI_V4")
        (match_test "small_data_operand (op, mode)")))
 
-(define_constraint "t"
-  "AND masks that can be performed by two rldic{l,r} insns
-   (but excluding those that could match other constraints of anddi3)"
-  (and (and (and (match_operand 0 "mask64_2_operand")
-		 (match_test "(fixed_regs[CR0_REGNO]
-			      || !logical_operand (op, DImode))"))
-	    (not (match_operand 0 "mask_operand")))
-       (not (match_operand 0 "mask64_operand"))))
-
 (define_constraint "W"
   "vector constant that does not require memory"
   (match_operand 0 "easy_vector_constant"))
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index e2d836f..2adeee7 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -751,178 +751,14 @@  (define_predicate "non_logical_cint_operand"
        (and (not (match_operand 0 "logical_operand"))
 	    (match_operand 0 "reg_or_logical_cint_operand"))))
 
-;; Return 1 if op is a constant that can be encoded in a 32-bit mask,
-;; suitable for use with rlwinm (no more than two 1->0 or 0->1
-;; transitions).  Reject all ones and all zeros, since these should have
-;; been optimized away and confuse the making of MB and ME.
-(define_predicate "mask_operand"
-  (match_code "const_int")
-{
-  unsigned HOST_WIDE_INT c, lsb;
-
-  c = INTVAL (op);
-
-  if (TARGET_POWERPC64)
-    {
-      /* Fail if the mask is not 32-bit.  */
-      if (mode == DImode && (c & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0)
-	return 0;
-
-      /* Fail if the mask wraps around because the upper 32-bits of the
-	 mask will all be 1s, contrary to GCC's internal view.  */
-      if ((c & 0x80000001) == 0x80000001)
-	return 0;
-    }
-
-  /* We don't change the number of transitions by inverting,
-     so make sure we start with the LS bit zero.  */
-  if (c & 1)
-    c = ~c;
-
-  /* Reject all zeros or all ones.  */
-  if (c == 0)
-    return 0;
-
-  /* Find the first transition.  */
-  lsb = c & -c;
-
-  /* Invert to look for a second transition.  */
-  c = ~c;
-
-  /* Erase first transition.  */
-  c &= -lsb;
-
-  /* Find the second transition (if any).  */
-  lsb = c & -c;
-
-  /* Match if all the bits above are 1's (or c is zero).  */
-  return c == -lsb;
-})
-
-;; Return 1 for the PowerPC64 rlwinm corner case.
-(define_predicate "mask_operand_wrap"
-  (match_code "const_int")
-{
-  unsigned HOST_WIDE_INT c, lsb;
-
-  c = INTVAL (op);
-
-  if ((c & 0x80000001) != 0x80000001)
-    return 0;
-
-  c = ~c;
-  if (c == 0)
-    return 0;
-
-  lsb = c & -c;
-  c = ~c;
-  c &= -lsb;
-  lsb = c & -c;
-  return c == -lsb;
-})
-
-;; Return 1 if the operand is a constant that is a PowerPC64 mask
-;; suitable for use with rldicl or rldicr (no more than one 1->0 or 0->1
-;; transition).  Reject all zeros, since zero should have been
-;; optimized away and confuses the making of MB and ME.
-(define_predicate "mask64_operand"
-  (match_code "const_int")
-{
-  unsigned HOST_WIDE_INT c, lsb;
-
-  c = INTVAL (op);
-
-  /* Reject all zeros.  */
-  if (c == 0)
-    return 0;
-
-  /* We don't change the number of transitions by inverting,
-     so make sure we start with the LS bit zero.  */
-  if (c & 1)
-    c = ~c;
-
-  /* Find the first transition.  */
-  lsb = c & -c;
-
-  /* Match if all the bits above are 1's (or c is zero).  */
-  return c == -lsb;
-})
-
-;; Like mask64_operand, but allow up to three transitions.  This
-;; predicate is used by insn patterns that generate two rldicl or
-;; rldicr machine insns.
-(define_predicate "mask64_2_operand"
-  (match_code "const_int")
-{
-  unsigned HOST_WIDE_INT c, lsb;
-
-  c = INTVAL (op);
-
-  /* Disallow all zeros.  */
-  if (c == 0)
-    return 0;
-
-  /* We don't change the number of transitions by inverting,
-     so make sure we start with the LS bit zero.  */
-  if (c & 1)
-    c = ~c;
-
-  /* Find the first transition.  */
-  lsb = c & -c;
-
-  /* Invert to look for a second transition.  */
-  c = ~c;
-
-  /* Erase first transition.  */
-  c &= -lsb;
-
-  /* Find the second transition.  */
-  lsb = c & -c;
-
-  /* Invert to look for a third transition.  */
-  c = ~c;
-
-  /* Erase second transition.  */
-  c &= -lsb;
-
-  /* Find the third transition (if any).  */
-  lsb = c & -c;
-
-  /* Match if all the bits above are 1's (or c is zero).  */
-  return c == -lsb;
-})
-
-;; Match a mask_operand or a mask64_operand.
-(define_predicate "any_mask_operand"
-  (ior (match_operand 0 "mask_operand")
-       (and (match_test "TARGET_POWERPC64 && mode == DImode")
-	    (match_operand 0 "mask64_operand"))))
-
-;; Like and_operand, but also match constants that can be implemented
-;; with two rldicl or rldicr insns.
-(define_predicate "and64_2_operand"
-  (ior (match_operand 0 "mask64_2_operand")
-       (if_then_else (match_test "fixed_regs[CR0_REGNO]")
-	 (match_operand 0 "gpc_reg_operand")
-	 (match_operand 0 "logical_operand"))))
-
 ;; Return 1 if the operand is either a non-special register or a
 ;; constant that can be used as the operand of a logical AND.
 (define_predicate "and_operand"
-  (ior (match_operand 0 "mask_operand")
-       (and (match_test "TARGET_POWERPC64 && mode == DImode")
-	    (match_operand 0 "mask64_operand"))
+  (ior (match_test "rs6000_is_valid_and_mask (op, mode)")
        (if_then_else (match_test "fixed_regs[CR0_REGNO]")
 	 (match_operand 0 "gpc_reg_operand")
 	 (match_operand 0 "logical_operand"))))
 
-;; Return 1 if the operand is a constant that can be used as the operand
-;; of a logical AND, implemented with two rld* insns, and it cannot be done
-;; using just one insn.
-(define_predicate "and_2rld_operand"
-  (and (match_operand 0 "and64_2_operand")
-       (not (match_operand 0 "and_operand"))))
-
 ;; Return 1 if the operand is either a logical operand or a short cint operand.
 (define_predicate "scc_eq_operand"
   (ior (match_operand 0 "logical_operand")
@@ -1128,6 +964,10 @@  (define_predicate "rs6000_nonimmediate_operand"
   return nonimmediate_operand (op, mode);
 })
 
+;; Return true if operand is an operator used in rotate-and-mask instructions.
+(define_predicate "rotate_mask_operator"
+  (match_code "rotate,ashift,lshiftrt"))
+
 ;; Return true if operand is boolean operator.
 (define_predicate "boolean_operator"
   (match_code "and,ior,xor"))
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index bd1ede1..d90e73a 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -66,15 +66,18 @@  extern void altivec_expand_stvex_be (rtx, rtx, machine_mode, unsigned);
 extern void rs6000_expand_extract_even (rtx, rtx, rtx);
 extern void rs6000_expand_interleave (rtx, rtx, rtx, bool);
 extern void rs6000_scale_v2df (rtx, rtx, int);
-extern void build_mask64_2_operands (rtx, rtx *);
 extern int expand_block_clear (rtx[]);
 extern int expand_block_move (rtx[]);
 extern const char * rs6000_output_load_multiple (rtx[]);
-extern int includes_lshift_p (rtx, rtx);
-extern int includes_rshift_p (rtx, rtx);
-extern int includes_rldic_lshift_p (rtx, rtx);
-extern int includes_rldicr_lshift_p (rtx, rtx);
-extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx);
+extern bool rs6000_is_valid_mask (rtx, int *, int *, machine_mode);
+extern bool rs6000_is_valid_and_mask (rtx, machine_mode);
+extern bool rs6000_is_valid_shift_mask (rtx, rtx, machine_mode);
+extern bool rs6000_is_valid_insert_mask (rtx, rtx, machine_mode);
+extern const char *rs6000_insn_for_and_mask (machine_mode, rtx *, bool);
+extern const char *rs6000_insn_for_shift_mask (machine_mode, rtx *, bool);
+extern const char *rs6000_insn_for_insert_mask (machine_mode, rtx *, bool);
+extern bool rs6000_is_valid_2insn_and (rtx, machine_mode);
+extern void rs6000_emit_2insn_and (machine_mode, rtx *, bool, bool);
 extern int registers_ok_for_quad_peep (rtx, rtx);
 extern int mems_ok_for_quad_peep (rtx, rtx);
 extern bool gpr_or_gpr_p (rtx, rtx);
@@ -102,8 +105,6 @@  extern void paired_expand_vector_move (rtx operands[]);
 
 
 extern int ccr_bit (rtx, int);
-extern int extract_MB (rtx);
-extern int extract_ME (rtx);
 extern void rs6000_output_function_entry (FILE *, const char *);
 extern void print_operand (FILE *, rtx, int);
 extern void print_operand_address (FILE *, rtx);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 23fc686..216b9c5 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5264,7 +5264,7 @@  num_insns_constant (rtx op, machine_mode mode)
     {
     case CONST_INT:
       if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
-	  && mask64_operand (op, mode))
+	  && rs6000_is_valid_and_mask (op, mode))
 	return 2;
       else
 	return num_insns_constant_wide (INTVAL (op));
@@ -5312,7 +5312,7 @@  num_insns_constant (rtx op, machine_mode mode)
 		|| (high == -1 && low < 0))
 	      return num_insns_constant_wide (low);
 
-	    else if (mask64_operand (op, mode))
+	    else if (rs6000_is_valid_and_mask (op, mode))
 	      return 2;
 
 	    else if (low == 0)
@@ -6027,69 +6027,6 @@  rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
   emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
 }
 
-/* Generates shifts and masks for a pair of rldicl or rldicr insns to
-   implement ANDing by the mask IN.  */
-void
-build_mask64_2_operands (rtx in, rtx *out)
-{
-  unsigned HOST_WIDE_INT c, lsb, m1, m2;
-  int shift;
-
-  gcc_assert (GET_CODE (in) == CONST_INT);
-
-  c = INTVAL (in);
-  if (c & 1)
-    {
-      /* Assume c initially something like 0x00fff000000fffff.  The idea
-	 is to rotate the word so that the middle ^^^^^^ group of zeros
-	 is at the MS end and can be cleared with an rldicl mask.  We then
-	 rotate back and clear off the MS    ^^ group of zeros with a
-	 second rldicl.  */
-      c = ~c;			/*   c == 0xff000ffffff00000 */
-      lsb = c & -c;		/* lsb == 0x0000000000100000 */
-      m1 = -lsb;		/*  m1 == 0xfffffffffff00000 */
-      c = ~c;			/*   c == 0x00fff000000fffff */
-      c &= -lsb;		/*   c == 0x00fff00000000000 */
-      lsb = c & -c;		/* lsb == 0x0000100000000000 */
-      c = ~c;			/*   c == 0xff000fffffffffff */
-      c &= -lsb;		/*   c == 0xff00000000000000 */
-      shift = 0;
-      while ((lsb >>= 1) != 0)
-	shift++;		/* shift == 44 on exit from loop */
-      m1 <<= 64 - shift;	/*  m1 == 0xffffff0000000000 */
-      m1 = ~m1;			/*  m1 == 0x000000ffffffffff */
-      m2 = ~c;			/*  m2 == 0x00ffffffffffffff */
-    }
-  else
-    {
-      /* Assume c initially something like 0xff000f0000000000.  The idea
-	 is to rotate the word so that the     ^^^  middle group of zeros
-	 is at the LS end and can be cleared with an rldicr mask.  We then
-	 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
-	 a second rldicr.  */
-      lsb = c & -c;		/* lsb == 0x0000010000000000 */
-      m2 = -lsb;		/*  m2 == 0xffffff0000000000 */
-      c = ~c;			/*   c == 0x00fff0ffffffffff */
-      c &= -lsb;		/*   c == 0x00fff00000000000 */
-      lsb = c & -c;		/* lsb == 0x0000100000000000 */
-      c = ~c;			/*   c == 0xff000fffffffffff */
-      c &= -lsb;		/*   c == 0xff00000000000000 */
-      shift = 0;
-      while ((lsb >>= 1) != 0)
-	shift++;		/* shift == 44 on exit from loop */
-      m1 = ~c;			/*  m1 == 0x00ffffffffffffff */
-      m1 >>= shift;		/*  m1 == 0x0000000000000fff */
-      m1 = ~m1;			/*  m1 == 0xfffffffffffff000 */
-    }
-
-  /* Note that when we only have two 0->1 and 1->0 transitions, one of the
-     masks will be all 1's.  We are guaranteed more than one transition.  */
-  out[0] = GEN_INT (64 - shift);
-  out[1] = GEN_INT (m1);
-  out[2] = GEN_INT (shift);
-  out[3] = GEN_INT (m2);
-}
-
 /* Return TRUE if OP is an invalid SUBREG operation on the e500.  */
 
 bool
@@ -16403,121 +16340,495 @@  validate_condition_mode (enum rtx_code code, machine_mode mode)
 }
 
 
-/* Return 1 if ANDOP is a mask that has no bits on that are not in the
-   mask required to convert the result of a rotate insn into a shift
-   left insn of SHIFTOP bits.  Both are known to be SImode CONST_INT.  */
+/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
+   rldicl, rldicr, or rldic instruction in mode MODE.  If so, if E is
+   not zero, store there the bit offset (counted from the right) where
+   the single stretch of 1 bits begins; and similarly for B, the bit
+   offset where it ends.  */
 
-int
-includes_lshift_p (rtx shiftop, rtx andop)
+bool
+rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
 {
-  unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
+  unsigned HOST_WIDE_INT val = INTVAL (mask);
+  unsigned HOST_WIDE_INT bit;
+  int nb, ne;
+  int n = GET_MODE_PRECISION (mode);
+
+  if (mode != DImode && mode != SImode)
+    return false;
+
+  if (INTVAL (mask) >= 0)
+    {
+      bit = val & -val;
+      ne = exact_log2 (bit);
+      nb = exact_log2 (val + bit);
+    }
+  else if (val + 1 == 0)
+    {
+      nb = n;
+      ne = 0;
+    }
+  else if (val & 1)
+    {
+      val = ~val;
+      bit = val & -val;
+      nb = exact_log2 (bit);
+      ne = exact_log2 (val + bit);
+    }
+  else
+    {
+      bit = val & -val;
+      ne = exact_log2 (bit);
+      if (val + bit == 0)
+	nb = n;
+      else
+	nb = 0;
+    }
 
-  shift_mask <<= INTVAL (shiftop);
+  nb--;
 
-  return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
+  if (nb < 0 || ne < 0 || nb >= n || ne >= n)
+    return false;
+
+  if (b)
+    *b = nb;
+  if (e)
+    *e = ne;
+
+  return true;
 }
 
-/* Similar, but for right shift.  */
+/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
+   or rldicr instruction, to implement an AND with it in mode MODE.  */
 
-int
-includes_rshift_p (rtx shiftop, rtx andop)
+bool
+rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
 {
-  unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
+  int nb, ne;
+
+  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
+    return false;
+
+  /* For DImode, we need an rldicl, rldicr, or a rlwinm with mask that
+     does not wrap.  */
+  if (mode == DImode)
+    return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
 
-  shift_mask >>= INTVAL (shiftop);
+  /* For SImode, rlwinm can do everything.  */
+  if (mode == SImode)
+    return (nb < 32 && ne < 32);
 
-  return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
+  return false;
 }
 
-/* Return 1 if ANDOP is a mask suitable for use with an rldic insn
-   to perform a left shift.  It must have exactly SHIFTOP least
-   significant 0's, then one or more 1's, then zero or more 0's.  */
+/* Return the instruction template for an AND with mask in mode MODE, with
+   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */
 
-int
-includes_rldic_lshift_p (rtx shiftop, rtx andop)
+const char *
+rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
 {
-  if (GET_CODE (andop) == CONST_INT)
+  int nb, ne;
+
+  if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
+    gcc_unreachable ();
+
+  if (mode == DImode && ne == 0)
     {
-      unsigned HOST_WIDE_INT c, lsb, shift_mask;
+      operands[3] = GEN_INT (63 - nb);
+      if (dot)
+	return "rldicl. %0,%1,0,%3";
+      return "rldicl %0,%1,0,%3";
+    }
 
-      c = INTVAL (andop);
-      if (c == 0 || c == HOST_WIDE_INT_M1U)
-	return 0;
+  if (mode == DImode && nb == 63)
+    {
+      operands[3] = GEN_INT (63 - ne);
+      if (dot)
+	return "rldicr. %0,%1,0,%3";
+      return "rldicr %0,%1,0,%3";
+    }
 
-      shift_mask = HOST_WIDE_INT_M1U;
-      shift_mask <<= INTVAL (shiftop);
+  if (nb < 32 && ne < 32)
+    {
+      operands[3] = GEN_INT (31 - nb);
+      operands[4] = GEN_INT (31 - ne);
+      if (dot)
+	return "rlwinm. %0,%1,0,%3,%4";
+      return "rlwinm %0,%1,0,%3,%4";
+    }
 
-      /* Find the least significant one bit.  */
-      lsb = c & -c;
+  gcc_unreachable ();
+}
 
-      /* It must coincide with the LSB of the shift mask.  */
-      if (-lsb != shift_mask)
-	return 0;
+/* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
+   rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
+   shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE.  */
+
+bool
+rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
+{
+  int nb, ne;
 
-      /* Invert to look for the next transition (if any).  */
-      c = ~c;
+  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
+    return false;
 
-      /* Remove the low group of ones (originally low group of zeros).  */
-      c &= -lsb;
+  int n = GET_MODE_PRECISION (mode);
+  int sh = -1;
 
-      /* Again find the lsb, and check we have all 1's above.  */
-      lsb = c & -c;
-      return c == -lsb;
+  if (CONST_INT_P (XEXP (shift, 1)))
+    {
+      sh = INTVAL (XEXP (shift, 1));
+      if (sh < 0 || sh >= n)
+	return false;
     }
-  else
-    return 0;
+
+  rtx_code code = GET_CODE (shift);
+
+  /* Convert any shift by 0 to a rotate, to simplify below code.  */
+  if (sh == 0)
+    code = ROTATE;
+
+  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
+  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
+    code = ASHIFT;
+  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
+    {
+      code = LSHIFTRT;
+      sh = n - sh;
+    }
+
+  /* DImode rotates need rld*.  */
+  if (mode == DImode && code == ROTATE)
+    return (nb == 63 || ne == 0 || ne == sh);
+
+  /* SImode rotates need rlw*.  */
+  if (mode == SImode && code == ROTATE)
+    return (nb < 32 && ne < 32 && sh < 32);
+
+  /* Wrap-around masks are only okay for rotates.  */
+  if (ne > nb)
+    return false;
+
+  /* Variable shifts are only okay for rotates.  */
+  if (sh < 0)
+    return false;
+
+  /* Don't allow ASHIFT if the mask is wrong for that.  */
+  if (code == ASHIFT && ne < sh)
+    return false;
+
+  /* If we can do it with an rlw*, we can do it.  Don't allow LSHIFTRT
+     if the mask is wrong for that.  */
+  if (nb < 32 && ne < 32 && sh < 32
+      && !(code == LSHIFTRT && nb >= 32 - sh))
+    return true;
+
+  /* If we can do it with an rld*, we can do it.  Don't allow LSHIFTRT
+     if the mask is wrong for that.  */
+  if (code == LSHIFTRT)
+    sh = 64 - sh;
+  if (nb == 63 || ne == 0 || ne == sh)
+    return !(code == LSHIFTRT && nb >= sh);
+
+  return false;
 }
 
-/* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
-   to perform a left shift.  It must have SHIFTOP or more least
-   significant 0's, with the remainder of the word 1's.  */
+/* Return the instruction template for a shift with mask in mode MODE, with
+   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */
 
-int
-includes_rldicr_lshift_p (rtx shiftop, rtx andop)
+const char *
+rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
 {
-  if (GET_CODE (andop) == CONST_INT)
+  int nb, ne;
+
+  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
+    gcc_unreachable ();
+
+  if (mode == DImode && ne == 0)
     {
-      unsigned HOST_WIDE_INT c, lsb, shift_mask;
+      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
+	operands[2] = GEN_INT (64 - INTVAL (operands[2]));
+      operands[3] = GEN_INT (63 - nb);
+      if (dot)
+	return "rld%I2cl. %0,%1,%2,%3";
+      return "rld%I2cl %0,%1,%2,%3";
+    }
 
-      shift_mask = HOST_WIDE_INT_M1U;
-      shift_mask <<= INTVAL (shiftop);
-      c = INTVAL (andop);
+  if (mode == DImode && nb == 63)
+    {
+      operands[3] = GEN_INT (63 - ne);
+      if (dot)
+	return "rld%I2cr. %0,%1,%2,%3";
+      return "rld%I2cr %0,%1,%2,%3";
+    }
 
-      /* Find the least significant one bit.  */
-      lsb = c & -c;
+  if (mode == DImode
+      && GET_CODE (operands[4]) != LSHIFTRT
+      && CONST_INT_P (operands[2])
+      && ne == INTVAL (operands[2]))
+    {
+      operands[3] = GEN_INT (63 - nb);
+      if (dot)
+	return "rld%I2c. %0,%1,%2,%3";
+      return "rld%I2c %0,%1,%2,%3";
+    }
 
-      /* It must be covered by the shift mask.
-	 This test also rejects c == 0.  */
-      if ((lsb & shift_mask) == 0)
-	return 0;
+  if (nb < 32 && ne < 32)
+    {
+      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
+	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
+      operands[3] = GEN_INT (31 - nb);
+      operands[4] = GEN_INT (31 - ne);
+      if (dot)
+	return "rlw%I2nm. %0,%1,%2,%3,%4";
+      return "rlw%I2nm %0,%1,%2,%3,%4";
+    }
+
+  gcc_unreachable ();
+}
+
+/* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
+   rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
+   ASHIFT, or LSHIFTRT) in mode MODE.  */
 
-      /* Check we have all 1's above the transition, and reject all 1's.  */
-      return c == -lsb && lsb != 1;
+bool
+rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
+{
+  int nb, ne;
+
+  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
+    return false;
+
+  int n = GET_MODE_PRECISION (mode);
+
+  int sh = INTVAL (XEXP (shift, 1));
+  if (sh < 0 || sh >= n)
+    return false;
+
+  rtx_code code = GET_CODE (shift);
+
+  /* Convert any shift by 0 to a rotate, to simplify below code.  */
+  if (sh == 0)
+    code = ROTATE;
+
+  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
+  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
+    code = ASHIFT;
+  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
+    {
+      code = LSHIFTRT;
+      sh = n - sh;
     }
-  else
-    return 0;
+
+  /* DImode rotates need rldimi.  */
+  if (mode == DImode && code == ROTATE)
+    return (ne == sh);
+
+  /* SImode rotates need rlwimi.  */
+  if (mode == SImode && code == ROTATE)
+    return (nb < 32 && ne < 32 && sh < 32);
+
+  /* Wrap-around masks are only okay for rotates.  */
+  if (ne > nb)
+    return false;
+
+  /* Don't allow ASHIFT if the mask is wrong for that.  */
+  if (code == ASHIFT && ne < sh)
+    return false;
+
+  /* If we can do it with an rlwimi, we can do it.  Don't allow LSHIFTRT
+     if the mask is wrong for that.  */
+  if (nb < 32 && ne < 32 && sh < 32
+      && !(code == LSHIFTRT && nb >= 32 - sh))
+    return true;
+
+  /* If we can do it with an rldimi, we can do it.  Don't allow LSHIFTRT
+     if the mask is wrong for that.  */
+  if (code == LSHIFTRT)
+    sh = 64 - sh;
+  if (ne == sh)
+    return !(code == LSHIFTRT && nb >= sh);
+
+  return false;
 }
 
-/* Return 1 if operands will generate a valid arguments to rlwimi
-instruction for insert with right shift in 64-bit mode.  The mask may
-not start on the first bit or stop on the last bit because wrap-around
-effects of instruction do not correspond to semantics of RTL insn.  */
+/* Return the instruction template for an insert with mask in mode MODE, with
+   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */
 
-int
-insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
-{
-  if (INTVAL (startop) > 32
-      && INTVAL (startop) < 64
-      && INTVAL (sizeop) > 1
-      && INTVAL (sizeop) + INTVAL (startop) < 64
-      && INTVAL (shiftop) > 0
-      && INTVAL (sizeop) + INTVAL (shiftop) < 32
-      && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
-    return 1;
+const char *
+rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
+{
+  int nb, ne;
 
-  return 0;
+  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
+    gcc_unreachable ();
+
+  /* Prefer rldimi because rlwimi is cracked.  */
+  if (TARGET_POWERPC64
+      && (!dot || mode == DImode)
+      && GET_CODE (operands[4]) != LSHIFTRT
+      && ne == INTVAL (operands[2]))
+    {
+      operands[3] = GEN_INT (63 - nb);
+      if (dot)
+	return "rldimi. %0,%1,%2,%3";
+      return "rldimi %0,%1,%2,%3";
+    }
+
+  if (nb < 32 && ne < 32)
+    {
+      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
+	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
+      operands[3] = GEN_INT (31 - nb);
+      operands[4] = GEN_INT (31 - ne);
+      if (dot)
+	return "rlwimi. %0,%1,%2,%3,%4";
+      return "rlwimi %0,%1,%2,%3,%4";
+    }
+
+  gcc_unreachable ();
+}
+
+/* Return whether an AND with C (a CONST_INT) in mode MODE can be done
+   using two machine instructions.  */
+
+bool
+rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
+{
+  /* There are two kinds of AND we can handle with two insns:
+     1) those we can do with two rl* insns;
+     2) ori[s];xori[s].
+
+     We do not handle that last case yet.  */
+
+  /* If there is just one stretch of ones, we can do it.  */
+  if (rs6000_is_valid_mask (c, NULL, NULL, mode))
+    return true;
+
+  /* Otherwise, fill in the lowest "hole"; if we can do the result with
+     one insn, we can do the whole thing with two.  */
+  unsigned HOST_WIDE_INT val = INTVAL (c);
+  unsigned HOST_WIDE_INT bit1 = val & -val;
+  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
+  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
+  unsigned HOST_WIDE_INT bit3 = val1 & -val1;
+  return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
 }
 
+/* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
+   If EXPAND is true, split rotate-and-mask instructions we generate to
+   their constituent parts as well (this is used during expand); if DOT
+   is true, make the last insn a record-form instruction.  */
+
+void
+rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, bool dot)
+{
+  gcc_assert (!(expand && dot));
+
+  /* We do not actually handle record form yet.  */
+  if (dot)
+    gcc_unreachable ();
+
+  unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
+
+  /* If it is one stretch of ones, it is DImode; shift left, mask, then
+     shift right.  This generates better code than doing the masks without
+     shifts, or shifting first right and then left.  */
+  int nb, ne;
+  if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
+    {
+      gcc_assert (mode == DImode);
+
+      int shift = 63 - nb;
+      if (expand)
+	{
+	  rtx tmp1 = gen_reg_rtx (DImode);
+	  rtx tmp2 = gen_reg_rtx (DImode);
+	  emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
+	  emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
+	  emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
+	}
+      else
+	{
+	  rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
+	  tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
+	  emit_move_insn (operands[0], tmp);
+	  emit_insn (gen_lshrdi3 (operands[0], operands[0], GEN_INT (shift)));
+	}
+      return;
+    }
+
+  /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
+     that does the rest.  */
+  unsigned HOST_WIDE_INT bit1 = val & -val;
+  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
+  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
+  unsigned HOST_WIDE_INT bit3 = val1 & -val1;
+
+  unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
+  unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
+
+  gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
+
+  /* Two "no-rotate"-and-mask instructions, for SImode.  */
+  if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
+    {
+      gcc_assert (mode == SImode);
+
+      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
+      rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
+      emit_move_insn (reg, tmp);
+      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
+      emit_move_insn (operands[0], tmp);
+      return;
+    }
+
+  gcc_assert (mode == DImode);
+
+  /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
+     insns; we have to do the first in SImode, because it wraps.  */
+  if (mask2 <= 0xffffffff
+      && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
+    {
+      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
+      rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
+			     GEN_INT (mask1));
+      rtx reg_low = gen_lowpart (SImode, reg);
+      emit_move_insn (reg_low, tmp);
+      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
+      emit_move_insn (operands[0], tmp);
+      return;
+    }
+
+  /* Two rld* insns: rotate, clear the hole in the middle (which now is
+     at the top end), rotate back and clear the other hole.  */
+  int right = exact_log2 (bit3);
+  int left = 64 - right;
+
+  /* Rotate the mask too.  */
+  mask1 = (mask1 >> right) | ((bit2 - 1) << left);
+
+  if (expand)
+    {
+      rtx tmp1 = gen_reg_rtx (DImode);
+      rtx tmp2 = gen_reg_rtx (DImode);
+      rtx tmp3 = gen_reg_rtx (DImode);
+      emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
+      emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
+      emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
+      emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
+    }
+  else
+    {
+      rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
+      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
+      emit_move_insn (operands[0], tmp);
+      tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
+      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
+      emit_move_insn (operands[0], tmp);
+    }
+}
+
 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
    for lfq and stfq insns iff the registers are hard registers.   */
 
@@ -18402,71 +18713,6 @@  rs6000_init_machine_status (void)
 
 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
 
-int
-extract_MB (rtx op)
-{
-  int i;
-  unsigned long val = INTVAL (op);
-
-  /* If the high bit is zero, the value is the first 1 bit we find
-     from the left.  */
-  if ((val & 0x80000000) == 0)
-    {
-      gcc_assert (val & 0xffffffff);
-
-      i = 1;
-      while (((val <<= 1) & 0x80000000) == 0)
-	++i;
-      return i;
-    }
-
-  /* If the high bit is set and the low bit is not, or the mask is all
-     1's, the value is zero.  */
-  if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
-    return 0;
-
-  /* Otherwise we have a wrap-around mask.  Look for the first 0 bit
-     from the right.  */
-  i = 31;
-  while (((val >>= 1) & 1) != 0)
-    --i;
-
-  return i;
-}
-
-int
-extract_ME (rtx op)
-{
-  int i;
-  unsigned long val = INTVAL (op);
-
-  /* If the low bit is zero, the value is the first 1 bit we find from
-     the right.  */
-  if ((val & 1) == 0)
-    {
-      gcc_assert (val & 0xffffffff);
-
-      i = 30;
-      while (((val >>= 1) & 1) == 0)
-	--i;
-
-      return i;
-    }
-
-  /* If the low bit is set and the high bit is not, or the mask is all
-     1's, the value is 31.  */
-  if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
-    return 31;
-
-  /* Otherwise we have a wrap-around mask.  Look for the first 0 bit
-     from the left.  */
-  i = 0;
-  while (((val <<= 1) & 0x80000000) != 0)
-    ++i;
-
-  return i;
-}
-
 /* Write out a function code label.  */
 
 void
@@ -18516,21 +18762,6 @@  print_operand (FILE *file, rtx x, int code)
     {
       /* %a is output_address.  */
 
-    case 'b':
-      /* If constant, low-order 16 bits of constant, unsigned.
-	 Otherwise, write normally.  */
-      if (INT_P (x))
-	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
-      else
-	print_operand (file, x, 0);
-      return;
-
-    case 'B':
-      /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
-	 for 64-bit mask direction.  */
-      putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
-      return;
-
       /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
 	 output_operand.  */
 
@@ -18693,24 +18924,6 @@  print_operand (FILE *file, rtx x, int code)
 	}
       return;
 
-    case 'm':
-      /* MB value for a mask operand.  */
-      if (! mask_operand (x, SImode))
-	output_operand_lossage ("invalid %%m value");
-
-      fprintf (file, "%d", extract_MB (x));
-      return;
-
-    case 'M':
-      /* ME value for a mask operand.  */
-      if (! mask_operand (x, SImode))
-	output_operand_lossage ("invalid %%M value");
-
-      fprintf (file, "%d", extract_ME (x));
-      return;
-
-      /* %n outputs the negative of its operand.  */
-
     case 'N':
       /* Write the number of elements in the vector times 4.  */
       if (GET_CODE (x) != PARALLEL)
@@ -18799,44 +19012,6 @@  print_operand (FILE *file, rtx x, int code)
 	fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
       return;
 
-    case 's':
-      /* Low 5 bits of 32 - value */
-      if (! INT_P (x))
-	output_operand_lossage ("invalid %%s value");
-      else
-	fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
-      return;
-
-    case 'S':
-      /* PowerPC64 mask position.  All 0's is excluded.
-	 CONST_INT 32-bit mask is considered sign-extended so any
-	 transition must occur within the CONST_INT, not on the boundary.  */
-      if (! mask64_operand (x, DImode))
-	output_operand_lossage ("invalid %%S value");
-
-      uval = INTVAL (x);
-
-      if (uval & 1)	/* Clear Left */
-	{
-#if HOST_BITS_PER_WIDE_INT > 64
-	  uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
-#endif
-	  i = 64;
-	}
-      else		/* Clear Right */
-	{
-	  uval = ~uval;
-#if HOST_BITS_PER_WIDE_INT > 64
-	  uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
-#endif
-	  i = 63;
-	}
-      while (uval != 0)
-	--i, uval >>= 1;
-      gcc_assert (i >= 0);
-      fprintf (file, "%d", i);
-      return;
-
     case 't':
       /* Like 'J' but get to the OVERFLOW/UNORDERED bit.  */
       gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
@@ -18942,13 +19117,6 @@  print_operand (FILE *file, rtx x, int code)
 	print_operand (file, x, 0);
       return;
 
-    case 'W':
-      /* MB value for a PowerPC64 rldic operand.  */
-      i = clz_hwi (INTVAL (x));
-
-      fprintf (file, "%d", i);
-      return;
-
     case 'x':
       /* X is a FPR or Altivec register used in a VSX context.  */
       if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
@@ -30647,10 +30815,7 @@  rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
 	      && (satisfies_constraint_K (x)
 		  || (mode == SImode
 		      ? satisfies_constraint_L (x)
-		      : satisfies_constraint_J (x))
-		  || mask_operand (x, mode)
-		  || (mode == DImode
-		      && mask64_operand (x, DImode))))
+		      : satisfies_constraint_J (x))))
 	  || ((outer_code == IOR || outer_code == XOR)
 	      && (satisfies_constraint_K (x)
 		  || (mode == SImode
@@ -30799,15 +30964,60 @@  rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
 
     case NOT:
       if (outer_code == AND || outer_code == IOR || outer_code == XOR)
+	*total = 0;
+      else
+	*total = COSTS_N_INSNS (1);
+      return false;
+
+    case AND:
+      if (CONST_INT_P (XEXP (x, 1)))
 	{
-	  *total = 0;
-	  return false;
+	  rtx left = XEXP (x, 0);
+	  rtx_code left_code = GET_CODE (left);
+
+	  /* rotate-and-mask: 1 insn.  */
+	  if ((left_code == ROTATE
+	       || left_code == ASHIFT
+	       || left_code == LSHIFTRT)
+	      && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
+	    {
+	      *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
+	      if (!CONST_INT_P (XEXP (left, 1)))
+		*total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
+	      *total += COSTS_N_INSNS (1);
+	      return true;
+	    }
+
+	  /* rotate-and-mask (no rotate), andi., andis.: 1 insn.  */
+	  HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
+	  if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
+	      || (val & 0xffff) == val
+	      || (val & 0xffff0000) == val
+	      || ((val & 0xffff) == 0 && mode == SImode))
+	    {
+	      *total = rtx_cost (left, mode, AND, 0, speed);
+	      *total += COSTS_N_INSNS (1);
+	      return true;
+	    }
+
+	  /* 2 insns.  */
+	  if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
+	    {
+	      *total = rtx_cost (left, mode, AND, 0, speed);
+	      *total += COSTS_N_INSNS (2);
+	      return true;
+	    }
 	}
-      /* FALLTHRU */
 
-    case AND:
-    case CLZ:
+      *total = COSTS_N_INSNS (1);
+      return false;
+
     case IOR:
+      /* FIXME */
+      *total = COSTS_N_INSNS (1);
+      return true;
+
+    case CLZ:
     case XOR:
     case ZERO_EXTRACT:
       *total = COSTS_N_INSNS (1);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6a14ee8..d8529f8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2245,17 +2245,23 @@  (define_split
   [(set (match_operand:SI 0 "gpc_reg_operand" "")
 	(bswap:SI (match_operand:SI 1 "gpc_reg_operand" "")))]
   "reload_completed"
-  [(set (match_dup 0)
-	(rotate:SI (match_dup 1) (const_int 8)))
-   (set (zero_extract:SI (match_dup 0)
-			 (const_int 8)
-			 (const_int 0))
-	(match_dup 1))
-   (set (zero_extract:SI (match_dup 0)
-			 (const_int 8)
-			 (const_int 16))
+  [(set (match_dup 0)					; DABC
 	(rotate:SI (match_dup 1)
-		   (const_int 16)))]
+		   (const_int 24)))
+   (set (match_dup 0)					; DCBC
+	(ior:SI (and:SI (ashift:SI (match_dup 1)
+				   (const_int 8))
+			(const_int 16711680))
+		(and:SI (match_dup 0)
+			(const_int -16711681))))
+   (set (match_dup 0)					; DCBA
+	(ior:SI (and:SI (lshiftrt:SI (match_dup 1)
+				     (const_int 24))
+			(const_int 255))
+		(and:SI (match_dup 0)
+			(const_int -256))))
+
+  ]
   "")
 
 (define_expand "bswapdi2"
@@ -2892,21 +2898,26 @@  (define_expand "and<mode>3"
       DONE;
     }
 
+  if (rs6000_is_valid_and_mask (operands[2], <MODE>mode))
+    {
+      emit_insn (gen_and<mode>3_mask (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+
   if (logical_const_operand (operands[2], <MODE>mode)
-      && !any_mask_operand (operands[2], <MODE>mode))
+      && rs6000_gen_cell_microcode)
     {
-      if (rs6000_gen_cell_microcode)
-	{
-	  emit_insn (gen_and<mode>3_imm (operands[0], operands[1], operands[2]));
-	  DONE;
-	}
-      else
-	operands[2] = force_reg (<MODE>mode, operands[2]);
+      emit_insn (gen_and<mode>3_imm (operands[0], operands[1], operands[2]));
+      DONE;
     }
 
-  if ((<MODE>mode == DImode && !and64_2_operand (operands[2], <MODE>mode))
-      || (<MODE>mode != DImode && !and_operand (operands[2], <MODE>mode)))
-    operands[2] = force_reg (<MODE>mode, operands[2]);
+  if (rs6000_is_valid_2insn_and (operands[2], <MODE>mode))
+    {
+      rs6000_emit_2insn_and (<MODE>mode, operands, true, false);
+      DONE;
+    }
+
+  operands[2] = force_reg (<MODE>mode, operands[2]);
 })
 
 
@@ -2916,7 +2927,7 @@  (define_insn "and<mode>3_imm"
 		 (match_operand:GPR 2 "logical_const_operand" "n")))
    (clobber (match_scratch:CC 3 "=x"))]
   "rs6000_gen_cell_microcode
-   && !any_mask_operand (operands[2], <MODE>mode)"
+   && !rs6000_is_valid_and_mask (operands[2], <MODE>mode)"
   "andi%e2. %0,%1,%u2"
   [(set_attr "type" "logical")
    (set_attr "dot" "yes")])
@@ -2930,7 +2941,7 @@  (define_insn_and_split "*and<mode>3_imm_dot"
    (clobber (match_scratch:CC 4 "=X,x"))]
   "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff)
    && rs6000_gen_cell_microcode
-   && !any_mask_operand (operands[2], <MODE>mode)"
+   && !rs6000_is_valid_and_mask (operands[2], <MODE>mode)"
   "@
    andi%e2. %0,%1,%u2
    #"
@@ -2958,7 +2969,7 @@  (define_insn_and_split "*and<mode>3_imm_dot2"
    (clobber (match_scratch:CC 4 "=X,x"))]
   "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff)
    && rs6000_gen_cell_microcode
-   && !any_mask_operand (operands[2], <MODE>mode)"
+   && !rs6000_is_valid_and_mask (operands[2], <MODE>mode)"
   "@
    andi%e2. %0,%1,%u2
    #"
@@ -2982,8 +2993,7 @@  (define_insn_and_split "*and<mode>3_imm_mask_dot"
 		    (const_int 0)))
    (clobber (match_scratch:GPR 0 "=r,r"))]
   "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff)
-   && rs6000_gen_cell_microcode
-   && any_mask_operand (operands[2], <MODE>mode)"
+   && rs6000_gen_cell_microcode"
   "@
    andi%e2. %0,%1,%u2
    #"
@@ -3008,8 +3018,7 @@  (define_insn_and_split "*and<mode>3_imm_mask_dot2"
 	(and:GPR (match_dup 1)
 		 (match_dup 2)))]
   "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff)
-   && rs6000_gen_cell_microcode
-   && any_mask_operand (operands[2], <MODE>mode)"
+   && rs6000_gen_cell_microcode"
   "@
    andi%e2. %0,%1,%u2
    #"
@@ -3025,31 +3034,68 @@  (define_insn_and_split "*and<mode>3_imm_mask_dot2"
    (set_attr "dot" "yes")
    (set_attr "length" "4,8")])
 
-
-(define_insn "*and<mode>3_mask"
-  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
-	(and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r")
-		 (match_operand:GPR 2 "any_mask_operand" "S,T")))]
+(define_insn_and_split "*and<mode>3_imm_dot_shifted"
+  [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+	(compare:CC
+	  (and:GPR
+	    (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r")
+			  (match_operand:GPR 4 "const_int_operand" "n,n"))
+	    (match_operand:GPR 2 "const_int_operand" "n,n"))
+	  (const_int 0)))
+   (clobber (match_scratch:GPR 0 "=r,r"))]
+  "logical_const_operand (GEN_INT (UINTVAL (operands[2])
+				   << INTVAL (operands[4])),
+			  DImode)
+   && (<MODE>mode == Pmode
+       || (UINTVAL (operands[2]) << INTVAL (operands[4])) <= 0x7fffffff)
+   && rs6000_gen_cell_microcode"
+{
+  operands[2] = GEN_INT (UINTVAL (operands[2]) << INTVAL (operands[4]));
+  if (which_alternative == 0)
+    return "andi%e2. %0,%1,%u2";
+  else
+    return "#";
+}
+  "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)"
+  [(set (match_dup 0)
+	(and:GPR (lshiftrt:GPR (match_dup 1)
+			       (match_dup 4))
+		 (match_dup 2)))
+   (set (match_dup 3)
+	(compare:CC (match_dup 0)
+		    (const_int 0)))]
   ""
-  "@
-   rldic%B2 %0,%1,0,%S2
-   rlwinm %0,%1,0,%m2,%M2"
+  [(set_attr "type" "logical")
+   (set_attr "dot" "yes")
+   (set_attr "length" "4,8")])
+
+
+(define_insn "and<mode>3_mask"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+	(and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r")
+		 (match_operand:GPR 2 "const_int_operand" "n")))]
+  "rs6000_is_valid_and_mask (operands[2], <MODE>mode)"
+{
+  return rs6000_insn_for_and_mask (<MODE>mode, operands, false);
+}
   [(set_attr "type" "shift")])
 
 (define_insn_and_split "*and<mode>3_mask_dot"
-  [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
-	(compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r")
-			     (match_operand:GPR 2 "any_mask_operand" "S,T,S,T"))
+  [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+	(compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r")
+			     (match_operand:GPR 2 "const_int_operand" "n,n"))
 		    (const_int 0)))
-   (clobber (match_scratch:GPR 0 "=r,r,r,r"))]
+   (clobber (match_scratch:GPR 0 "=r,r"))]
   "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff)
    && rs6000_gen_cell_microcode
-   && !logical_const_operand (operands[2], <MODE>mode)"
-  "@
-   rldic%B2. %0,%1,0,%S2
-   rlwinm. %0,%1,0,%m2,%M2
-   #
-   #"
+   && !logical_const_operand (operands[2], <MODE>mode)
+   && rs6000_is_valid_and_mask (operands[2], <MODE>mode)"
+{
+  if (which_alternative == 0)
+    return rs6000_insn_for_and_mask (<MODE>mode, operands, true);
+  else
+    return "#";
+}
   "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)"
   [(set (match_dup 0)
 	(and:GPR (match_dup 1)
@@ -3060,24 +3106,26 @@  (define_insn_and_split "*and<mode>3_mask_dot"
   ""
   [(set_attr "type" "shift")
    (set_attr "dot" "yes")
-   (set_attr "length" "4,4,8,8")])
+   (set_attr "length" "4,8")])
 
 (define_insn_and_split "*and<mode>3_mask_dot2"
-  [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
-	(compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r")
-			     (match_operand:GPR 2 "any_mask_operand" "S,T,S,T"))
+  [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+	(compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r")
+			     (match_operand:GPR 2 "const_int_operand" "n,n"))
 		    (const_int 0)))
-   (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r,r")
+   (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
 	(and:GPR (match_dup 1)
 		 (match_dup 2)))]
   "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff)
    && rs6000_gen_cell_microcode
-   && !logical_const_operand (operands[2], <MODE>mode)"
-  "@
-   rldic%B2. %0,%1,0,%S2
-   rlwinm. %0,%1,0,%m2,%M2
-   #
-   #"
+   && !logical_const_operand (operands[2], <MODE>mode)
+   && rs6000_is_valid_and_mask (operands[2], <MODE>mode)"
+{
+  if (which_alternative == 0)
+    return rs6000_insn_for_and_mask (<MODE>mode, operands, true);
+  else
+    return "#";
+}
   "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)"
   [(set (match_dup 0)
 	(and:GPR (match_dup 1)
@@ -3088,43 +3136,25 @@  (define_insn_and_split "*and<mode>3_mask_dot2"
   ""
   [(set_attr "type" "shift")
    (set_attr "dot" "yes")
-   (set_attr "length" "4,4,8,8")])
-
-
-
-(define_insn "andsi3_internal0_nomc"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
-        (and:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r")
-                (match_operand:SI 2 "and_operand" "?r,T")))]
-  "!rs6000_gen_cell_microcode"
-  "@
-   and %0,%1,%2
-   rlwinm %0,%1,0,%m2,%M2"
-  [(set_attr "type" "logical,shift")])
-
+   (set_attr "length" "4,8")])
 
-;; Handle the PowerPC64 rlwinm corner case
 
-(define_insn_and_split "*andsi3_internal6"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
-	(and:SI (match_operand:SI 1 "gpc_reg_operand" "r")
-		(match_operand:SI 2 "mask_operand_wrap" "i")))]
-  "TARGET_POWERPC64"
+; TODO: add dot (record-form) variants of this pattern
+(define_insn_and_split "*and<mode>3_2insn"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+	(and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r")
+		 (match_operand:GPR 2 "const_int_operand" "n")))]
+  "rs6000_is_valid_2insn_and (operands[2], <MODE>mode)
+   && !(rs6000_is_valid_and_mask (operands[2], <MODE>mode)
+	|| (logical_const_operand (operands[2], <MODE>mode)
+	    && rs6000_gen_cell_microcode))"
   "#"
-  "TARGET_POWERPC64"
-  [(set (match_dup 0)
-	(and:SI (rotate:SI (match_dup 1) (match_dup 3))
-		(match_dup 4)))
-   (set (match_dup 0)
-	(rotate:SI (match_dup 0) (match_dup 5)))]
-  "
+  "&& 1"
+  [(pc)]
 {
-  int mb = extract_MB (operands[2]);
-  int me = extract_ME (operands[2]);
-  operands[3] = GEN_INT (me + 1);
-  operands[5] = GEN_INT (32 - (me + 1));
-  operands[4] = GEN_INT (~((HOST_WIDE_INT) -1 << (33 + me - mb)));
-}"
+  rs6000_emit_2insn_and (<MODE>mode, operands, false, false);
+  DONE;
+}
   [(set_attr "length" "8")])
 
 
@@ -3366,318 +3396,239 @@  (define_insn "*eqv<mode>3"
   "eqv %0,%1,%2"
   [(set_attr "type" "logical")])
 
-;; Rotate and shift insns, in all their variants.  These support shifts,
-;; field inserts and extracts, and various combinations thereof.
-(define_expand "insv"
-  [(set (zero_extract (match_operand 0 "gpc_reg_operand" "")
-		       (match_operand:SI 1 "const_int_operand" "")
-		       (match_operand:SI 2 "const_int_operand" ""))
-	(match_operand 3 "gpc_reg_operand" ""))]
-  ""
-  "
-{
-  /* Do not handle 16/8 bit structures that fit in HI/QI modes directly, since
-     the (SUBREG:SI (REG:HI xxx)) that is otherwise generated can confuse the
-     compiler if the address of the structure is taken later.  Likewise, do
-     not handle invalid E500 subregs.  */
-  if (GET_CODE (operands[0]) == SUBREG
-      && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))) < UNITS_PER_WORD
-	  || ((TARGET_E500_DOUBLE || TARGET_SPE)
-	      && invalid_e500_subreg (operands[0], GET_MODE (operands[0])))))
-    FAIL;
-
-  if (TARGET_POWERPC64 && GET_MODE (operands[0]) == DImode)
-    emit_insn (gen_insvdi_internal (operands[0], operands[1], operands[2],
-				    operands[3]));
-  else
-    emit_insn (gen_insvsi_internal (operands[0], operands[1], operands[2],
-				    operands[3]));
-  DONE;
-}")
+;; Rotate-and-mask and insert.
 
-(define_insn "insvsi_internal"
-  [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
-			 (match_operand:SI 1 "const_int_operand" "i")
-			 (match_operand:SI 2 "const_int_operand" "i"))
-	(match_operand:SI 3 "gpc_reg_operand" "r"))]
-  ""
-  "*
-{
-  int start = INTVAL (operands[2]) & 31;
-  int size = INTVAL (operands[1]) & 31;
-
-  operands[4] = GEN_INT (32 - start - size);
-  operands[1] = GEN_INT (start + size - 1);
-  return \"rlwimi %0,%3,%h4,%h2,%h1\";
-}"
-  [(set_attr "type" "insert")])
-
-(define_insn "*insvsi_internal1"
-  [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
-			 (match_operand:SI 1 "const_int_operand" "i")
-			 (match_operand:SI 2 "const_int_operand" "i"))
-	(rotate:SI (match_operand:SI 3 "gpc_reg_operand" "r")
-		   (match_operand:SI 4 "const_int_operand" "i")))]
-  "(32 - (INTVAL (operands[4]) & 31)) >= INTVAL (operands[1])"
-  "*
+(define_insn "*rotl<mode>3_mask"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+	(and:GPR (match_operator:GPR 4 "rotate_mask_operator"
+		  [(match_operand:GPR 1 "gpc_reg_operand" "r")
+		   (match_operand:GPR 2 "reg_or_cint_operand" "rn")])
+		 (match_operand:GPR 3 "const_int_operand" "n")))]
+  "rs6000_is_valid_shift_mask (operands[3], operands[4], <MODE>mode)"
 {
-  int shift = INTVAL (operands[4]) & 31;
-  int start = INTVAL (operands[2]) & 31;
-  int size = INTVAL (operands[1]) & 31;
-
-  operands[4] = GEN_INT (shift - start - size);
-  operands[1] = GEN_INT (start + size - 1);
-  return \"rlwimi %0,%3,%h4,%h2,%h1\";
-}"
-  [(set_attr "type" "insert")])
+  return rs6000_insn_for_shift_mask (<MODE>mode, operands, false);
+}
+  [(set_attr "type" "shift")
+   (set_attr "maybe_var_shift" "yes")])
 
-(define_insn "*insvsi_internal2"
-  [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
-			 (match_operand:SI 1 "const_int_operand" "i")
-			 (match_operand:SI 2 "const_int_operand" "i"))
-	(ashiftrt:SI (match_operand:SI 3 "gpc_reg_operand" "r")
-		     (match_operand:SI 4 "const_int_operand" "i")))]
-  "(32 - (INTVAL (operands[4]) & 31)) >= INTVAL (operands[1])"
-  "*
+(define_insn_and_split "*rotl<mode>3_mask_dot"
+  [(set (match_operand:CC 5 "cc_reg_operand" "=x,?y")
+	(compare:CC
+	  (and:GPR (match_operator:GPR 4 "rotate_mask_operator"
+		    [(match_operand:GPR 1 "gpc_reg_operand" "r,r")
+		     (match_operand:GPR 2 "reg_or_cint_operand" "rn,rn")])
+		   (match_operand:GPR 3 "const_int_operand" "n,n"))
+	  (const_int 0)))
+   (clobber (match_scratch:GPR 0 "=r,r"))]
+  "(<MODE>mode == Pmode || UINTVAL (operands[3]) <= 0x7fffffff)
+   && rs6000_gen_cell_microcode
+   && rs6000_is_valid_shift_mask (operands[3], operands[4], <MODE>mode)"
 {
-  int shift = INTVAL (operands[4]) & 31;
-  int start = INTVAL (operands[2]) & 31;
-  int size = INTVAL (operands[1]) & 31;
-
-  operands[4] = GEN_INT (32 - shift - start - size);
-  operands[1] = GEN_INT (start + size - 1);
-  return \"rlwimi %0,%3,%h4,%h2,%h1\";
-}"
-  [(set_attr "type" "insert")])
+  if (which_alternative == 0)
+    return rs6000_insn_for_shift_mask (<MODE>mode, operands, true);
+  else
+    return "#";
+}
+  "&& reload_completed && cc_reg_not_cr0_operand (operands[5], CCmode)"
+  [(set (match_dup 0)
+	(and:GPR (match_dup 4)
+		 (match_dup 3)))
+   (set (match_dup 5)
+	(compare:CC (match_dup 0)
+		    (const_int 0)))]
+  ""
+  [(set_attr "type" "shift")
+   (set_attr "maybe_var_shift" "yes")
+   (set_attr "dot" "yes")
+   (set_attr "length" "4,8")])
 
-(define_insn "*insvsi_internal3"
-  [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
-			 (match_operand:SI 1 "const_int_operand" "i")
-			 (match_operand:SI 2 "const_int_operand" "i"))
-	(lshiftrt:SI (match_operand:SI 3 "gpc_reg_operand" "r")
-		     (match_operand:SI 4 "const_int_operand" "i")))]
-  "(32 - (INTVAL (operands[4]) & 31)) >= INTVAL (operands[1])"
-  "*
+(define_insn_and_split "*rotl<mode>3_mask_dot2"
+  [(set (match_operand:CC 5 "cc_reg_operand" "=x,?y")
+	(compare:CC
+	  (and:GPR (match_operator:GPR 4 "rotate_mask_operator"
+		    [(match_operand:GPR 1 "gpc_reg_operand" "r,r")
+		     (match_operand:GPR 2 "reg_or_cint_operand" "rn,rn")])
+		   (match_operand:GPR 3 "const_int_operand" "n,n"))
+	  (const_int 0)))
+   (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
+	(and:GPR (match_dup 4)
+		 (match_dup 3)))]
+  "(<MODE>mode == Pmode || UINTVAL (operands[3]) <= 0x7fffffff)
+   && rs6000_gen_cell_microcode
+   && rs6000_is_valid_shift_mask (operands[3], operands[4], <MODE>mode)"
 {
-  int shift = INTVAL (operands[4]) & 31;
-  int start = INTVAL (operands[2]) & 31;
-  int size = INTVAL (operands[1]) & 31;
-
-  operands[4] = GEN_INT (32 - shift - start - size);
-  operands[1] = GEN_INT (start + size - 1);
-  return \"rlwimi %0,%3,%h4,%h2,%h1\";
-}"
-  [(set_attr "type" "insert")])
+  if (which_alternative == 0)
+    return rs6000_insn_for_shift_mask (<MODE>mode, operands, true);
+  else
+    return "#";
+}
+  "&& reload_completed && cc_reg_not_cr0_operand (operands[5], CCmode)"
+  [(set (match_dup 0)
+	(and:GPR (match_dup 4)
+		 (match_dup 3)))
+   (set (match_dup 5)
+	(compare:CC (match_dup 0)
+		    (const_int 0)))]
+  ""
+  [(set_attr "type" "shift")
+   (set_attr "maybe_var_shift" "yes")
+   (set_attr "dot" "yes")
+   (set_attr "length" "4,8")])
 
-(define_insn "*insvsi_internal4"
-  [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
-			 (match_operand:SI 1 "const_int_operand" "i")
-			 (match_operand:SI 2 "const_int_operand" "i"))
-	(zero_extract:SI (match_operand:SI 3 "gpc_reg_operand" "r")
-			 (match_operand:SI 4 "const_int_operand" "i")
-			 (match_operand:SI 5 "const_int_operand" "i")))]
-  "INTVAL (operands[4]) >= INTVAL (operands[1])"
-  "*
-{
-  int extract_start = INTVAL (operands[5]) & 31;
-  int extract_size = INTVAL (operands[4]) & 31;
-  int insert_start = INTVAL (operands[2]) & 31;
-  int insert_size = INTVAL (operands[1]) & 31;
 
-/* Align extract field with insert field */
-  operands[5] = GEN_INT (extract_start + extract_size - insert_start - insert_size);
-  operands[1] = GEN_INT (insert_start + insert_size - 1);
-  return \"rlwimi %0,%3,%h5,%h2,%h1\";
-}"
+; Two forms for insert: the two arms of the IOR are not canonicalized
+; (both are ANDs, so neither takes precedence); match each operand order.
+(define_insn "*rotl<mode>3_insert"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+	(ior:GPR (and:GPR (match_operator:GPR 4 "rotate_mask_operator"
+			   [(match_operand:GPR 1 "gpc_reg_operand" "r")
+			    (match_operand:GPR 2 "const_int_operand" "n")])
+			  (match_operand:GPR 3 "const_int_operand" "n"))
+		 (and:GPR (match_operand:GPR 5 "gpc_reg_operand" "0")
+			  (match_operand:GPR 6 "const_int_operand" "n"))))]
+  "rs6000_is_valid_insert_mask (operands[3], operands[4], <MODE>mode)
+   && UINTVAL (operands[3]) + UINTVAL (operands[6]) + 1 == 0"
+{
+  return rs6000_insn_for_insert_mask (<MODE>mode, operands, false);
+}
   [(set_attr "type" "insert")])
+; FIXME: this needs an attr "size", so that the scheduler can see the
+; difference between rlwimi and rldimi.  We also might want dot forms,
+; but not for rlwimi on POWER4 and similar processors.
 
-;; combine patterns for rlwimi
-(define_insn "*insvsi_internal5"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
-        (ior:SI (and:SI (match_operand:SI 4 "gpc_reg_operand" "0")
-                        (match_operand:SI 1 "mask_operand" "i"))
-                (and:SI (lshiftrt:SI (match_operand:SI 3 "gpc_reg_operand" "r")
-                                     (match_operand:SI 2 "const_int_operand" "i"))
-                        (match_operand:SI 5 "mask_operand" "i"))))]
-  "INTVAL(operands[1]) == ~INTVAL(operands[5])"
-  "*
-{
- int me = extract_ME(operands[5]);
- int mb = extract_MB(operands[5]);
- operands[4] = GEN_INT(32 - INTVAL(operands[2]));
- operands[2] = GEN_INT(mb);
- operands[1] = GEN_INT(me);
- return \"rlwimi %0,%3,%h4,%h2,%h1\";
-}"
+(define_insn "*rotl<mode>3_insert_2"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+	(ior:GPR (and:GPR (match_operand:GPR 5 "gpc_reg_operand" "0")
+			  (match_operand:GPR 6 "const_int_operand" "n"))
+		 (and:GPR (match_operator:GPR 4 "rotate_mask_operator"
+			   [(match_operand:GPR 1 "gpc_reg_operand" "r")
+			    (match_operand:GPR 2 "const_int_operand" "n")])
+			  (match_operand:GPR 3 "const_int_operand" "n"))))]
+  "rs6000_is_valid_insert_mask (operands[3], operands[4], <MODE>mode)
+   && UINTVAL (operands[3]) + UINTVAL (operands[6]) + 1 == 0"
+{
+  return rs6000_insn_for_insert_mask (<MODE>mode, operands, false);
+}
   [(set_attr "type" "insert")])
 
-(define_insn "*insvsi_internal6"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
-        (ior:SI (and:SI (lshiftrt:SI (match_operand:SI 3 "gpc_reg_operand" "r")
-                                     (match_operand:SI 2 "const_int_operand" "i"))
-                        (match_operand:SI 5 "mask_operand" "i"))
-                (and:SI (match_operand:SI 4 "gpc_reg_operand" "0")
-                        (match_operand:SI 1 "mask_operand" "i"))))]
-  "INTVAL(operands[1]) == ~INTVAL(operands[5])"
-  "*
-{
- int me = extract_ME(operands[5]);
- int mb = extract_MB(operands[5]);
- operands[4] = GEN_INT(32 - INTVAL(operands[2]));
- operands[2] = GEN_INT(mb);
- operands[1] = GEN_INT(me);
- return \"rlwimi %0,%3,%h4,%h2,%h1\";
-}"
+; There are also some forms without one of the ANDs.
+(define_insn "*rotl<mode>3_insert_3"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+	(ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0")
+			  (match_operand:GPR 4 "const_int_operand" "n"))
+		 (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+			     (match_operand:GPR 2 "const_int_operand" "n"))))]
+  "INTVAL (operands[2]) == exact_log2 (UINTVAL (operands[4]) + 1)"
+{
+  if (<MODE>mode == SImode)
+    return "rlwimi %0,%1,%h2,0,31-%h2";
+  else
+    return "rldimi %0,%1,%H2,0";
+}
   [(set_attr "type" "insert")])
 
-(define_insn "insvdi_internal"
-  [(set (zero_extract:DI (match_operand:DI 0 "gpc_reg_operand" "+r")
-			 (match_operand:SI 1 "const_int_operand" "i")
-			 (match_operand:SI 2 "const_int_operand" "i"))
-	(match_operand:DI 3 "gpc_reg_operand" "r"))]
-  "TARGET_POWERPC64"
-  "*
-{
-  int start = INTVAL (operands[2]) & 63;
-  int size = INTVAL (operands[1]) & 63;
-
-  operands[1] = GEN_INT (64 - start - size);
-  return \"rldimi %0,%3,%H1,%H2\";
-}"
-  [(set_attr "type" "insert")
-   (set_attr "size" "64")])
-
-(define_insn "*insvdi_internal2"
-  [(set (zero_extract:DI (match_operand:DI 0 "gpc_reg_operand" "+r")
-			 (match_operand:SI 1 "const_int_operand" "i")
-			 (match_operand:SI 2 "const_int_operand" "i"))
-	(ashiftrt:DI (match_operand:DI 3 "gpc_reg_operand" "r")
-		     (match_operand:SI 4 "const_int_operand" "i")))]
-  "TARGET_POWERPC64
-   && insvdi_rshift_rlwimi_p (operands[1], operands[2], operands[4])"
-  "*
-{
-  int shift = INTVAL (operands[4]) & 63;
-  int start = (INTVAL (operands[2]) & 63) - 32;
-  int size = INTVAL (operands[1]) & 63;
-
-  operands[4] = GEN_INT (64 - shift - start - size);
-  operands[2] = GEN_INT (start);
-  operands[1] = GEN_INT (start + size - 1);
-  return \"rlwimi %0,%3,%h4,%h2,%h1\";
-}")
-
-(define_insn "*insvdi_internal3"
-  [(set (zero_extract:DI (match_operand:DI 0 "gpc_reg_operand" "+r")
-			 (match_operand:SI 1 "const_int_operand" "i")
-			 (match_operand:SI 2 "const_int_operand" "i"))
-	(lshiftrt:DI (match_operand:DI 3 "gpc_reg_operand" "r")
-		     (match_operand:SI 4 "const_int_operand" "i")))]
-  "TARGET_POWERPC64
-   && insvdi_rshift_rlwimi_p (operands[1], operands[2], operands[4])"
-  "*
-{
-  int shift = INTVAL (operands[4]) & 63;
-  int start = (INTVAL (operands[2]) & 63) - 32;
-  int size = INTVAL (operands[1]) & 63;
-
-  operands[4] = GEN_INT (64 - shift - start - size);
-  operands[2] = GEN_INT (start);
-  operands[1] = GEN_INT (start + size - 1);
-  return \"rlwimi %0,%3,%h4,%h2,%h1\";
-}")
-
-(define_expand "extzv"
-  [(set (match_operand 0 "gpc_reg_operand" "")
-	(zero_extract (match_operand 1 "gpc_reg_operand" "")
-		       (match_operand:SI 2 "const_int_operand" "")
-		       (match_operand:SI 3 "const_int_operand" "")))]
-  ""
-  "
-{
-  /* Do not handle 16/8 bit structures that fit in HI/QI modes directly, since
-     the (SUBREG:SI (REG:HI xxx)) that is otherwise generated can confuse the
-     compiler if the address of the structure is taken later.  */
-  if (GET_CODE (operands[0]) == SUBREG
-      && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))) < UNITS_PER_WORD))
-    FAIL;
-
-  if (TARGET_POWERPC64 && GET_MODE (operands[1]) == DImode)
-    emit_insn (gen_extzvdi_internal (operands[0], operands[1], operands[2],
-				     operands[3]));
+(define_insn "*rotl<mode>3_insert_4"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+	(ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0")
+			  (match_operand:GPR 4 "const_int_operand" "n"))
+		 (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+			       (match_operand:GPR 2 "const_int_operand" "n"))))]
+  "<MODE>mode == SImode
+   && GET_MODE_PRECISION (<MODE>mode)
+      == INTVAL (operands[2]) + exact_log2 (-UINTVAL (operands[4]))"
+{
+  operands[2] = GEN_INT (GET_MODE_PRECISION (<MODE>mode)
+			 - INTVAL (operands[2]));
+  if (<MODE>mode == SImode)
+    return "rlwimi %0,%1,%h2,32-%h2,31";
   else
-    FAIL;
-
-  DONE;
-}")
+    return "rldimi %0,%1,%H2,64-%H2";
+}
+  [(set_attr "type" "insert")])
 
-(define_insn "extzvdi_internal"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
-	(zero_extract:DI (match_operand:DI 1 "gpc_reg_operand" "r")
-			 (match_operand:SI 2 "const_int_operand" "i")
-			 (match_operand:SI 3 "const_int_operand" "i")))]
-  "TARGET_POWERPC64"
-  "*
-{
-  int start = INTVAL (operands[3]) & 63;
-  int size = INTVAL (operands[2]) & 63;
 
-  if (start + size >= 64)
-    operands[3] = const0_rtx;
-  else
-    operands[3] = GEN_INT (start + size);
-  operands[2] = GEN_INT (64 - size);
-  return \"rldicl %0,%1,%3,%2\";
-}"
-  [(set_attr "type" "shift")])
+; This handles the important case of multiple-precision shifts.  There is
+; no canonicalization rule for ASHIFT vs. LSHIFTRT, so two patterns.
+(define_split
+  [(set (match_operand:GPR 0 "gpc_reg_operand")
+	(ior:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
+			     (match_operand:GPR 3 "const_int_operand"))
+		 (lshiftrt:GPR (match_operand:GPR 2 "gpc_reg_operand")
+			       (match_operand:GPR 4 "const_int_operand"))))]
+  "can_create_pseudo_p ()
+   && INTVAL (operands[3]) + INTVAL (operands[4])
+      >= GET_MODE_PRECISION (<MODE>mode)"
+  [(set (match_dup 5)
+	(lshiftrt:GPR (match_dup 2)
+		      (match_dup 4)))
+   (set (match_dup 0)
+	(ior:GPR (and:GPR (match_dup 5)
+			  (match_dup 6))
+		 (ashift:GPR (match_dup 1)
+			     (match_dup 3))))]
+{
+  unsigned HOST_WIDE_INT mask = 1;
+  mask = (mask << INTVAL (operands[3])) - 1;
+  operands[5] = gen_reg_rtx (<MODE>mode);
+  operands[6] = GEN_INT (mask);
+})
 
-(define_insn "*extzvdi_internal1"
-  [(set (match_operand:CC 0 "gpc_reg_operand" "=x")
-	(compare:CC (zero_extract:DI (match_operand:DI 1 "gpc_reg_operand" "r")
-			 (match_operand:SI 2 "const_int_operand" "i")
-			 (match_operand:SI 3 "const_int_operand" "i"))
-		    (const_int 0)))
-   (clobber (match_scratch:DI 4 "=r"))]
-  "TARGET_64BIT && rs6000_gen_cell_microcode"
-  "*
-{
-  int start = INTVAL (operands[3]) & 63;
-  int size = INTVAL (operands[2]) & 63;
+(define_split
+  [(set (match_operand:GPR 0 "gpc_reg_operand")
+	(ior:GPR (lshiftrt:GPR (match_operand:GPR 2 "gpc_reg_operand")
+			       (match_operand:GPR 4 "const_int_operand"))
+		 (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
+			     (match_operand:GPR 3 "const_int_operand"))))]
+  "can_create_pseudo_p ()
+   && INTVAL (operands[3]) + INTVAL (operands[4])
+      >= GET_MODE_PRECISION (<MODE>mode)"
+  [(set (match_dup 5)
+	(lshiftrt:GPR (match_dup 2)
+		      (match_dup 4)))
+   (set (match_dup 0)
+	(ior:GPR (and:GPR (match_dup 5)
+			  (match_dup 6))
+		 (ashift:GPR (match_dup 1)
+			     (match_dup 3))))]
+{
+  unsigned HOST_WIDE_INT mask = 1;
+  mask = (mask << INTVAL (operands[3])) - 1;
+  operands[5] = gen_reg_rtx (<MODE>mode);
+  operands[6] = GEN_INT (mask);
+})
 
-  if (start + size >= 64)
-    operands[3] = const0_rtx;
-  else
-    operands[3] = GEN_INT (start + size);
-  operands[2] = GEN_INT (64 - size);
-  return \"rldicl. %4,%1,%3,%2\";
-}"
-  [(set_attr "type" "shift")
-   (set_attr "dot" "yes")])
 
-(define_insn "*extzvdi_internal2"
-  [(set (match_operand:CC 4 "cc_reg_operand" "=x")
-	(compare:CC (zero_extract:DI (match_operand:DI 1 "gpc_reg_operand" "r")
-			 (match_operand:SI 2 "const_int_operand" "i")
-			 (match_operand:SI 3 "const_int_operand" "i"))
-		    (const_int 0)))
-   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
-	(zero_extract:DI (match_dup 1) (match_dup 2) (match_dup 3)))]
-  "TARGET_64BIT && rs6000_gen_cell_microcode"
-  "*
-{
-  int start = INTVAL (operands[3]) & 63;
-  int size = INTVAL (operands[2]) & 63;
+; Another important case is setting some bits to 1; we can do that with
+; an insert instruction, in many cases.
+(define_insn_and_split "*ior<mode>_mask"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+	(ior:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+		 (match_operand:GPR 2 "const_int_operand" "n")))]
+  "can_create_pseudo_p ()
+   && !logical_const_operand (operands[2], <MODE>mode)
+   && rs6000_is_valid_mask (operands[2], NULL, NULL, <MODE>mode)"
+  "#"
+  "&& 1"
+  [(set (match_dup 3)
+	(const_int -1))
+   (set (match_dup 0)
+	(ior:GPR (and:GPR (rotate:GPR (match_dup 3)
+				      (match_dup 4))
+			  (match_dup 2))
+		 (and:GPR (match_dup 1)
+			  (match_dup 5))))]
+{
+  int nb, ne;
+  rs6000_is_valid_mask (operands[2], &nb, &ne, <MODE>mode);
+  operands[3] = gen_reg_rtx (<MODE>mode);
+  operands[4] = GEN_INT (ne);
+  operands[5] = GEN_INT (~UINTVAL (operands[2]));
+}
+  [(set_attr "type" "two")
+   (set_attr "length" "8")])
 
-  if (start + size >= 64)
-    operands[3] = const0_rtx;
-  else
-    operands[3] = GEN_INT (start + size);
-  operands[2] = GEN_INT (64 - size);
-  return \"rldicl. %0,%1,%3,%2\";
-}"
-  [(set_attr "type" "shift")
-   (set_attr "dot" "yes")])
 
+;; Now the simple shifts.
 
 (define_insn "rotl<mode>3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
@@ -3747,74 +3698,6 @@  (define_insn_and_split "*rotl<mode>3_dot2"
    (set_attr "length" "4,8")])
 
 
-(define_insn "*rotlsi3_mask"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
-	(and:SI (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
-			   (match_operand:SI 2 "reg_or_cint_operand" "rn"))
-		(match_operand:SI 3 "mask_operand" "n")))]
-  ""
-  "rlw%I2nm %0,%1,%h2,%m3,%M3"
-  [(set_attr "type" "shift")
-   (set_attr "maybe_var_shift" "yes")])
-
-(define_insn_and_split "*rotlsi3_mask_dot"
-  [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
-	(compare:CC
-	 (and:SI (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
-			    (match_operand:SI 2 "reg_or_cint_operand" "rn,rn"))
-		 (match_operand:SI 3 "mask_operand" "n,n"))
-	 (const_int 0)))
-   (clobber (match_scratch:SI 0 "=r,r"))]
-  "rs6000_gen_cell_microcode
-   && (TARGET_32BIT || UINTVAL (operands[3]) <= 0x7fffffff)"
-  "@
-   rlw%I2nm. %0,%1,%h2,%m3,%M3
-   #"
-  "&& reload_completed && cc_reg_not_cr0_operand (operands[4], CCmode)"
-  [(set (match_dup 0)
-	(and:SI (rotate:SI (match_dup 1)
-			   (match_dup 2))
-		(match_dup 3)))
-   (set (match_dup 4)
-	(compare:CC (match_dup 0)
-		    (const_int 0)))]
-  ""
-  [(set_attr "type" "shift")
-   (set_attr "maybe_var_shift" "yes")
-   (set_attr "dot" "yes")
-   (set_attr "length" "4,8")])
-
-(define_insn_and_split "*rotlsi3_mask_dot2"
-  [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
-	(compare:CC
-	 (and:SI (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
-			    (match_operand:SI 2 "reg_or_cint_operand" "rn,rn"))
-		 (match_operand:SI 3 "mask_operand" "n,n"))
-	 (const_int 0)))
-   (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
-	(and:SI (rotate:SI (match_dup 1)
-			   (match_dup 2))
-		(match_dup 3)))]
-  "rs6000_gen_cell_microcode
-   && (TARGET_32BIT || UINTVAL (operands[3]) <= 0x7fffffff)"
-  "@
-   rlw%I2nm. %0,%1,%h2,%m3,%M3
-   #"
-  "&& reload_completed && cc_reg_not_cr0_operand (operands[4], CCmode)"
-  [(set (match_dup 0)
-	(and:SI (rotate:SI (match_dup 1)
-			   (match_dup 2))
-		(match_dup 3)))
-   (set (match_dup 4)
-	(compare:CC (match_dup 0)
-		    (const_int 0)))]
-  ""
-  [(set_attr "type" "shift")
-   (set_attr "maybe_var_shift" "yes")
-   (set_attr "dot" "yes")
-   (set_attr "length" "4,8")])
-
-
 (define_insn "ashl<mode>3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
 	(ashift:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
@@ -3861,91 +3744,24 @@  (define_insn_and_split "*ashl<mode>3_dot2"
   [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
 	(compare:CC (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
 				(match_operand:SI 2 "reg_or_cint_operand" "rn,rn"))
-		    (const_int 0)))
-   (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
-	(ashift:GPR (match_dup 1)
-		    (match_dup 2)))]
-  "<MODE>mode == Pmode && rs6000_gen_cell_microcode"
-  "@
-   sl<wd>%I2. %0,%1,%<hH>2
-   #"
-  "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)"
-  [(set (match_dup 0)
-	(ashift:GPR (match_dup 1)
-		    (match_dup 2)))
-   (set (match_dup 3)
-	(compare:CC (match_dup 0)
-		    (const_int 0)))]
-  ""
-  [(set_attr "type" "shift")
-   (set_attr "maybe_var_shift" "yes")
-   (set_attr "dot" "yes")
-   (set_attr "length" "4,8")])
-
-
-(define_insn "*ashlsi3_imm_mask"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
-	(and:SI (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r")
-			   (match_operand:SI 2 "const_int_operand" "i"))
-		(match_operand:SI 3 "mask_operand" "n")))]
-  "includes_lshift_p (operands[2], operands[3])"
-  "rlwinm %0,%1,%h2,%m3,%M3"
-  [(set_attr "type" "shift")])
-
-(define_insn_and_split "*ashlsi3_imm_mask_dot"
-  [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
-	(compare:CC
-	 (and:SI (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
-			    (match_operand:SI 2 "const_int_operand" "i,i"))
-		 (match_operand:SI 3 "mask_operand" "n,n"))
-	 (const_int 0)))
-   (clobber (match_scratch:SI 0 "=r,r"))]
-  "rs6000_gen_cell_microcode
-   && (TARGET_32BIT || UINTVAL (operands[3]) <= 0x7fffffff)
-   && includes_lshift_p (operands[2], operands[3])"
-  "@
-   rlwinm. %0,%1,%h2,%m3,%M3
-   #"
-  "&& reload_completed && cc_reg_not_cr0_operand (operands[4], CCmode)"
-  [(set (match_dup 0)
-	(and:SI (ashift:SI (match_dup 1)
-			   (match_dup 2))
-		(match_dup 3)))
-   (set (match_dup 4)
-	(compare:CC (match_dup 0)
-		    (const_int 0)))]
-  ""
-  [(set_attr "type" "shift")
-   (set_attr "dot" "yes")
-   (set_attr "length" "4,8")])
-
-(define_insn_and_split "*ashlsi3_imm_mask_dot2"
-  [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
-	(compare:CC
-	 (and:SI (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
-			    (match_operand:SI 2 "const_int_operand" "i,i"))
-		 (match_operand:SI 3 "mask_operand" "n,n"))
-	 (const_int 0)))
-   (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
-	(and:SI (ashift:SI (match_dup 1)
-			   (match_dup 2))
-		(match_dup 3)))]
-  "rs6000_gen_cell_microcode
-   && (TARGET_32BIT || UINTVAL (operands[3]) <= 0x7fffffff)
-   && includes_lshift_p (operands[2], operands[3])"
+		    (const_int 0)))
+   (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
+	(ashift:GPR (match_dup 1)
+		    (match_dup 2)))]
+  "<MODE>mode == Pmode && rs6000_gen_cell_microcode"
   "@
-   rlwinm. %0,%1,%h2,%m3,%M3
+   sl<wd>%I2. %0,%1,%<hH>2
    #"
-  "&& reload_completed && cc_reg_not_cr0_operand (operands[4], CCmode)"
+  "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)"
   [(set (match_dup 0)
-	(and:SI (ashift:SI (match_dup 1)
-			   (match_dup 2))
-		(match_dup 3)))
-   (set (match_dup 4)
+	(ashift:GPR (match_dup 1)
+		    (match_dup 2)))
+   (set (match_dup 3)
 	(compare:CC (match_dup 0)
 		    (const_int 0)))]
   ""
   [(set_attr "type" "shift")
+   (set_attr "maybe_var_shift" "yes")
    (set_attr "dot" "yes")
    (set_attr "length" "4,8")])
 
@@ -4018,97 +3834,7 @@  (define_insn_and_split "*lshr<mode>3_dot2"
    (set_attr "length" "4,8")])
 
 
-(define_insn "*lshrsi3_imm_mask"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
-	(and:SI (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
-			     (match_operand:SI 2 "const_int_operand" "i"))
-		(match_operand:SI 3 "mask_operand" "n")))]
-  "includes_rshift_p (operands[2], operands[3])"
-  "rlwinm %0,%1,%s2,%m3,%M3"
-  [(set_attr "type" "shift")])
-
-(define_insn_and_split "*lshrsi3_imm_mask_dot"
-  [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
-	(compare:CC
-	 (and:SI (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
-			      (match_operand:SI 2 "const_int_operand" "i,i"))
-		 (match_operand:SI 3 "mask_operand" "n,n"))
-	 (const_int 0)))
-   (clobber (match_scratch:SI 0 "=r,r"))]
-  "rs6000_gen_cell_microcode
-   && (TARGET_32BIT || UINTVAL (operands[3]) <= 0x7fffffff)
-   && includes_rshift_p (operands[2], operands[3])"
-  "@
-   rlwinm. %0,%1,%s2,%m3,%M3
-   #"
-  "&& reload_completed && cc_reg_not_cr0_operand (operands[4], CCmode)"
-  [(set (match_dup 0)
-	(and:SI (lshiftrt:SI (match_dup 1)
-			     (match_dup 2))
-		(match_dup 3)))
-   (set (match_dup 4)
-	(compare:CC (match_dup 0)
-		    (const_int 0)))]
-  ""
-  [(set_attr "type" "shift")
-   (set_attr "dot" "yes")
-   (set_attr "length" "4,8")])
-
-(define_insn_and_split "*lshrsi3_imm_mask_dot2"
-  [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
-	(compare:CC
-	 (and:SI (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
-			      (match_operand:SI 2 "const_int_operand" "i,i"))
-		 (match_operand:SI 3 "mask_operand" "n,n"))
-	 (const_int 0)))
-   (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
-	(and:SI (lshiftrt:SI (match_dup 1)
-			     (match_dup 2))
-		(match_dup 3)))]
-  "rs6000_gen_cell_microcode
-   && (TARGET_32BIT || UINTVAL (operands[3]) <= 0x7fffffff)
-   && includes_rshift_p (operands[2], operands[3])"
-  "@
-   rlwinm. %0,%1,%s2,%m3,%M3
-   #"
-  "&& reload_completed && cc_reg_not_cr0_operand (operands[4], CCmode)"
-  [(set (match_dup 0)
-	(and:SI (lshiftrt:SI (match_dup 1)
-			     (match_dup 2))
-		(match_dup 3)))
-   (set (match_dup 4)
-	(compare:CC (match_dup 0)
-		    (const_int 0)))]
-  ""
-  [(set_attr "type" "shift")
-   (set_attr "dot" "yes")
-   (set_attr "length" "4,8")])
-
-
-(define_expand "ashr<mode>3"
-  [(parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "")
-		   (ashiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
-				 (match_operand:SI 2 "reg_or_cint_operand" "")))
-	      (clobber (reg:GPR CA_REGNO))])]
-  ""
-{
-  /* The generic code does not generate optimal code for the low word
-     (it should be a rlwimi and a rot).  Until we have target code to
-     solve this generically, keep this expander.  */
-
-  if (<MODE>mode == DImode && !TARGET_POWERPC64)
-    {
-      if (CONST_INT_P (operands[2]))
-	{
-	  emit_insn (gen_ashrdi3_no_power (operands[0], operands[1], operands[2]));
-	  DONE;
-	}
-      else
-	FAIL;
-    }
-})
-
-(define_insn "*ashr<mode>3"
+(define_insn "ashr<mode>3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
 	(ashiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
 		      (match_operand:SI 2 "reg_or_cint_operand" "rn")))
@@ -5736,373 +5462,6 @@  (define_expand "subti3"
   emit_insn (gen_subfdi3_carry_in (hi0, hi2, hi1));
   DONE;
 })
-
-
-;; Shift by a variable amount is too complex to be worth open-coding.  We
-;; just handle shifts by constants.
-(define_insn "ashrdi3_no_power"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,&r")
-	(ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
-		     (match_operand:SI 2 "const_int_operand" "M,i")))
-   (clobber (reg:SI CA_REGNO))]
-  "!TARGET_POWERPC64"
-{
-  switch (which_alternative)
-    {
-    default:
-      gcc_unreachable ();
-    case 0:
-      if (WORDS_BIG_ENDIAN)
-        return \"srawi %0,%1,31\;srawi %L0,%1,%h2\";
-      else
-        return \"srawi %L0,%L1,31\;srawi %0,%L1,%h2\";
-    case 1:
-      if (WORDS_BIG_ENDIAN)
-	return \"srwi %L0,%L1,%h2\;insrwi %L0,%1,%h2,0\;srawi %0,%1,%h2\";
-      else
-	return \"srwi %0,%1,%h2\;insrwi %0,%L1,%h2,0\;srawi %L0,%L1,%h2\";
-    }
-}
-  [(set_attr "type" "two,three")
-   (set_attr "length" "8,12")])
-
-(define_insn "*ashrdisi3_noppc64be"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
-        (subreg:SI (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r")
-                                (const_int 32)) 4))]
-  "TARGET_32BIT && !TARGET_POWERPC64 && WORDS_BIG_ENDIAN"
-  "*
-{
-  if (REGNO (operands[0]) == REGNO (operands[1]))
-    return \"\";
-  else
-    return \"mr %0,%1\";
-}"
-   [(set_attr "length" "4")])
-
-
-;; PowerPC64 DImode operations.
-
-(define_insn "*rotldi3_internal4"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
-	(and:DI (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r")
-			   (match_operand:DI 2 "reg_or_cint_operand" "rn"))
-		(match_operand:DI 3 "mask64_operand" "n")))]
-  "TARGET_POWERPC64"
-  "rld%I2c%B3 %0,%1,%H2,%S3"
-  [(set_attr "type" "shift")
-   (set_attr "maybe_var_shift" "yes")])
-
-(define_insn "*rotldi3_internal5"
-  [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
-	(compare:CC (and:DI
-		     (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
-				(match_operand:DI 2 "reg_or_cint_operand" "rn,rn"))
-		     (match_operand:DI 3 "mask64_operand" "n,n"))
-		    (const_int 0)))
-   (clobber (match_scratch:DI 4 "=r,r"))]
-  "TARGET_64BIT"
-  "@
-   rld%I2c%B3. %4,%1,%H2,%S3
-   #"
-  [(set_attr "type" "shift")
-   (set_attr "maybe_var_shift" "yes")
-   (set_attr "dot" "yes")
-   (set_attr "length" "4,8")])
-
-(define_split
-  [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
-	(compare:CC (and:DI
-		     (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
-				(match_operand:DI 2 "reg_or_cint_operand" ""))
-		     (match_operand:DI 3 "mask64_operand" ""))
-		    (const_int 0)))
-   (clobber (match_scratch:DI 4 ""))]
-  "TARGET_POWERPC64 && reload_completed"
-  [(set (match_dup 4)
-	(and:DI (rotate:DI (match_dup 1)
-				(match_dup 2))
-		     (match_dup 3)))
-   (set (match_dup 0)
-	(compare:CC (match_dup 4)
-		    (const_int 0)))]
-  "")
-
-(define_insn "*rotldi3_internal6"
-  [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
-	(compare:CC (and:DI
-		     (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
-				(match_operand:DI 2 "reg_or_cint_operand" "rn,rn"))
-		     (match_operand:DI 3 "mask64_operand" "n,n"))
-		    (const_int 0)))
-   (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
-	(and:DI (rotate:DI (match_dup 1) (match_dup 2)) (match_dup 3)))]
-  "TARGET_64BIT"
-  "@
-   rld%I2c%B3. %0,%1,%H2,%S3
-   #"
-  [(set_attr "type" "shift")
-   (set_attr "maybe_var_shift" "yes")
-   (set_attr "dot" "yes")
-   (set_attr "length" "4,8")])
-
-(define_split
-  [(set (match_operand:CC 4 "cc_reg_not_micro_cr0_operand" "")
-	(compare:CC (and:DI
-		     (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
-				(match_operand:DI 2 "reg_or_cint_operand" ""))
-		     (match_operand:DI 3 "mask64_operand" ""))
-		    (const_int 0)))
-   (set (match_operand:DI 0 "gpc_reg_operand" "")
-	(and:DI (rotate:DI (match_dup 1) (match_dup 2)) (match_dup 3)))]
-  "TARGET_POWERPC64 && reload_completed"
-  [(set (match_dup 0)
-	(and:DI (rotate:DI (match_dup 1) (match_dup 2)) (match_dup 3)))
-   (set (match_dup 4)
-	(compare:CC (match_dup 0)
-		    (const_int 0)))]
-  "")
-
-
-(define_insn "*ashldi3_internal4"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
-	(and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r")
-			   (match_operand:SI 2 "const_int_operand" "i"))
-		(match_operand:DI 3 "const_int_operand" "n")))]
-  "TARGET_POWERPC64 && includes_rldic_lshift_p (operands[2], operands[3])"
-  "rldic %0,%1,%H2,%W3"
-  [(set_attr "type" "shift")])
-
-(define_insn "ashldi3_internal5"
-  [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
-	(compare:CC
-	 (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
-			    (match_operand:SI 2 "const_int_operand" "i,i"))
-		 (match_operand:DI 3 "const_int_operand" "n,n"))
-	 (const_int 0)))
-   (clobber (match_scratch:DI 4 "=r,r"))]
-  "TARGET_64BIT && includes_rldic_lshift_p (operands[2], operands[3])"
-  "@
-   rldic. %4,%1,%H2,%W3
-   #"
-  [(set_attr "type" "shift")
-   (set_attr "dot" "yes")
-   (set_attr "length" "4,8")])
-
-(define_split
-  [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
-	(compare:CC
-	 (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "")
-			    (match_operand:SI 2 "const_int_operand" ""))
-		 (match_operand:DI 3 "const_int_operand" ""))
-	 (const_int 0)))
-   (clobber (match_scratch:DI 4 ""))]
-  "TARGET_POWERPC64 && reload_completed
-   && includes_rldic_lshift_p (operands[2], operands[3])"
-  [(set (match_dup 4)
-	(and:DI (ashift:DI (match_dup 1) (match_dup 2))
-		(match_dup 3)))
-   (set (match_dup 0)
-	(compare:CC (match_dup 4)
-		    (const_int 0)))]
-  "")
-
-(define_insn "*ashldi3_internal6"
-  [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
-	(compare:CC
-	 (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
-			    (match_operand:SI 2 "const_int_operand" "i,i"))
-		    (match_operand:DI 3 "const_int_operand" "n,n"))
-	 (const_int 0)))
-   (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
-	(and:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))]
-  "TARGET_64BIT && includes_rldic_lshift_p (operands[2], operands[3])"
-  "@
-   rldic. %0,%1,%H2,%W3
-   #"
-  [(set_attr "type" "shift")
-   (set_attr "dot" "yes")
-   (set_attr "length" "4,8")])
-
-(define_split
-  [(set (match_operand:CC 4 "cc_reg_not_micro_cr0_operand" "")
-	(compare:CC
-	 (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "")
-			    (match_operand:SI 2 "const_int_operand" ""))
-		 (match_operand:DI 3 "const_int_operand" ""))
-	 (const_int 0)))
-   (set (match_operand:DI 0 "gpc_reg_operand" "")
-	(and:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))]
-  "TARGET_POWERPC64 && reload_completed
-   && includes_rldic_lshift_p (operands[2], operands[3])"
-  [(set (match_dup 0)
-	(and:DI (ashift:DI (match_dup 1) (match_dup 2))
-		(match_dup 3)))
-   (set (match_dup 4)
-	(compare:CC (match_dup 0)
-		    (const_int 0)))]
-  "")
-
-(define_insn "*ashldi3_internal7"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
-	(and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r")
-			   (match_operand:SI 2 "const_int_operand" "i"))
-		(match_operand:DI 3 "mask64_operand" "n")))]
-  "TARGET_POWERPC64 && includes_rldicr_lshift_p (operands[2], operands[3])"
-  "rldicr %0,%1,%H2,%S3"
-  [(set_attr "type" "shift")])
-
-(define_insn "ashldi3_internal8"
-  [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
-	(compare:CC
-	 (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
-			    (match_operand:SI 2 "const_int_operand" "i,i"))
-		 (match_operand:DI 3 "mask64_operand" "n,n"))
-	 (const_int 0)))
-   (clobber (match_scratch:DI 4 "=r,r"))]
-  "TARGET_64BIT && includes_rldicr_lshift_p (operands[2], operands[3])"
-  "@
-   rldicr. %4,%1,%H2,%S3
-   #"
-  [(set_attr "type" "shift")
-   (set_attr "dot" "yes")
-   (set_attr "length" "4,8")])
-
-(define_split
-  [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
-	(compare:CC
-	 (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "")
-			    (match_operand:SI 2 "const_int_operand" ""))
-		 (match_operand:DI 3 "mask64_operand" ""))
-	 (const_int 0)))
-   (clobber (match_scratch:DI 4 ""))]
-  "TARGET_POWERPC64 && reload_completed
-   && includes_rldicr_lshift_p (operands[2], operands[3])"
-  [(set (match_dup 4)
-	(and:DI (ashift:DI (match_dup 1) (match_dup 2))
-		(match_dup 3)))
-   (set (match_dup 0)
-	(compare:CC (match_dup 4)
-		    (const_int 0)))]
-  "")
-
-(define_insn "*ashldi3_internal9"
-  [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
-	(compare:CC
-	 (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
-			    (match_operand:SI 2 "const_int_operand" "i,i"))
-		    (match_operand:DI 3 "mask64_operand" "n,n"))
-	 (const_int 0)))
-   (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
-	(and:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))]
-  "TARGET_64BIT && includes_rldicr_lshift_p (operands[2], operands[3])"
-  "@
-   rldicr. %0,%1,%H2,%S3
-   #"
-  [(set_attr "type" "shift")
-   (set_attr "dot" "yes")
-   (set_attr "length" "4,8")])
-
-(define_split
-  [(set (match_operand:CC 4 "cc_reg_not_micro_cr0_operand" "")
-	(compare:CC
-	 (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "")
-			    (match_operand:SI 2 "const_int_operand" ""))
-		 (match_operand:DI 3 "mask64_operand" ""))
-	 (const_int 0)))
-   (set (match_operand:DI 0 "gpc_reg_operand" "")
-	(and:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))]
-  "TARGET_POWERPC64 && reload_completed
-   && includes_rldicr_lshift_p (operands[2], operands[3])"
-  [(set (match_dup 0)
-	(and:DI (ashift:DI (match_dup 1) (match_dup 2))
-		(match_dup 3)))
-   (set (match_dup 4)
-	(compare:CC (match_dup 0)
-		    (const_int 0)))]
-  "")
-
-
-(define_insn_and_split "*anddi3_2rld"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
-	(and:DI (match_operand:DI 1 "gpc_reg_operand" "%r")
-		(match_operand:DI 2 "and_2rld_operand" "n")))]
-  "TARGET_POWERPC64"
-  "#"
-  ""
-  [(set (match_dup 0)
-	(and:DI (rotate:DI (match_dup 1)
-			   (match_dup 4))
-		(match_dup 5)))
-   (set (match_dup 0)
-	(and:DI (rotate:DI (match_dup 0)
-			   (match_dup 6))
-		(match_dup 7)))]
-{
-  build_mask64_2_operands (operands[2], &operands[4]);
-}
-  [(set_attr "length" "8")])
-
-(define_insn_and_split "*anddi3_2rld_dot"
-  [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
-	(compare:CC (and:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r")
-			    (match_operand:DI 2 "and_2rld_operand" "n,n"))
-		    (const_int 0)))
-   (clobber (match_scratch:DI 0 "=r,r"))]
-  "TARGET_64BIT && rs6000_gen_cell_microcode"
-  "@
-   #
-   #"
-  "&& reload_completed"
-  [(set (match_dup 0)
-	(and:DI (rotate:DI (match_dup 1)
-			   (match_dup 4))
-		(match_dup 5)))
-   (parallel [(set (match_dup 3)
-		   (compare:CC (and:DI (rotate:DI (match_dup 0)
-						  (match_dup 6))
-				       (match_dup 7))
-			       (const_int 0)))
-	      (clobber (match_dup 0))])]
-{
-  build_mask64_2_operands (operands[2], &operands[4]);
-}
-  [(set_attr "type" "two")
-   (set_attr "dot" "yes")
-   (set_attr "length" "8,12")])
-
-(define_insn_and_split "*anddi3_2rld_dot2"
-  [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
-	(compare:CC (and:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r")
-			    (match_operand:DI 2 "and_2rld_operand" "n,n"))
-		    (const_int 0)))
-   (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
-	(and:DI (match_dup 1)
-		(match_dup 2)))]
-  "TARGET_64BIT && rs6000_gen_cell_microcode"
-  "@
-   #
-   #"
-  "&& reload_completed"
-  [(set (match_dup 0)
-	(and:DI (rotate:DI (match_dup 1)
-			   (match_dup 4))
-		(match_dup 5)))
-   (parallel [(set (match_dup 3)
-		   (compare:CC (and:DI (rotate:DI (match_dup 0)
-						  (match_dup 6))
-				       (match_dup 7))
-			       (const_int 0)))
-	      (set (match_dup 0)
-		   (and:DI (rotate:DI (match_dup 0)
-				      (match_dup 6))
-			   (match_dup 7)))])]
-{
-  build_mask64_2_operands (operands[2], &operands[4]);
-}
-  [(set_attr "type" "two")
-   (set_attr "dot" "yes")
-   (set_attr "length" "8,12")])
 
 ;; 128-bit logical operations expanders
 
@@ -7685,16 +7044,18 @@  (define_insn "*movdi_internal64"
   [(set_attr "type" "store,load,*,*,*,*,fpstore,fpload,fp,mfjmpr,mtjmpr,*,mftgpr,mffgpr,mftgpr,mffgpr,vecsimple")
    (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4")])
 
-;; Generate all one-bits and clear left or right.
-;; Use (and:DI (rotate:DI ...)) to avoid anddi3 unnecessary clobber.
+; Some DImode loads are best done as a load of -1 followed by a mask
+; instruction.
 (define_split
-  [(set (match_operand:DI 0 "gpc_reg_operand" "")
-	(match_operand:DI 1 "mask64_operand" ""))]
-  "TARGET_POWERPC64 && num_insns_constant (operands[1], DImode) > 1"
-  [(set (match_dup 0) (const_int -1))
+  [(set (match_operand:DI 0 "gpc_reg_operand")
+	(match_operand:DI 1 "const_int_operand"))]
+  "TARGET_POWERPC64
+   && num_insns_constant (operands[1], DImode) > 1
+   && rs6000_is_valid_and_mask (operands[1], DImode)"
+  [(set (match_dup 0)
+	(const_int -1))
    (set (match_dup 0)
-	(and:DI (rotate:DI (match_dup 0)
-			   (const_int 0))
+	(and:DI (match_dup 0)
 		(match_dup 1)))]
   "")