===================================================================
@@ -69,7 +69,7 @@
extern void rs6000_generate_float2_double_code (rtx, rtx, rtx);
extern void rs6000_generate_vsigned2_code (bool, rtx, rtx, rtx);
extern int expand_block_clear (rtx[]);
-extern int expand_block_move (rtx[]);
+extern int expand_block_move (rtx[], bool);
extern bool expand_block_compare (rtx[]);
extern bool expand_strn_compare (rtx[], int);
extern bool rs6000_is_valid_mask (rtx, int *, int *, machine_mode);
===================================================================
@@ -2719,7 +2719,7 @@
#define MAX_MOVE_REG 4
int
-expand_block_move (rtx operands[])
+expand_block_move (rtx operands[], bool might_overlap)
{
rtx orig_dest = operands[0];
rtx orig_src = operands[1];
@@ -2730,6 +2730,7 @@
int bytes;
int offset;
int move_bytes;
+ rtx loads[MAX_MOVE_REG];
rtx stores[MAX_MOVE_REG];
int num_reg = 0;
@@ -2817,47 +2818,35 @@
gen_func.mov = gen_movqi;
}
+ /* Mode is always set to something other than BLKmode by one of the
+ cases of the if statement above. */
+ gcc_assert (mode != BLKmode);
+
src = adjust_address (orig_src, mode, offset);
dest = adjust_address (orig_dest, mode, offset);
- if (mode != BLKmode)
- {
- rtx tmp_reg = gen_reg_rtx (mode);
+ rtx tmp_reg = gen_reg_rtx (mode);
+
+ loads[num_reg] = (*gen_func.mov) (tmp_reg, src);
+ stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
- emit_insn ((*gen_func.mov) (tmp_reg, src));
- stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
- }
+ /* In the might_overlap case all loads must be emitted before any store,
+ so the move must fit in one batch of MAX_MOVE_REG registers; else fail. */
+ if (might_overlap && num_reg >= MAX_MOVE_REG
+ && bytes > move_bytes)
+ return 0;
- if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
+ /* Emit loads and stores saved up. */
+ if (num_reg >= MAX_MOVE_REG || bytes == move_bytes)
{
int i;
for (i = 0; i < num_reg; i++)
+ emit_insn (loads[i]);
+ for (i = 0; i < num_reg; i++)
emit_insn (stores[i]);
num_reg = 0;
}
-
- if (mode == BLKmode)
- {
- /* Move the address into scratch registers. The movmemsi
- patterns require zero offset. */
- if (!REG_P (XEXP (src, 0)))
- {
- rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
- src = replace_equiv_address (src, src_reg);
- }
- set_mem_size (src, move_bytes);
-
- if (!REG_P (XEXP (dest, 0)))
- {
- rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
- dest = replace_equiv_address (dest, dest_reg);
- }
- set_mem_size (dest, move_bytes);
-
- emit_insn ((*gen_func.movmemsi) (dest, src,
- GEN_INT (move_bytes & 31),
- align_rtx));
- }
+
}
return 1;
===================================================================
@@ -9057,7 +9057,7 @@
FAIL;
})
-;; String/block move insn.
+;; String/block copy insn (source and destination must not overlap).
;; Argument 0 is the destination
;; Argument 1 is the source
;; Argument 2 is the length
@@ -9070,11 +9070,31 @@
(use (match_operand:SI 3 ""))])]
""
{
- if (expand_block_move (operands))
+ if (expand_block_move (operands, false))
DONE;
else
FAIL;
})
+
+;; String/block move insn (source and destination may overlap).
+;; Operand 0 is the destination and operand 1 the source (both BLKmode);
+;; operand 2 is the length and operand 3 the alignment (both SImode).
+;; Expansion is delegated to expand_block_move with might_overlap set to
+;; true; the pattern FAILs when that call returns zero.
+
+(define_expand "movmemsi"
+ [(parallel [(set (match_operand:BLK 0 "")
+ (match_operand:BLK 1 ""))
+ (use (match_operand:SI 2 ""))
+ (use (match_operand:SI 3 ""))])]
+ ""
+{
+ if (expand_block_move (operands, true))
+ DONE;
+ else
+ FAIL;
+})
+
;; Define insns that do load or store with update. Some of these we can
;; get by using pre-decrement or pre-increment, but the hardware can also