@@ -119,6 +119,7 @@ extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
extern int arm_gen_movmemqi (rtx *);
+extern bool gen_movmem_ldrd_strd (rtx *);
extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx,
HOST_WIDE_INT);
@@ -11836,6 +11836,153 @@ arm_gen_movmemqi (rtx *operands)
return 1;
}
+/* Helper for gen_movmem_ldrd_strd.  Return a copy of memory rtx MEM with
+   its address increased by the size of MEM's mode.  */
+inline static rtx
+next_consecutive_mem (rtx mem)
+{
+  enum machine_mode mode = GET_MODE (mem);
+  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
+  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
+
+  return adjust_automodify_address (mem, mode, addr, offset);
+}
+
+/* Copy a constant-length block using LDRD/STRD instructions whenever
+   possible.  OPERANDS[0] is the destination MEM, OPERANDS[1] the source
+   MEM, OPERANDS[2] the length in bytes and OPERANDS[3] the alignment;
+   the last two must be CONST_INTs.
+
+   Returns true upon success.  Returns false if the length exceeds 64
+   bytes, if either MEM is volatile, or if unaligned_access is disabled
+   and word alignment of both operands cannot be assumed.  If neither
+   MEM is word-aligned, the copy is handed to arm_gen_movmemqi and its
+   result is returned.  */
+bool
+gen_movmem_ldrd_strd (rtx *operands)
+{
+  unsigned HOST_WIDE_INT len;
+  HOST_WIDE_INT align;
+  rtx src, dst, base;
+  rtx reg0;
+  bool src_aligned, dst_aligned;
+  bool src_volatile, dst_volatile;
+
+  gcc_assert (CONST_INT_P (operands[2]));
+  gcc_assert (CONST_INT_P (operands[3]));
+
+  len = UINTVAL (operands[2]);
+  if (len > 64)
+    return false;
+
+  /* Maximum alignment we can assume for both src and dst buffers.  */
+  align = INTVAL (operands[3]);
+
+  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
+    return false;
+
+  /* Place src and dst addresses in registers
+     and update the corresponding mem rtx.  */
+  dst = operands[0];
+  dst_volatile = MEM_VOLATILE_P (dst);
+  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
+  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
+  dst = adjust_automodify_address (dst, VOIDmode, base, 0);
+
+  src = operands[1];
+  src_volatile = MEM_VOLATILE_P (src);
+  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
+  base = copy_to_mode_reg (SImode, XEXP (src, 0));
+  src = adjust_automodify_address (src, VOIDmode, base, 0);
+
+  if (!unaligned_access && !(src_aligned && dst_aligned))
+    return false;
+
+  if (src_volatile || dst_volatile)
+    return false;
+
+  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
+  if (!(dst_aligned || src_aligned))
+    return arm_gen_movmemqi (operands);
+
+  src = adjust_address (src, DImode, 0);
+  dst = adjust_address (dst, DImode, 0);
+  while (len >= 8)
+    {
+      len -= 8;
+      reg0 = gen_reg_rtx (DImode);
+      if (src_aligned)
+        emit_move_insn (reg0, src);
+      else
+        emit_insn (gen_unaligned_loaddi (reg0, src));
+
+      if (dst_aligned)
+        emit_move_insn (dst, reg0);
+      else
+        emit_insn (gen_unaligned_storedi (dst, reg0));
+
+      src = next_consecutive_mem (src);
+      dst = next_consecutive_mem (dst);
+    }
+
+  gcc_assert (len < 8);
+  if (len >= 4)
+    {
+      /* More than a word but less than a double-word to copy.  Copy a word.  */
+      reg0 = gen_reg_rtx (SImode);
+      src = adjust_address (src, SImode, 0);
+      dst = adjust_address (dst, SImode, 0);
+      if (src_aligned)
+        emit_move_insn (reg0, src);
+      else
+        emit_insn (gen_unaligned_loadsi (reg0, src));
+
+      if (dst_aligned)
+        emit_move_insn (dst, reg0);
+      else
+        emit_insn (gen_unaligned_storesi (dst, reg0));
+
+      src = next_consecutive_mem (src);
+      dst = next_consecutive_mem (dst);
+      len -= 4;
+    }
+
+  if (len == 0)
+    return true;
+
+  /* Copy the remaining bytes.  */
+  if (len >= 2)
+    {
+      dst = adjust_address (dst, HImode, 0);
+      src = adjust_address (src, HImode, 0);
+      reg0 = gen_reg_rtx (SImode);
+      /* The unaligned_* patterns require unaligned_access, but this point
+         is reachable with it disabled when both operands are aligned, so
+         use ordinary half-word moves for aligned operands as done above.  */
+      if (src_aligned)
+        emit_insn (gen_zero_extendhisi2 (reg0, src));
+      else
+        emit_insn (gen_unaligned_loadhiu (reg0, src));
+
+      if (dst_aligned)
+        emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
+      else
+        emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
+
+      src = next_consecutive_mem (src);
+      dst = next_consecutive_mem (dst);
+      if (len == 2)
+        return true;
+    }
+
+  dst = adjust_address (dst, QImode, 0);
+  src = adjust_address (src, QImode, 0);
+  reg0 = gen_reg_rtx (QImode);
+  emit_move_insn (reg0, src);
+  emit_move_insn (dst, reg0);
+  return true;
+}
+
/* Select a dominance comparison mode if possible for a test of the general
form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
COND_OR == DOM_CC_X_AND_Y => (X && Y)
@@ -4085,6 +4085,64 @@
(set_attr "predicable" "yes")
(set_attr "type" "store1")])
+;; Unaligned double-word load and store.
+;; Split after reload into two unaligned single-word accesses.
+;; Delaying the split prevents lower_subreg from splitting some other
+;; aligned double-word accesses too early.  Used for internal memcpy.
+
+(define_insn_and_split "unaligned_loaddi"  ; DImode load wrapped in UNSPEC_UNALIGNED_LOAD
+ [(set (match_operand:DI 0 "s_register_operand" "=l,r")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "o,o")]
+ UNSPEC_UNALIGNED_LOAD))]
+ "unaligned_access && TARGET_32BIT"  ; insn condition
+ "#"  ; no assembler template of its own; relies on the split below
+ "&& reload_completed"  ; split condition, added to the insn condition
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_LOAD))  ; first SImode load
+ (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_LOAD))]  ; second SImode load
+ {
+ operands[2] = gen_highpart (SImode, operands[0]); /* High word of the destination.  */
+ operands[0] = gen_lowpart (SImode, operands[0]); /* Low word of the destination.  */
+ operands[3] = gen_highpart (SImode, operands[1]); /* High word of the source memory.  */
+ operands[1] = gen_lowpart (SImode, operands[1]); /* Low word of the source memory.  */
+
+ /* If the first destination register overlaps with the base address, swap
+ the two loads so the address is still intact when the other load uses it. */
+ if (reg_overlap_mentioned_p (operands[0], operands[1]))
+ {
+ rtx tmp = operands[1]; /* Exchange the two memory halves...  */
+ operands[1] = operands[3];
+ operands[3] = tmp;
+ tmp = operands[0]; /* ...and the two destination halves.  */
+ operands[0] = operands[2];
+ operands[2] = tmp;
+ }
+ }
+ [(set_attr "arch" "t2,any")  ; one entry per constraint alternative
+ (set_attr "length" "4,8")  ; length for each alternative
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load2")])
+
+(define_insn_and_split "unaligned_storedi"  ; DImode store wrapped in UNSPEC_UNALIGNED_STORE
+ [(set (match_operand:DI 0 "memory_operand" "=o,o")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" "l,r")]
+ UNSPEC_UNALIGNED_STORE))]
+ "unaligned_access && TARGET_32BIT"  ; insn condition
+ "#"  ; no assembler template of its own; relies on the split below
+ "&& reload_completed"  ; split condition, added to the insn condition
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_STORE))  ; low word
+ (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_STORE))]  ; high word
+ {
+ operands[2] = gen_highpart (SImode, operands[0]); /* High word of the destination memory.  */
+ operands[0] = gen_lowpart (SImode, operands[0]); /* Low word of the destination memory.  */
+ operands[3] = gen_highpart (SImode, operands[1]); /* High word of the source register.  */
+ operands[1] = gen_lowpart (SImode, operands[1]); /* Low word of the source register.  */
+ }
+ [(set_attr "arch" "t2,any")  ; one entry per constraint alternative
+ (set_attr "length" "4,8")  ; length for each alternative
+ (set_attr "predicable" "yes")
+ (set_attr "type" "store2")])
+
+
(define_insn "*extv_reg"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
@@ -6864,10 +6922,18 @@
(match_operand:BLK 1 "general_operand" "")
(match_operand:SI 2 "const_int_operand" "")
(match_operand:SI 3 "const_int_operand" "")]
- "TARGET_EITHER"
+ ""
"
if (TARGET_32BIT)
{
+ if (TARGET_LDRD && current_tune->prefer_ldrd_strd
+ && !optimize_function_for_size_p (cfun))
+ {
+ if (gen_movmem_ldrd_strd (operands))
+ DONE;
+ FAIL;
+ }
+
if (arm_gen_movmemqi (operands))
DONE;
FAIL;
@@ -14,7 +14,10 @@ void aligned_dest (char *src)
/* Expect a multi-word store for the main part of the copy, but subword
loads/stores for the remainder. */
-/* { dg-final { scan-assembler-times "stmia" 1 } } */
+/* { dg-final { scan-assembler-times "ldmia" 0 } } */
+/* { dg-final { scan-assembler-times "ldrd" 0 } } */
+/* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
+/* { dg-final { scan-assembler-times "strd" 1 { target { arm_prefer_ldrd_strd } } } } */
/* { dg-final { scan-assembler-times "ldrh" 1 } } */
/* { dg-final { scan-assembler-times "strh" 1 } } */
/* { dg-final { scan-assembler-times "ldrb" 1 } } */
@@ -14,8 +14,11 @@ void aligned_src (char *dest)
/* Expect a multi-word load for the main part of the copy, but subword
loads/stores for the remainder. */
-/* { dg-final { scan-assembler-times "ldmia" 1 } } */
-/* { dg-final { scan-assembler-times "ldrh" 1 } } */
+/* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
+/* { dg-final { scan-assembler-times "ldrd" 1 { target { arm_prefer_ldrd_strd } } } } */
+/* { dg-final { scan-assembler-times "strd" 0 } } */
+/* { dg-final { scan-assembler-times "stm" 0 } } */
+/* { dg-final { scan-assembler-times "ldrh" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
/* { dg-final { scan-assembler-times "strh" 1 } } */
-/* { dg-final { scan-assembler-times "ldrb" 1 } } */
+/* { dg-final { scan-assembler-times "ldrb" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
/* { dg-final { scan-assembler-times "strb" 1 } } */
@@ -14,5 +14,9 @@ void aligned_both (void)
/* We know both src and dest to be aligned: expect multiword loads/stores. */
-/* { dg-final { scan-assembler-times "ldmia" 1 } } */
-/* { dg-final { scan-assembler-times "stmia" 1 } } */
+/* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
+/* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
+/* { dg-final { scan-assembler "ldrd" { target { arm_prefer_ldrd_strd } } } } */
+/* { dg-final { scan-assembler-times "ldm" 0 { target { arm_prefer_ldrd_strd } } } } */
+/* { dg-final { scan-assembler "strd" { target { arm_prefer_ldrd_strd } } } } */
+/* { dg-final { scan-assembler-times "stm" 0 { target { arm_prefer_ldrd_strd } } } } */