@@ -925,8 +925,6 @@ bool aarch64_split_128bit_move_p (rtx, rtx);
bool aarch64_mov128_immediate (rtx);
-void aarch64_split_simd_combine (rtx, rtx, rtx);
-
void aarch64_split_simd_move (rtx, rtx);
/* Check for a legitimate floating point constant for FMOV. */
@@ -4403,7 +4403,7 @@ (define_insn "*aarch64_combine_internal_be<mode>"
;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
-(define_insn "@aarch64_combinez<mode>"
+(define_insn "*aarch64_combinez<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
(vec_concat:<VDBL>
(match_operand:VDC 1 "nonimmediate_operand" "w,?r,m")
@@ -4417,7 +4417,7 @@ (define_insn "@aarch64_combinez<mode>"
(set_attr "arch" "simd,fp,simd")]
)
-(define_insn "@aarch64_combinez_be<mode>"
+(define_insn "*aarch64_combinez_be<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
(vec_concat:<VDBL>
(match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
@@ -4431,38 +4431,62 @@ (define_insn "@aarch64_combinez_be<mode>"
(set_attr "arch" "simd,fp,simd")]
)
-(define_expand "aarch64_combine<mode>"
- [(match_operand:<VDBL> 0 "register_operand")
- (match_operand:VDC 1 "register_operand")
- (match_operand:VDC 2 "aarch64_simd_reg_or_zero")]
+;; Form a vector whose first half (in array order) comes from operand 1
+;; and whose second half (in array order) comes from operand 2.
+;; This operand order follows the RTL vec_concat operation.
+(define_expand "@aarch64_vec_concat<mode>"
+ [(set (match_operand:<VDBL> 0 "register_operand")
+ (vec_concat:<VDBL>
+ (match_operand:VDC 1 "general_operand")
+ (match_operand:VDC 2 "general_operand")))]
"TARGET_SIMD"
{
- if (operands[2] == CONST0_RTX (<MODE>mode))
+ int lo = BYTES_BIG_ENDIAN ? 2 : 1;
+ int hi = BYTES_BIG_ENDIAN ? 1 : 2;
+
+ if (MEM_P (operands[1])
+ && MEM_P (operands[2])
+ && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2]))
+ /* Use load_pair_lanes<mode>. */
+ ;
+ else if (operands[hi] == CONST0_RTX (<MODE>mode))
{
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1],
- operands[2]));
- else
- emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1],
- operands[2]));
+ /* Use *aarch64_combinez<mode>. */
+ if (!nonimmediate_operand (operands[lo], <MODE>mode))
+ operands[lo] = force_reg (<MODE>mode, operands[lo]);
}
else
- aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
- DONE;
-}
-)
+ {
+ /* Use *aarch64_combine_general<mode>. */
+ operands[lo] = force_reg (<MODE>mode, operands[lo]);
+ if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
+ {
+ if (MEM_P (operands[hi]))
+ {
+ rtx addr = force_reg (Pmode, XEXP (operands[hi], 0));
+ operands[hi] = replace_equiv_address (operands[hi], addr);
+ }
+ else
+ operands[hi] = force_reg (<MODE>mode, operands[hi]);
+ }
+ }
+})
-(define_expand "@aarch64_simd_combine<mode>"
+;; Form a vector whose least significant half comes from operand 1 and whose
+;; most significant half comes from operand 2. This operand order follows
+;; arm_neon.h vcombine* intrinsics.
+(define_expand "aarch64_combine<mode>"
[(match_operand:<VDBL> 0 "register_operand")
- (match_operand:VDC 1 "register_operand")
- (match_operand:VDC 2 "register_operand")]
+ (match_operand:VDC 1 "general_operand")
+ (match_operand:VDC 2 "general_operand")]
"TARGET_SIMD"
- {
- emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
- emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
- DONE;
- }
-[(set_attr "type" "multiple")]
+{
+ if (BYTES_BIG_ENDIAN)
+ std::swap (operands[1], operands[2]);
+ emit_insn (gen_aarch64_vec_concat<mode> (operands[0], operands[1],
+ operands[2]));
+ DONE;
+}
)
;; <su><addsub>l<q>.
@@ -4239,23 +4239,6 @@ aarch64_split_128bit_move_p (rtx dst, rtx src)
return true;
}
-/* Split a complex SIMD combine. */
-
-void
-aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
-{
- machine_mode src_mode = GET_MODE (src1);
- machine_mode dst_mode = GET_MODE (dst);
-
- gcc_assert (VECTOR_MODE_P (dst_mode));
- gcc_assert (register_operand (dst, dst_mode)
- && register_operand (src1, src_mode)
- && register_operand (src2, src_mode));
-
- emit_insn (gen_aarch64_simd_combine (src_mode, dst, src1, src2));
- return;
-}
-
/* Split a complex SIMD move. */
void
@@ -20941,37 +20924,13 @@ aarch64_expand_vector_init (rtx target, rtx vals)
of mode N in VALS and we must put their concatentation into TARGET. */
if (XVECLEN (vals, 0) == 2 && VECTOR_MODE_P (GET_MODE (XVECEXP (vals, 0, 0))))
{
- gcc_assert (known_eq (GET_MODE_SIZE (mode),
- 2 * GET_MODE_SIZE (GET_MODE (XVECEXP (vals, 0, 0)))));
- rtx lo = XVECEXP (vals, 0, 0);
- rtx hi = XVECEXP (vals, 0, 1);
- machine_mode narrow_mode = GET_MODE (lo);
- gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode);
- gcc_assert (narrow_mode == GET_MODE (hi));
-
- /* When we want to concatenate a half-width vector with zeroes we can
- use the aarch64_combinez[_be] patterns. Just make sure that the
- zeroes are in the right half. */
- if (BYTES_BIG_ENDIAN
- && aarch64_simd_imm_zero (lo, narrow_mode)
- && general_operand (hi, narrow_mode))
- emit_insn (gen_aarch64_combinez_be (narrow_mode, target, hi, lo));
- else if (!BYTES_BIG_ENDIAN
- && aarch64_simd_imm_zero (hi, narrow_mode)
- && general_operand (lo, narrow_mode))
- emit_insn (gen_aarch64_combinez (narrow_mode, target, lo, hi));
- else
- {
- /* Else create the two half-width registers and combine them. */
- if (!REG_P (lo))
- lo = force_reg (GET_MODE (lo), lo);
- if (!REG_P (hi))
- hi = force_reg (GET_MODE (hi), hi);
-
- if (BYTES_BIG_ENDIAN)
- std::swap (lo, hi);
- emit_insn (gen_aarch64_simd_combine (narrow_mode, target, lo, hi));
- }
+ machine_mode narrow_mode = GET_MODE (XVECEXP (vals, 0, 0));
+ gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode
+ && known_eq (GET_MODE_SIZE (mode),
+ 2 * GET_MODE_SIZE (narrow_mode)));
+ emit_insn (gen_aarch64_vec_concat (narrow_mode, target,
+ XVECEXP (vals, 0, 0),
+ XVECEXP (vals, 0, 1)));
return;
}
new file mode 100644
@@ -0,0 +1,65 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** s32_1:
+** ldr q0, \[x0\]
+** ret
+*/
+int32x4_t s32_1(int32x2_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return vcombine_s32 (ptr[1], ptr[0]);
+ else
+ return vcombine_s32 (ptr[0], ptr[1]);
+}
+/*
+** s32_2:
+** add x([0-9]+), x0, #?8
+** ld1 {v0\.d}\[1\], \[x\1\]
+** ret
+*/
+int32x4_t s32_2(int32x2_t a0, int32x2_t *ptr) {
+ return vcombine_s32 (a0, ptr[1]);
+}
+/*
+** s32_3:
+** ldr d0, \[x0\], #?16
+** ld1 {v0\.d}\[1\], \[x0\]
+** ret
+*/
+int32x4_t s32_3(int32x2_t *ptr) {
+ return vcombine_s32 (ptr[0], ptr[2]);
+}
+
+/*
+** f32_1:
+** ldr q0, \[x0\]
+** ret
+*/
+float32x4_t f32_1(float32x2_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return vcombine_f32 (ptr[1], ptr[0]);
+ else
+ return vcombine_f32 (ptr[0], ptr[1]);
+}
+/*
+** f32_2:
+** add x([0-9]+), x0, #?8
+** ld1 {v0\.d}\[1\], \[x\1\]
+** ret
+*/
+float32x4_t f32_2(float32x2_t a0, float32x2_t *ptr) {
+ return vcombine_f32 (a0, ptr[1]);
+}
+/*
+** f32_3:
+** ldr d0, \[x0\], #?16
+** ld1 {v0\.d}\[1\], \[x0\]
+** ret
+*/
+float32x4_t f32_3(float32x2_t *ptr) {
+ return vcombine_f32 (ptr[0], ptr[2]);
+}