===================================================================
@@ -1,3 +1,38 @@
+2014-12-11 Felix Yang <felix.yang@huawei.com>
+ Jiji Jiang <jiangjiji@huawei.com>
+
+ * config/aarch64/aarch64-simd.md (aarch64_mul_n<mode>,
+ aarch64_<su>mull_n<mode>, aarch64_<su>mull<mode>,
+ aarch64_simd_<su>mull2_n<mode>, aarch64_<su>mull2_n<mode>,
+ aarch64_<su>mull_lane<mode>, aarch64_<su>mull2_lane<mode>_internal,
+ aarch64_<su>mull_laneq<mode>, aarch64_<su>mull2_laneq<mode>_internal,
+ aarch64_smull2_lane<mode>, aarch64_umull2_lane<mode>,
+ aarch64_smull2_laneq<mode>, aarch64_umull2_laneq<mode>,
+ aarch64_fmulx<mode>, aarch64_fmulx_lane<mode>,
+ aarch64_pmull2v16qi, aarch64_pmullv8qi): New patterns.
+ * config/aarch64/aarch64-simd-builtins.def (vec_widen_smult_hi_,
+ vec_widen_umult_hi_, umull, smull, smull_n, umull_n, mul_n, smull2_n,
+ umull2_n, smull_lane, umull_lane, smull_laneq, umull_laneq, pmull,
+ umull2_lane, smull2_laneq, umull2_laneq, fmulx, fmulx_lane, pmull2,
+ smull2_lane): New builtins.
+ * config/aarch64/arm_neon.h (vmul_n_f32, vmul_n_s16, vmul_n_s32,
+ vmul_n_u16, vmul_n_u32, vmulq_n_f32, vmulq_n_f64, vmulq_n_s16,
+ vmulq_n_s32, vmulq_n_u16, vmulq_n_u32, vmull_high_lane_s16,
+ vmull_high_lane_s32, vmull_high_lane_u16, vmull_high_lane_u32,
+ vmull_high_laneq_s16, vmull_high_laneq_s32, vmull_high_laneq_u16,
+ vmull_high_laneq_u32, vmull_high_n_s16, vmull_high_n_s32,
+ vmull_high_n_u16, vmull_high_n_u32, vmull_high_p8, vmull_high_s8,
+ vmull_high_s16, vmull_high_s32, vmull_high_u8, vmull_high_u16,
+ vmull_high_u32, vmull_lane_s16, vmull_lane_s32, vmull_lane_u16,
+ vmull_lane_u32, vmull_laneq_s16, vmull_laneq_s32, vmull_laneq_u16,
+ vmull_laneq_u32, vmull_n_s16, vmull_n_s32, vmull_n_u16, vmull_n_u32,
+ vmull_p8, vmull_s8, vmull_s16, vmull_s32, vmull_u8, vmull_u16,
+ vmull_u32, vmulx_f32, vmulx_lane_f32, vmulxd_f64, vmulxq_f32,
+ vmulxq_f64, vmulxq_lane_f32, vmulxq_lane_f64, vmulxs_f32): Rewrite
+ using builtin functions.
+ * config/aarch64/iterators.md (UNSPEC_FMULX, UNSPEC_FMULX_LANE,
+ VDQF_Q): New unspecs and mode attribute.
+
2015-01-19 Jiong Wang <jiong.wang@arm.com>
Andrew Pinski <apinski@cavium.com>
===================================================================
@@ -7580,671 +7580,6 @@ vmovn_u64 (uint64x2_t a)
return result;
}
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vmul_n_f32 (float32x2_t a, float32_t b)
-{
- float32x2_t result;
- __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vmul_n_s16 (int16x4_t a, int16_t b)
-{
- int16x4_t result;
- __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
- : "=w"(result)
- : "w"(a), "x"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vmul_n_s32 (int32x2_t a, int32_t b)
-{
- int32x2_t result;
- __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vmul_n_u16 (uint16x4_t a, uint16_t b)
-{
- uint16x4_t result;
- __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
- : "=w"(result)
- : "w"(a), "x"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vmul_n_u32 (uint32x2_t a, uint32_t b)
-{
- uint32x2_t result;
- __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-#define vmull_high_lane_s16(a, b, c) \
- __extension__ \
- ({ \
- int16x4_t b_ = (b); \
- int16x8_t a_ = (a); \
- int32x4_t result; \
- __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "x"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_high_lane_s32(a, b, c) \
- __extension__ \
- ({ \
- int32x2_t b_ = (b); \
- int32x4_t a_ = (a); \
- int64x2_t result; \
- __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_high_lane_u16(a, b, c) \
- __extension__ \
- ({ \
- uint16x4_t b_ = (b); \
- uint16x8_t a_ = (a); \
- uint32x4_t result; \
- __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "x"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_high_lane_u32(a, b, c) \
- __extension__ \
- ({ \
- uint32x2_t b_ = (b); \
- uint32x4_t a_ = (a); \
- uint64x2_t result; \
- __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_high_laneq_s16(a, b, c) \
- __extension__ \
- ({ \
- int16x8_t b_ = (b); \
- int16x8_t a_ = (a); \
- int32x4_t result; \
- __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "x"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_high_laneq_s32(a, b, c) \
- __extension__ \
- ({ \
- int32x4_t b_ = (b); \
- int32x4_t a_ = (a); \
- int64x2_t result; \
- __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_high_laneq_u16(a, b, c) \
- __extension__ \
- ({ \
- uint16x8_t b_ = (b); \
- uint16x8_t a_ = (a); \
- uint32x4_t result; \
- __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "x"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_high_laneq_u32(a, b, c) \
- __extension__ \
- ({ \
- uint32x4_t b_ = (b); \
- uint32x4_t a_ = (a); \
- uint64x2_t result; \
- __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vmull_high_n_s16 (int16x8_t a, int16_t b)
-{
- int32x4_t result;
- __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
- : "=w"(result)
- : "w"(a), "x"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vmull_high_n_s32 (int32x4_t a, int32_t b)
-{
- int64x2_t result;
- __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vmull_high_n_u16 (uint16x8_t a, uint16_t b)
-{
- uint32x4_t result;
- __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
- : "=w"(result)
- : "w"(a), "x"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vmull_high_n_u32 (uint32x4_t a, uint32_t b)
-{
- uint64x2_t result;
- __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vmull_high_p8 (poly8x16_t a, poly8x16_t b)
-{
- poly16x8_t result;
- __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vmull_high_s8 (int8x16_t a, int8x16_t b)
-{
- int16x8_t result;
- __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vmull_high_s16 (int16x8_t a, int16x8_t b)
-{
- int32x4_t result;
- __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vmull_high_s32 (int32x4_t a, int32x4_t b)
-{
- int64x2_t result;
- __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vmull_high_u8 (uint8x16_t a, uint8x16_t b)
-{
- uint16x8_t result;
- __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vmull_high_u16 (uint16x8_t a, uint16x8_t b)
-{
- uint32x4_t result;
- __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vmull_high_u32 (uint32x4_t a, uint32x4_t b)
-{
- uint64x2_t result;
- __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-#define vmull_lane_s16(a, b, c) \
- __extension__ \
- ({ \
- int16x4_t b_ = (b); \
- int16x4_t a_ = (a); \
- int32x4_t result; \
- __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "x"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_lane_s32(a, b, c) \
- __extension__ \
- ({ \
- int32x2_t b_ = (b); \
- int32x2_t a_ = (a); \
- int64x2_t result; \
- __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_lane_u16(a, b, c) \
- __extension__ \
- ({ \
- uint16x4_t b_ = (b); \
- uint16x4_t a_ = (a); \
- uint32x4_t result; \
- __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "x"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_lane_u32(a, b, c) \
- __extension__ \
- ({ \
- uint32x2_t b_ = (b); \
- uint32x2_t a_ = (a); \
- uint64x2_t result; \
- __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_laneq_s16(a, b, c) \
- __extension__ \
- ({ \
- int16x8_t b_ = (b); \
- int16x4_t a_ = (a); \
- int32x4_t result; \
- __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "x"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_laneq_s32(a, b, c) \
- __extension__ \
- ({ \
- int32x4_t b_ = (b); \
- int32x2_t a_ = (a); \
- int64x2_t result; \
- __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_laneq_u16(a, b, c) \
- __extension__ \
- ({ \
- uint16x8_t b_ = (b); \
- uint16x4_t a_ = (a); \
- uint32x4_t result; \
- __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "x"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmull_laneq_u32(a, b, c) \
- __extension__ \
- ({ \
- uint32x4_t b_ = (b); \
- uint32x2_t a_ = (a); \
- uint64x2_t result; \
- __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vmull_n_s16 (int16x4_t a, int16_t b)
-{
- int32x4_t result;
- __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
- : "=w"(result)
- : "w"(a), "x"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vmull_n_s32 (int32x2_t a, int32_t b)
-{
- int64x2_t result;
- __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vmull_n_u16 (uint16x4_t a, uint16_t b)
-{
- uint32x4_t result;
- __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
- : "=w"(result)
- : "w"(a), "x"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vmull_n_u32 (uint32x2_t a, uint32_t b)
-{
- uint64x2_t result;
- __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vmull_p8 (poly8x8_t a, poly8x8_t b)
-{
- poly16x8_t result;
- __asm__ ("pmull %0.8h, %1.8b, %2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vmull_s8 (int8x8_t a, int8x8_t b)
-{
- int16x8_t result;
- __asm__ ("smull %0.8h, %1.8b, %2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vmull_s16 (int16x4_t a, int16x4_t b)
-{
- int32x4_t result;
- __asm__ ("smull %0.4s, %1.4h, %2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vmull_s32 (int32x2_t a, int32x2_t b)
-{
- int64x2_t result;
- __asm__ ("smull %0.2d, %1.2s, %2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vmull_u8 (uint8x8_t a, uint8x8_t b)
-{
- uint16x8_t result;
- __asm__ ("umull %0.8h, %1.8b, %2.8b"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vmull_u16 (uint16x4_t a, uint16x4_t b)
-{
- uint32x4_t result;
- __asm__ ("umull %0.4s, %1.4h, %2.4h"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vmull_u32 (uint32x2_t a, uint32x2_t b)
-{
- uint64x2_t result;
- __asm__ ("umull %0.2d, %1.2s, %2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vmulq_n_f32 (float32x4_t a, float32_t b)
-{
- float32x4_t result;
- __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vmulq_n_f64 (float64x2_t a, float64_t b)
-{
- float64x2_t result;
- __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vmulq_n_s16 (int16x8_t a, int16_t b)
-{
- int16x8_t result;
- __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
- : "=w"(result)
- : "w"(a), "x"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vmulq_n_s32 (int32x4_t a, int32_t b)
-{
- int32x4_t result;
- __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vmulq_n_u16 (uint16x8_t a, uint16_t b)
-{
- uint16x8_t result;
- __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
- : "=w"(result)
- : "w"(a), "x"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vmulq_n_u32 (uint32x4_t a, uint32_t b)
-{
- uint32x4_t result;
- __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vmulx_f32 (float32x2_t a, float32x2_t b)
-{
- float32x2_t result;
- __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-#define vmulx_lane_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x4_t b_ = (b); \
- float32x2_t a_ = (a); \
- float32x2_t result; \
- __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vmulxd_f64 (float64_t a, float64_t b)
-{
- float64_t result;
- __asm__ ("fmulx %d0, %d1, %d2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vmulxq_f32 (float32x4_t a, float32x4_t b)
-{
- float32x4_t result;
- __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vmulxq_f64 (float64x2_t a, float64x2_t b)
-{
- float64x2_t result;
- __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-#define vmulxq_lane_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x4_t b_ = (b); \
- float32x4_t a_ = (a); \
- float32x4_t result; \
- __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulxq_lane_f64(a, b, c) \
- __extension__ \
- ({ \
- float64x2_t b_ = (b); \
- float64x2_t a_ = (a); \
- float64x2_t result; \
- __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vmulxs_f32 (float32_t a, float32_t b)
-{
- float32_t result;
- __asm__ ("fmulx %s0, %s1, %s2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmvn_p8 (poly8x8_t a)
{
@@ -18695,6 +18030,78 @@ vmul_n_f64 (float64x1_t __a, float64_t __b)
return (float64x1_t) { vget_lane_f64 (__a, 0) * __b };
}
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmul_n_f32 (float32x2_t __a, float32_t __b)
+{
+ return __builtin_aarch64_mul_nv2sf (__a, __b);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmul_n_s16 (int16x4_t __a, int16_t __b)
+{
+ return __builtin_aarch64_mul_nv4hi (__a, __b);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmul_n_s32 (int32x2_t __a, int32_t __b)
+{
+ return __builtin_aarch64_mul_nv2si (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmul_n_u16 (uint16x4_t __a, uint16_t __b)
+{
+ return (uint16x4_t) __builtin_aarch64_mul_nv4hi ((int16x4_t)__a,
+ (int16_t)__b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmul_n_u32 (uint32x2_t __a, uint32_t __b)
+{
+ return (uint32x2_t) __builtin_aarch64_mul_nv2si ((int32x2_t)__a,
+ (int32_t)__b);
+}
+
+/* vmulq_n */
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulq_n_f32 (float32x4_t __a, float32_t __b)
+{
+ return __builtin_aarch64_mul_nv4sf (__a, __b);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulq_n_f64 (float64x2_t __a, float64_t __b)
+{
+ return __builtin_aarch64_mul_nv2df (__a, __b);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmulq_n_s16 (int16x8_t __a, int16_t __b)
+{
+ return __builtin_aarch64_mul_nv8hi (__a, __b);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmulq_n_s32 (int32x4_t __a, int32_t __b)
+{
+ return __builtin_aarch64_mul_nv4si (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmulq_n_u16 (uint16x8_t __a, uint16_t __b)
+{
+ return (uint16x8_t) __builtin_aarch64_mul_nv8hi ((int16x8_t)__a,
+ (int16_t)__b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmulq_n_u32 (uint32x4_t __a, uint32_t __b)
+{
+ return (uint32x4_t) __builtin_aarch64_mul_nv4si ((int32x4_t)__a,
+ (int32_t)__b);
+}
+
/* vmulq_lane */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
@@ -18772,6 +18179,308 @@ vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, c
return __a * __aarch64_vget_lane_any (__b, __lane);
}
+/* vmull_high_lane */
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
+{
+ return __builtin_aarch64_smull2_lanev8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
+{
+ return __builtin_aarch64_smull2_lanev4si (__a, __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_high_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c)
+{
+ return (uint32x4_t) __builtin_aarch64_umull2_lanev8hi ((int16x8_t) __a,
+ (int16x4_t) __b,
+ __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_high_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c)
+{
+ return (uint64x2_t) __builtin_aarch64_umull2_lanev4si ((int32x4_t) __a,
+ (int32x2_t) __b,
+ __c);
+}
+
+/* vmull_high_laneq */
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
+{
+ return __builtin_aarch64_smull2_laneqv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
+{
+ return __builtin_aarch64_smull2_laneqv4si (__a, __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_high_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
+{
+ return (uint32x4_t) __builtin_aarch64_umull2_laneqv8hi ((int16x8_t)__a,
+ (int16x8_t)__b,
+ __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_high_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
+{
+ return (uint64x2_t) __builtin_aarch64_umull2_laneqv4si ((int32x4_t) __a,
+ (int32x4_t) __b,
+ __c);
+}
+
+/* vmull_high_n */
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_high_n_s16 (int16x8_t __a, int16_t __b)
+{
+ return __builtin_aarch64_smull2_nv8hi (__a, __b);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_high_n_s32 (int32x4_t __a, int32_t __b)
+{
+ return __builtin_aarch64_smull2_nv4si (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_high_n_u16 (uint16x8_t __a, uint16_t __b)
+{
+ return __builtin_aarch64_umull2_nv8hi_uuu (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_high_n_u32 (uint32x4_t __a, uint32_t __b)
+{
+ return __builtin_aarch64_umull2_nv4si_uuu (__a, __b);
+}
+
+/* vmull_high */
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vmull_high_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+ return __builtin_aarch64_pmull2v16qi_ppp (__a, __b);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmull_high_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return __builtin_aarch64_vec_widen_smult_hi_v16qi (__a, __b);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_high_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return __builtin_aarch64_vec_widen_smult_hi_v8hi (__a, __b);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_high_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return __builtin_aarch64_vec_widen_smult_hi_v4si (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmull_high_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return __builtin_aarch64_vec_widen_umult_hi_v16qi_uuu (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_high_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return __builtin_aarch64_vec_widen_umult_hi_v8hi_uuu (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_high_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return __builtin_aarch64_vec_widen_umult_hi_v4si_uuu (__a, __b);
+}
+
+/* vmull_lane */
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+ return __builtin_aarch64_smull_lanev4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+ return __builtin_aarch64_smull_lanev2si (__a, __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const unsigned int __c)
+{
+ return __builtin_aarch64_umull_lanev4hi_uuuu (__a, __b, __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const unsigned int __c)
+{
+ return __builtin_aarch64_umull_lanev2si_uuuu (__a, __b, __c);
+}
+
+/* vmull_laneq */
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
+{
+ return __builtin_aarch64_smull_laneqv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
+{
+ return __builtin_aarch64_smull_laneqv2si (__a, __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const unsigned int __c)
+{
+ return __builtin_aarch64_umull_laneqv4hi_uuuu (__a, __b, __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const unsigned int __c)
+{
+ return __builtin_aarch64_umull_laneqv2si_uuuu (__a, __b, __c);
+}
+
+/* vmull_n */
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_n_s16 (int16x4_t __a, int16_t __b)
+{
+ return __builtin_aarch64_smull_nv4hi (__a, __b);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_n_s32 (int32x2_t __a, int32_t __b)
+{
+ return __builtin_aarch64_smull_nv2si (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_n_u16 (uint16x4_t __a, uint16_t __b)
+{
+ return __builtin_aarch64_umull_nv4hi_uuu (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_n_u32 (uint32x2_t __a, uint32_t __b)
+{
+ return __builtin_aarch64_umull_nv2si_uuu (__a, __b);
+}
+
+/* vmull */
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vmull_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ return __builtin_aarch64_pmullv8qi_ppp (__a, __b);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmull_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return __builtin_aarch64_smullv8qi (__a, __b);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return __builtin_aarch64_smullv4hi (__a, __b);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return __builtin_aarch64_smullv2si (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmull_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return __builtin_aarch64_umullv8qi_uuu (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return __builtin_aarch64_umullv4hi_uuu (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return __builtin_aarch64_umullv2si_uuu (__a, __b);
+}
+
+/* vmulx */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmulx_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return __builtin_aarch64_fmulxv2sf (__a, __b);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmulx_lane_f32 (float32x2_t __a, float32x4_t __b, const int __c)
+{
+ return __builtin_aarch64_fmulx_lanev2sf (__a, __b, __c);
+}
+
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vmulxd_f64 (float64_t __a, float64_t __b)
+{
+ return __builtin_aarch64_fmulxdf (__a, __b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulxq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return __builtin_aarch64_fmulxv4sf (__a, __b);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulxq_f64 (float64x2_t __a, float64x2_t __b)
+{
+ return __builtin_aarch64_fmulxv2df (__a, __b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulxq_lane_f32 (float32x4_t __a, float32x4_t __b, const int __c)
+{
+ return __builtin_aarch64_fmulx_lanev4sf (__a, __b, __c);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulxq_lane_f64 (float64x2_t __a, float64x2_t __b, const int __c)
+{
+ return __builtin_aarch64_fmulx_lanev2df (__a, __b, __c);
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vmulxs_f32 (float32_t __a, float32_t __b)
+{
+ return __builtin_aarch64_fmulxsf (__a, __b);
+}
+
/* vneg */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
===================================================================
@@ -276,6 +276,8 @@
UNSPEC_SHA256SU1 ; Used in aarch64-simd.md.
UNSPEC_PMULL ; Used in aarch64-simd.md.
UNSPEC_PMULL2 ; Used in aarch64-simd.md.
+ UNSPEC_FMULX ; Used in aarch64-simd.md.
+ UNSPEC_FMULX_LANE ; Used in aarch64-simd.md.
])
;; -------------------------------------------------------------------
@@ -466,6 +468,9 @@
)
+(define_mode_attr VDQF_Q [(V2SF "V4SF") (V4SF "V4SF")
+ (V2DF "V2DF")])
+
;; Widened mode register suffixes for VD_BHSI/VQW.
(define_mode_attr Vwtype [(V8QI "8h") (V4HI "4s")
(V2SI "2d") (V16QI "8h")
===================================================================
@@ -1396,6 +1396,253 @@
}
)
+(define_insn "aarch64_mul_n<mode>" ; multiply vector by scalar element 0 (vmul_n_*)
+ [(set (match_operand:VMUL 0 "register_operand" "=w")
+ (mult:VMUL
+ (match_operand:VMUL 1 "register_operand" "w")
+ (vec_duplicate:VMUL
+ (match_operand:<VEL> 2 "register_operand" "<h_con>"))))]
+ "TARGET_SIMD"
+ "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
+ [(set_attr "type" "neon_mul_<Vetype>_scalar")] ; was neon_mul_<Vetype>_long: this insn does not widen
+)
+
+(define_insn "aarch64_<su>mull_n<mode>" ; widening {s,u}mull by dup'd scalar (vmull_n_*)
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VD_HSI 1 "register_operand" "w"))
+ (ANY_EXTEND:<VWIDE>
+ (vec_duplicate:VD_HSI
+ (match_operand:<VEL> 2 "register_operand" "<vwx>")))))] ; <vwx>: lane-addressable register class
+ "TARGET_SIMD"
+ "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
+ [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
+)
+
+
+(define_insn "aarch64_<su>mull<mode>" ; widening {s,u}mull on D-reg vectors (vmull_*)
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VD_BHSI 1 "register_operand" "w"))
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VD_BHSI 2 "register_operand" "w"))))]
+ "TARGET_SIMD"
+ "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_mul_<Vetype>_long")]
+)
+
+(define_insn "aarch64_simd_<su>mull2_n<mode>" ; widening mull on high half by dup'd scalar
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+ (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
+ (match_operand:<VEL> 2 "register_operand" "<vwx>")))))] ; was <vw>: element mul needs the lane-reg constraint, as in aarch64_<su>mull_n
+ "TARGET_SIMD"
+ "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
+ [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
+)
+
+(define_expand "aarch64_<su>mull2_n<mode>" ; expander: builds the hi-half selector, then emits aarch64_simd_<su>mull2_n
+ [(match_operand:<VWIDE> 0 "register_operand" "")
+ (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand" ""))
+ (match_operand:<VEL> 2 "register_operand" "")]
+ "TARGET_SIMD"
+ {
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); /* parallel selecting the high half of operand 1 */
+ emit_insn (gen_aarch64_simd_<su>mull2_n<mode> (operands[0],
+ operands[1],
+ operands[2], p));
+ DONE;
+
+ }
+)
+
+(define_insn "aarch64_<su>mull_lane<mode>" ; widening mull by a selected element of a D-reg vector
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VD_HSI 1 "register_operand" "w"))
+ (ANY_EXTEND:<VWIDE>
+ (vec_duplicate:VD_HSI
+ (vec_select:<VEL>
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); /* remap lane number for big-endian */
+ return "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_<su>mull_laneq<mode>" ; as above but the element comes from a Q-reg vector (<VCONQ>)
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VD_HSI 1 "register_operand" "w"))
+ (ANY_EXTEND:<VWIDE>
+ (vec_duplicate:VD_HSI
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); /* remap lane number for big-endian */
+ return "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_<su>mull2_lane<mode>_internal" ; widening mull on the high half by a selected element
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+ (ANY_EXTEND:<VWIDE>
+ (vec_duplicate:<VHALF>
+ (vec_select:<VEL>
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); /* remap lane number for big-endian */
+ return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_<su>mull2_laneq<mode>_internal" ; as above but the element comes from a Q-reg vector (<VCONQ>)
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+ (ANY_EXTEND:<VWIDE>
+ (vec_duplicate:<VHALF>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); /* remap lane number for big-endian */
+ return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
+)
+
+(define_expand "aarch64_smull2_lane<mode>" ; expander: hi-half selector + smull2_lane_internal (vmull_high_lane_s*)
+ [(match_operand:<VWIDE> 0 "register_operand" "=w")
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_SIMD"
+{
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); /* parallel selecting the high half of operand 1 */
+ emit_insn (gen_aarch64_smull2_lane<mode>_internal (operands[0], operands[1],
+ operands[2], operands[3],
+ p));
+ DONE;
+})
+
+(define_expand "aarch64_umull2_lane<mode>" ; unsigned variant (vmull_high_lane_u*)
+ [(match_operand:<VWIDE> 0 "register_operand" "=w")
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_SIMD"
+{
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); /* parallel selecting the high half of operand 1 */
+ emit_insn (gen_aarch64_umull2_lane<mode>_internal (operands[0], operands[1],
+ operands[2], operands[3],
+ p));
+ DONE;
+})
+
+(define_expand "aarch64_smull2_laneq<mode>" ; Q-reg lane container variant (vmull_high_laneq_s*)
+ [(match_operand:<VWIDE> 0 "register_operand" "=w")
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_SIMD"
+{
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); /* parallel selecting the high half of operand 1 */
+ emit_insn (gen_aarch64_smull2_laneq<mode>_internal (operands[0], operands[1],
+ operands[2], operands[3],
+ p));
+ DONE;
+})
+
+(define_expand "aarch64_umull2_laneq<mode>" ; unsigned Q-reg lane variant (vmull_high_laneq_u*)
+ [(match_operand:<VWIDE> 0 "register_operand" "=w")
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_SIMD"
+{
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); /* parallel selecting the high half of operand 1 */
+ emit_insn (gen_aarch64_umull2_laneq<mode>_internal (operands[0], operands[1],
+ operands[2], operands[3],
+ p));
+ DONE;
+})
+
+(define_insn "aarch64_fmulx<mode>" ; vector FMULX (vmulx_f32/vmulxq_*)
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
+ (match_operand:VDQF 2 "register_operand" "w")]
+ UNSPEC_FMULX))]
+ "TARGET_SIMD"
+ "fmulx\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" ; was <vtype>: mode attrs are case-sensitive, the attr is Vtype
+ [(set_attr "type" "neon_fp_mul_<Vetype>")] ; was neon_mul_s: this is an FP multiply
+)
+
+(define_insn "aarch64_fmulx<mode>" ; scalar FMULX on GPF (SF/DF) register forms
+ [(set (match_operand:GPF 0 "register_operand" "=w")
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")
+ (match_operand:GPF 2 "register_operand" "w")]
+ UNSPEC_FMULX))]
+ "TARGET_SIMD"
+ "fmulx\\t%<s>0, %<s>1, %<s>2"
+ [(set_attr "type" "neon_mul_s")] ; NOTE(review): integer-mul type on an FP insn; an FP-mul type value looks more accurate -- verify against types.md
+)
+
+(define_insn "aarch64_fmulx_lane<mode>" ; FMULX by a lane of a <VDQF_Q> vector
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
+ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
+ (match_operand:<VDQF_Q> 2 "register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_FMULX_LANE))]
+ "TARGET_SIMD"
+ "fmulx\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]" ; was "%2.<vetype>" with no [%3]: lane operand 3 was never emitted; NOTE(review): no ENDIAN_LANE_N remap here, unlike the mull_lane patterns -- verify big-endian
+ [(set_attr "type" "neon_fp_mul_<Vetype>")]
+)
+
+(define_insn "aarch64_pmull2v16qi" ; polynomial multiply long, high halves (vmull_high_p8)
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "w")
+ (match_operand:V16QI 2 "register_operand" "w")]
+ UNSPEC_PMULL2))]
+ "TARGET_SIMD"
+ "pmull2\\t%0.8h, %1.16b, %2.16b"
+ [(set_attr "type" "neon_mul_b_long")]
+)
+
+(define_insn "aarch64_pmullv8qi" ; polynomial multiply long (vmull_p8)
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
+ (match_operand:V8QI 2 "register_operand" "w")]
+ UNSPEC_PMULL))]
+ "TARGET_SIMD"
+ "pmull\\t%0.8h, %1.8b, %2.8b"
+ [(set_attr "type" "neon_mul_b_long")]
+)
+
;; FP vector operations.
;; AArch64 AdvSIMD supports single-precision (32-bit) and
;; double-precision (64-bit) floating-point data types and arithmetic as
===================================================================
@@ -187,6 +187,39 @@
BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_lane, 0)
BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_laneq, 0)
+ /* Implemented by vec_widen_<su>mult_hi_<mode>. */
+ BUILTIN_VQW (BINOP, vec_widen_smult_hi_, 10)
+ BUILTIN_VQW (BINOPU, vec_widen_umult_hi_, 10)
+ /* Implemented by aarch64_<su>mull<mode>. */
+ BUILTIN_VD_BHSI (BINOPU, umull, 0)
+ BUILTIN_VD_BHSI (BINOP, smull, 0)
+ /* Implemented by aarch64_<su>mull_n<mode>. */
+ BUILTIN_VD_HSI (BINOP, smull_n, 0)
+ BUILTIN_VD_HSI (BINOPU, umull_n, 0)
+ /* Implemented by aarch64_mul_n<mode>. */
+ BUILTIN_VMUL (BINOP, mul_n, 0)
+ /* Implemented by aarch64_<su>mull2_n<mode>. */
+ BUILTIN_VQ_HSI (BINOP, smull2_n, 0)
+ BUILTIN_VQ_HSI (BINOPU, umull2_n, 0)
+ /* Implemented by aarch64_<su>mull_lane<q><mode>. */
+ BUILTIN_VD_HSI (TERNOP, smull_lane, 0)
+ BUILTIN_VD_HSI (TERNOPU, umull_lane, 0)
+ BUILTIN_VD_HSI (TERNOP, smull_laneq, 0)
+ BUILTIN_VD_HSI (TERNOPU, umull_laneq, 0)
+ /* Implemented by aarch64_<su>mull2_lane<q><mode>. */
+ BUILTIN_VQ_HSI (TERNOP, smull2_lane, 0)
+ BUILTIN_VQ_HSI (TERNOP_LANE, umull2_lane, 0) /* NOTE(review): smull2_lane uses TERNOP but umull2_lane uses TERNOP_LANE (and not TERNOPU as umull_lane does) -- the qualifier choice looks inconsistent; verify against aarch64-builtins.c.  */
+ BUILTIN_VQ_HSI (TERNOP, smull2_laneq, 0)
+ BUILTIN_VQ_HSI (TERNOP_LANE, umull2_laneq, 0) /* NOTE(review): same inconsistency as umull2_lane above.  */
+ /* Implemented by aarch64_fmulx<mode>. */
+ BUILTIN_VDQF (BINOP, fmulx, 0)
+ BUILTIN_GPF (BINOP, fmulx, 0)
+ BUILTIN_VDQF (BINOP, fmulx_lane, 0) /* NOTE(review): fmulx_lane's insn and the arm_neon.h callers take three arguments (including a lane immediate); BINOP declares only two -- a ternary qualifier seems required; verify.  */
+
+ /* Implemented by aarch64_pmull<2><mode>.*/
+ VAR1 (BINOPP, pmull, 0, v8qi)
+ VAR1 (BINOPP, pmull2, 0, v16qi)
+
BUILTIN_VSDQ_I_DI (BINOP, ashl, 3)
/* Implemented by aarch64_<sur>shl<mode>. */
BUILTIN_VSDQ_I_DI (BINOP, sshl, 0)