
[AArch64,2/2] PR/60825 Make {int,uint}64x1_t in arm_neon.h a proper vector type

Message ID 53A2D7E8.3050403@arm.com

Commit Message

Alan Lawrence June 19, 2014, 12:30 p.m. UTC
Similarly, this makes int64x1_t a proper vector type, updating arm_neon.h with 
many explicit vector construction/destruction operations (also including some 
range checking using __builtin_aarch64_im_lane_boundsi).
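
For illustration only (this snippet is not part of the patch), the
user-visible effect of the new representation is roughly:

  #include <arm_neon.h>

  int
  example (int64_t x)
  {
    int64x1_t v = vdup_n_s64 (x);    /* now built as (int64x1_t) {x}  */
    int64x1_t w = (int64x1_t) {x};   /* explicit GCC vector construction  */
    /* Lane numbers are range-checked against the single available lane.  */
    return vget_lane_s64 (v, 0) == w[0];
  }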

Change the vabs_s64 intrinsic from using __builtin_llabs to
__builtin_aarch64_absdi; the latter is consistent with the other intrinsics
and should have different behaviour (aarch64_abs on the minimum negative
value should be defined to return said minimum negative value rather than
being undefined). This __builtin was previously being generated as a binary
operator (which went unnoticed as it was unused), so I've tweaked the
qualifiers so that unary ops are generated as unary.
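
To illustrate the behavioural difference, a minimal sketch (assuming the
wrapping behaviour of the AArch64 ABS instruction on the most negative
value; again, not part of the patch):

  #include <arm_neon.h>
  #include <stdint.h>

  int64x1_t
  abs_min (void)
  {
    int64x1_t v = vdup_n_s64 (INT64_MIN);
    /* __builtin_llabs (INT64_MIN) overflows and is undefined behaviour;
       the __builtin_aarch64_absdi-based version is intended to return
       INT64_MIN in lane 0, matching the ABS instruction.  */
    return vabs_s64 (v);
  }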

gcc/ChangeLog:

	* config/aarch64/aarch64-builtins.c (aarch64_types_unop_qualifiers):
	Ignore third operand if present by marking qualifier_internal.

	* config/aarch64/aarch64-simd-builtins.def (abs): Add comment.

	* config/aarch64/arm_neon.h (int64x1_t, uint64x1_t): Typedef to GCC
	vector extension.
	(aarch64_vget_lane_s64, aarch64_vdup_lane_s64,
	aarch64_vdupq_lane_s64, aarch64_vdupq_lane_u64): Remove macro.
	(vqadd_s64, vqadd_u64, vqsub_s64, vqsub_u64, vqneg_s64, vqabs_s64,
	vcreate_s64, vcreate_u64, vreinterpret_s64_f64, vreinterpret_u64_f64,
	vcombine_u64, vbsl_s64, vbsl_u64, vceq_s64, vceq_u64, vceqz_s64,
	vceqz_u64, vcge_s64, vcge_u64, vcgez_s64, vcgt_s64, vcgt_u64,
	vcgtz_s64, vcle_s64, vcle_u64, vclez_s64, vclt_s64, vclt_u64,
	vcltz_s64, vdup_n_s64, vdup_n_u64, vld1_s64, vld1_u64, vmov_n_s64,
	vmov_n_u64, vqdmlals_lane_s32, vqdmlsls_lane_s32,
	vqdmulls_lane_s32, vqrshl_s64, vqrshl_u64, vqshl_s64, vqshl_u64,
	vqshl_n_s64, vqshl_n_u64,
	vqshlu_n_s64, vrshl_s64, vrshl_u64, vrshr_n_s64, vrshr_n_u64,
	vrsra_n_s64, vrsra_n_u64, vshl_n_s64, vshl_n_u64, vshl_s64,
	vshl_u64, vshr_n_s64, vshr_n_u64, vsli_n_s64, vsli_n_u64,
	vsqadd_u64, vsra_n_s64, vsra_n_u64, vsri_n_s64, vsri_n_u64,
	vst1_s64, vst1_u64, vtst_s64, vtst_u64, vuqadd_s64): Wrap existing
	logic in GCC vector extensions.
	
	(vpaddd_s64, vaddd_s64, vaddd_u64, vceqd_s64, vceqd_u64, vceqzd_s64,
	vceqzd_u64, vcged_s64, vcged_u64, vcgezd_s64, vcgtd_s64, vcgtd_u64,
	vcgtzd_s64, vcled_s64, vcled_u64, vclezd_s64, vcltd_s64, vcltd_u64,
	vcltzd_s64, vqdmlals_s32, vqdmlsls_s32, vqmovnd_s64, vqmovnd_u64,
	vqmovund_s64, vqrshld_s64, vqrshld_u64, vqrshrnd_n_s64,
	vqrshrnd_n_u64, vqrshrund_n_s64, vqshld_s64, vqshld_u64,
	vqshld_n_u64, vqshrnd_n_s64, vqshrnd_n_u64, vqshrund_n_s64,
	vrshld_u64, vrshrd_n_u64, vrsrad_n_u64, vshld_n_u64, vshld_s64,
	vshld_u64, vslid_n_u64, vsqaddd_u64, vsrad_n_u64, vsrid_n_u64,
	vsubd_s64, vsubd_u64, vtstd_s64, vtstd_u64): Fix type signature.

	(vabs_s64): Use GCC vector extensions; call __builtin_aarch64_absdi.

	(vget_high_s64, vget_high_u64): Reimplement with GCC vector
	extensions.

	(__GET_LOW, vget_low_u64): Wrap result using vcreate_u64.
	(vget_low_s64): Use __GET_LOW macro.
	(vget_lane_s64, vget_lane_u64, vdupq_lane_s64, vdupq_lane_u64): Use
	GCC vector extensions, add call to __builtin_aarch64_im_lane_boundsi.
	(vdup_lane_s64, vdup_lane_u64): Add __builtin_aarch64_im_lane_boundsi.
	(vdupd_lane_s64, vdupd_lane_u64): Fix type signature, add
	__builtin_aarch64_im_lane_boundsi, use GCC vector extensions.

	(vcombine_s64): Use GCC vector extensions; remove cast.
	(vqaddd_s64, vqaddd_u64, vqdmulls_s32, vqshld_n_s64, vqshlud_n_s64,
	vqsubd_s64, vqsubd_u64, vrshld_s64, vrshrd_n_s64, vrsrad_n_s64,
	vshld_n_s64, vshrd_n_s64, vslid_n_s64, vsrad_n_s64, vsrid_n_s64):
	Fix type signature; remove cast.

gcc/testsuite/ChangeLog:

	* g++.dg/abi/mangle-neon-aarch64.C (f22, f23):
	New tests of [u]int64x1_t.

	* gcc.target/aarch64/aapcs64/func-ret-64x1_1.c: Add {u,}int64x1 cases.
	* gcc.target/aarch64/aapcs64/test_64x1_1.c: Likewise.

	* gcc.target/aarch64/scalar_intrinsics.c (test_vaddd_u64,
	test_vaddd_s64, test_vceqd_s64, test_vceqzd_s64, test_vcged_s64,
	test_vcled_s64, test_vcgezd_s64, test_vcged_u64, test_vcgtd_s64,
	test_vcltd_s64, test_vcgtzd_s64, test_vcgtd_u64, test_vclezd_s64,
	test_vcltzd_s64, test_vqaddd_u64, test_vqaddd_s64, test_vqdmlals_s32,
	test_vqdmlsls_s32, test_vqdmulls_s32, test_vuqaddd_s64,
	test_vsqaddd_u64, test_vqmovund_s64, test_vqmovnd_s64,
	test_vqmovnd_u64, test_vsubd_u64, test_vsubd_s64, test_vqsubd_u64,
	test_vqsubd_s64, test_vshld_s64, test_vshld_u64, test_vrshld_s64,
	test_vrshld_u64, test_vshrd_n_s64, test_vshrd_n_u64, test_vsrad_n_s64,
	test_vsrad_n_u64, test_vrshrd_n_s64, test_vrshrd_n_u64,
	test_vrsrad_n_s64, test_vrsrad_n_u64, test_vqrshld_s64,
	test_vqrshld_u64, test_vqshlud_n_s64, test_vqshld_s64, test_vqshld_u64,
	test_vqshld_n_u64, test_vqshrund_n_s64, test_vqrshrund_n_s64,
	test_vqshrnd_n_s64, test_vqshrnd_n_u64, test_vqrshrnd_n_s64,
	test_vqrshrnd_n_u64, test_vshld_n_s64, test_vshld_n_u64,
	test_vslid_n_s64, test_vslid_n_u64, test_vsrid_n_s64,
	test_vsrid_n_u64): Fix signature to match intrinsic.
	
	(test_vabs_s64): Remove.
	(test_vaddd_s64_2, test_vsubd_s64_2): Use force_simd.

	(test_vdupd_lane_s64): Rename to...
	(test_vdupd_laneq_s64): ...and remove a call to force_simd.

	(test_vdupd_lane_u64): Rename to...
	(test_vdupd_laneq_u64): ...and remove a call to force_simd.

	(test_vtst_s64): Rename to...
	(test_vtstd_s64): ...and change int64x1_t to int64_t.

	(test_vtst_u64): Rename to...
	(test_vtstd_u64): ...and change uint64x1_t to uint64_t.

	* gcc.target/aarch64/singleton_intrinsics_1.c: New file.
	* gcc.target/aarch64/vdup_lane_1.c, gcc.target/aarch64/vdup_lane_2.c:
	Remove out-of-bounds tests.
	* gcc.target/aarch64/vneg_s.c (INDEX*, RUN_TEST): Remove INDEX macro.

Comments

Marcus Shawcroft June 20, 2014, 12:50 p.m. UTC | #1
On 19 June 2014 13:30, Alan Lawrence <alan.lawrence@arm.com> wrote:
> Similarly, this makes int64x1_t a proper vector type, updating arm_neon.h
> with many explicit vector construction/destruction operations (also
> including some range checking using __builtin_aarch64_im_lane_boundsi).
>
> Change the vabs_s64 intrinsic from using __builtin_llabs to
> __builtin_aarch64_absdi; the latter is consistent with the other intrinsics
> and should have different behaviour (aarch64_abs on the minimum negative
> value should be defined to return said minimum negative value rather than
> being undefined). This __builtin was previously being generated as a binary
> operator (which went unnoticed as it was unused), so I've tweaked the
> qualifiers so that unary ops are generated as unary.

Ok /Marcus
James Greenhalgh June 24, 2014, 9:29 a.m. UTC | #2
On Thu, Jun 19, 2014 at 01:30:32PM +0100, Alan Lawrence wrote:
> diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x
> index c71011a5157a207fe68fe814ed80658fd5e0f90f..b879fdacaa6544790e4d3ff98ca0055073d6d1d1 100644
> --- a/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x
> +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x
> @@ -9,7 +9,7 @@ main (int argc, char **argv)
>    int64_t arr2[] = {1};
>    int64x1_t in2 = vld1_s64 (arr2);
>    int64x1_t actual = vext_s64 (in1, in2, 0);
> -  if (actual != in1)
> +  if (actual[0] != in1[0])
>      abort ();
>  
>    return 0;
> diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x
> index 8d5072bf761d96ea5a95342423ae9861d05d024a..bd51e27c2156bfcaca6b26798c449369b2894c08 100644
> --- a/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x
> +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x
> @@ -9,7 +9,7 @@ main (int argc, char **argv)
>    uint64_t arr2[] = {1};
>    uint64x1_t in2 = vld1_u64 (arr2);
>    uint64x1_t actual = vext_u64 (in1, in2, 0);
> -  if (actual != in1)
> +  if (actual[0] != in1[0])
>      abort ();
>  
>    return 0;

Hi Alan,

Note that these files are also included by tests in the ARM backend, where
<u>int64x1_t is still a typedef to a scalar type, leading to:

  PASS->FAIL: gcc.target/arm/simd/vexts64_1.c (test for excess errors)
  ../aarch64/simd/ext_u64.x:12:23: error: subscripted value is neither array nor pointer nor vector

Thanks,
James
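
(For reference, a self-contained sketch of a form that should compile
whether int64x1_t is a GCC vector, as on AArch64 after this patch, or a
plain scalar typedef, as on the current ARM backend; the array contents
here are illustrative, not taken from the original test:)

  #include <arm_neon.h>
  #include <stdlib.h>

  int
  main (int argc, char **argv)
  {
    int64_t arr1[] = {0};
    int64x1_t in1 = vld1_s64 (arr1);
    int64_t arr2[] = {1};
    int64x1_t in2 = vld1_s64 (arr2);
    int64x1_t actual = vext_s64 (in1, in2, 0);
    /* vget_lane_s64 avoids subscripting, so it works for both backends.  */
    if (vget_lane_s64 (actual, 0) != vget_lane_s64 (in1, 0))
      abort ();
    return 0;
  }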

Patch

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 51407cbef59e0135a897ccdf4224b847dccdad88..91f68ebf2d8691c0b1c20c101c4d267c8ee24f30 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -140,9 +140,11 @@  typedef struct
   enum aarch64_type_qualifiers *qualifiers;
 } aarch64_simd_builtin_datum;
 
+/*  The qualifier_internal allows generation of a unary builtin from
+    a pattern with a third pseudo-operand such as a match_scratch.  */
 static enum aarch64_type_qualifiers
 aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_none, qualifier_none };
+  = { qualifier_none, qualifier_none, qualifier_internal };
 #define TYPES_UNOP (aarch64_types_unop_qualifiers)
 static enum aarch64_type_qualifiers
 aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 1b931bede943b8e8682064a0bb799f1d285c7301..268432cc117b7027ee9472fc5a4f9b1ea13bea0f 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -365,6 +365,8 @@ 
   BUILTIN_VDQF (UNOP, frecpe, 0)
   BUILTIN_VDQF (BINOP, frecps, 0)
 
+  /* Implemented by a mixture of abs2 patterns.  Note the DImode builtin is
+     only ever used for the int64x1_t intrinsic, there is no scalar version.  */
   BUILTIN_VALLDI (UNOP, abs, 2)
 
   VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 704fc217a67e9ccadf1faafdd1d49713b8a1d022..24a1d9cdd9ab966a6e99d9f6d25e032a83d2a432 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7352,6 +7352,8 @@  static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
   { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
   { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
   { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
+  { DImode,    "__builtin_aarch64_simd_di",     "11__Int64x1_t" },
+  { DImode,    "__builtin_aarch64_simd_udi",    "12__Uint64x1_t" },
   { V1DFmode,  "__builtin_aarch64_simd_df",	"13__Float64x1_t" },
   { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
   { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 4900936d0cd60bcb7adacf5018c3ffe3bb9b6cc6..9fbfa2d48cdecfc038789710e9cd48512cf637a1 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -38,7 +38,8 @@  typedef __builtin_aarch64_simd_hi int16x4_t
   __attribute__ ((__vector_size__ (8)));
 typedef __builtin_aarch64_simd_si int32x2_t
   __attribute__ ((__vector_size__ (8)));
-typedef int64_t int64x1_t;
+typedef __builtin_aarch64_simd_di int64x1_t
+  __attribute__ ((__vector_size__ (8)));
 typedef int32_t int32x1_t;
 typedef int16_t int16x1_t;
 typedef int8_t int8x1_t;
@@ -56,7 +57,8 @@  typedef __builtin_aarch64_simd_uhi uint16x4_t
   __attribute__ ((__vector_size__ (8)));
 typedef __builtin_aarch64_simd_usi uint32x2_t
   __attribute__ ((__vector_size__ (8)));
-typedef uint64_t uint64x1_t;
+typedef __builtin_aarch64_simd_udi uint64x1_t
+  __attribute__ ((__vector_size__ (8)));
 typedef uint32_t uint32x1_t;
 typedef uint16_t uint16x1_t;
 typedef uint8_t uint8x1_t;
@@ -479,7 +481,11 @@  typedef struct poly16x8x4_t
   __aarch64_vget_lane_any (v4hi, , ,__a, __b)
 #define __aarch64_vget_lane_s32(__a, __b) \
   __aarch64_vget_lane_any (v2si, , ,__a, __b)
-#define __aarch64_vget_lane_s64(__a, __b) (__a)
+#define __aarch64_vget_lane_s64(__a, __b) __extension__	\
+  ({							\
+    __builtin_aarch64_im_lane_boundsi (__b, 1);		\
+    __a[0];						\
+  })
 
 #define __aarch64_vget_lane_u8(__a, __b) \
   __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
@@ -487,7 +493,11 @@  typedef struct poly16x8x4_t
   __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
 #define __aarch64_vget_lane_u32(__a, __b) \
   __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
-#define __aarch64_vget_lane_u64(__a, __b) (__a)
+#define __aarch64_vget_lane_u64(__a, __b) __extension__	\
+  ({							\
+    __builtin_aarch64_im_lane_boundsi (__b, 1);		\
+    __a[0];						\
+  })
 
 #define __aarch64_vgetq_lane_f32(__a, __b) \
   __aarch64_vget_lane_any (v4sf, , , __a, __b)
@@ -535,14 +545,16 @@  typedef struct poly16x8x4_t
    __aarch64_vdup_lane_any (s16, , , __a, __b)
 #define __aarch64_vdup_lane_s32(__a, __b) \
    __aarch64_vdup_lane_any (s32, , , __a, __b)
-#define __aarch64_vdup_lane_s64(__a, __b) (__a)
+#define __aarch64_vdup_lane_s64(__a, __b) \
+  __aarch64_vdup_lane_any (s64, , , __a, __b)
 #define __aarch64_vdup_lane_u8(__a, __b) \
    __aarch64_vdup_lane_any (u8, , , __a, __b)
 #define __aarch64_vdup_lane_u16(__a, __b) \
    __aarch64_vdup_lane_any (u16, , , __a, __b)
 #define __aarch64_vdup_lane_u32(__a, __b) \
    __aarch64_vdup_lane_any (u32, , , __a, __b)
-#define __aarch64_vdup_lane_u64(__a, __b) (__a)
+#define __aarch64_vdup_lane_u64(__a, __b) \
+   __aarch64_vdup_lane_any (u64, , , __a, __b)
 
 /* __aarch64_vdup_laneq internal macros.  */
 #define __aarch64_vdup_laneq_f32(__a, __b) \
@@ -585,14 +597,16 @@  typedef struct poly16x8x4_t
    __aarch64_vdup_lane_any (s16, q, , __a, __b)
 #define __aarch64_vdupq_lane_s32(__a, __b) \
    __aarch64_vdup_lane_any (s32, q, , __a, __b)
-#define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a))
+#define __aarch64_vdupq_lane_s64(__a, __b) \
+   __aarch64_vdup_lane_any (s64, q, , __a, __b)
 #define __aarch64_vdupq_lane_u8(__a, __b) \
    __aarch64_vdup_lane_any (u8, q, , __a, __b)
 #define __aarch64_vdupq_lane_u16(__a, __b) \
    __aarch64_vdup_lane_any (u16, q, , __a, __b)
 #define __aarch64_vdupq_lane_u32(__a, __b) \
    __aarch64_vdup_lane_any (u32, q, , __a, __b)
-#define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a))
+#define __aarch64_vdupq_lane_u64(__a, __b) \
+   __aarch64_vdup_lane_any (u64, q, , __a, __b)
 
 /* __aarch64_vdupq_laneq internal macros.  */
 #define __aarch64_vdupq_laneq_f32(__a, __b) \
@@ -2120,7 +2134,7 @@  vqadd_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vqadd_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
+  return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -2144,8 +2158,7 @@  vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqadddi_uuu ((uint64_t) __a,
-						     (uint64_t) __b);
+  return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -2217,7 +2230,7 @@  vqsub_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vqsub_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
+  return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -2241,8 +2254,7 @@  vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a,
-						     (uint64_t) __b);
+  return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -2314,7 +2326,7 @@  vqneg_s32 (int32x2_t __a)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vqneg_s64 (int64x1_t __a)
 {
-  return __builtin_aarch64_sqnegdi (__a);
+  return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -2356,7 +2368,7 @@  vqabs_s32 (int32x2_t __a)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vqabs_s64 (int64x1_t __a)
 {
-  return __builtin_aarch64_sqabsdi (__a);
+  return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -2446,7 +2458,7 @@  vcreate_s32 (uint64_t __a)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vcreate_s64 (uint64_t __a)
 {
-  return (int64x1_t) __a;
+  return (int64x1_t) {__a};
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
@@ -2476,7 +2488,7 @@  vcreate_u32 (uint64_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcreate_u64 (uint64_t __a)
 {
-  return (uint64x1_t) __a;
+  return (uint64x1_t) {__a};
 }
 
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
@@ -3178,7 +3190,7 @@  vreinterpretq_f64_u64 (uint64x2_t __a)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretdiv1df (__a);
+  return (int64x1_t) {__builtin_aarch64_reinterpretdiv1df (__a)};
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -3310,7 +3322,7 @@  vreinterpretq_s64_p16 (poly16x8_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretdiv1df_us (__a);
+  return (uint64x1_t) {__builtin_aarch64_reinterpretdiv1df_us (__a)};
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -4233,7 +4245,7 @@  vreinterpretq_u32_p16 (poly16x8_t __a)
 
 #define __GET_LOW(__TYPE) \
   uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);  \
-  uint64_t lo = vgetq_lane_u64 (tmp, 0);  \
+  uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0));  \
   return vreinterpret_##__TYPE##_u64 (lo);
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
@@ -4281,7 +4293,7 @@  vget_low_s32 (int32x4_t __a)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vget_low_s64 (int64x2_t __a)
 {
-  return vgetq_lane_s64 (__a, 0);
+  __GET_LOW (s64);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -4305,7 +4317,7 @@  vget_low_u32 (uint32x4_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vget_low_u64 (uint64x2_t __a)
 {
-  return vgetq_lane_u64 (__a, 0);
+  return vcreate_u64 (vgetq_lane_u64 (__a, 0));
 }
 
 #undef __GET_LOW
@@ -4331,7 +4343,7 @@  vcombine_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vcombine_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
+  return __builtin_aarch64_combinedi (__a[0], __b[0]);
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
@@ -4364,8 +4376,7 @@  vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
-						   (int64x1_t) __b);
+  return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]);
 }
 
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
@@ -12948,7 +12959,7 @@  vaddlv_u32 (uint32x2_t a)
   return result;
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
 vpaddd_s64 (int64x2_t __a)
 {
   return __builtin_aarch64_addpdi (__a);
@@ -13859,7 +13870,7 @@  vabs_s32 (int32x2_t __a)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vabs_s64 (int64x1_t __a)
 {
-  return __builtin_llabs (__a);
+  return (int64x1_t) {__builtin_aarch64_absdi (__a[0])};
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
@@ -13900,14 +13911,14 @@  vabsq_s64 (int64x2_t __a)
 
 /* vadd */
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vaddd_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vaddd_s64 (int64_t __a, int64_t __b)
 {
   return __a + __b;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vaddd_u64 (uint64_t __a, uint64_t __b)
 {
   return __a + __b;
 }
@@ -14075,7 +14086,8 @@  vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
 {
-  return __builtin_aarch64_simd_bsldi_suss (__a, __b, __c);
+  return (int64x1_t)
+      {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -14099,7 +14111,8 @@  vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
 {
-  return __builtin_aarch64_simd_bsldi_uuuu (__a, __b, __c);
+  return (uint64x1_t)
+      {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])};
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
@@ -14350,7 +14363,7 @@  vceq_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vceq_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return __a == __b ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -14377,7 +14390,7 @@  vceq_u32 (uint32x2_t __a, uint32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return __a == __b ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
@@ -14459,14 +14472,14 @@  vceqs_f32 (float32_t __a, float32_t __b)
   return __a == __b ? -1 : 0;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vceqd_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vceqd_s64 (int64_t __a, int64_t __b)
 {
   return __a == __b ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vceqd_u64 (uint64_t __a, uint64_t __b)
 {
   return __a == __b ? -1ll : 0ll;
 }
@@ -14524,7 +14537,7 @@  vceqz_s32 (int32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vceqz_s64 (int64x1_t __a)
 {
-  return __a == 0ll ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -14554,7 +14567,7 @@  vceqz_u32 (uint32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vceqz_u64 (uint64x1_t __a)
 {
-  return __a == 0ll ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
@@ -14650,14 +14663,14 @@  vceqzs_f32 (float32_t __a)
   return __a == 0.0f ? -1 : 0;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vceqzd_s64 (int64x1_t __a)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vceqzd_s64 (int64_t __a)
 {
   return __a == 0 ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vceqzd_u64 (int64x1_t __a)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vceqzd_u64 (uint64_t __a)
 {
   return __a == 0 ? -1ll : 0ll;
 }
@@ -14703,7 +14716,7 @@  vcge_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcge_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return __a >= __b ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -14730,7 +14743,7 @@  vcge_u32 (uint32x2_t __a, uint32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return __a >= __b ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
@@ -14805,14 +14818,14 @@  vcges_f32 (float32_t __a, float32_t __b)
   return __a >= __b ? -1 : 0;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcged_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcged_s64 (int64_t __a, int64_t __b)
 {
   return __a >= __b ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcged_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcged_u64 (uint64_t __a, uint64_t __b)
 {
   return __a >= __b ? -1ll : 0ll;
 }
@@ -14862,7 +14875,7 @@  vcgez_s32 (int32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgez_s64 (int64x1_t __a)
 {
-  return __a >= 0ll ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
@@ -14916,8 +14929,8 @@  vcgezs_f32 (float32_t __a)
   return __a >= 0.0f ? -1 : 0;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcgezd_s64 (int64x1_t __a)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcgezd_s64 (int64_t __a)
 {
   return __a >= 0 ? -1ll : 0ll;
 }
@@ -14963,7 +14976,7 @@  vcgt_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgt_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return __a > __b ? -1ll : 0ll;
+  return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -14990,7 +15003,7 @@  vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return __a > __b ? -1ll : 0ll;
+  return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
@@ -15065,14 +15078,14 @@  vcgts_f32 (float32_t __a, float32_t __b)
   return __a > __b ? -1 : 0;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcgtd_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcgtd_s64 (int64_t __a, int64_t __b)
 {
   return __a > __b ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcgtd_u64 (uint64_t __a, uint64_t __b)
 {
   return __a > __b ? -1ll : 0ll;
 }
@@ -15122,7 +15135,7 @@  vcgtz_s32 (int32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgtz_s64 (int64x1_t __a)
 {
-  return __a > 0ll ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
@@ -15176,8 +15189,8 @@  vcgtzs_f32 (float32_t __a)
   return __a > 0.0f ? -1 : 0;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcgtzd_s64 (int64x1_t __a)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcgtzd_s64 (int64_t __a)
 {
   return __a > 0 ? -1ll : 0ll;
 }
@@ -15223,7 +15236,7 @@  vcle_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcle_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return __a <= __b ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -15250,7 +15263,7 @@  vcle_u32 (uint32x2_t __a, uint32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return __a <= __b ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
@@ -15325,14 +15338,14 @@  vcles_f32 (float32_t __a, float32_t __b)
   return __a <= __b ? -1 : 0;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcled_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcled_s64 (int64_t __a, int64_t __b)
 {
   return __a <= __b ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcled_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcled_u64 (uint64_t __a, uint64_t __b)
 {
   return __a <= __b ? -1ll : 0ll;
 }
@@ -15382,7 +15395,7 @@  vclez_s32 (int32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vclez_s64 (int64x1_t __a)
 {
-  return __a <= 0ll ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
@@ -15436,8 +15449,8 @@  vclezs_f32 (float32_t __a)
   return __a <= 0.0f ? -1 : 0;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vclezd_s64 (int64x1_t __a)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vclezd_s64 (int64_t __a)
 {
   return __a <= 0 ? -1ll : 0ll;
 }
@@ -15483,7 +15496,7 @@  vclt_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vclt_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return __a < __b ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -15510,7 +15523,7 @@  vclt_u32 (uint32x2_t __a, uint32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return __a < __b ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
@@ -15585,14 +15598,14 @@  vclts_f32 (float32_t __a, float32_t __b)
   return __a < __b ? -1 : 0;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcltd_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcltd_s64 (int64_t __a, int64_t __b)
 {
   return __a < __b ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcltd_u64 (uint64_t __a, uint64_t __b)
 {
   return __a < __b ? -1ll : 0ll;
 }
@@ -15642,7 +15655,7 @@  vcltz_s32 (int32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcltz_s64 (int64x1_t __a)
 {
-  return __a < 0ll ? -1ll : 0ll;
+  return (uint64x1_t) {__a[0] < 0ll ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
@@ -15696,8 +15709,8 @@  vcltzs_f32 (float32_t __a)
   return __a < 0.0f ? -1 : 0;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcltzd_s64 (int64x1_t __a)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcltzd_s64 (int64_t __a)
 {
   return __a < 0 ? -1ll : 0ll;
 }
@@ -16260,7 +16273,7 @@  vdup_n_s32 (int32_t __a)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vdup_n_s64 (int64_t __a)
 {
-  return __a;
+  return (int64x1_t) {__a};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -16284,7 +16297,7 @@  vdup_n_u32 (uint32_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vdup_n_u64 (uint64_t __a)
 {
-  return __a;
+  return (uint64x1_t) {__a};
 }
 
 /* vdupq_n  */
@@ -16724,15 +16737,17 @@  vdupd_lane_f64 (float64x1_t __a, const int __b)
 }
 
 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
-vdupd_lane_s64 (int64x1_t __a, const int __attribute__ ((unused)) __b)
+vdupd_lane_s64 (int64x1_t __a, const int __b)
 {
-  return __a;
+  __builtin_aarch64_im_lane_boundsi (__b, 1);
+  return __a[0];
 }
 
 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
-vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b)
+vdupd_lane_u64 (uint64x1_t __a, const int __b)
 {
-  return __a;
+  __builtin_aarch64_im_lane_boundsi (__b, 1);
+  return __a[0];
 }
 
 /* vdupb_laneq  */
@@ -17352,7 +17367,7 @@  vld1_s32 (const int32_t *a)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vld1_s64 (const int64_t *a)
 {
-  return *a;
+  return (int64x1_t) {*a};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -17379,7 +17394,7 @@  vld1_u32 (const uint32_t *a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vld1_u64 (const uint64_t *a)
 {
-  return *a;
+  return (uint64x1_t) {*a};
 }
 
 /* vld1q */
@@ -19202,7 +19217,7 @@  vmov_n_s32 (int32_t __a)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vmov_n_s64 (int64_t __a)
 {
-  return __a;
+  return (int64x1_t) {__a};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -19226,7 +19241,7 @@  vmov_n_u32 (uint32_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vmov_n_u64 (uint64_t __a)
 {
-   return __a;
+  return (uint64x1_t) {__a};
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
@@ -19580,10 +19595,10 @@  vqadds_s32 (int32x1_t __a, int32x1_t __b)
   return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqaddd_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqaddd_s64 (int64_t __a, int64_t __b)
 {
-  return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
+  return __builtin_aarch64_sqadddi (__a, __b);
 }
 
 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
@@ -19604,11 +19619,10 @@  vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
   return (uint32x1_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vqaddd_u64 (uint64_t __a, uint64_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqadddi_uuu ((uint64_t) __a,
-						     (uint64_t) __b);
+  return __builtin_aarch64_uqadddi_uuu (__a, __b);
 }
 
 /* vqdmlal */
@@ -19727,8 +19741,8 @@  vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
   return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqdmlals_s32 (int64_t __a, int32x1_t __b, int32x1_t __c)
 {
   return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
 }
@@ -19736,7 +19750,8 @@  vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
 {
-  return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
+  return (int64x1_t)
+      {__builtin_aarch64_sqdmlal_lanesi (__a[0], __b, __c, __d)};
 }
 
 /* vqdmlsl */
@@ -19855,8 +19870,8 @@  vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
   return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqdmlsls_s32 (int64_t __a, int32x1_t __b, int32x1_t __c)
 {
   return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
 }
@@ -19864,7 +19879,7 @@  vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
 {
-  return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
+  return (int64x1_t) {__builtin_aarch64_sqdmlsl_lanesi (__a[0], __b, __c, __d)};
 }
 
 /* vqdmulh */
@@ -20029,16 +20044,16 @@  vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
   return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
 vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
 {
-  return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
+  return __builtin_aarch64_sqdmullsi (__a, __b);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
+  return (int64x1_t) {__builtin_aarch64_sqdmull_lanesi (__a, __b, __c)};
 }
 
 /* vqmovn */
@@ -20092,7 +20107,7 @@  vqmovns_s32 (int32x1_t __a)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqmovnd_s64 (int64x1_t __a)
+vqmovnd_s64 (int64_t __a)
 {
   return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
 }
@@ -20110,7 +20125,7 @@  vqmovns_u32 (uint32x1_t __a)
 }
 
 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
-vqmovnd_u64 (uint64x1_t __a)
+vqmovnd_u64 (uint64_t __a)
 {
   return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
 }
@@ -20148,7 +20163,7 @@  vqmovuns_s32 (int32x1_t __a)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqmovund_s64 (int64x1_t __a)
+vqmovund_s64 (int64_t __a)
 {
   return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
 }
@@ -20258,7 +20273,7 @@  vqrshl_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return __builtin_aarch64_sqrshldi (__a, __b);
+  return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -20282,7 +20297,7 @@  vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
 {
-  return __builtin_aarch64_uqrshldi_uus ( __a, __b);
+  return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -20351,8 +20366,8 @@  vqrshls_s32 (int32x1_t __a, int32x1_t __b)
   return __builtin_aarch64_sqrshlsi (__a, __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqrshld_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqrshld_s64 (int64_t __a, int64_t __b)
 {
   return __builtin_aarch64_sqrshldi (__a, __b);
 }
@@ -20375,8 +20390,8 @@  vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
   return __builtin_aarch64_uqrshlsi_uus (__a, __b);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vqrshld_u64 (uint64_t __a, uint64_t __b)
 {
   return __builtin_aarch64_uqrshldi_uus (__a, __b);
 }
@@ -20432,7 +20447,7 @@  vqrshrns_n_s32 (int32x1_t __a, const int __b)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqrshrnd_n_s64 (int64x1_t __a, const int __b)
+vqrshrnd_n_s64 (int64_t __a, const int __b)
 {
   return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
 }
@@ -20450,7 +20465,7 @@  vqrshrns_n_u32 (uint32x1_t __a, const int __b)
 }
 
 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
-vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
+vqrshrnd_n_u64 (uint64_t __a, const int __b)
 {
   return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b);
 }
@@ -20488,7 +20503,7 @@  vqrshruns_n_s32 (int32x1_t __a, const int __b)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqrshrund_n_s64 (int64x1_t __a, const int __b)
+vqrshrund_n_s64 (int64_t __a, const int __b)
 {
   return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
 }
@@ -20516,7 +20531,7 @@  vqshl_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vqshl_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return __builtin_aarch64_sqshldi (__a, __b);
+  return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -20540,7 +20555,7 @@  vqshl_u32 (uint32x2_t __a, int32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
 {
-  return __builtin_aarch64_uqshldi_uus ( __a, __b);
+  return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -20609,8 +20624,8 @@  vqshls_s32 (int32x1_t __a, int32x1_t __b)
   return __builtin_aarch64_sqshlsi (__a, __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqshld_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqshld_s64 (int64_t __a, int64_t __b)
 {
   return __builtin_aarch64_sqshldi (__a, __b);
 }
@@ -20633,8 +20648,8 @@  vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
   return __builtin_aarch64_uqshlsi_uus (__a, __b);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vqshld_u64 (uint64_t __a, uint64_t __b)
 {
   return __builtin_aarch64_uqshldi_uus (__a, __b);
 }
@@ -20660,7 +20675,7 @@  vqshl_n_s32 (int32x2_t __a, const int __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vqshl_n_s64 (int64x1_t __a, const int __b)
 {
-  return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
+  return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -20684,7 +20699,7 @@  vqshl_n_u32 (uint32x2_t __a, const int __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqshl_n_u64 (uint64x1_t __a, const int __b)
 {
-  return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
+  return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -20753,10 +20768,10 @@  vqshls_n_s32 (int32x1_t __a, const int __b)
   return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqshld_n_s64 (int64x1_t __a, const int __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqshld_n_s64 (int64_t __a, const int __b)
 {
-  return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
+  return __builtin_aarch64_sqshl_ndi (__a, __b);
 }
 
 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
@@ -20777,8 +20792,8 @@  vqshls_n_u32 (uint32x1_t __a, const int __b)
   return __builtin_aarch64_uqshl_nsi_uus (__a, __b);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vqshld_n_u64 (uint64x1_t __a, const int __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vqshld_n_u64 (uint64_t __a, const int __b)
 {
   return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
 }
@@ -20806,7 +20821,7 @@  vqshlu_n_s32 (int32x2_t __a, const int __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqshlu_n_s64 (int64x1_t __a, const int __b)
 {
-  return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
+  return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)};
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -20851,10 +20866,10 @@  vqshlus_n_s32 (int32x1_t __a, const int __b)
   return (int32x1_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqshlud_n_s64 (int64x1_t __a, const int __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vqshlud_n_s64 (int64_t __a, const int __b)
 {
-  return (int64x1_t) __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
+  return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
 }
 
 /* vqshrn */
@@ -20908,7 +20923,7 @@  vqshrns_n_s32 (int32x1_t __a, const int __b)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqshrnd_n_s64 (int64x1_t __a, const int __b)
+vqshrnd_n_s64 (int64_t __a, const int __b)
 {
   return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
 }
@@ -20926,7 +20941,7 @@  vqshrns_n_u32 (uint32x1_t __a, const int __b)
 }
 
 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
-vqshrnd_n_u64 (uint64x1_t __a, const int __b)
+vqshrnd_n_u64 (uint64_t __a, const int __b)
 {
   return __builtin_aarch64_uqshrn_ndi_uus (__a, __b);
 }
@@ -20964,7 +20979,7 @@  vqshruns_n_s32 (int32x1_t __a, const int __b)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqshrund_n_s64 (int64x1_t __a, const int __b)
+vqshrund_n_s64 (int64_t __a, const int __b)
 {
   return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
 }
@@ -20989,10 +21004,10 @@  vqsubs_s32 (int32x1_t __a, int32x1_t __b)
   return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqsubd_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqsubd_s64 (int64_t __a, int64_t __b)
 {
-  return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
+  return __builtin_aarch64_sqsubdi (__a, __b);
 }
 
 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
@@ -21013,11 +21028,10 @@  vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
   return (uint32x1_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vqsubd_u64 (uint64_t __a, uint64_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a,
-						     (uint64_t) __b);
+  return __builtin_aarch64_uqsubdi_uuu (__a, __b);
 }
 
 /* vrecpe  */
@@ -21303,7 +21317,7 @@  vrshl_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vrshl_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
+  return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -21327,7 +21341,7 @@  vrshl_u32 (uint32x2_t __a, int32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
 {
-  return __builtin_aarch64_urshldi_uus (__a, __b);
+  return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -21378,14 +21392,14 @@  vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
   return __builtin_aarch64_urshlv2di_uus (__a, __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vrshld_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vrshld_s64 (int64_t __a, int64_t __b)
 {
-  return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
+  return __builtin_aarch64_srshldi (__a, __b);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vrshld_u64 (uint64_t __a, int64_t __b)
 {
   return __builtin_aarch64_urshldi_uus (__a, __b);
 }
@@ -21413,7 +21427,7 @@  vrshr_n_s32 (int32x2_t __a, const int __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vrshr_n_s64 (int64x1_t __a, const int __b)
 {
-  return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
+  return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -21437,7 +21451,7 @@  vrshr_n_u32 (uint32x2_t __a, const int __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vrshr_n_u64 (uint64x1_t __a, const int __b)
 {
-  return __builtin_aarch64_urshr_ndi_uus (__a, __b);
+  return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -21488,14 +21502,14 @@  vrshrq_n_u64 (uint64x2_t __a, const int __b)
   return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vrshrd_n_s64 (int64x1_t __a, const int __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vrshrd_n_s64 (int64_t __a, const int __b)
 {
-  return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
+  return __builtin_aarch64_srshr_ndi (__a, __b);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vrshrd_n_u64 (uint64x1_t __a, const int __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vrshrd_n_u64 (uint64_t __a, const int __b)
 {
   return __builtin_aarch64_urshr_ndi_uus (__a, __b);
 }
@@ -21523,7 +21537,7 @@  vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
 {
-  return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
+  return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -21547,7 +21561,7 @@  vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
 {
-  return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
+  return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -21598,14 +21612,14 @@  vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
   return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
 {
-  return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
+  return __builtin_aarch64_srsra_ndi (__a, __b, __c);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
 {
   return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
 }
@@ -21710,7 +21724,7 @@  vshl_n_s32 (int32x2_t __a, const int __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vshl_n_s64 (int64x1_t __a, const int __b)
 {
-  return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
+  return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -21734,7 +21748,7 @@  vshl_n_u32 (uint32x2_t __a, const int __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vshl_n_u64 (uint64x1_t __a, const int __b)
 {
-  return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
+  return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -21785,16 +21799,16 @@  vshlq_n_u64 (uint64x2_t __a, const int __b)
   return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vshld_n_s64 (int64x1_t __a, const int __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vshld_n_s64 (int64_t __a, const int __b)
 {
-  return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
+  return __builtin_aarch64_ashldi (__a, __b);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vshld_n_u64 (uint64x1_t __a, const int __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vshld_n_u64 (uint64_t __a, const int __b)
 {
-  return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
+  return (uint64_t) __builtin_aarch64_ashldi (__a, __b);
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@@ -21818,7 +21832,7 @@  vshl_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vshl_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return __builtin_aarch64_sshldi (__a, __b);
+  return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -21842,7 +21856,7 @@  vshl_u32 (uint32x2_t __a, int32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vshl_u64 (uint64x1_t __a, int64x1_t __b)
 {
-  return __builtin_aarch64_ushldi_uus (__a, __b);
+  return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -21893,14 +21907,14 @@  vshlq_u64 (uint64x2_t __a, int64x2_t __b)
   return __builtin_aarch64_ushlv2di_uus (__a, __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vshld_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vshld_s64 (int64_t __a, int64_t __b)
 {
   return __builtin_aarch64_sshldi (__a, __b);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vshld_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vshld_u64 (uint64_t __a, uint64_t __b)
 {
   return __builtin_aarch64_ushldi_uus (__a, __b);
 }
@@ -22000,7 +22014,7 @@  vshr_n_s32 (int32x2_t __a, const int __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vshr_n_s64 (int64x1_t __a, const int __b)
 {
-  return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b);
+  return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -22024,7 +22038,7 @@  vshr_n_u32 (uint32x2_t __a, const int __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vshr_n_u64 (uint64x1_t __a, const int __b)
 {
-  return __builtin_aarch64_lshr_simddi_uus ( __a, __b);
+  return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus ( __a[0], __b)};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -22075,10 +22089,10 @@  vshrq_n_u64 (uint64x2_t __a, const int __b)
   return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vshrd_n_s64 (int64x1_t __a, const int __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vshrd_n_s64 (int64_t __a, const int __b)
 {
-  return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b);
+  return __builtin_aarch64_ashr_simddi (__a, __b);
 }
 
 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
@@ -22110,7 +22124,7 @@  vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
 {
-  return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
+  return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -22134,7 +22148,7 @@  vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
 {
-  return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
+  return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -22185,14 +22199,14 @@  vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
   return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vslid_n_s64 (int64_t __a, int64_t __b, const int __c)
 {
-  return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
+  return __builtin_aarch64_ssli_ndi (__a, __b, __c);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
 {
   return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
 }
@@ -22220,7 +22234,7 @@  vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
 {
-  return __builtin_aarch64_usqadddi_uus (__a, __b);
+  return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])};
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -22265,8 +22279,8 @@  vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
   return __builtin_aarch64_usqaddsi_uus (__a, __b);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vsqaddd_u64 (uint64_t __a, int64_t __b)
 {
   return __builtin_aarch64_usqadddi_uus (__a, __b);
 }
@@ -22313,7 +22327,7 @@  vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
 {
-  return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
+  return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -22337,7 +22351,7 @@  vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
 {
-  return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
+  return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -22388,14 +22402,14 @@  vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
   return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
 {
-  return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
+  return __builtin_aarch64_ssra_ndi (__a, __b, __c);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
 {
   return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
 }
@@ -22423,7 +22437,7 @@  vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
 {
-  return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
+  return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -22447,7 +22461,7 @@  vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
 {
-  return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
+  return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -22498,14 +22512,14 @@  vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
   return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vsrid_n_s64 (int64_t __a, int64_t __b, const int __c)
 {
-  return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
+  return __builtin_aarch64_ssri_ndi (__a, __b, __c);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
 {
   return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
 }
@@ -22559,7 +22573,7 @@  vst1_s32 (int32_t *a, int32x2_t b)
 __extension__ static __inline void __attribute__ ((__always_inline__))
 vst1_s64 (int64_t *a, int64x1_t b)
 {
-  *a = b;
+  *a = b[0];
 }
 
 __extension__ static __inline void __attribute__ ((__always_inline__))
@@ -22586,7 +22600,7 @@  vst1_u32 (uint32_t *a, uint32x2_t b)
 __extension__ static __inline void __attribute__ ((__always_inline__))
 vst1_u64 (uint64_t *a, uint64x1_t b)
 {
-  *a = b;
+  *a = b[0];
 }
 
 __extension__ static __inline void __attribute__ ((__always_inline__))
@@ -23537,14 +23551,14 @@  vst4q_f64 (float64_t * __a, float64x2x4_t val)
 
 /* vsub */
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vsubd_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vsubd_s64 (int64_t __a, int64_t __b)
 {
   return __a - __b;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vsubd_u64 (uint64_t __a, uint64_t __b)
 {
   return __a - __b;
 }
@@ -24174,7 +24188,7 @@  vtst_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vtst_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (__a & __b) ? -1ll : 0ll;
+  return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -24201,7 +24215,7 @@  vtst_u32 (uint32x2_t __a, uint32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (__a & __b) ? -1ll : 0ll;
+  return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -24256,14 +24270,14 @@  vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
 						  (int64x2_t) __b);
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vtstd_s64 (int64x1_t __a, int64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vtstd_s64 (int64_t __a, int64_t __b)
 {
   return (__a & __b) ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vtstd_u64 (uint64_t __a, uint64_t __b)
 {
   return (__a & __b) ? -1ll : 0ll;
 }
@@ -24291,7 +24305,7 @@  vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
 {
-  return __builtin_aarch64_suqadddi_ssu (__a,  __b);
+  return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])};
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -24336,8 +24350,8 @@  vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
   return __builtin_aarch64_suqaddsi_ssu (__a,  __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vuqaddd_s64 (int64_t __a, uint64_t __b)
 {
   return __builtin_aarch64_suqadddi_ssu (__a,  __b);
 }
diff --git a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
index 025b6904afa9f4ea39550ecd95d91a7be1d48cc6..09a20dc985ef04314e3435b5eb899035429400c4 100644
--- a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
+++ b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
@@ -8,9 +8,11 @@ 
 void f0 (int8x8_t a) {}
 void f1 (int16x4_t a) {}
 void f2 (int32x2_t a) {}
+void f22 (int64x1_t a) {}
 void f3 (uint8x8_t a) {}
 void f4 (uint16x4_t a) {}
 void f5 (uint32x2_t a) {}
+void f23 (uint64x1_t a) {}
 void f6 (float32x2_t a) {}
 void f7 (poly8x8_t a) {}
 void f8 (poly16x4_t a) {}
@@ -35,9 +37,11 @@  void g1 (int8x16_t, int8x16_t) {}
 // { dg-final { scan-assembler "_Z2f010__Int8x8_t:" } }
 // { dg-final { scan-assembler "_Z2f111__Int16x4_t:" } }
 // { dg-final { scan-assembler "_Z2f211__Int32x2_t:" } }
+// { dg-final { scan-assembler "_Z3f2211__Int64x1_t:" } }
 // { dg-final { scan-assembler "_Z2f311__Uint8x8_t:" } }
 // { dg-final { scan-assembler "_Z2f412__Uint16x4_t:" } }
 // { dg-final { scan-assembler "_Z2f512__Uint32x2_t:" } }
+// { dg-final { scan-assembler "_Z3f2312__Uint64x1_t:" } }
 // { dg-final { scan-assembler "_Z2f613__Float32x2_t:" } }
 // { dg-final { scan-assembler "_Z2f711__Poly8x8_t:" } }
 // { dg-final { scan-assembler "_Z2f812__Poly16x4_t:" } }
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-64x1_1.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-64x1_1.c
index 673242687e4946d7bc1cb61c247510dfd128cc81..05957e2dcae1d830a404814062b993fad7030712 100644
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-64x1_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-64x1_1.c
@@ -11,5 +11,7 @@ 
 #include "abitest-2.h"
 #else
 FUNC_VAL_CHECK ( 0, float64x1_t, (float64x1_t) {123456.789}, D0, flat)
+FUNC_VAL_CHECK ( 1, int64x1_t, (int64x1_t) {0xdeadbeefcafebabeLL}, D0, flat)
+FUNC_VAL_CHECK ( 2, uint64x1_t, (uint64x1_t) {0xaaaabbbbccccddddULL}, D0, flat)
 #endif
 
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_64x1_1.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_64x1_1.c
index f1dc1a759b07fcc8a9c4310ac14f43274a3f378f..b5281d5a545b877b6831bed396bdd502486ce389 100644
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_64x1_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_64x1_1.c
@@ -12,5 +12,9 @@ 
 #else
 ARG (float64x1_t, (float64x1_t) {123456.789}, D0)
 ARG (float64_t, 987654.321, D1)
-LAST_ARG (float64x1_t, (float64x1_t) {13579.2468}, D2)
+ARG (float64x1_t, (float64x1_t) {13579.2468}, D2)
+ARG (int64x1_t, (int64x1_t) {0xcafebabe0cabfaffLL}, D3)
+ARG (uint64_t, 0xdeadbeefdeafbeeb, X0)
+ARG (int64_t, 0x0123456789abcdef, X1)
+LAST_ARG (uint64x1_t, (uint64x1_t) {0xaaaabbbbccccddddULL}, D4)
 #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
index aa041cc2c20e2bb4354d4f168ac29334b16157a5..7a03091fd3485222232cc1af0aa6ceaa50f0ba42 100644
--- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
@@ -11,45 +11,37 @@ 
 
 /* { dg-final { scan-assembler-times "\\tadd\\tx\[0-9\]+" 2 } } */
 
-uint64x1_t
-test_vaddd_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vaddd_u64 (uint64_t a, uint64_t b)
 {
   return vaddd_u64 (a, b);
 }
 
-int64x1_t
-test_vaddd_s64 (int64x1_t a, int64x1_t b)
+int64_t
+test_vaddd_s64 (int64_t a, int64_t b)
 {
   return vaddd_s64 (a, b);
 }
 
 /* { dg-final { scan-assembler-times "\\tadd\\td\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vaddd_s64_2 (int64x1_t a, int64x1_t b, int64x1_t c, int64x1_t d)
-{
-  return vqaddd_s64 (vaddd_s64 (vqaddd_s64 (a, b), vqaddd_s64 (c, d)),
-		     vqaddd_s64 (a, d));
-}
-
-/* { dg-final { scan-assembler-times "\\tabs\\td\[0-9\]+, d\[0-9\]+" 1 } } */
-
-int64x1_t
-test_vabs_s64 (int64x1_t a)
+int64_t
+test_vaddd_s64_2 (int64_t a, int64_t b)
 {
-  uint64x1_t res;
+  int64_t res;
   force_simd (a);
-  res = vabs_s64 (a);
+  force_simd (b);
+  res = vaddd_s64 (a, b);
   force_simd (res);
   return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tcmeq\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vceqd_s64 (int64x1_t a, int64x1_t b)
+uint64_t
+test_vceqd_s64 (int64_t a, int64_t b)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   force_simd (b);
   res = vceqd_s64 (a, b);
@@ -59,10 +51,10 @@  test_vceqd_s64 (int64x1_t a, int64x1_t b)
 
 /* { dg-final { scan-assembler-times "\\tcmeq\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
 
-uint64x1_t
-test_vceqzd_s64 (int64x1_t a)
+uint64_t
+test_vceqzd_s64 (int64_t a)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   res = vceqzd_s64 (a);
   force_simd (res);
@@ -71,10 +63,10 @@  test_vceqzd_s64 (int64x1_t a)
 
 /* { dg-final { scan-assembler-times "\\tcmge\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
 
-uint64x1_t
-test_vcged_s64 (int64x1_t a, int64x1_t b)
+uint64_t
+test_vcged_s64 (int64_t a, int64_t b)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   force_simd (b);
   res = vcged_s64 (a, b);
@@ -82,10 +74,10 @@  test_vcged_s64 (int64x1_t a, int64x1_t b)
   return res;
 }
 
-uint64x1_t
-test_vcled_s64 (int64x1_t a, int64x1_t b)
+uint64_t
+test_vcled_s64 (int64_t a, int64_t b)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   force_simd (b);
   res = vcled_s64 (a, b);
@@ -96,10 +88,10 @@  test_vcled_s64 (int64x1_t a, int64x1_t b)
 /* Idiom recognition will cause this testcase not to generate
    the expected cmge instruction, so do not check for it.  */
 
-uint64x1_t
-test_vcgezd_s64 (int64x1_t a)
+uint64_t
+test_vcgezd_s64 (int64_t a)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   res = vcgezd_s64 (a);
   force_simd (res);
@@ -108,10 +100,10 @@  test_vcgezd_s64 (int64x1_t a)
 
 /* { dg-final { scan-assembler-times "\\tcmhs\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vcged_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vcged_u64 (uint64_t a, uint64_t b)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   force_simd (b);
   res = vcged_u64 (a, b);
@@ -121,10 +113,10 @@  test_vcged_u64 (uint64x1_t a, uint64x1_t b)
 
 /* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
 
-uint64x1_t
-test_vcgtd_s64 (int64x1_t a, int64x1_t b)
+uint64_t
+test_vcgtd_s64 (int64_t a, int64_t b)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   force_simd (b);
   res = vcgtd_s64 (a, b);
@@ -132,10 +124,10 @@  test_vcgtd_s64 (int64x1_t a, int64x1_t b)
   return res;
 }
 
-uint64x1_t
-test_vcltd_s64 (int64x1_t a, int64x1_t b)
+uint64_t
+test_vcltd_s64 (int64_t a, int64_t b)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   force_simd (b);
   res = vcltd_s64 (a, b);
@@ -145,10 +137,10 @@  test_vcltd_s64 (int64x1_t a, int64x1_t b)
 
 /* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
 
-uint64x1_t
-test_vcgtzd_s64 (int64x1_t a)
+uint64_t
+test_vcgtzd_s64 (int64_t a)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   res = vcgtzd_s64 (a);
   force_simd (res);
@@ -157,10 +149,10 @@  test_vcgtzd_s64 (int64x1_t a)
 
 /* { dg-final { scan-assembler-times "\\tcmhi\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vcgtd_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vcgtd_u64 (uint64_t a, uint64_t b)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   force_simd (b);
   res = vcgtd_u64 (a, b);
@@ -170,10 +162,10 @@  test_vcgtd_u64 (uint64x1_t a, uint64x1_t b)
 
 /* { dg-final { scan-assembler-times "\\tcmle\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
 
-uint64x1_t
-test_vclezd_s64 (int64x1_t a)
+uint64_t
+test_vclezd_s64 (int64_t a)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   res = vclezd_s64 (a);
   force_simd (res);
@@ -183,10 +175,10 @@  test_vclezd_s64 (int64x1_t a)
 /* Idiom recognition will cause this testcase not to generate
    the expected cmlt instruction, so do not check for it.  */
 
-uint64x1_t
-test_vcltzd_s64 (int64x1_t a)
+uint64_t
+test_vcltzd_s64 (int64_t a)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   res = vcltzd_s64 (a);
   force_simd (res);
@@ -261,32 +253,28 @@  test_vdups_lane_u32 (uint32x4_t a)
 
 /* { dg-final { scan-assembler-times "aarch64_get_lanev2di" 2 } } */
 
-int64x1_t
-test_vdupd_lane_s64 (int64x2_t a)
+int64_t
+test_vdupd_laneq_s64 (int64x2_t a)
 {
-  int64x1_t res;
-  force_simd (a);
-  res = vdupd_laneq_s64 (a, 1);
+  int64_t res = vdupd_laneq_s64 (a, 1);
   force_simd (res);
   return res;
 }
 
-uint64x1_t
-test_vdupd_lane_u64 (uint64x2_t a)
+uint64_t
+test_vdupd_laneq_u64 (uint64x2_t a)
 {
-  uint64x1_t res;
-  force_simd (a);
-  res = vdupd_laneq_u64 (a, 1);
+  uint64_t res = vdupd_laneq_u64 (a, 1);
   force_simd (res);
   return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tcmtst\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
 
-int64x1_t
-test_vtst_s64 (int64x1_t a, int64x1_t b)
+uint64_t
+test_vtstd_s64 (int64_t a, int64_t b)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   force_simd (b);
   res = vtstd_s64 (a, b);
@@ -294,13 +282,13 @@  test_vtst_s64 (int64x1_t a, int64x1_t b)
   return res;
 }
 
-uint64x1_t
-test_vtst_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vtstd_u64 (uint64_t a, uint64_t b)
 {
-  uint64x1_t res;
+  uint64_t res;
   force_simd (a);
   force_simd (b);
-  res = vtstd_s64 (a, b);
+  res = vtstd_u64 (a, b);
   force_simd (res);
   return res;
 }
@@ -314,8 +302,8 @@  test_vpaddd_s64 (int64x2_t a)
 
 /* { dg-final { scan-assembler-times "\\tuqadd\\td\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vqaddd_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vqaddd_u64 (uint64_t a, uint64_t b)
 {
   return vqaddd_u64 (a, b);
 }
@@ -344,10 +332,10 @@  test_vqaddb_u8 (uint8x1_t a, uint8x1_t b)
   return vqaddb_u8 (a, b);
 }
 
-/* { dg-final { scan-assembler-times "\\tsqadd\\td\[0-9\]+" 5 } } */
+/* { dg-final { scan-assembler-times "\\tsqadd\\td\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vqaddd_s64 (int64x1_t a, int64x1_t b)
+int64_t
+test_vqaddd_s64 (int64_t a, int64_t b)
 {
   return vqaddd_s64 (a, b);
 }
@@ -394,8 +382,8 @@  test_vqdmlalh_lane_s16 (int32x1_t a, int16x1_t b, int16x8_t c)
 
 /* { dg-final { scan-assembler-times "\\tsqdmlal\\td\[0-9\]+, s\[0-9\]+, s\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vqdmlals_s32 (int64x1_t a, int32x1_t b, int32x1_t c)
+int64_t
+test_vqdmlals_s32 (int64_t a, int32x1_t b, int32x1_t c)
 {
   return vqdmlals_s32 (a, b, c);
 }
@@ -426,8 +414,8 @@  test_vqdmlslh_lane_s16 (int32x1_t a, int16x1_t b, int16x8_t c)
 
 /* { dg-final { scan-assembler-times "\\tsqdmlsl\\td\[0-9\]+, s\[0-9\]+, s\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vqdmlsls_s32 (int64x1_t a, int32x1_t b, int32x1_t c)
+int64_t
+test_vqdmlsls_s32 (int64_t a, int32x1_t b, int32x1_t c)
 {
   return vqdmlsls_s32 (a, b, c);
 }
@@ -490,7 +478,7 @@  test_vqdmullh_lane_s16 (int16x1_t a, int16x8_t b)
 
 /* { dg-final { scan-assembler-times "\\tsqdmull\\td\[0-9\]+, s\[0-9\]+, s\[0-9\]+" 1 } } */
 
-int64x1_t
+int64_t
 test_vqdmulls_s32 (int32x1_t a, int32x1_t b)
 {
   return vqdmulls_s32 (a, b);
@@ -562,8 +550,8 @@  test_vuqadds_s32 (int32x1_t a, int8x1_t b)
 
 /* { dg-final { scan-assembler-times "\\tsuqadd\\td\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vuqaddd_s64 (int64x1_t a, int8x1_t b)
+int64_t
+test_vuqaddd_s64 (int64_t a, uint64_t b)
 {
   return vuqaddd_s64 (a, b);
 }
@@ -594,8 +582,8 @@  test_vsqadds_u32 (uint32x1_t a, int8x1_t b)
 
 /* { dg-final { scan-assembler-times "\\tusqadd\\td\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vsqaddd_u64 (uint64x1_t a, int8x1_t b)
+uint64_t
+test_vsqaddd_u64 (uint64_t a, int64_t b)
 {
   return vsqaddd_u64 (a, b);
 }
@@ -667,7 +655,7 @@  test_vqmovuns_s32 (int32x1_t a)
 /* { dg-final { scan-assembler-times "\\tsqxtun\\ts\[0-9\]+" 1 } } */
 
 int32x1_t
-test_vqmovund_s64 (int64x1_t a)
+test_vqmovund_s64 (int64_t a)
 {
   return vqmovund_s64 (a);
 }
@@ -691,7 +679,7 @@  test_vqmovns_s32 (int32x1_t a)
 /* { dg-final { scan-assembler-times "\\tsqxtn\\ts\[0-9\]+" 1 } } */
 
 int32x1_t
-test_vqmovnd_s64 (int64x1_t a)
+test_vqmovnd_s64 (int64_t a)
 {
   return vqmovnd_s64 (a);
 }
@@ -715,38 +703,42 @@  test_vqmovns_u32 (uint32x1_t a)
 /* { dg-final { scan-assembler-times "\\tuqxtn\\ts\[0-9\]+" 1 } } */
 
 uint32x1_t
-test_vqmovnd_u64 (uint64x1_t a)
+test_vqmovnd_u64 (uint64_t a)
 {
   return vqmovnd_u64 (a);
 }
 
 /* { dg-final { scan-assembler-times "\\tsub\\tx\[0-9\]+" 2 } } */
 
-uint64x1_t
-test_vsubd_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vsubd_u64 (uint64_t a, uint64_t b)
 {
   return vsubd_u64 (a, b);
 }
 
-int64x1_t
-test_vsubd_s64 (int64x1_t a, int64x1_t b)
+int64_t
+test_vsubd_s64 (int64_t a, int64_t b)
 {
   return vsubd_s64 (a, b);
 }
 
 /* { dg-final { scan-assembler-times "\\tsub\\td\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vsubd_s64_2 (int64x1_t a, int64x1_t b, int64x1_t c, int64x1_t d)
+int64_t
+test_vsubd_s64_2 (int64_t a, int64_t b)
 {
-  return vqsubd_s64 (vsubd_s64 (vqsubd_s64 (a, b), vqsubd_s64 (c, d)),
-		     vqsubd_s64 (a, d));
+  int64_t res;
+  force_simd (a);
+  force_simd (b);
+  res = vsubd_s64 (a, b);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tuqsub\\td\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vqsubd_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vqsubd_u64 (uint64_t a, uint64_t b)
 {
   return vqsubd_u64 (a, b);
 }
@@ -775,10 +767,10 @@  test_vqsubb_u8 (uint8x1_t a, uint8x1_t b)
   return vqsubb_u8 (a, b);
 }
 
-/* { dg-final { scan-assembler-times "\\tsqsub\\td\[0-9\]+" 5 } } */
+/* { dg-final { scan-assembler-times "\\tsqsub\\td\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vqsubd_s64 (int64x1_t a, int64x1_t b)
+int64_t
+test_vqsubd_s64 (int64_t a, int64_t b)
 {
   return vqsubd_s64 (a, b);
 }
@@ -809,32 +801,32 @@  test_vqsubb_s8 (int8x1_t a, int8x1_t b)
 
 /* { dg-final { scan-assembler-times "\\tsshl\\td\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vshld_s64 (int64x1_t a, int64x1_t b)
+int64_t
+test_vshld_s64 (int64_t a, int64_t b)
 {
   return vshld_s64 (a, b);
 }
 
 /* { dg-final { scan-assembler-times "\\tushl\\td\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vshld_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vshld_u64 (uint64_t a, uint64_t b)
 {
   return vshld_u64 (a, b);
 }
 
 /* { dg-final { scan-assembler-times "\\tsrshl\\td\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vrshld_s64 (int64x1_t a, int64x1_t b)
+int64_t
+test_vrshld_s64 (int64_t a, int64_t b)
 {
   return vrshld_s64 (a, b);
 }
 
 /* { dg-final { scan-assembler-times "\\turshl\\td\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vrshld_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vrshld_u64 (uint64_t a, int64_t b)
 {
   return vrshld_u64 (a, b);
 }
@@ -844,64 +836,64 @@  test_vrshld_u64 (uint64x1_t a, uint64x1_t b)
 
 /* { dg-final { scan-assembler "\\tasr\\tx\[0-9\]+" } } */
 
-int64x1_t
-test_vshrd_n_s64 (int64x1_t a)
+int64_t
+test_vshrd_n_s64 (int64_t a)
 {
   return vshrd_n_s64 (a, 5);
 }
 
 /* { dg-final { scan-assembler-times "\\tlsr\\tx\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vshrd_n_u64 (uint64x1_t a)
+uint64_t
+test_vshrd_n_u64 (uint64_t a)
 {
   return vshrd_n_u64 (a, 3);
 }
 
 /* { dg-final { scan-assembler-times "\\tssra\\td\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vsrad_n_s64 (int64x1_t a, int64x1_t b)
+int64_t
+test_vsrad_n_s64 (int64_t a, int64_t b)
 {
   return vsrad_n_s64 (a, b, 2);
 }
 
 /* { dg-final { scan-assembler-times "\\tusra\\td\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vsrad_n_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vsrad_n_u64 (uint64_t a, uint64_t b)
 {
   return vsrad_n_u64 (a, b, 5);
 }
 
 /* { dg-final { scan-assembler-times "\\tsrshr\\td\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vrshrd_n_s64 (int64x1_t a)
+int64_t
+test_vrshrd_n_s64 (int64_t a)
 {
   return vrshrd_n_s64 (a, 5);
 }
 
 /* { dg-final { scan-assembler-times "\\turshr\\td\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vrshrd_n_u64 (uint64x1_t a)
+uint64_t
+test_vrshrd_n_u64 (uint64_t a)
 {
   return vrshrd_n_u64 (a, 3);
 }
 
 /* { dg-final { scan-assembler-times "\\tsrsra\\td\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vrsrad_n_s64 (int64x1_t a, int64x1_t b)
+int64_t
+test_vrsrad_n_s64 (int64_t a, int64_t b)
 {
   return vrsrad_n_s64 (a, b, 3);
 }
 
 /* { dg-final { scan-assembler-times "\\tsrsra\\td\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vrsrad_n_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vrsrad_n_u64 (uint64_t a, uint64_t b)
 {
   return vrsrad_n_u64 (a, b, 4);
 }
@@ -932,8 +924,8 @@  test_vqrshls_s32 (int32x1_t a, int32x1_t b)
 
 /* { dg-final { scan-assembler-times "\\tsqrshl\\td\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vqrshld_s64 (int64x1_t a, int64x1_t b)
+int64_t
+test_vqrshld_s64 (int64_t a, int64_t b)
 {
   return vqrshld_s64 (a, b);
 }
@@ -964,8 +956,8 @@  test_vqrshls_u32 (uint32x1_t a, uint32x1_t b)
 
 /* { dg-final { scan-assembler-times "\\tuqrshl\\td\[0-9\]+" 1 } } */
 
-uint64x1_t
-test_vqrshld_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vqrshld_u64 (uint64_t a, uint64_t b)
 {
   return vqrshld_u64 (a, b);
 }
@@ -996,8 +988,8 @@  test_vqshlus_n_s32 (int32x1_t a)
 
 /* { dg-final { scan-assembler-times "\\tsqshlu\\td\[0-9\]+" 1 } } */
 
-int64x1_t
-test_vqshlud_n_s64 (int64x1_t a)
+int64_t
+test_vqshlud_n_s64 (int64_t a)
 {
   return vqshlud_n_s64 (a, 6);
 }
@@ -1046,14 +1038,14 @@  test_vqshls_n_s32 (int32x1_t a)
 
 /* { dg-final { scan-assembler-times "\\tsqshl\\td\[0-9\]+" 2 } } */
 
-int64x1_t
-test_vqshld_s64 (int64x1_t a, int64x1_t b)
+int64_t
+test_vqshld_s64 (int64_t a, int64_t b)
 {
   return vqshld_s64 (a, b);
 }
 
-int64x1_t
-test_vqshld_n_s64 (int64x1_t a)
+int64_t
+test_vqshld_n_s64 (int64_t a)
 {
   return vqshld_n_s64 (a, 5);
 }
@@ -1102,14 +1094,14 @@  test_vqshls_n_u32 (uint32x1_t a)
 
 /* { dg-final { scan-assembler-times "\\tuqshl\\td\[0-9\]+" 2 } } */
 
-uint64x1_t
-test_vqshld_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vqshld_u64 (uint64_t a, int64_t b)
 {
   return vqshld_u64 (a, b);
 }
 
-uint64x1_t
-test_vqshld_n_u64 (uint64x1_t a)
+uint64_t
+test_vqshld_n_u64 (uint64_t a)
 {
   return vqshld_n_u64 (a, 5);
 }
@@ -1133,7 +1125,7 @@  test_vqshruns_n_s32 (int32x1_t a)
 /* { dg-final { scan-assembler-times "\\tsqshrun\\ts\[0-9\]+" 1 } } */
 
 int32x1_t
-test_vqshrund_n_s64 (int64x1_t a)
+test_vqshrund_n_s64 (int64_t a)
 {
   return vqshrund_n_s64 (a, 4);
 }
@@ -1157,7 +1149,7 @@  test_vqrshruns_n_s32 (int32x1_t a)
 /* { dg-final { scan-assembler-times "\\tsqrshrun\\ts\[0-9\]+" 1 } } */
 
 int32x1_t
-test_vqrshrund_n_s64 (int64x1_t a)
+test_vqrshrund_n_s64 (int64_t a)
 {
   return vqrshrund_n_s64 (a, 4);
 }
@@ -1181,7 +1173,7 @@  test_vqshrns_n_s32 (int32x1_t a)
 /* { dg-final { scan-assembler-times "\\tsqshrn\\ts\[0-9\]+" 1 } } */
 
 int32x1_t
-test_vqshrnd_n_s64 (int64x1_t a)
+test_vqshrnd_n_s64 (int64_t a)
 {
   return vqshrnd_n_s64 (a, 4);
 }
@@ -1205,7 +1197,7 @@  test_vqshrns_n_u32 (uint32x1_t a)
 /* { dg-final { scan-assembler-times "\\tuqshrn\\ts\[0-9\]+" 1 } } */
 
 uint32x1_t
-test_vqshrnd_n_u64 (uint64x1_t a)
+test_vqshrnd_n_u64 (uint64_t a)
 {
   return vqshrnd_n_u64 (a, 4);
 }
@@ -1229,7 +1221,7 @@  test_vqrshrns_n_s32 (int32x1_t a)
 /* { dg-final { scan-assembler-times "\\tsqrshrn\\ts\[0-9\]+" 1 } } */
 
 int32x1_t
-test_vqrshrnd_n_s64 (int64x1_t a)
+test_vqrshrnd_n_s64 (int64_t a)
 {
   return vqrshrnd_n_s64 (a, 4);
 }
@@ -1253,49 +1245,49 @@  test_vqrshrns_n_u32 (uint32x1_t a)
 /* { dg-final { scan-assembler-times "\\tuqrshrn\\ts\[0-9\]+" 1 } } */
 
 uint32x1_t
-test_vqrshrnd_n_u64 (uint64x1_t a)
+test_vqrshrnd_n_u64 (uint64_t a)
 {
   return vqrshrnd_n_u64 (a, 4);
 }
 
 /* { dg-final { scan-assembler-times "\\tlsl\\tx\[0-9\]+" 2 } } */
 
-int64x1_t
-test_vshl_n_s64 (int64x1_t a)
+int64_t
+test_vshld_n_s64 (int64_t a)
 {
   return vshld_n_s64 (a, 9);
 }
 
-uint64x1_t
-test_vshl_n_u64 (uint64x1_t a)
+uint64_t
+test_vshld_n_u64 (uint64_t a)
 {
   return vshld_n_u64 (a, 9);
 }
 
 /* { dg-final { scan-assembler-times "\\tsli\\td\[0-9\]+" 2 } } */
 
-int64x1_t
-test_vsli_n_s64 (int64x1_t a, int64x1_t b)
+int64_t
+test_vslid_n_s64 (int64_t a, int64_t b)
 {
   return vslid_n_s64 (a, b, 9);
 }
 
-uint64x1_t
-test_vsli_n_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vslid_n_u64 (uint64_t a, uint64_t b)
 {
   return vslid_n_u64 (a, b, 9);
 }
 
 /* { dg-final { scan-assembler-times "\\tsri\\td\[0-9\]+" 2 } } */
 
-int64x1_t
-test_vsri_n_s64 (int64x1_t a, int64x1_t b)
+int64_t
+test_vsrid_n_s64 (int64_t a, int64_t b)
 {
   return vsrid_n_s64 (a, b, 9);
 }
 
-uint64x1_t
-test_vsri_n_u64 (uint64x1_t a, uint64x1_t b)
+uint64_t
+test_vsrid_n_u64 (uint64_t a, uint64_t b)
 {
   return vsrid_n_u64 (a, b, 9);
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x
index c71011a5157a207fe68fe814ed80658fd5e0f90f..b879fdacaa6544790e4d3ff98ca0055073d6d1d1 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x
+++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x
@@ -9,7 +9,7 @@  main (int argc, char **argv)
   int64_t arr2[] = {1};
   int64x1_t in2 = vld1_s64 (arr2);
   int64x1_t actual = vext_s64 (in1, in2, 0);
-  if (actual != in1)
+  if (actual[0] != in1[0])
     abort ();
 
   return 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x
index 8d5072bf761d96ea5a95342423ae9861d05d024a..bd51e27c2156bfcaca6b26798c449369b2894c08 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x
+++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x
@@ -9,7 +9,7 @@  main (int argc, char **argv)
   uint64_t arr2[] = {1};
   uint64x1_t in2 = vld1_u64 (arr2);
   uint64x1_t actual = vext_u64 (in1, in2, 0);
-  if (actual != in1)
+  if (actual[0] != in1[0])
     abort ();
 
   return 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/singleton_intrinsics_1.c b/gcc/testsuite/gcc.target/aarch64/singleton_intrinsics_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..329af947a46d2276493845bd38cd5c0e1f39e93a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/singleton_intrinsics_1.c
@@ -0,0 +1,402 @@ 
+/* { dg-do assemble } */
+/* { dg-options "-O2 -dp" } */
+
+/* Test the [u]int64x1_t intrinsics.  */
+
+#include <arm_neon.h>
+
+/* { dg-final { scan-assembler-times "\\tadd\\td\[0-9\]+" 2 } } */
+
+uint64x1_t
+test_vadd_u64 (uint64x1_t a, uint64x1_t b)
+{
+  return vadd_u64 (a, b);
+}
+
+int64x1_t
+test_vadd_s64 (int64x1_t a, int64x1_t b)
+{
+  return vadd_s64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tabs\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+
+int64x1_t
+test_vabs_s64 (int64x1_t a)
+{
+  return vabs_s64 (a);
+}
+
+/* { dg-final { scan-assembler-times "\\tcmeq\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vceq_s64 (int64x1_t a, int64x1_t b)
+{
+  return vceq_s64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tcmeq\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
+
+uint64x1_t
+test_vceqz_s64 (int64x1_t a)
+{
+  return vceqz_s64 (a);
+}
+
+/* { dg-final { scan-assembler-times "\\tcmge\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
+
+uint64x1_t
+test_vcge_s64 (int64x1_t a, int64x1_t b)
+{
+  return vcge_s64 (a, b);
+}
+
+uint64x1_t
+test_vcle_s64 (int64x1_t a, int64x1_t b)
+{
+  return vcle_s64 (a, b);
+}
+
+/* Idiom recognition will cause this testcase not to generate
+   the expected cmge instruction, so do not check for it.  */
+
+uint64x1_t
+test_vcgez_s64 (int64x1_t a)
+{
+  return vcgez_s64 (a);
+}
+
+/* { dg-final { scan-assembler-times "\\tcmhs\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vcge_u64 (uint64x1_t a, uint64x1_t b)
+{
+  return vcge_u64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
+
+uint64x1_t
+test_vcgt_s64 (int64x1_t a, int64x1_t b)
+{
+  return vcgt_s64 (a, b);
+}
+
+uint64x1_t
+test_vclt_s64 (int64x1_t a, int64x1_t b)
+{
+  return vclt_s64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
+
+uint64x1_t
+test_vcgtz_s64 (int64x1_t a)
+{
+  return vcgtz_s64 (a);
+}
+
+/* { dg-final { scan-assembler-times "\\tcmhi\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vcgt_u64 (uint64x1_t a, uint64x1_t b)
+{
+  return vcgt_u64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tcmle\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
+
+uint64x1_t
+test_vclez_s64 (int64x1_t a)
+{
+  return vclez_s64 (a);
+}
+
+/* Compiling with "-dp" outputs the name of each .md pattern into the assembler.
+   This is what we look for here.  */
+/* { dg-final { scan-assembler-times "aarch64_get_lanev2di" 2 } } */
+
+int64x1_t
+test_vdup_laneq_s64 (int64x2_t a)
+{
+  return vdup_laneq_s64 (a, 1);
+}
+
+uint64x1_t
+test_vdup_laneq_u64 (uint64x2_t a)
+{
+  return vdup_laneq_u64 (a, 1);
+}
+
+/* { dg-final { scan-assembler-times "\\tcmtst\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
+
+uint64x1_t
+test_vtst_s64 (int64x1_t a, int64x1_t b)
+{
+  return vtst_s64 (a, b);
+}
+
+uint64x1_t
+test_vtst_u64 (uint64x1_t a, uint64x1_t b)
+{
+  return vtst_u64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tuqadd\\td\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vqadd_u64 (uint64x1_t a, uint64x1_t b)
+{
+  return vqadd_u64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tsqadd\\td\[0-9\]+" 1 } } */
+
+int64x1_t
+test_vqadd_s64 (int64x1_t a, int64x1_t b)
+{
+  return vqadd_s64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tsuqadd\\td\[0-9\]+" 1 } } */
+
+int64x1_t
+test_vuqadd_s64 (int64x1_t a, uint64x1_t b)
+{
+  return vuqadd_s64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tusqadd\\td\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vsqadd_u64 (uint64x1_t a, int64x1_t b)
+{
+  return vsqadd_u64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tsub\\td\[0-9\]+" 2 } } */
+
+uint64x1_t
+test_vsub_u64 (uint64x1_t a, uint64x1_t b)
+{
+  return vsub_u64 (a, b);
+}
+
+int64x1_t
+test_vsub_s64 (int64x1_t a, int64x1_t b)
+{
+  return vsub_s64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tuqsub\\td\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vqsub_u64 (uint64x1_t a, uint64x1_t b)
+{
+  return vqsub_u64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tsqsub\\td\[0-9\]+" 1 } } */
+
+int64x1_t
+test_vqsub_s64 (int64x1_t a, int64x1_t b)
+{
+  return vqsub_s64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tsshl\\td\[0-9\]+" 1 } } */
+
+int64x1_t
+test_vshl_s64 (int64x1_t a, int64x1_t b)
+{
+  return vshl_s64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tushl\\td\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vshl_u64 (uint64x1_t a, int64x1_t b)
+{
+  return vshl_u64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tsrshl\\td\[0-9\]+" 1 } } */
+
+int64x1_t
+test_vrshl_s64 (int64x1_t a, int64x1_t b)
+{
+  return vrshl_s64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\turshl\\td\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vrshl_u64 (uint64x1_t a, int64x1_t b)
+{
+  return vrshl_u64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tsshr\\td\[0-9\]+" 3 } } */
+/* Idiom recognition compiles vcltz and vcgez to sshr rather than cmlt/cmge.  */
+
+int64x1_t
+test_vshr_n_s64 (int64x1_t a)
+{
+  return vshr_n_s64 (a, 5);
+}
+
+uint64x1_t
+test_vcltz_s64 (int64x1_t a)
+{
+  return vcltz_s64 (a);
+}
+
+/* { dg-final { scan-assembler-times "\\tushr\\td\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vshr_n_u64 (uint64x1_t a)
+{
+  return vshr_n_u64 (a, 3);
+}
+
+/* { dg-final { scan-assembler-times "\\tssra\\td\[0-9\]+" 1 } } */
+
+int64x1_t
+test_vsra_n_s64 (int64x1_t a, int64x1_t b)
+{
+  return vsra_n_s64 (a, b, 2);
+}
+
+/* { dg-final { scan-assembler-times "\\tusra\\td\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vsra_n_u64 (uint64x1_t a, uint64x1_t b)
+{
+  return vsra_n_u64 (a, b, 5);
+}
+
+/* { dg-final { scan-assembler-times "\\tsrshr\\td\[0-9\]+" 1 } } */
+
+int64x1_t
+test_vrshr_n_s64 (int64x1_t a)
+{
+  return vrshr_n_s64 (a, 5);
+}
+
+/* { dg-final { scan-assembler-times "\\turshr\\td\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vrshr_n_u64 (uint64x1_t a)
+{
+  return vrshr_n_u64 (a, 3);
+}
+
+/* { dg-final { scan-assembler-times "\\tsrsra\\td\[0-9\]+" 1 } } */
+
+int64x1_t
+test_vrsra_n_s64 (int64x1_t a, int64x1_t b)
+{
+  return vrsra_n_s64 (a, b, 3);
+}
+
+/* { dg-final { scan-assembler-times "\\tsrsra\\td\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vrsra_n_u64 (uint64x1_t a, uint64x1_t b)
+{
+  return vrsra_n_u64 (a, b, 4);
+}
+
+/* { dg-final { scan-assembler-times "\\tsqrshl\\td\[0-9\]+" 1 } } */
+
+int64x1_t
+test_vqrshl_s64 (int64x1_t a, int64x1_t b)
+{
+  return vqrshl_s64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tuqrshl\\td\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vqrshl_u64 (uint64x1_t a, int64x1_t b)
+{
+  return vqrshl_u64 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\\tsqshlu\\td\[0-9\]+" 1 } } */
+
+uint64x1_t
+test_vqshlu_n_s64 (int64x1_t a)
+{
+  return vqshlu_n_s64 (a, 6);
+}
+
+/* { dg-final { scan-assembler-times "\\tsqshl\\td\[0-9\]+" 2 } } */
+
+int64x1_t
+test_vqshl_s64 (int64x1_t a, int64x1_t b)
+{
+  return vqshl_s64 (a, b);
+}
+
+int64x1_t
+test_vqshl_n_s64 (int64x1_t a)
+{
+  return vqshl_n_s64 (a, 5);
+}
+
+/* { dg-final { scan-assembler-times "\\tuqshl\\td\[0-9\]+" 2 } } */
+
+uint64x1_t
+test_vqshl_u64 (uint64x1_t a, int64x1_t b)
+{
+  return vqshl_u64 (a, b);
+}
+
+uint64x1_t
+test_vqshl_n_u64 (uint64x1_t a)
+{
+  return vqshl_n_u64 (a, 5);
+}
+
+/* { dg-final { scan-assembler-times "\\tshl\\td\[0-9\]+" 2 } } */
+
+int64x1_t
+test_vshl_n_s64 (int64x1_t a)
+{
+  return vshl_n_s64 (a, 9);
+}
+
+uint64x1_t
+test_vshl_n_u64 (uint64x1_t a)
+{
+  return vshl_n_u64 (a, 9);
+}
+
+/* { dg-final { scan-assembler-times "\\tsli\\td\[0-9\]+" 2 } } */
+
+int64x1_t
+test_vsli_n_s64 (int64x1_t a, int64x1_t b)
+{
+  return vsli_n_s64 (a, b, 9);
+}
+
+uint64x1_t
+test_vsli_n_u64 (uint64x1_t a, uint64x1_t b)
+{
+  return vsli_n_u64 (a, b, 9);
+}
+
+/* { dg-final { scan-assembler-times "\\tsri\\td\[0-9\]+" 2 } } */
+
+int64x1_t
+test_vsri_n_s64 (int64x1_t a, int64x1_t b)
+{
+  return vsri_n_s64 (a, b, 9);
+}
+
+uint64x1_t
+test_vsri_n_u64 (uint64x1_t a, uint64x1_t b)
+{
+  return vsri_n_u64 (a, b, 9);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
index 4582471c8aad3d855eb33494ac01a62c87978ca9..b1ddc89bf798990524534ba25ea15daf63159cd8 100644
--- a/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
@@ -304,12 +304,6 @@  wrap_vdup_lane_s64_0 (int64x1_t a)
   return vdup_lane_s64 (a, 0);
 }
 
-int64x1_t __attribute__ ((noinline))
-wrap_vdup_lane_s64_1 (int64x1_t a)
-{
-  return vdup_lane_s64 (a, 1);
-}
-
 int __attribute__ ((noinline))
 test_vdup_lane_s64 ()
 {
@@ -325,12 +319,6 @@  test_vdup_lane_s64 ()
   if (c[0] != d[0])
     return 1;
 
-  c[0] = 1;
-  a = vld1_s64 (c);
-  b = wrap_vdup_lane_s64_1 (a);
-  vst1_s64 (d, b);
-  if (c[0] != d[0])
-    return 1;
   return 0;
 }
 
@@ -340,12 +328,6 @@  wrap_vdupq_lane_s64_0 (int64x1_t a)
   return vdupq_lane_s64 (a, 0);
 }
 
-int64x2_t __attribute__ ((noinline))
-wrap_vdupq_lane_s64_1 (int64x1_t a)
-{
-  return vdupq_lane_s64 (a, 1);
-}
-
 int __attribute__ ((noinline))
 test_vdupq_lane_s64 ()
 {
@@ -362,14 +344,6 @@  test_vdupq_lane_s64 ()
   for (i = 0; i < 2; i++)
     if (c[0] != d[i])
       return 1;
-
-  c[0] = 1;
-  a = vld1_s64 (c);
-  b = wrap_vdupq_lane_s64_1 (a);
-  vst1q_s64 (d, b);
-  for (i = 0; i < 2; i++)
-    if (c[0] != d[i])
-      return 1;
   return 0;
 }
 
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
index 7c04e759a5291bf5213ad5abf5c75289afad7359..c4183ce1a321fed892cd67fd002de09697a86ed5 100644
--- a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
@@ -278,9 +278,9 @@  test_vdupd_lane_u64 ()
 }
 
 int64_t __attribute__ ((noinline))
-wrap_vdupd_lane_s64_0 (uint64x1_t dummy, int64x1_t a)
+wrap_vdupd_lane_s64_0 (int64x1_t dummy, int64x1_t a)
 {
-  return vdupd_lane_u64 (a, 0);
+  return vdupd_lane_s64 (a, 0);
 }
 
 int __attribute__ ((noinline))
diff --git a/gcc/testsuite/gcc.target/aarch64/vneg_s.c b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
index accbf14074b9f9569f7e3662b6571075421f6a27..e818ab9e96ac7c8af2d96d900828fd6d34fa185a 100644
--- a/gcc/testsuite/gcc.target/aarch64/vneg_s.c
+++ b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
@@ -5,7 +5,10 @@ 
 #include <arm_neon.h>
 #include <limits.h>
 
-/* Used to force a variable to a SIMD register.  */
+/* Used to force a variable to a SIMD register.  Also acts as a stronger
+   inhibitor of optimization than the below - necessary for int64x1_t
+   because more of the implementation is in terms of gcc vector extensions
+   (which support constant propagation) than for other types.  */
 #define force_simd(V1)   asm volatile ("mov %d0, %1.d[0]"	\
 	   : "=w"(V1)						\
 	   : "w"(V1)						\
@@ -38,14 +41,6 @@  extern void abort (void);
 #define DATA_TYPE_32 float
 #define DATA_TYPE_64 double
 #define DATA_TYPE(data_len) DATA_TYPE_##data_len
-#define INDEX64_8 [i]
-#define INDEX64_16 [i]
-#define INDEX64_32 [i]
-#define INDEX64_64
-#define INDEX128_8 [i]
-#define INDEX128_16 [i]
-#define INDEX128_32 [i]
-#define INDEX128_64 [i]
 
 #define FORCE_SIMD_INST64_8(data)
 #define FORCE_SIMD_INST64_16(data)
@@ -56,8 +51,6 @@  extern void abort (void);
 #define FORCE_SIMD_INST128_32(data)
 #define FORCE_SIMD_INST128_64(data)
 
-#define INDEX(reg_len, data_len) \
-  CONCAT1 (INDEX, reg_len##_##data_len)
 #define FORCE_SIMD_INST(reg_len, data_len, data) \
   CONCAT1 (FORCE_SIMD_INST, reg_len##_##data_len) (data)
 #define LOAD_INST(reg_len, data_len) \
@@ -77,8 +70,7 @@  extern void abort (void);
     for (i = 0; i < n; i++)						\
       {									\
         INHIB_OPTIMIZATION;						\
-	if (a INDEX (reg_len, data_len)					\
-	    != b INDEX (reg_len, data_len))				\
+	if (a[i] != b[i])						\
 	  return 1;							\
       }									\
   }