
[AArch64,1/2] PR/60825 Make float64x1_t in arm_neon.h a proper vector type

Message ID 53A2D726.5090100@arm.com
State New

Commit Message

Alan Lawrence June 19, 2014, 12:27 p.m. UTC
This updates the .md files to generate V1DFmode patterns instead of DFmode for 
create and reinterpret, and the corresponding __builtins.

The various other float64x1_t intrinsics can then be rewritten; generally I've
tried to use gcc vector extensions rather than unnecessary/custom builtins where
possible, and have started adding some range checking using
__builtin_aarch64_im_lane_boundsi.
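
As a hedged illustration (not part of the patch), this is the lane-check
pattern; my_dupd_lane_f64 is a hypothetical name, but the body mirrors
vdupd_lane_f64 in the diff below:

/* Sketch: compile-time lane-bounds checking as used in arm_neon.h.
   __builtin_aarch64_im_lane_boundsi (LANE, N) raises a compile-time
   error when LANE is outside [0, N).  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
my_dupd_lane_f64 (float64x1_t __a, const int __b)
{
  __builtin_aarch64_im_lane_boundsi (__b, 1);  /* only lane 0 is valid */
  return __a[0];                /* gcc vector-extension element access */
}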

Finally, rewrite the cases in arm_neon.h and various tests that relied on
float64[x1]_t being assignment-compatible, including the arm_neon.h vfma
functions, which had the wrong (but previously equivalent) type signature; and
add some new ABI tests.
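
To make the compatibility change concrete, here is a minimal sketch (demo is
a hypothetical function, not from the patch) of what users see once
float64x1_t is a real one-element vector type:

#include <arm_neon.h>

/* Sketch: with float64x1_t a true single-element vector,
   scalar<->vector conversions must be explicit; a plain `v = x;`
   assignment from a float64_t no longer compiles.  */
float64_t
demo (float64_t x)
{
  float64x1_t v = (float64x1_t) {x};  /* construct from a scalar */
  float64x1_t w = vadd_f64 (v, v);    /* intrinsics keep vector types */
  return w[0];                        /* extract the single lane */
}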

gcc/ChangeLog:
2014-06-19  Alan Lawrence  <alan.lawrence@arm.com>

	* config/aarch64/aarch64.c (aarch64_simd_mangle_map): Add entry for
	V1DFmode.
	* config/aarch64/aarch64-builtins.c (aarch64_simd_builtin_type_mode):
	Add V1DFmode.
	(BUILTIN_VD1): New.
	(BUILTIN_VD_RE): Remove.
	(aarch64_init_simd_builtins): Add V1DF to modes/modenames.
	(aarch64_fold_builtin): Update reinterpret patterns, df becomes v1df.
	* config/aarch64/aarch64-simd-builtins.def (create): Make a v1df
	variant but not df.
	(vreinterpretv1df*, vreinterpret*v1df): New.
	(vreinterpretdf*, vreinterpret*df): Remove.
	* config/aarch64/aarch64-simd.md (aarch64_create, aarch64_reinterpret*):
	Generate V1DFmode pattern not DFmode.
	* config/aarch64/iterators.md (VD_RE): Include V1DF, remove DF.
	(VD1): New.
	* config/aarch64/arm_neon.h (float64x1_t): Typedef with gcc extensions.
	(vcreate_f64): Remove cast, use v1df builtin.
	(vcombine_f64): Remove cast, get elements with gcc vector extensions.
	(vget_low_f64, vabs_f64, vceq_f64, vceqz_f64, vcge_f64, vcgez_f64,
	vcgt_f64, vcgtz_f64, vcle_f64, vclez_f64, vclt_f64, vcltz_f64,
	vdup_n_f64, vdupq_lane_f64, vld1_f64, vld2_f64, vld3_f64, vld4_f64,
	vmov_n_f64, vst1_f64): Use gcc vector extensions.
	(vget_lane_f64, vdupd_lane_f64, vmulq_lane_f64): Use gcc extensions,
	add range check using __builtin_aarch64_im_lane_boundsi.
	(vfma_lane_f64, vfmad_lane_f64, vfma_laneq_f64, vfmaq_lane_f64,
	vfms_lane_f64, vfmsd_lane_f64, vfms_laneq_f64, vfmsq_lane_f64): Fix
	type signature, use gcc vector extensions.
	(vreinterpret_p8_f64, vreinterpret_p16_f64, vreinterpret_f32_f64,
	vreinterpret_f64_f32, vreinterpret_f64_p8, vreinterpret_f64_p16,
	vreinterpret_f64_s8, vreinterpret_f64_s16, vreinterpret_f64_s32,
	vreinterpret_f64_s64, vreinterpret_f64_u8, vreinterpret_f64_u16,
	vreinterpret_f64_u32, vreinterpret_f64_u64, vreinterpret_s8_f64,
	vreinterpret_s16_f64, vreinterpret_s32_f64, vreinterpret_s64_f64,
	vreinterpret_u8_f64, vreinterpret_u16_f64, vreinterpret_u32_f64,
	vreinterpret_u64_f64): Use v1df builtin not df.

gcc/testsuite/ChangeLog:
2014-06-19  Alan Lawrence  <alan.lawrence@arm.com>

	* g++.dg/abi/mangle-neon-aarch64.C: Also test mangling of float64x1_t.
	* gcc.target/aarch64/aapcs64/test_64x1_1.c: New test.
	* gcc.target/aarch64/aapcs64/func-ret-64x1_1.c: New test.
	* gcc.target/aarch64/simd/ext_f64_1.c (main): Compare vector elements.
	* gcc.target/aarch64/vadd_f64.c: Rewrite with macro to use vector types.
	* gcc.target/aarch64/vsub_f64.c: Likewise.
	* gcc.target/aarch64/vdiv_f.c (INDEX*, RUN_TEST): Remove indexing
	scheme, as it is now the same for all variants.
	* gcc.target/aarch64/vrnd_f64_1.c (compare_f64): Return float64_t not
	float64x1_t.

Comments

Marcus Shawcroft June 20, 2014, 12:49 p.m. UTC | #1
On 19 June 2014 13:27, Alan Lawrence <alan.lawrence@arm.com> wrote:
> This updates the .md files to generate V1DFmode patterns instead of DFmode
> for create and reinterpret, and the corresponding __builtins.
>
> The various other float64x1_t intrinsics can then be rewritten, generally
> I've tried to use gcc vector extensions rather than unnecessary/custom
> builtins where possible, and have started adding some range checking using
> __builtin_aarch64_im_lane_boundsi.
>
> Finally, rewrite the cases in arm_neon.h and various tests, that relied on
> float64[x1]_t being assignment-compatible, including arm_neon.h vfma
> functions which had the wrong (but previously equivalent) type signature;
> and add some new ABI tests.

OK /Marcus

Patch

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index fe4d39283b05f244b400f62d4e44097f51b237d7..51407cbef59e0135a897ccdf4224b847dccdad88 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -53,6 +53,7 @@  enum aarch64_simd_builtin_type_mode
   T_V4HI,
   T_V2SI,
   T_V2SF,
+  T_V1DF,
   T_DI,
   T_DF,
   T_V16QI,
@@ -76,6 +77,7 @@  enum aarch64_simd_builtin_type_mode
 #define v4hi_UP  T_V4HI
 #define v2si_UP  T_V2SI
 #define v2sf_UP  T_V2SF
+#define v1df_UP  T_V1DF
 #define di_UP    T_DI
 #define df_UP    T_DF
 #define v16qi_UP T_V16QI
@@ -346,6 +348,8 @@  aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   VAR2 (T, N, MAP, v8qi, v16qi)
 #define BUILTIN_VD(T, N, MAP) \
   VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf)
+#define BUILTIN_VD1(T, N, MAP) \
+  VAR5 (T, N, MAP, v8qi, v4hi, v2si, v2sf, v1df)
 #define BUILTIN_VDC(T, N, MAP) \
   VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
 #define BUILTIN_VDIC(T, N, MAP) \
@@ -380,8 +384,6 @@  aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   VAR3 (T, N, MAP, v8qi, v4hi, v2si)
 #define BUILTIN_VD_HSI(T, N, MAP) \
   VAR2 (T, N, MAP, v4hi, v2si)
-#define BUILTIN_VD_RE(T, N, MAP) \
-  VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
 #define BUILTIN_VQ(T, N, MAP) \
   VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df)
 #define BUILTIN_VQN(T, N, MAP) \
@@ -694,13 +696,13 @@  aarch64_init_simd_builtins (void)
       aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
       const char *const modenames[] =
 	{
-	  "v8qi", "v4hi", "v2si", "v2sf", "di", "df",
+	  "v8qi", "v4hi", "v2si", "v2sf", "v1df", "di", "df",
 	  "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df",
 	  "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi"
 	};
       const enum machine_mode modes[] =
 	{
-	  V8QImode, V4HImode, V2SImode, V2SFmode, DImode, DFmode,
+	  V8QImode, V4HImode, V2SImode, V2SFmode, V1DFmode, DImode, DFmode,
 	  V16QImode, V8HImode, V4SImode, V4SFmode, V2DImode,
 	  V2DFmode, TImode, EImode, OImode, XImode, SImode,
 	  SFmode, HImode, QImode
@@ -1250,24 +1252,23 @@  aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
 	  return fold_build2 (NE_EXPR, type, and_node, vec_zero_node);
 	  break;
 	}
-      VAR1 (REINTERP_SS, reinterpretdi, 0, df)
-      VAR1 (REINTERP_SS, reinterpretv8qi, 0, df)
-      VAR1 (REINTERP_SS, reinterpretv4hi, 0, df)
-      VAR1 (REINTERP_SS, reinterpretv2si, 0, df)
-      VAR1 (REINTERP_SS, reinterpretv2sf, 0, df)
-      BUILTIN_VD (REINTERP_SS, reinterpretdf, 0)
-      BUILTIN_VD (REINTERP_SU, reinterpretdf, 0)
-      VAR1 (REINTERP_US, reinterpretdi, 0, df)
-      VAR1 (REINTERP_US, reinterpretv8qi, 0, df)
-      VAR1 (REINTERP_US, reinterpretv4hi, 0, df)
-      VAR1 (REINTERP_US, reinterpretv2si, 0, df)
-      VAR1 (REINTERP_US, reinterpretv2sf, 0, df)
-      BUILTIN_VD (REINTERP_SP, reinterpretdf, 0)
-      VAR1 (REINTERP_PS, reinterpretdi, 0, df)
-      VAR1 (REINTERP_PS, reinterpretv8qi, 0, df)
-      VAR1 (REINTERP_PS, reinterpretv4hi, 0, df)
-      VAR1 (REINTERP_PS, reinterpretv2si, 0, df)
-      VAR1 (REINTERP_PS, reinterpretv2sf, 0, df)
+      VAR1 (REINTERP_SS, reinterpretdi, 0, v1df)
+      VAR1 (REINTERP_SS, reinterpretv8qi, 0, v1df)
+      VAR1 (REINTERP_SS, reinterpretv4hi, 0, v1df)
+      VAR1 (REINTERP_SS, reinterpretv2si, 0, v1df)
+      VAR1 (REINTERP_SS, reinterpretv2sf, 0, v1df)
+      BUILTIN_VD (REINTERP_SS, reinterpretv1df, 0)
+      BUILTIN_VD (REINTERP_SU, reinterpretv1df, 0)
+      VAR1 (REINTERP_US, reinterpretdi, 0, v1df)
+      VAR1 (REINTERP_US, reinterpretv8qi, 0, v1df)
+      VAR1 (REINTERP_US, reinterpretv4hi, 0, v1df)
+      VAR1 (REINTERP_US, reinterpretv2si, 0, v1df)
+      VAR1 (REINTERP_US, reinterpretv2sf, 0, v1df)
+      BUILTIN_VD (REINTERP_SP, reinterpretv1df, 0)
+      VAR1 (REINTERP_PS, reinterpretdi, 0, v1df)
+      VAR1 (REINTERP_PS, reinterpretv8qi, 0, v1df)
+      VAR1 (REINTERP_PS, reinterpretv4hi, 0, v1df)
+      VAR1 (REINTERP_PS, reinterpretv2sf, 0, v1df)
 	return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]);
       VAR1 (UNOP, floatv2si, 2, v2sf)
       VAR1 (UNOP, floatv4si, 2, v4sf)
@@ -1447,6 +1448,7 @@  aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
 #undef BUILTIN_VALL
 #undef BUILTIN_VB
 #undef BUILTIN_VD
+#undef BUILTIN_VD1
 #undef BUILTIN_VDC
 #undef BUILTIN_VDIC
 #undef BUILTIN_VDN
@@ -1462,7 +1464,6 @@  aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
 #undef BUILTIN_VDW
 #undef BUILTIN_VD_BHSI
 #undef BUILTIN_VD_HSI
-#undef BUILTIN_VD_RE
 #undef BUILTIN_VQ
 #undef BUILTIN_VQN
 #undef BUILTIN_VQW
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index faa0858e3be437fe645fa0a4aa70e4c250ebc02c..1b931bede943b8e8682064a0bb799f1d285c7301 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -39,7 +39,7 @@ 
    1-9 - CODE_FOR_<name><mode><1-9>
    10 - CODE_FOR_<name><mode>.  */
 
-  BUILTIN_VD_RE (CREATE, create, 0)
+  BUILTIN_VD1 (CREATE, create, 0)
   BUILTIN_VDC (COMBINE, combine, 0)
   BUILTIN_VB (BINOP, pmul, 0)
   BUILTIN_VDQF (UNOP, sqrt, 2)
@@ -51,28 +51,28 @@ 
   VAR1 (GETLANE, get_lane, 0, di)
   BUILTIN_VALL (GETLANE, be_checked_get_lane, 0)
 
-  VAR1 (REINTERP_SS, reinterpretdi, 0, df)
-  VAR1 (REINTERP_SS, reinterpretv8qi, 0, df)
-  VAR1 (REINTERP_SS, reinterpretv4hi, 0, df)
-  VAR1 (REINTERP_SS, reinterpretv2si, 0, df)
-  VAR1 (REINTERP_SS, reinterpretv2sf, 0, df)
-  BUILTIN_VD (REINTERP_SS, reinterpretdf, 0)
+  VAR1 (REINTERP_SS, reinterpretdi, 0, v1df)
+  VAR1 (REINTERP_SS, reinterpretv8qi, 0, v1df)
+  VAR1 (REINTERP_SS, reinterpretv4hi, 0, v1df)
+  VAR1 (REINTERP_SS, reinterpretv2si, 0, v1df)
+  VAR1 (REINTERP_SS, reinterpretv2sf, 0, v1df)
+  BUILTIN_VD (REINTERP_SS, reinterpretv1df, 0)
 
-  BUILTIN_VD (REINTERP_SU, reinterpretdf, 0)
+  BUILTIN_VD (REINTERP_SU, reinterpretv1df, 0)
 
-  VAR1 (REINTERP_US, reinterpretdi, 0, df)
-  VAR1 (REINTERP_US, reinterpretv8qi, 0, df)
-  VAR1 (REINTERP_US, reinterpretv4hi, 0, df)
-  VAR1 (REINTERP_US, reinterpretv2si, 0, df)
-  VAR1 (REINTERP_US, reinterpretv2sf, 0, df)
+  VAR1 (REINTERP_US, reinterpretdi, 0, v1df)
+  VAR1 (REINTERP_US, reinterpretv8qi, 0, v1df)
+  VAR1 (REINTERP_US, reinterpretv4hi, 0, v1df)
+  VAR1 (REINTERP_US, reinterpretv2si, 0, v1df)
+  VAR1 (REINTERP_US, reinterpretv2sf, 0, v1df)
 
-  BUILTIN_VD (REINTERP_SP, reinterpretdf, 0)
+  BUILTIN_VD (REINTERP_SP, reinterpretv1df, 0)
 
-  VAR1 (REINTERP_PS, reinterpretdi, 0, df)
-  VAR1 (REINTERP_PS, reinterpretv8qi, 0, df)
-  VAR1 (REINTERP_PS, reinterpretv4hi, 0, df)
-  VAR1 (REINTERP_PS, reinterpretv2si, 0, df)
-  VAR1 (REINTERP_PS, reinterpretv2sf, 0, df)
+  VAR1 (REINTERP_PS, reinterpretdi, 0, v1df)
+  VAR1 (REINTERP_PS, reinterpretv8qi, 0, v1df)
+  VAR1 (REINTERP_PS, reinterpretv4hi, 0, v1df)
+  VAR1 (REINTERP_PS, reinterpretv2si, 0, v1df)
+  VAR1 (REINTERP_PS, reinterpretv2sf, 0, v1df)
 
   BUILTIN_VDQ_I (BINOP, dup_lane, 0)
   /* Implemented by aarch64_<sur>q<r>shl<mode>.  */
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c239677a58116d97b788254d23bb144718263313..985044dab66ba6ee2ed080d118fb2e6ca43d3649 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2134,7 +2134,7 @@ 
 ;; Patterns for AArch64 SIMD Intrinsics.
 
 (define_expand "aarch64_create<mode>"
-  [(match_operand:VD_RE 0 "register_operand" "")
+  [(match_operand:VD1 0 "register_operand" "")
    (match_operand:DI 1 "general_operand" "")]
   "TARGET_SIMD"
 {
@@ -2224,7 +2224,7 @@ 
 
 (define_expand "aarch64_reinterpretv8qi<mode>"
   [(match_operand:V8QI 0 "register_operand" "")
-   (match_operand:VDC 1 "register_operand" "")]
+   (match_operand:VD_RE 1 "register_operand" "")]
   "TARGET_SIMD"
 {
   aarch64_simd_reinterpret (operands[0], operands[1]);
@@ -2233,7 +2233,7 @@ 
 
 (define_expand "aarch64_reinterpretv4hi<mode>"
   [(match_operand:V4HI 0 "register_operand" "")
-   (match_operand:VDC 1 "register_operand" "")]
+   (match_operand:VD_RE 1 "register_operand" "")]
   "TARGET_SIMD"
 {
   aarch64_simd_reinterpret (operands[0], operands[1]);
@@ -2242,7 +2242,7 @@ 
 
 (define_expand "aarch64_reinterpretv2si<mode>"
   [(match_operand:V2SI 0 "register_operand" "")
-   (match_operand:VDC 1 "register_operand" "")]
+   (match_operand:VD_RE 1 "register_operand" "")]
   "TARGET_SIMD"
 {
   aarch64_simd_reinterpret (operands[0], operands[1]);
@@ -2251,7 +2251,7 @@ 
 
 (define_expand "aarch64_reinterpretv2sf<mode>"
   [(match_operand:V2SF 0 "register_operand" "")
-   (match_operand:VDC 1 "register_operand" "")]
+   (match_operand:VD_RE 1 "register_operand" "")]
   "TARGET_SIMD"
 {
   aarch64_simd_reinterpret (operands[0], operands[1]);
@@ -2267,8 +2267,8 @@ 
   DONE;
 })
 
-(define_expand "aarch64_reinterpretdf<mode>"
-  [(match_operand:DF 0 "register_operand" "")
+(define_expand "aarch64_reinterpretv1df<mode>"
+  [(match_operand:V1DF 0 "register_operand" "")
    (match_operand:VD_RE 1 "register_operand" "")]
   "TARGET_SIMD"
 {
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 979bbac77e3fee435781b576bcccd1cad071fe60..704fc217a67e9ccadf1faafdd1d49713b8a1d022 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7352,6 +7352,7 @@  static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
   { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
   { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
   { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
+  { V1DFmode,  "__builtin_aarch64_simd_df",	"13__Float64x1_t" },
   { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
   { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
   /* 128-bit containerized types.  */
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index acf4c1111237cfc3b8e0b6d3af625b9e852a2a10..4900936d0cd60bcb7adacf5018c3ffe3bb9b6cc6 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -42,7 +42,8 @@  typedef int64_t int64x1_t;
 typedef int32_t int32x1_t;
 typedef int16_t int16x1_t;
 typedef int8_t int8x1_t;
-typedef double float64x1_t;
+typedef __builtin_aarch64_simd_df float64x1_t
+  __attribute__ ((__vector_size__ (8)));
 typedef __builtin_aarch64_simd_sf float32x2_t
   __attribute__ ((__vector_size__ (8)));
 typedef __builtin_aarch64_simd_poly8 poly8x8_t
@@ -461,7 +462,11 @@  typedef struct poly16x8x4_t
 
 #define __aarch64_vget_lane_f32(__a, __b) \
   __aarch64_vget_lane_any (v2sf, , , __a, __b)
-#define __aarch64_vget_lane_f64(__a, __b) (__a)
+#define __aarch64_vget_lane_f64(__a, __b) __extension__	\
+  ({							\
+    __builtin_aarch64_im_lane_boundsi (__b, 1);		\
+    __a[0];						\
+  })
 
 #define __aarch64_vget_lane_p8(__a, __b) \
   __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
@@ -518,7 +523,8 @@  typedef struct poly16x8x4_t
 
 #define __aarch64_vdup_lane_f32(__a, __b) \
    __aarch64_vdup_lane_any (f32, , , __a, __b)
-#define __aarch64_vdup_lane_f64(__a, __b) (__a)
+#define __aarch64_vdup_lane_f64(__a, __b) \
+   __aarch64_vdup_lane_any (f64, , , __a, __b)
 #define __aarch64_vdup_lane_p8(__a, __b) \
    __aarch64_vdup_lane_any (p8, , , __a, __b)
 #define __aarch64_vdup_lane_p16(__a, __b) \
@@ -567,7 +573,8 @@  typedef struct poly16x8x4_t
 /* __aarch64_vdupq_lane internal macros.  */
 #define __aarch64_vdupq_lane_f32(__a, __b) \
    __aarch64_vdup_lane_any (f32, q, , __a, __b)
-#define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a))
+#define __aarch64_vdupq_lane_f64(__a, __b) \
+   __aarch64_vdup_lane_any (f64, q, , __a, __b)
 #define __aarch64_vdupq_lane_p8(__a, __b) \
    __aarch64_vdup_lane_any (p8, q, , __a, __b)
 #define __aarch64_vdupq_lane_p16(__a, __b) \
@@ -2475,7 +2482,7 @@  vcreate_u64 (uint64_t __a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vcreate_f64 (uint64_t __a)
 {
-  return (float64x1_t) __builtin_aarch64_createdf (__a);
+  return __builtin_aarch64_createv1df (__a);
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
@@ -2643,7 +2650,7 @@  vgetq_lane_u64 (uint64x2_t __a, const int __b)
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv8qidf_ps (__a);
+  return __builtin_aarch64_reinterpretv8qiv1df_ps (__a);
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
@@ -2775,7 +2782,7 @@  vreinterpretq_p8_p16 (poly16x8_t __a)
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vreinterpret_p16_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv4hidf_ps (__a);
+  return __builtin_aarch64_reinterpretv4hiv1df_ps (__a);
 }
 
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
@@ -2907,7 +2914,7 @@  vreinterpretq_p16_p8 (poly8x16_t __a)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vreinterpret_f32_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv2sfdf (__a);
+  return __builtin_aarch64_reinterpretv2sfv1df (__a);
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
@@ -3039,67 +3046,67 @@  vreinterpretq_f32_p16 (poly16x8_t __a)
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_f32 (float32x2_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv2sf (__a);
+  return __builtin_aarch64_reinterpretv1dfv2sf (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_p8 (poly8x8_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv8qi_sp (__a);
+  return __builtin_aarch64_reinterpretv1dfv8qi_sp (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_p16 (poly16x4_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv4hi_sp (__a);
+  return __builtin_aarch64_reinterpretv1dfv4hi_sp (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_s8 (int8x8_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv8qi (__a);
+  return __builtin_aarch64_reinterpretv1dfv8qi (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_s16 (int16x4_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv4hi (__a);
+  return __builtin_aarch64_reinterpretv1dfv4hi (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_s32 (int32x2_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv2si (__a);
+  return __builtin_aarch64_reinterpretv1dfv2si (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_s64 (int64x1_t __a)
 {
-  return __builtin_aarch64_createdf ((uint64_t) vget_lane_s64 (__a, 0));
+  return __builtin_aarch64_createv1df ((uint64_t) vget_lane_s64 (__a, 0));
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_u8 (uint8x8_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv8qi_su (__a);
+  return __builtin_aarch64_reinterpretv1dfv8qi_su (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_u16 (uint16x4_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv4hi_su (__a);
+  return __builtin_aarch64_reinterpretv1dfv4hi_su (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_u32 (uint32x2_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv2si_su (__a);
+  return __builtin_aarch64_reinterpretv1dfv2si_su (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_u64 (uint64x1_t __a)
 {
-  return __builtin_aarch64_createdf (vget_lane_u64 (__a, 0));
+  return __builtin_aarch64_createv1df (vget_lane_u64 (__a, 0));
 }
 
 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
@@ -3171,7 +3178,7 @@  vreinterpretq_f64_u64 (uint64x2_t __a)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretdidf (__a);
+  return __builtin_aarch64_reinterpretdiv1df (__a);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -3303,7 +3310,7 @@  vreinterpretq_s64_p16 (poly16x8_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretdidf_us (__a);
+  return __builtin_aarch64_reinterpretdiv1df_us (__a);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -3435,7 +3442,7 @@  vreinterpretq_u64_p16 (poly16x8_t __a)
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vreinterpret_s8_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv8qidf (__a);
+  return __builtin_aarch64_reinterpretv8qiv1df (__a);
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@@ -3567,7 +3574,7 @@  vreinterpretq_s8_p16 (poly16x8_t __a)
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vreinterpret_s16_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv4hidf (__a);
+  return __builtin_aarch64_reinterpretv4hiv1df (__a);
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
@@ -3699,7 +3706,7 @@  vreinterpretq_s16_p16 (poly16x8_t __a)
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vreinterpret_s32_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv2sidf (__a);
+  return __builtin_aarch64_reinterpretv2siv1df (__a);
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
@@ -3831,7 +3838,7 @@  vreinterpretq_s32_p16 (poly16x8_t __a)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vreinterpret_u8_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv8qidf_us (__a);
+  return __builtin_aarch64_reinterpretv8qiv1df_us (__a);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -3963,7 +3970,7 @@  vreinterpretq_u8_p16 (poly16x8_t __a)
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vreinterpret_u16_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv4hidf_us (__a);
+  return __builtin_aarch64_reinterpretv4hiv1df_us (__a);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
@@ -4095,7 +4102,7 @@  vreinterpretq_u16_p16 (poly16x8_t __a)
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vreinterpret_u32_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv2sidf_us (__a);
+  return __builtin_aarch64_reinterpretv2siv1df_us (__a);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
@@ -4238,7 +4245,7 @@  vget_low_f32 (float32x4_t __a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vget_low_f64 (float64x2_t __a)
 {
-  return vgetq_lane_f64 (__a, 0);
+  return (float64x1_t) {vgetq_lane_f64 (__a, 0)};
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
@@ -4364,7 +4371,7 @@  vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vcombine_f64 (float64x1_t __a, float64x1_t __b)
 {
-  return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
+  return __builtin_aarch64_combinedf (__a[0], __b[0]);
 }
 
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
@@ -13828,7 +13835,7 @@  vabs_f32 (float32x2_t __a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vabs_f64 (float64x1_t __a)
 {
-  return __builtin_fabs (__a);
+  return (float64x1_t) {__builtin_fabs (__a[0])};
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@@ -14312,7 +14319,7 @@  vceq_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vceq_f64 (float64x1_t __a, float64x1_t __b)
 {
-  return __a == __b ? -1ll : 0ll;
+  return (uint64x1_t) (__a == __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -14482,7 +14489,7 @@  vceqz_f32 (float32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vceqz_f64 (float64x1_t __a)
 {
-  return __a == 0.0 ? -1ll : 0ll;
+  return (uint64x1_t) (__a == (float64x1_t) {0.0});
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -14672,7 +14679,7 @@  vcge_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcge_f64 (float64x1_t __a, float64x1_t __b)
 {
-  return __a >= __b ? -1ll : 0ll;
+  return (uint64x1_t) (__a >= __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -14828,7 +14835,7 @@  vcgez_f32 (float32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgez_f64 (float64x1_t __a)
 {
-  return __a >= 0.0 ? -1ll : 0ll;
+  return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0});
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -14932,7 +14939,7 @@  vcgt_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgt_f64 (float64x1_t __a, float64x1_t __b)
 {
-  return __a > __b ? -1ll : 0ll;
+  return (uint64x1_t) (__a > __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -15088,7 +15095,7 @@  vcgtz_f32 (float32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgtz_f64 (float64x1_t __a)
 {
-  return __a > 0.0 ? -1ll : 0ll;
+  return (uint64x1_t) (__a > (float64x1_t) {0.0});
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -15192,7 +15199,7 @@  vcle_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcle_f64 (float64x1_t __a, float64x1_t __b)
 {
-  return __a <= __b ? -1ll : 0ll;
+  return (uint64x1_t) (__a <= __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -15348,7 +15355,7 @@  vclez_f32 (float32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vclez_f64 (float64x1_t __a)
 {
-  return __a <= 0.0 ? -1ll : 0ll;
+  return (uint64x1_t) (__a <= (float64x1_t) {0.0});
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -15452,7 +15459,7 @@  vclt_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vclt_f64 (float64x1_t __a, float64x1_t __b)
 {
-  return __a < __b ? -1ll : 0ll;
+  return (uint64x1_t) (__a < __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -15608,7 +15615,7 @@  vcltz_f32 (float32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcltz_f64 (float64x1_t __a)
 {
-  return __a < 0.0 ? -1ll : 0ll;
+  return (uint64x1_t) (__a < (float64x1_t) {0.0});
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -16217,7 +16224,7 @@  vdup_n_f32 (float32_t __a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vdup_n_f64 (float64_t __a)
 {
-  return __a;
+  return (float64x1_t) {__a};
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
@@ -16710,9 +16717,10 @@  vdups_lane_u32 (uint32x2_t __a, const int __b)
 
 /* vdupd_lane  */
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b)
+vdupd_lane_f64 (float64x1_t __a, const int __b)
 {
-  return __a;
+  __builtin_aarch64_im_lane_boundsi (__b, 1);
+  return __a[0];
 }
 
 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
@@ -17100,18 +17108,18 @@  vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
 				    __a);
 }
 
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vfma_lane_f64 (float64_t __a, float64_t __b,
-	       float64_t __c, const int __lane)
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vfma_lane_f64 (float64x1_t __a, float64x1_t __b,
+	       float64x1_t __c, const int __lane)
 {
-  return __builtin_fma (__b, __c, __a);
+  return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vfmad_lane_f64 (float64_t __a, float64_t __b,
-	        float64_t __c, const int __lane)
+	        float64x1_t __c, const int __lane)
 {
-  return __builtin_fma (__b, __c, __a);
+  return __builtin_fma (__b, __c[0], __a);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
@@ -17132,11 +17140,12 @@  vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
 				    __a);
 }
 
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vfma_laneq_f64 (float64_t __a, float64_t __b,
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vfma_laneq_f64 (float64x1_t __a, float64x1_t __b,
 	        float64x2_t __c, const int __lane)
 {
-  return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
+  float64_t __c0 = __aarch64_vgetq_lane_f64 (__c, __lane);
+  return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])};
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
@@ -17166,9 +17175,9 @@  vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
 
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
-	        float64_t __c, const int __lane)
+	        float64x1_t __c, const int __lane)
 {
-  return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
+  return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a);
 }
 
 /* vfmaq_laneq  */
@@ -17202,18 +17211,18 @@  vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
 				    __a);
 }
 
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vfms_lane_f64 (float64_t __a, float64_t __b,
-	       float64_t __c, const int __lane)
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vfms_lane_f64 (float64x1_t __a, float64x1_t __b,
+	       float64x1_t __c, const int __lane)
 {
-  return __builtin_fma (-__b, __c, __a);
+  return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vfmsd_lane_f64 (float64_t __a, float64_t __b,
-	        float64_t __c, const int __lane)
+	        float64x1_t __c, const int __lane)
 {
-  return __builtin_fma (-__b, __c, __a);
+  return __builtin_fma (-__b, __c[0], __a);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
@@ -17234,11 +17243,12 @@  vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
 				    __a);
 }
 
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vfms_laneq_f64 (float64_t __a, float64_t __b,
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vfms_laneq_f64 (float64x1_t __a, float64x1_t __b,
 	        float64x2_t __c, const int __lane)
 {
-  return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
+  float64_t __c0 = __aarch64_vgetq_lane_f64 (__c, __lane);
+  return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])};
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
@@ -17268,9 +17278,9 @@  vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
 
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
-	        float64_t __c, const int __lane)
+	        float64x1_t __c, const int __lane)
 {
-  return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a);
+  return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a);
 }
 
 /* vfmsq_laneq  */
@@ -17304,7 +17314,7 @@  vld1_f32 (const float32_t *a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vld1_f64 (const float64_t *a)
 {
-  return *a;
+  return (float64x1_t) {*a};
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
@@ -17482,8 +17492,8 @@  vld2_f64 (const float64_t * __a)
   float64x1x2_t ret;
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
-  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
+  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
+  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
   return ret;
 }
 
@@ -17748,9 +17758,9 @@  vld3_f64 (const float64_t * __a)
   float64x1x3_t ret;
   __builtin_aarch64_simd_ci __o;
   __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
-  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
-  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
+  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
+  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
+  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
   return ret;
 }
 
@@ -18038,10 +18048,10 @@  vld4_f64 (const float64_t * __a)
   float64x1x4_t ret;
   __builtin_aarch64_simd_xi __o;
   __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
-  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
-  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
-  ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
+  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
+  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
+  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
+  ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
   return ret;
 }
 
@@ -19156,7 +19166,7 @@  vmov_n_f32 (float32_t __a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vmov_n_f64 (float64_t __a)
 {
-  return __a;
+  return (float64x1_t) {__a};
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
@@ -19378,7 +19388,8 @@  vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
 {
-  return __a * __b;
+  __builtin_aarch64_im_lane_boundsi (__lane, 1);
+  return __a * __b[0];
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
@@ -21202,7 +21213,7 @@  vrndn_f32 (float32x2_t __a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vrndn_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_frintndf (__a);
+  return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])};
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
@@ -22510,7 +22521,7 @@  vst1_f32 (float32_t *a, float32x2_t b)
 __extension__ static __inline void __attribute__ ((__always_inline__))
 vst1_f64 (float64_t *a, float64x1_t b)
 {
-  *a = b;
+  *a = b[0];
 }
 
 __extension__ static __inline void __attribute__ ((__always_inline__))
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 05611f4cd61bc1f1e01766ced61abd5476623554..558cdbb1b8d97c9ac866eb24bfcf50ac77e6f5a8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -138,8 +138,11 @@ 
 ;; Double vector modes for combines.
 (define_mode_iterator VDIC [V8QI V4HI V2SI])
 
-;; Double vector modes.
-(define_mode_iterator VD_RE [V8QI V4HI V2SI DI DF V2SF])
+;; Double vector modes, inc. V1DF and the DI "vector" mode, for VREINTERPRET.
+(define_mode_iterator VD_RE [V8QI V4HI V2SI DI V1DF V2SF])
+
+;; Double vector modes inc V1DF
+(define_mode_iterator VD1 [V8QI V4HI V2SI V2SF V1DF])
 
 ;; Vector modes except double int.
 (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
diff --git a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
index 09540e84144bb937ebb0a0611c891c9e593669cf..025b6904afa9f4ea39550ecd95d91a7be1d48cc6 100644
--- a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
+++ b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
@@ -27,8 +27,9 @@  void f17 (float32x4_t a) {}
 void f18 (float64x2_t a) {}
 void f19 (poly8x16_t a) {}
 void f20 (poly16x8_t a) {}
+void f21 (float64x1_t a) {}
 
-void f21 (int8x16_t, int8x16_t) {}
+void g1 (int8x16_t, int8x16_t) {}
 
 
 // { dg-final { scan-assembler "_Z2f010__Int8x8_t:" } }
@@ -52,4 +53,5 @@  void f21 (int8x16_t, int8x16_t) {}
 // { dg-final { scan-assembler "_Z3f1813__Float64x2_t:" } }
 // { dg-final { scan-assembler "_Z3f1912__Poly8x16_t:" } }
 // { dg-final { scan-assembler "_Z3f2012__Poly16x8_t:" } }
-// { dg-final { scan-assembler "_Z3f2111__Int8x16_tS_:" } }
+// { dg-final { scan-assembler "_Z3f2113__Float64x1_t:" } }
+// { dg-final { scan-assembler "_Z2g111__Int8x16_tS_:" } }
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-64x1_1.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-64x1_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..673242687e4946d7bc1cb61c247510dfd128cc81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-64x1_1.c
@@ -0,0 +1,15 @@ 
+/* Test AAPCS64 layout.
+
+  Test 64-bit singleton vector types which should be in FP/SIMD registers.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+/* { dg-additional-sources "abitest.S" } */
+
+#ifndef IN_FRAMEWORK
+#define TESTFILE "func-ret-64x1_1.c"
+#include <arm_neon.h>
+#include "abitest-2.h"
+#else
+FUNC_VAL_CHECK ( 0, float64x1_t, (float64x1_t) {123456.789}, D0, flat)
+#endif
+
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_64x1_1.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_64x1_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..f1dc1a759b07fcc8a9c4310ac14f43274a3f378f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_64x1_1.c
@@ -0,0 +1,16 @@ 
+/* Test AAPCS64 layout.
+
+   Test 64-bit singleton vector types which should be in FP/SIMD registers.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+
+#ifndef IN_FRAMEWORK
+#define TESTFILE "test_64x1_1.c"
+#include <arm_neon.h>
+
+#include "abitest.h"
+#else
+ARG (float64x1_t, (float64x1_t) {123456.789}, D0)
+ARG (float64_t, 987654.321, D1)
+LAST_ARG (float64x1_t, (float64x1_t) {13579.2468}, D2)
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c
index 09aecba2a110c7885d3c1455f1c8789953fb3d71..42389aaaa76b6af2d4c730b7b78ea055163bb44d 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c
@@ -14,7 +14,7 @@  main (int argc, char **argv)
   float64x1_t in1 = {0};
   float64x1_t in2 = {1};
   float64x1_t actual = vext_f64 (in1, in2, 0);
-  if (actual != in1)
+  if (actual[0] != in1[0])
     abort ();
 
   return 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/vadd_f64.c b/gcc/testsuite/gcc.target/aarch64/vadd_f64.c
index c3bf7349597aa9b75e0bc34cfd4cde4dc16b95f3..f35c42dcfbd2a8da19f183e4d23d365702a087dc 100644
--- a/gcc/testsuite/gcc.target/aarch64/vadd_f64.c
+++ b/gcc/testsuite/gcc.target/aarch64/vadd_f64.c
@@ -4,9 +4,6 @@ 
 
 #include <arm_neon.h>
 
-#define FLT_EPSILON __FLT_EPSILON__
-#define DBL_EPSILON __DBL_EPSILON__
-
 #define TESTA0 0.33333
 #define TESTA1 -1.7777
 #define TESTA2 0
@@ -42,70 +39,41 @@  extern void abort (void);
     || (ABS (a - b) < epsilon)				\
    )
 
-int
-test_vadd_f64 ()
-{
-  float64x1_t a;
-  float64x1_t b;
-  float64x1_t c;
-
-  a = TESTA0;
-  b = TESTB0;
-  c = ANSW0;
-
-  a = vadd_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA1;
-  b = TESTB1;
-  c = ANSW1;
-
-  a = vadd_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA2;
-  b = TESTB2;
-  c = ANSW2;
-
-  a = vadd_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA3;
-  b = TESTB3;
-  c = ANSW3;
-
-  a = vadd_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA4;
-  b = TESTB4;
-  c = ANSW4;
-
-  a = vadd_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA5;
-  b = TESTB5;
-  c = ANSW5;
-
-  a = vadd_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  return 0;
+#define TEST(N)					\
+int						\
+test_vadd_f64_##N ()				\
+{						\
+  float64x1_t a = { TESTA##N };			\
+  float64x1_t b = { TESTB##N };			\
+  float64x1_t c = { ANSW##N };			\
+						\
+  a = vadd_f64 (a, b);				\
+  return !FP_equals (a[0], c[0], EPSILON);	\
 }
 
+TEST (0)
+TEST (1)
+TEST (2)
+TEST (3)
+TEST (4)
+TEST (5)
+
 /* { dg-final { scan-assembler-times "fadd\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 6 } } */
 
 int
 main (int argc, char **argv)
 {
-  if (test_vadd_f64 ())
+  if (test_vadd_f64_0 ())
+    abort ();
+  if (test_vadd_f64_1 ())
+    abort ();
+  if (test_vadd_f64_2 ())
+    abort ();
+  if (test_vadd_f64_3 ())
+    abort ();
+  if (test_vadd_f64_4 ())
+    abort ();
+  if (test_vadd_f64_5 ())
     abort ();
 
   return 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/vdiv_f.c b/gcc/testsuite/gcc.target/aarch64/vdiv_f.c
index cc3a9570c0fac0dcbf38f38314a416cca5e58c6e..9e1b768eda3a88ea37a5da8ffa405e29ec2f2d60 100644
--- a/gcc/testsuite/gcc.target/aarch64/vdiv_f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vdiv_f.c
@@ -99,13 +99,6 @@ 
 #define EPSILON_64 __DBL_EPSILON__
 #define EPSILON(data_len) EPSILON_##data_len
 
-#define INDEX64_32 [i]
-#define INDEX64_64
-#define INDEX128_32 [i]
-#define INDEX128_64 [i]
-#define INDEX(reg_len, data_len) \
-  CONCAT1 (INDEX, reg_len##_##data_len)
-
 #define LOAD_INST(reg_len, data_len) \
   CONCAT1 (vld1, POSTFIX (reg_len, data_len))
 #define DIV_INST(reg_len, data_len) \
@@ -135,9 +128,7 @@ 
   for (i = 0; i < n; i++)						\
   {									\
     INHIB_OPTIMIZATION;							\
-    if (!FP_equals ((a) INDEX (reg_len, data_len),			\
-		    (c) INDEX (reg_len, data_len),			\
-		    EPSILON (data_len)))				\
+    if (!FP_equals ((a) [i], (c) [i], EPSILON (data_len)))		\
       return 1;								\
   }									\
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
index 2451ecdcfb6440c100675d34342ee1f5d517c2d5..31efc4f2752b6e32808d7ba382c9f378e9e73299 100644
--- a/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
@@ -30,7 +30,7 @@  set_rounding_mode (uint32_t mode)
   asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :);
 }
 
-float64x1_t __attribute__ ((noinline))
+float64_t __attribute__ ((noinline))
 compare_f64 (float64x1_t passed, float64_t expected)
 {
   return (__builtin_fabs (vget_lane_f64 (passed, 0) - expected)
diff --git a/gcc/testsuite/gcc.target/aarch64/vsub_f64.c b/gcc/testsuite/gcc.target/aarch64/vsub_f64.c
index abf4fc42d49dc695f435b1e0f331737c8e9367b0..91d74638201e386f500717542973ed46f9c7c5cf 100644
--- a/gcc/testsuite/gcc.target/aarch64/vsub_f64.c
+++ b/gcc/testsuite/gcc.target/aarch64/vsub_f64.c
@@ -4,9 +4,6 @@ 
 
 #include <arm_neon.h>
 
-#define FLT_EPSILON __FLT_EPSILON__
-#define DBL_EPSILON __DBL_EPSILON__
-
 #define TESTA0 1
 #define TESTA1 0.2223
 #define TESTA2 0
@@ -44,70 +41,41 @@  extern void abort (void);
      || ((b > a) && (b < (a + epsilon))))	\
 )
 
-int
-test_vsub_f64 ()
-{
-  float64x1_t a;
-  float64x1_t b;
-  float64x1_t c;
-
-  a = TESTA0;
-  b = TESTB0;
-  c = ANSW0;
-
-  a = vsub_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA1;
-  b = TESTB1;
-  c = ANSW1;
-
-  a = vsub_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA2;
-  b = TESTB2;
-  c = ANSW2;
-
-  a = vsub_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA3;
-  b = TESTB3;
-  c = ANSW3;
-
-  a = vsub_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA4;
-  b = TESTB4;
-  c = ANSW4;
-
-  a = vsub_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA5;
-  b = TESTB5;
-  c = ANSW5;
-
-  a = vsub_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  return 0;
+#define TEST(N)					\
+int						\
+test_vsub_f64_##N ()				\
+{						\
+  float64x1_t a = { TESTA##N };			\
+  float64x1_t b = { TESTB##N };			\
+  float64x1_t c = { ANSW##N };			\
+						\
+  a = vsub_f64 (a, b);				\
+  return !FP_equals (a[0], c[0], EPSILON);	\
 }
 
+TEST (0)
+TEST (1)
+TEST (2)
+TEST (3)
+TEST (4)
+TEST (5)
+
 /* { dg-final { scan-assembler-times "fsub\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 6 } } */
 
 int
 main (int argc, char **argv)
 {
-  if (test_vsub_f64 ())
+  if (test_vsub_f64_0 ())
+    abort ();
+  if (test_vsub_f64_1 ())
+    abort ();
+  if (test_vsub_f64_2 ())
+    abort ();
+  if (test_vsub_f64_3 ())
+    abort ();
+  if (test_vsub_f64_4 ())
+    abort ();
+  if (test_vsub_f64_5 ())
     abort ();
 
   return 0;