@@ -20848,11 +20848,9 @@ __LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, s
__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di,
u64, int64x2_t)
-#undef __LD2_LANE_FUNC
-
/* vld2q_lane */
-#define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
+#define __LD2Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ extern __inline intype \
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
@@ -20868,22 +20866,20 @@ vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
-__LD2_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
-__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
-__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
-__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
-__LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
-__LD2_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64)
-__LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
-__LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
-__LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
-__LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
-__LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
-__LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
-__LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
-__LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)
-
-#undef __LD2_LANE_FUNC
+__LD2Q_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
+__LD2Q_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
+__LD2Q_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
+__LD2Q_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
+__LD2Q_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
+__LD2Q_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64)
+__LD2Q_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
+__LD2Q_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
+__LD2Q_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
+__LD2Q_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
+__LD2Q_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
+__LD2Q_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
+__LD2Q_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
+__LD2Q_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)
/* vld3_lane */
@@ -20947,11 +20943,9 @@ __LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, s
__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di,
u64, int64x2_t)
-#undef __LD3_LANE_FUNC
-
/* vld3q_lane */
-#define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
+#define __LD3Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ extern __inline intype \
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
@@ -20969,22 +20963,20 @@ vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
-__LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
-__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
-__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
-__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
-__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
-__LD3_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64)
-__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
-__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
-__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
-__LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
-__LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
-__LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
-__LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
-__LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)
-
-#undef __LD3_LANE_FUNC
+__LD3Q_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
+__LD3Q_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
+__LD3Q_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
+__LD3Q_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
+__LD3Q_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
+__LD3Q_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64)
+__LD3Q_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
+__LD3Q_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
+__LD3Q_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
+__LD3Q_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
+__LD3Q_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
+__LD3Q_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
+__LD3Q_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
+__LD3Q_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)
/* vld4_lane */
@@ -21056,11 +21048,9 @@ __LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, s
__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di,
u64, int64x2_t)
-#undef __LD4_LANE_FUNC
-
/* vld4q_lane */
-#define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
+#define __LD4Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ extern __inline intype \
__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \
vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
@@ -21080,22 +21070,20 @@ vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
-__LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
-__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
-__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
-__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
-__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
-__LD4_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64)
-__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
-__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
-__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
-__LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
-__LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
-__LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
-__LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
-__LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)
-
-#undef __LD4_LANE_FUNC
+__LD4Q_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
+__LD4Q_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
+__LD4Q_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
+__LD4Q_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
+__LD4Q_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
+__LD4Q_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64)
+__LD4Q_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
+__LD4Q_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
+__LD4Q_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
+__LD4Q_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
+__LD4Q_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
+__LD4Q_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
+__LD4Q_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
+__LD4Q_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)
/* vmax */
@@ -35752,6 +35740,15 @@ vcopyq_laneq_bf16 (bfloat16x8_t __a, const int __lane1,
__a, __lane1);
}
+__LD2_LANE_FUNC (bfloat16x4x2_t, bfloat16x4_t, bfloat16x8x2_t, bfloat16_t, v4bf,
+ v8bf, bf, bf16, bfloat16x8_t)
+__LD2Q_LANE_FUNC (bfloat16x8x2_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
+__LD3_LANE_FUNC (bfloat16x4x3_t, bfloat16x4_t, bfloat16x8x3_t, bfloat16_t, v4bf,
+ v8bf, bf, bf16, bfloat16x8_t)
+__LD3Q_LANE_FUNC (bfloat16x8x3_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
+__LD4_LANE_FUNC (bfloat16x4x4_t, bfloat16x4_t, bfloat16x8x4_t, bfloat16_t, v4bf,
+ v8bf, bf, bf16, bfloat16x8_t)
+__LD4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
#pragma GCC pop_options
/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */
@@ -35965,4 +35962,11 @@ vaddq_p128 (poly128_t __a, poly128_t __b)
#undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64
+#undef __LD2_LANE_FUNC
+#undef __LD2Q_LANE_FUNC
+#undef __LD3_LANE_FUNC
+#undef __LD3Q_LANE_FUNC
+#undef __LD4_LANE_FUNC
+#undef __LD4Q_LANE_FUNC
+
#endif
new file mode 100644
@@ -0,0 +1,74 @@
+/* { dg-do run { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+typedef union
+{
+ bfloat16_t bf16;
+ uint16_t u16;
+} bfloat16_u_t;
+
+#define VARIANTS(VARIANT, STRUCT) \
+VARIANT (bfloat16, , 4, _bf16, 3, STRUCT) \
+VARIANT (bfloat16, q, 8, _bf16, 7, STRUCT)
+
+#define TESTMETH(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
+ int \
+ test_vld##STRUCT##Q##_lane##SUFFIX (const bfloat16_u_t *data, \
+ const bfloat16_u_t *overwrite) \
+ { \
+ BASE##x##ELTS##x##STRUCT##_t vectors; \
+ bfloat16_u_t temp[ELTS]; \
+ int i,j; \
+ for (i = 0; i < STRUCT; i++, data += ELTS) \
+ vectors.val[i] = vld1##Q##SUFFIX ((bfloat16_t *)data); \
+ vectors = vld##STRUCT##Q##_lane##SUFFIX ((bfloat16_t *) overwrite, \
+ vectors, LANE); \
+ while (--i >= 0) \
+ { \
+ vst1##Q##SUFFIX ((bfloat16_t *)temp, vectors.val[i]); \
+ data -= ELTS; /* Point at value loaded before vldN_lane. */ \
+ for (j = 0; j < ELTS; j++) \
+ if (temp[j].u16 != (j == LANE ? overwrite[i].u16 : data[j].u16)) \
+ return 1; \
+ } \
+ return 0; \
+ }
+
+/* Tests of vld2_lane and vld2q_lane. */
+VARIANTS (TESTMETH, 2)
+/* Tests of vld3_lane and vld3q_lane. */
+VARIANTS (TESTMETH, 3)
+/* Tests of vld4_lane and vld4q_lane. */
+VARIANTS (TESTMETH, 4)
+
+#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
+ if (test_vld##STRUCT##Q##_lane##SUFFIX ((const bfloat16_u_t *)orig_data, \
+ BASE##_data) != 0) \
+ abort ();
+
+int
+main (int argc, char **argv)
+{
+ /* Original data for all vector formats. */
+ uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL,
+ 0x012389ab4567cdefULL, 0xdeeddadacafe0431ULL,
+ 0x1032547698badcfeULL, 0xbadbadbadbad0badULL,
+ 0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL};
+
+ /* Data with which vldN_lane will overwrite some of previous. */
+ bfloat16_u_t bfloat16_data[4];
+ bfloat16_data[0].u16 = 0xABAB;
+ bfloat16_data[1].u16 = 0x0;
+ bfloat16_data[2].u16 = 0xCAFE;
+ bfloat16_data[3].u16 = 0x1234;
+
+ VARIANTS (CHECK, 2);
+ VARIANTS (CHECK, 3);
+ VARIANTS (CHECK, 4);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,52 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-O2 --save-temps" } */
+
+#include <arm_neon.h>
+
+bfloat16x4x2_t
+test_vld2_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x2_t b)
+{
+ return vld2_lane_bf16 (ptr, b, 2);
+}
+
+bfloat16x8x2_t
+test_vld2q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x2_t b)
+{
+ return vld2q_lane_bf16 (ptr, b, 2);
+}
+
+/* { dg-final { scan-assembler-times "ld2\\t{v2.h - v3.h}\\\[2\\\], \\\[x0\\\]" 2 } } */
+
+bfloat16x4x3_t
+test_vld3_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x3_t b)
+{
+ return vld3_lane_bf16 (ptr, b, 2);
+}
+
+/* { dg-final { scan-assembler-times "ld3\t{v4.h - v6.h}\\\[2\\\], \\\[x0\\\]" 1 } } */
+
+bfloat16x8x3_t
+test_vld3q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x3_t b)
+{
+ return vld3q_lane_bf16 (ptr, b, 2);
+}
+
+/* { dg-final { scan-assembler-times "ld3\t{v1.h - v3.h}\\\[2\\\], \\\[x0\\\]" 1 } } */
+
+bfloat16x4x4_t
+test_vld4_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x4_t b)
+{
+ return vld4_lane_bf16 (ptr, b, 2);
+}
+
+/* { dg-final { scan-assembler-times "ld4\t{v4.h - v7.h}\\\[2\\\], \\\[x0\\\]" 1 } } */
+
+bfloat16x8x4_t
+test_vld4q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x4_t b)
+{
+ return vld4q_lane_bf16 (ptr, b, 2);
+}
+
+/* { dg-final { scan-assembler-times "ld4\t{v0.h - v3.h}\\\[2\\\], \\\[x0\\\]" 1 } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x4x2_t
+f_vld2_lane_bf16 (bfloat16_t * p, bfloat16x4x2_t v)
+{
+ bfloat16x4x2_t res;
+ /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+ res = vld2_lane_bf16 (p, v, 4);
+ /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+ res = vld2_lane_bf16 (p, v, -1);
+ return res;
+}
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x8x2_t
+f_vld2q_lane_bf16 (bfloat16_t * p, bfloat16x8x2_t v)
+{
+ bfloat16x8x2_t res;
+ /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vld2q_lane_bf16 (p, v, 8);
+ /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vld2q_lane_bf16 (p, v, -1);
+ return res;
+}
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x4x3_t
+f_vld3_lane_bf16 (bfloat16_t * p, bfloat16x4x3_t v)
+{
+ bfloat16x4x3_t res;
+ /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+ res = vld3_lane_bf16 (p, v, 4);
+ /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+ res = vld3_lane_bf16 (p, v, -1);
+ return res;
+}
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x8x3_t
+f_vld3q_lane_bf16 (bfloat16_t * p, bfloat16x8x3_t v)
+{
+ bfloat16x8x3_t res;
+ /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vld3q_lane_bf16 (p, v, 8);
+ /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vld3q_lane_bf16 (p, v, -1);
+ return res;
+}
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x4x4_t
+f_vld4_lane_bf16 (bfloat16_t * p, bfloat16x4x4_t v)
+{
+ bfloat16x4x4_t res;
+ /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+ res = vld4_lane_bf16 (p, v, 4);
+ /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+ res = vld4_lane_bf16 (p, v, -1);
+ return res;
+}
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x8x4_t
+f_vld4q_lane_bf16 (bfloat16_t * p, bfloat16x8x4_t v)
+{
+ bfloat16x8x4_t res;
+ /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vld4q_lane_bf16 (p, v, 8);
+ /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vld4q_lane_bf16 (p, v, -1);
+ return res;
+}