
[AArch64] VDUP Testcases

Message ID 53232CAD.1010806@arm.com
State New

Commit Message

Alex Velenko March 14, 2014, 4:22 p.m. UTC
Hi,
This patch adds vdup intrinsic testcases for AArch64. These testcases
are nice to have, as they allow us to reason about vdup consistency for
both LE and BE compiler flavors.

This patch covers the following intrinsics:

vdup_lane_f32
vdup_lane_s[8,16]
vdup_lane_s[32,64]
vdup_n_[p,s,u][8,16]
vdup_n_[s,u][32,64]

vdupb_lane_[s,u]8
vduph_lane_[s,u]16
vdupd_lane_[f,s,u]64
vdups_lane_[f,s,u]32

vdupq_lane_f32
vdupq_lane_s[8,16,32,64]
vdup[q]_n_f[32,64]
vdupq_n_[s,p,u][8,16]
vdupq_n_[s,u][32,64]
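
All the new tests follow the same load/dup/store/compare shape, so here
is a condensed sketch of the pattern for reference (the function name is
illustrative, not taken from the patch):

#include <arm_neon.h>

extern void abort (void);

/* noinline keeps the dup out of the caller, so the expected
   "dup v<n>.8b, v<m>.b[1]" survives into the scanned assembly.  */
int8x8_t __attribute__ ((noinline))
dup_lane_1 (int8x8_t a)
{
  return vdup_lane_s8 (a, 1);
}

int
main ()
{
  int8_t in[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int8_t out[8];
  int i;

  vst1_s8 (out, dup_lane_1 (vld1_s8 (in)));
  for (i = 0; i < 8; i++)
    if (out[i] != in[1])  /* Every lane must hold the value of lane 1.  */
      abort ();
  return 0;
}

The ACLE defines lane numbers independently of endianness, so the
run-time results must be identical for LE and BE builds, which is what
makes these tests useful for comparing the two flavors. They run under
the usual harness, e.g. make check-gcc
RUNTESTFLAGS="aarch64.exp=vdup_lane_1.c".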

Is it OK for trunk?

Kind regards,
Alex

gcc/testsuite/

2014-03-14  Alex Velenko  <Alex.Velenko@arm.com>

	* gcc.target/aarch64/vdup_lane_1.c: New testcase.
	* gcc.target/aarch64/vdup_lane_2.c: New testcase.
	* gcc.target/aarch64/vdup_n_1.c: New testcase.

Comments

Marcus Shawcroft March 20, 2014, 12:34 p.m. UTC | #1
On 14 March 2014 16:22, Alex Velenko <Alex.Velenko@arm.com> wrote:
> Hi,
> This patch adds vdup intrinsic testcases for AArch64. These testcases
> are nice to have, as they allow us to reason about vdup consistency for
> both LE and BE compiler flavors.

> gcc/testsuite/
>
> 2014-03-14  Alex Velenko  <Alex.Velenko@arm.com>
>
>         * gcc.target/aarch64/vdup_lane_1.c: New testcase.
>         * gcc.target/aarch64/vdup_lane_2.c: New testcase.
>         * gcc.target/aarch64/vdup_n_1.c: New testcase.

This is OK for stage-1.
/Marcus

Patch

diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..4582471c8aad3d855eb33494ac01a62c87978ca9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
@@ -0,0 +1,430 @@ 
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps -O1" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+float32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_f32_0 (float32x2_t a)
+{
+  return vdup_lane_f32 (a, 0);
+}
+
+float32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_f32_1 (float32x2_t a)
+{
+  return vdup_lane_f32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_f32 ()
+{
+  float32x2_t a;
+  float32x2_t b;
+  int i;
+  float32_t c[2] = { 0.0, 3.14 };
+  float32_t d[2];
+
+  a = vld1_f32 (c);
+  b = wrap_vdup_lane_f32_0 (a);
+  vst1_f32 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdup_lane_f32_1 (a);
+  vst1_f32 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+float32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_f32_0 (float32x2_t a)
+{
+  return vdupq_lane_f32 (a, 0);
+}
+
+float32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_f32_1 (float32x2_t a)
+{
+  return vdupq_lane_f32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_f32 ()
+{
+  float32x2_t a;
+  float32x4_t b;
+  int i;
+  float32_t c[2] = { 0.0, 3.14 };
+  float32_t d[4];
+
+  a = vld1_f32 (c);
+  b = wrap_vdupq_lane_f32_0 (a);
+  vst1q_f32 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdupq_lane_f32_1 (a);
+  vst1q_f32 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int8x8_t __attribute__ ((noinline))
+wrap_vdup_lane_s8_0 (int8x8_t a)
+{
+  return vdup_lane_s8 (a, 0);
+}
+
+int8x8_t __attribute__ ((noinline))
+wrap_vdup_lane_s8_1 (int8x8_t a)
+{
+  return vdup_lane_s8 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s8 ()
+{
+  int8x8_t a;
+  int8x8_t b;
+  int i;
+  /* Only the first two lanes are interesting.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[8];
+
+  a = vld1_s8 (c);
+  b = wrap_vdup_lane_s8_0 (a);
+  vst1_s8 (d, b);
+  for (i = 0; i < 8; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdup_lane_s8_1 (a);
+  vst1_s8 (d, b);
+  for (i = 0; i < 8; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int8x16_t __attribute__ ((noinline))
+wrap_vdupq_lane_s8_0 (int8x8_t a)
+{
+  return vdupq_lane_s8 (a, 0);
+}
+
+int8x16_t __attribute__ ((noinline))
+wrap_vdupq_lane_s8_1 (int8x8_t a)
+{
+  return vdupq_lane_s8 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s8 ()
+{
+  int8x8_t a;
+  int8x16_t b;
+  int i;
+  /* Only the first two lanes are interesting.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[16];
+
+  a = vld1_s8 (c);
+  b = wrap_vdupq_lane_s8_0 (a);
+  vst1q_s8 (d, b);
+  for (i = 0; i < 16; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdupq_lane_s8_1 (a);
+  vst1q_s8 (d, b);
+  for (i = 0; i < 16; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int16x4_t __attribute__ ((noinline))
+wrap_vdup_lane_s16_0 (int16x4_t a)
+{
+  return vdup_lane_s16 (a, 0);
+}
+
+int16x4_t __attribute__ ((noinline))
+wrap_vdup_lane_s16_1 (int16x4_t a)
+{
+  return vdup_lane_s16 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s16 ()
+{
+  int16x4_t a;
+  int16x4_t b;
+  int i;
+  /* Only the first two lanes are interesting.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[4];
+
+  a = vld1_s16 (c);
+  b = wrap_vdup_lane_s16_0 (a);
+  vst1_s16 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdup_lane_s16_1 (a);
+  vst1_s16 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int16x8_t __attribute__ ((noinline))
+wrap_vdupq_lane_s16_0 (int16x4_t a)
+{
+  return vdupq_lane_s16 (a, 0);
+}
+
+int16x8_t __attribute__ ((noinline))
+wrap_vdupq_lane_s16_1 (int16x4_t a)
+{
+  return vdupq_lane_s16 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s16 ()
+{
+  int16x4_t a;
+  int16x8_t b;
+  int i;
+  /* Only the first two lanes are interesting.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[8];
+
+  a = vld1_s16 (c);
+  b = wrap_vdupq_lane_s16_0 (a);
+  vst1q_s16 (d, b);
+  for (i = 0; i < 8; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdupq_lane_s16_1 (a);
+  vst1q_s16 (d, b);
+  for (i = 0; i < 8; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_s32_0 (int32x2_t a)
+{
+  return vdup_lane_s32 (a, 0);
+}
+
+int32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_s32_1 (int32x2_t a)
+{
+  return vdup_lane_s32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s32 ()
+{
+  int32x2_t a;
+  int32x2_t b;
+  int i;
+  int32_t c[2] = { 0, 1 };
+  int32_t d[2];
+
+  a = vld1_s32 (c);
+  b = wrap_vdup_lane_s32_0 (a);
+  vst1_s32 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdup_lane_s32_1 (a);
+  vst1_s32 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_s32_0 (int32x2_t a)
+{
+  return vdupq_lane_s32 (a, 0);
+}
+
+int32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_s32_1 (int32x2_t a)
+{
+  return vdupq_lane_s32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s32 ()
+{
+  int32x2_t a;
+  int32x4_t b;
+  int i;
+  int32_t c[2] = { 0, 1 };
+  int32_t d[4];
+
+  a = vld1_s32 (c);
+  b = wrap_vdupq_lane_s32_0 (a);
+  vst1q_s32 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdupq_lane_s32_1 (a);
+  vst1q_s32 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int64x1_t __attribute__ ((noinline))
+wrap_vdup_lane_s64_0 (int64x1_t a)
+{
+  return vdup_lane_s64 (a, 0);
+}
+
+int64x1_t __attribute__ ((noinline))
+wrap_vdup_lane_s64_1 (int64x1_t a)
+{
+  /* int64x1_t has only lane 0, so lane 1 would be out of range.  */
+  return vdup_lane_s64 (a, 0);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s64 ()
+{
+  int64x1_t a;
+  int64x1_t b;
+  int64_t c[1];
+  int64_t d[1];
+
+  c[0] = 0;
+  a = vld1_s64 (c);
+  b = wrap_vdup_lane_s64_0 (a);
+  vst1_s64 (d, b);
+  if (c[0] != d[0])
+    return 1;
+
+  c[0] = 1;
+  a = vld1_s64 (c);
+  b = wrap_vdup_lane_s64_1 (a);
+  vst1_s64 (d, b);
+  if (c[0] != d[0])
+    return 1;
+  return 0;
+}
+
+int64x2_t __attribute__ ((noinline))
+wrap_vdupq_lane_s64_0 (int64x1_t a)
+{
+  return vdupq_lane_s64 (a, 0);
+}
+
+int64x2_t __attribute__ ((noinline))
+wrap_vdupq_lane_s64_1 (int64x1_t a)
+{
+  /* The int64x1_t input has only lane 0, so lane 1 would be out of range.  */
+  return vdupq_lane_s64 (a, 0);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s64 ()
+{
+  int64x1_t a;
+  int64x2_t b;
+  int i;
+  int64_t c[1];
+  int64_t d[2];
+
+  c[0] = 0;
+  a = vld1_s64 (c);
+  b = wrap_vdupq_lane_s64_0 (a);
+  vst1q_s64 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  c[0] = 1;
+  a = vld1_s64 (c);
+  b = wrap_vdupq_lane_s64_1 (a);
+  vst1q_s64 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[0] != d[i])
+      return 1;
+  return 0;
+}
+
+int
+main ()
+{
+  if (test_vdup_lane_f32 ())
+    abort ();
+  if (test_vdup_lane_s8 ())
+    abort ();
+  if (test_vdup_lane_s16 ())
+    abort ();
+  if (test_vdup_lane_s32 ())
+    abort ();
+  if (test_vdup_lane_s64 ())
+    abort ();
+  if (test_vdupq_lane_f32 ())
+    abort ();
+  if (test_vdupq_lane_s8 ())
+    abort ();
+  if (test_vdupq_lane_s16 ())
+    abort ();
+  if (test_vdupq_lane_s32 ())
+    abort ();
+  if (test_vdupq_lane_s64 ())
+    abort ();
+
+  return 0;
+}
+
+/* Asm check for test_vdup_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
+
+/* Asm check for test_vdupq_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
+
+/* Asm check for test_vdup_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* Asm check for test_vdup_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+
+/* Asm check for test_vdupq_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* Asm check for test_vdupq_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+
+/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..7c04e759a5291bf5213ad5abf5c75289afad7359
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
@@ -0,0 +1,343 @@ 
+/* Test vdup<b,h,s,d>_lane scalar intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O1 --save-temps" } */
+
+#include <arm_neon.h>
+
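+/* The "w" constraints keep the value in a SIMD/FP register across the
+   empty asm, so a lane read has to be done as a "dup b/h/s" into that
+   register rather than a "umov" into a general-purpose register; the
+   scan-assembler checks at the end of this file depend on that.  */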
+#define force_simd(V1)   asm volatile (""	\
+          : "=w"(V1)				\
+          : "w"(V1)				\
+          : /* No clobbers */)
+
+extern void abort (void);
+
+float32_t __attribute__ ((noinline))
+wrap_vdups_lane_f32_0 (float32x2_t dummy, float32x2_t a)
+{
+  return vdups_lane_f32 (a, 0);
+}
+
+float32_t __attribute__ ((noinline))
+wrap_vdups_lane_f32_1 (float32x2_t a)
+{
+  return vdups_lane_f32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_f32 ()
+{
+  float32x2_t a;
+  float32_t b;
+  float32_t c[2] = { 0.0, 1.0 };
+
+  a = vld1_f32 (c);
+  b = wrap_vdups_lane_f32_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdups_lane_f32_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+float64_t __attribute__ ((noinline))
+wrap_vdupd_lane_f64_0 (float64x1_t dummy, float64x1_t a)
+{
+  return vdupd_lane_f64 (a, 0);
+}
+
+int __attribute__ ((noinline))
+test_vdupd_lane_f64 ()
+{
+  float64x1_t a;
+  float64_t b;
+  float64_t c[1] = { 0.0 };
+  a = vld1_f64 (c);
+  b = wrap_vdupd_lane_f64_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int8_t __attribute__ ((noinline))
+wrap_vdupb_lane_s8_0 (int8x8_t dummy, int8x8_t a)
+{
+  int8_t result = vdupb_lane_s8 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+int8_t __attribute__ ((noinline))
+wrap_vdupb_lane_s8_1 (int8x8_t a)
+{
+  int8_t result = vdupb_lane_s8 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vdupb_lane_s8 ()
+{
+  int8x8_t a;
+  int8_t b;
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+  a = vld1_s8 (c);
+  b = wrap_vdupb_lane_s8_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdupb_lane_s8_1 (a);
+  if (c[1] != b)
+    return 1;
+
+  return 0;
+}
+
+uint8_t __attribute__ ((noinline))
+wrap_vdupb_lane_u8_0 (uint8x8_t dummy, uint8x8_t a)
+{
+  uint8_t result = vdupb_lane_u8 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+uint8_t __attribute__ ((noinline))
+wrap_vdupb_lane_u8_1 (uint8x8_t a)
+{
+  uint8_t result = vdupb_lane_u8 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vdupb_lane_u8 ()
+{
+  uint8x8_t a;
+  uint8_t b;
+  uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+  a = vld1_u8 (c);
+  b = wrap_vdupb_lane_u8_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdupb_lane_u8_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+int16_t __attribute__ ((noinline))
+wrap_vduph_lane_s16_0 (int16x4_t dummy, int16x4_t a)
+{
+  int16_t result = vduph_lane_s16 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+int16_t __attribute__ ((noinline))
+wrap_vduph_lane_s16_1 (int16x4_t a)
+{
+  int16_t result = vduph_lane_s16 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vduph_lane_s16 ()
+{
+  int16x4_t a;
+  int16_t b;
+  int16_t c[4] = { 0, 1, 2, 3 };
+
+  a = vld1_s16 (c);
+  b = wrap_vduph_lane_s16_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vduph_lane_s16_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+uint16_t __attribute__ ((noinline))
+wrap_vduph_lane_u16_0 (uint16x4_t dummy, uint16x4_t a)
+{
+  uint16_t result = vduph_lane_u16 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+uint16_t __attribute__ ((noinline))
+wrap_vduph_lane_u16_1 (uint16x4_t a)
+{
+  uint16_t result = vduph_lane_u16 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vduph_lane_u16 ()
+{
+  uint16x4_t a;
+  uint16_t b;
+  uint16_t c[4] = { 0, 1, 2, 3 };
+
+  a = vld1_u16 (c);
+  b = wrap_vduph_lane_u16_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vduph_lane_u16_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+int32_t __attribute__ ((noinline))
+wrap_vdups_lane_s32_0 (int32x2_t dummy, int32x2_t a)
+{
+  int32_t result = vdups_lane_s32 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+int32_t __attribute__ ((noinline))
+wrap_vdups_lane_s32_1 (int32x2_t a)
+{
+  int32_t result = vdups_lane_s32 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_s32 ()
+{
+  int32x2_t a;
+  int32_t b;
+  int32_t c[2] = { 0, 1 };
+
+  a = vld1_s32 (c);
+  b = wrap_vdups_lane_s32_0 (vcreate_s32 (0), a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdups_lane_s32_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+uint32_t __attribute__ ((noinline))
+wrap_vdups_lane_u32_0 (uint32x2_t dummy, uint32x2_t a)
+{
+  uint32_t result = vdups_lane_u32 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+uint32_t __attribute__ ((noinline))
+wrap_vdups_lane_u32_1 (uint32x2_t a)
+{
+  uint32_t result = vdups_lane_u32 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_u32 ()
+{
+  uint32x2_t a;
+  uint32_t b;
+  uint32_t c[2] = { 0, 1 };
+  a = vld1_u32 (c);
+  b = wrap_vdups_lane_u32_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdups_lane_u32_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+uint64_t __attribute__ ((noinline))
+wrap_vdupd_lane_u64_0 (uint64x1_t dummy, uint64x1_t a)
+{
+  return vdupd_lane_u64 (a, 0);
+}
+
+int __attribute__ ((noinline))
+test_vdupd_lane_u64 ()
+{
+  uint64x1_t a;
+  uint64_t b;
+  uint64_t c[1] = { 0 };
+
+  a = vld1_u64 (c);
+  b = wrap_vdupd_lane_u64_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int64_t __attribute__ ((noinline))
+wrap_vdupd_lane_s64_0 (int64x1_t dummy, int64x1_t a)
+{
+  return vdupd_lane_s64 (a, 0);
+}
+
+int __attribute__ ((noinline))
+test_vdupd_lane_s64 ()
+{
+  int64x1_t a;
+  int64_t b;
+  int64_t c[1] = { 0 };
+
+  a = vld1_s64 (c);
+  b = wrap_vdupd_lane_s64_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int
+main ()
+{
+  if (test_vdups_lane_f32 ())
+    abort ();
+  if (test_vdupd_lane_f64 ())
+    abort ();
+  if (test_vdupb_lane_s8 ())
+    abort ();
+  if (test_vdupb_lane_u8 ())
+    abort ();
+  if (test_vduph_lane_s16 ())
+    abort ();
+  if (test_vduph_lane_u16 ())
+    abort ();
+  if (test_vdups_lane_s32 ())
+    abort ();
+  if (test_vdups_lane_u32 ())
+    abort ();
+  if (test_vdupd_lane_s64 ())
+    abort ();
+  if (test_vdupd_lane_u64 ())
+    abort ();
+  return 0;
+}
+
+/* Asm check for vdupb_lane_s8, vdupb_lane_u8.  */
+/* { dg-final { scan-assembler-not "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" } } */
+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 2 } } */
+
+/* Asm check for vduph_lane_s16, vduph_lane_u16.  */
+/* { dg-final { scan-assembler-not "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" } } */
+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */
+
+/* Asm check for vdups_lane_f32, vdups_lane_s32, vdups_lane_u32.  */
+/* Can't generate "dup s<n>, v<m>[0]" for vdups_lane_s32 and vdups_lane_u32.  */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 1} } */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */
+
+/* Asm check for vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64.  */
+/* Attempts to make the compiler generate vdupd are not practical.  */
+/* { dg-final { scan-assembler-not "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]" } }
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..a79910d68d744314205f9ecab67ec07560681725
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
@@ -0,0 +1,619 @@ 
+/* Test vdup_n intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O1 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+float32x2_t __attribute__ ((noinline))
+wrap_vdup_n_f32 (float32_t a)
+{
+  return vdup_n_f32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_f32 ()
+{
+  float32_t a = 1.0;
+  float32x2_t b;
+  float32_t c[2];
+  int i;
+
+  b = wrap_vdup_n_f32 (a);
+  vst1_f32 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+float32x4_t __attribute__ ((noinline))
+wrap_vdupq_n_f32 (float32_t a)
+{
+  return vdupq_n_f32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_f32 ()
+{
+  float32_t a = 1.0;
+  float32x4_t b;
+  float32_t c[4];
+  int i;
+
+  b = wrap_vdupq_n_f32 (a);
+  vst1q_f32 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+float64x1_t __attribute__ ((noinline))
+wrap_vdup_n_f64 (float64_t a)
+{
+  return vdup_n_f64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_f64 ()
+{
+  float64_t a = 1.0;
+  float64x1_t b;
+  float64_t c[1];
+  int i;
+
+  b = wrap_vdup_n_f64 (a);
+  vst1_f64 (c, b);
+  for (i = 0; i < 1; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+float64x2_t __attribute__ ((noinline))
+wrap_vdupq_n_f64 (float64_t a)
+{
+  return vdupq_n_f64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_f64 ()
+{
+  float64_t a = 1.0;
+  float64x2_t b;
+  float64_t c[2];
+  int i;
+
+  b = wrap_vdupq_n_f64 (a);
+  vst1q_f64 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+poly8x8_t __attribute__ ((noinline))
+wrap_vdup_n_p8 (poly8_t a)
+{
+  return vdup_n_p8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_p8 ()
+{
+  poly8_t a = 1;
+  poly8x8_t b;
+  poly8_t c[8];
+  int i;
+
+  b = wrap_vdup_n_p8 (a);
+  vst1_p8 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+poly8x16_t __attribute__ ((noinline))
+wrap_vdupq_n_p8 (poly8_t a)
+{
+  return vdupq_n_p8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_p8 ()
+{
+  poly8_t a = 1;
+  poly8x16_t b;
+  poly8_t c[16];
+  int i;
+
+  b = wrap_vdupq_n_p8 (a);
+  vst1q_p8 (c, b);
+  for (i = 0; i < 16; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int8x8_t __attribute__ ((noinline))
+wrap_vdup_n_s8 (int8_t a)
+{
+  return vdup_n_s8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s8 ()
+{
+  int8_t a = 1;
+  int8x8_t b;
+  int8_t c[8];
+  int i;
+
+  b = wrap_vdup_n_s8 (a);
+  vst1_s8 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int8x16_t __attribute__ ((noinline))
+wrap_vdupq_n_s8 (int8_t a)
+{
+  return vdupq_n_s8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s8 ()
+{
+  int8_t a = 1;
+  int8x16_t b;
+  int8_t c[16];
+  int i;
+
+  b = wrap_vdupq_n_s8 (a);
+  vst1q_s8 (c, b);
+  for (i = 0; i < 16; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint8x8_t __attribute__ ((noinline))
+wrap_vdup_n_u8 (uint8_t a)
+{
+  return vdup_n_u8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u8 ()
+{
+  uint8_t a = 1;
+  uint8x8_t b;
+  uint8_t c[8];
+  int i;
+
+  b = wrap_vdup_n_u8 (a);
+  vst1_u8 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint8x16_t __attribute__ ((noinline))
+wrap_vdupq_n_u8 (uint8_t a)
+{
+  return vdupq_n_u8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u8 ()
+{
+  uint8_t a = 1;
+  uint8x16_t b;
+  uint8_t c[16];
+  int i;
+
+  b = wrap_vdupq_n_u8 (a);
+  vst1q_u8 (c, b);
+  for (i = 0; i < 16; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+poly16x4_t __attribute__ ((noinline))
+wrap_vdup_n_p16 (poly16_t a)
+{
+  return vdup_n_p16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_p16 ()
+{
+  poly16_t a = 1;
+  poly16x4_t b;
+  poly16_t c[4];
+  int i;
+
+  b = wrap_vdup_n_p16 (a);
+  vst1_p16 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+poly16x8_t __attribute__ ((noinline))
+wrap_vdupq_n_p16 (poly16_t a)
+{
+  return vdupq_n_p16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_p16 ()
+{
+  poly16_t a = 1;
+  poly16x8_t b;
+  poly16_t c[8];
+  int i;
+
+  b = wrap_vdupq_n_p16 (a);
+  vst1q_p16 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int16x4_t __attribute__ ((noinline))
+wrap_vdup_n_s16 (int16_t a)
+{
+  return vdup_n_s16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s16 ()
+{
+  int16_t a = 1;
+  int16x4_t b;
+  int16_t c[4];
+  int i;
+
+  b = wrap_vdup_n_s16 (a);
+  vst1_s16 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int16x8_t __attribute__ ((noinline))
+wrap_vdupq_n_s16 (int16_t a)
+{
+  return vdupq_n_s16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s16 ()
+{
+  int16_t a = 1;
+  int16x8_t b;
+  int16_t c[8];
+  int i;
+
+  b = wrap_vdupq_n_s16 (a);
+  vst1q_s16 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint16x4_t __attribute__ ((noinline))
+wrap_vdup_n_u16 (uint16_t a)
+{
+  return vdup_n_u16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u16 ()
+{
+  uint16_t a = 1;
+  uint16x4_t b;
+  uint16_t c[4];
+  int i;
+
+  b = wrap_vdup_n_u16 (a);
+  vst1_u16 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint16x8_t __attribute__ ((noinline))
+wrap_vdupq_n_u16 (uint16_t a)
+{
+  return vdupq_n_u16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u16 ()
+{
+  uint16_t a = 1;
+  uint16x8_t b;
+  uint16_t c[8];
+  int i;
+
+  b = wrap_vdupq_n_u16 (a);
+  vst1q_u16 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int32x2_t __attribute__ ((noinline))
+wrap_vdup_n_s32 (int32_t a)
+{
+  return vdup_n_s32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s32 ()
+{
+  int32_t a = 1;
+  int32x2_t b;
+  int32_t c[2];
+  int i;
+
+  b = wrap_vdup_n_s32 (a);
+  vst1_s32 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int32x4_t __attribute__ ((noinline))
+wrap_vdupq_n_s32 (int32_t a)
+{
+  return vdupq_n_s32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s32 ()
+{
+  int32_t a = 1;
+  int32x4_t b;
+  int32_t c[4];
+  int i;
+
+  b = wrap_vdupq_n_s32 (a);
+  vst1q_s32 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint32x2_t __attribute__ ((noinline))
+wrap_vdup_n_u32 (uint32_t a)
+{
+  return vdup_n_u32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u32 ()
+{
+  uint32_t a = 1;
+  uint32x2_t b;
+  uint32_t c[2];
+  int i;
+
+  b = wrap_vdup_n_u32 (a);
+  vst1_u32 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint32x4_t __attribute__ ((noinline))
+wrap_vdupq_n_u32 (uint32_t a)
+{
+  return vdupq_n_u32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u32 ()
+{
+  uint32_t a = 1;
+  uint32x4_t b;
+  uint32_t c[4];
+  int i;
+
+  b = wrap_vdupq_n_u32 (a);
+  vst1q_u32 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int64x1_t __attribute__ ((noinline))
+wrap_vdup_n_s64 (int64_t a)
+{
+  return vdup_n_s64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s64 ()
+{
+  int64_t a = 1;
+  int64x1_t b;
+  int64_t c[1];
+  int i;
+
+  b = wrap_vdup_n_s64 (a);
+  vst1_s64 (c, b);
+  for (i = 0; i < 1; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int64x2_t __attribute__ ((noinline))
+wrap_vdupq_n_s64 (int64_t a)
+{
+  return vdupq_n_s64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s64 ()
+{
+  int64_t a = 1;
+  int64x2_t b;
+  int64_t c[2];
+  int i;
+
+  b = wrap_vdupq_n_s64 (a);
+  vst1q_s64 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint64x1_t __attribute__ ((noinline))
+wrap_vdup_n_u64 (uint64_t a)
+{
+  return vdup_n_u64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u64 ()
+{
+  uint64_t a = 1;
+  uint64x1_t b;
+  uint64_t c[1];
+  int i;
+
+  b = wrap_vdup_n_u64 (a);
+  vst1_u64 (c, b);
+  for (i = 0; i < 1; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint64x2_t __attribute__ ((noinline))
+wrap_vdupq_n_u64 (uint64_t a)
+{
+  return vdupq_n_u64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u64 ()
+{
+  uint64_t a = 1;
+  uint64x2_t b;
+  uint64_t c[2];
+  int i;
+
+  b = wrap_vdupq_n_u64 (a);
+  vst1q_u64 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int
+main ()
+{
+  if (test_vdup_n_f32 ())
+    abort ();
+  if (test_vdup_n_f64 ())
+    abort ();
+  if (test_vdup_n_p8 ())
+    abort ();
+  if (test_vdup_n_u8 ())
+    abort ();
+  if (test_vdup_n_s8 ())
+    abort ();
+  if (test_vdup_n_p16 ())
+    abort ();
+  if (test_vdup_n_s16 ())
+    abort ();
+  if (test_vdup_n_u16 ())
+    abort ();
+  if (test_vdup_n_s32 ())
+    abort ();
+  if (test_vdup_n_u32 ())
+    abort ();
+  if (test_vdup_n_s64 ())
+    abort ();
+  if (test_vdup_n_u64 ())
+    abort ();
+  if (test_vdupq_n_f32 ())
+    abort ();
+  if (test_vdupq_n_f64 ())
+    abort ();
+  if (test_vdupq_n_p8 ())
+    abort ();
+  if (test_vdupq_n_u8 ())
+    abort ();
+  if (test_vdupq_n_s8 ())
+    abort ();
+  if (test_vdupq_n_p16 ())
+    abort ();
+  if (test_vdupq_n_s16 ())
+    abort ();
+  if (test_vdupq_n_u16 ())
+    abort ();
+  if (test_vdupq_n_s32 ())
+    abort ();
+  if (test_vdupq_n_u32 ())
+    abort ();
+  if (test_vdupq_n_s64 ())
+    abort ();
+  if (test_vdupq_n_u64 ())
+    abort ();
+  return 0;
+}
+
+/* No asm checks for vdup_n_f32, vdupq_n_f32, vdup_n_f64 and vdupq_n_f64:
+   a floating-point value cannot be forced into a general-purpose register.  */
+
+/* Asm check for test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */
+
+/* Asm check for test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */
+
+/* Asm check for test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */
+
+/* Asm check for test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */
+
+/* Asm check for test_vdup_n_s32, test_vdup_n_u32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */
+
+/* Asm check for test_vdupq_n_s32, test_vdupq_n_u32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */
+
+/* Asm check for test_vdup_n_s64, test_vdup_n_u64 are left out.
+   Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+"
+   are not practical.  */
+
+/* Asm check for test_vdupq_n_s64, test_vdupq_n_u64.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */
+
+/* { dg-final { cleanup-saved-temps } } */