Patchwork [AArch64] NEON vdup testcases

login
register
mail settings
Submitter Alex Velenko
Date Jan. 16, 2014, 12:12 p.m.
Message ID <52D7CCB4.5040105@arm.com>
Download mbox | patch
Permalink /patch/311720/
State New
Headers show

Comments

Alex Velenko - Jan. 16, 2014, 12:12 p.m.
[AArch64] VDUP testcases

Hi,

This patch implements test cases for following NEON intrinsics:
vdup_lane_f32
vdup_lane_s[8,16]
vdup_lane_s[32,64]
vdup_n_[p,s,u][8,16]
vdup_n_[s,u][32,64]

vdupb_lane_[s,u]8
vduph_lane_[s,u]16
vdupd_lane_[f,s,u]64
vdups_lane_[f,s,u]32

vdupq_lane_[f,s][32,64]
vdupq_lane_s[8,16]
vdup[q]_n_f32
vdupq_n_f64
vdupq_n_[s,p,u][8,16]
vdupq_n_[s,u][32,64]

Tests succeed on both Little-Endian and Big-Eendian.

Ok for trunk?

Thanks,
Alex

gcc/testsuite/

2014-01-16  Alex Velenko  <Alex.Velenko@arm.com>

	        * gcc.target/aarch64/vdup_lane_1.c: New testcase.
	        * gcc.target/aarch64/vdup_lane_2.c: New testcase.
	        * gcc.target/aarch64/vdup_n_1.c: New testcase.
Alex Velenko - Jan. 22, 2014, 10:27 a.m.
On 16/01/14 12:12, Alex Velenko wrote:
> [AArch64] VDUP testcases
>
> Hi,
>
> This patch implements test cases for following NEON intrinsics:
> vdup_lane_f32
> vdup_lane_s[8,16]
> vdup_lane_s[32,64]
> vdup_n_[p,s,u][8,16]
> vdup_n_[s,u][32,64]
>
> vdupb_lane_[s,u]8
> vduph_lane_[s,u]16
> vdupd_lane_[f,s,u]64
> vdups_lane_[f,s,u]32
>
> vdupq_lane_[f,s][32,64]
> vdupq_lane_s[8,16]
> vdup[q]_n_f32
> vdupq_n_f64
> vdupq_n_[s,p,u][8,16]
> vdupq_n_[s,u][32,64]
>
> Tests succeed on both Little-Endian and Big-Eendian.
>
> Ok for trunk?
>
> Thanks,
> Alex
>
> gcc/testsuite/
>
> 2014-01-16  Alex Velenko  <Alex.Velenko@arm.com>
>
>              * gcc.target/aarch64/vdup_lane_1.c: New testcase.
>              * gcc.target/aarch64/vdup_lane_2.c: New testcase.
>              * gcc.target/aarch64/vdup_n_1.c: New testcase.

Ping!

Hi,
Can someone, please, review the patch?
Kind regards,
Alex Velenko
Marcus Shawcroft - Jan. 27, 2014, 5:47 p.m.
On 16 January 2014 12:12, Alex Velenko <Alex.Velenko@arm.com> wrote:

> This patch implements test cases for following NEON intrinsics:
> vdup_lane_f32
> vdup_lane_s[8,16]
> vdup_lane_s[32,64]
> vdup_n_[p,s,u][8,16]
> vdup_n_[s,u][32,64]
>
> vdupb_lane_[s,u]8
> vduph_lane_[s,u]16
> vdupd_lane_[f,s,u]64
> vdups_lane_[f,s,u]32
>
> vdupq_lane_[f,s][32,64]
> vdupq_lane_s[8,16]
> vdup[q]_n_f32
> vdupq_n_f64
> vdupq_n_[s,p,u][8,16]
> vdupq_n_[s,u][32,64]


+int
+__attribute__ ((noinline)) test_vdupq_lane_s16 ()

The function name should be placed in column1, hence the above should
be written:

int __attribute__ ((noinline))
test_vdupq_lane_s16 ()

+/* Covers vdups_lane_f32, vdups_lane_s32, vdups_lane_u32.  */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+,
v\[0-9\]+\.s\\\[0\\\]" 3 } } */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+,
v\[0-9\]+\.s\\\[1\\\]" 3 } } */
+
+#212 "/work/tempdev//src/gcc/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c"

This should not be here.

Cheers
/Marcus

Patch

diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..a80e10146a6e45b44c3a09701da949a8e9aa7653
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
@@ -0,0 +1,409 @@ 
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
+
+#define force_simd(V1)   asm volatile ("orr %0.16b, %1.16b, %1.16b"	\
+	   : "=w"(V1)						\
+	   : "w"(V1)						\
+	   : /* No clobbers */);
+
+int
+__attribute__ ((noinline)) test_vdup_lane_f32 ()
+{
+  float32x2_t a;
+  float32x2_t b;
+  int i = 0;
+  float32_t c[2] = { 0.0E0 , 3.14 };
+  float32_t d[2];
+  a = vld1_f32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_f32 (a, 0);
+  vst1_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_f32 (a, 1);
+  vst1_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[1] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+/* Covers test_vdup_lane_f32 and test_vdup_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_f32 ()
+{
+  float32x2_t a;
+  float32x4_t b;
+  int i = 0;
+  float32_t c[2] = { 0.0E0 , 3.14 };
+  float32_t d[4];
+  a = vld1_f32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_f32 (a, 0);
+  vst1q_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_f32 (a, 1);
+  vst1q_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (c[1] != d[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_lane_f32 and test_vdupq_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+int
+__attribute__ ((noinline)) test_vdup_lane_s8 ()
+{
+  int8x8_t a;
+  int8x8_t b;
+  int i = 0;
+  /* Only two first cases are interesting.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[8];
+  a = vld1_s8 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s8 (a, 0);
+  vst1_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  force_simd (a)
+  b = vdup_lane_s8 (a, 4);
+  vst1_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (c[4] != d[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[4\\\]" 1 } } */
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_s8 ()
+{
+  int8x8_t a;
+  int8x16_t b;
+  int i = 0;
+  /* Only two first cases are interesting.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[16];
+  a = vld1_s8 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s8 (a, 0);
+  vst1q_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s8 (a, 4);
+  vst1q_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (c[4] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+/* Covers test_vdupq_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[4\\\]" 1 } } */
+
+int
+__attribute__ ((noinline)) test_vdup_lane_s16 ()
+{
+  int16x4_t a;
+  int16x4_t b;
+  int i = 0;
+  /* Only two first cases are interesting.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[4];
+  a = vld1_s16 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s16 (a, 0);
+  vst1_s16 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s16 (a, 2);
+  vst1_s16 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (c[2] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+/* Covers test_vdup_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* Covers test_vdup_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[2\\\]" 1 } } */
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_s16 ()
+{
+  int16x4_t a;
+  int16x8_t b;
+  int i = 0;
+  /* Only two first cases are interesting.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[8];
+  a = vld1_s16 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s16 (a, 0);
+  vst1q_s16 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s16 (a, 2);
+  vst1q_s16 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (c[2] != d[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* Covers test_vdupq_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[2\\\]" 1 } } */
+
+int
+__attribute__ ((noinline)) test_vdup_lane_s32 ()
+{
+  int32x2_t a;
+  int32x2_t b;
+  int i = 0;
+  int32_t c[2] = { 0, 1 };
+  int32_t d[2];
+  a = vld1_s32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s32 (a, 0);
+  vst1_s32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s32 (a, 1);
+  vst1_s32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[1] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_s32 ()
+{
+  int32x2_t a;
+  int32x4_t b;
+  int i = 0;
+  int32_t c[2] = { 0, 1 };
+  int32_t d[4];
+  a = vld1_s32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s32 (a, 0);
+  vst1q_s32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s32 (a, 1);
+  vst1q_s32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (c[1] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+int
+__attribute__ ((noinline)) test_vdup_lane_s64 ()
+{
+  uint64x1_t a;
+  uint64x1_t b;
+  uint64_t c[1];
+  uint64_t d[1];
+
+  c[0] = 0;
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s64 (a, 0);
+  vst1_s64 (d, b);
+  INHIB_OPTIMIZATION;
+  if (c[0] != d[0])
+    return 1;
+
+  c[0] = 1;
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s64 (a, 0);
+  vst1_s64 (d, b);
+  INHIB_OPTIMIZATION;
+  if (c[0] != d[0])
+    return 1;
+  return 0;
+}
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_s64 ()
+{
+  int64x1_t a;
+  int64x2_t b;
+  int i = 0;
+  int64_t c[1];
+  int64_t d[2];
+  c[0] = 0;
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s64 (a, 0);
+  vst1q_s64 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+  i = 0;
+  c[0] = 1;
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s64 (a, 0);
+  vst1q_s64 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+int
+main ()
+{
+  if (test_vdup_lane_f32 ())
+    abort ();
+  if (test_vdup_lane_s8 ())
+    abort ();
+  if (test_vdup_lane_s16 ())
+    abort ();
+  if (test_vdup_lane_s32 ())
+    abort ();
+  if (test_vdup_lane_s64 ())
+    abort ();
+  if (test_vdupq_lane_f32 ())
+    abort ();
+  if (test_vdupq_lane_s8 ())
+    abort ();
+  if (test_vdupq_lane_s16 ())
+    abort ();
+  if (test_vdupq_lane_s32 ())
+    abort ();
+  if (test_vdupq_lane_s64 ())
+    abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..319ce4575c8f78c75ad5ece3acc12ab6e7b2ac24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
@@ -0,0 +1,259 @@ 
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
+
+#define force_simd(V1)   asm volatile ("orr %0.16b, %1.16b, %1.16b"	\
+	   : "=w"(V1)						\
+	   : "w"(V1)						\
+	   : /* No clobbers */);
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+test_vdups_lane_f32 ()
+{
+  float32x2_t a;
+  float32_t b;
+  float32_t c[2] = { 0.0, 1.0 };
+  a = vld1_f32 (c);
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_f32 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_f32 (a, 1);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+/* Covers vdups_lane_f32, vdups_lane_s32, vdups_lane_u32.  */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 3 } } */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */
+
+#212 "/work/tempdev//src/gcc/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c"
+int __attribute__ ((noinline))
+test_vdupd_lane_f64 ()
+{
+  float64x1_t a;
+  float64_t b;
+  float64_t c[1] = { 0.0 };
+  a = vld1_f64 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupd_lane_f64 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+/* Covers vdups_lane_f64, vdups_lane_s64, vdups_lane_u64.  */
+/* Attempts to make the compiler generate
+   "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[\[0\\\]"
+   are not practical.  */
+
+int __attribute__ ((noinline))
+test_vdupb_lane_s8 ()
+{
+  int8x8_t a;
+  int8_t b;
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  a = vld1_s8 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupb_lane_s8 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdupb_lane_s8 (a, 4);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[4] != b)
+    return 1;
+
+  return 0;
+}
+
+/* Covers vdupb_lane_s8, vdupb_lane_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[4\\\]" 2 } } */
+
+int __attribute__ ((noinline))
+test_vdupb_lane_u8 ()
+{
+  uint8x8_t a;
+  uint8_t b;
+  uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  a = vld1_u8 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupb_lane_u8 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdupb_lane_u8 (a, 4);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[4] != b)
+    return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vduph_lane_u16 ()
+{
+  uint16x4_t a;
+  uint16_t b;
+  uint16_t c[4] = { 0, 1, 2, 3 };
+  a = vld1_u16 (c);
+  INHIB_OPTIMIZATION;
+  b = vduph_lane_u16 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vduph_lane_u16 (a, 2);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[2] != b)
+    return 1;
+  return 0;
+}
+
+/* Covers vduph_lane_h16, vduph_lane_h16.  */
+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[2\\\]" 2 } } */
+int __attribute__ ((noinline))
+test_vduph_lane_s16 ()
+{
+  int16x4_t a;
+  int16_t b;
+  int16_t c[4] = { 0, 1, 2, 3 };
+  a = vld1_s16 (c);
+  INHIB_OPTIMIZATION;
+  b = vduph_lane_s16 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vduph_lane_s16 (a, 2);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[2] != b)
+    return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_s32 ()
+{
+  int32x2_t a;
+  int32_t b;
+  int32_t c[2] = { 0, 1 };
+  a = vld1_s32 (c);
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_s32 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_s32 (a, 1);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_u32 ()
+{
+  uint32x2_t a;
+  uint32_t b;
+  uint32_t c[2] = { 0, 1 };
+  a = vld1_u32 (c);
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_u32 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_u32 (a, 1);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+int __attribute__ ((noinline))
+test_vdupd_lane_u64 ()
+{
+  uint64x1_t a;
+  uint64_t b;
+  uint64_t c[1] = { 0 };
+  a = vld1_u64 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupd_lane_u64 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupd_lane_s64 ()
+{
+  int64x1_t a;
+  int64_t b;
+  int64_t c[1] = { 0 };
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupd_lane_s64 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int
+main ()
+{
+  if (test_vdups_lane_f32 ())
+    abort ();
+  if (test_vdupb_lane_s8 ())
+    abort ();
+  if (test_vdupb_lane_u8 ())
+    abort ();
+  if (test_vduph_lane_s16 ())
+    abort ();
+  if (test_vduph_lane_u16 ())
+    abort ();
+  if (test_vdups_lane_s32 ())
+    abort ();
+  if (test_vdups_lane_u32 ())
+    abort ();
+  if (test_vdupd_lane_s64 ())
+    abort ();
+  if (test_vdupd_lane_u64 ())
+    abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..06bee4cdd2532dd4302e1c4437a74970e4bc2966
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
@@ -0,0 +1,659 @@ 
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
+
+#define force_gp(V1)   asm volatile ("orr %0, %1, %1"		\
+	   : "=r"(V1)						\
+	   : "r"(V1)						\
+	   : /* No clobbers */);
+
+int __attribute__ ((noinline))
+test_vdup_n_f32 ()
+{
+  float32_t a;
+  float32x2_t b;
+  float32_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = (1.0);
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_f32 (a);
+  INHIB_OPTIMIZATION;
+  vst1_f32 (c, b);
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* No asm check.  */
+/* Cannot force floating point value in general purpose regester.  */
+
+int __attribute__ ((noinline))
+test_vdupq_n_f32 ()
+{
+  float32_t a;
+  float32x4_t b;
+  float32_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1.0;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_f32 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_f32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* No asm check.  */
+/* Cannot force floating point value in general purpose regester.  */
+
+int __attribute__ ((noinline))
+test_vdup_n_f64 ()
+{
+  float64_t a;
+  float64x1_t b;
+  float64_t c[1];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1.0;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_f64 (a);
+  INHIB_OPTIMIZATION;
+  vst1_f64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 1; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* No asm check.  */
+/* Cannot force floating point value in general purpose regester.  */
+
+int __attribute__ ((noinline))
+test_vdupq_n_f64 ()
+{
+  float64_t a;
+  float64x2_t b;
+  float64_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1.0;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_f64 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_f64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* No asm check.  */
+/* Cannot force floating point value in general purpose regester.  */
+
+int __attribute__ ((noinline))
+test_vdup_n_p8 ()
+{
+  poly8_t a;
+  poly8x8_t b;
+  poly8_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_p8 (a);
+  INHIB_OPTIMIZATION;
+  vst1_p8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */
+
+int __attribute__ ((noinline))
+test_vdupq_n_p8 ()
+{
+  poly8_t a;
+  poly8x16_t b;
+  poly8_t c[16];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_p8 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_p8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */
+
+int __attribute__ ((noinline))
+test_vdup_n_s8 ()
+{
+  int8_t a;
+  int8x8_t b;
+  int8_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_s8 (a);
+  INHIB_OPTIMIZATION;
+  vst1_s8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s8 ()
+{
+  int8_t a;
+  int8x16_t b;
+  int8_t c[16];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_s8 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_s8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u8 ()
+{
+  uint8_t a;
+  uint8x8_t b;
+  uint8_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_u8 (a);
+  INHIB_OPTIMIZATION;
+  vst1_u8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u8 ()
+{
+  uint8_t a;
+  uint8x16_t b;
+  uint8_t c[16];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_u8 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_u8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_p16 ()
+{
+  poly16_t a;
+  poly16x4_t b;
+  poly16_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_p16 (a);
+  INHIB_OPTIMIZATION;
+  vst1_p16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */
+
+int __attribute__ ((noinline))
+test_vdupq_n_p16 ()
+{
+  poly16_t a;
+  poly16x8_t b;
+  poly16_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_p16 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_p16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */
+
+int __attribute__ ((noinline))
+test_vdup_n_s16 ()
+{
+  int16_t a;
+  int16x4_t b;
+  int16_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_s16 (a);
+  INHIB_OPTIMIZATION;
+  vst1_s16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s16 ()
+{
+  int16_t a;
+  int16x8_t b;
+  int16_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_s16 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_s16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u16 ()
+{
+  uint16_t a;
+  uint16x4_t b;
+  uint16_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_u16 (a);
+  INHIB_OPTIMIZATION;
+  vst1_u16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u16 ()
+{
+  uint16_t a;
+  uint16x8_t b;
+  uint16_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_u16 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_u16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s32 ()
+{
+  int32_t a;
+  int32x2_t b;
+  int32_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_s32 (a);
+  INHIB_OPTIMIZATION;
+  vst1_s32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_n_s32, test_vdup_n_u32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */
+
+int __attribute__ ((noinline))
+test_vdupq_n_s32 ()
+{
+  int32_t a;
+  int32x4_t b;
+  int32_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_s32 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_s32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_n_s32, test_vdupq_n_u32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */
+
+int __attribute__ ((noinline))
+test_vdup_n_u32 ()
+{
+  uint32_t a;
+  uint32x2_t b;
+  uint32_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_u32 (a);
+  INHIB_OPTIMIZATION;
+  vst1_u32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+
+int __attribute__ ((noinline))
+test_vdupq_n_u32 ()
+{
+  uint32_t a;
+  uint32x4_t b;
+  uint32_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_u32 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_u32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s64 ()
+{
+  int64_t a;
+  int64x1_t b;
+  int64_t c[1];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_s64 (a);
+  INHIB_OPTIMIZATION;
+  vst1_s64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 1; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_n_s64, test_vdup_n_u64.  */
+/* Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+"
+   are not practical.  */
+
+int __attribute__ ((noinline))
+test_vdupq_n_s64 ()
+{
+  int64_t a;
+  int64x2_t b;
+  int64_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_s64 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_s64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_n_s64, test_vdupq_n_u64.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */
+
+int __attribute__ ((noinline))
+test_vdup_n_u64 ()
+{
+  uint64_t a;
+  uint64x1_t b;
+  uint64_t c[1];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_u64 (a);
+  INHIB_OPTIMIZATION;
+  vst1_u64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 1; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u64 ()
+{
+  uint64_t a;
+  uint64x2_t b;
+  uint64_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_u64 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_u64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int
+main ()
+{
+  if (test_vdup_n_f32 ())
+    abort ();
+  if (test_vdup_n_f64 ())
+    abort ();
+  if (test_vdup_n_p8 ())
+    abort ();
+  if (test_vdup_n_u8 ())
+    abort ();
+  if (test_vdup_n_s8 ())
+    abort ();
+  if (test_vdup_n_p16 ())
+    abort ();
+  if (test_vdup_n_s16 ())
+    abort ();
+  if (test_vdup_n_u16 ())
+    abort ();
+  if (test_vdup_n_s32 ())
+    abort ();
+  if (test_vdup_n_u32 ())
+    abort ();
+  if (test_vdup_n_s64 ())
+    abort ();
+  if (test_vdup_n_u64 ())
+    abort ();
+  if (test_vdupq_n_f32 ())
+    abort ();
+  if (test_vdupq_n_f64 ())
+    abort ();
+  if (test_vdupq_n_p8 ())
+    abort ();
+  if (test_vdupq_n_u8 ())
+    abort ();
+  if (test_vdupq_n_s8 ())
+    abort ();
+  if (test_vdupq_n_p16 ())
+    abort ();
+  if (test_vdupq_n_s16 ())
+    abort ();
+  if (test_vdupq_n_u16 ())
+    abort ();
+  if (test_vdupq_n_s32 ())
+    abort ();
+  if (test_vdupq_n_u32 ())
+    abort ();
+  if (test_vdupq_n_s64 ())
+    abort ();
+  if (test_vdupq_n_u64 ())
+    abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */