
[AArch64] Refactor reduc_<su>plus patterns.

Message ID 1367336579-18667-1-git-send-email-james.greenhalgh@arm.com
State New

Commit Message

James Greenhalgh April 30, 2013, 3:42 p.m. UTC
Hi,

This patch refactors the reduc_<su>plus RTL patterns in the
AArch64 back-end to provide a more uniform interface for the
arm_neon.h builtins. This lets us rewrite the intrinsics in
arm_neon.h in terms of the standard pattern names, and allows the
signed variants to fold to tree where appropriate.
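
As a quick illustration (not part of the patch itself): a signed
reduction like the one below now lowers through the reduc_splus_
builtin, which aarch64_gimple_fold_builtin can fold to
REDUC_PLUS_EXPR at tree level instead of leaving an opaque call:

  #include <arm_neon.h>

  /* Hand-written sketch, not from the patch: with this change the
     vaddvq_s32 call maps to __builtin_aarch64_reduc_splus_v4si and
     becomes visible to the tree optimisers as REDUC_PLUS_EXPR.  */
  int32_t
  sum_across_lanes (int32x4_t __a)
  {
    return vaddvq_s32 (__a);
  }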

Regression tested on aarch64-none-elf with no regressions.

Thanks,
James

---
gcc/

2013-04-30  James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/aarch64-builtins.c
	(aarch64_gimple_fold_builtin): Fold more modes for reduc_splus_.
	* config/aarch64/aarch64-simd-builtins.def
	(reduc_splus_): Add new modes.
	(reduc_uplus_): New.
	* config/aarch64/aarch64-simd.md (aarch64_addvv4sf): Remove.
	(reduc_uplus_v4sf): Likewise.
	(reduc_splus_v4sf): Likewise.
	(aarch64_addv<mode>): Likewise.
	(reduc_uplus_<mode>): Likewise.
	(reduc_splus_<mode>): Likewise.
	(aarch64_addvv2di): Likewise.
	(reduc_uplus_v2di): Likewise.
	(reduc_splus_v2di): Likewise.
	(aarch64_addvv2si): Likewise.
	(reduc_uplus_v2si): Likewise.
	(reduc_splus_v2si): Likewise.
	(reduc_<sur>plus_<mode>): New.
	(reduc_<sur>plus_v2di): Likewise.
	(reduc_<sur>plus_v2si): Likewise.
	(reduc_<sur>plus_v4sf): Likewise.
	(aarch64_addpv4sf): Likewise.
	* config/aarch64/arm_neon.h
	(vaddv<q>_<s,u,f><8, 16, 32, 64>): Rewrite using builtins.
	* config/aarch64/iterators.md (unspec): Remove UNSPEC_ADDV,
	add UNSPEC_SADDV, UNSPEC_UADDV.
	(SUADDV): New.
	(sur): Add UNSPEC_SADDV, UNSPEC_UADDV.

gcc/testsuite/

2013-04-30  James Greenhalgh  <james.greenhalgh@arm.com>

	* gcc.target/aarch64/vect-vaddv.c: New.

Comments

Marcus Shawcroft April 30, 2013, 5:29 p.m. UTC | #1
OK
/Marcus


Patch

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 8eb32c6..4fdfe24 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1365,7 +1365,7 @@  aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 
 	  switch (fcode)
 	    {
-	      BUILTIN_VDQF (UNOP, addv, 0)
+	      BUILTIN_VALL (UNOP, reduc_splus_, 10)
 		new_stmt = gimple_build_assign_with_ops (
 						REDUC_PLUS_EXPR,
 						gimple_call_lhs (stmt),
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 97a597e..e420173 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -234,8 +234,9 @@ 
   BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
   BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
 
-  /* Implemented by aarch64_addv<mode>.  */
-  BUILTIN_VDQF (UNOP, addv, 0)
+  /* Implemented by reduc_<sur>plus_<mode>.  */
+  BUILTIN_VALL (UNOP, reduc_splus_, 10)
+  BUILTIN_VDQ (UNOP, reduc_uplus_, 10)
 
   /* Implemented by reduc_<maxmin_uns>_<mode>.  */
   BUILTIN_VDQIF (UNOP, reduc_smax_, 10)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8a48739..13384aa 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1438,156 +1438,70 @@ 
    (set_attr "simd_mode" "<MODE>")]
 )
 
-;; FP 'across lanes' add.
+;; 'across lanes' add.
 
-(define_insn "aarch64_addpv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=w")
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
-		    UNSPEC_FADDV))]
- "TARGET_SIMD"
- "faddp\\t%0.4s, %1.4s, %1.4s"
-  [(set_attr "simd_type" "simd_fadd")
-   (set_attr "simd_mode" "V4SF")]
-)
-
-(define_expand "reduc_uplus_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=w")
-       (match_operand:V4SF 1 "register_operand" "w"))]
- "TARGET_SIMD"
-{
-  rtx tmp = gen_reg_rtx (V4SFmode);
-  emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
-  emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
-  DONE;
-})
-
-(define_expand "reduc_splus_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=w")
-       (match_operand:V4SF 1 "register_operand" "w"))]
- "TARGET_SIMD"
-{
-  rtx tmp = gen_reg_rtx (V4SFmode);
-  emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
-  emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
-  DONE;
-})
-
-(define_expand "aarch64_addvv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=w")
-	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
-		     UNSPEC_FADDV))]
- "TARGET_SIMD"
-{
-  emit_insn (gen_reduc_splus_v4sf (operands[0], operands[1]));
-  DONE;
-})
-
-(define_insn "aarch64_addv<mode>"
- [(set (match_operand:V2F 0 "register_operand" "=w")
-       (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
-		    UNSPEC_FADDV))]
- "TARGET_SIMD"
- "faddp\\t%<Vetype>0, %1.<Vtype>"
-  [(set_attr "simd_type" "simd_fadd")
-   (set_attr "simd_mode" "<MODE>")]
-)
-
-(define_expand "reduc_uplus_<mode>"
- [(set (match_operand:V2F 0 "register_operand" "=w")
-       (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
-		    UNSPEC_FADDV))]
- "TARGET_SIMD"
- ""
-)
-
-(define_expand "reduc_splus_<mode>"
- [(set (match_operand:V2F 0 "register_operand" "=w")
-       (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
-		    UNSPEC_FADDV))]
- "TARGET_SIMD"
- ""
-)
-
-;; Reduction across lanes.
-
-(define_insn "aarch64_addv<mode>"
+(define_insn "reduc_<sur>plus_<mode>"
  [(set (match_operand:VDQV 0 "register_operand" "=w")
        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
-		    UNSPEC_ADDV))]
+		    SUADDV))]
  "TARGET_SIMD"
  "addv\\t%<Vetype>0, %1.<Vtype>"
   [(set_attr "simd_type" "simd_addv")
    (set_attr "simd_mode" "<MODE>")]
 )
 
-(define_expand "reduc_splus_<mode>"
- [(set (match_operand:VDQV 0 "register_operand" "=w")
-       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
-		    UNSPEC_ADDV))]
- "TARGET_SIMD"
- ""
-)
-
-(define_expand "reduc_uplus_<mode>"
- [(set (match_operand:VDQV 0 "register_operand" "=w")
-       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
-		    UNSPEC_ADDV))]
- "TARGET_SIMD"
- ""
-)
-
-(define_insn "aarch64_addvv2di"
+(define_insn "reduc_<sur>plus_v2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
-		    UNSPEC_ADDV))]
+		    SUADDV))]
  "TARGET_SIMD"
  "addp\\t%d0, %1.2d"
-  [(set_attr "simd_type" "simd_add")
+  [(set_attr "simd_type" "simd_addv")
    (set_attr "simd_mode" "V2DI")]
 )
 
-(define_expand "reduc_uplus_v2di"
- [(set (match_operand:V2DI 0 "register_operand" "=w")
-       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
-		    UNSPEC_ADDV))]
- "TARGET_SIMD"
- ""
-)
-
-(define_expand "reduc_splus_v2di"
- [(set (match_operand:V2DI 0 "register_operand" "=w")
-       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
-		    UNSPEC_ADDV))]
- "TARGET_SIMD"
- ""
-)
-
-(define_insn "aarch64_addvv2si"
+(define_insn "reduc_<sur>plus_v2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
-		    UNSPEC_ADDV))]
+		    SUADDV))]
  "TARGET_SIMD"
  "addp\\t%0.2s, %1.2s, %1.2s"
-  [(set_attr "simd_type" "simd_add")
+  [(set_attr "simd_type" "simd_addv")
    (set_attr "simd_mode" "V2SI")]
 )
 
-(define_expand "reduc_uplus_v2si"
- [(set (match_operand:V2SI 0 "register_operand" "=w")
-       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
-		    UNSPEC_ADDV))]
+(define_insn "reduc_<sur>plus_<mode>"
+ [(set (match_operand:V2F 0 "register_operand" "=w")
+       (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
+		    SUADDV))]
  "TARGET_SIMD"
- ""
+ "faddp\\t%<Vetype>0, %1.<Vtype>"
+  [(set_attr "simd_type" "simd_fadd")
+   (set_attr "simd_mode" "<MODE>")]
 )
 
-(define_expand "reduc_splus_v2si"
- [(set (match_operand:V2SI 0 "register_operand" "=w")
-       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
-		    UNSPEC_ADDV))]
+(define_insn "aarch64_addpv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
+		    UNSPEC_FADDV))]
  "TARGET_SIMD"
- ""
+ "faddp\\t%0.4s, %1.4s, %1.4s"
+  [(set_attr "simd_type" "simd_fadd")
+   (set_attr "simd_mode" "V4SF")]
 )
 
+(define_expand "reduc_<sur>plus_v4sf"
+ [(set (match_operand:V4SF 0 "register_operand")
+       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
+		    SUADDV))]
+ "TARGET_SIMD"
+{
+  rtx tmp = gen_reg_rtx (V4SFmode);
+  emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
+  emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
+  DONE;
+})
+
 ;; 'across lanes' max and min ops.
 
 (define_insn "reduc_<maxmin_uns>_<mode>"
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index cdefa86..608db35 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -4655,116 +4655,6 @@  vaddlvq_u32 (uint32x4_t a)
   return result;
 }
 
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
-vaddv_s8 (int8x8_t a)
-{
-  int8_t result;
-  __asm__ ("addv %b0,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
-vaddv_s16 (int16x4_t a)
-{
-  int16_t result;
-  __asm__ ("addv %h0,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
-vaddv_u8 (uint8x8_t a)
-{
-  uint8_t result;
-  __asm__ ("addv %b0,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
-vaddv_u16 (uint16x4_t a)
-{
-  uint16_t result;
-  __asm__ ("addv %h0,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8_t __attribute__ ((__always_inline__))
-vaddvq_s8 (int8x16_t a)
-{
-  int8_t result;
-  __asm__ ("addv %b0,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16_t __attribute__ ((__always_inline__))
-vaddvq_s16 (int16x8_t a)
-{
-  int16_t result;
-  __asm__ ("addv %h0,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
-vaddvq_s32 (int32x4_t a)
-{
-  int32_t result;
-  __asm__ ("addv %s0,%1.4s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
-vaddvq_u8 (uint8x16_t a)
-{
-  uint8_t result;
-  __asm__ ("addv %b0,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
-vaddvq_u16 (uint16x8_t a)
-{
-  uint16_t result;
-  __asm__ ("addv %h0,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
-vaddvq_u32 (uint32x4_t a)
-{
-  uint32_t result;
-  __asm__ ("addv %s0,%1.4s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vbsl_f32 (uint32x2_t a, float32x2_t b, float32x2_t c)
 {
@@ -16995,22 +16885,6 @@  vaddlv_u32 (uint32x2_t a)
   return result;
 }
 
-__extension__ static __inline int32_t __attribute__ ((__always_inline__))
-vaddv_s32 (int32x2_t a)
-{
-  int32_t result;
-  __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
-  return result;
-}
-
-__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
-vaddv_u32 (uint32x2_t a)
-{
-  uint32_t result;
-  __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
-  return result;
-}
-
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vpaddd_s64 (int64x2_t __a)
 {
@@ -18026,24 +17900,117 @@  vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
   return __a + __b;
 }
 
+/* vaddv */
+
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vaddv_s8 (int8x8_t __a)
+{
+  return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vaddv_s16 (int16x4_t __a)
+{
+  return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vaddv_s32 (int32x2_t __a)
+{
+  return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
+}
+
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vaddv_u8 (uint8x8_t __a)
+{
+  return vget_lane_u8 ((uint8x8_t)
+		__builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vaddv_u16 (uint16x4_t __a)
+{
+  return vget_lane_u16 ((uint16x4_t)
+		__builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vaddv_u32 (uint32x2_t __a)
+{
+  return vget_lane_u32 ((uint32x2_t)
+		__builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0);
+}
+
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vaddvq_s8 (int8x16_t __a)
+{
+  return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vaddvq_s16 (int16x8_t __a)
+{
+  return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vaddvq_s32 (int32x4_t __a)
+{
+  return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vaddvq_s64 (int64x2_t __a)
+{
+  return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
+}
+
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vaddvq_u8 (uint8x16_t __a)
+{
+  return vgetq_lane_u8 ((uint8x16_t)
+		__builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vaddvq_u16 (uint16x8_t __a)
+{
+  return vgetq_lane_u16 ((uint16x8_t)
+		__builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vaddvq_u32 (uint32x4_t __a)
+{
+  return vgetq_lane_u32 ((uint32x4_t)
+		__builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vaddvq_u64 (uint64x2_t __a)
+{
+  return vgetq_lane_u64 ((uint64x2_t)
+		__builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0);
+}
+
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vaddv_f32 (float32x2_t __a)
 {
-  float32x2_t t = __builtin_aarch64_addvv2sf (__a);
+  float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a);
   return vget_lane_f32 (t, 0);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vaddvq_f32 (float32x4_t __a)
 {
-  float32x4_t t = __builtin_aarch64_addvv4sf (__a);
+  float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a);
   return vgetq_lane_f32 (t, 0);
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vaddvq_f64 (float64x2_t __a)
 {
-  float64x2_t t = __builtin_aarch64_addvv2df (__a);
+  float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a);
   return vgetq_lane_f64 (t, 0);
 }
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 3f9a584..5945d23 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -170,7 +170,8 @@ 
     UNSPEC_FMINNMV	; Used in aarch64-simd.md.
     UNSPEC_FMINV	; Used in aarch64-simd.md.
     UNSPEC_FADDV	; Used in aarch64-simd.md.
-    UNSPEC_ADDV		; Used in aarch64-simd.md.
+    UNSPEC_SADDV	; Used in aarch64-simd.md.
+    UNSPEC_UADDV	; Used in aarch64-simd.md.
     UNSPEC_SMAXV	; Used in aarch64-simd.md.
     UNSPEC_SMINV	; Used in aarch64-simd.md.
     UNSPEC_UMAXV	; Used in aarch64-simd.md.
@@ -686,6 +687,8 @@ 
 (define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
 			       UNSPEC_FMAXNMV UNSPEC_FMINNMV])
 
+(define_int_iterator SUADDV [UNSPEC_SADDV UNSPEC_UADDV])
+
 (define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD
 			      UNSPEC_SRHADD UNSPEC_URHADD
 			      UNSPEC_SHSUB UNSPEC_UHSUB
@@ -777,6 +780,7 @@ 
 		      (UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r")
 		      (UNSPEC_SQXTN "s") (UNSPEC_UQXTN "u")
 		      (UNSPEC_USQADD "us") (UNSPEC_SUQADD "su")
+		      (UNSPEC_SADDV "s") (UNSPEC_UADDV "u")
 		      (UNSPEC_SSLI  "s") (UNSPEC_USLI  "u")
 		      (UNSPEC_SSRI  "s") (UNSPEC_USRI  "u")
 		      (UNSPEC_USRA  "u") (UNSPEC_SSRA  "s")
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c b/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c
new file mode 100644
index 0000000..7db1204
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c
@@ -0,0 +1,128 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps -ffast-math" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+extern float fabsf (float);
+extern double fabs (double);
+
+#define NUM_TESTS 16
+#define DELTA 0.000001
+
+int8_t input_int8[] = {1, 56, 2, -9, -90, 23, 54, 76,
+		       -4, 34, 110, -110, 6, 4, 75, -34};
+int16_t input_int16[] = {1, 56, 2, -9, -90, 23, 54, 76,
+			 -4, 34, 110, -110, 6, 4, 75, -34};
+int32_t input_int32[] = {1, 56, 2, -9, -90, 23, 54, 76,
+			 -4, 34, 110, -110, 6, 4, 75, -34};
+int64_t input_int64[] = {1, 56, 2, -9, -90, 23, 54, 76,
+			 -4, 34, 110, -110, 6, 4, 75, -34};
+
+uint8_t input_uint8[] = {1, 56, 2, 9, 90, 23, 54, 76,
+			 4, 34, 110, 110, 6, 4, 75, 34};
+uint16_t input_uint16[] = {1, 56, 2, 9, 90, 23, 54, 76,
+			   4, 34, 110, 110, 6, 4, 75, 34};
+uint32_t input_uint32[] = {1, 56, 2, 9, 90, 23, 54, 76,
+			   4, 34, 110, 110, 6, 4, 75, 34};
+
+uint64_t input_uint64[] = {1, 56, 2, 9, 90, 23, 54, 76,
+			   4, 34, 110, 110, 6, 4, 75, 34};
+
+float input_float32[] = {0.1f, -0.1f, 0.4f, 10.3f,
+			 200.0f, -800.0f, -13.0f, -0.5f,
+			 7.9f, -870.0f, 10.4f, 310.11f,
+			 0.0f, -865.0f, -2213.0f, -1.5f};
+
+double input_float64[] = {0.1, -0.1, 0.4, 10.3,
+			  200.0, -800.0, -13.0, -0.5,
+			  7.9, -870.0, 10.4, 310.11,
+			  0.0, -865.0, -2213.0, -1.5};
+
+#define EQUALF(a, b) (fabsf (a - b) < DELTA)
+#define EQUALD(a, b) (fabs (a - b) < DELTA)
+#define EQUALL(a, b) (a == b)
+
+#define TEST(SUFFIX, Q, TYPE, LANES, FLOAT)				\
+int									\
+test_vaddv##SUFFIX##_##TYPE##x##LANES##_t (void)			\
+{									\
+  int i, j;								\
+  int moves = (NUM_TESTS - LANES) + 1;					\
+  TYPE##_t out_l[NUM_TESTS];						\
+  TYPE##_t out_v[NUM_TESTS];						\
+									\
+  /* Calculate linearly.  */						\
+  for (i = 0; i < moves; i++)						\
+    {									\
+      out_l[i] = input_##TYPE[i];					\
+      for (j = 1; j < LANES; j++)					\
+	out_l[i] += input_##TYPE[i + j];				\
+    }									\
+									\
+  /* Calculate using vector reduction intrinsics.  */			\
+  for (i = 0; i < moves; i++)						\
+    {									\
+      TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i);	\
+      out_v[i] = vaddv##Q##_##SUFFIX (t1);				\
+    }									\
+									\
+  /* Compare.  */							\
+  for (i = 0; i < moves; i++)						\
+    {									\
+      if (!EQUAL##FLOAT (out_v[i], out_l[i]))				\
+	return 0;							\
+    }									\
+  return 1;								\
+}
+
+#define BUILD_VARIANTS(TYPE, STYPE, W32, W64, F)	\
+TEST (STYPE,  , TYPE, W32, F)				\
+TEST (STYPE, q, TYPE, W64, F)				\
+
+BUILD_VARIANTS (int8, s8, 8, 16, L)
+BUILD_VARIANTS (uint8, u8, 8, 16, L)
+/* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
+/* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
+BUILD_VARIANTS (int16, s16, 4, 8, L)
+BUILD_VARIANTS (uint16, u16, 4, 8, L)
+/* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
+/* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
+BUILD_VARIANTS (int32, s32, 2, 4, L)
+BUILD_VARIANTS (uint32, u32, 2, 4, L)
+/* { dg-final { scan-assembler "addp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
+/* { dg-final { scan-assembler "addv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
+TEST (s64, q, int64, 2, D)
+TEST (u64, q, uint64, 2, D)
+/* { dg-final { scan-assembler "addp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */
+
+BUILD_VARIANTS (float32, f32, 2, 4, F)
+/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
+/* { dg-final { scan-assembler "faddp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
+TEST (f64, q, float64, 2, D)
+/* { dg-final { scan-assembler "faddp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */
+
+#undef TEST
+#define TEST(SUFFIX, Q, TYPE, LANES, FLOAT)		\
+{							\
+  if (!test_vaddv##SUFFIX##_##TYPE##x##LANES##_t ())	\
+    abort ();						\
+}
+
+int
+main (int argc, char **argv)
+{
+BUILD_VARIANTS (int8, s8, 8, 16, L)
+BUILD_VARIANTS (uint8, u8, 8, 16, L)
+BUILD_VARIANTS (int16, s16, 4, 8, L)
+BUILD_VARIANTS (uint16, u16, 4, 8, L)
+BUILD_VARIANTS (int32, s32, 2, 4, L)
+BUILD_VARIANTS (uint32, u32, 2, 4, L)
+
+BUILD_VARIANTS (float32, f32, 2, 4, F)
+TEST (f64, q, float64, 2, D)
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
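
For reference, one instantiation of the TEST macro above, hand-expanded
(this expansion is illustrative only; it follows mechanically from the
token pasting for TEST (s32,  , int32, 2, L), as produced by
BUILD_VARIANTS (int32, s32, 2, 4, L)):

  int
  test_vaddvs32_int32x2_t (void)
  {
    int i, j;
    int moves = (NUM_TESTS - 2) + 1;
    int32_t out_l[NUM_TESTS];
    int32_t out_v[NUM_TESTS];

    /* Calculate linearly.  */
    for (i = 0; i < moves; i++)
      {
        out_l[i] = input_int32[i];
        for (j = 1; j < 2; j++)
          out_l[i] += input_int32[i + j];
      }

    /* Calculate using the vaddv_s32 reduction intrinsic.  */
    for (i = 0; i < moves; i++)
      {
        int32x2_t t1 = vld1_s32 (input_int32 + i);
        out_v[i] = vaddv_s32 (t1);
      }

    /* Compare.  */
    for (i = 0; i < moves; i++)
      if (!EQUALL (out_v[i], out_l[i]))
        return 0;

    return 1;
  }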