diff mbox series

[arm] Implement non-GE-setting SIMD32 intrinsics

Message ID 1fbe0f58-d42e-4ae0-51f6-8705c15a75be@foss.arm.com
State New
Headers show
Series [arm] Implement non-GE-setting SIMD32 intrinsics | expand

Commit Message

Kyrill Tkachov Sept. 25, 2019, 10:15 a.m. UTC
Hi all,

This patch is part of a series to implement the SIMD32 ACLE intrinsics [1].
The interesting parts implementation-wise involve adding support for 
setting and reading
the Q bit for saturation and the GE-bits for the packed SIMD instructions.
That will come in a later patch.

For now, this patch implements the other intrinsics that don't need
anything special: each one is just a mapping from arm_acle.h function
to builtin to RTL expander+unspec.

I've compressed as many as I could with iterators so that we end up 
needing only 3
new define_insns.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Will commit to trunk within the next day or two.

Thanks,

Kyrill

[1] https://developer.arm.com/docs/101028/latest/data-processing-intrinsics

2019-09-25  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/arm/arm.md (arm_<simd32_op>): New define_insn.
     (arm_<sup>xtb16): Likewise.
     (arm_usada8): Likewise.
     * config/arm/arm_acle.h (__qadd8, __qsub8, __shadd8, __shsub8,
     __uhadd8, __uhsub8, __uqadd8, __uqsub8, __qadd16, __qasx, __qsax,
     __qsub16, __shadd16, __shasx, __shsax, __shsub16, __uhadd16, __uhasx,
     __uhsax, __uhsub16, __uqadd16, __uqasx, __uqsax, __uqsub16, __sxtab16,
     __sxtb16, __uxtab16, __uxtb16, __smusd, __smusdx, __usad8,
     __usada8): Define.
     * config/arm/arm_acle_builtins.def: Define builtins for the above.
     * config/arm/unspecs.md: Define unspecs for the above.
     * config/arm/iterators.md (SIMD32_NOGE_BINOP): New int_iterator.
     (USXTB16): Likewise.
     (simd32_op): New int_attribute.
     (sup): Handle UNSPEC_SXTB16, UNSPEC_UXTB16.
     * doc/sourcebuild.texi (arm_simd32_ok): Document.

2019-09-25  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * lib/target-supports.exp
     (check_effective_target_arm_simd32_ok_nocache): New procedure.
     (check_effective_target_arm_simd32_ok): Likewise.
     (add_options_for_arm_simd32): Likewise.
     * gcc.target/arm/acle/simd32.c: New test.
diff mbox series

Patch

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 7f966b952bb2f394bdad2c742f82d143404458a8..d091f6744b5054428fdd11c6c10a4628c7a52d9e 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -5058,6 +5058,36 @@ 
    (set_attr "predicable" "yes")]
 )
 
+(define_insn "arm_<sup>xtb16"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(unspec:SI
+	  [(match_operand:SI 1 "s_register_operand" "r")] USXTB16))]
+  "TARGET_INT_SIMD"
+  "<sup>xtb16%?\\t%0, %1"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "alu_dsp_reg")])
+
+(define_insn "arm_<simd32_op>"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(unspec:SI
+	  [(match_operand:SI 1 "s_register_operand" "r")
+	   (match_operand:SI 2 "s_register_operand" "r")] SIMD32_NOGE_BINOP))]
+  "TARGET_INT_SIMD"
+  "<simd32_op>%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "alu_dsp_reg")])
+
+(define_insn "arm_usada8"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(unspec:SI
+	  [(match_operand:SI 1 "s_register_operand" "r")
+	   (match_operand:SI 2 "s_register_operand" "r")
+	   (match_operand:SI 3 "s_register_operand" "r")] UNSPEC_USADA8))]
+  "TARGET_INT_SIMD"
+  "usada8%?\\t%0, %1, %2, %3"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "alu_dsp_reg")])
+
 (define_expand "extendsfdf2"
   [(set (match_operand:DF                  0 "s_register_operand")
 	(float_extend:DF (match_operand:SF 1 "s_register_operand")))]
diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h
index 6857ab1787df0ffa672e5078e5a0b9c9cc52e695..9c6f12d556654b094a23a327c030820172a03a4c 100644
--- a/gcc/config/arm/arm_acle.h
+++ b/gcc/config/arm/arm_acle.h
@@ -173,6 +173,238 @@  __arm_mrrc2 (const unsigned int __coproc, const unsigned int __opc1,
 #endif /*  __ARM_ARCH >= 5.  */
 #endif /* (!__thumb__ || __thumb2__) &&  __ARM_ARCH >= 4.  */
 
+#ifdef __ARM_FEATURE_SIMD32
+typedef int32_t int16x2_t;
+typedef uint32_t uint16x2_t;
+typedef int32_t int8x4_t;
+typedef uint32_t uint8x4_t;
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sxtab16 (int16x2_t __a, int8x4_t __b)
+{
+  return __builtin_arm_sxtab16 (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sxtb16 (int8x4_t __a)
+{
+  return __builtin_arm_sxtb16 (__a);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uxtab16 (uint16x2_t __a, uint8x4_t __b)
+{
+  return __builtin_arm_uxtab16 (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uxtb16 (uint8x4_t __a)
+{
+  return __builtin_arm_uxtb16 (__a);
+}
+
+__extension__ extern __inline int8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__qadd8 (int8x4_t __a, int8x4_t __b)
+{
+  return __builtin_arm_qadd8 (__a, __b);
+}
+
+__extension__ extern __inline int8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__qsub8 (int8x4_t __a, int8x4_t __b)
+{
+  return __builtin_arm_qsub8 (__a, __b);
+}
+
+__extension__ extern __inline int8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__shadd8 (int8x4_t __a, int8x4_t __b)
+{
+  return __builtin_arm_shadd8 (__a, __b);
+}
+
+__extension__ extern __inline int8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__shsub8 (int8x4_t __a, int8x4_t __b)
+{
+  return __builtin_arm_shsub8 (__a, __b);
+}
+
+__extension__ extern __inline uint8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uhadd8 (uint8x4_t __a, uint8x4_t __b)
+{
+  return __builtin_arm_uhadd8 (__a, __b);
+}
+
+__extension__ extern __inline uint8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uhsub8 (uint8x4_t __a, uint8x4_t __b)
+{
+  return __builtin_arm_uhsub8 (__a, __b);
+}
+
+__extension__ extern __inline uint8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uqadd8 (uint8x4_t __a, uint8x4_t __b)
+{
+  return __builtin_arm_uqadd8 (__a, __b);
+}
+
+__extension__ extern __inline uint8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uqsub8 (uint8x4_t __a, uint8x4_t __b)
+{
+  return __builtin_arm_uqsub8 (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__qadd16 (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_qadd16 (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__qasx (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_qasx (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__qsax (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_qsax (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__qsub16 (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_qsub16 (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__shadd16 (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_shadd16 (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__shasx (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_shasx (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__shsax (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_shsax (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__shsub16 (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_shsub16 (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uhadd16 (uint16x2_t __a, uint16x2_t __b)
+{
+  return __builtin_arm_uhadd16 (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uhasx (uint16x2_t __a, uint16x2_t __b)
+{
+  return __builtin_arm_uhasx (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uhsax (uint16x2_t __a, uint16x2_t __b)
+{
+  return __builtin_arm_uhsax (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uhsub16 (uint16x2_t __a, uint16x2_t __b)
+{
+  return __builtin_arm_uhsub16 (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uqadd16 (uint16x2_t __a, uint16x2_t __b)
+{
+  return __builtin_arm_uqadd16 (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uqasx (uint16x2_t __a, uint16x2_t __b)
+{
+  return __builtin_arm_uqasx (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uqsax (uint16x2_t __a, uint16x2_t __b)
+{
+  return __builtin_arm_uqsax (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uqsub16 (uint16x2_t __a, uint16x2_t __b)
+{
+  return __builtin_arm_uqsub16 (__a, __b);
+}
+
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__smusd (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_smusd (__a, __b);
+}
+
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__smusdx (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_smusdx (__a, __b);
+}
+
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__usad8 (uint8x4_t __a, uint8x4_t __b)
+{
+  return __builtin_arm_usad8 (__a, __b);
+}
+
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__usada8 (uint8x4_t __a, uint8x4_t __b, uint32_t __c)
+{
+  return __builtin_arm_usada8 (__a, __b, __c);
+}
+
+#endif
+
 #pragma GCC push_options
 #ifdef __ARM_FEATURE_CRC32
 #ifdef __ARM_FP
diff --git a/gcc/config/arm/arm_acle_builtins.def b/gcc/config/arm/arm_acle_builtins.def
index b2438d66da23d52dfcd185f700246bd445612955..c675fc46dae6552b8762e9bbb6147d8a6d15133a 100644
--- a/gcc/config/arm/arm_acle_builtins.def
+++ b/gcc/config/arm/arm_acle_builtins.def
@@ -42,3 +42,36 @@  VAR1 (MCRR, mcrr, void)
 VAR1 (MCRR, mcrr2, void)
 VAR1 (MRRC, mrrc, di)
 VAR1 (MRRC, mrrc2, di)
+
+VAR1 (BINOP, sxtab16, si)
+VAR1 (UBINOP, uxtab16, si)
+VAR1 (UNOP, sxtb16, si)
+VAR1 (BSWAP, uxtb16, si)
+VAR1 (BINOP, qadd8, si)
+VAR1 (BINOP, qsub8, si)
+VAR1 (BINOP, shadd8, si)
+VAR1 (BINOP, shsub8, si)
+VAR1 (UBINOP, uhadd8, si)
+VAR1 (UBINOP, uhsub8, si)
+VAR1 (UBINOP, uqadd8, si)
+VAR1 (UBINOP, uqsub8, si)
+VAR1 (BINOP, qadd16, si)
+VAR1 (BINOP, qasx, si)
+VAR1 (BINOP, qsax, si)
+VAR1 (BINOP, qsub16, si)
+VAR1 (BINOP, shadd16, si)
+VAR1 (BINOP, shasx, si)
+VAR1 (BINOP, shsax, si)
+VAR1 (BINOP, shsub16, si)
+VAR1 (UBINOP, uhadd16, si)
+VAR1 (UBINOP, uhasx, si)
+VAR1 (UBINOP, uhsax, si)
+VAR1 (UBINOP, uhsub16, si)
+VAR1 (UBINOP, uqadd16, si)
+VAR1 (UBINOP, uqasx, si)
+VAR1 (UBINOP, uqsax, si)
+VAR1 (UBINOP, uqsub16, si)
+VAR1 (BINOP, smusd, si)
+VAR1 (BINOP, smusdx, si)
+VAR1 (UBINOP, usad8, si)
+VAR1 (UTERNOP, usada8, si)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 2d8ef3f5fbd5ca1a34c5dc66ddcd8780b138d5e0..538f5bf6b0116f49b27eef589b0140aa7792e976 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -430,6 +430,19 @@ 
 (define_int_iterator CRYPTO_SELECTING [UNSPEC_SHA1C UNSPEC_SHA1M
                                        UNSPEC_SHA1P])
 
+(define_int_iterator USXTB16 [UNSPEC_SXTB16 UNSPEC_UXTB16])
+(define_int_iterator SIMD32_NOGE_BINOP
+				[UNSPEC_QADD8 UNSPEC_QSUB8 UNSPEC_SHADD8
+				 UNSPEC_SHSUB8 UNSPEC_UHADD8 UNSPEC_UHSUB8
+				 UNSPEC_UQADD8 UNSPEC_UQSUB8
+				 UNSPEC_QADD16 UNSPEC_QASX UNSPEC_QSAX
+				 UNSPEC_QSUB16 UNSPEC_SHADD16 UNSPEC_SHASX
+				 UNSPEC_SHSAX UNSPEC_SHSUB16 UNSPEC_UHADD16
+				 UNSPEC_UHASX UNSPEC_UHSAX UNSPEC_UHSUB16
+				 UNSPEC_UQADD16 UNSPEC_UQASX UNSPEC_UQSAX
+				 UNSPEC_UQSUB16 UNSPEC_SMUSD UNSPEC_SMUSDX
+				 UNSPEC_SXTAB16 UNSPEC_UXTAB16 UNSPEC_USAD8])
+
 (define_int_iterator VQRDMLH_AS [UNSPEC_VQRDMLAH UNSPEC_VQRDMLSH])
 
 (define_int_iterator VFM_LANE_AS [UNSPEC_VFMA_LANE UNSPEC_VFMS_LANE])
@@ -835,6 +848,7 @@ 
 ;; Mapping between vector UNSPEC operations and the signed ('s'),
 ;; unsigned ('u'), poly ('p') or float ('f') nature of their data type.
 (define_int_attr sup [
+  (UNSPEC_SXTB16 "s") (UNSPEC_UXTB16 "u")
   (UNSPEC_VADDL_S "s") (UNSPEC_VADDL_U "u")
   (UNSPEC_VADDW_S "s") (UNSPEC_VADDW_U "u")
   (UNSPEC_VRHADD_S "s") (UNSPEC_VRHADD_U "u")
@@ -1023,6 +1037,22 @@ 
 		      (UNSPEC_VCMLA180 "180")
 		      (UNSPEC_VCMLA270 "270")])
 
+(define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8")
+			    (UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8")
+			    (UNSPEC_UHADD8 "uhadd8") (UNSPEC_UHSUB8 "uhsub8")
+			    (UNSPEC_UQADD8 "uqadd8") (UNSPEC_UQSUB8 "uqsub8")
+			    (UNSPEC_QADD16 "qadd16") (UNSPEC_QASX "qasx")
+			    (UNSPEC_QSAX "qsax") (UNSPEC_QSUB16 "qsub16")
+			    (UNSPEC_SHADD16 "shadd16") (UNSPEC_SHASX "shasx")
+			    (UNSPEC_SHSAX "shsax") (UNSPEC_SHSUB16 "shsub16")
+			    (UNSPEC_UHADD16 "uhadd16") (UNSPEC_UHASX "uhasx")
+			    (UNSPEC_UHSAX "uhsax") (UNSPEC_UHSUB16 "uhsub16")
+			    (UNSPEC_UQADD16 "uqadd16") (UNSPEC_UQASX "uqasx")
+			    (UNSPEC_UQSAX "uqsax") (UNSPEC_UQSUB16 "uqsub16")
+			    (UNSPEC_SMUSD "smusd") (UNSPEC_SMUSDX "smusdx")
+			    (UNSPEC_SXTAB16 "sxtab16") (UNSPEC_UXTAB16 "uxtab16")
+			    (UNSPEC_USAD8 "usad8")])
+
 ;; Both kinds of return insn.
 (define_code_iterator RETURNS [return simple_return])
 (define_code_attr return_str [(return "") (simple_return "simple_")])
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index a9f99d04a8a5b3591f9f06910c292aac893ebb85..08a6cd77ce08d8c9cf42abcf3c9277b769043cfd 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -90,8 +90,42 @@ 
   UNSPEC_SP_TEST	; Represent the testing of stack protector's canary
 			; against the guard.
   UNSPEC_PIC_RESTORE	; Use to restore fdpic register
+
+  UNSPEC_SXTAB16	; Represent the SXTAB16 operation.
+  UNSPEC_UXTAB16	; Represent the UXTAB16 operation.
+  UNSPEC_SXTB16		; Represent the SXTB16 operation.
+  UNSPEC_UXTB16		; Represent the UXTB16 operation.
+  UNSPEC_QADD8		; Represent the QADD8 operation.
+  UNSPEC_QSUB8		; Represent the QSUB8 operation.
+  UNSPEC_SHADD8		; Represent the SHADD8 operation.
+  UNSPEC_SHSUB8		; Represent the SHSUB8 operation.
+  UNSPEC_UHADD8		; Represent the UHADD8 operation.
+  UNSPEC_UHSUB8		; Represent the UHSUB8 operation.
+  UNSPEC_UQADD8		; Represent the UQADD8 operation.
+  UNSPEC_UQSUB8		; Represent the UQSUB8 operation.
+  UNSPEC_QADD16		; Represent the QADD16 operation.
+  UNSPEC_QASX		; Represent the QASX operation.
+  UNSPEC_QSAX		; Represent the QSAX operation.
+  UNSPEC_QSUB16		; Represent the QSUB16 operation.
+  UNSPEC_SHADD16	; Represent the SHADD16 operation.
+  UNSPEC_SHASX		; Represent the SHASX operation.
+  UNSPEC_SHSAX		; Represent the SHSAX operation.
+  UNSPEC_SHSUB16	; Represent the SHSUB16 operation.
+  UNSPEC_UHADD16	; Represent the UHADD16 operation.
+  UNSPEC_UHASX		; Represent the UHASX operation.
+  UNSPEC_UHSAX		; Represent the UHSAX operation.
+  UNSPEC_UHSUB16	; Represent the UHSUB16 operation.
+  UNSPEC_UQADD16	; Represent the UQADD16 operation.
+  UNSPEC_UQASX		; Represent the UQASX operation.
+  UNSPEC_UQSAX		; Represent the UQSAX operation.
+  UNSPEC_UQSUB16	; Represent the UQSUB16 operation.
+  UNSPEC_SMUSD		; Represent the SMUSD operation.
+  UNSPEC_SMUSDX		; Represent the SMUSDX operation.
+  UNSPEC_USAD8		; Represent the USAD8 operation.
+  UNSPEC_USADA8		; Represent the USADA8 operation.
 ])
 
+
 (define_c_enum "unspec" [
   UNSPEC_WADDC		; Used by the intrinsic form of the iWMMXt WADDC instruction.
   UNSPEC_WABS		; Used by the intrinsic form of the iWMMXt WABS instruction.
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 4ace224a8ff5ed4fafed10a69ef00ffb2d7d8c39..9b98f0132635f470bf25a420b68dbdfeafd73c7a 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1900,6 +1900,13 @@  in @ref{arm_coproc2_ok} in addition the following: @code{MCRR} and @code{MRRC}.
 @item arm_coproc4_ok
 ARM target supports all the coprocessor instructions also listed as supported
 in @ref{arm_coproc3_ok} in addition the following: @code{MCRR2} and @code{MRRC2}.
+
+@item arm_simd32_ok
+@anchor{arm_simd32_ok}
+ARM target supports options suitable for accessing the SIMD32 intrinsics from
+@code{arm_acle.h}.
+Some multilibs may be incompatible with these options.
+
 @end table
 
 @subsubsection AArch64-specific attributes
diff --git a/gcc/testsuite/gcc.target/arm/acle/simd32.c b/gcc/testsuite/gcc.target/arm/acle/simd32.c
new file mode 100644
index 0000000000000000000000000000000000000000..f5c116d13968eefa42bea86b1e44bba8c66d7b77
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/simd32.c
@@ -0,0 +1,246 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_simd32_ok } */
+/* { dg-add-options arm_simd32 } */
+
+#include <arm_acle.h>
+
+int16x2_t
+test_sxtab16 (int16x2_t a, int8x4_t b)
+{
+  return __sxtab16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "sxtab16\t...?, ...?, ...?" 1 } } */
+
+
+int16x2_t
+test_sxtb16 (int8x4_t a)
+{
+  return __sxtb16 (a);
+}
+
+/* { dg-final { scan-assembler-times "sxtb16\t...?, ...?" 1 } } */
+
+int8x4_t
+test_qadd8 (int8x4_t a, int8x4_t b)
+{
+  return __qadd8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tqadd8\t...?, ...?, ...?" 1 } } */
+
+int8x4_t
+test_qsub8 (int8x4_t a, int8x4_t b)
+{
+  return __qsub8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tqsub8\t...?, ...?, ...?" 1 } } */
+
+int8x4_t
+test_shadd8 (int8x4_t a, int8x4_t b)
+{
+  return __shadd8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tshadd8\t...?, ...?, ...?" 1 } } */
+
+int8x4_t
+test_shsub8 (int8x4_t a, int8x4_t b)
+{
+  return __shsub8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tshsub8\t...?, ...?, ...?" 1 } } */
+
+uint8x4_t
+test_uhadd8 (uint8x4_t a, uint8x4_t b)
+{
+  return __uhadd8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuhadd8\t...?, ...?, ...?" 1 } } */
+
+uint8x4_t
+test_uhsub8 (uint8x4_t a, uint8x4_t b)
+{
+  return __uhsub8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuhsub8\t...?, ...?, ...?" 1 } } */
+
+uint8x4_t
+test_uqadd8 (uint8x4_t a, uint8x4_t b)
+{
+  return __uqadd8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuqadd8\t...?, ...?, ...?" 1 } } */
+
+uint8x4_t
+test_uqsub8 (uint8x4_t a, uint8x4_t b)
+{
+  return __uqsub8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuqsub8\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_qadd16 (int16x2_t a, int16x2_t b)
+{
+  return __qadd16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tqadd16\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_qasx (int16x2_t a, int16x2_t b)
+{
+  return __qasx (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tqasx\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_qsax (int16x2_t a, int16x2_t b)
+{
+  return __qsax (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tqsax\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_qsub16 (int16x2_t a, int16x2_t b)
+{
+  return __qsub16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tqsub16\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_shadd16 (int16x2_t a, int16x2_t b)
+{
+  return __shadd16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tshadd16\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_shasx (int16x2_t a, int16x2_t b)
+{
+  return __shasx (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tshasx\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_shsax (int16x2_t a, int16x2_t b)
+{
+  return __shsax (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tshsax\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_shsub16 (int16x2_t a, int16x2_t b)
+{
+  return __shsub16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tshsub16\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_uhadd16 (uint16x2_t a, uint16x2_t b)
+{
+  return __uhadd16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuhadd16\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_uhasx (uint16x2_t a, uint16x2_t b)
+{
+  return __uhasx (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuhasx\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_uhsax (uint16x2_t a, uint16x2_t b)
+{
+  return __uhsax (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuhsax\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_uhsub16 (uint16x2_t a, uint16x2_t b)
+{
+  return __uhsub16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuhsub16\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_uqadd16 (uint16x2_t a, uint16x2_t b)
+{
+  return __uqadd16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuqadd16\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_uqasx (uint16x2_t a, uint16x2_t b)
+{
+  return __uqasx (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuqasx\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_uqsax (uint16x2_t a, uint16x2_t b)
+{
+  return __uqsax (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuqsax\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_uqsub16 (uint16x2_t a, uint16x2_t b)
+{
+  return __uqsub16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuqsub16\t...?, ...?, ...?" 1 } } */
+
+int32_t
+test_smusd (int16x2_t a, int16x2_t b)
+{
+  return __smusd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tsmusd\t...?, ...?, ...?" 1 } } */
+
+int32_t
+test_smusdx (int16x2_t a, int16x2_t b)
+{
+  return __smusdx (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tsmusdx\t...?, ...?, ...?" 1 } } */
+
+uint32_t
+test_usad8 (uint8x4_t a, uint8x4_t b)
+{
+  return __usad8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tusad8\t...?, ...?, ...?" 1 } } */
+
+uint32_t
+test_usada8 (uint8x4_t a, uint8x4_t b, uint32_t c)
+{
+  return __usada8 (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "\tusada8\t...?, ...?, ...?, ...?" 1 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 6a1aaca9691b7fe9ae5e0e5b1874c7af34a3a6e3..0268acd91d8f65e7a24653592dbe1d374d24359c 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3806,6 +3806,45 @@  proc check_effective_target_arm_neon_ok { } {
 		check_effective_target_arm_neon_ok_nocache]
 }
 
+
+# Return 1 if this is an ARM target supporting the SIMD32 intrinsics
+# from arm_acle.h.  Some multilibs may be incompatible with these options.
+# Also set et_arm_simd32_flags to the best options to add.
+# arm_acle.h includes stdint.h which can cause trouble with incompatible
+# -mfloat-abi= options.
+
+proc check_effective_target_arm_simd32_ok_nocache { } {
+    global et_arm_simd32_flags
+    set et_arm_simd32_flags ""
+    foreach flags {"" "-march=armv6" "-march=armv6 -mfloat-abi=softfp" "-march=armv6 -mfloat-abi=hard"} {
+      if { [check_no_compiler_messages_nocache arm_simd32_ok object {
+	#include <arm_acle.h>
+	int dummy;
+	#ifndef __ARM_FEATURE_SIMD32
+	#error not SIMD32
+	#endif
+      } "$flags"] } {
+	set et_arm_simd32_flags $flags
+	return 1
+      }
+    }
+
+  return 0
+}
+
+proc check_effective_target_arm_simd32_ok { } {
+    return [check_cached_effective_target arm_simd32_ok \
+		check_effective_target_arm_simd32_ok_nocache]
+}
+
+proc add_options_for_arm_simd32 { flags } {
+    if { ! [check_effective_target_arm_simd32_ok] } {
+	return "$flags"
+    }
+    global et_arm_simd32_flags
+    return "$flags $et_arm_simd32_flags"
+}
+
 # Return 1 if this is an ARM target supporting -mfpu=neon without any
 # -mfloat-abi= option.  Useful in tests where add_options is not
 # supported (such as lto tests).