[arm,4/X] Add initial support for GE-setting SIMD32 intrinsics
diff mbox series

Message ID 1de505a8-c93a-4beb-6574-62945c9b8ae9@foss.arm.com
State New
Headers show
Series
  • [arm,4/X] Add initial support for GE-setting SIMD32 intrinsics
Related show

Commit Message

Kyrill Tkachov Nov. 7, 2019, 10:27 a.m. UTC
Hi all,

This patch adds in plumbing for the ACLE intrinsics that set the GE bits in
APSR.  These are special SIMD instructions in Armv6 that pack bytes or
halfwords into the 32-bit general-purpose registers and set the GE bits in
APSR to indicate if some of the "lanes" of the result have overflowed or have
some other instruction-specific property.
These bits can then be used by the SEL instruction (accessed through the __sel
intrinsic) to select lanes for further processing.

This situation is similar to the Q-setting intrinsics: we have to track the GE
fake register, detect when a function reads it through __sel and restrict
existing patterns that may generate GE-clobbering instructions from
straight-line C code when reading the GE bits matters.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Committed to trunk.
Thanks,
Kyrill


2019-11-07  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/arm/aout.h (REGISTER_NAMES): Add apsrge.
     * config/arm/arm.md (APSRGE_REGNUM): Define.
     (arm_<simd32_op>): New define_insn.
     (arm_sel): Likewise.
     * config/arm/arm.h (FIXED_REGISTERS): Add entry for apsrge.
     (CALL_USED_REGISTERS): Likewise.
     (REG_ALLOC_ORDER): Likewise.
     (FIRST_PSEUDO_REGISTER): Update value.
     (ARM_GE_BITS_READ): Define.
     * config/arm/arm.c (arm_conditional_register_usage): Clear
     APSRGE_REGNUM from operand_reg_set.
     (arm_ge_bits_access): Define.
     * config/arm/arm-builtins.c (arm_check_builtin_call): Handle
     ARM_BUILTIN_sel.
     * config/arm/arm-protos.h (arm_ge_bits_access): Declare prototype.
     * config/arm/arm-fixed.md (add<mode>3): Convert to define_expand.
     FAIL if ARM_GE_BITS_READ.
     (*arm_add<mode>3): New define_insn.
     (sub<mode>3): Convert to define_expand.  FAIL if ARM_GE_BITS_READ.
     (*arm_sub<mode>3): New define_insn.
     * config/arm/arm_acle.h (__sel, __sadd8, __ssub8, __uadd8, __usub8,
     __sadd16, __sasx, __ssax, __ssub16, __uadd16, __uasx, __usax,
     __usub16): Define.
     * config/arm/arm_acle_builtins.def: Define builtins for the above.
     * config/arm/iterators.md (SIMD32_GE): New int_iterator.
     (simd32_op): Handle the above.
     * config/arm/unspecs.md (UNSPEC_GE_SET): Define.
     (UNSPEC_SEL, UNSPEC_SADD8, UNSPEC_SSUB8, UNSPEC_UADD8, UNSPEC_USUB8,
     UNSPEC_SADD16, UNSPEC_SASX, UNSPEC_SSAX, UNSPEC_SSUB16, UNSPEC_UADD16,
     UNSPEC_UASX, UNSPEC_USAX, UNSPEC_USUB16): Define.

2019-11-07  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * gcc.target/arm/acle/simd32.c: Update test.
     * gcc.target/arm/acle/simd32_sel.c: New test.

Patch
diff mbox series

diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index a5f83cb503f61cc1cab0e61795edde33250610e7..72782758853a869bcb9a9d69f3fa0da979cd711f 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -72,7 +72,7 @@ 
   "wr8",   "wr9",   "wr10",  "wr11",				\
   "wr12",  "wr13",  "wr14",  "wr15",				\
   "wcgr0", "wcgr1", "wcgr2", "wcgr3",				\
-  "cc", "vfpcc", "sfp", "afp", "apsrq"				\
+  "cc", "vfpcc", "sfp", "afp", "apsrq", "apsrge"		\
 }
 #endif
 
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 995f50785f6ebff7b3cd47185516f7bcb4fd5b81..2d902d0b325bc1fe5e22831ef8a59a2bb37c1225 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -3370,6 +3370,13 @@  arm_check_builtin_call (location_t , vec<location_t> , tree fndecl,
 	  = tree_cons (get_identifier ("acle qbit"), NULL_TREE,
 		       DECL_ATTRIBUTES (cfun->decl));
     }
+  if (fcode == ARM_BUILTIN_sel)
+    {
+      if (cfun && cfun->decl)
+	DECL_ATTRIBUTES (cfun->decl)
+	  = tree_cons (get_identifier ("acle gebits"), NULL_TREE,
+		       DECL_ATTRIBUTES (cfun->decl));
+    }
   return true;
 }
 
diff --git a/gcc/config/arm/arm-fixed.md b/gcc/config/arm/arm-fixed.md
index 85dbc5d05c35921bc5115df68d30292a712729cf..6d949ba7064c0587d4c5d7b855f2c04c6d0e08e7 100644
--- a/gcc/config/arm/arm-fixed.md
+++ b/gcc/config/arm/arm-fixed.md
@@ -28,11 +28,22 @@ 
    (set_attr "predicable_short_it" "yes,no")
    (set_attr "type" "alu_sreg")])
 
-(define_insn "add<mode>3"
+(define_expand "add<mode>3"
+  [(set (match_operand:ADDSUB 0 "s_register_operand")
+	(plus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand")
+		     (match_operand:ADDSUB 2 "s_register_operand")))]
+  "TARGET_INT_SIMD"
+  {
+    if (ARM_GE_BITS_READ)
+      FAIL;
+  }
+)
+
+(define_insn "*arm_add<mode>3"
   [(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
 	(plus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand" "r")
 		     (match_operand:ADDSUB 2 "s_register_operand" "r")))]
-  "TARGET_INT_SIMD"
+  "TARGET_INT_SIMD && !ARM_GE_BITS_READ"
   "sadd<qaddsub_suf>%?\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
    (set_attr "type" "alu_dsp_reg")])
@@ -76,11 +87,22 @@ 
    (set_attr "predicable_short_it" "yes,no")
    (set_attr "type" "alu_sreg")])
 
-(define_insn "sub<mode>3"
+(define_expand "sub<mode>3"
+  [(set (match_operand:ADDSUB 0 "s_register_operand")
+	(minus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand")
+		     (match_operand:ADDSUB 2 "s_register_operand")))]
+  "TARGET_INT_SIMD"
+  {
+    if (ARM_GE_BITS_READ)
+      FAIL;
+  }
+)
+
+(define_insn "*arm_sub<mode>3"
   [(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
 	(minus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand" "r")
 		      (match_operand:ADDSUB 2 "s_register_operand" "r")))]
-  "TARGET_INT_SIMD"
+  "TARGET_INT_SIMD && !ARM_GE_BITS_READ"
   "ssub<qaddsub_suf>%?\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
    (set_attr "type" "alu_dsp_reg")])
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 963dc3e92f0119f424014a023edb51fbf32fc63f..a3f246bc1770a3942a6c9d2551063cb008f37afe 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -61,6 +61,7 @@  extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
 extern void arm_emit_speculation_barrier_function (void);
 extern void arm_decompose_di_binop (rtx, rtx, rtx *, rtx *, rtx *, rtx *);
 extern bool arm_q_bit_access (void);
+extern bool arm_ge_bits_access (void);
 
 #ifdef RTX_CODE
 extern void arm_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 1bbd006fa22a3ccc2b5f732aa11c3f1c7cf7958d..bf7123f3abbeca846f875962128f864d7c046a9a 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -722,6 +722,8 @@  extern int arm_arch_cmse;
 			fp exactly at all times.
 	apsrq		Nor this, it is used to track operations on the Q bit
 			of APSR by ACLE saturating intrinsics.
+	apsrge		Nor this, it is used to track operations on the GE bits
+			of APSR by ACLE SIMD32 intrinsics.
 
    *: See TARGET_CONDITIONAL_REGISTER_USAGE  */
 
@@ -769,7 +771,7 @@  extern int arm_arch_cmse;
   1,1,1,1,1,1,1,1,		\
   1,1,1,1,			\
   /* Specials.  */		\
-  1,1,1,1,1			\
+  1,1,1,1,1,1			\
 }
 
 /* 1 for registers not available across function calls.
@@ -799,7 +801,7 @@  extern int arm_arch_cmse;
   1,1,1,1,1,1,1,1,		\
   1,1,1,1,			\
   /* Specials.  */		\
-  1,1,1,1,1			\
+  1,1,1,1,1,1			\
 }
 
 #ifndef SUBTARGET_CONDITIONAL_REGISTER_USAGE
@@ -974,10 +976,11 @@  extern int arm_arch_cmse;
   ((((REGNUM) - FIRST_VFP_REGNUM) & 3) == 0 \
    && (LAST_VFP_REGNUM - (REGNUM) >= 2 * (N) - 1))
 
-/* The number of hard registers is 16 ARM + 1 CC + 1 SFP + 1 AFP + 1 APSRQ.  */
+/* The number of hard registers is 16 ARM + 1 CC + 1 SFP + 1 AFP
+   + 1 APSRQ + 1 APSRGE.  */
 /* Intel Wireless MMX Technology registers add 16 + 4 more.  */
 /* VFP (VFP3) adds 32 (64) + 1 VFPCC.  */
-#define FIRST_PSEUDO_REGISTER   105
+#define FIRST_PSEUDO_REGISTER   106
 
 #define DBX_REGISTER_NUMBER(REGNO) arm_dbx_register_number (REGNO)
 
@@ -1061,7 +1064,7 @@  extern int arm_regs_in_sequence[];
   /* Registers not for general use.  */		\
   CC_REGNUM, VFPCC_REGNUM,			\
   FRAME_POINTER_REGNUM, ARG_POINTER_REGNUM,	\
-  SP_REGNUM, PC_REGNUM, APSRQ_REGNUM		\
+  SP_REGNUM, PC_REGNUM, APSRQ_REGNUM, APSRGE_REGNUM	\
 }
 
 /* Use different register alloc ordering for Thumb.  */
@@ -1402,6 +1405,7 @@  machine_function;
 #endif
 
 #define ARM_Q_BIT_READ (arm_q_bit_access ())
+#define ARM_GE_BITS_READ (arm_ge_bits_access ())
 
 /* As in the machine_function, a global set of call-via labels, for code 
    that is in text_section.  */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1ce6931c6e993160ca859e7736963da33fda56b5..0c39e96be39d02cf395a0e51c4c8d7b247af7689 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -28777,8 +28777,9 @@  arm_conditional_register_usage (void)
 	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
     }
 
-  /* The Q bit is only accessed via special ACLE patterns.  */
+  /* The Q and GE bits are only accessed via special ACLE patterns.  */
   CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
+  CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
 
   SUBTARGET_CONDITIONAL_REGISTER_USAGE
 }
@@ -32025,6 +32026,16 @@  arm_q_bit_access (void)
   return true;
 }
 
+/* Have we recorded an explicit access to the GE bits of APSR?  */
+bool
+arm_ge_bits_access (void)
+{
+  if (cfun && cfun->decl)
+    return lookup_attribute ("acle gebits",
+			     DECL_ATTRIBUTES (cfun->decl));
+  return true;
+}
+
 #if CHECKING_P
 namespace selftest {
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 05c8ca2772d4475a25b037e3e745c9558e1c5742..4501a8518775790250dcb7dc3dff4f34b1635076 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -40,6 +40,7 @@ 
    (CC_REGNUM       100)	; Condition code pseudo register
    (VFPCC_REGNUM    101)	; VFP Condition code pseudo register
    (APSRQ_REGNUM    104)	; Q bit pseudo register
+   (APSRGE_REGNUM   105)	; GE bits pseudo register
   ]
 )
 ;; 3rd operand to select_dominance_cc_mode
@@ -5834,8 +5835,8 @@ 
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(unspec:SI
 	  [(match_operand:SI 1 "s_register_operand" "r")
-	   (match_operand:SI 2 "s_register_operand" "r")
-	   (match_operand:SI 3 "s_register_operand" "r")] UNSPEC_USADA8))]
+	  (match_operand:SI 2 "s_register_operand" "r")
+	  (match_operand:SI 3 "s_register_operand" "r")] UNSPEC_USADA8))]
   "TARGET_INT_SIMD"
   "usada8%?\\t%0, %1, %2, %3"
   [(set_attr "predicable" "yes")
@@ -5852,6 +5853,29 @@ 
   [(set_attr "predicable" "yes")
    (set_attr "type" "smlald")])
 
+(define_insn "arm_<simd32_op>"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(unspec:SI
+	  [(match_operand:SI 1 "s_register_operand" "r")
+	   (match_operand:SI 2 "s_register_operand" "r")] SIMD32_GE))
+   (set (reg:CC APSRGE_REGNUM)
+	(unspec:CC [(reg:CC APSRGE_REGNUM)] UNSPEC_GE_SET))]
+  "TARGET_INT_SIMD"
+  "<simd32_op>%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "alu_sreg")])
+
+(define_insn "arm_sel"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(unspec:SI
+	  [(match_operand:SI 1 "s_register_operand" "r")
+	   (match_operand:SI 2 "s_register_operand" "r")
+	   (reg:CC APSRGE_REGNUM)] UNSPEC_SEL))]
+  "TARGET_INT_SIMD"
+  "sel%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "alu_sreg")])
+
 (define_expand "extendsfdf2"
   [(set (match_operand:DF                  0 "s_register_operand")
 	(float_extend:DF (match_operand:SF 1 "s_register_operand")))]
diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h
index fb144cf789c87db92d688c2444d1204e0bb14ffe..b8d02a5502f273fcba492bbeba2542b13334a8ea 100644
--- a/gcc/config/arm/arm_acle.h
+++ b/gcc/config/arm/arm_acle.h
@@ -431,6 +431,97 @@  __smlsldx (int16x2_t __a, int16x2_t __b, int64_t __c)
   return __builtin_arm_smlsldx (__a, __b, __c);
 }
 
+__extension__ extern __inline uint8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sel (uint8x4_t __a, uint8x4_t __b)
+{
+  return __builtin_arm_sel (__a, __b);
+}
+
+__extension__ extern __inline int8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sadd8 (int8x4_t __a, int8x4_t __b)
+{
+  return __builtin_arm_sadd8 (__a, __b);
+}
+
+__extension__ extern __inline int8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__ssub8 (int8x4_t __a, int8x4_t __b)
+{
+  return __builtin_arm_ssub8 (__a, __b);
+}
+
+__extension__ extern __inline uint8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uadd8 (uint8x4_t __a, uint8x4_t __b)
+{
+  return __builtin_arm_uadd8 (__a, __b);
+}
+
+__extension__ extern __inline uint8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__usub8 (uint8x4_t __a, uint8x4_t __b)
+{
+  return __builtin_arm_usub8 (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sadd16 (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_sadd16 (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sasx (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_sasx (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__ssax (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_ssax (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__ssub16 (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_ssub16 (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uadd16 (uint16x2_t __a, uint16x2_t __b)
+{
+  return __builtin_arm_uadd16 (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uasx (uint16x2_t __a, uint16x2_t __b)
+{
+  return __builtin_arm_uasx (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__usax (uint16x2_t __a, uint16x2_t __b)
+{
+  return __builtin_arm_usax (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__usub16 (uint16x2_t __a, uint16x2_t __b)
+{
+  return __builtin_arm_usub16 (__a, __b);
+}
+
 #endif
 
 #ifdef __ARM_FEATURE_SAT
diff --git a/gcc/config/arm/arm_acle_builtins.def b/gcc/config/arm/arm_acle_builtins.def
index 85dd87e9d8e1507b579d511245537aa75c3ce3d8..715c3c94e8c8f6355e880a36eb275be80d1a3912 100644
--- a/gcc/config/arm/arm_acle_builtins.def
+++ b/gcc/config/arm/arm_acle_builtins.def
@@ -91,3 +91,19 @@  VAR1 (TERNOP, smlatb, si)
 VAR1 (TERNOP, smlatt, si)
 VAR1 (TERNOP, smlawb, si)
 VAR1 (TERNOP, smlawt, si)
+
+VAR1 (BINOP, sadd8, si)
+VAR1 (BINOP, ssub8, si)
+VAR1 (BINOP, sadd16, si)
+VAR1 (BINOP, sasx, si)
+VAR1 (BINOP, ssax, si)
+VAR1 (BINOP, ssub16, si)
+
+VAR1 (UBINOP, uadd8, si)
+VAR1 (UBINOP, usub8, si)
+VAR1 (UBINOP, uadd16, si)
+VAR1 (UBINOP, uasx, si)
+VAR1 (UBINOP, usax, si)
+VAR1 (UBINOP, usub16, si)
+
+VAR1 (UBINOP, sel, si)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 006ffd7fc6c8c69e169a4f2e6321d312b70225d6..2394a959d19b6a7f2cff8fb7609da6231dee14d6 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -448,6 +448,11 @@ 
 
 (define_int_iterator SMLAWBT [UNSPEC_SMLAWB UNSPEC_SMLAWT])
 
+(define_int_iterator SIMD32_GE [UNSPEC_SADD8 UNSPEC_SSUB8 UNSPEC_UADD8
+				UNSPEC_USUB8 UNSPEC_SADD16 UNSPEC_SASX
+				UNSPEC_SSAX UNSPEC_SSUB16 UNSPEC_UADD16
+				UNSPEC_UASX UNSPEC_USAX UNSPEC_USUB16])
+
 (define_int_iterator VQRDMLH_AS [UNSPEC_VQRDMLAH UNSPEC_VQRDMLSH])
 
 (define_int_iterator VFM_LANE_AS [UNSPEC_VFMA_LANE UNSPEC_VFMS_LANE])
@@ -1064,7 +1069,13 @@ 
 			    (UNSPEC_SXTAB16 "sxtab16") (UNSPEC_UXTAB16 "uxtab16")
 			    (UNSPEC_USAD8 "usad8") (UNSPEC_SMLALD "smlald")
 			    (UNSPEC_SMLALDX "smlaldx") (UNSPEC_SMLSLD "smlsld")
-			    (UNSPEC_SMLSLDX "smlsldx")])
+			    (UNSPEC_SMLSLDX "smlsldx")(UNSPEC_SADD8 "sadd8")
+			    (UNSPEC_UADD8 "uadd8") (UNSPEC_SSUB8 "ssub8")
+			    (UNSPEC_USUB8 "usub8") (UNSPEC_SADD16 "sadd16")
+			    (UNSPEC_SASX "sasx") (UNSPEC_SSAX "ssax")
+			    (UNSPEC_SSUB16 "ssub16") (UNSPEC_UADD16 "uadd16")
+			    (UNSPEC_UASX "uasx") (UNSPEC_USAX "usax")
+			    (UNSPEC_USUB16 "usub16")])
 
 ;; Both kinds of return insn.
 (define_code_iterator RETURNS [return simple_return])
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 06988abb67eee27e948d8039df6a94eefb187618..dfd6dea3421aaa1e1c597d4e7c6c0ad7a856b9fe 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -71,6 +71,7 @@ 
   UNSPEC_UNALIGNED_STORE ; Same for str/strh.
   UNSPEC_PIC_UNIFIED    ; Create a common pic addressing form.
   UNSPEC_Q_SET          ; Represent setting the Q bit.
+  UNSPEC_GE_SET         ; Represent setting the GE bits.
   UNSPEC_APSR_READ      ; Represent reading the APSR.
 
   UNSPEC_LL		; Represent an unpaired load-register-exclusive.
@@ -132,6 +133,19 @@ 
   UNSPEC_SMLSLDX	; Represent the SMLSLDX operation.
   UNSPEC_SMLAWB		; Represent the SMLAWB operation.
   UNSPEC_SMLAWT		; Represent the SMLAWT operation.
+  UNSPEC_SEL		; Represent the SEL operation.
+  UNSPEC_SADD8		; Represent the SADD8 operation.
+  UNSPEC_SSUB8		; Represent the SSUB8 operation.
+  UNSPEC_UADD8		; Represent the UADD8 operation.
+  UNSPEC_USUB8		; Represent the USUB8 operation.
+  UNSPEC_SADD16		; Represent the SADD16 operation.
+  UNSPEC_SASX		; Represent the SASX operation.
+  UNSPEC_SSAX		; Represent the SSAX operation.
+  UNSPEC_SSUB16		; Represent the SSUB16 operation.
+  UNSPEC_UADD16		; Represent the UADD16 operation.
+  UNSPEC_UASX		; Represent the UASX operation.
+  UNSPEC_USAX		; Represent the USAX operation.
+  UNSPEC_USUB16		; Represent the USUB16 operation.
 ])
 
 
diff --git a/gcc/testsuite/gcc.target/arm/acle/simd32.c b/gcc/testsuite/gcc.target/arm/acle/simd32.c
index e43ea96befdcbc581f61cb3fa798a49f13cd640a..d4304d867f357085877983ca08cd245e444e1958 100644
--- a/gcc/testsuite/gcc.target/arm/acle/simd32.c
+++ b/gcc/testsuite/gcc.target/arm/acle/simd32.c
@@ -276,3 +276,99 @@  test_smlsldx (int16x2_t a, int16x2_t b, int64_t c)
 }
 
 /* { dg-final { scan-assembler-times "\tsmlsldx\t...?, ...?, ...?, ...?" 1 } } */
+
+int8x4_t
+test_sadd8 (int8x4_t a, int8x4_t b)
+{
+  return __sadd8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tsadd8\t...?, ...?, ...?" 1 } } */
+
+int8x4_t
+test_ssub8 (int8x4_t a, int8x4_t b)
+{
+  return __ssub8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tssub8\t...?, ...?, ...?" 1 } } */
+
+uint8x4_t
+test_uadd8 (uint8x4_t a, uint8x4_t b)
+{
+  return __uadd8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuadd8\t...?, ...?, ...?" 1 } } */
+
+uint8x4_t
+test_usub8 (uint8x4_t a, uint8x4_t b)
+{
+  return __usub8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tusub8\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_sadd16 (int16x2_t a, int16x2_t b)
+{
+  return __sadd16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tsadd16\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_sasx (int16x2_t a, int16x2_t b)
+{
+  return __sasx (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tsasx\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_ssax (int16x2_t a, int16x2_t b)
+{
+  return __ssax (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tssax\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_ssub16 (int16x2_t a, int16x2_t b)
+{
+  return __ssub16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tssub16\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_uadd16 (uint16x2_t a, uint16x2_t b)
+{
+  return __uadd16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuadd16\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_uasx (uint16x2_t a, uint16x2_t b)
+{
+  return __uasx (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuasx\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_usax (uint16x2_t a, uint16x2_t b)
+{
+  return __usax (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tusax\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_usub16 (uint16x2_t a, uint16x2_t b)
+{
+  return __usub16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tusub16\t...?, ...?, ...?" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/acle/simd32_sel.c b/gcc/testsuite/gcc.target/arm/acle/simd32_sel.c
new file mode 100644
index 0000000000000000000000000000000000000000..9affc7a8563c78c10f47ae27d31e8a4da00e9f1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/simd32_sel.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_simd32_ok } */
+/* { dg-add-options arm_simd32 } */
+
+#include <arm_acle.h>
+
+int8x4_t
+test_sel (int8x4_t a, int8x4_t b, uint8x4_t c, uint8x4_t d)
+{
+  int8x4_t res1 = __sadd8 (a, b);
+  return __sel (c, d);
+}
+
+/* { dg-final { scan-assembler-times "sadd8\t...?, ...?, ...?" 1 } } */
+/* { dg-final { scan-assembler-times "sel\t...?, ...?, ...?" 1 } } */