diff mbox series

[Arm] Implement scalar Custom Datapath Extension intrinsics

Message ID AM6PR08MB315792C1CB25894B4AFAD2AEE0C00@AM6PR08MB3157.eurprd08.prod.outlook.com
State New
Headers show
Series [Arm] Implement scalar Custom Datapath Extension intrinsics | expand

Commit Message

Matthew Malcomson April 8, 2020, 8:45 a.m. UTC
Patch rebased onto recent trunk.


This patch introduces the scalar CDE (Custom Datapath Extension)
intrinsics for the arm backend.

There is nothing beyond the standard in this patch.  We simply build upon what
has been done by Dennis for the vector intrinsics.

We do add `+cdecp6` to the default arguments in `target-supports.exp`; this
allows coprocessor 6 to be used in tests.  The tests use this alternate
coprocessor to ease assembler scanning by looking for a use of coprocessor 6.

We also ensure that any DImode registers are put in an even-odd register pair
when compiling for a target with CDE -- this avoids faulty code generation for -Os
when producing the cx*d instructions.

Testing done:
Bootstrapped and regtested for arm-none-linux-gnueabihf.

gcc/ChangeLog:

2020-04-08  Matthew Malcomson  <matthew.malcomson@arm.com>

	* config/arm/arm.c (arm_hard_regno_mode_ok): DImode registers forced into
	even-odd register pairs for TARGET_CDE.
	* config/arm/arm.h (ARM_CCDE_CONST_1): New.
	(ARM_CCDE_CONST_2): New.
	(ARM_CCDE_CONST_3): New.
	* config/arm/arm.md (arm_cx1si, arm_cx1di, arm_cx1asi, arm_cx1adi,
	arm_cx2si, arm_cx2di, arm_cx2asi, arm_cx2adi, arm_cx3si, arm_cx3di,
	arm_cx3asi, arm_cx3adi): New patterns.
	* config/arm/arm_cde.h (__arm_cx1, __arm_cx1a, __arm_cx2, __arm_cx2a,
	__arm_cx3, __arm_cx3a, __arm_cx1d, __arm_cx1da, __arm_cx2d, __arm_cx2da,
	__arm_cx3d, __arm_cx3da): New ACLE function macros.
	* config/arm/arm_cde_builtins.def (cx1, cx1a, cx2, cx2a, cx3, cx3a): Define
	intrinsics.
	* config/arm/iterators.md (cde_suffix, cde_dest): New mode attributes.
	* config/arm/predicates.md (const_int_ccde1_operand,
	const_int_ccde2_operand, const_int_ccde3_operand): New.
	* config/arm/unspecs.md (UNSPEC_CDE, UNSPEC_CDEA): New.

gcc/testsuite/ChangeLog:

2020-04-08  Matthew Malcomson  <matthew.malcomson@arm.com>

	* gcc.target/arm/acle/cde-errors.c: New test.
	* gcc.target/arm/acle/cde.c: New test.
	* lib/target-supports.exp: Update CDE flags to enable coprocessor 6.



###############     Attachment also inlined for ease of reply    ###############
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index ca36a74cd1fa161c388961588fa0f96030b7888e..83886a2fcb3844f6a5060e451125a6cd2d505c5c 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -576,6 +576,9 @@ extern int arm_arch_cde;
 extern int arm_arch_cde_coproc;
 extern const int arm_arch_cde_coproc_bits[];
 #define ARM_CDE_CONST_COPROC	7
+#define ARM_CCDE_CONST_1	((1 << 13) - 1)
+#define ARM_CCDE_CONST_2	((1 << 9 ) - 1)
+#define ARM_CCDE_CONST_3	((1 << 6 ) - 1)
 #define ARM_VCDE_CONST_1	((1 << 11) - 1)
 #define ARM_VCDE_CONST_2	((1 << 6 ) - 1)
 #define ARM_VCDE_CONST_3	((1 << 3 ) - 1)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index da0bfbc35501ba40324a38ee9ebc194f43196837..be076e4ac59be7f224b769bbca4013a554b50c07 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -25057,10 +25057,11 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
       if (ARM_NUM_REGS (mode) > 4)
 	return false;
 
-      if (TARGET_THUMB2 && !TARGET_HAVE_MVE)
+      if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
 	return true;
 
-      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
+      return !((TARGET_LDRD || TARGET_CDE)
+	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
     }
 
   if (regno == FRAME_POINTER_REGNUM
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 6d5560398dae3d0ace0342b4907542d2a6865f70..9c4d66f4efe70d9ab8896865cbf45285e5cfbaf9 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -4408,6 +4408,70 @@
    (set_attr "shift" "3")
    (set_attr "type" "logic_shift_reg")])
 
+;; Custom Datapath Extension insns.
+(define_insn "arm_cx1<mode>"
+   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
+	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+	               (match_operand:SI 2 "const_int_ccde1_operand" "i")]
+	    UNSPEC_CDE))]
+   "TARGET_CDE"
+   "cx1<cde_suffix>\\tp%c1, <cde_dest>, %2"
+)
+
+(define_insn "arm_cx1a<mode>"
+   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
+	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+		       (match_operand:SIDI 2 "s_register_operand" "0")
+	               (match_operand:SI 3 "const_int_ccde1_operand" "i")]
+	    UNSPEC_CDEA))]
+   "TARGET_CDE"
+   "cx1<cde_suffix>a\\tp%c1, <cde_dest>, %3"
+)
+
+(define_insn "arm_cx2<mode>"
+   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
+	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+		       (match_operand:SI 2 "s_register_operand" "r")
+	               (match_operand:SI 3 "const_int_ccde2_operand" "i")]
+	    UNSPEC_CDE))]
+   "TARGET_CDE"
+   "cx2<cde_suffix>\\tp%c1, <cde_dest>, %2, %3"
+)
+
+(define_insn "arm_cx2a<mode>"
+   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
+	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+		       (match_operand:SIDI 2 "s_register_operand" "0")
+		       (match_operand:SI 3 "s_register_operand" "r")
+	               (match_operand:SI 4 "const_int_ccde2_operand" "i")]
+	    UNSPEC_CDEA))]
+   "TARGET_CDE"
+   "cx2<cde_suffix>a\\tp%c1, <cde_dest>, %3, %4"
+)
+
+(define_insn "arm_cx3<mode>"
+   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
+	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+		       (match_operand:SI 2 "s_register_operand" "r")
+		       (match_operand:SI 3 "s_register_operand" "r")
+	               (match_operand:SI 4 "const_int_ccde3_operand" "i")]
+	    UNSPEC_CDE))]
+   "TARGET_CDE"
+   "cx3<cde_suffix>\\tp%c1, <cde_dest>, %2, %3, %4"
+)
+
+(define_insn "arm_cx3a<mode>"
+   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
+	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+		       (match_operand:SIDI 2 "s_register_operand" "0")
+		       (match_operand:SI 3 "s_register_operand" "r")
+		       (match_operand:SI 4 "s_register_operand" "r")
+                       (match_operand:SI 5 "const_int_ccde3_operand" "i")]
+	    UNSPEC_CDEA))]
+   "TARGET_CDE"
+   "cx3<cde_suffix>a\\tp%c1, <cde_dest>, %3, %4, %5"
+)
+
 ;; Shift and rotation insns
 
 (define_expand "ashldi3"
diff --git a/gcc/config/arm/arm_cde.h b/gcc/config/arm/arm_cde.h
index 4c9f7ebeed4e2abf532f53040f5891da8b1aadac..9adcb99bcaf1a7a0f92199225f87e436363899e7 100644
--- a/gcc/config/arm/arm_cde.h
+++ b/gcc/config/arm/arm_cde.h
@@ -35,6 +35,42 @@ extern "C" {
 
 #if defined (__ARM_FEATURE_CDE)
 
+#define __arm_cx1(coproc, imm) \
+	__builtin_arm_cx1si(coproc, imm)
+
+#define __arm_cx1a(coproc, acc, imm) \
+	__builtin_arm_cx1asi(coproc, acc, imm)
+
+#define __arm_cx2(coproc, n, imm) \
+	__builtin_arm_cx2si(coproc, n, imm)
+
+#define __arm_cx2a(coproc, acc, n, imm) \
+	__builtin_arm_cx2asi(coproc, acc, n, imm)
+
+#define __arm_cx3(coproc, n, m, imm) \
+	__builtin_arm_cx3si(coproc, n, m, imm)
+
+#define __arm_cx3a(coproc, acc, n, m, imm) \
+	__builtin_arm_cx3asi(coproc, acc, n, m, imm)
+
+#define __arm_cx1d(coproc, imm) \
+	__builtin_arm_cx1di(coproc, imm)
+
+#define __arm_cx1da(coproc, acc, imm) \
+	__builtin_arm_cx1adi(coproc, acc, imm)
+
+#define __arm_cx2d(coproc, n, imm) \
+	__builtin_arm_cx2di(coproc, n, imm)
+
+#define __arm_cx2da(coproc, acc, n, imm) \
+	__builtin_arm_cx2adi(coproc, acc, n, imm)
+
+#define __arm_cx3d(coproc, n, m, imm) \
+	__builtin_arm_cx3di(coproc, n, m, imm)
+
+#define __arm_cx3da(coproc, acc, n, m, imm) \
+	__builtin_arm_cx3adi(coproc, acc, n, m, imm)
+
 #if defined (__ARM_FP) || defined (__ARM_FEATURE_MVE)
 
 /* CDE builtins using FPU/MVE registers.  */
diff --git a/gcc/config/arm/arm_cde_builtins.def b/gcc/config/arm/arm_cde_builtins.def
index a9fea937b9650f21a26d8183572b550e39b0fe7d..8b2cfc074cd45cbf073b83e370514265a82140cc 100644
--- a/gcc/config/arm/arm_cde_builtins.def
+++ b/gcc/config/arm/arm_cde_builtins.def
@@ -23,6 +23,13 @@
   VAR1 (T, N, A, IMM_MAX, ECF_FLAG) \
   VAR1 (T, N, B, IMM_MAX, ECF_FLAG)
 
+CDE_VAR2 (CX_IMM, cx1, si, di, ARM_CCDE_CONST_1, ECF_CONST)
+CDE_VAR2 (CX_UNARY, cx1a, si, di, ARM_CCDE_CONST_1, ECF_CONST)
+CDE_VAR2 (CX_UNARY, cx2, si, di, ARM_CCDE_CONST_2, ECF_CONST)
+CDE_VAR2 (CX_BINARY, cx2a, si, di, ARM_CCDE_CONST_2, ECF_CONST)
+CDE_VAR2 (CX_BINARY, cx3, si, di, ARM_CCDE_CONST_3, ECF_CONST)
+CDE_VAR2 (CX_TERNARY, cx3a, si, di, ARM_CCDE_CONST_3, ECF_CONST)
+
 CDE_VAR2 (CX_IMM, vcx1, si, di, ARM_VCDE_CONST_1, ECF_CONST)
 CDE_VAR2 (CX_UNARY, vcx1a, si, di, ARM_VCDE_CONST_1, ECF_CONST)
 CDE_VAR2 (CX_UNARY, vcx2, si, di, ARM_VCDE_CONST_2, ECF_CONST)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index c94198772f27dfda62886fecd37393960456c3c0..502f4f53da38bb7c89863e10ee26bdf6bfe5aaf6 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -896,6 +896,9 @@
 
 (define_mode_attr VSF2BF [(V2SF "V4BF") (V4SF "V8BF")])
 
+(define_mode_attr cde_suffix [(SI "") (DI "d")])
+(define_mode_attr cde_dest [(SI "%0") (DI "%0, %H0")])
+
 ;;----------------------------------------------------------------------------
 ;; Code attributes
 ;;----------------------------------------------------------------------------
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 59cf5b67f8a0a8ac56a664711090d682a5a93ad5..3ad47d2961c9359803f61488a5030bdc6fa6c152 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -231,6 +231,18 @@
        (match_test "IN_RANGE (UINTVAL (op), 0, ARM_CDE_CONST_COPROC)")
        (match_test "arm_arch_cde_coproc_bits[UINTVAL (op)] & arm_arch_cde_coproc")))
 
+(define_predicate "const_int_ccde1_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_CCDE_CONST_1)")))
+
+(define_predicate "const_int_ccde2_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_CCDE_CONST_2)")))
+
+(define_predicate "const_int_ccde3_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_CCDE_CONST_3)")))
+
 (define_predicate "const_int_vcde1_operand"
   (and (match_operand 0 "const_int_operand")
        (match_test "IN_RANGE (UINTVAL (op), 0, ARM_VCDE_CONST_1)")))
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 1645c32dfb2a43dde6ee947637edbca2df8f2309..3250b0319e3cb4417ffd6b4af11c71ae7929f26e 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -154,6 +154,8 @@
   UNSPEC_SMUADX		; Represent the SMUADX operation.
   UNSPEC_SSAT16		; Represent the SSAT16 operation.
   UNSPEC_USAT16		; Represent the USAT16 operation.
+  UNSPEC_CDE		; Custom Datapath Extension instruction.
+  UNSPEC_CDEA		; Custom Datapath Extension instruction.
   UNSPEC_VCDE		; Custom Datapath Extension instruction.
   UNSPEC_VCDEA		; Custom Datapath Extension instruction.
 ])
diff --git a/gcc/testsuite/gcc.target/arm/acle/cde-errors.c b/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
new file mode 100644
index 0000000000000000000000000000000000000000..827e03b771230a45e658c0f873455cc8caa35773
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
@@ -0,0 +1,111 @@
+/* Test the Custom Datapath Extension ACLE intrinsic.  */
+
+/* This file is to check we catch incorrect uses of the ACLE.  */
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8m_main_cde_ok } */
+/* { dg-add-options arm_v8m_main_cde } */
+/* { dg-additional-options "-save-temps" } */
+
+#include "arm_cde.h"
+
+/*
+   These are the scalar intrinsics.
+uint32_t __arm_cx1(int coproc, uint32_t imm);
+uint32_t __arm_cx1a(int coproc, uint32_t acc, uint32_t imm);
+uint32_t __arm_cx2(int coproc, uint32_t n, uint32_t imm);
+uint32_t __arm_cx2a(int coproc, uint32_t acc, uint32_t n, uint32_t imm);
+uint32_t __arm_cx3(int coproc, uint32_t n, uint32_t m, uint32_t imm);
+uint32_t __arm_cx3a(int coproc, uint32_t acc, uint32_t n, uint32_t m, uint32_t imm);
+
+uint64_t __arm_cx1d(int coproc, uint32_t imm);
+uint64_t __arm_cx1da(int coproc, uint64_t acc, uint32_t imm);
+uint64_t __arm_cx2d(int coproc, uint32_t n, uint32_t imm);
+uint64_t __arm_cx2da(int coproc, uint64_t acc, uint32_t n, uint32_t imm);
+uint64_t __arm_cx3d(int coproc, uint32_t n, uint32_t m, uint32_t imm);
+uint64_t __arm_cx3da(int coproc, uint64_t acc, uint32_t n, uint32_t m, uint32_t imm);
+*/
+
+/* Invalid values and non-constants passed as the constant arguments.  */
+uint64_t test_cde (uint32_t n, uint32_t m)
+{
+  uint64_t accum = 0;
+
+  /* `coproc` not enabled.  */
+  accum += __arm_cx1   (7,                        0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx1a  (7, (uint32_t)accum,       0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx2   (7, n,                     0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx2a  (7, (uint32_t)accum, n,    0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx3   (7, n, m,                  0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx3a  (7, (uint32_t)accum, n, m, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+
+  accum += __arm_cx1d  (7,                        0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx1da (7, accum,                 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx2d  (7, n,                     0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx2da (7, accum, n,              0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx3d  (7, n, m,                  0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx3da (7, accum, n, m,           0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+
+  /* `coproc` out of range.  */
+  accum += __arm_cx1   (8,                        0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx1a  (8, (uint32_t)accum,       0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2   (8, n,                     0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2a  (8, (uint32_t)accum, n,    0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3   (8, n, m,                  0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3a  (8, (uint32_t)accum, n, m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+
+  accum += __arm_cx1d  (8,                        0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx1da (8, accum,                 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2d  (8, n,                     0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2da (8, accum, n,              0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3d  (8, n, m,                  0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3da (8, accum, n, m,           0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+
+  /* `imm` out of range.  */
+  accum += __arm_cx1   (0,                        8192); /* { dg-error {argument 2 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx1a  (0, (uint32_t)accum,       8192); /* { dg-error {argument 3 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx2   (0, n,                     512); /* { dg-error {argument 3 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx2a  (0, (uint32_t)accum, n,    512); /* { dg-error {argument 4 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx3   (0, n, m,                  64); /* { dg-error {argument 4 must be a constant immediate in range \[0-63\]} } */
+  accum += __arm_cx3a  (0, (uint32_t)accum, n, m, 64); /* { dg-error {argument 5 must be a constant immediate in range \[0-63\]} } */
+
+  accum += __arm_cx1d  (0,                        8192); /* { dg-error {argument 2 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx1da (0, accum,                 8192); /* { dg-error {argument 3 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx2d  (0, n,                     512); /* { dg-error {argument 3 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx2da (0, accum, n,              512); /* { dg-error {argument 4 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx3d  (0, n, m,                  64); /* { dg-error {argument 4 must be a constant immediate in range \[0-63\]} } */
+  accum += __arm_cx3da (0, accum, n, m,           64); /* { dg-error {argument 5 must be a constant immediate in range \[0-63\]} } */
+
+  /* `imm` is not an immediate.  */
+  accum += __arm_cx1   (0,                        n); /* { dg-error {argument 2 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx1a  (0, (uint32_t)accum,       n); /* { dg-error {argument 3 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx2   (0, n,                     n); /* { dg-error {argument 3 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx2a  (0, (uint32_t)accum, n,    n); /* { dg-error {argument 4 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx3   (0, n, m,                  n); /* { dg-error {argument 4 must be a constant immediate in range \[0-63\]} } */
+  accum += __arm_cx3a  (0, (uint32_t)accum, n, m, n); /* { dg-error {argument 5 must be a constant immediate in range \[0-63\]} } */
+
+  accum += __arm_cx1d  (0,                        n); /* { dg-error {argument 2 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx1da (0, accum,                 n); /* { dg-error {argument 3 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx2d  (0, n,                     n); /* { dg-error {argument 3 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx2da (0, accum, n,              n); /* { dg-error {argument 4 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx3d  (0, n, m,                  n); /* { dg-error {argument 4 must be a constant immediate in range \[0-63\]} } */
+  accum += __arm_cx3da (0, accum, n, m,           n); /* { dg-error {argument 5 must be a constant immediate in range \[0-63\]} } */
+
+  /* `coproc` is not an immediate.  */
+  accum += __arm_cx1   ((int)m,                        0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx1a  ((int)m, (uint32_t)accum,       0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2   ((int)m, n,                     0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2a  ((int)m, (uint32_t)accum, n,    0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3   ((int)m, n, m,                  0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3a  ((int)m, (uint32_t)accum, n, m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+
+  accum += __arm_cx1d  ((int)m,                        0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx1da ((int)m, accum,                 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2d  ((int)m, n,                     0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2da ((int)m, accum, n,              0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3d  ((int)m, n, m,                  0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3da ((int)m, accum, n, m,           0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+
+  return accum;
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/acle/cde.c b/gcc/testsuite/gcc.target/arm/acle/cde.c
new file mode 100644
index 0000000000000000000000000000000000000000..f3ba8f00189389a94100ea2ddb0a09dde9abc613
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/cde.c
@@ -0,0 +1,229 @@
+/* { dg-do compile } */
+/* { dg-skip-if "Require optimisation to compile DCE tests" { *-*-* } { "-O0" } { "" } } */
+/* { dg-require-effective-target arm_v8m_main_cde_ok } */
+/* { dg-add-options arm_v8m_main_cde } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/* These are the scalar intrinsics.
+uint32_t __arm_cx1(int coproc, uint32_t imm);
+uint32_t __arm_cx1a(int coproc, uint32_t acc, uint32_t imm);
+uint32_t __arm_cx2(int coproc, uint32_t n, uint32_t imm);
+uint32_t __arm_cx2a(int coproc, uint32_t acc, uint32_t n, uint32_t imm);
+uint32_t __arm_cx3(int coproc, uint32_t n, uint32_t m, uint32_t imm);
+uint32_t __arm_cx3a(int coproc, uint32_t acc, uint32_t n, uint32_t m, uint32_t imm);
+
+uint64_t __arm_cx1d(int coproc, uint32_t imm);
+uint64_t __arm_cx1da(int coproc, uint64_t acc, uint32_t imm);
+uint64_t __arm_cx2d(int coproc, uint32_t n, uint32_t imm);
+uint64_t __arm_cx2da(int coproc, uint64_t acc, uint32_t n, uint32_t imm);
+uint64_t __arm_cx3d(int coproc, uint32_t n, uint32_t m, uint32_t imm);
+uint64_t __arm_cx3da(int coproc, uint64_t acc, uint32_t n, uint32_t m, uint32_t imm); */
+
+#include "arm_cde.h"
+
+#define TEST_CDE_SCALAR_INTRINSIC(name, accum_type, arguments) \
+  accum_type test_cde_##name (__attribute__ ((unused)) uint32_t n, \
+			      __attribute__ ((unused)) uint32_t m) \
+  {  \
+     accum_type accum = 0; \
+     accum += __arm_##name  arguments;  \
+     return accum;  \
+  }
+
+/* Basic test that we produce the assembly as expected.  */
+/*
+** test_cde_cx1:
+**	cx1	p0, r0, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx1, uint32_t, (0, 33))
+
+/*
+** test_cde_cx1a:
+**	movs	r0, #0
+**	cx1a	p0, r0, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx1a, uint32_t, (0, accum, 33))
+
+/*
+** test_cde_cx2:
+**	cx2	p0, r0, r0, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx2, uint32_t, (0, n, 33))
+
+/*
+** test_cde_cx2a:
+**	movs	(r[0-9]+), #0
+**	cx2a	p0, \1, r0, #33
+**	mov	r0, \1
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx2a, uint32_t, (0, accum, n, 33))
+
+/*
+** test_cde_cx3:
+**	cx3	p0, r0, r0, r1, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx3, uint32_t, (0, n, m, 33))
+
+/*
+** test_cde_cx3a:
+**	movs	(r[0-9]+), #0
+**	cx3a	p0, \1, r0, r1, #33
+**	mov	r0, \1
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx3a, uint32_t, (0, accum, n, m, 33))
+
+/*
+** test_cde_cx1d:
+**	cx1d	p0, r0, r1, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx1d, uint64_t, (0, 33))
+
+/*
+** test_cde_cx1da:
+**	movs	r0, #0
+**	movs	r1, #0
+**	cx1da	p0, r0, r1, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx1da, uint64_t, (0, accum, 33))
+
+/*
+** test_cde_cx2d:
+**	cx2d	p0, r0, r1, r0, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx2d, uint64_t, (0, n, 33))
+
+/* This particular function gets optimised by the compiler in two different
+   ways depending on the optimisation level.  So does test_cde_cx3da.  That's
+   why we have two different regexes in each of these function body checks.  */
+/*
+** test_cde_cx2da:
+** (
+**	mov	(r[0-9]+), r0
+**	movs	r0, #0
+**	movs	r1, #0
+**	cx2da	p0, r0, r1, \1, #33
+** |
+**	movs	(r[0-9]+), #0
+**	movs	(r[0-9]+), #0
+**	cx2da	p0, \2, \3, r0, #33
+**	mov	r0, \2
+**	mov	r1, \3
+** )
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx2da, uint64_t, (0, accum, n, 33))
+
+/*
+** test_cde_cx3d:
+**	cx3d	p0, r0, r1, r0, r1, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx3d, uint64_t, (0, n, m, 33))
+
+/*
+** test_cde_cx3da:
+**	...
+** (
+**	movs	(r[0-9]+), #0
+**	movs	(r[0-9]+), #0
+**	cx3da	p0, \1, \2, r0, r1, #33
+**	mov	r0, \1
+**	mov	r1, \2
+** |
+**	movs	r0, #0
+**	movs	r1, #0
+**	cx3da	p0, r0, r1, r[0-9]+, r[0-9]+, #33
+** )
+**	...
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx3da, uint64_t, (0, accum, n, m, 33))
+
+
+
+/* Ensure the unused intrinsic calls in this function are removed by dead code
+   elimination (DCE).  They should be, since the ACLE specification says these
+   intrinsics are stateless and pure.  */
+/*
+** test_cde_dce:
+**	bx	lr
+*/
+void test_cde_dce (uint32_t n, uint32_t m)
+{
+  uint64_t accum = 0;
+  __arm_cx1   (0, 33);
+  __arm_cx1a  (0, accum, 33);
+  __arm_cx2   (0, n, 33);
+  __arm_cx2a  (0, accum, n, 33);
+  __arm_cx3   (0, n, m, 33);
+  __arm_cx3a  (0, accum, n, m, 33);
+  __arm_cx1d   (0, 33);
+  __arm_cx1da  (0, accum, 33);
+  __arm_cx2d   (0, n, 33);
+  __arm_cx2da  (0, accum, n, 33);
+  __arm_cx3d   (0, n, m, 33);
+  __arm_cx3da  (0, accum, n, m, 33);
+}
+
+/* Check that the intrinsics accept constants given via symbolic names.
+   This test must be run under some level of optimisation.
+   The actual check we perform is that the function is provided something that,
+   at the point of expansion, is an immediate.  That check is not as strict as
+   having something that is an immediate directly.
+
+   Since we've already checked these intrinsics generate code in the manner we
+   expect (above), here we just check that all the instructions we expect are
+   there.  To ensure the instructions are from these functions we use different
+   constants and search for those specifically with `scan-assembler-times`.  */
+
+/* Check that the 32-bit intrinsics accept constants with symbolic names.  */
+uint32_t test_cde2 (uint32_t n, uint32_t m)
+{
+  int coproc = 6;
+  uint32_t imm = 30;
+  uint32_t accum = 0;
+  accum += __arm_cx1   (coproc, imm);
+  accum += __arm_cx1a  (coproc, accum, imm);
+  accum += __arm_cx2   (coproc, n, imm);
+  accum += __arm_cx2a  (coproc, accum, n, imm);
+  accum += __arm_cx3   (coproc, n, m, imm);
+  accum += __arm_cx3a  (coproc, accum, n, m, imm);
+  return accum;
+}
+
+/* Check that the 64-bit intrinsics accept constants with symbolic names.  */
+uint64_t test_cdedi2 (uint32_t n, uint32_t m)
+{
+  int coproc = 6;
+  uint32_t imm = 30;
+  uint64_t accum = 0;
+  accum += __arm_cx1d   (coproc, imm);
+  accum += __arm_cx1da  (coproc, accum, imm);
+  accum += __arm_cx2d   (coproc, n, imm);
+  accum += __arm_cx2da  (coproc, accum, n, imm);
+  accum += __arm_cx3d   (coproc, n, m, imm);
+  accum += __arm_cx3da  (coproc, accum, n, m, imm);
+  return accum;
+}
+
+/* { dg-final { scan-assembler-times "cx1\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx2\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx3\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx1a\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx2a\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx3a\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx1d\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx2d\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx3d\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx1da\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx2da\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx3da\\tp6" 1 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 050b4ba452fda03bb13c3722e6edc313b25fb1eb..1e8c0e2bd2913be773d2d608a82953adf2832d01 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5117,13 +5117,13 @@ proc add_options_for_arm_v8_2a_bf16_neon { flags } {
 
 foreach { armfunc armflag armdef } {
 	arm_v8m_main_cde
-		"-march=armv8-m.main+cdecp0 -mthumb"
+		"-march=armv8-m.main+cdecp0+cdecp6 -mthumb"
 		"defined (__ARM_FEATURE_CDE)"
 	arm_v8m_main_cde_fp
-		"-march=armv8-m.main+fp+cdecp0 -mthumb"
+		"-march=armv8-m.main+fp+cdecp0+cdecp6 -mthumb"
 		"defined (__ARM_FEATURE_CDE) && defined (__ARM_FP)"
 	arm_v8_1m_main_cde_mve
-		"-march=armv8.1-m.main+mve+cdecp0 -mthumb"
+		"-march=armv8.1-m.main+mve+cdecp0+cdecp6 -mthumb"
 		"defined (__ARM_FEATURE_CDE) && defined (__ARM_FEATURE_MVE)"
 	} {
     eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {

Comments

Kyrylo Tkachov April 8, 2020, 9:02 a.m. UTC | #1
Hi Matthew,

> -----Original Message-----
> From: Matthew Malcomson <Matthew.Malcomson@arm.com>
> Sent: 08 April 2020 09:46
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; Ramana Radhakrishnan
> <Ramana.Radhakrishnan@arm.com>; nd <nd@arm.com>;
> nickc@redhat.com; Richard Earnshaw <Richard.Earnshaw@arm.com>
> Subject: [PATCH] [Arm] Implement scalar Custom Datapath Extension
> intrinsics
> 
> Patch rebased onto recent trunk.
> 
> 
> This patch introduces the scalar CDE (Custom Datapath Extension)
> intrinsics for the arm backend.
> 
> There is nothing beyond the standard in this patch.  We simply build upon
> what
> has been done by Dennis for the vector intrinsics.
> 
> We do add `+cdecp6` to the default arguments for `target-supports.exp`, this
> allows for using coprocessor 6 in tests. This patch uses an alternate
> coprocessor to ease assembler scanning by looking for a use of coprocessor 6.
> 
> We also ensure that any DImode registers are put in an even-odd register
> pair
> when compiling for a target with CDE -- this avoids faulty code generation for
> -Os
> when producing the cx*d instructions.
> 
> Testing done:
> Bootstrapped and regtested for arm-none-linux-gnueabihf.
> 
> gcc/ChangeLog:
> 
> 2020-04-08  Matthew Malcomson  <matthew.malcomson@arm.com>
> 
> 	* config/arm/arm.c (arm_hard_regno_mode_ok): DImode registers
> forced into
> 	even-odd register pairs for TARGET_CDE.
> 	* config/arm/arm.h (ARM_CCDE_CONST_1): New.
> 	(ARM_CCDE_CONST_2): New.
> 	(ARM_CCDE_CONST_3): New.
> 	* config/arm/arm.md (arm_cx1si, arm_cx1di arm_cx1asi,
> arm_cx1adi arm_cx2si,
> 	arm_cx2di arm_cx2asi, arm_cx2adi arm_cx3si, arm_cx3di arm_cx3asi,
> 	arm_cx3adi): New patterns.
> 	* config/arm/arm_cde.h (__arm_cx1, __arm_cx1a, __arm_cx2,
> __arm_cx2a,
> 	__arm_cx3, __arm_cx3a, __arm_cx1d, __arm_cx1da, __arm_cx2d,
> __arm_cx2da,
> 	__arm_cx3d, __arm_cx3da): New ACLE function macros.
> 	* config/arm/arm_cde_builtins.def (cx1, cx1a, cx2, cx2a, cx3, cx3a):
> Define
> 	intrinsics.
> 	* config/arm/iterators.md (cde_suffix, cde_dest): New mode
> attributes.
> 	* config/arm/predicates.md (const_int_ccde1_operand,
> 	const_int_ccde2_operand, const_int_ccde3_operand): New.
> 	* config/arm/unspecs.md (UNSPEC_CDE, UNSPEC_CDEA): New.
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-04-08  Matthew Malcomson  <matthew.malcomson@arm.com>
> 
> 	* gcc.target/arm/acle/cde-errors.c: New test.
> 	* gcc.target/arm/acle/cde.c: New test.
> 	* lib/target-supports.exp: Update CDE flags to enable coprocessor 6.
> 
> 
> 
> ###############     Attachment also inlined for ease of reply
> ###############
> 
> 
> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
> index
> ca36a74cd1fa161c388961588fa0f96030b7888e..83886a2fcb3844f6a5060e451
> 125a6cd2d505c5c 100644
> --- a/gcc/config/arm/arm.h
> +++ b/gcc/config/arm/arm.h
> @@ -576,6 +576,9 @@ extern int arm_arch_cde;
>  extern int arm_arch_cde_coproc;
>  extern const int arm_arch_cde_coproc_bits[];
>  #define ARM_CDE_CONST_COPROC	7
> +#define ARM_CCDE_CONST_1	((1 << 13) - 1)
> +#define ARM_CCDE_CONST_2	((1 << 9 ) - 1)
> +#define ARM_CCDE_CONST_3	((1 << 6 ) - 1)
>  #define ARM_VCDE_CONST_1	((1 << 11) - 1)
>  #define ARM_VCDE_CONST_2	((1 << 6 ) - 1)
>  #define ARM_VCDE_CONST_3	((1 << 3 ) - 1)
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index
> da0bfbc35501ba40324a38ee9ebc194f43196837..be076e4ac59be7f224b769b
> bca4013a554b50c07 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -25057,10 +25057,11 @@ arm_hard_regno_mode_ok (unsigned int
> regno, machine_mode mode)
>        if (ARM_NUM_REGS (mode) > 4)
>  	return false;
> 
> -      if (TARGET_THUMB2 && !TARGET_HAVE_MVE)
> +      if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
>  	return true;
> 
> -      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno &
> 1) != 0);
> +      return !((TARGET_LDRD || TARGET_CDE)
> +	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
>      }
> 
>    if (regno == FRAME_POINTER_REGNUM
> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
> index
> 6d5560398dae3d0ace0342b4907542d2a6865f70..9c4d66f4efe70d9ab889686
> 5cbf45285e5cfbaf9 100644
> --- a/gcc/config/arm/arm.md
> +++ b/gcc/config/arm/arm.md
> @@ -4408,6 +4408,70 @@
>     (set_attr "shift" "3")
>     (set_attr "type" "logic_shift_reg")])
> 
> 
> 
> 
> 
> +;; Custom Datapath Extension insns.
> +(define_insn "arm_cx1<mode>"
> +   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
> +	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
> +	               (match_operand:SI 2 "const_int_ccde1_operand" "i")]
> +	    UNSPEC_CDE))]
> +   "TARGET_CDE"
> +   "cx1<cde_suffix>\\tp%c1, <cde_dest>, %2"
> +)

I wonder whether we want to use the "coproc" type here as we do for the MVE-based ones so that we get at least some attempt at scheduling.
I think it would be better to do that rather than leave it blank. We can always refine it later once we get more data on implementations.
Also please format the indentation of the RTL patterns. My mail client has definitely screwed up the formatting here, but I've also looked at the patch in an editor 😉
The (unspec:SIDI...) should be indented one level more than the SET so that the source and destination of the set align (as long as doing so would not break the 80 column limit, which I think it wouldn't).

Ok with those changes. You can address the comments either in a respin or as a follow-up with a separate patch if it's more convenient.
Thanks,
Kyrill

> +
> +(define_insn "arm_cx1a<mode>"
> +   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
> +	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
> +		       (match_operand:SIDI 2 "s_register_operand" "0")
> +	               (match_operand:SI 3 "const_int_ccde1_operand" "i")]
> +	    UNSPEC_CDEA))]
> +   "TARGET_CDE"
> +   "cx1<cde_suffix>a\\tp%c1, <cde_dest>, %3"
> +)
> +
> +(define_insn "arm_cx2<mode>"
> +   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
> +	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
> +		       (match_operand:SI 2 "s_register_operand" "r")
> +	               (match_operand:SI 3 "const_int_ccde2_operand" "i")]
> +	    UNSPEC_CDE))]
> +   "TARGET_CDE"
> +   "cx2<cde_suffix>\\tp%c1, <cde_dest>, %2, %3"
> +)
> +
> +(define_insn "arm_cx2a<mode>"
> +   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
> +	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
> +		       (match_operand:SIDI 2 "s_register_operand" "0")
> +		       (match_operand:SI 3 "s_register_operand" "r")
> +	               (match_operand:SI 4 "const_int_ccde2_operand" "i")]
> +	    UNSPEC_CDEA))]
> +   "TARGET_CDE"
> +   "cx2<cde_suffix>a\\tp%c1, <cde_dest>, %3, %4"
> +)
> +
> +(define_insn "arm_cx3<mode>"
> +   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
> +	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
> +		       (match_operand:SI 2 "s_register_operand" "r")
> +		       (match_operand:SI 3 "s_register_operand" "r")
> +	               (match_operand:SI 4 "const_int_ccde3_operand" "i")]
> +	    UNSPEC_CDE))]
> +   "TARGET_CDE"
> +   "cx3<cde_suffix>\\tp%c1, <cde_dest>, %2, %3, %4"
> +)
> +
> +(define_insn "arm_cx3a<mode>"
> +   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
> +	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
> +		       (match_operand:SIDI 2 "s_register_operand" "0")
> +		       (match_operand:SI 3 "s_register_operand" "r")
> +		       (match_operand:SI 4 "s_register_operand" "r")
> +                       (match_operand:SI 5 "const_int_ccde3_operand" "i")]
> +	    UNSPEC_CDEA))]
> +   "TARGET_CDE"
> +   "cx3<cde_suffix>a\\tp%c1, <cde_dest>, %3, %4, %5"
> +)
> +
> 
> 
> 
> 
>  ;; Shift and rotation insns
> 
>  (define_expand "ashldi3"
> diff --git a/gcc/config/arm/arm_cde.h b/gcc/config/arm/arm_cde.h
> index
> 4c9f7ebeed4e2abf532f53040f5891da8b1aadac..9adcb99bcaf1a7a0f92199225
> f87e436363899e7 100644
> --- a/gcc/config/arm/arm_cde.h
> +++ b/gcc/config/arm/arm_cde.h
> @@ -35,6 +35,42 @@ extern "C" {
> 
>  #if defined (__ARM_FEATURE_CDE)
> 
> +#define __arm_cx1(coproc, imm) \
> +	__builtin_arm_cx1si(coproc, imm)
> +
> +#define __arm_cx1a(coproc, acc, imm) \
> +	__builtin_arm_cx1asi(coproc, acc, imm)
> +
> +#define __arm_cx2(coproc, n, imm) \
> +	__builtin_arm_cx2si(coproc, n, imm)
> +
> +#define __arm_cx2a(coproc, acc, n, imm) \
> +	__builtin_arm_cx2asi(coproc, acc, n, imm)
> +
> +#define __arm_cx3(coproc, n, m, imm) \
> +	__builtin_arm_cx3si(coproc, n, m, imm)
> +
> +#define __arm_cx3a(coproc, acc, n, m, imm) \
> +	__builtin_arm_cx3asi(coproc, acc, n, m, imm)
> +
> +#define __arm_cx1d(coproc, imm) \
> +	__builtin_arm_cx1di(coproc, imm)
> +
> +#define __arm_cx1da(coproc, acc, imm) \
> +	__builtin_arm_cx1adi(coproc, acc, imm)
> +
> +#define __arm_cx2d(coproc, n, imm) \
> +	__builtin_arm_cx2di(coproc, n, imm)
> +
> +#define __arm_cx2da(coproc, acc, n, imm) \
> +	__builtin_arm_cx2adi(coproc, acc, n, imm)
> +
> +#define __arm_cx3d(coproc, n, m, imm) \
> +	__builtin_arm_cx3di(coproc, n, m, imm)
> +
> +#define __arm_cx3da(coproc, acc, n, m, imm) \
> +	__builtin_arm_cx3adi(coproc, acc, n, m, imm)
> +
>  #if defined (__ARM_FP) || defined (__ARM_FEATURE_MVE)
> 
>  /* CDE builtins using FPU/MVE registers.  */
> diff --git a/gcc/config/arm/arm_cde_builtins.def
> b/gcc/config/arm/arm_cde_builtins.def
> index
> a9fea937b9650f21a26d8183572b550e39b0fe7d..8b2cfc074cd45cbf073b83e3
> 70514265a82140cc 100644
> --- a/gcc/config/arm/arm_cde_builtins.def
> +++ b/gcc/config/arm/arm_cde_builtins.def
> @@ -23,6 +23,13 @@
>    VAR1 (T, N, A, IMM_MAX, ECF_FLAG) \
>    VAR1 (T, N, B, IMM_MAX, ECF_FLAG)
> 
> +CDE_VAR2 (CX_IMM, cx1, si, di, ARM_CCDE_CONST_1, ECF_CONST)
> +CDE_VAR2 (CX_UNARY, cx1a, si, di, ARM_CCDE_CONST_1, ECF_CONST)
> +CDE_VAR2 (CX_UNARY, cx2, si, di, ARM_CCDE_CONST_2, ECF_CONST)
> +CDE_VAR2 (CX_BINARY, cx2a, si, di, ARM_CCDE_CONST_2, ECF_CONST)
> +CDE_VAR2 (CX_BINARY, cx3, si, di, ARM_CCDE_CONST_3, ECF_CONST)
> +CDE_VAR2 (CX_TERNARY, cx3a, si, di, ARM_CCDE_CONST_3, ECF_CONST)
> +
>  CDE_VAR2 (CX_IMM, vcx1, si, di, ARM_VCDE_CONST_1, ECF_CONST)
>  CDE_VAR2 (CX_UNARY, vcx1a, si, di, ARM_VCDE_CONST_1, ECF_CONST)
>  CDE_VAR2 (CX_UNARY, vcx2, si, di, ARM_VCDE_CONST_2, ECF_CONST)
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index
> c94198772f27dfda62886fecd37393960456c3c0..502f4f53da38bb7c89863e10e
> e26bdf6bfe5aaf6 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -896,6 +896,9 @@
> 
>  (define_mode_attr VSF2BF [(V2SF "V4BF") (V4SF "V8BF")])
> 
> +(define_mode_attr cde_suffix [(SI "") (DI "d")])
> +(define_mode_attr cde_dest [(SI "%0") (DI "%0, %H0")])
> +
>  ;;----------------------------------------------------------------------------
>  ;; Code attributes
>  ;;----------------------------------------------------------------------------
> diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
> index
> 59cf5b67f8a0a8ac56a664711090d682a5a93ad5..3ad47d2961c9359803f61488
> a5030bdc6fa6c152 100644
> --- a/gcc/config/arm/predicates.md
> +++ b/gcc/config/arm/predicates.md
> @@ -231,6 +231,18 @@
>         (match_test "IN_RANGE (UINTVAL (op), 0, ARM_CDE_CONST_COPROC)")
>         (match_test "arm_arch_cde_coproc_bits[UINTVAL (op)] &
> arm_arch_cde_coproc")))
> 
> +(define_predicate "const_int_ccde1_operand"
> +  (and (match_operand 0 "const_int_operand")
> +       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_CCDE_CONST_1)")))
> +
> +(define_predicate "const_int_ccde2_operand"
> +  (and (match_operand 0 "const_int_operand")
> +       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_CCDE_CONST_2)")))
> +
> +(define_predicate "const_int_ccde3_operand"
> +  (and (match_operand 0 "const_int_operand")
> +       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_CCDE_CONST_3)")))
> +
>  (define_predicate "const_int_vcde1_operand"
>    (and (match_operand 0 "const_int_operand")
>         (match_test "IN_RANGE (UINTVAL (op), 0, ARM_VCDE_CONST_1)")))
> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> index
> 1645c32dfb2a43dde6ee947637edbca2df8f2309..3250b0319e3cb4417ffd6b4a
> f11c71ae7929f26e 100644
> --- a/gcc/config/arm/unspecs.md
> +++ b/gcc/config/arm/unspecs.md
> @@ -154,6 +154,8 @@
>    UNSPEC_SMUADX		; Represent the SMUADX operation.
>    UNSPEC_SSAT16		; Represent the SSAT16 operation.
>    UNSPEC_USAT16		; Represent the USAT16 operation.
> +  UNSPEC_CDE		; Custom Datapath Extension instruction.
> +  UNSPEC_CDEA		; Custom Datapath Extension instruction.
>    UNSPEC_VCDE		; Custom Datapath Extension instruction.
>    UNSPEC_VCDEA		; Custom Datapath Extension instruction.
>  ])
> diff --git a/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
> b/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..827e03b771230a45e658c0f
> 873455cc8caa35773
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
> @@ -0,0 +1,111 @@
> +/* Test the Custom Datapath Extension ACLE intrinsic.  */
> +
> +/* This file is to check we catch incorrect uses of the ACLE.  */
> +
> +/* { dg-do assemble } */
> +/* { dg-require-effective-target arm_v8m_main_cde_ok } */
> +/* { dg-add-options arm_v8m_main_cde } */
> +/* { dg-additional-options "-save-temps" } */
> +
> +#include "arm_cde.h"
> +
> +/*
> +   These are the scalar intrinsics.
> +uint32_t __arm_cx1(int coproc, uint32_t imm);
> +uint32_t __arm_cx1a(int coproc, uint32_t acc, uint32_t imm);
> +uint32_t __arm_cx2(int coproc, uint32_t n, uint32_t imm);
> +uint32_t __arm_cx2a(int coproc, uint32_t acc, uint32_t n, uint32_t imm);
> +uint32_t __arm_cx3(int coproc, uint32_t n, uint32_t m, uint32_t imm);
> +uint32_t __arm_cx3a(int coproc, uint32_t acc, uint32_t n, uint32_t m,
> uint32_t imm);
> +
> +uint64_t __arm_cx1d(int coproc, uint32_t imm);
> +uint64_t __arm_cx1da(int coproc, uint64_t acc, uint32_t imm);
> +uint64_t __arm_cx2d(int coproc, uint32_t n, uint32_t imm);
> +uint64_t __arm_cx2da(int coproc, uint64_t acc, uint32_t n, uint32_t imm);
> +uint64_t __arm_cx3d(int coproc, uint32_t n, uint32_t m, uint32_t imm);
> +uint64_t __arm_cx3da(int coproc, uint64_t acc, uint32_t n, uint32_t m,
> uint32_t imm);
> +*/
> +
> +/* Incorrect types as the constants.  */
> +uint64_t test_cde (uint32_t n, uint32_t m)
> +{
> +  uint64_t accum = 0;
> +
> +  /* `coproc` not enabled.  */
> +  accum += __arm_cx1   (7,                        0); /* { dg-error {coprocessor 7 is
> not enabled with \+cdecp7} } */
> +  accum += __arm_cx1a  (7, (uint32_t)accum,       0); /* { dg-error
> {coprocessor 7 is not enabled with \+cdecp7} } */
> +  accum += __arm_cx2   (7, n,                     0); /* { dg-error {coprocessor 7 is
> not enabled with \+cdecp7} } */
> +  accum += __arm_cx2a  (7, (uint32_t)accum, n,    0); /* { dg-error
> {coprocessor 7 is not enabled with \+cdecp7} } */
> +  accum += __arm_cx3   (7, n, m,                  0); /* { dg-error {coprocessor 7 is
> not enabled with \+cdecp7} } */
> +  accum += __arm_cx3a  (7, (uint32_t)accum, n, m, 0); /* { dg-error
> {coprocessor 7 is not enabled with \+cdecp7} } */
> +
> +  accum += __arm_cx1d  (7,                        0); /* { dg-error {coprocessor 7 is
> not enabled with \+cdecp7} } */
> +  accum += __arm_cx1da (7, accum,                 0); /* { dg-error {coprocessor 7
> is not enabled with \+cdecp7} } */
> +  accum += __arm_cx2d  (7, n,                     0); /* { dg-error {coprocessor 7 is
> not enabled with \+cdecp7} } */
> +  accum += __arm_cx2da (7, accum, n,              0); /* { dg-error {coprocessor
> 7 is not enabled with \+cdecp7} } */
> +  accum += __arm_cx3d  (7, n, m,                  0); /* { dg-error {coprocessor 7 is
> not enabled with \+cdecp7} } */
> +  accum += __arm_cx3da (7, accum, n, m,           0); /* { dg-error
> {coprocessor 7 is not enabled with \+cdecp7} } */
> +
> +  /* `coproc` out of range.  */
> +  accum += __arm_cx1   (8,                        0); /* { dg-error {coproc must be a
> constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx1a  (8, (uint32_t)accum,       0); /* { dg-error {coproc
> must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx2   (8, n,                     0); /* { dg-error {coproc must be a
> constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx2a  (8, (uint32_t)accum, n,    0); /* { dg-error {coproc
> must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx3   (8, n, m,                  0); /* { dg-error {coproc must be a
> constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx3a  (8, (uint32_t)accum, n, m, 0); /* { dg-error {coproc
> must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +
> +  accum += __arm_cx1d  (8,                        0); /* { dg-error {coproc must be a
> constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx1da (8, accum,                 0); /* { dg-error {coproc must
> be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx2d  (8, n,                     0); /* { dg-error {coproc must be a
> constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx2da (8, accum, n,              0); /* { dg-error {coproc must
> be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx3d  (8, n, m,                  0); /* { dg-error {coproc must be
> a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx3da (8, accum, n, m,           0); /* { dg-error {coproc
> must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +
> +  /* `imm` out of range.  */
> +  accum += __arm_cx1   (0,                        8192); /* { dg-error {argument 2
> must be a constant immediate in range \[0-8191\]} } */
> +  accum += __arm_cx1a  (0, (uint32_t)accum,       8192); /* { dg-error
> {argument 3 must be a constant immediate in range \[0-8191\]} } */
> +  accum += __arm_cx2   (0, n,                     512); /* { dg-error {argument 3
> must be a constant immediate in range \[0-511\]} } */
> +  accum += __arm_cx2a  (0, (uint32_t)accum, n,    512); /* { dg-error
> {argument 4 must be a constant immediate in range \[0-511\]} } */
> +  accum += __arm_cx3   (0, n, m,                  64); /* { dg-error {argument 4
> must be a constant immediate in range \[0-63\]} } */
> +  accum += __arm_cx3a  (0, (uint32_t)accum, n, m, 64); /* { dg-error
> {argument 5 must be a constant immediate in range \[0-63\]} } */
> +
> +  accum += __arm_cx1d  (0,                        8192); /* { dg-error {argument 2
> must be a constant immediate in range \[0-8191\]} } */
> +  accum += __arm_cx1da (0, accum,                 8192); /* { dg-error {argument
> 3 must be a constant immediate in range \[0-8191\]} } */
> +  accum += __arm_cx2d  (0, n,                     512); /* { dg-error {argument 3
> must be a constant immediate in range \[0-511\]} } */
> +  accum += __arm_cx2da (0, accum, n,              512); /* { dg-error {argument
> 4 must be a constant immediate in range \[0-511\]} } */
> +  accum += __arm_cx3d  (0, n, m,                  64); /* { dg-error {argument 4
> must be a constant immediate in range \[0-63\]} } */
> +  accum += __arm_cx3da (0, accum, n, m,           64); /* { dg-error {argument
> 5 must be a constant immediate in range \[0-63\]} } */
> +
> +  /* `imm` is not an immediate.  */
> +  accum += __arm_cx1   (0,                        n); /* { dg-error {argument 2 must
> be a constant immediate in range \[0-8191\]} } */
> +  accum += __arm_cx1a  (0, (uint32_t)accum,       n); /* { dg-error {argument
> 3 must be a constant immediate in range \[0-8191\]} } */
> +  accum += __arm_cx2   (0, n,                     n); /* { dg-error {argument 3 must
> be a constant immediate in range \[0-511\]} } */
> +  accum += __arm_cx2a  (0, (uint32_t)accum, n,    n); /* { dg-error {argument
> 4 must be a constant immediate in range \[0-511\]} } */
> +  accum += __arm_cx3   (0, n, m,                  n); /* { dg-error {argument 4 must
> be a constant immediate in range \[0-63\]} } */
> +  accum += __arm_cx3a  (0, (uint32_t)accum, n, m, n); /* { dg-error
> {argument 5 must be a constant immediate in range \[0-63\]} } */
> +
> +  accum += __arm_cx1d  (0,                        n); /* { dg-error {argument 2 must
> be a constant immediate in range \[0-8191\]} } */
> +  accum += __arm_cx1da (0, accum,                 n); /* { dg-error {argument 3
> must be a constant immediate in range \[0-8191\]} } */
> +  accum += __arm_cx2d  (0, n,                     n); /* { dg-error {argument 3 must
> be a constant immediate in range \[0-511\]} } */
> +  accum += __arm_cx2da (0, accum, n,              n); /* { dg-error {argument 4
> must be a constant immediate in range \[0-511\]} } */
> +  accum += __arm_cx3d  (0, n, m,                  n); /* { dg-error {argument 4
> must be a constant immediate in range \[0-63\]} } */
> +  accum += __arm_cx3da (0, accum, n, m,           n); /* { dg-error {argument 5
> must be a constant immediate in range \[0-63\]} } */
> +
> +  /* `coproc` is not an immediate.  */
> +  accum += __arm_cx1   ((int)m,                        0); /* { dg-error {coproc must
> be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx1a  ((int)m, (uint32_t)accum,       0); /* { dg-error
> {coproc must be a constant immediate in range \[0-7\] enabled with
> \+cdecp<N>} } */
> +  accum += __arm_cx2   ((int)m, n,                     0); /* { dg-error {coproc must
> be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx2a  ((int)m, (uint32_t)accum, n,    0); /* { dg-error
> {coproc must be a constant immediate in range \[0-7\] enabled with
> \+cdecp<N>} } */
> +  accum += __arm_cx3   ((int)m, n, m,                  0); /* { dg-error {coproc must
> be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx3a  ((int)m, (uint32_t)accum, n, m, 0); /* { dg-error
> {coproc must be a constant immediate in range \[0-7\] enabled with
> \+cdecp<N>} } */
> +
> +  accum += __arm_cx1d  ((int)m,                        0); /* { dg-error {coproc must
> be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx1da ((int)m, accum,                 0); /* { dg-error {coproc
> must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx2d  ((int)m, n,                     0); /* { dg-error {coproc must
> be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx2da ((int)m, accum, n,              0); /* { dg-error {coproc
> must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx3d  ((int)m, n, m,                  0); /* { dg-error {coproc
> must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +  accum += __arm_cx3da ((int)m, accum, n, m,           0); /* { dg-error {coproc
> must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
> +
> +  return accum;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/arm/acle/cde.c
> b/gcc/testsuite/gcc.target/arm/acle/cde.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..f3ba8f00189389a94100ea2
> ddb0a09dde9abc613
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/acle/cde.c
> @@ -0,0 +1,229 @@
> +/* { dg-do compile } */
> +/* { dg-skip-if "Require optimsation to compile DCE tests" { *-*-* } { "-O0" }
> { "" } } */
> +/* { dg-require-effective-target arm_v8m_main_cde_ok } */
> +/* { dg-add-options arm_v8m_main_cde } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +/* These are the scalar intrinsics.
> +uint32_t __arm_cx1(int coproc, uint32_t imm);
> +uint32_t __arm_cx1a(int coproc, uint32_t acc, uint32_t imm);
> +uint32_t __arm_cx2(int coproc, uint32_t n, uint32_t imm);
> +uint32_t __arm_cx2a(int coproc, uint32_t acc, uint32_t n, uint32_t imm);
> +uint32_t __arm_cx3(int coproc, uint32_t n, uint32_t m, uint32_t imm);
> +uint32_t __arm_cx3a(int coproc, uint32_t acc, uint32_t n, uint32_t m,
> uint32_t imm);
> +
> +uint64_t __arm_cx1d(int coproc, uint32_t imm);
> +uint64_t __arm_cx1da(int coproc, uint64_t acc, uint32_t imm);
> +uint64_t __arm_cx2d(int coproc, uint32_t n, uint32_t imm);
> +uint64_t __arm_cx2da(int coproc, uint64_t acc, uint32_t n, uint32_t imm);
> +uint64_t __arm_cx3d(int coproc, uint32_t n, uint32_t m, uint32_t imm);
> +uint64_t __arm_cx3da(int coproc, uint64_t acc, uint32_t n, uint32_t m,
> uint32_t imm); */
> +
> +#include "arm_cde.h"
> +
> +#define TEST_CDE_SCALAR_INTRINSIC(name, accum_type, arguments) \
> +  accum_type test_cde_##name (__attribute__ ((unused)) uint32_t n, \
> +			      __attribute__ ((unused)) uint32_t m) \
> +  {  \
> +     accum_type accum = 0; \
> +     accum += __arm_##name  arguments;  \
> +     return accum;  \
> +  }
> +
> +/* Basic test that we produce the assembly as expected.  */
> +/*
> +** test_cde_cx1:
> +**	cx1	p0, r0, #33
> +**	bx	lr
> +*/
> +TEST_CDE_SCALAR_INTRINSIC (cx1, uint32_t, (0, 33))
> +
> +/*
> +** test_cde_cx1a:
> +**	movs	r0, #0
> +**	cx1a	p0, r0, #33
> +**	bx	lr
> +*/
> +TEST_CDE_SCALAR_INTRINSIC (cx1a, uint32_t, (0, accum, 33))
> +
> +/*
> +** test_cde_cx2:
> +**	cx2	p0, r0, r0, #33
> +**	bx	lr
> +*/
> +TEST_CDE_SCALAR_INTRINSIC (cx2, uint32_t, (0, n, 33))
> +
> +/*
> +** test_cde_cx2a:
> +**	movs	(r[0-9]+), #0
> +**	cx2a	p0, \1, r0, #33
> +**	mov	r0, \1
> +**	bx	lr
> +*/
> +TEST_CDE_SCALAR_INTRINSIC (cx2a, uint32_t, (0, accum, n, 33))
> +
> +/*
> +** test_cde_cx3:
> +**	cx3	p0, r0, r0, r1, #33
> +**	bx	lr
> +*/
> +TEST_CDE_SCALAR_INTRINSIC (cx3, uint32_t, (0, n, m, 33))
> +
> +/*
> +** test_cde_cx3a:
> +**	movs	(r[0-9]+), #0
> +**	cx3a	p0, \1, r0, r1, #33
> +**	mov	r0, \1
> +**	bx	lr
> +*/
> +TEST_CDE_SCALAR_INTRINSIC (cx3a, uint32_t, (0, accum, n, m, 33))
> +
> +/*
> +** test_cde_cx1d:
> +**	cx1d	p0, r0, r1, #33
> +**	bx	lr
> +*/
> +TEST_CDE_SCALAR_INTRINSIC (cx1d, uint64_t, (0, 33))
> +
> +/*
> +** test_cde_cx1da:
> +**	movs	r0, #0
> +**	movs	r1, #0
> +**	cx1da	p0, r0, r1, #33
> +**	bx	lr
> +*/
> +TEST_CDE_SCALAR_INTRINSIC (cx1da, uint64_t, (0, accum, 33))
> +
> +/*
> +** test_cde_cx2d:
> +**	cx2d	p0, r0, r1, r0, #33
> +**	bx	lr
> +*/
> +TEST_CDE_SCALAR_INTRINSIC (cx2d, uint64_t, (0, n, 33))
> +
> +/* This particular function gets optimised by the compiler in two different
> +   ways depending on the optimisation level.  So does test_cde_cx3da.
> That's
> +   why we have two different regexes in each of these function body checks.
> */
> +/*
> +** test_cde_cx2da:
> +** (
> +**	mov	(r[0-9]+), r0
> +**	movs	r0, #0
> +**	movs	r1, #0
> +**	cx2da	p0, r0, r1, \1, #33
> +** |
> +**	movs	(r[0-9]+), #0
> +**	movs	(r[0-9]+), #0
> +**	cx2da	p0, \2, \3, r0, #33
> +**	mov	r0, \2
> +**	mov	r1, \3
> +** )
> +**	bx	lr
> +*/
> +TEST_CDE_SCALAR_INTRINSIC (cx2da, uint64_t, (0, accum, n, 33))
> +
> +/*
> +** test_cde_cx3d:
> +**	cx3d	p0, r0, r1, r0, r1, #33
> +**	bx	lr
> +*/
> +TEST_CDE_SCALAR_INTRINSIC (cx3d, uint64_t, (0, n, m, 33))
> +
> +/*
> +** test_cde_cx3da:
> +**	...
> +** (
> +**	movs	(r[0-9]+), #0
> +**	movs	(r[0-9]+), #0
> +**	cx3da	p0, \1, \2, r0, r1, #33
> +**	mov	r0, \1
> +**	mov	r1, \2
> +** |
> +**      movs	r0, #0
> +**      movs	r1, #0
> +**      cx3da	p0, r0, r1, r[0-9]+, r[0-9]+, #33
> +** )
> +**	...
> +**	bx	lr
> +*/
> +TEST_CDE_SCALAR_INTRINSIC (cx3da, uint64_t, (0, accum, n, m, 33))
> +
> +
> +
> +/* Ensure this function gets DCE'd out after optimisation.
> +   Should be such since the ACLE specification mentions these functions are
> +   stateless and pure.  */
> +/*
> +** test_cde_dce:
> +**	bx	lr
> +*/
> +void test_cde_dce (uint32_t n, uint32_t m)
> +{
> +  uint64_t accum = 0;
> +  __arm_cx1   (0, 33);
> +  __arm_cx1a  (0, accum, 33);
> +  __arm_cx2   (0, n, 33);
> +  __arm_cx2a  (0, accum, n, 33);
> +  __arm_cx3   (0, n, m, 33);
> +  __arm_cx3a  (0, accum, n, m, 33);
> +  __arm_cx1d   (0, 33);
> +  __arm_cx1da  (0, accum, 33);
> +  __arm_cx2d   (0, n, 33);
> +  __arm_cx2da  (0, accum, n, 33);
> +  __arm_cx3d   (0, n, m, 33);
> +  __arm_cx3da  (0, accum, n, m, 33);
> +}
> +
> +/* Checking this function allows constants with symbolic names.
> +   This test must be run under some level of optimisation.
> +   The actual check we perform is that the function is provided something
> that,
> +   at the point of expansion, is an immediate.  That check is not as strict as
> +   having something that is an immediate directly.
> +
> +   Since we've already checked these intrinsics generate code in the manner
> we
> +   expect (above), here we just check that all the instructions we expect are
> +   there.  To ensure the instructions are from these functions we use
> different
> +   constants and search for those specifically with `scan-assembler-times`.
> */
> +
> +/* Checking this function allows constants with symbolic names.  */
> +uint32_t test_cde2 (uint32_t n, uint32_t m)
> +{
> +  int coproc = 6;
> +  uint32_t imm = 30;
> +  uint32_t accum = 0;
> +  accum += __arm_cx1   (coproc, imm);
> +  accum += __arm_cx1a  (coproc, accum, imm);
> +  accum += __arm_cx2   (coproc, n, imm);
> +  accum += __arm_cx2a  (coproc, accum, n, imm);
> +  accum += __arm_cx3   (coproc, n, m, imm);
> +  accum += __arm_cx3a  (coproc, accum, n, m, imm);
> +  return accum;
> +}
> +
> +/* Checking this function allows constants with symbolic names.  */
> +uint64_t test_cdedi2 (uint32_t n, uint32_t m)
> +{
> +  int coproc = 6;
> +  uint32_t imm = 30;
> +  uint64_t accum = 0;
> +  accum += __arm_cx1d   (coproc, imm);
> +  accum += __arm_cx1da  (coproc, accum, imm);
> +  accum += __arm_cx2d   (coproc, n, imm);
> +  accum += __arm_cx2da  (coproc, accum, n, imm);
> +  accum += __arm_cx3d   (coproc, n, m, imm);
> +  accum += __arm_cx3da  (coproc, accum, n, m, imm);
> +  return accum;
> +}
> +
> +/* { dg-final { scan-assembler-times "cx1\\tp6" 1 } } */
> +/* { dg-final { scan-assembler-times "cx2\\tp6" 1 } } */
> +/* { dg-final { scan-assembler-times "cx3\\tp6" 1 } } */
> +/* { dg-final { scan-assembler-times "cx1a\\tp6" 1 } } */
> +/* { dg-final { scan-assembler-times "cx2a\\tp6" 1 } } */
> +/* { dg-final { scan-assembler-times "cx3a\\tp6" 1 } } */
> +/* { dg-final { scan-assembler-times "cx1d\\tp6" 1 } } */
> +/* { dg-final { scan-assembler-times "cx2d\\tp6" 1 } } */
> +/* { dg-final { scan-assembler-times "cx3d\\tp6" 1 } } */
> +/* { dg-final { scan-assembler-times "cx1da\\tp6" 1 } } */
> +/* { dg-final { scan-assembler-times "cx2da\\tp6" 1 } } */
> +/* { dg-final { scan-assembler-times "cx3da\\tp6" 1 } } */
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-
> supports.exp
> index
> 050b4ba452fda03bb13c3722e6edc313b25fb1eb..1e8c0e2bd2913be773d2d60
> 8a82953adf2832d01 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -5117,13 +5117,13 @@ proc add_options_for_arm_v8_2a_bf16_neon
> { flags } {
> 
>  foreach { armfunc armflag armdef } {
>  	arm_v8m_main_cde
> -		"-march=armv8-m.main+cdecp0 -mthumb"
> +		"-march=armv8-m.main+cdecp0+cdecp6 -mthumb"
>  		"defined (__ARM_FEATURE_CDE)"
>  	arm_v8m_main_cde_fp
> -		"-march=armv8-m.main+fp+cdecp0 -mthumb"
> +		"-march=armv8-m.main+fp+cdecp0+cdecp6 -mthumb"
>  		"defined (__ARM_FEATURE_CDE) && defined (__ARM_FP)"
>  	arm_v8_1m_main_cde_mve
> -		"-march=armv8.1-m.main+mve+cdecp0 -mthumb"
> +		"-march=armv8.1-m.main+mve+cdecp0+cdecp6 -mthumb"
>  		"defined (__ARM_FEATURE_CDE) && defined
> (__ARM_FEATURE_MVE)"
>  	} {
>      eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
diff mbox series

Patch

diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index ca36a74cd1fa161c388961588fa0f96030b7888e..83886a2fcb3844f6a5060e451125a6cd2d505c5c 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -576,6 +576,9 @@  extern int arm_arch_cde;
 extern int arm_arch_cde_coproc;
 extern const int arm_arch_cde_coproc_bits[];
 #define ARM_CDE_CONST_COPROC	7
+#define ARM_CCDE_CONST_1	((1 << 13) - 1)
+#define ARM_CCDE_CONST_2	((1 << 9 ) - 1)
+#define ARM_CCDE_CONST_3	((1 << 6 ) - 1)
 #define ARM_VCDE_CONST_1	((1 << 11) - 1)
 #define ARM_VCDE_CONST_2	((1 << 6 ) - 1)
 #define ARM_VCDE_CONST_3	((1 << 3 ) - 1)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index da0bfbc35501ba40324a38ee9ebc194f43196837..be076e4ac59be7f224b769bbca4013a554b50c07 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -25057,10 +25057,11 @@  arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
       if (ARM_NUM_REGS (mode) > 4)
 	return false;
 
-      if (TARGET_THUMB2 && !TARGET_HAVE_MVE)
+      if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
 	return true;
 
-      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
+      return !((TARGET_LDRD || TARGET_CDE)
+	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
     }
 
   if (regno == FRAME_POINTER_REGNUM
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 6d5560398dae3d0ace0342b4907542d2a6865f70..9c4d66f4efe70d9ab8896865cbf45285e5cfbaf9 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -4408,6 +4408,70 @@ 
    (set_attr "shift" "3")
    (set_attr "type" "logic_shift_reg")])
 


+;; Custom Datapath Extension insns.
+(define_insn "arm_cx1<mode>"
+   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
+	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+	               (match_operand:SI 2 "const_int_ccde1_operand" "i")]
+	    UNSPEC_CDE))]
+   "TARGET_CDE"
+   "cx1<cde_suffix>\\tp%c1, <cde_dest>, %2"
+)
+
+(define_insn "arm_cx1a<mode>"
+   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
+	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+		       (match_operand:SIDI 2 "s_register_operand" "0")
+	               (match_operand:SI 3 "const_int_ccde1_operand" "i")]
+	    UNSPEC_CDEA))]
+   "TARGET_CDE"
+   "cx1<cde_suffix>a\\tp%c1, <cde_dest>, %3"
+)
+
+(define_insn "arm_cx2<mode>"
+   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
+	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+		       (match_operand:SI 2 "s_register_operand" "r")
+	               (match_operand:SI 3 "const_int_ccde2_operand" "i")]
+	    UNSPEC_CDE))]
+   "TARGET_CDE"
+   "cx2<cde_suffix>\\tp%c1, <cde_dest>, %2, %3"
+)
+
+(define_insn "arm_cx2a<mode>"
+   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
+	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+		       (match_operand:SIDI 2 "s_register_operand" "0")
+		       (match_operand:SI 3 "s_register_operand" "r")
+	               (match_operand:SI 4 "const_int_ccde2_operand" "i")]
+	    UNSPEC_CDEA))]
+   "TARGET_CDE"
+   "cx2<cde_suffix>a\\tp%c1, <cde_dest>, %3, %4"
+)
+
+(define_insn "arm_cx3<mode>"
+   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
+	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+		       (match_operand:SI 2 "s_register_operand" "r")
+		       (match_operand:SI 3 "s_register_operand" "r")
+	               (match_operand:SI 4 "const_int_ccde3_operand" "i")]
+	    UNSPEC_CDE))]
+   "TARGET_CDE"
+   "cx3<cde_suffix>\\tp%c1, <cde_dest>, %2, %3, %4"
+)
+
+(define_insn "arm_cx3a<mode>"
+   [(set (match_operand:SIDI 0 "s_register_operand" "=r")
+	 (unspec:SIDI [(match_operand:SI 1 "const_int_coproc_operand" "i")
+		       (match_operand:SIDI 2 "s_register_operand" "0")
+		       (match_operand:SI 3 "s_register_operand" "r")
+		       (match_operand:SI 4 "s_register_operand" "r")
+                       (match_operand:SI 5 "const_int_ccde3_operand" "i")]
+	    UNSPEC_CDEA))]
+   "TARGET_CDE"
+   "cx3<cde_suffix>a\\tp%c1, <cde_dest>, %3, %4, %5"
+)
+


 ;; Shift and rotation insns
 
 (define_expand "ashldi3"
diff --git a/gcc/config/arm/arm_cde.h b/gcc/config/arm/arm_cde.h
index 4c9f7ebeed4e2abf532f53040f5891da8b1aadac..9adcb99bcaf1a7a0f92199225f87e436363899e7 100644
--- a/gcc/config/arm/arm_cde.h
+++ b/gcc/config/arm/arm_cde.h
@@ -35,6 +35,42 @@  extern "C" {
 
 #if defined (__ARM_FEATURE_CDE)
 
+#define __arm_cx1(coproc, imm) \
+	__builtin_arm_cx1si(coproc, imm)
+
+#define __arm_cx1a(coproc, acc, imm) \
+	__builtin_arm_cx1asi(coproc, acc, imm)
+
+#define __arm_cx2(coproc, n, imm) \
+	__builtin_arm_cx2si(coproc, n, imm)
+
+#define __arm_cx2a(coproc, acc, n, imm) \
+	__builtin_arm_cx2asi(coproc, acc, n, imm)
+
+#define __arm_cx3(coproc, n, m, imm) \
+	__builtin_arm_cx3si(coproc, n, m, imm)
+
+#define __arm_cx3a(coproc, acc, n, m, imm) \
+	__builtin_arm_cx3asi(coproc, acc, n, m, imm)
+
+#define __arm_cx1d(coproc, imm) \
+	__builtin_arm_cx1di(coproc, imm)
+
+#define __arm_cx1da(coproc, acc, imm) \
+	__builtin_arm_cx1adi(coproc, acc, imm)
+
+#define __arm_cx2d(coproc, n, imm) \
+	__builtin_arm_cx2di(coproc, n, imm)
+
+#define __arm_cx2da(coproc, acc, n, imm) \
+	__builtin_arm_cx2adi(coproc, acc, n, imm)
+
+#define __arm_cx3d(coproc, n, m, imm) \
+	__builtin_arm_cx3di(coproc, n, m, imm)
+
+#define __arm_cx3da(coproc, acc, n, m, imm) \
+	__builtin_arm_cx3adi(coproc, acc, n, m, imm)
+
 #if defined (__ARM_FP) || defined (__ARM_FEATURE_MVE)
 
 /* CDE builtins using FPU/MVE registers.  */
diff --git a/gcc/config/arm/arm_cde_builtins.def b/gcc/config/arm/arm_cde_builtins.def
index a9fea937b9650f21a26d8183572b550e39b0fe7d..8b2cfc074cd45cbf073b83e370514265a82140cc 100644
--- a/gcc/config/arm/arm_cde_builtins.def
+++ b/gcc/config/arm/arm_cde_builtins.def
@@ -23,6 +23,13 @@ 
   VAR1 (T, N, A, IMM_MAX, ECF_FLAG) \
   VAR1 (T, N, B, IMM_MAX, ECF_FLAG)
 
+CDE_VAR2 (CX_IMM, cx1, si, di, ARM_CCDE_CONST_1, ECF_CONST)
+CDE_VAR2 (CX_UNARY, cx1a, si, di, ARM_CCDE_CONST_1, ECF_CONST)
+CDE_VAR2 (CX_UNARY, cx2, si, di, ARM_CCDE_CONST_2, ECF_CONST)
+CDE_VAR2 (CX_BINARY, cx2a, si, di, ARM_CCDE_CONST_2, ECF_CONST)
+CDE_VAR2 (CX_BINARY, cx3, si, di, ARM_CCDE_CONST_3, ECF_CONST)
+CDE_VAR2 (CX_TERNARY, cx3a, si, di, ARM_CCDE_CONST_3, ECF_CONST)
+
 CDE_VAR2 (CX_IMM, vcx1, si, di, ARM_VCDE_CONST_1, ECF_CONST)
 CDE_VAR2 (CX_UNARY, vcx1a, si, di, ARM_VCDE_CONST_1, ECF_CONST)
 CDE_VAR2 (CX_UNARY, vcx2, si, di, ARM_VCDE_CONST_2, ECF_CONST)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index c94198772f27dfda62886fecd37393960456c3c0..502f4f53da38bb7c89863e10ee26bdf6bfe5aaf6 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -896,6 +896,9 @@ 
 
 (define_mode_attr VSF2BF [(V2SF "V4BF") (V4SF "V8BF")])
 
+(define_mode_attr cde_suffix [(SI "") (DI "d")])
+(define_mode_attr cde_dest [(SI "%0") (DI "%0, %H0")])
+
 ;;----------------------------------------------------------------------------
 ;; Code attributes
 ;;----------------------------------------------------------------------------
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 59cf5b67f8a0a8ac56a664711090d682a5a93ad5..3ad47d2961c9359803f61488a5030bdc6fa6c152 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -231,6 +231,18 @@ 
        (match_test "IN_RANGE (UINTVAL (op), 0, ARM_CDE_CONST_COPROC)")
        (match_test "arm_arch_cde_coproc_bits[UINTVAL (op)] & arm_arch_cde_coproc")))
 
+(define_predicate "const_int_ccde1_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_CCDE_CONST_1)")))
+
+(define_predicate "const_int_ccde2_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_CCDE_CONST_2)")))
+
+(define_predicate "const_int_ccde3_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "IN_RANGE (UINTVAL (op), 0, ARM_CCDE_CONST_3)")))
+
 (define_predicate "const_int_vcde1_operand"
   (and (match_operand 0 "const_int_operand")
        (match_test "IN_RANGE (UINTVAL (op), 0, ARM_VCDE_CONST_1)")))
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 1645c32dfb2a43dde6ee947637edbca2df8f2309..3250b0319e3cb4417ffd6b4af11c71ae7929f26e 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -154,6 +154,8 @@ 
   UNSPEC_SMUADX		; Represent the SMUADX operation.
   UNSPEC_SSAT16		; Represent the SSAT16 operation.
   UNSPEC_USAT16		; Represent the USAT16 operation.
+  UNSPEC_CDE		; Scalar Custom Datapath Extension instruction.
+  UNSPEC_CDEA		; Scalar Custom Datapath Extension instruction with accumulator.
   UNSPEC_VCDE		; Custom Datapath Extension instruction.
   UNSPEC_VCDEA		; Custom Datapath Extension instruction.
 ])
diff --git a/gcc/testsuite/gcc.target/arm/acle/cde-errors.c b/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
new file mode 100644
index 0000000000000000000000000000000000000000..827e03b771230a45e658c0f873455cc8caa35773
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
@@ -0,0 +1,111 @@ 
+/* Test the Custom Datapath Extension ACLE intrinsic.  */
+
+/* This file is to check we catch incorrect uses of the ACLE.  */
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8m_main_cde_ok } */
+/* { dg-add-options arm_v8m_main_cde } */
+/* { dg-additional-options "-save-temps" } */
+
+#include "arm_cde.h"
+
+/*
+   These are the scalar intrinsics.
+uint32_t __arm_cx1(int coproc, uint32_t imm);
+uint32_t __arm_cx1a(int coproc, uint32_t acc, uint32_t imm);
+uint32_t __arm_cx2(int coproc, uint32_t n, uint32_t imm);
+uint32_t __arm_cx2a(int coproc, uint32_t acc, uint32_t n, uint32_t imm);
+uint32_t __arm_cx3(int coproc, uint32_t n, uint32_t m, uint32_t imm);
+uint32_t __arm_cx3a(int coproc, uint32_t acc, uint32_t n, uint32_t m, uint32_t imm);
+
+uint64_t __arm_cx1d(int coproc, uint32_t imm);
+uint64_t __arm_cx1da(int coproc, uint64_t acc, uint32_t imm);
+uint64_t __arm_cx2d(int coproc, uint32_t n, uint32_t imm);
+uint64_t __arm_cx2da(int coproc, uint64_t acc, uint32_t n, uint32_t imm);
+uint64_t __arm_cx3d(int coproc, uint32_t n, uint32_t m, uint32_t imm);
+uint64_t __arm_cx3da(int coproc, uint64_t acc, uint32_t n, uint32_t m, uint32_t imm);
+*/
+
+/* Invalid values and non-constants passed as the constant arguments.  */
+uint64_t test_cde (uint32_t n, uint32_t m)
+{
+  uint64_t accum = 0;
+
+  /* `coproc` not enabled.  */
+  accum += __arm_cx1   (7,                        0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx1a  (7, (uint32_t)accum,       0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx2   (7, n,                     0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx2a  (7, (uint32_t)accum, n,    0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx3   (7, n, m,                  0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx3a  (7, (uint32_t)accum, n, m, 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+
+  accum += __arm_cx1d  (7,                        0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx1da (7, accum,                 0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx2d  (7, n,                     0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx2da (7, accum, n,              0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx3d  (7, n, m,                  0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+  accum += __arm_cx3da (7, accum, n, m,           0); /* { dg-error {coprocessor 7 is not enabled with \+cdecp7} } */
+
+  /* `coproc` out of range.  */
+  accum += __arm_cx1   (8,                        0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx1a  (8, (uint32_t)accum,       0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2   (8, n,                     0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2a  (8, (uint32_t)accum, n,    0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3   (8, n, m,                  0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3a  (8, (uint32_t)accum, n, m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+
+  accum += __arm_cx1d  (8,                        0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx1da (8, accum,                 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2d  (8, n,                     0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2da (8, accum, n,              0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3d  (8, n, m,                  0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3da (8, accum, n, m,           0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+
+  /* `imm` out of range.  */
+  accum += __arm_cx1   (0,                        8192); /* { dg-error {argument 2 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx1a  (0, (uint32_t)accum,       8192); /* { dg-error {argument 3 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx2   (0, n,                     512); /* { dg-error {argument 3 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx2a  (0, (uint32_t)accum, n,    512); /* { dg-error {argument 4 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx3   (0, n, m,                  64); /* { dg-error {argument 4 must be a constant immediate in range \[0-63\]} } */
+  accum += __arm_cx3a  (0, (uint32_t)accum, n, m, 64); /* { dg-error {argument 5 must be a constant immediate in range \[0-63\]} } */
+
+  accum += __arm_cx1d  (0,                        8192); /* { dg-error {argument 2 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx1da (0, accum,                 8192); /* { dg-error {argument 3 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx2d  (0, n,                     512); /* { dg-error {argument 3 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx2da (0, accum, n,              512); /* { dg-error {argument 4 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx3d  (0, n, m,                  64); /* { dg-error {argument 4 must be a constant immediate in range \[0-63\]} } */
+  accum += __arm_cx3da (0, accum, n, m,           64); /* { dg-error {argument 5 must be a constant immediate in range \[0-63\]} } */
+
+  /* `imm` is not an immediate.  */
+  accum += __arm_cx1   (0,                        n); /* { dg-error {argument 2 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx1a  (0, (uint32_t)accum,       n); /* { dg-error {argument 3 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx2   (0, n,                     n); /* { dg-error {argument 3 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx2a  (0, (uint32_t)accum, n,    n); /* { dg-error {argument 4 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx3   (0, n, m,                  n); /* { dg-error {argument 4 must be a constant immediate in range \[0-63\]} } */
+  accum += __arm_cx3a  (0, (uint32_t)accum, n, m, n); /* { dg-error {argument 5 must be a constant immediate in range \[0-63\]} } */
+
+  accum += __arm_cx1d  (0,                        n); /* { dg-error {argument 2 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx1da (0, accum,                 n); /* { dg-error {argument 3 must be a constant immediate in range \[0-8191\]} } */
+  accum += __arm_cx2d  (0, n,                     n); /* { dg-error {argument 3 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx2da (0, accum, n,              n); /* { dg-error {argument 4 must be a constant immediate in range \[0-511\]} } */
+  accum += __arm_cx3d  (0, n, m,                  n); /* { dg-error {argument 4 must be a constant immediate in range \[0-63\]} } */
+  accum += __arm_cx3da (0, accum, n, m,           n); /* { dg-error {argument 5 must be a constant immediate in range \[0-63\]} } */
+
+  /* `coproc` is not an immediate.  */
+  accum += __arm_cx1   ((int)m,                        0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx1a  ((int)m, (uint32_t)accum,       0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2   ((int)m, n,                     0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2a  ((int)m, (uint32_t)accum, n,    0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3   ((int)m, n, m,                  0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3a  ((int)m, (uint32_t)accum, n, m, 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+
+  accum += __arm_cx1d  ((int)m,                        0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx1da ((int)m, accum,                 0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2d  ((int)m, n,                     0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx2da ((int)m, accum, n,              0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3d  ((int)m, n, m,                  0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+  accum += __arm_cx3da ((int)m, accum, n, m,           0); /* { dg-error {coproc must be a constant immediate in range \[0-7\] enabled with \+cdecp<N>} } */
+
+  return accum;
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/acle/cde.c b/gcc/testsuite/gcc.target/arm/acle/cde.c
new file mode 100644
index 0000000000000000000000000000000000000000..f3ba8f00189389a94100ea2ddb0a09dde9abc613
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/cde.c
@@ -0,0 +1,229 @@ 
+/* { dg-do compile } */
+/* { dg-skip-if "Require optimisation to compile DCE tests" { *-*-* } { "-O0" } { "" } } */
+/* { dg-require-effective-target arm_v8m_main_cde_ok } */
+/* { dg-add-options arm_v8m_main_cde } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/* These are the scalar intrinsics.
+uint32_t __arm_cx1(int coproc, uint32_t imm);
+uint32_t __arm_cx1a(int coproc, uint32_t acc, uint32_t imm);
+uint32_t __arm_cx2(int coproc, uint32_t n, uint32_t imm);
+uint32_t __arm_cx2a(int coproc, uint32_t acc, uint32_t n, uint32_t imm);
+uint32_t __arm_cx3(int coproc, uint32_t n, uint32_t m, uint32_t imm);
+uint32_t __arm_cx3a(int coproc, uint32_t acc, uint32_t n, uint32_t m, uint32_t imm);
+
+uint64_t __arm_cx1d(int coproc, uint32_t imm);
+uint64_t __arm_cx1da(int coproc, uint64_t acc, uint32_t imm);
+uint64_t __arm_cx2d(int coproc, uint32_t n, uint32_t imm);
+uint64_t __arm_cx2da(int coproc, uint64_t acc, uint32_t n, uint32_t imm);
+uint64_t __arm_cx3d(int coproc, uint32_t n, uint32_t m, uint32_t imm);
+uint64_t __arm_cx3da(int coproc, uint64_t acc, uint32_t n, uint32_t m, uint32_t imm); */
+
+#include "arm_cde.h"
+
+#define TEST_CDE_SCALAR_INTRINSIC(name, accum_type, arguments) \
+  accum_type test_cde_##name (__attribute__ ((unused)) uint32_t n, \
+			      __attribute__ ((unused)) uint32_t m) \
+  {  \
+     accum_type accum = 0; \
+     accum += __arm_##name  arguments;  \
+     return accum;  \
+  }
+
+/* Basic test that we produce the assembly as expected.  */
+/*
+** test_cde_cx1:
+**	cx1	p0, r0, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx1, uint32_t, (0, 33))
+
+/*
+** test_cde_cx1a:
+**	movs	r0, #0
+**	cx1a	p0, r0, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx1a, uint32_t, (0, accum, 33))
+
+/*
+** test_cde_cx2:
+**	cx2	p0, r0, r0, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx2, uint32_t, (0, n, 33))
+
+/*
+** test_cde_cx2a:
+**	movs	(r[0-9]+), #0
+**	cx2a	p0, \1, r0, #33
+**	mov	r0, \1
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx2a, uint32_t, (0, accum, n, 33))
+
+/*
+** test_cde_cx3:
+**	cx3	p0, r0, r0, r1, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx3, uint32_t, (0, n, m, 33))
+
+/*
+** test_cde_cx3a:
+**	movs	(r[0-9]+), #0
+**	cx3a	p0, \1, r0, r1, #33
+**	mov	r0, \1
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx3a, uint32_t, (0, accum, n, m, 33))
+
+/*
+** test_cde_cx1d:
+**	cx1d	p0, r0, r1, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx1d, uint64_t, (0, 33))
+
+/*
+** test_cde_cx1da:
+**	movs	r0, #0
+**	movs	r1, #0
+**	cx1da	p0, r0, r1, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx1da, uint64_t, (0, accum, 33))
+
+/*
+** test_cde_cx2d:
+**	cx2d	p0, r0, r1, r0, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx2d, uint64_t, (0, n, 33))
+
+/* This particular function gets optimised by the compiler in two different
+   ways depending on the optimisation level.  So does test_cde_cx3da.  That's
+   why we have two different regexes in each of these function body checks.  */
+/*
+** test_cde_cx2da:
+** (
+**	mov	(r[0-9]+), r0
+**	movs	r0, #0
+**	movs	r1, #0
+**	cx2da	p0, r0, r1, \1, #33
+** |
+**	movs	(r[0-9]+), #0
+**	movs	(r[0-9]+), #0
+**	cx2da	p0, \2, \3, r0, #33
+**	mov	r0, \2
+**	mov	r1, \3
+** )
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx2da, uint64_t, (0, accum, n, 33))
+
+/*
+** test_cde_cx3d:
+**	cx3d	p0, r0, r1, r0, r1, #33
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx3d, uint64_t, (0, n, m, 33))
+
+/*
+** test_cde_cx3da:
+**	...
+** (
+**	movs	(r[0-9]+), #0
+**	movs	(r[0-9]+), #0
+**	cx3da	p0, \1, \2, r0, r1, #33
+**	mov	r0, \1
+**	mov	r1, \2
+** |
+**	movs	r0, #0
+**	movs	r1, #0
+**	cx3da	p0, r0, r1, r[0-9]+, r[0-9]+, #33
+** )
+**	...
+**	bx	lr
+*/
+TEST_CDE_SCALAR_INTRINSIC (cx3da, uint64_t, (0, accum, n, m, 33))
+
+
+
+/* Ensure the calls in this function get DCE'd out after optimisation.
+   This is expected since the ACLE specification states that these intrinsics
+   are stateless and pure.  */
+/*
+** test_cde_dce:
+**	bx	lr
+*/
+void test_cde_dce (uint32_t n, uint32_t m)
+{
+  uint64_t accum = 0;
+  __arm_cx1   (0, 33);
+  __arm_cx1a  (0, accum, 33);
+  __arm_cx2   (0, n, 33);
+  __arm_cx2a  (0, accum, n, 33);
+  __arm_cx3   (0, n, m, 33);
+  __arm_cx3a  (0, accum, n, m, 33);
+  __arm_cx1d   (0, 33);
+  __arm_cx1da  (0, accum, 33);
+  __arm_cx2d   (0, n, 33);
+  __arm_cx2da  (0, accum, n, 33);
+  __arm_cx3d   (0, n, m, 33);
+  __arm_cx3da  (0, accum, n, m, 33);
+}
+
+/* Checking this function allows constants with symbolic names.
+   This test must be run under some level of optimisation.
+   The actual check we perform is that the function is provided something that,
+   at the point of expansion, is an immediate.  That check is not as strict as
+   having something that is an immediate directly.
+
+   Since we've already checked these intrinsics generate code in the manner we
+   expect (above), here we just check that all the instructions we expect are
+   there.  To ensure the instructions are from these functions we use different
+   constants and search for those specifically with `scan-assembler-times`.  */
+
+/* Checking this function allows constants with symbolic names.  */
+uint32_t test_cde2 (uint32_t n, uint32_t m)
+{
+  int coproc = 6;
+  uint32_t imm = 30;
+  uint32_t accum = 0;
+  accum += __arm_cx1   (coproc, imm);
+  accum += __arm_cx1a  (coproc, accum, imm);
+  accum += __arm_cx2   (coproc, n, imm);
+  accum += __arm_cx2a  (coproc, accum, n, imm);
+  accum += __arm_cx3   (coproc, n, m, imm);
+  accum += __arm_cx3a  (coproc, accum, n, m, imm);
+  return accum;
+}
+
+/* Checking this function allows constants with symbolic names.  */
+uint64_t test_cdedi2 (uint32_t n, uint32_t m)
+{
+  int coproc = 6;
+  uint32_t imm = 30;
+  uint64_t accum = 0;
+  accum += __arm_cx1d   (coproc, imm);
+  accum += __arm_cx1da  (coproc, accum, imm);
+  accum += __arm_cx2d   (coproc, n, imm);
+  accum += __arm_cx2da  (coproc, accum, n, imm);
+  accum += __arm_cx3d   (coproc, n, m, imm);
+  accum += __arm_cx3da  (coproc, accum, n, m, imm);
+  return accum;
+}
+
+/* { dg-final { scan-assembler-times "cx1\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx2\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx3\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx1a\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx2a\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx3a\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx1d\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx2d\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx3d\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx1da\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx2da\\tp6" 1 } } */
+/* { dg-final { scan-assembler-times "cx3da\\tp6" 1 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 050b4ba452fda03bb13c3722e6edc313b25fb1eb..1e8c0e2bd2913be773d2d608a82953adf2832d01 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5117,13 +5117,13 @@  proc add_options_for_arm_v8_2a_bf16_neon { flags } {
 
 foreach { armfunc armflag armdef } {
 	arm_v8m_main_cde
-		"-march=armv8-m.main+cdecp0 -mthumb"
+		"-march=armv8-m.main+cdecp0+cdecp6 -mthumb"
 		"defined (__ARM_FEATURE_CDE)"
 	arm_v8m_main_cde_fp
-		"-march=armv8-m.main+fp+cdecp0 -mthumb"
+		"-march=armv8-m.main+fp+cdecp0+cdecp6 -mthumb"
 		"defined (__ARM_FEATURE_CDE) && defined (__ARM_FP)"
 	arm_v8_1m_main_cde_mve
-		"-march=armv8.1-m.main+mve+cdecp0 -mthumb"
+		"-march=armv8.1-m.main+mve+cdecp0+cdecp6 -mthumb"
 		"defined (__ARM_FEATURE_CDE) && defined (__ARM_FEATURE_MVE)"
 	} {
     eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {