diff mbox

[ARM,4/7,ping3] Adapt atomic compare and swap to ARMv8-M Baseline

Message ID 37085c23-a27f-8dbc-7c15-3c5eac6dd8dd@foss.arm.com
State New
Headers show

Commit Message

Thomas Preudhomme Oct. 24, 2016, 8:05 a.m. UTC
Ping?

Best regards,

Thomas

On 14/10/16 14:50, Thomas Preudhomme wrote:
> Ping?
>
> Best regards,
>
> Thomas
>
> On 03/10/16 17:45, Thomas Preudhomme wrote:
>> Ping?
>>
>> Best regards,
>>
>> Thomas
>>
>> On 22/09/16 14:46, Thomas Preudhomme wrote:
>>> Hi,
>>>
>>> This patch is part of a patch series to add support for atomic operations on
>>> ARMv8-M Baseline targets in GCC. This specific patch makes the necessary change
>>> for compare and swap to work for ARMv8-M Baseline, doubleword integers excepted.
>>> Namely, it adds Thumb-1 specific constraints to compare_and_swap. The
>>> constraints are chosen so that once the pattern is split, the individual
>>> instructions have their constraints respected. In particular, the constraints
>>> for the cbranchsi4_* pattern must be duplicated here, which explains the use of
>>> several alternatives.
>>>
>>> Note: changes to enable other atomic operations are in the next patch of the
>>> series.
>>>
>>> ChangeLog entry is as follows:
>>>
>>> *** gcc/ChangeLog ***
>>>
>>> 2016-07-05  Thomas Preud'homme  <thomas.preudhomme@arm.com>
>>>
>>>         * config/arm/sync.md (atomic_compare_and_swap<mode>_1): Add new ARMv8-M
>>>         Baseline only alternatives to (i) hold store atomic success value in a
>>>         return register rather than a scratch register, (ii) use a low register
>>>         for it and to (iii) ensure the cbranchsi insn generated by the split
>>>         respects the constraints of Thumb-1 cbranchsi4_insn and
>>>         cbranchsi4_scratch.
>>>         * config/arm/thumb1.md (cbranchsi4_insn): Add comment to indicate
>>>         constraints must match those in atomic_compare_and_swap.
>>>         (cbranchsi4_scratch): Likewise.
>>>
>>>
>>> Testing: No code generation difference for ARMv7-A, ARMv7VE and ARMv8-A on all
>>> atomic and synchronization testcases in the testsuite [2]. Patchset was also
>>> bootstrapped with --enable-itm --enable-gomp on ARMv8-A in ARM and Thumb mode at
>>> optimization level -O1 and above [1] without any regression in the testsuite and
>>> no code generation difference in libitm and libgomp.
>>>
>>> Code generation for ARMv8-M Baseline has been manually examined and compared
>>> against ARMv8-A Thumb-2 for the following configurations without finding any
>>> issue:
>>>
>>> gcc.dg/atomic-op-2.c at -Os
>>> gcc.dg/atomic-compare-exchange-2.c at -Os
>>> gcc.dg/atomic-compare-exchange-3.c at -O3
>>>
>>>
>>> Is this ok for trunk?
>>>
>>> Best regards,
>>>
>>> Thomas
>>>
>>> [1] CFLAGS_FOR_TARGET and CXXFLAGS_FOR_TARGET were set to "-O1 -g", "-O3 -g" and
>>> undefined ("-O2 -g")
>>> [2] The exact list is:
>>>
>>> gcc/testsuite/gcc.dg/atomic-compare-exchange-1.c
>>> gcc/testsuite/gcc.dg/atomic-compare-exchange-2.c
>>> gcc/testsuite/gcc.dg/atomic-compare-exchange-3.c
>>> gcc/testsuite/gcc.dg/atomic-exchange-1.c
>>> gcc/testsuite/gcc.dg/atomic-exchange-2.c
>>> gcc/testsuite/gcc.dg/atomic-exchange-3.c
>>> gcc/testsuite/gcc.dg/atomic-fence.c
>>> gcc/testsuite/gcc.dg/atomic-flag.c
>>> gcc/testsuite/gcc.dg/atomic-generic.c
>>> gcc/testsuite/gcc.dg/atomic-generic-aux.c
>>> gcc/testsuite/gcc.dg/atomic-invalid-2.c
>>> gcc/testsuite/gcc.dg/atomic-load-1.c
>>> gcc/testsuite/gcc.dg/atomic-load-2.c
>>> gcc/testsuite/gcc.dg/atomic-load-3.c
>>> gcc/testsuite/gcc.dg/atomic-lockfree.c
>>> gcc/testsuite/gcc.dg/atomic-lockfree-aux.c
>>> gcc/testsuite/gcc.dg/atomic-noinline.c
>>> gcc/testsuite/gcc.dg/atomic-noinline-aux.c
>>> gcc/testsuite/gcc.dg/atomic-op-1.c
>>> gcc/testsuite/gcc.dg/atomic-op-2.c
>>> gcc/testsuite/gcc.dg/atomic-op-3.c
>>> gcc/testsuite/gcc.dg/atomic-op-6.c
>>> gcc/testsuite/gcc.dg/atomic-store-1.c
>>> gcc/testsuite/gcc.dg/atomic-store-2.c
>>> gcc/testsuite/gcc.dg/atomic-store-3.c
>>> gcc/testsuite/g++.dg/ext/atomic-1.C
>>> gcc/testsuite/g++.dg/ext/atomic-2.C
>>> gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c
>>> gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c
>>> gcc/testsuite/gcc.target/arm/atomic-op-acquire.c
>>> gcc/testsuite/gcc.target/arm/atomic-op-char.c
>>> gcc/testsuite/gcc.target/arm/atomic-op-consume.c
>>> gcc/testsuite/gcc.target/arm/atomic-op-int.c
>>> gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c
>>> gcc/testsuite/gcc.target/arm/atomic-op-release.c
>>> gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c
>>> gcc/testsuite/gcc.target/arm/atomic-op-short.c
>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_1.c
>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_2.c
>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_3.c
>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_4.c
>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_5.c
>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_6.c
>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_7.c
>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_8.c
>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_9.c
>>> gcc/testsuite/gcc.target/arm/sync-1.c
>>> gcc/testsuite/gcc.target/arm/synchronize.c
>>> gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c
>>> gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c
>>> gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c
>>> gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c
>>> libstdc++-v3/testsuite/29_atomics/atomic/60658.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/62259.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/64658.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/65147.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/65913.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/70766.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/49445.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/constexpr.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/copy_list.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/default.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/direct_list.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/single_value.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/user_pod.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/operators/51811.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/operators/56011.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/operators/integral_assignment.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/operators/integral_conversion.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/operators/pointer_partial_void.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/requirements/base_classes.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic/requirements/compare_exchange_lowering.cc
>>>
>>>
>>> libstdc++-v3/testsuite/29_atomics/atomic/requirements/explicit_instantiation/1.cc
>>>
>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/clear/1.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/cons/1.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/cons/56012.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/cons/aggregate.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/cons/default.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/requirements/standard_layout.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/requirements/trivial.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/test_and_set/explicit.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/test_and_set/implicit.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/60940.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/65147.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/cons/constexpr.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/cons/copy_list.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/cons/default.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/cons/direct_list.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/cons/single_value.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/operators/bitwise.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/operators/decrement.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/operators/increment.cc
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/operators/integral_assignment.cc
>>>
>>>
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/operators/integral_conversion.cc
>>>
>>>
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/requirements/standard_layout.cc
>>>
>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/requirements/trivial.cc
>>> libstdc++-v3/testsuite/29_atomics/headers/atomic/functions_std_c++0x.cc
>>> libstdc++-v3/testsuite/29_atomics/headers/atomic/macros.cc
>>> libstdc++-v3/testsuite/29_atomics/headers/atomic/types_std_c++0x.cc

Comments

Kyrill Tkachov Oct. 26, 2016, 4:01 p.m. UTC | #1
Hi Thomas,

On 24/10/16 09:05, Thomas Preudhomme wrote:
> Ping?
>
> Best regards,
>
> Thomas
>
> On 14/10/16 14:50, Thomas Preudhomme wrote:
>> Ping?
>>
>> Best regards,
>>
>> Thomas
>>
>> On 03/10/16 17:45, Thomas Preudhomme wrote:
>>> Ping?
>>>
>>> Best regards,
>>>
>>> Thomas
>>>
>>> On 22/09/16 14:46, Thomas Preudhomme wrote:
>>>> Hi,
>>>>
>>>> This patch is part of a patch series to add support for atomic operations on
>>>> ARMv8-M Baseline targets in GCC. This specific patch makes the necessary change
>>>> for compare and swap to work for ARMv8-M Baseline, doubleword integers excepted.
>>>> Namely, it adds Thumb-1 specific constraints to compare_and_swap. The
>>>> constraints are chosen so that once the pattern is split, the individual
>>>> instructions have their constraints respected. In particular, the constraints
>>>> for the cbranchsi4_* pattern must be duplicated here, which explains the use of
>>>> several alternatives.
>>>>
>>>> Note: changes to enable other atomic operations are in the next patch of the
>>>> series.
>>>>
>>>> ChangeLog entry is as follows:
>>>>
>>>> *** gcc/ChangeLog ***
>>>>
>>>> 2016-07-05  Thomas Preud'homme <thomas.preudhomme@arm.com>
>>>>
>>>>         * config/arm/sync.md (atomic_compare_and_swap<mode>_1): Add new ARMv8-M
>>>>         Baseline only alternatives to (i) hold store atomic success value in a
>>>>         return register rather than a scratch register, (ii) use a low register
>>>>         for it and to (iii) ensure the cbranchsi insn generated by the split
>>>>         respects the constraints of Thumb-1 cbranchsi4_insn and
>>>>         cbranchsi4_scratch.
>>>>         * config/arm/thumb1.md (cbranchsi4_insn): Add comment to indicate
>>>>         constraints must match those in atomic_compare_and_swap.
>>>>         (cbranchsi4_scratch): Likewise.
>>>>
>>>>
>>>> Testing: No code generation difference for ARMv7-A, ARMv7VE and ARMv8-A on all
>>>> atomic and synchronization testcases in the testsuite [2]. Patchset was also
>>>> bootstrapped with --enable-itm --enable-gomp on ARMv8-A in ARM and Thumb mode at
>>>> optimization level -O1 and above [1] without any regression in the testsuite and
>>>> no code generation difference in libitm and libgomp.
>>>>
>>>> Code generation for ARMv8-M Baseline has been manually examined and compared
>>>> against ARMv8-A Thumb-2 for the following configurations without finding any
>>>> issue:
>>>>
>>>> gcc.dg/atomic-op-2.c at -Os
>>>> gcc.dg/atomic-compare-exchange-2.c at -Os
>>>> gcc.dg/atomic-compare-exchange-3.c at -O3
>>>>
>>>>
>>>> Is this ok for trunk?
>>>>

This is ok.
Thanks,
Kyrill

>>>> Best regards,
>>>>
>>>> Thomas
>>>>
>>>> [1] CFLAGS_FOR_TARGET and CXXFLAGS_FOR_TARGET were set to "-O1 -g", "-O3 -g" and
>>>> undefined ("-O2 -g")
>>>> [2] The exact list is:
>>>>
>>>> gcc/testsuite/gcc.dg/atomic-compare-exchange-1.c
>>>> gcc/testsuite/gcc.dg/atomic-compare-exchange-2.c
>>>> gcc/testsuite/gcc.dg/atomic-compare-exchange-3.c
>>>> gcc/testsuite/gcc.dg/atomic-exchange-1.c
>>>> gcc/testsuite/gcc.dg/atomic-exchange-2.c
>>>> gcc/testsuite/gcc.dg/atomic-exchange-3.c
>>>> gcc/testsuite/gcc.dg/atomic-fence.c
>>>> gcc/testsuite/gcc.dg/atomic-flag.c
>>>> gcc/testsuite/gcc.dg/atomic-generic.c
>>>> gcc/testsuite/gcc.dg/atomic-generic-aux.c
>>>> gcc/testsuite/gcc.dg/atomic-invalid-2.c
>>>> gcc/testsuite/gcc.dg/atomic-load-1.c
>>>> gcc/testsuite/gcc.dg/atomic-load-2.c
>>>> gcc/testsuite/gcc.dg/atomic-load-3.c
>>>> gcc/testsuite/gcc.dg/atomic-lockfree.c
>>>> gcc/testsuite/gcc.dg/atomic-lockfree-aux.c
>>>> gcc/testsuite/gcc.dg/atomic-noinline.c
>>>> gcc/testsuite/gcc.dg/atomic-noinline-aux.c
>>>> gcc/testsuite/gcc.dg/atomic-op-1.c
>>>> gcc/testsuite/gcc.dg/atomic-op-2.c
>>>> gcc/testsuite/gcc.dg/atomic-op-3.c
>>>> gcc/testsuite/gcc.dg/atomic-op-6.c
>>>> gcc/testsuite/gcc.dg/atomic-store-1.c
>>>> gcc/testsuite/gcc.dg/atomic-store-2.c
>>>> gcc/testsuite/gcc.dg/atomic-store-3.c
>>>> gcc/testsuite/g++.dg/ext/atomic-1.C
>>>> gcc/testsuite/g++.dg/ext/atomic-2.C
>>>> gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c
>>>> gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c
>>>> gcc/testsuite/gcc.target/arm/atomic-op-acquire.c
>>>> gcc/testsuite/gcc.target/arm/atomic-op-char.c
>>>> gcc/testsuite/gcc.target/arm/atomic-op-consume.c
>>>> gcc/testsuite/gcc.target/arm/atomic-op-int.c
>>>> gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c
>>>> gcc/testsuite/gcc.target/arm/atomic-op-release.c
>>>> gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c
>>>> gcc/testsuite/gcc.target/arm/atomic-op-short.c
>>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_1.c
>>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_2.c
>>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_3.c
>>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_4.c
>>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_5.c
>>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_6.c
>>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_7.c
>>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_8.c
>>>> gcc/testsuite/gcc.target/arm/atomic_loaddi_9.c
>>>> gcc/testsuite/gcc.target/arm/sync-1.c
>>>> gcc/testsuite/gcc.target/arm/synchronize.c
>>>> gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c
>>>> gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c
>>>> gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c
>>>> gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c
>>>> libstdc++-v3/testsuite/29_atomics/atomic/60658.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/62259.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/64658.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/65147.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/65913.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/70766.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/49445.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/constexpr.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/copy_list.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/default.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/direct_list.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/single_value.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/cons/user_pod.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/operators/51811.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/operators/56011.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/operators/integral_assignment.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/operators/integral_conversion.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/operators/pointer_partial_void.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/requirements/base_classes.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic/requirements/compare_exchange_lowering.cc
>>>>
>>>>
>>>> libstdc++-v3/testsuite/29_atomics/atomic/requirements/explicit_instantiation/1.cc
>>>>
>>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/clear/1.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/cons/1.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/cons/56012.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/cons/aggregate.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/cons/default.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/requirements/standard_layout.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/requirements/trivial.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/test_and_set/explicit.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_flag/test_and_set/implicit.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/60940.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/65147.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/cons/constexpr.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/cons/copy_list.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/cons/default.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/cons/direct_list.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/cons/single_value.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/operators/bitwise.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/operators/decrement.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/operators/increment.cc
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/operators/integral_assignment.cc
>>>>
>>>>
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/operators/integral_conversion.cc
>>>>
>>>>
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/requirements/standard_layout.cc
>>>>
>>>> libstdc++-v3/testsuite/29_atomics/atomic_integral/requirements/trivial.cc
>>>> libstdc++-v3/testsuite/29_atomics/headers/atomic/functions_std_c++0x.cc
>>>> libstdc++-v3/testsuite/29_atomics/headers/atomic/macros.cc
>>>> libstdc++-v3/testsuite/29_atomics/headers/atomic/types_std_c++0x.cc
diff mbox

Patch

diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
index b1e87cdd5d9587d7b301d0dd0072fc41079a04d3..debca40a7ef92e37f0a308d965bd289f6dd74693 100644
--- a/gcc/config/arm/sync.md
+++ b/gcc/config/arm/sync.md
@@ -189,21 +189,23 @@ 
   DONE;
 })
 
+;; Constraints of this pattern must be at least as strict as those of the
+;; cbranchsi operations in thumb1.md, but otherwise as permissive as possible.
 (define_insn_and_split "atomic_compare_and_swap<mode>_1"
-  [(set (match_operand 0 "cc_register_operand" "=&c")		;; bool out
+  [(set (match_operand 0 "cc_register_operand" "=&c,&l,&l,&l")		;; bool out
 	(unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ATOMIC_CAS))
-   (set (match_operand:SI 1 "s_register_operand" "=&r")		;; val out
+   (set (match_operand:SI 1 "s_register_operand" "=&r,&l,&0,&l*h")	;; val out
 	(zero_extend:SI
-	  (match_operand:NARROW 2 "mem_noofs_operand" "+Ua")))	;; memory
+	  (match_operand:NARROW 2 "mem_noofs_operand" "+Ua,Ua,Ua,Ua")))	;; memory
    (set (match_dup 2)
 	(unspec_volatile:NARROW
-	  [(match_operand:SI 3 "arm_add_operand" "rIL")		;; expected
-	   (match_operand:NARROW 4 "s_register_operand" "r")	;; desired
+	  [(match_operand:SI 3 "arm_add_operand" "rIL,lIL*h,J,*r")	;; expected
+	   (match_operand:NARROW 4 "s_register_operand" "r,r,r,r")	;; desired
 	   (match_operand:SI 5 "const_int_operand")		;; is_weak
 	   (match_operand:SI 6 "const_int_operand")		;; mod_s
 	   (match_operand:SI 7 "const_int_operand")]		;; mod_f
 	  VUNSPEC_ATOMIC_CAS))
-   (clobber (match_scratch:SI 8 "=&r"))]
+   (clobber (match_scratch:SI 8 "=&r,X,X,X"))]
   "<sync_predtab>"
   "#"
   "&& reload_completed"
@@ -211,27 +213,30 @@ 
   {
     arm_split_compare_and_swap (operands);
     DONE;
-  })
+  }
+  [(set_attr "arch" "32,v8mb,v8mb,v8mb")])
 
 (define_mode_attr cas_cmp_operand
   [(SI "arm_add_operand") (DI "cmpdi_operand")])
 (define_mode_attr cas_cmp_str
   [(SI "rIL") (DI "rDi")])
 
+;; Constraints of this pattern must be at least as strict as those of the
+;; cbranchsi operations in thumb1.md, but otherwise as permissive as possible.
 (define_insn_and_split "atomic_compare_and_swap<mode>_1"
-  [(set (match_operand 0 "cc_register_operand" "=&c")		;; bool out
+  [(set (match_operand 0 "cc_register_operand" "=&c,&l,&l,&l")		;; bool out
 	(unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ATOMIC_CAS))
-   (set (match_operand:SIDI 1 "s_register_operand" "=&r")	;; val out
-	(match_operand:SIDI 2 "mem_noofs_operand" "+Ua"))	;; memory
+   (set (match_operand:SIDI 1 "s_register_operand" "=&r,&l,&0,&l*h")	;; val out
+	(match_operand:SIDI 2 "mem_noofs_operand" "+Ua,Ua,Ua,Ua"))	;; memory
    (set (match_dup 2)
 	(unspec_volatile:SIDI
-	  [(match_operand:SIDI 3 "<cas_cmp_operand>" "<cas_cmp_str>") ;; expect
-	   (match_operand:SIDI 4 "s_register_operand" "r")	;; desired
+	  [(match_operand:SIDI 3 "<cas_cmp_operand>" "<cas_cmp_str>,lIL*h,J,*r") ;; expect
+	   (match_operand:SIDI 4 "s_register_operand" "r,r,r,r")	;; desired
 	   (match_operand:SI 5 "const_int_operand")		;; is_weak
 	   (match_operand:SI 6 "const_int_operand")		;; mod_s
 	   (match_operand:SI 7 "const_int_operand")]		;; mod_f
 	  VUNSPEC_ATOMIC_CAS))
-   (clobber (match_scratch:SI 8 "=&r"))]
+   (clobber (match_scratch:SI 8 "=&r,X,X,X"))]
   "<sync_predtab>"
   "#"
   "&& reload_completed"
@@ -239,7 +244,8 @@ 
   {
     arm_split_compare_and_swap (operands);
     DONE;
-  })
+  }
+  [(set_attr "arch" "32,v8mb,v8mb,v8mb")])
 
 (define_insn_and_split "atomic_exchange<mode>"
   [(set (match_operand:QHSD 0 "s_register_operand" "=&r")	;; output
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index cd98de7dcb40de483a9f93c0674bd216f4b0c56a..67f2878b45fe47abaaf24d97213613d1572dcd91 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -1059,6 +1059,9 @@ 
 	    (const_string "multiple")))]
 )
 
+;; Changes to the constraints of this pattern must be propagated to those of
+;; atomic compare_and_swap splitters in sync.md.  These must be at least as
+;; strict as the constraints here, but otherwise as permissive as possible.
 (define_insn "cbranchsi4_insn"
   [(set (pc) (if_then_else
 	      (match_operator 0 "arm_comparison_operator"
@@ -1120,6 +1123,9 @@ 
    (set_attr "type" "multiple")]
 )
 
+;; Changes to the constraints of this pattern must be propagated to those of
+;; atomic compare_and_swap splitters in sync.md.  These must be at least as
+;; strict as the constraints here, but otherwise as permissive as possible.
 (define_insn "cbranchsi4_scratch"
   [(set (pc) (if_then_else
 	      (match_operator 4 "arm_comparison_operator"