From patchwork Tue Mar 3 16:06:51 2020
X-Patchwork-Submitter: "Richard Earnshaw (lists)"
X-Patchwork-Id: 1248493
To: "gcc-patches@gcc.gnu.org"
From: "Richard Earnshaw (lists)"
Subject: [committed] libgcc: arm: convert thumb1 code to unified syntax
Message-ID: <36d699ca-c555-8b20-08d0-a9328ec25771@arm.com>
Date: Tue, 3 Mar 2020 16:06:51 +0000

Unified syntax has been the official 
syntax for thumb1 assembly for over 10 years now. It's time we made preparations for that becoming the default in the assembler. But before we can start doing that we really need to clean up some laggards from the olden days. Libgcc support for thumb1 is one such example. This patch converts all of the legacy (disjoint) syntax that I could find over to unified code. The identification was done by using a trick version of gas that defaulted to unified mode which then faults if legacy syntax is encountered. The code produced was then compared against the old code to check for differences. One such difference does exist, but that is because in unified syntax 'movs rd, rn' is encoded as 'lsls rd, rn, #0', rather than 'adds rd, rn, #0'; but that is a deliberate change that was introduced because the lsls encoding more closely reflects the behaviour of 'movs' in arm state (where only some of the condition flags are modified). libgcc: * config/arm/bpabi-v6m.S (aeabi_lcmp): Convert thumb1 code to unified syntax. (aeabi_ulcmp, aeabi_ldivmod, aeabi_uldivmod): Likewise. (aeabi_frsub, aeabi_cfcmpeq, aeabi_fcmpeq): Likewise. (aeabi_fcmp, aeabi_drsub, aeabi_cdrcmple): Likewise. (aeabi_cdcmpeq, aeabi_dcmpeq, aeabi_dcmp): Likewise. * config/arm/lib1funcs.S (Lend_fde): Convert thumb1 code to unified syntax. (divsi3, modsi3): Likewise. (clzdi2, ctzsi2): Likewise. * config/arm/libunwind.S (restore_core_regs): Convert thumb1 code to unified syntax. (UNWIND_WRAPPER): Likewise. Committed to trunk. R. diff --git a/libgcc/config/arm/bpabi-v6m.S b/libgcc/config/arm/bpabi-v6m.S index 29fe8faa6e6..1a403efc872 100644 --- a/libgcc/config/arm/bpabi-v6m.S +++ b/libgcc/config/arm/bpabi-v6m.S @@ -39,21 +39,21 @@ FUNC_START aeabi_lcmp cmp xxh, yyh beq 1f bgt 2f - mov r0, #1 - neg r0, r0 + movs r0, #1 + negs r0, r0 RET 2: - mov r0, #1 + movs r0, #1 RET 1: - sub r0, xxl, yyl + subs r0, xxl, yyl beq 1f bhi 2f - mov r0, #1 - neg r0, r0 + movs r0, #1 + negs r0, r0 RET 2: - mov r0, #1 + movs r0, #1 1: RET FUNC_END aeabi_lcmp @@ -65,15 +65,15 @@ FUNC_START aeabi_lcmp FUNC_START aeabi_ulcmp cmp xxh, yyh bne 1f - sub r0, xxl, yyl + subs r0, xxl, yyl beq 2f 1: bcs 1f - mov r0, #1 - neg r0, r0 + movs r0, #1 + negs r0, r0 RET 1: - mov r0, #1 + movs r0, #1 2: RET FUNC_END aeabi_ulcmp @@ -91,29 +91,29 @@ FUNC_START aeabi_ulcmp cmp xxl, #0 2: beq 3f - mov xxh, #0 - mvn xxh, xxh @ 0xffffffff - mov xxl, xxh + movs xxh, #0 + mvns xxh, xxh @ 0xffffffff + movs xxl, xxh 3: .else blt 6f bgt 4f cmp xxl, #0 beq 5f -4: mov xxl, #0 - mvn xxl, xxl @ 0xffffffff - lsr xxh, xxl, #1 @ 0x7fffffff +4: movs xxl, #0 + mvns xxl, xxl @ 0xffffffff + lsrs xxh, xxl, #1 @ 0x7fffffff b 5f -6: mov xxh, #0x80 - lsl xxh, xxh, #24 @ 0x80000000 - mov xxl, #0 +6: movs xxh, #0x80 + lsls xxh, xxh, #24 @ 0x80000000 + movs xxl, #0 5: .endif @ tailcalls are tricky on v6-m. push {r0, r1, r2} ldr r0, 1f adr r1, 1f - add r0, r1 + adds r0, r1 str r0, [sp, #8] @ We know we are not on armv4t, so pop pc is safe. 
pop {r0, r1, pc} @@ -128,15 +128,15 @@ FUNC_START aeabi_ulcmp FUNC_START aeabi_ldivmod test_div_by_zero signed - push {r0, r1} - mov r0, sp - push {r0, lr} - ldr r0, [sp, #8] - bl SYM(__gnu_ldivmod_helper) - ldr r3, [sp, #4] - mov lr, r3 - add sp, sp, #8 - pop {r2, r3} + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_ldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} RET FUNC_END aeabi_ldivmod @@ -147,15 +147,15 @@ FUNC_START aeabi_ldivmod FUNC_START aeabi_uldivmod test_div_by_zero unsigned - push {r0, r1} - mov r0, sp - push {r0, lr} - ldr r0, [sp, #8] - bl SYM(__udivmoddi4) - ldr r3, [sp, #4] - mov lr, r3 - add sp, sp, #8 - pop {r2, r3} + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__udivmoddi4) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} RET FUNC_END aeabi_uldivmod @@ -166,9 +166,9 @@ FUNC_START aeabi_uldivmod FUNC_START aeabi_frsub push {r4, lr} - mov r4, #1 - lsl r4, #31 - eor r0, r0, r4 + movs r4, #1 + lsls r4, #31 + eors r0, r0, r4 bl __aeabi_fadd pop {r4, pc} @@ -181,7 +181,7 @@ FUNC_START aeabi_frsub FUNC_START aeabi_cfrcmple mov ip, r0 - mov r0, r1 + movs r0, r1 mov r1, ip b 6f @@ -196,8 +196,8 @@ FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq cmp r0, #0 @ Clear the C flag if the return value was -1, indicating @ that the first operand was smaller than the second. - bmi 1f - mov r1, #0 + bmi 1f + movs r1, #0 cmn r0, r1 1: pop {r0, r1, r2, r3, r4, pc} @@ -210,8 +210,8 @@ FUNC_START aeabi_fcmpeq push {r4, lr} bl __eqsf2 - neg r0, r0 - add r0, r0, #1 + negs r0, r0 + adds r0, r0, #1 pop {r4, pc} FUNC_END aeabi_fcmpeq @@ -223,10 +223,10 @@ FUNC_START aeabi_fcmp\cond bl __\helper\mode cmp r0, #0 b\cond 1f - mov r0, #0 + movs r0, #0 pop {r4, pc} 1: - mov r0, #1 + movs r0, #1 pop {r4, pc} FUNC_END aeabi_fcmp\cond @@ -244,9 +244,9 @@ COMPARISON ge, ge FUNC_START aeabi_drsub push {r4, lr} - mov r4, #1 - lsl r4, #31 - eor xxh, xxh, r4 + movs r4, #1 + lsls r4, #31 + eors xxh, xxh, r4 bl __aeabi_dadd pop {r4, pc} @@ -259,10 +259,10 @@ FUNC_START aeabi_drsub FUNC_START aeabi_cdrcmple mov ip, r0 - mov r0, r2 + movs r0, r2 mov r2, ip mov ip, r1 - mov r1, r3 + movs r1, r3 mov r3, ip b 6f @@ -277,8 +277,8 @@ FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq cmp r0, #0 @ Clear the C flag if the return value was -1, indicating @ that the first operand was smaller than the second. - bmi 1f - mov r1, #0 + bmi 1f + movs r1, #0 cmn r0, r1 1: pop {r0, r1, r2, r3, r4, pc} @@ -291,8 +291,8 @@ FUNC_START aeabi_dcmpeq push {r4, lr} bl __eqdf2 - neg r0, r0 - add r0, r0, #1 + negs r0, r0 + adds r0, r0, #1 pop {r4, pc} FUNC_END aeabi_dcmpeq @@ -304,10 +304,10 @@ FUNC_START aeabi_dcmp\cond bl __\helper\mode cmp r0, #0 b\cond 1f - mov r0, #0 + movs r0, #0 pop {r4, pc} 1: - mov r0, #1 + movs r0, #1 pop {r4, pc} FUNC_END aeabi_dcmp\cond diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S index e4b73caabbd..e8d2158f8d6 100644 --- a/libgcc/config/arm/lib1funcs.S +++ b/libgcc/config/arm/lib1funcs.S @@ -22,6 +22,10 @@ a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ +/* Everything in this file should now use unified syntax. */ + + .syntax unified + /* An executable stack is *not* required for these functions. 
*/ #if defined(__ELF__) && defined(__linux__) .section .note.GNU-stack,"",%progbits @@ -270,7 +274,7 @@ LSYM(Lend_fde): #ifdef NOT_ISA_TARGET_32BIT push {r0, lr} - mov r0, #0 + movs r0, #0 bl SYM(__aeabi_idiv0) @ We know we are not on armv4t, so pop pc is safe. pop {r1, pc} @@ -310,7 +314,7 @@ LSYM(Lend_fde): push { r1, lr } 98: cfi_push 98b - __\name, 0xe, -0x4, 0x8 bl SYM (__div0) - mov r0, #0 @ About as wrong as it could be. + movs r0, #0 @ About as wrong as it could be. #if defined (__INTERWORKING__) pop { r1, r2 } bx r2 @@ -349,7 +353,7 @@ SYM (\name): #define THUMB_FUNC .thumb_func #define THUMB_CODE .force_thumb # if defined(__thumb2__) -#define THUMB_SYNTAX .syntax divided +#define THUMB_SYNTAX # else #define THUMB_SYNTAX # endif @@ -725,8 +729,8 @@ pc .req r15 /* ------------------------------------------------------------------------ */ .macro THUMB_DIV_MOD_BODY modulo @ Load the constant 0x10000000 into our work register. - mov work, #1 - lsl work, #28 + movs work, #1 + lsls work, #28 LSYM(Loop1): @ Unless the divisor is very big, shift it up in multiples of @ four bits, since this is the amount of unwinding in the main @@ -736,12 +740,12 @@ LSYM(Loop1): bhs LSYM(Lbignum) cmp divisor, dividend bhs LSYM(Lbignum) - lsl divisor, #4 - lsl curbit, #4 + lsls divisor, #4 + lsls curbit, #4 b LSYM(Loop1) LSYM(Lbignum): @ Set work to 0x80000000 - lsl work, #3 + lsls work, #3 LSYM(Loop2): @ For very big divisors, we must shift it a bit at a time, or @ we will be in danger of overflowing. @@ -749,8 +753,8 @@ LSYM(Loop2): bhs LSYM(Loop3) cmp divisor, dividend bhs LSYM(Loop3) - lsl divisor, #1 - lsl curbit, #1 + lsls divisor, #1 + lsls curbit, #1 b LSYM(Loop2) LSYM(Loop3): @ Test for possible subtractions ... @@ -758,39 +762,39 @@ LSYM(Loop3): @ ... On the final pass, this may subtract too much from the dividend, @ so keep track of which subtractions are done, we can fix them up @ afterwards. - mov overdone, #0 + movs overdone, #0 cmp dividend, divisor blo LSYM(Lover1) - sub dividend, dividend, divisor + subs dividend, dividend, divisor LSYM(Lover1): - lsr work, divisor, #1 + lsrs work, divisor, #1 cmp dividend, work blo LSYM(Lover2) - sub dividend, dividend, work + subs dividend, dividend, work mov ip, curbit - mov work, #1 - ror curbit, work - orr overdone, curbit + movs work, #1 + rors curbit, work + orrs overdone, curbit mov curbit, ip LSYM(Lover2): - lsr work, divisor, #2 + lsrs work, divisor, #2 cmp dividend, work blo LSYM(Lover3) - sub dividend, dividend, work + subs dividend, dividend, work mov ip, curbit - mov work, #2 - ror curbit, work - orr overdone, curbit + movs work, #2 + rors curbit, work + orrs overdone, curbit mov curbit, ip LSYM(Lover3): - lsr work, divisor, #3 + lsrs work, divisor, #3 cmp dividend, work blo LSYM(Lover4) - sub dividend, dividend, work + subs dividend, dividend, work mov ip, curbit - mov work, #3 - ror curbit, work - orr overdone, curbit + movs work, #3 + rors curbit, work + orrs overdone, curbit mov curbit, ip LSYM(Lover4): mov ip, curbit @@ -800,46 +804,46 @@ LSYM(Lover4): @ since the "bit" will have been shifted out at the bottom. 
cmp dividend, divisor blo LSYM(Lover1) - sub dividend, dividend, divisor - orr result, result, curbit + subs dividend, dividend, divisor + orrs result, result, curbit LSYM(Lover1): - lsr work, divisor, #1 + lsrs work, divisor, #1 cmp dividend, work blo LSYM(Lover2) - sub dividend, dividend, work - lsr work, curbit, #1 - orr result, work + subs dividend, dividend, work + lsrs work, curbit, #1 + orrs result, work LSYM(Lover2): - lsr work, divisor, #2 + lsrs work, divisor, #2 cmp dividend, work blo LSYM(Lover3) - sub dividend, dividend, work - lsr work, curbit, #2 - orr result, work + subs dividend, dividend, work + lsrs work, curbit, #2 + orrs result, work LSYM(Lover3): - lsr work, divisor, #3 + lsrs work, divisor, #3 cmp dividend, work blo LSYM(Lover4) - sub dividend, dividend, work - lsr work, curbit, #3 - orr result, work + subs dividend, dividend, work + lsrs work, curbit, #3 + orrs result, work LSYM(Lover4): .endif cmp dividend, #0 @ Early termination? beq LSYM(Lover5) - lsr curbit, #4 @ No, any more bits to do? + lsrs curbit, #4 @ No, any more bits to do? beq LSYM(Lover5) - lsr divisor, #4 + lsrs divisor, #4 b LSYM(Loop3) LSYM(Lover5): .if \modulo @ Any subtractions that we should not have done will be recorded in @ the top three bits of "overdone". Exactly which were not needed @ are governed by the position of the bit, stored in ip. - mov work, #0xe - lsl work, #28 - and overdone, work + movs work, #0xe + lsls work, #28 + ands overdone, work beq LSYM(Lgot_result) @ If we terminated early, because dividend became zero, then the @@ -849,33 +853,33 @@ LSYM(Lover5): @ the bit in ip could be in the top two bits which might then match @ with one of the smaller RORs. mov curbit, ip - mov work, #0x7 + movs work, #0x7 tst curbit, work beq LSYM(Lgot_result) mov curbit, ip - mov work, #3 - ror curbit, work + movs work, #3 + rors curbit, work tst overdone, curbit beq LSYM(Lover6) - lsr work, divisor, #3 - add dividend, work + lsrs work, divisor, #3 + adds dividend, work LSYM(Lover6): mov curbit, ip - mov work, #2 - ror curbit, work + movs work, #2 + rors curbit, work tst overdone, curbit beq LSYM(Lover7) - lsr work, divisor, #2 - add dividend, work + lsrs work, divisor, #2 + adds dividend, work LSYM(Lover7): mov curbit, ip - mov work, #1 - ror curbit, work + movs work, #1 + rors curbit, work tst overdone, curbit beq LSYM(Lgot_result) - lsr work, divisor, #1 - add dividend, work + lsrs work, divisor, #1 + adds dividend, work .endif LSYM(Lgot_result): .endm @@ -885,7 +889,7 @@ LSYM(Lgot_result): /* Branch to div(n), and jump to label if curbit is lo than divisior. */ .macro BranchToDiv n, label - lsr curbit, dividend, \n + lsrs curbit, dividend, \n cmp curbit, divisor blo \label .endm @@ -893,13 +897,13 @@ LSYM(Lgot_result): /* Body of div(n). Shift the divisor in n bits and compare the divisor and dividend. Update the dividend as the substruction result. */ .macro DoDiv n - lsr curbit, dividend, \n + lsrs curbit, dividend, \n cmp curbit, divisor bcc 1f - lsl curbit, divisor, \n - sub dividend, dividend, curbit + lsls curbit, divisor, \n + subs dividend, dividend, curbit -1: adc result, result +1: adcs result, result .endm /* The body of division with positive divisor. Unless the divisor is very @@ -907,29 +911,29 @@ LSYM(Lgot_result): unwinding in the main division loop. Continue shifting until the divisor is larger than the dividend. 
*/ .macro THUMB1_Div_Positive - mov result, #0 + movs result, #0 BranchToDiv #1, LSYM(Lthumb1_div1) BranchToDiv #4, LSYM(Lthumb1_div4) BranchToDiv #8, LSYM(Lthumb1_div8) BranchToDiv #12, LSYM(Lthumb1_div12) BranchToDiv #16, LSYM(Lthumb1_div16) LSYM(Lthumb1_div_large_positive): - mov result, #0xff - lsl divisor, divisor, #8 + movs result, #0xff + lsls divisor, divisor, #8 rev result, result - lsr curbit, dividend, #16 + lsrs curbit, dividend, #16 cmp curbit, divisor blo 1f - asr result, #8 - lsl divisor, divisor, #8 + asrs result, #8 + lsls divisor, divisor, #8 beq LSYM(Ldivbyzero_waypoint) -1: lsr curbit, dividend, #12 +1: lsrs curbit, dividend, #12 cmp curbit, divisor blo LSYM(Lthumb1_div12) b LSYM(Lthumb1_div16) LSYM(Lthumb1_div_loop): - lsr divisor, divisor, #8 + lsrs divisor, divisor, #8 LSYM(Lthumb1_div16): Dodiv #15 Dodiv #14 @@ -954,11 +958,11 @@ LSYM(Lthumb1_div3): LSYM(Lthumb1_div2): Dodiv #1 LSYM(Lthumb1_div1): - sub divisor, dividend, divisor + subs divisor, dividend, divisor bcs 1f cpy divisor, dividend -1: adc result, result +1: adcs result, result cpy dividend, result RET @@ -970,43 +974,43 @@ LSYM(Ldivbyzero_waypoint): THUMB1_Div_Positive except that the shift steps are in multiples of six bits. */ .macro THUMB1_Div_Negative - lsr result, divisor, #31 + lsrs result, divisor, #31 beq 1f - neg divisor, divisor + negs divisor, divisor -1: asr curbit, dividend, #32 +1: asrs curbit, dividend, #32 bcc 2f - neg dividend, dividend + negs dividend, dividend -2: eor curbit, result - mov result, #0 +2: eors curbit, result + movs result, #0 cpy ip, curbit BranchToDiv #4, LSYM(Lthumb1_div_negative4) BranchToDiv #8, LSYM(Lthumb1_div_negative8) LSYM(Lthumb1_div_large): - mov result, #0xfc - lsl divisor, divisor, #6 + movs result, #0xfc + lsls divisor, divisor, #6 rev result, result - lsr curbit, dividend, #8 + lsrs curbit, dividend, #8 cmp curbit, divisor blo LSYM(Lthumb1_div_negative8) - lsl divisor, divisor, #6 - asr result, result, #6 + lsls divisor, divisor, #6 + asrs result, result, #6 cmp curbit, divisor blo LSYM(Lthumb1_div_negative8) - lsl divisor, divisor, #6 - asr result, result, #6 + lsls divisor, divisor, #6 + asrs result, result, #6 cmp curbit, divisor blo LSYM(Lthumb1_div_negative8) - lsl divisor, divisor, #6 + lsls divisor, divisor, #6 beq LSYM(Ldivbyzero_negative) - asr result, result, #6 + asrs result, result, #6 b LSYM(Lthumb1_div_negative8) LSYM(Lthumb1_div_negative_loop): - lsr divisor, divisor, #6 + lsrs divisor, divisor, #6 LSYM(Lthumb1_div_negative8): DoDiv #7 DoDiv #6 @@ -1017,28 +1021,28 @@ LSYM(Lthumb1_div_negative4): DoDiv #2 bcs LSYM(Lthumb1_div_negative_loop) DoDiv #1 - sub divisor, dividend, divisor + subs divisor, dividend, divisor bcs 1f cpy divisor, dividend 1: cpy curbit, ip - adc result, result - asr curbit, curbit, #1 + adcs result, result + asrs curbit, curbit, #1 cpy dividend, result bcc 2f - neg dividend, dividend + negs dividend, dividend cmp curbit, #0 2: bpl 3f - neg divisor, divisor + negs divisor, divisor 3: RET LSYM(Ldivbyzero_negative): cpy curbit, ip - asr curbit, curbit, #1 + asrs curbit, curbit, #1 bcc LSYM(Ldiv0) - neg dividend, dividend + negs dividend, dividend .endm #endif /* ARM Thumb version. 
*/ @@ -1056,8 +1060,8 @@ LSYM(Ldivbyzero_negative): cmp divisor, #0 beq LSYM(Ldiv0) LSYM(udivsi3_skip_div0_test): - mov curbit, #1 - mov result, #0 + movs curbit, #1 + movs result, #0 push { work } cmp dividend, divisor @@ -1065,7 +1069,7 @@ LSYM(udivsi3_skip_div0_test): THUMB_DIV_MOD_BODY 0 - mov r0, result + movs r0, result pop { work } RET @@ -1184,7 +1188,7 @@ ARM_FUNC_START aeabi_uidivmod cmp divisor, #0 beq LSYM(Ldiv0) - mov curbit, #1 + movs curbit, #1 cmp dividend, divisor bhs LSYM(Lover10) RET @@ -1263,7 +1267,7 @@ LSYM(Lover12): #else LSYM(divsi3_skip_div0_test): cpy curbit, dividend - orr curbit, divisor + orrs curbit, divisor bmi LSYM(Lthumb1_div_negative) LSYM(Lthumb1_div_positive): @@ -1395,11 +1399,11 @@ ARM_FUNC_START aeabi_idivmod FUNC_START modsi3 - mov curbit, #1 + movs curbit, #1 cmp divisor, #0 beq LSYM(Ldiv0) bpl LSYM(Lover10) - neg divisor, divisor @ Loops below use unsigned. + negs divisor, divisor @ Loops below use unsigned. LSYM(Lover10): push { work } @ Need to save the sign of the dividend, unfortunately, we need @@ -1408,7 +1412,7 @@ LSYM(Lover10): push { dividend } cmp dividend, #0 bpl LSYM(Lover11) - neg dividend, dividend + negs dividend, dividend LSYM(Lover11): cmp dividend, divisor blo LSYM(Lgot_result) @@ -1418,7 +1422,7 @@ LSYM(Lover11): pop { work } cmp work, #0 bpl LSYM(Lover12) - neg dividend, dividend + negs dividend, dividend LSYM(Lover12): pop { work } RET @@ -1540,12 +1544,12 @@ LSYM(Lover12): address, so just clear pc..pc+1. */ #if defined __thumb__ && !defined __thumb2__ push {r7} - mov r7, #0xf - lsl r7, #16 - add r7, #2 + movs r7, #0xf + lsls r7, #16 + adds r7, #2 adr r0, . + 4 - add r1, r0, #1 - mov r2, #0 + adds r1, r0, #1 + movs r2, #0 svc 0 pop {r7} #else @@ -1595,17 +1599,17 @@ LSYM(Lover12): FUNC_ALIAS aeabi_llsr lshrdi3 #ifdef __thumb__ - lsr al, r2 - mov r3, ah - lsr ah, r2 + lsrs al, r2 + movs r3, ah + lsrs ah, r2 mov ip, r3 - sub r2, #32 - lsr r3, r2 - orr al, r3 - neg r2, r2 + subs r2, #32 + lsrs r3, r2 + orrs al, r3 + negs r2, r2 mov r3, ip - lsl r3, r2 - orr al, r3 + lsls r3, r2 + orrs al, r3 RET #else subs r3, r2, #32 @@ -1627,21 +1631,21 @@ LSYM(Lover12): FUNC_ALIAS aeabi_lasr ashrdi3 #ifdef __thumb__ - lsr al, r2 - mov r3, ah - asr ah, r2 - sub r2, #32 + lsrs al, r2 + movs r3, ah + asrs ah, r2 + subs r2, #32 @ If r2 is negative at this point the following step would OR @ the sign bit into all of AL. That's not what we want... 
bmi 1f mov ip, r3 - asr r3, r2 - orr al, r3 + asrs r3, r2 + orrs al, r3 mov r3, ip 1: - neg r2, r2 - lsl r3, r2 - orr al, r3 + negs r2, r2 + lsls r3, r2 + orrs al, r3 RET #else subs r3, r2, #32 @@ -1664,17 +1668,17 @@ LSYM(Lover12): FUNC_ALIAS aeabi_llsl ashldi3 #ifdef __thumb__ - lsl ah, r2 - mov r3, al - lsl al, r2 + lsls ah, r2 + movs r3, al + lsls al, r2 mov ip, r3 - sub r2, #32 - lsl r3, r2 - orr ah, r3 - neg r2, r2 + subs r2, #32 + lsls r3, r2 + orrs ah, r3 + negs r2, r2 mov r3, ip - lsr r3, r2 - orr ah, r3 + lsrs r3, r2 + orrs ah, r3 RET #else subs r3, r2, #32 @@ -1695,26 +1699,26 @@ LSYM(Lover12): #ifdef L_clzsi2 #ifdef NOT_ISA_TARGET_32BIT FUNC_START clzsi2 - mov r1, #28 - mov r3, #1 - lsl r3, r3, #16 + movs r1, #28 + movs r3, #1 + lsls r3, r3, #16 cmp r0, r3 /* 0x10000 */ bcc 2f - lsr r0, r0, #16 - sub r1, r1, #16 -2: lsr r3, r3, #8 + lsrs r0, r0, #16 + subs r1, r1, #16 +2: lsrs r3, r3, #8 cmp r0, r3 /* #0x100 */ bcc 2f - lsr r0, r0, #8 - sub r1, r1, #8 -2: lsr r3, r3, #4 + lsrs r0, r0, #8 + subs r1, r1, #8 +2: lsrs r3, r3, #4 cmp r0, r3 /* #0x10 */ bcc 2f - lsr r0, r0, #4 - sub r1, r1, #4 + lsrs r0, r0, #4 + subs r1, r1, #4 2: adr r2, 1f ldrb r0, [r2, r0] - add r0, r0, r1 + adds r0, r0, r1 bx lr .align 2 1: @@ -1757,34 +1761,49 @@ ARM_FUNC_START clzsi2 # ifdef NOT_ISA_TARGET_32BIT FUNC_START clzdi2 push {r4, lr} -# else + cmp xxh, #0 + bne 1f +# ifdef __ARMEB__ + movs r0, xxl + bl __clzsi2 + adds r0, r0, #32 + b 2f +1: + bl __clzsi2 +# else + bl __clzsi2 + adds r0, r0, #32 + b 2f +1: + movs r0, xxh + bl __clzsi2 +# endif +2: + pop {r4, pc} +# else /* NOT_ISA_TARGET_32BIT */ ARM_FUNC_START clzdi2 do_push {r4, lr} -# endif cmp xxh, #0 bne 1f -# ifdef __ARMEB__ +# ifdef __ARMEB__ mov r0, xxl bl __clzsi2 add r0, r0, #32 b 2f 1: bl __clzsi2 -# else +# else bl __clzsi2 add r0, r0, #32 b 2f 1: mov r0, xxh bl __clzsi2 -# endif +# endif 2: -# ifdef NOT_ISA_TARGET_32BIT - pop {r4, pc} -# else RETLDM r4 -# endif FUNC_END clzdi2 +# endif /* NOT_ISA_TARGET_32BIT */ #else /* defined (__ARM_FEATURE_CLZ) */ @@ -1803,28 +1822,28 @@ ARM_FUNC_START clzdi2 #ifdef L_ctzsi2 #ifdef NOT_ISA_TARGET_32BIT FUNC_START ctzsi2 - neg r1, r0 - and r0, r0, r1 - mov r1, #28 - mov r3, #1 - lsl r3, r3, #16 + negs r1, r0 + ands r0, r0, r1 + movs r1, #28 + movs r3, #1 + lsls r3, r3, #16 cmp r0, r3 /* 0x10000 */ bcc 2f - lsr r0, r0, #16 - sub r1, r1, #16 -2: lsr r3, r3, #8 + lsrs r0, r0, #16 + subs r1, r1, #16 +2: lsrs r3, r3, #8 cmp r0, r3 /* #0x100 */ bcc 2f - lsr r0, r0, #8 - sub r1, r1, #8 -2: lsr r3, r3, #4 + lsrs r0, r0, #8 + subs r1, r1, #8 +2: lsrs r3, r3, #4 cmp r0, r3 /* #0x10 */ bcc 2f - lsr r0, r0, #4 - sub r1, r1, #4 + lsrs r0, r0, #4 + subs r1, r1, #4 2: adr r2, 1f ldrb r0, [r2, r0] - sub r0, r0, r1 + subs r0, r0, r1 bx lr .align 2 1: diff --git a/libgcc/config/arm/libunwind.S b/libgcc/config/arm/libunwind.S index 176ba5e73b7..08e0fcc0c58 100644 --- a/libgcc/config/arm/libunwind.S +++ b/libgcc/config/arm/libunwind.S @@ -63,28 +63,28 @@ /* r0 points to a 16-word block. Upload these values to the actual core state. */ FUNC_START restore_core_regs - mov r1, r0 - add r1, r1, #52 - ldmia r1!, {r3, r4, r5} - sub r3, r3, #4 - mov ip, r3 - str r5, [r3] - mov lr, r4 + movs r1, r0 + adds r1, r1, #52 + ldmia r1!, {r3, r4, r5} + subs r3, r3, #4 + mov ip, r3 + str r5, [r3] + mov lr, r4 /* Restore r8-r11. 
*/ - mov r1, r0 - add r1, r1, #32 - ldmia r1!, {r2, r3, r4, r5} - mov r8, r2 - mov r9, r3 - mov sl, r4 - mov fp, r5 - mov r1, r0 - add r1, r1, #8 - ldmia r1!, {r2, r3, r4, r5, r6, r7} - ldr r1, [r0, #4] - ldr r0, [r0] - mov sp, ip - pop {pc} + movs r1, r0 + adds r1, r1, #32 + ldmia r1!, {r2, r3, r4, r5} + mov r8, r2 + mov r9, r3 + mov sl, r4 + mov fp, r5 + movs r1, r0 + adds r1, r1, #8 + ldmia r1!, {r2, r3, r4, r5, r6, r7} + ldr r1, [r0, #4] + ldr r0, [r0] + mov sp, ip + pop {pc} FUNC_END restore_core_regs UNPREFIX restore_core_regs @@ -132,38 +132,38 @@ FUNC_START gnu_Unwind_Save_WMMXC FUNC_START \name /* Create a phase2_vrs structure. */ /* Save r0 in the PC slot so we can use it as a scratch register. */ - push {r0} - add r0, sp, #4 - push {r0, lr} /* Push original SP and LR. */ + push {r0} + add r0, sp, #4 + push {r0, lr} /* Push original SP and LR. */ /* Make space for r8-r12. */ - sub sp, sp, #20 + sub sp, sp, #20 /* Save low registers. */ - push {r0, r1, r2, r3, r4, r5, r6, r7} + push {r0, r1, r2, r3, r4, r5, r6, r7} /* Save high registers. */ - add r0, sp, #32 - mov r1, r8 - mov r2, r9 - mov r3, sl - mov r4, fp - mov r5, ip - stmia r0!, {r1, r2, r3, r4, r5} + add r0, sp, #32 + mov r1, r8 + mov r2, r9 + mov r3, sl + mov r4, fp + mov r5, ip + stmia r0!, {r1, r2, r3, r4, r5} /* Restore original low register values. */ - add r0, sp, #4 - ldmia r0!, {r1, r2, r3, r4, r5} + add r0, sp, #4 + ldmia r0!, {r1, r2, r3, r4, r5} /* Restore orginial r0. */ - ldr r0, [sp, #60] - str r0, [sp] + ldr r0, [sp, #60] + str r0, [sp] /* Demand-save flags, plus an extra word for alignment. */ - mov r3, #0 - push {r2, r3} + movs r3, #0 + push {r2, r3} /* Point r1 at the block. Pass r[0..nargs) unchanged. */ - add r\nargs, sp, #4 + add r\nargs, sp, #4 - bl SYM (__gnu\name) + bl SYM (__gnu\name) - ldr r3, [sp, #64] - add sp, sp, #72 - bx r3 + ldr r3, [sp, #64] + add sp, sp, #72 + bx r3 FUNC_END \name UNPREFIX \name
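
For reference, a minimal sketch (not part of the committed diff) of the
conversion pattern the patch applies throughout: in divided (legacy) thumb1
syntax the flag-setting behaviour of the low-register data-processing forms
is implicit, whereas unified syntax requires the 's' suffix to be written
out once a file declares ".syntax unified", as lib1funcs.S now does.  The
label name example_conv below is purely illustrative and assumes GNU as
targeting Thumb.

	.syntax unified
	.thumb
	.text
	.thumb_func
	.global	example_conv
example_conv:
	@ divided syntax was:  mov r0, #1 ; neg r0, r0 ; lsl r0, #31
	movs	r0, #1			@ 's' suffix now explicit
	negs	r0, r0			@ alias for rsbs r0, r0, #0
	lsls	r0, r0, #31
	@ Register-to-register 'movs r0, r1' is encoded as
	@ 'lsls r0, r1, #0' under unified syntax, rather than the old
	@ 'adds r0, r1, #0' -- the one encoding difference called out
	@ in the commit message above.
	movs	r0, r1
	bx	lr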