From patchwork Fri Jan 15 11:30:49 2021
X-Patchwork-Submitter: Daniel Engel
X-Patchwork-Id: 1426916
From: Daniel Engel
To: gcc-patches@gcc.gnu.org
Cc: Richard.Earnshaw@foss.arm.com
Subject: [PATCH v5 21/33] Import 64-bit division from the CM0 library
Date: Fri, 15 Jan 2021 03:30:49 -0800

gcc/libgcc/ChangeLog:
2021-01-13  Daniel Engel  <gnu@danielengel.com>

	* config/arm/bpabi.c: Deleted unused file.
	* config/arm/eabi/ldiv.S (__aeabi_ldivmod, __aeabi_uldivmod):
	Replaced wrapper functions with a complete implementation.
	* config/arm/t-bpabi (LIB2ADD): Removed bpabi.c.
	* config/arm/t-elf (LIB1ASMFUNCS): Added _divdi3 and _udivdi3.
---
 libgcc/config/arm/bpabi.c     |  42 ---
 libgcc/config/arm/eabi/ldiv.S | 542 +++++++++++++++++++++++++++++-----
 libgcc/config/arm/t-bpabi     |   3 +-
 libgcc/config/arm/t-elf       |   9 +
 4 files changed, 474 insertions(+), 122 deletions(-)
 delete mode 100644 libgcc/config/arm/bpabi.c
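For review context: both entry points implement a combined divide/modulo,
returning the quotient in $r1:$r0 and the remainder in $r3:$r2.  A rough C
model of that contract -- illustration only, not part of the patch, and with
an invented struct name -- is the same relationship the deleted
__gnu_ldivmod_helper() maintained:

    typedef struct { long long quot; long long rem; } lldiv_model_t;

    static lldiv_model_t
    ldivmod_model (long long n, long long d)
    {
      lldiv_model_t result;
      result.quot = n / d;                  /* __divdi3 in the old wrapper */
      result.rem = n - d * result.quot;     /* remainder reconstructed */
      return result;
    }
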
diff --git a/libgcc/config/arm/bpabi.c b/libgcc/config/arm/bpabi.c
deleted file mode 100644
index bf6ba757964..00000000000
--- a/libgcc/config/arm/bpabi.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* Miscellaneous BPABI functions.
-
-   Copyright (C) 2003-2021 Free Software Foundation, Inc.
-   Contributed by CodeSourcery, LLC.
-
-   This file is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by the
-   Free Software Foundation; either version 3, or (at your option) any
-   later version.
-
-   This file is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-extern long long __divdi3 (long long, long long);
-extern unsigned long long __udivdi3 (unsigned long long,
-                                     unsigned long long);
-extern long long __gnu_ldivmod_helper (long long, long long, long long *);
-
-
-long long
-__gnu_ldivmod_helper (long long a,
-                      long long b,
-                      long long *remainder)
-{
-  long long quotient;
-
-  quotient = __divdi3 (a, b);
-  *remainder = a - b * quotient;
-  return quotient;
-}
-
diff --git a/libgcc/config/arm/eabi/ldiv.S b/libgcc/config/arm/eabi/ldiv.S
index 3c8280ef580..c225e5973b2 100644
--- a/libgcc/config/arm/eabi/ldiv.S
+++ b/libgcc/config/arm/eabi/ldiv.S
@@ -1,8 +1,7 @@
-/* Miscellaneous BPABI functions.  Thumb-1 implementation, suitable for ARMv4T,
-   ARMv6-M and ARMv8-M Baseline like ISA variants.
+/* ldiv.S: Thumb-1 optimized 64-bit integer division
 
-   Copyright (C) 2006-2020 Free Software Foundation, Inc.
-   Contributed by CodeSourcery.
+   Copyright (C) 2018-2021 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (gnu@danielengel.com)
 
    This file is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the
@@ -24,84 +23,471 @@
    <http://www.gnu.org/licenses/>.  */
 
-.macro test_div_by_zero signed
-        cmp     yyh, #0
-        bne     7f
-        cmp     yyl, #0
-        bne     7f
-        cmp     xxh, #0
-        .ifc    \signed, unsigned
-        bne     2f
-        cmp     xxl, #0
-2:
-        beq     3f
-        movs    xxh, #0
-        mvns    xxh, xxh                @ 0xffffffff
-        movs    xxl, xxh
-3:
-        .else
-        blt     6f
-        bgt     4f
-        cmp     xxl, #0
-        beq     5f
-4:      movs    xxl, #0
-        mvns    xxl, xxl                @ 0xffffffff
-        lsrs    xxh, xxl, #1            @ 0x7fffffff
-        b       5f
-6:      movs    xxh, #0x80
-        lsls    xxh, xxh, #24           @ 0x80000000
-        movs    xxl, #0
-5:
-        .endif
-        @ tailcalls are tricky on v6-m.
-        push    {r0, r1, r2}
-        ldr     r0, 1f
-        adr     r1, 1f
-        adds    r0, r1
-        str     r0, [sp, #8]
-        @ We know we are not on armv4t, so pop pc is safe.
-        pop     {r0, r1, pc}
-        .align  2
-1:
-        .word   __aeabi_ldiv0 - 1b
-7:
-.endm
-
-#ifdef L_aeabi_ldivmod
-
-FUNC_START aeabi_ldivmod
-        test_div_by_zero signed
-
-        push    {r0, r1}
-        mov     r0, sp
-        push    {r0, lr}
-        ldr     r0, [sp, #8]
-        bl      SYM(__gnu_ldivmod_helper)
-        ldr     r3, [sp, #4]
-        mov     lr, r3
-        add     sp, sp, #8
-        pop     {r2, r3}
+#ifndef __GNUC__
+
+// long long __aeabi_ldiv0(long long)
+// Helper function for division by 0.
+WEAK_START_SECTION aeabi_ldiv0 .text.sorted.libgcc.ldiv.ldiv0
+    CFI_START_FUNCTION
+
+  #if defined(TRAP_EXCEPTIONS) && TRAP_EXCEPTIONS
+        svc     #(SVC_DIVISION_BY_ZERO)
+  #endif
+
         RET
-        FUNC_END aeabi_ldivmod
-#endif /* L_aeabi_ldivmod */
+    CFI_END_FUNCTION
+FUNC_END aeabi_ldiv0
 
-#ifdef L_aeabi_uldivmod
+#endif /* !__GNUC__ */
 
-FUNC_START aeabi_uldivmod
-        test_div_by_zero unsigned
-        push    {r0, r1}
-        mov     r0, sp
-        push    {r0, lr}
-        ldr     r0, [sp, #8]
-        bl      SYM(__udivmoddi4)
-        ldr     r3, [sp, #4]
-        mov     lr, r3
-        add     sp, sp, #8
-        pop     {r2, r3}
-        RET
-        FUNC_END aeabi_uldivmod
+
+#ifdef L_divdi3
+
+// long long __aeabi_ldiv(long long, long long)
+// lldiv_return __aeabi_ldivmod(long long, long long)
+// Returns signed $r1:$r0 after division by $r3:$r2.
+// Also returns the remainder in $r3:$r2.
+// Same parent section as __divsi3() to keep branches within range.
+FUNC_START_SECTION divdi3 .text.sorted.libgcc.ldiv.divdi3
+
+#ifndef __symbian__
+  FUNC_ALIAS aeabi_ldiv divdi3
+  FUNC_ALIAS aeabi_ldivmod divdi3
+#endif
+
+    CFI_START_FUNCTION
+
+        // Test the denominator for zero before pushing registers.
+        cmp     yyl, #0
+        bne     LLSYM(__ldivmod_valid)
+
+        cmp     yyh, #0
+  #if defined(PEDANTIC_DIV0) && PEDANTIC_DIV0
+        beq     LLSYM(__ldivmod_zero)
+  #else
+        beq     SYM(__uldivmod_zero)
+  #endif
+
+    LLSYM(__ldivmod_valid):
+  #if defined(DOUBLE_ALIGN_STACK) && DOUBLE_ALIGN_STACK
+        push    { rP, rQ, rT, lr }
+                .cfi_remember_state
+                .cfi_adjust_cfa_offset 16
+                .cfi_rel_offset rP, 0
+                .cfi_rel_offset rQ, 4
+                .cfi_rel_offset rT, 8
+                .cfi_rel_offset lr, 12
+  #else
+        push    { rP, rQ, lr }
+                .cfi_remember_state
+                .cfi_adjust_cfa_offset 12
+                .cfi_rel_offset rP, 0
+                .cfi_rel_offset rQ, 4
+                .cfi_rel_offset lr, 8
+  #endif
+
+        // Absolute value of the numerator.
+        asrs    rP, xxh, #31
+        eors    xxl, rP
+        eors    xxh, rP
+        subs    xxl, rP
+        sbcs    xxh, rP
+
+        // Absolute value of the denominator.
+        asrs    rQ, yyh, #31
+        eors    yyl, rQ
+        eors    yyh, rQ
+        subs    yyl, rQ
+        sbcs    yyh, rQ
+
+        // Keep the XOR of signs for the quotient.
+        eors    rQ, rP
+
+        // Handle division as unsigned.
+        bl      SYM(__uldivmod_nonzero) __PLT__
+
+        // Set the sign of the quotient.
+        eors    xxl, rQ
+        eors    xxh, rQ
+        subs    xxl, rQ
+        sbcs    xxh, rQ
+
+        // Set the sign of the remainder.
+        eors    yyl, rP
+        eors    yyh, rP
+        subs    yyl, rP
+        sbcs    yyh, rP
+
+    LLSYM(__ldivmod_return):
+  #if defined(DOUBLE_ALIGN_STACK) && DOUBLE_ALIGN_STACK
+        pop     { rP, rQ, rT, pc }
+                .cfi_restore_state
+  #else
+        pop     { rP, rQ, pc }
+                .cfi_restore_state
+  #endif
+
+  #if defined(PEDANTIC_DIV0) && PEDANTIC_DIV0
+    LLSYM(__ldivmod_zero):
+        // Save the sign of the numerator.
+        asrs    yyl, xxh, #31
+
+        // Set up the *div0() parameter specified in the ARM runtime ABI:
+        //  * 0 if the numerator is 0,
+        //  * Or, the largest value of the type manipulated by the calling
+        //     division function if the numerator is positive,
+        //  * Or, the least value of the type manipulated by the calling
+        //     division function if the numerator is negative.
+        rsbs    xxl, #0
+        sbcs    yyh, xxh
+        orrs    xxh, yyh
+        asrs    xxl, xxh, #31
+        lsrs    xxh, xxl, #1
+        eors    xxh, yyl
+        eors    xxl, yyl
+
+        // At least the __aeabi_ldiv0() call is common.
+        b       SYM(__uldivmod_zero2)
+  #endif /* PEDANTIC_DIV0 */
+
+    CFI_END_FUNCTION
+FUNC_END divdi3
+
+#ifndef __symbian__
+  FUNC_END aeabi_ldiv
+  FUNC_END aeabi_ldivmod
+#endif
+
+#endif /* L_divdi3 */
+
+
+#ifdef L_udivdi3
+
+// unsigned long long __aeabi_uldiv(unsigned long long, unsigned long long)
+// ulldiv_return __aeabi_uldivmod(unsigned long long, unsigned long long)
+// Returns unsigned $r1:$r0 after division by $r3:$r2.
+// Also returns the remainder in $r3:$r2.
+FUNC_START_SECTION udivdi3 .text.sorted.libgcc.ldiv.udivdi3
+
+#ifndef __symbian__
+  FUNC_ALIAS aeabi_uldiv udivdi3
+  FUNC_ALIAS aeabi_uldivmod udivdi3
+#endif
+
+    CFI_START_FUNCTION
+
+        // Test the denominator for zero before changing the stack.
+        cmp     yyh, #0
+        bne     SYM(__uldivmod_nonzero)
+
+        cmp     yyl, #0
+  #if defined(PEDANTIC_DIV0) && PEDANTIC_DIV0
+        beq     LLSYM(__uldivmod_zero)
+  #else
+        beq     SYM(__uldivmod_zero)
+  #endif
+
+  #if defined(OPTIMIZE_SPEED) && OPTIMIZE_SPEED
+        // MAYBE: Optimize division by a power of 2
+  #endif
+
+    FUNC_ENTRY uldivmod_nonzero
+        push    { rP, rQ, rT, lr }
+                .cfi_remember_state
+                .cfi_adjust_cfa_offset 16
+                .cfi_rel_offset rP, 0
+                .cfi_rel_offset rQ, 4
+                .cfi_rel_offset rT, 8
+                .cfi_rel_offset lr, 12
+
+        // Set up denominator shift, assuming a single width result.
+        movs    rP, #32
+
+        // If the upper word of the denominator is 0 ...
+        tst     yyh, yyh
+        bne     LLSYM(__uldivmod_setup)
+
+  #if !defined(__OPTIMIZE_SIZE__) || !__OPTIMIZE_SIZE__
+        // ... and the upper word of the numerator is also 0,
+        //  single width division will be at least twice as fast.
+        tst     xxh, xxh
+        beq     LLSYM(__uldivmod_small)
+  #endif
+
+        // ... and the lower word of the denominator is less than or equal
+        //  to the upper word of the numerator ...
+        cmp     xxh, yyl
+        blo     LLSYM(__uldivmod_setup)
+
+        // ... then the result will be double width, at least 33 bits.
+        // Set up a flag in $rP to seed the shift for the second word.
+        movs    yyh, yyl
+        eors    yyl, yyl
+        adds    rP, #64
+
+    LLSYM(__uldivmod_setup):
+        // Pre division: Shift the denominator as far as possible left
+        //  without making it larger than the numerator.
+        // Since search is destructive, first save a copy of the numerator.
+        mov     ip, xxl
+        mov     lr, xxh
+
+        // Set up binary search.
+        movs    rQ, #16
+        eors    rT, rT
+
+    LLSYM(__uldivmod_align):
+        // Maintain a secondary shift $rT = 32 - $rQ, making the overlapping
+        //  shifts between low and high words easier to construct.
+        adds    rT, rQ
+
+        // Prefer dividing the numerator to multiplying the denominator
+        //  (multiplying the denominator may result in overflow).
+        lsrs    xxh, rQ
+
+        // Measure the high bits of denominator against the numerator.
+        cmp     xxh, yyh
+        blo     LLSYM(__uldivmod_skip)
+        bhi     LLSYM(__uldivmod_shift)
+
+        // If the high bits are equal, construct the low bits for checking.
+        mov     xxh, lr
+        lsls    xxh, rT
+
+        lsrs    xxl, rQ
+        orrs    xxh, xxl
+
+        cmp     xxh, yyl
+        blo     LLSYM(__uldivmod_skip)
+
+    LLSYM(__uldivmod_shift):
+        // Scale the denominator and the result together.
+        subs    rP, rQ
+
+        // If the reduced numerator is still larger than or equal to the
+        //  denominator, it is safe to shift the denominator left.
+        movs    xxh, yyl
+        lsrs    xxh, rT
+        lsls    yyh, rQ
+
+        lsls    yyl, rQ
+        orrs    yyh, xxh
+
+    LLSYM(__uldivmod_skip):
+        // Restore the numerator.
+        mov     xxl, ip
+        mov     xxh, lr
+
+        // Iterate until the shift goes to 0.
+        lsrs    rQ, #1
+        bne     LLSYM(__uldivmod_align)
+
+        // Initialize the result (zero).
+        mov     ip, rQ
+
+        // HACK: Compensate for the first word test.
+        lsls    rP, #6
+
+    LLSYM(__uldivmod_word2):
+        // Is there another word?
+        lsrs    rP, #6
+        beq     LLSYM(__uldivmod_return)
+
+        // Shift the calculated result by 1 word.
+        mov     lr, ip
+        mov     ip, rQ
+
+        // Set up the MSB of the next word of the quotient
+        movs    rQ, #1
+        rors    rQ, rP
+        b       LLSYM(__uldivmod_entry)
+
+    LLSYM(__uldivmod_loop):
+        // Divide the denominator by 2.
+        // It could be slightly faster to multiply the numerator,
+        //  but that would require shifting the remainder at the end.
+        lsls    rT, yyh, #31
+        lsrs    yyh, #1
+        lsrs    yyl, #1
+        adds    yyl, rT
+
+        // Step to the next bit of the result.
+        lsrs    rQ, #1
+        beq     LLSYM(__uldivmod_word2)
+
+    LLSYM(__uldivmod_entry):
+        // Test if the denominator is smaller, high byte first.
+        cmp     xxh, yyh
+        blo     LLSYM(__uldivmod_loop)
+        bhi     LLSYM(__uldivmod_quotient)
+
+        cmp     xxl, yyl
+        blo     LLSYM(__uldivmod_loop)
+
+    LLSYM(__uldivmod_quotient):
+        // Smaller denominator: the next bit of the quotient will be set.
+        add     ip, rQ
+
+        // Subtract the denominator from the remainder.
+        // If the new remainder goes to 0, exit early.
+        subs    xxl, yyl
+        sbcs    xxh, yyh
+        bne     LLSYM(__uldivmod_loop)
+
+        tst     xxl, xxl
+        bne     LLSYM(__uldivmod_loop)
+
+  #if !defined(__OPTIMIZE_SIZE__) || !__OPTIMIZE_SIZE__
+        // Check whether there's still a second word to calculate.
+        lsrs    rP, #6
+        beq     LLSYM(__uldivmod_return)
+
+        // If so, shift the result left by a full word.
+        mov     lr, ip
+        mov     ip, xxh // zero
+  #else
+        eors    rQ, rQ
+        b       LLSYM(__uldivmod_word2)
+  #endif
+
+    LLSYM(__uldivmod_return):
+        // Move the remainder to the second half of the result.
+        movs    yyl, xxl
+        movs    yyh, xxh
+
+        // Move the quotient to the first half of the result.
+        mov     xxl, ip
+        mov     xxh, lr
+
+        pop     { rP, rQ, rT, pc }
+                .cfi_restore_state
+
+  #if defined(PEDANTIC_DIV0) && PEDANTIC_DIV0
+    LLSYM(__uldivmod_zero):
+        // Set up the *div0() parameter specified in the ARM runtime ABI:
+        //  * 0 if the numerator is 0,
+        //  * Or, the largest value of the type manipulated by the calling
+        //     division function if the numerator is positive.
+        subs    yyl, xxl
+        sbcs    yyh, xxh
+        orrs    xxh, yyh
+        asrs    xxh, #31
+        movs    xxl, xxh
+
+    FUNC_ENTRY uldivmod_zero2
+  #if defined(DOUBLE_ALIGN_STACK) && DOUBLE_ALIGN_STACK
+        push    { rT, lr }
+                .cfi_remember_state
+                .cfi_adjust_cfa_offset 8
+                .cfi_rel_offset rT, 0
+                .cfi_rel_offset lr, 4
+  #else
+        push    { lr }
+                .cfi_remember_state
+                .cfi_adjust_cfa_offset 4
+                .cfi_rel_offset lr, 0
+  #endif
+
+        // Since GCC implements __aeabi_ldiv0() as a weak overridable function,
+        //  this call must be prepared for a jump beyond +/- 2 KB.
+        // NOTE: __aeabi_ldiv0() can't be implemented as a tail call, since any
+        //  non-trivial override will (likely) corrupt a remainder in $r3:$r2.
+        bl      SYM(__aeabi_ldiv0) __PLT__
+
+        // Since the input to __aeabi_ldiv0() was INF, there really isn't any
+        //  choice in which of the recommended *divmod() patterns to follow.
+        // Clear the remainder to complete {INF, 0}.
+        eors    yyl, yyl
+        eors    yyh, yyh
+
+  #if defined(DOUBLE_ALIGN_STACK) && DOUBLE_ALIGN_STACK
+        pop     { rT, pc }
+                .cfi_restore_state
+  #else
+        pop     { pc }
+                .cfi_restore_state
+  #endif
+
+  #else /* !PEDANTIC_DIV0 */
+    FUNC_ENTRY uldivmod_zero
+        // NOTE: The following code sets up a return pair of {0, numerator},
+        //  the second preference given by the ARM runtime ABI specification.
+        // The pedantic version is 30 bytes larger between __aeabi_ldiv() and
+        //  __aeabi_uldiv().  However, this version does not conform to the
+        //  out-of-line parameter requirements given for __aeabi_ldiv0(), and
+        //  also does not pass 'gcc/testsuite/gcc.target/arm/divzero.c'.
+
+        // Since the numerator may be overwritten by __aeabi_ldiv0(), save now.
+        // Afterwards, they can be restored directly as the remainder.
+  #if defined(DOUBLE_ALIGN_STACK) && DOUBLE_ALIGN_STACK
+        push    { r0, r1, rT, lr }
+                .cfi_remember_state
+                .cfi_adjust_cfa_offset 16
+                .cfi_rel_offset xxl, 0
+                .cfi_rel_offset xxh, 4
+                .cfi_rel_offset rT, 8
+                .cfi_rel_offset lr, 12
+  #else
+        push    { r0, r1, lr }
+                .cfi_remember_state
+                .cfi_adjust_cfa_offset 12
+                .cfi_rel_offset xxl, 0
+                .cfi_rel_offset xxh, 4
+                .cfi_rel_offset lr, 8
+  #endif
+
+        // Set up the quotient.
+        eors    xxl, xxl
+        eors    xxh, xxh
+
+        // Since GCC implements div0() as a weak overridable function,
+        //  this call must be prepared for a jump beyond +/- 2 KB.
+        bl      SYM(__aeabi_ldiv0) __PLT__
+
+        // Restore the remainder and return.
+  #if defined(DOUBLE_ALIGN_STACK) && DOUBLE_ALIGN_STACK
+        pop     { r2, r3, rT, pc }
+                .cfi_restore_state
+  #else
+        pop     { r2, r3, pc }
+                .cfi_restore_state
+  #endif
+  #endif /* !PEDANTIC_DIV0 */
+
+  #if !defined(__OPTIMIZE_SIZE__) || !__OPTIMIZE_SIZE__
+    LLSYM(__uldivmod_small):
+        // Arrange operands for (much faster) 32-bit division.
+  #if defined(__ARMEB__) && __ARMEB__
+        movs    r0, r1
+        movs    r1, r3
+  #else
+        movs    r1, r2
+  #endif
+
+        bl      SYM(__uidivmod_nonzero) __PLT__
+
+        // Arrange results back into 64-bit format.
+  #if defined(__ARMEB__) && __ARMEB__
+        movs    r3, r1
+        movs    r1, r0
+  #else
+        movs    r2, r1
+  #endif
+
+        // Extend quotient and remainder to 64 bits, unsigned.
+        eors    xxh, xxh
+        eors    yyh, yyh
+        pop     { rP, rQ, rT, pc }
+  #endif
+
+    CFI_END_FUNCTION
+FUNC_END udivdi3
+
+#ifndef __symbian__
+  FUNC_END aeabi_uldiv
+  FUNC_END aeabi_uldivmod
+#endif
 
-#endif /* L_aeabi_uldivmod */
+#endif /* L_udivdi3 */
diff --git a/libgcc/config/arm/t-bpabi b/libgcc/config/arm/t-bpabi
index dddddc7c444..86234d5676f 100644
--- a/libgcc/config/arm/t-bpabi
+++ b/libgcc/config/arm/t-bpabi
@@ -2,8 +2,7 @@ LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod
 
 # Add the BPABI C functions.
-LIB2ADD += $(srcdir)/config/arm/bpabi.c \
-	$(srcdir)/config/arm/unaligned-funcs.c
+LIB2ADD += $(srcdir)/config/arm/unaligned-funcs.c
 
 LIB2ADD_ST += $(srcdir)/config/arm/fp16.c
diff --git a/libgcc/config/arm/t-elf b/libgcc/config/arm/t-elf
index 83325410097..4d430325fa1 100644
--- a/libgcc/config/arm/t-elf
+++ b/libgcc/config/arm/t-elf
@@ -50,6 +50,15 @@ LIB1ASMFUNCS += \
 	_umodsi3 \
 
+ifeq (__ARM_ARCH_ISA_THUMB 1,$(ARM_ISA)$(THUMB1_ISA))
+# Group 1B: Integer functions built for v6m only.
+LIB1ASMFUNCS += \
+	_divdi3 \
+	_udivdi3 \
+
+endif
+
+
 # Group 2: Single precision floating point function objects.
 LIB1ASMFUNCS += \
 	_arm_addsubsf3 \
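
For reference, the core of __uldivmod_nonzero above is a classic
shift-and-subtract (restoring) division.  A simplified C model follows; it is
illustrative only and not part of the patch (the assembly additionally aligns
the denominator with a binary search and processes the quotient in two 32-bit
words, which this sketch omits).  As with the _nonzero entry point, the caller
must guarantee a non-zero denominator:

    static unsigned long long
    udivmod64_model (unsigned long long n, unsigned long long d,
                     unsigned long long *rem)
    {
      unsigned long long q = 0;
      unsigned long long bit = 1;

      /* Shift the denominator as far left as possible without
         making it larger than the numerator.  */
      while (d < n && !(d & (1ULL << 63)))
        {
          d <<= 1;
          bit <<= 1;
        }

      /* One trial subtraction per quotient bit, MSB first.  */
      while (bit)
        {
          if (n >= d)
            {
              n -= d;
              q |= bit;
            }
          d >>= 1;
          bit >>= 1;
        }

      *rem = n;
      return q;
    }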
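
Similarly, the sign handling in __aeabi_ldivmod above follows the usual
pattern: divide the magnitudes as unsigned, negate the quotient when the
operand signs differ, and give the remainder the numerator's sign.  The
assembly forms each absolute value with the two's complement identity
abs(x) = (x ^ mask) - mask, where mask is the sign bit spread across the
word (the asrs/eors/subs/sbcs sequences).  A hedged C sketch, reusing the
udivmod64_model() helper above and assuming GCC's arithmetic right shift
on signed types:

    static long long
    ldivmod_signed_model (long long n, long long d, long long *rem)
    {
      long long sn = n >> 63;       /* 0 or -1: sign mask of numerator */
      long long sd = d >> 63;       /* 0 or -1: sign mask of denominator */
      long long sq = sn ^ sd;       /* quotient negative iff signs differ */
      unsigned long long ur, uq;

      uq = udivmod64_model (((unsigned long long) n ^ sn) - sn,
                            ((unsigned long long) d ^ sd) - sd, &ur);

      *rem = ((long long) ur ^ sn) - sn;   /* remainder takes numerator's sign */
      return ((long long) uq ^ sq) - sq;
    }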