From patchwork Fri May 9 18:35:02 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Roland McGrath X-Patchwork-Id: 347500 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 6769014007F for ; Sat, 10 May 2014 04:35:12 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:mime-version:content-type :content-transfer-encoding:from:to:subject:message-id:date; q= dns; s=default; b=L0p0JT9mGyxSd77TPEuIG7xKzBQOa6mUqneBK0pj6YHfzN crihC1cfWWUmQjqAwqg5Pllz2408sR6Qm9N2kHV8YhjTFALFNjLpYdY1gbpbsxMV p+FOYuPBONeU0QELO5AJ55cmAANwB/JdWbtyzvk2CqGOX46au+0UH08pm0uM0= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:mime-version:content-type :content-transfer-encoding:from:to:subject:message-id:date; s= default; bh=XvMrtHM0P6vQ88wiWE/Y8MtYViI=; b=T3h22bloYnbF3BH52XsW LKLOxe75lKQdp9749vr66zXTX8Edbawy2CbqSF7xnOHHrroh1PzTLcPjfeeN6DSa jDxaYqIfGYmrwDCFPXjwf+gCVQ4S7eNacc5By3gTVL+FGeUn0jtHtZ+fGixSwelS tv+yvzLO/wVEvaOPWTcTJYA= Received: (qmail 19631 invoked by alias); 9 May 2014 18:35:06 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 19571 invoked by uid 89); 9 May 2014 18:35:06 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.3 required=5.0 tests=AWL, BAYES_00 autolearn=ham 
version=3.3.2 X-HELO: topped-with-meat.com MIME-Version: 1.0 From: Roland McGrath To: "GNU C. Library" Subject: [PATCH roland/arm-strcmp] Make armv7 strcmp assembly compatible with ARM mode and SFI. Message-Id: <20140509183502.A129E2C39F0@topped-with-meat.com> Date: Fri, 9 May 2014 11:35:02 -0700 (PDT) X-CMAE-Score: 0 X-CMAE-Analysis: v=2.1 cv=J405smXS c=1 sm=1 tr=0 a=WkljmVdYkabdwxfqvArNOQ==:117 a=14OXPxybAAAA:8 a=AwYxdOP0H70A:10 a=Z6MIti7PxpgA:10 a=kj9zAlcOel0A:10 a=hOe2yjtxAAAA:8 a=fu0doBhTdlNaEA4sg58A:9 a=CjuIK1q_8ugA:10 Verified on arm-linux-gnueabihf that strcmp.o{,s} disassembly is identical before and after the change. Tested the ARM-mode code by hacking in "#define NO_THUMB" at the top of the file and verifying no regressions in 'make check subdirs=string'. (Also verified that the arm-nacl build passes the NaCl code validator.) OK? Thanks, Roland * sysdeps/arm/armv7/strcmp.S: Use sfi_breg prefix on loads not from sp. [NO_THUMB]: Cope without cbz, cbnz, and orn instructions. --- a/sysdeps/arm/armv7/strcmp.S +++ b/sysdeps/arm/armv7/strcmp.S @@ -35,8 +35,6 @@ #define STRCMP_PRECHECK 1 - /* This version uses Thumb-2 code. */ - .thumb .syntax unified #ifdef __ARM_BIG_ENDIAN @@ -85,6 +83,39 @@ #define syndrome tmp2 +#ifndef NO_THUMB +/* This code is best on Thumb. */ + .thumb + +/* In Thumb code we can't use MVN with a register shift, but we do have ORN. */ +.macro prepare_mask mask_reg, nbits_reg + S2HI \mask_reg, const_m1, \nbits_reg +.endm +.macro apply_mask data_reg, mask_reg + orn \data_reg, \data_reg, \mask_reg +.endm +#else +/* In ARM code we don't have ORN, but we can use MVN with a register shift. */ +.macro prepare_mask mask_reg, nbits_reg + mvn \mask_reg, const_m1, S2HI \nbits_reg +.endm +.macro apply_mask data_reg, mask_reg + orr \data_reg, \data_reg, \mask_reg +.endm + +/* These clobber the condition codes, which the real Thumb cbz/cbnz + instructions do not. But it doesn't matter for any of the uses here. 
*/ +.macro cbz reg, label + cmp \reg, #0 + beq \label +.endm +.macro cbnz reg, label + cmp \reg, #0 + bne \label +.endm +#endif + + /* Macro to compute and return the result value for word-aligned cases. */ .macro strcmp_epilogue_aligned synd d1 d2 restore_r6 @@ -147,8 +178,10 @@ #endif ENTRY (strcmp) #if STRCMP_PRECHECK == 1 - ldrb r2, [src1] - ldrb r3, [src2] + sfi_breg src1, \ + ldrb r2, [\B] + sfi_breg src2, \ + ldrb r3, [\B] cmp r2, #1 it cs cmpcs r2, r3 @@ -178,18 +211,18 @@ ENTRY (strcmp) and tmp2, tmp1, #3 bic src2, src2, #7 lsl tmp2, tmp2, #3 /* Bytes -> bits. */ - ldrd data1a, data1b, [src1], #16 + sfi_breg src1, \ + ldrd data1a, data1b, [\B], #16 tst tmp1, #4 - ldrd data2a, data2b, [src2], #16 - /* In thumb code we can't use MVN with a register shift, but - we do have ORN. */ - S2HI tmp1, const_m1, tmp2 - orn data1a, data1a, tmp1 - orn data2a, data2a, tmp1 + sfi_breg src2, \ + ldrd data2a, data2b, [\B], #16 + prepare_mask tmp1, tmp2 + apply_mask data1a, tmp1 + apply_mask data2a, tmp1 beq .Lstart_realigned8 - orn data1b, data1b, tmp1 + apply_mask data1b, tmp1 mov data1a, const_m1 - orn data2b, data2b, tmp1 + apply_mask data2b, tmp1 mov data2a, const_m1 b .Lstart_realigned8 @@ -198,8 +231,10 @@ ENTRY (strcmp) .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */ .p2align 2 /* Always word aligned. 
*/ .Lloop_aligned8: - ldrd data1a, data1b, [src1], #16 - ldrd data2a, data2b, [src2], #16 + sfi_breg src1, \ + ldrd data1a, data1b, [\B], #16 + sfi_breg src2, \ + ldrd data2a, data2b, [\B], #16 .Lstart_realigned8: uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ eor syndrome_a, data1a, data2a @@ -210,8 +245,10 @@ ENTRY (strcmp) sel syndrome_b, syndrome_b, const_m1 cbnz syndrome_b, .Ldiff_in_b - ldrd data1a, data1b, [src1, #-8] - ldrd data2a, data2b, [src2, #-8] + sfi_breg src1, \ + ldrd data1a, data1b, [\B, #-8] + sfi_breg src2, \ + ldrd data2a, data2b, [\B, #-8] uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ eor syndrome_a, data1a, data2a sel syndrome_a, syndrome_a, const_m1 @@ -242,15 +279,19 @@ ENTRY (strcmp) /* Unrolled by a factor of 2, to reduce the number of post-increment operations. */ .Lloop_aligned4: - ldr data1, [src1], #8 - ldr data2, [src2], #8 + sfi_breg src1, \ + ldr data1, [\B], #8 + sfi_breg src2, \ + ldr data2, [\B], #8 .Lstart_realigned4: uadd8 syndrome, data1, const_m1 /* Only need GE bits. */ eor syndrome, data1, data2 sel syndrome, syndrome, const_m1 cbnz syndrome, .Laligned4_done - ldr data1, [src1, #-4] - ldr data2, [src2, #-4] + sfi_breg src1, \ + ldr data1, [\B, #-4] + sfi_breg src2, \ + ldr data2, [\B, #-4] uadd8 syndrome, data1, const_m1 eor syndrome, data1, data2 sel syndrome, syndrome, const_m1 @@ -266,15 +307,15 @@ ENTRY (strcmp) masking off the unwanted loaded data to prevent a difference. */ lsl tmp1, tmp1, #3 /* Bytes -> bits. */ bic src1, src1, #3 - ldr data1, [src1], #8 + sfi_breg src1, \ + ldr data1, [\B], #8 bic src2, src2, #3 - ldr data2, [src2], #8 + sfi_breg src2, \ + ldr data2, [\B], #8 - /* In thumb code we can't use MVN with a register shift, but - we do have ORN. 
*/ - S2HI tmp1, const_m1, tmp1 - orn data1, data1, tmp1 - orn data2, data2, tmp1 + prepare_mask tmp1, tmp1 + apply_mask data1, tmp1 + apply_mask data2, tmp1 b .Lstart_realigned4 .Lmisaligned4: @@ -283,26 +324,30 @@ ENTRY (strcmp) sub src2, src2, tmp1 bic src1, src1, #3 lsls tmp1, tmp1, #31 - ldr data1, [src1], #4 + sfi_breg src1, \ + ldr data1, [\B], #4 beq .Laligned_m2 bcs .Laligned_m1 #if STRCMP_PRECHECK == 0 - ldrb data2, [src2, #1] + sfi_breg src2, \ + ldrb data2, [\B, #1] uxtb tmp1, data1, ror #BYTE1_OFFSET subs tmp1, tmp1, data2 bne .Lmisaligned_exit cbz data2, .Lmisaligned_exit .Laligned_m2: - ldrb data2, [src2, #2] + sfi_breg src2, \ + ldrb data2, [\B, #2] uxtb tmp1, data1, ror #BYTE2_OFFSET subs tmp1, tmp1, data2 bne .Lmisaligned_exit cbz data2, .Lmisaligned_exit .Laligned_m1: - ldrb data2, [src2, #3] + sfi_breg src2, \ + ldrb data2, [\B, #3] uxtb tmp1, data1, ror #BYTE3_OFFSET subs tmp1, tmp1, data2 bne .Lmisaligned_exit @@ -311,14 +356,16 @@ ENTRY (strcmp) #else /* STRCMP_PRECHECK */ /* If we've done the pre-check, then we don't need to check the first byte again here. */ - ldrb data2, [src2, #2] + sfi_breg src2, \ + ldrb data2, [\B, #2] uxtb tmp1, data1, ror #BYTE2_OFFSET subs tmp1, tmp1, data2 bne .Lmisaligned_exit cbz data2, .Lmisaligned_exit .Laligned_m2: - ldrb data2, [src2, #3] + sfi_breg src2, \ + ldrb data2, [\B, #3] uxtb tmp1, data1, ror #BYTE3_OFFSET subs tmp1, tmp1, data2 bne .Lmisaligned_exit @@ -344,11 +391,13 @@ ENTRY (strcmp) cfi_restore_state /* src1 is word aligned, but src2 has no common alignment with it. */ - ldr data1, [src1], #4 + sfi_breg src1, \ + ldr data1, [\B], #4 lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */ bic src2, src2, #3 - ldr data2, [src2], #4 + sfi_breg src2, \ + ldr data2, [\B], #4 bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */ bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. 
*/ @@ -360,11 +409,13 @@ ENTRY (strcmp) sel syndrome, syndrome, const_m1 bne 4f cbnz syndrome, 5f - ldr data2, [src2], #4 + sfi_breg src2, \ + ldr data2, [\B], #4 eor tmp1, tmp1, data1 cmp tmp1, data2, S2HI #24 bne 6f - ldr data1, [src1], #4 + sfi_breg src1, \ + ldr data1, [\B], #4 b .Loverlap3 4: S2LO data2, data2, #8 @@ -376,7 +427,8 @@ ENTRY (strcmp) /* We can only get here if the MSB of data1 contains 0, so fast-path the exit. */ - ldrb result, [src2] + sfi_breg src2, \ + ldrb result, [\B] ldrd r4, r5, [sp], #16 cfi_remember_state cfi_def_cfa_offset (0) @@ -402,11 +454,13 @@ ENTRY (strcmp) sel syndrome, syndrome, const_m1 bne 4f cbnz syndrome, 5f - ldr data2, [src2], #4 + sfi_breg src2, \ + ldr data2, [\B], #4 eor tmp1, tmp1, data1 cmp tmp1, data2, S2HI #16 bne 6f - ldr data1, [src1], #4 + sfi_breg src1, \ + ldr data1, [\B], #4 b .Loverlap2 4: S2LO data2, data2, #16 @@ -415,7 +469,8 @@ ENTRY (strcmp) ands syndrome, syndrome, const_m1, S2LO #16 bne .Lstrcmp_done_equal - ldrh data2, [src2] + sfi_breg src2, \ + ldrh data2, [\B] S2LO data1, data1, #16 #ifdef __ARM_BIG_ENDIAN lsl data2, data2, #16 @@ -435,11 +490,13 @@ ENTRY (strcmp) sel syndrome, syndrome, const_m1 bne 4f cbnz syndrome, 5f - ldr data2, [src2], #4 + sfi_breg src2, \ + ldr data2, [\B], #4 eor tmp1, tmp1, data1 cmp tmp1, data2, S2HI #8 bne 6f - ldr data1, [src1], #4 + sfi_breg src1, \ + ldr data1, [\B], #4 b .Loverlap1 4: S2LO data2, data2, #24 @@ -447,7 +504,8 @@ ENTRY (strcmp) 5: tst syndrome, #LSB bne .Lstrcmp_done_equal - ldr data2, [src2] + sfi_breg src2, \ + ldr data2, [\B] 6: S2LO data1, data1, #8 bic data2, data2, #MSB