From patchwork Thu May 30 20:34:53 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kamal Mostafa X-Patchwork-Id: 247751 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) by ozlabs.org (Postfix) with ESMTP id 032132C01FC for ; Fri, 31 May 2013 06:38:36 +1000 (EST) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.76) (envelope-from ) id 1Ui9cD-0004SE-9F; Thu, 30 May 2013 20:38:29 +0000 Received: from youngberry.canonical.com ([91.189.89.112]) by huckleberry.canonical.com with esmtp (Exim 4.76) (envelope-from ) id 1Ui9Ym-0002tE-31 for kernel-team@lists.ubuntu.com; Thu, 30 May 2013 20:34:56 +0000 Received: from c-67-160-231-42.hsd1.ca.comcast.net ([67.160.231.42] helo=fourier) by youngberry.canonical.com with esmtpsa (TLS1.0:DHE_RSA_AES_128_CBC_SHA1:16) (Exim 4.71) (envelope-from ) id 1Ui9Yl-0002jZ-IL; Thu, 30 May 2013 20:34:56 +0000 Received: from kamal by fourier with local (Exim 4.80) (envelope-from ) id 1Ui9Yj-0003G9-BA; Thu, 30 May 2013 13:34:53 -0700 From: Kamal Mostafa To: Dave Martin Subject: [ 3.8.y.z extended stable ] Patch "ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2" has been added to staging queue Date: Thu, 30 May 2013 13:34:53 -0700 Message-Id: <1369946093-12496-1-git-send-email-kamal@canonical.com> X-Mailer: git-send-email 1.8.1.2 X-Extended-Stable: 3.8 Cc: Russell King , Kamal Mostafa , David McCullough , kernel-team@lists.ubuntu.com, Nicolas Pitre X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.14 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: kernel-team-bounces@lists.ubuntu.com This is a note to let you know that I have just added a patch titled ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2 to the linux-3.8.y-queue branch of the 3.8.y.z extended stable tree which can be found at: http://kernel.ubuntu.com/git?p=ubuntu/linux.git;a=shortlog;h=refs/heads/linux-3.8.y-queue This patch is scheduled to be released in version 3.8.13.2. If you, or anyone else, feels it should not be added to this tree, please reply to this email. For more information about the 3.8.y.z tree, see https://wiki.ubuntu.com/Kernel/Dev/ExtendedStable Thanks. -Kamal ------ From 6b068c099954ca74b19aacda8fedc9e3f26687dc Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 10 Jan 2013 12:20:15 +0100 Subject: ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2 compatible commit 638591cd7b601d403ed703d55062b48c32ea8cfb upstream. This patch fixes aes-armv4.S and sha1-armv4-large.S to work natively in Thumb. This allows ARM/Thumb interworking workarounds to be removed. I also take the opportunity to convert some explicit assembler directives for exported functions to the standard ENTRY()/ENDPROC(). For the code itself: * In sha1_block_data_order, use of TEQ with sp is deprecated in ARMv7 and not supported in Thumb. For the branches back to .L_00_15 and .L_40_59, the TEQ is converted to a CMP, under the assumption that clobbering the C flag here will not cause incorrect behaviour. For the first branch back to .L_20_39_or_60_79 the C flag is important, so sp is moved temporarily into another register so that TEQ can be used for the comparison. * In the AES code, most forms of register-indexed addressing with shifts and rotates are not permitted for loads and stores in Thumb, so the address calculation is done using a separate instruction for the Thumb case. The resulting code is unlikely to be optimally scheduled, but it should not have a large impact given the overall size of the code. I haven't run any benchmarks. Signed-off-by: Dave Martin Tested-by: David McCullough (ARM only) Acked-by: David McCullough Acked-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Kamal Mostafa --- arch/arm/crypto/aes-armv4.S | 64 ++++++++++++-------------------------- arch/arm/crypto/sha1-armv4-large.S | 24 ++++++-------- 2 files changed, 29 insertions(+), 59 deletions(-) -- 1.8.1.2 diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S index e59b1d5..19d6cd6 100644 --- a/arch/arm/crypto/aes-armv4.S +++ b/arch/arm/crypto/aes-armv4.S @@ -34,8 +34,9 @@ @ A little glue here to select the correct code below for the ARM CPU @ that is being targetted. +#include + .text -.code 32 .type AES_Te,%object .align 5 @@ -145,10 +146,8 @@ AES_Te: @ void AES_encrypt(const unsigned char *in, unsigned char *out, @ const AES_KEY *key) { -.global AES_encrypt -.type AES_encrypt,%function .align 5 -AES_encrypt: +ENTRY(AES_encrypt) sub r3,pc,#8 @ AES_encrypt stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp @@ -239,15 +238,8 @@ AES_encrypt: strb r6,[r12,#14] strb r3,[r12,#15] #endif -#if __ARM_ARCH__>=5 ldmia sp!,{r4-r12,pc} -#else - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif -.size AES_encrypt,.-AES_encrypt +ENDPROC(AES_encrypt) .type _armv4_AES_encrypt,%function .align 2 @@ -386,10 +378,8 @@ _armv4_AES_encrypt: ldr pc,[sp],#4 @ pop and return .size _armv4_AES_encrypt,.-_armv4_AES_encrypt -.global private_AES_set_encrypt_key -.type private_AES_set_encrypt_key,%function .align 5 -private_AES_set_encrypt_key: +ENTRY(private_AES_set_encrypt_key) _armv4_AES_set_encrypt_key: sub r3,pc,#8 @ AES_set_encrypt_key teq r0,#0 @@ -658,15 +648,11 @@ _armv4_AES_set_encrypt_key: .Ldone: mov r0,#0 ldmia sp!,{r4-r12,lr} -.Labrt: tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key +.Labrt: mov pc,lr +ENDPROC(private_AES_set_encrypt_key) -.global private_AES_set_decrypt_key -.type private_AES_set_decrypt_key,%function .align 5 -private_AES_set_decrypt_key: +ENTRY(private_AES_set_decrypt_key) str lr,[sp,#-4]! @ push lr #if 0 @ kernel does both of these in setkey so optimise this bit out by @@ -748,15 +734,8 @@ private_AES_set_decrypt_key: bne .Lmix mov r0,#0 -#if __ARM_ARCH__>=5 ldmia sp!,{r4-r12,pc} -#else - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key +ENDPROC(private_AES_set_decrypt_key) .type AES_Td,%object .align 5 @@ -862,10 +841,8 @@ AES_Td: @ void AES_decrypt(const unsigned char *in, unsigned char *out, @ const AES_KEY *key) { -.global AES_decrypt -.type AES_decrypt,%function .align 5 -AES_decrypt: +ENTRY(AES_decrypt) sub r3,pc,#8 @ AES_decrypt stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp @@ -956,15 +933,8 @@ AES_decrypt: strb r6,[r12,#14] strb r3,[r12,#15] #endif -#if __ARM_ARCH__>=5 ldmia sp!,{r4-r12,pc} -#else - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif -.size AES_decrypt,.-AES_decrypt +ENDPROC(AES_decrypt) .type _armv4_AES_decrypt,%function .align 2 @@ -1064,7 +1034,9 @@ _armv4_AES_decrypt: and r9,lr,r1,lsr#8 ldrb r7,[r10,r7] @ Td4[s1>>0] - ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24] + ARM( ldrb r1,[r10,r1,lsr#24] ) @ Td4[s1>>24] + THUMB( add r1,r10,r1,lsr#24 ) @ Td4[s1>>24] + THUMB( ldrb r1,[r1] ) ldrb r8,[r10,r8] @ Td4[s1>>16] eor r0,r7,r0,lsl#24 ldrb r9,[r10,r9] @ Td4[s1>>8] @@ -1077,7 +1049,9 @@ _armv4_AES_decrypt: ldrb r8,[r10,r8] @ Td4[s2>>0] and r9,lr,r2,lsr#16 - ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24] + ARM( ldrb r2,[r10,r2,lsr#24] ) @ Td4[s2>>24] + THUMB( add r2,r10,r2,lsr#24 ) @ Td4[s2>>24] + THUMB( ldrb r2,[r2] ) eor r0,r0,r7,lsl#8 ldrb r9,[r10,r9] @ Td4[s2>>16] eor r1,r8,r1,lsl#16 @@ -1090,7 +1064,9 @@ _armv4_AES_decrypt: and r9,lr,r3 @ i2 ldrb r9,[r10,r9] @ Td4[s3>>0] - ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24] + ARM( ldrb r3,[r10,r3,lsr#24] ) @ Td4[s3>>24] + THUMB( add r3,r10,r3,lsr#24 ) @ Td4[s3>>24] + THUMB( ldrb r3,[r3] ) eor r0,r0,r7,lsl#16 ldr r7,[r11,#0] eor r1,r1,r8,lsl#8 diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S index 7050ab1..92c6eed 100644 --- a/arch/arm/crypto/sha1-armv4-large.S +++ b/arch/arm/crypto/sha1-armv4-large.S @@ -51,13 +51,12 @@ @ Profiler-assisted and platform-specific optimization resulted in 10% @ improvement on Cortex A8 core and 12.2 cycles per byte. -.text +#include -.global sha1_block_data_order -.type sha1_block_data_order,%function +.text .align 2 -sha1_block_data_order: +ENTRY(sha1_block_data_order) stmdb sp!,{r4-r12,lr} add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 ldmia r0,{r3,r4,r5,r6,r7} @@ -194,7 +193,7 @@ sha1_block_data_order: eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) str r9,[r14,#-4]! add r3,r3,r10 @ E+=F_00_19(B,C,D) - teq r14,sp + cmp r14,sp bne .L_00_15 @ [((11+4)*5+2)*3] #if __ARM_ARCH__<7 ldrb r10,[r1,#2] @@ -374,7 +373,9 @@ sha1_block_data_order: @ F_xx_xx add r3,r3,r9 @ E+=X[i] add r3,r3,r10 @ E+=F_20_39(B,C,D) - teq r14,sp @ preserve carry + ARM( teq r14,sp ) @ preserve carry + THUMB( mov r11,sp ) + THUMB( teq r14,r11 ) @ preserve carry bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes @@ -466,7 +467,7 @@ sha1_block_data_order: add r3,r3,r9 @ E+=X[i] add r3,r3,r10 @ E+=F_40_59(B,C,D) add r3,r3,r11,ror#2 - teq r14,sp + cmp r14,sp bne .L_40_59 @ [+((12+5)*5+2)*4] ldr r8,.LK_60_79 @@ -485,19 +486,12 @@ sha1_block_data_order: teq r1,r2 bne .Lloop @ [+18], total 1307 -#if __ARM_ARCH__>=5 ldmia sp!,{r4-r12,pc} -#else - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif .align 2 .LK_00_19: .word 0x5a827999 .LK_20_39: .word 0x6ed9eba1 .LK_40_59: .word 0x8f1bbcdc .LK_60_79: .word 0xca62c1d6 -.size sha1_block_data_order,.-sha1_block_data_order +ENDPROC(sha1_block_data_order) .asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " .align 2