From patchwork Fri Oct 27 21:07:14 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "H.J. Lu" X-Patchwork-Id: 831515 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=sourceware.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=libc-alpha-return-86499-incoming=patchwork.ozlabs.org@sourceware.org; receiver=) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; secure) header.d=sourceware.org header.i=@sourceware.org header.b="F1vSAz22"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3yNxKD2vVPz9t4X for ; Sat, 28 Oct 2017 08:07:27 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:date:from:to:subject:message-id:reply-to :mime-version:content-type; q=dns; s=default; b=FdT25F4IJSyGk6Ys Pzy6xADiRymAsv+WkSRevHKTwH8gXMGh6bXkVqiVwhL7t9iLcW/KzWyCVhGYB93E BQFr7eh2RX+iEhViiHwr38XTYptdNHgLRxJRKpscaVLD6gI/cNX8SsDvWyUByKBO xgx8cHDXvGy13jA+N/SH4V5g0eY= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:date:from:to:subject:message-id:reply-to :mime-version:content-type; s=default; bh=s+XdfP6W4mBLZZI3NzV752 sJsTg=; b=F1vSAz22S38LZHfT2j6zxj+jpFYK8Nx88lpnNTmejrfAQLVBtQrvPV arsOZ8NS7H6MWa0J8GWrYaTJqY9nOXYeo02hHC/8fGsDXxhpz/RPlcytdtmvsk6W c9YJmBLryddf10/IFX8O++fGBubIinTyygjze51wZ7R3/7UlpdWO8= Received: (qmail 117144 invoked by alias); 27 Oct 2017 21:07:18 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: 
List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 117055 invoked by uid 89); 27 Oct 2017 21:07:17 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-24.6 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_LAZY_DOMAIN_SECURITY, NO_DNS_FOR_FROM, RP_MATCHES_RCVD autolearn=ham version=3.3.2 spammy= X-HELO: mga14.intel.com X-ExtLoop1: 1 Date: Fri, 27 Oct 2017 14:07:14 -0700 From: "H.J. Lu" To: GNU C Library Subject: [PATCH] i586: Use a jump table in strcpy.S [BZ #22353] Message-ID: <20171027210714.GA15539@gmail.com> Reply-To: "H.J. Lu" MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.9.1 (2017-09-22) i586 strcpy.S used a clever trick with LEA to avoid a jump table: /* ECX has the last 2 bits of the address of source - 1. */ andl $3, %ecx call 2f 2: popl %edx /* 0xb is the distance between 2: and 1:. */ leal 0xb(%edx,%ecx,8), %ecx jmp *%ecx .align 8 1: /* ECX == 0 */ orb (%esi), %al jz L(end) stosb xorl %eax, %eax incl %esi /* ECX == 1 */ orb (%esi), %al jz L(end) stosb xorl %eax, %eax incl %esi /* ECX == 2 */ orb (%esi), %al jz L(end) stosb xorl %eax, %eax incl %esi /* ECX == 3 */ L(1): movl (%esi), %ecx leal 4(%esi),%esi This may fail if there are instruction changes before L(1):. This patch replaces it with a jump table which works with any instruction changes. Tested on i586 and i686 with and without --disable-multi-arch. Any objections or comments? H.J. [BZ #22353] * sysdeps/i386/i586/strcpy.S (JMPTBL): New. (BRANCH_TO_JMPTBL_ENTRY): Likewise. (STRCPY): Use it. (1): Renamed to ... (L(Src0)): This. (L(Src1)): New. (L(Src2)): Likewise. (L(1)): Renamed to ... (L(Src3)): This. (L(SrcTable)): New. 
--- sysdeps/i386/i586/strcpy.S | 62 +++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/sysdeps/i386/i586/strcpy.S b/sysdeps/i386/i586/strcpy.S index a444604f4f..ff3a6cbe7f 100644 --- a/sysdeps/i386/i586/strcpy.S +++ b/sysdeps/i386/i586/strcpy.S @@ -29,6 +29,34 @@ # define STRCPY strcpy #endif +#ifdef PIC +# define JMPTBL(I, B) I - B + +/* Load an entry in a jump table into EDX and branch to it. TABLE is a + jump table with relative offsets. INDEX is a register contains the + index into the jump table. SCALE is the scale of INDEX. */ + +# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ + /* We first load PC into EDX. */ \ + SETUP_PIC_REG(dx); \ + /* Get the address of the jump table. */ \ + addl $(TABLE - .), %edx; \ + /* Get the entry and convert the relative offset to the \ + absolute address. */ \ + addl (%edx,INDEX,SCALE), %edx; \ + /* We loaded the jump table and adjusted EDX. Go. */ \ + jmp *%edx +#else +# define JMPTBL(I, B) I + +/* Branch to an entry in a jump table. TABLE is a jump table with + absolute offsets. INDEX is a register contains the index into the + jump table. SCALE is the scale of INDEX. */ + +# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ + jmp *TABLE(,INDEX,SCALE) +#endif + #define magic 0xfefefeff .text @@ -53,41 +81,32 @@ ENTRY (STRCPY) cfi_rel_offset (ebx, 0) andl $3, %ecx -#ifdef PIC - call 2f - cfi_adjust_cfa_offset (4) -2: popl %edx - cfi_adjust_cfa_offset (-4) - /* 0xb is the distance between 2: and 1: but we avoid writing - 1f-2b because the assembler generates worse code. 
*/ - leal 0xb(%edx,%ecx,8), %ecx -#else - leal 1f(,%ecx,8), %ecx -#endif - - jmp *%ecx + BRANCH_TO_JMPTBL_ENTRY (L(SrcTable), %ecx, 4) - .align 8 -1: + .p2align 4 +L(Src0): orb (%esi), %al jz L(end) stosb xorl %eax, %eax incl %esi +L(Src1): orb (%esi), %al jz L(end) stosb xorl %eax, %eax incl %esi +L(Src2): orb (%esi), %al jz L(end) stosb xorl %eax, %eax incl %esi -L(1): movl (%esi), %ecx +L(Src3): + movl (%esi), %ecx leal 4(%esi),%esi subl %ecx, %eax @@ -107,7 +126,7 @@ L(1): movl (%esi), %ecx movl %edx, (%edi) leal 4(%edi),%edi - jmp L(1) + jmp L(Src3) L(3): movl %ecx, %edx @@ -164,6 +183,15 @@ L(end2): ret END (STRCPY) + + .p2align 2 + .section .rodata +L(SrcTable): + .int JMPTBL (L(Src0), L(SrcTable)) + .int JMPTBL (L(Src1), L(SrcTable)) + .int JMPTBL (L(Src2), L(SrcTable)) + .int JMPTBL (L(Src3), L(SrcTable)) + #ifndef USE_AS_STPCPY libc_hidden_builtin_def (strcpy) #endif