From patchwork Fri Oct 6 21:15:53 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Adhemerval Zanella Netto X-Patchwork-Id: 822727 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=sourceware.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=libc-alpha-return-85522-incoming=patchwork.ozlabs.org@sourceware.org; receiver=) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; secure) header.d=sourceware.org header.i=@sourceware.org header.b="u6RbjuqR"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3y82W3124Hz9sPr for ; Sat, 7 Oct 2017 08:16:14 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id; q=dns; s= default; b=JfkqlKEiG9l1Uvgwy/kv7EPbVM4jhuSCy9i3HB4fTn3f7UlciFR83 VzccB7PQzGMw6TRxtxKfVGSuGSJBgEZxdhOMHABPRxlqjj4tGXHjOViX67B78VTa H1HX6ZiktX2A/mi12yDTUADNH5uunJ6uNtFL/+zeyjxhy0h3bN2QS0= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id; s=default; bh=++2rjELAZxhvGyYnEO0bEpQITMs=; b=u6RbjuqRIYZgrEMH5+ohYjbYifRz Dwa/XPIzF8XCsBLysOSQCsKm8/A9AHjh26c0sldNBPyS5fwUf8wUVeKUdynxIBzd e1Ub/X/RpAIkErNh4VR5x6phUHzd4es0VDs7onFlVcupLrhEH2Hsa9UPvwFbPYwV CPPe7kDoLVI0K+c= Received: (qmail 12255 invoked by alias); 6 Oct 2017 21:16:07 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 12241 invoked by uid 89); 6 Oct 2017 21:16:06 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-26.3 required=5.0 tests=BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_STOCKGEN, RCVD_IN_DNSWL_NONE, RCVD_IN_SORBS_SPAM, SPF_PASS autolearn=ham version=3.3.2 spammy=H*MI:2612 X-HELO: mail-qt0-f177.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:subject:date:message-id; bh=n+TYLXByY86q5jTcC5azLn9RlIZ/oVVyhEaQShuojEo=; b=L2622PLKEy2A6FvGGjHMxoEVogayYJjoBF2avjDit/PVyPz+0Obxj42Oj2qmcluZgs RPV4FdzAN+IZTgR3/rltT8x5HVhFlwzoyDwh2649JRxJc6y6CcpEY+kwuwkCiodOHvf0 WcBPl2o4P53YYCkoOCbgOEu/Mzobeo+Uc8X3Tjc9RmaUReD2oBeL1vCOlqZkfuTKjJE9 s+5aI+stdN8phO28DDAGudBFgpw62mrK81YxRAnUpcBNdOyKLxniVKGdTD6PkW3PoYak mMBAjabk1I9AOBmcl7FlX6TMhyfKEQnP3ZdXCLeUzP4vpJoEpXmpScn2bYN/M25LiANl SlzQ== X-Gm-Message-State: AMCzsaUtrh2dpvod1RgNo6WAaO9oUNYre5NgedGOtfdAxgjuVPXx0rQZ G7qAqnWo/E6XokLoK9QeJrWNCvByDnA= X-Google-Smtp-Source: AOwi7QAiH1Q9r8EEkQILPBvKpx1JAuknOYrcrGBniKZNEoHSTSMQE6mtxSE4GS0Sqkj0BT0u5gh/tA== X-Received: by 10.200.15.83 with SMTP id l19mr4833297qtk.168.1507324561628; Fri, 06 Oct 2017 14:16:01 -0700 (PDT) From: Adhemerval Zanella To: libc-alpha@sourceware.org Subject: [PATCH 1/2] arm: Implement memcpy ifunc selection in C Date: Fri, 6 Oct 2017 18:15:53 -0300 Message-Id: <1507324554-2612-1-git-send-email-adhemerval.zanella@linaro.org> This patch refactor ARM memcpy ifunc selector to a C implementation. No functional change is expected, including ifunc resolution rules. To avoid build issues with mainline GCC which set no default gnu indirect function support for ARM (--enable-gnu-indirect-function) two fixes are added: 1. The new macro arm_libc_ifunc_hidden_def is redefined using direct asm assembly directives instead of function attributes. This avoid the incompatbile types for a symbol and its alias. 2. A new macro NO_MEM_INTERAL_SYM_HACKS is added on symbol-hacks and defined on arm ifunc objects. It avoids the symbol definition loop because compiler might emit the hacks for first fix before the ifunc function definition itself. Checked on armv7-linux-gnueabihf and with a build for arm-linux-gnueabi, arm-linux-gnueabihf with and without multiarch support and with both GCC 7.1 and GCC mainline. * sysdeps/arm/arm-ifunc.h: New file. * sysdeps/arm/armv7/multiarch/ifunc-memcpy.h: Likewise. * sysdeps/arm/armv7/multiarch/memcpy.c: Likewise. * sysdeps/arm/armv7/multiarch/memcpy_arm.S: Likewise. * sysdeps/arm/armv7/multiarch/rtld-memcpy.S: Likewise. * sysdeps/arm/armv7/multiarch/Makefile [$(subdir) = string] (sysdep_routines): Add memcpy_arm. (CFLAGS-.os): New rule. * sysdeps/arm/armv7/multiarch/memcpy.S: Remove file. * sysdeps/generic/symbol-hacks.h: Use mem* alias hacks if not defined NO_MEM_INTERAL_SYM_HACKS. --- ChangeLog | 12 +++++ sysdeps/arm/arm-ifunc.h | 47 ++++++++++++++++++ sysdeps/arm/armv7/multiarch/Makefile | 8 +++- sysdeps/arm/armv7/multiarch/ifunc-memcpy.h | 37 +++++++++++++++ sysdeps/arm/armv7/multiarch/memcpy.S | 76 ------------------------------ sysdeps/arm/armv7/multiarch/memcpy.c | 33 +++++++++++++ sysdeps/arm/armv7/multiarch/memcpy_arm.S | 6 +++ sysdeps/arm/armv7/multiarch/rtld-memcpy.S | 1 + sysdeps/generic/symbol-hacks.h | 3 +- 9 files changed, 145 insertions(+), 78 deletions(-) create mode 100644 sysdeps/arm/arm-ifunc.h create mode 100644 sysdeps/arm/armv7/multiarch/ifunc-memcpy.h delete mode 100644 sysdeps/arm/armv7/multiarch/memcpy.S create mode 100644 sysdeps/arm/armv7/multiarch/memcpy.c create mode 100644 sysdeps/arm/armv7/multiarch/memcpy_arm.S create mode 100644 sysdeps/arm/armv7/multiarch/rtld-memcpy.S diff --git a/sysdeps/arm/arm-ifunc.h b/sysdeps/arm/arm-ifunc.h new file mode 100644 index 0000000..f7d3473 --- /dev/null +++ b/sysdeps/arm/arm-ifunc.h @@ -0,0 +1,47 @@ +/* Common definition for ifunc resolvers. Linux/ARM version. + This file is part of the GNU C Library. + Copyright (C) 2017 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +#define INIT_ARCH() + +#define arm_libc_ifunc_redirected(redirected_name, name, expr) \ + __ifunc (redirected_name, name, expr(hwcap), int hwcap, INIT_ARCH) + +#if defined SHARED +# ifdef HAVE_GCC_IFUNC +# define arm_libc_ifunc_hidden_def(redirect_name, name) \ + __hidden_ver1 (name, __GI_##name, redirect_name) \ + __attribute__ ((visibility ("hidden"))) +# else +/* GCC 8 without support for ifunc issues an error when trying to + alias symbols with different prototypes (in this case the redirect + one which have the same as the function implementation and the + ifunc resolver). The macro below avoid it by issuing the required + alias directly. */ +# define arm_libc_ifunc_hidden_def(redirect_name, name) \ + _arm_ifunc_hidden_ver (__GI_##name, name) +# define _arm_ifunc_hidden_ver(local, name) \ + asm (".globl " #local ";" \ + ".hidden " #local ";" \ + #local "=" #name) +# endif +#else +# define arm_libc_ifunc_hidden_def(redirect_name, name) +#endif diff --git a/sysdeps/arm/armv7/multiarch/Makefile b/sysdeps/arm/armv7/multiarch/Makefile index 9e1e61c..24c5e5a 100644 --- a/sysdeps/arm/armv7/multiarch/Makefile +++ b/sysdeps/arm/armv7/multiarch/Makefile @@ -1,3 +1,9 @@ ifeq ($(subdir),string) -sysdep_routines += memcpy_neon memcpy_vfp memchr_neon +sysdep_routines += memcpy_neon memcpy_vfp memchr_neon memcpy_arm + +# For ifunc resolvers compiler might place the alias from generic +# symbol-hacks.h before the resolver implementation itself. This +# causes definition loops and the macro below suppress the alias +# definition. +CFLAGS-.os += -DNO_MEM_INTERAL_SYM_HACKS endif diff --git a/sysdeps/arm/armv7/multiarch/ifunc-memcpy.h b/sysdeps/arm/armv7/multiarch/ifunc-memcpy.h new file mode 100644 index 0000000..78cef2a --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/ifunc-memcpy.h @@ -0,0 +1,37 @@ +/* Common definition for memcpy resolver. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifdef __SOFTFP__ +__typeof (REDIRECT_NAME) OPTIMIZE (arm) attribute_hidden; +#endif +__typeof (REDIRECT_NAME) OPTIMIZE (vfp) attribute_hidden; +__typeof (REDIRECT_NAME) OPTIMIZE (neon) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (int hwcap) +{ + if (hwcap & HWCAP_ARM_NEON) + return OPTIMIZE (neon); +#ifdef __SOFTFP__ + if (hwcap & HWCAP_ARM_VFP) + return OPTIMIZE (vfp); + return OPTIMIZE (arm); +#else + return OPTIMIZE (vfp); +#endif +} diff --git a/sysdeps/arm/armv7/multiarch/memcpy.S b/sysdeps/arm/armv7/multiarch/memcpy.S deleted file mode 100644 index 8a53bda..0000000 --- a/sysdeps/arm/armv7/multiarch/memcpy.S +++ /dev/null @@ -1,76 +0,0 @@ -/* Multiple versions of memcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2013-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -/* Thumb requires excess IT instructions here. */ -#define NO_THUMB -#include -#include - -#if IS_IN (libc) -/* Under __ARM_NEON__, memcpy_neon.S defines the name memcpy. */ -# ifndef __ARM_NEON__ - .text -ENTRY(memcpy) - .type memcpy, %gnu_indirect_function -# ifdef __SOFTFP__ - ldr r1, .Lmemcpy_arm - tst r0, #HWCAP_ARM_VFP - ldrne r1, .Lmemcpy_vfp -# else - ldr r1, .Lmemcpy_vfp -# endif - tst r0, #HWCAP_ARM_NEON - ldrne r1, .Lmemcpy_neon -1: - add r0, r1, pc - DO_RET(lr) - -# ifdef __SOFTFP__ -.Lmemcpy_arm: - .long C_SYMBOL_NAME(__memcpy_arm) - 1b - PC_OFS -# endif -.Lmemcpy_neon: - .long C_SYMBOL_NAME(__memcpy_neon) - 1b - PC_OFS -.Lmemcpy_vfp: - .long C_SYMBOL_NAME(__memcpy_vfp) - 1b - PC_OFS - -END(memcpy) - -libc_hidden_builtin_def (memcpy) -#endif /* Not __ARM_NEON__. */ - -/* These versions of memcpy are defined not to clobber any VFP or NEON - registers so they must always call the ARM variant of the memcpy code. */ -strong_alias (__memcpy_arm, __aeabi_memcpy) -strong_alias (__memcpy_arm, __aeabi_memcpy4) -strong_alias (__memcpy_arm, __aeabi_memcpy8) -libc_hidden_def (__memcpy_arm) - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) -#undef weak_alias -#define weak_alias(x, y) -#undef libc_hidden_def -#define libc_hidden_def(name) - -#define memcpy __memcpy_arm - -#endif - -#include "memcpy_impl.S" diff --git a/sysdeps/arm/armv7/multiarch/memcpy.c b/sysdeps/arm/armv7/multiarch/memcpy.c new file mode 100644 index 0000000..7ef6714 --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/memcpy.c @@ -0,0 +1,33 @@ +/* Multiple versions of memcpy. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) +# define memcpy __redirect_memcpy +# include +# undef memcpy + +# include + +# define SYMBOL_NAME memcpy +# include "ifunc-memcpy.h" + +arm_libc_ifunc_redirected (__redirect_memcpy, memcpy, IFUNC_SELECTOR); + +arm_libc_ifunc_hidden_def (__redirect_memcpy, memcpy); +#endif diff --git a/sysdeps/arm/armv7/multiarch/memcpy_arm.S b/sysdeps/arm/armv7/multiarch/memcpy_arm.S new file mode 100644 index 0000000..37565cd --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/memcpy_arm.S @@ -0,0 +1,6 @@ +#define memcpy __memcpy_arm +#include "memcpy_impl.S" + +strong_alias (__memcpy_arm, __aeabi_memcpy) +strong_alias (__memcpy_arm, __aeabi_memcpy4) +strong_alias (__memcpy_arm, __aeabi_memcpy8) diff --git a/sysdeps/arm/armv7/multiarch/rtld-memcpy.S b/sysdeps/arm/armv7/multiarch/rtld-memcpy.S new file mode 100644 index 0000000..0190edc --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/rtld-memcpy.S @@ -0,0 +1 @@ +#include <./sysdeps/arm/armv7/multiarch/memcpy_impl.S> diff --git a/sysdeps/generic/symbol-hacks.h b/sysdeps/generic/symbol-hacks.h index d614c09..589585a 100644 --- a/sysdeps/generic/symbol-hacks.h +++ b/sysdeps/generic/symbol-hacks.h @@ -1,6 +1,7 @@ /* Some compiler optimizations may transform loops into memset/memmove calls and without proper declaration it may generate PLT calls. */ -#if !defined __ASSEMBLER__ && IS_IN (libc) && defined SHARED +#if !defined __ASSEMBLER__ && IS_IN (libc) && defined SHARED \ + && !defined(NO_MEM_INTERAL_SYM_HACKS) asm ("memmove = __GI_memmove"); asm ("memset = __GI_memset"); asm ("memcpy = __GI_memcpy"); From patchwork Fri Oct 6 21:15:54 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Adhemerval Zanella Netto X-Patchwork-Id: 822728 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=sourceware.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=libc-alpha-return-85523-incoming=patchwork.ozlabs.org@sourceware.org; receiver=) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; secure) header.d=sourceware.org header.i=@sourceware.org header.b="ApUxqOT2"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3y82WG4qbxz9sPr for ; Sat, 7 Oct 2017 08:16:26 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:in-reply-to :references; q=dns; s=default; b=P+/4V618pUbh3zYczvUZ7DHGricZWmj FtqkWXEtyANv20ymr3RE43PNyQDLIL45gEZPjSAz1lo1CC1Irs49YgLDRbh0oG0i h3We9df1wsQBtcfzHRmVEuM2i12KGfsL7zjJTGud/FZCH1VpphWV0tXoR74WFg/O opQPn3OtJRQg= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:in-reply-to :references; s=default; bh=qCRgIQYlmBWD2TyZ+2DXpuiYsiQ=; b=ApUxq OT2dskQ9/zLI6VK/UtiBB+QFahUzCtPceqlZJyzof9y4nQ1YxJ7jJNZXPM1v3XAh GlmjpVzWW1wxHiaIusZu+i+w5NLkSV7+KV8xNh22HR+rIG4tLlne2fgObP8zF9Da 0qORWm+8InJ8ea73rNTWq26G4zkm05SSMhi0p8= Received: (qmail 12547 invoked by alias); 6 Oct 2017 21:16:09 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 12490 invoked by uid 89); 6 Oct 2017 21:16:09 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-26.3 required=5.0 tests=BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_STOCKGEN, RCVD_IN_DNSWL_NONE, RCVD_IN_SORBS_SPAM, SPF_PASS autolearn=ham version=3.3.2 spammy= X-HELO: mail-qt0-f169.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:subject:date:message-id:in-reply-to :references; bh=rBb2UOLiYNGNbNHF3E6HND6KedyLSb/iw33CH5TQ2Vo=; b=odca3xqlk2s0F2y6kSctLrv+5GFrEPPXEpqHntSn44ifz2SbcODWqi+Du4kRBoaJs5 MBiZ6X3dSAOMTGWie6cZMgQxzo/IO+iVFzxAY5cXIe+lKdKegFzzhY3p4a7iXVrCsfEI tC2c+kPDP28H4mKSWJfYJdG8lutI+OhAgD9XXkXkd8MbS/MY4s8kUUmyjnQCd9LFtkF/ SvExDuV8o24YlaWekOBFQD18w79Rw5xo1B/gGT6Wl8bv4N1KxiDs3COS9i7majW/Std5 m8miXOo5uAW5g+gB6ejyY6XutRIwq6pZhv7glQv9c2SlM0ogCOIg2v1OupJ92m6vkJO5 LnvQ== X-Gm-Message-State: AMCzsaUUo3KTnM2oRfnUawMy8iFrH4fCcDcPYx3PVzzKxjcvEpt66fGp VUfqKLISzeB8p7fqmOUD6tbnLhQNqqE= X-Google-Smtp-Source: AOwi7QCwX6p5oxRFZrjeXBBXPYVyc9ZOmx+p3wss8/Gmk9v3FkPqmKg8LDgGuwGBTz5ZZZ/nXrlF1g== X-Received: by 10.237.61.49 with SMTP id g46mr5014383qtf.233.1507324563177; Fri, 06 Oct 2017 14:16:03 -0700 (PDT) From: Adhemerval Zanella To: libc-alpha@sourceware.org Subject: [PATCH 2/2] arm: Implement memcpy ifunc selection in C Date: Fri, 6 Oct 2017 18:15:54 -0300 Message-Id: <1507324554-2612-2-git-send-email-adhemerval.zanella@linaro.org> In-Reply-To: <1507324554-2612-1-git-send-email-adhemerval.zanella@linaro.org> References: <1507324554-2612-1-git-send-email-adhemerval.zanella@linaro.org> This patch refactor ARM memcpy ifunc selector to a C implementation. No functional change is expected, including ifunc resolution rules. It also reorganize the ifunc options code: 1. The memchr_impl.S is renamed to memchr_neon.S and multiple compilation options (which route to armv6t2/memchr one) is removed. The code to build if __ARM_NEON__ is defined is also simplified. 2. A memchr_noneon is added (which as build along previous ifunc resolution) and includes the armv6t2 direct. 3. Same as 2. for loader object. Checked on armv7-linux-gnueabihf and with a build for arm-linux-gnueabi, arm-linux-gnueabihf with and without multiarch support and with both GCC 7.1 and GCC mainline. * sysdeps/arm/armv7/multiarch/Makefile [$(subdir) = string] (sysdeps_routines): Add memchr_noneon. * sysdeps/arm/armv7/multiarch/ifunc-memchr.h: New file. * sysdeps/arm/armv7/multiarch/memchr_noneon.S: Likewise. * sysdeps/arm/armv7/multiarch/rtld-memchr.S: Likewise. * sysdeps/arm/armv7/multiarch/memchr.S: Remove file. * sysdeps/arm/armv7/multiarch/memchr.c: New file. * sysdeps/arm/armv7/multiarch/memchr_impl.S: Move to ... * sysdeps/arm/armv7/multiarch/memchr_neon.S: ... here. --- ChangeLog | 10 ++ sysdeps/arm/armv7/multiarch/Makefile | 3 +- sysdeps/arm/armv7/multiarch/ifunc-memchr.h | 28 ++++ sysdeps/arm/armv7/multiarch/memchr.S | 59 -------- sysdeps/arm/armv7/multiarch/memchr.c | 35 +++++ sysdeps/arm/armv7/multiarch/memchr_impl.S | 219 --------------------------- sysdeps/arm/armv7/multiarch/memchr_neon.S | 221 +++++++++++++++++++++++++++- sysdeps/arm/armv7/multiarch/memchr_noneon.S | 8 + sysdeps/arm/armv7/multiarch/rtld-memchr.S | 1 + 9 files changed, 299 insertions(+), 285 deletions(-) create mode 100644 sysdeps/arm/armv7/multiarch/ifunc-memchr.h delete mode 100644 sysdeps/arm/armv7/multiarch/memchr.S create mode 100644 sysdeps/arm/armv7/multiarch/memchr.c delete mode 100644 sysdeps/arm/armv7/multiarch/memchr_impl.S create mode 100644 sysdeps/arm/armv7/multiarch/memchr_noneon.S create mode 100644 sysdeps/arm/armv7/multiarch/rtld-memchr.S diff --git a/sysdeps/arm/armv7/multiarch/Makefile b/sysdeps/arm/armv7/multiarch/Makefile index 24c5e5a..ccca96e 100644 --- a/sysdeps/arm/armv7/multiarch/Makefile +++ b/sysdeps/arm/armv7/multiarch/Makefile @@ -1,5 +1,6 @@ ifeq ($(subdir),string) -sysdep_routines += memcpy_neon memcpy_vfp memchr_neon memcpy_arm +sysdep_routines += memcpy_neon memcpy_vfp memchr_neon memcpy_arm \ + memchr_noneon # For ifunc resolvers compiler might place the alias from generic # symbol-hacks.h before the resolver implementation itself. This diff --git a/sysdeps/arm/armv7/multiarch/ifunc-memchr.h b/sysdeps/arm/armv7/multiarch/ifunc-memchr.h new file mode 100644 index 0000000..42f89fa --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/ifunc-memchr.h @@ -0,0 +1,28 @@ +/* Common definition for memchr resolver. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +__typeof (REDIRECT_NAME) OPTIMIZE (neon) attribute_hidden; +__typeof (REDIRECT_NAME) OPTIMIZE (noneon) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (int hwcap) +{ + if (hwcap & HWCAP_ARM_NEON) + return OPTIMIZE (neon); + return OPTIMIZE (noneon); +} diff --git a/sysdeps/arm/armv7/multiarch/memchr.S b/sysdeps/arm/armv7/multiarch/memchr.S deleted file mode 100644 index 8e8097a..0000000 --- a/sysdeps/arm/armv7/multiarch/memchr.S +++ /dev/null @@ -1,59 +0,0 @@ -/* Multiple versions of memchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2013-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - -#if IS_IN (libc) -/* Under __ARM_NEON__, memchr_neon.S defines the name memchr. */ -# ifndef __ARM_NEON__ - .text - .arm -ENTRY(memchr) - .type memchr, %gnu_indirect_function - ldr r1, .Lmemchr_noneon - tst r0, #HWCAP_ARM_NEON - ldrne r1, .Lmemchr_neon -1: - add r0, r1, pc - DO_RET(lr) - -.Lmemchr_noneon: - .long C_SYMBOL_NAME(__memchr_noneon) - 1b - 8 -.Lmemchr_neon: - .long C_SYMBOL_NAME(__memchr_neon) - 1b - 8 - -END(memchr) - -libc_hidden_builtin_def (memchr) -# endif /* Not __ARM_NEON__. */ -libc_hidden_def (__memchr_noneon) - -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) -# undef weak_alias -# define weak_alias(x, y) -# undef libc_hidden_def -# define libc_hidden_def(name) - -# define memchr __memchr_noneon - -#endif - -#include "memchr_impl.S" diff --git a/sysdeps/arm/armv7/multiarch/memchr.c b/sysdeps/arm/armv7/multiarch/memchr.c new file mode 100644 index 0000000..906bcd5 --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/memchr.c @@ -0,0 +1,35 @@ +/* Multiple versions of memchr. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* For __ARM_NEON__ memchr_neon.S defines memchr directly and ifunc + is not used. */ +#if IS_IN (libc) && !defined (__ARM_NEON__) +# define memchr __redirect_memchr +# include +# undef memchr + +# include + +# define SYMBOL_NAME memchr +# include "ifunc-memchr.h" + +arm_libc_ifunc_redirected (__redirect_memchr, memchr, IFUNC_SELECTOR); + +arm_libc_ifunc_hidden_def (__redirect_memchr, memchr); +#endif diff --git a/sysdeps/arm/armv7/multiarch/memchr_impl.S b/sysdeps/arm/armv7/multiarch/memchr_impl.S deleted file mode 100644 index e8cbb97..0000000 --- a/sysdeps/arm/armv7/multiarch/memchr_impl.S +++ /dev/null @@ -1,219 +0,0 @@ -/* memchr implemented using NEON. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - . */ - -#ifdef MEMCHR_NEON - -#include - - .arch armv7-a - .fpu neon - - -/* Arguments */ -#define srcin r0 -#define chrin r1 -#define cntin r2 - -/* Retval */ -#define result r0 /* Live range does not overlap with srcin */ - -/* Working registers */ -#define src r1 /* Live range does not overlap with chrin */ -#define tmp r3 -#define synd r0 /* No overlap with srcin or result */ -#define soff r12 - -/* Working NEON registers */ -#define vrepchr q0 -#define vdata0 q1 -#define vdata0_0 d2 /* Lower half of vdata0 */ -#define vdata0_1 d3 /* Upper half of vdata0 */ -#define vdata1 q2 -#define vdata1_0 d4 /* Lower half of vhas_chr0 */ -#define vdata1_1 d5 /* Upper half of vhas_chr0 */ -#define vrepmask q3 -#define vrepmask0 d6 -#define vrepmask1 d7 -#define vend q4 -#define vend0 d8 -#define vend1 d9 - -/* - * Core algorithm: - * - * For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per - * byte. Each bit is set if the relevant byte matched the requested character - * and cleared otherwise. Since the bits in the syndrome reflect exactly the - * order in which things occur in the original string, counting trailing zeros - * allows to identify exactly which byte has matched. - */ - -#ifndef NO_THUMB - .thumb_func -#else - .arm -#endif - .p2align 4,,15 - -ENTRY(memchr) - /* Use a simple loop if there are less than 8 bytes to search. */ - cmp cntin, #7 - bhi .Llargestr - and chrin, chrin, #0xff - -.Lsmallstr: - subs cntin, cntin, #1 - blo .Lnotfound /* Return not found if reached end. */ - ldrb tmp, [srcin], #1 - cmp tmp, chrin - bne .Lsmallstr /* Loop again if not found. */ - /* Otherwise fixup address and return. */ - sub result, srcin, #1 - bx lr - - -.Llargestr: - vdup.8 vrepchr, chrin /* Duplicate char across all lanes. */ - /* - * Magic constant 0x8040201008040201 allows us to identify which lane - * matches the requested byte. - */ - movw tmp, #0x0201 - movt tmp, #0x0804 - lsl soff, tmp, #4 - vmov vrepmask0, tmp, soff - vmov vrepmask1, tmp, soff - /* Work with aligned 32-byte chunks */ - bic src, srcin, #31 - ands soff, srcin, #31 - beq .Lloopintro /* Go straight to main loop if it's aligned. */ - - /* - * Input string is not 32-byte aligned. We calculate the syndrome - * value for the aligned 32 bytes block containing the first bytes - * and mask the irrelevant part. - */ - vld1.8 {vdata0, vdata1}, [src:256]! - sub tmp, soff, #32 - adds cntin, cntin, tmp - vceq.i8 vdata0, vdata0, vrepchr - vceq.i8 vdata1, vdata1, vrepchr - vand vdata0, vdata0, vrepmask - vand vdata1, vdata1, vrepmask - vpadd.i8 vdata0_0, vdata0_0, vdata0_1 - vpadd.i8 vdata1_0, vdata1_0, vdata1_1 - vpadd.i8 vdata0_0, vdata0_0, vdata1_0 - vpadd.i8 vdata0_0, vdata0_0, vdata0_0 - vmov synd, vdata0_0[0] - - /* Clear the soff lower bits */ - lsr synd, synd, soff - lsl synd, synd, soff - /* The first block can also be the last */ - bls .Lmasklast - /* Have we found something already? */ -#ifndef NO_THUMB - cbnz synd, .Ltail -#else - cmp synd, #0 - bne .Ltail -#endif - - -.Lloopintro: - vpush {vend} - /* 264/265 correspond to d8/d9 for q4 */ - cfi_adjust_cfa_offset (16) - cfi_rel_offset (264, 0) - cfi_rel_offset (265, 8) - .p2align 3,,7 -.Lloop: - vld1.8 {vdata0, vdata1}, [src:256]! - subs cntin, cntin, #32 - vceq.i8 vdata0, vdata0, vrepchr - vceq.i8 vdata1, vdata1, vrepchr - /* If we're out of data we finish regardless of the result. */ - bls .Lend - /* Use a fast check for the termination condition. */ - vorr vend, vdata0, vdata1 - vorr vend0, vend0, vend1 - vmov synd, tmp, vend0 - orrs synd, synd, tmp - /* We're not out of data, loop if we haven't found the character. */ - beq .Lloop - -.Lend: - vpop {vend} - cfi_adjust_cfa_offset (-16) - cfi_restore (264) - cfi_restore (265) - - /* Termination condition found, let's calculate the syndrome value. */ - vand vdata0, vdata0, vrepmask - vand vdata1, vdata1, vrepmask - vpadd.i8 vdata0_0, vdata0_0, vdata0_1 - vpadd.i8 vdata1_0, vdata1_0, vdata1_1 - vpadd.i8 vdata0_0, vdata0_0, vdata1_0 - vpadd.i8 vdata0_0, vdata0_0, vdata0_0 - vmov synd, vdata0_0[0] -#ifndef NO_THUMB - cbz synd, .Lnotfound - bhi .Ltail /* Uses the condition code from - subs cntin, cntin, #32 above. */ -#else - cmp synd, #0 - beq .Lnotfound - cmp cntin, #0 - bhi .Ltail -#endif - - -.Lmasklast: - /* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */ - neg cntin, cntin - lsl synd, synd, cntin - lsrs synd, synd, cntin - it eq - moveq src, #0 /* If no match, set src to 0 so the retval is 0. */ - - -.Ltail: - /* Count the trailing zeros using bit reversing */ - rbit synd, synd - /* Compensate the last post-increment */ - sub src, src, #32 - /* Count the leading zeros */ - clz synd, synd - /* Compute the potential result and return */ - add result, src, synd - bx lr - - -.Lnotfound: - /* Set result to NULL if not found and return */ - mov result, #0 - bx lr - -END(memchr) -libc_hidden_builtin_def (memchr) - -#else - -#include "../../armv6t2/memchr.S" - -#endif diff --git a/sysdeps/arm/armv7/multiarch/memchr_neon.S b/sysdeps/arm/armv7/multiarch/memchr_neon.S index ee21818..a400033 100644 --- a/sysdeps/arm/armv7/multiarch/memchr_neon.S +++ b/sysdeps/arm/armv7/multiarch/memchr_neon.S @@ -1,9 +1,218 @@ -#ifdef __ARM_NEON__ -/* Under __ARM_NEON__, this file defines memchr directly. */ -libc_hidden_builtin_def (memchr) -#else +/* memchr implemented using NEON. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include + +/* For __ARM_NEON__ this file defines memchr. */ +#ifndef __ARM_NEON__ # define memchr __memchr_neon +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(a) +#endif + + .arch armv7-a + .fpu neon + + +/* Arguments */ +#define srcin r0 +#define chrin r1 +#define cntin r2 + +/* Retval */ +#define result r0 /* Live range does not overlap with srcin */ + +/* Working registers */ +#define src r1 /* Live range does not overlap with chrin */ +#define tmp r3 +#define synd r0 /* No overlap with srcin or result */ +#define soff r12 + +/* Working NEON registers */ +#define vrepchr q0 +#define vdata0 q1 +#define vdata0_0 d2 /* Lower half of vdata0 */ +#define vdata0_1 d3 /* Upper half of vdata0 */ +#define vdata1 q2 +#define vdata1_0 d4 /* Lower half of vhas_chr0 */ +#define vdata1_1 d5 /* Upper half of vhas_chr0 */ +#define vrepmask q3 +#define vrepmask0 d6 +#define vrepmask1 d7 +#define vend q4 +#define vend0 d8 +#define vend1 d9 + +/* + * Core algorithm: + * + * For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per + * byte. Each bit is set if the relevant byte matched the requested character + * and cleared otherwise. Since the bits in the syndrome reflect exactly the + * order in which things occur in the original string, counting trailing zeros + * allows to identify exactly which byte has matched. + */ + +#ifndef NO_THUMB + .thumb_func +#else + .arm +#endif + .p2align 4,,15 + +ENTRY(memchr) + /* Use a simple loop if there are less than 8 bytes to search. */ + cmp cntin, #7 + bhi .Llargestr + and chrin, chrin, #0xff + +.Lsmallstr: + subs cntin, cntin, #1 + blo .Lnotfound /* Return not found if reached end. */ + ldrb tmp, [srcin], #1 + cmp tmp, chrin + bne .Lsmallstr /* Loop again if not found. */ + /* Otherwise fixup address and return. */ + sub result, srcin, #1 + bx lr + + +.Llargestr: + vdup.8 vrepchr, chrin /* Duplicate char across all lanes. */ + /* + * Magic constant 0x8040201008040201 allows us to identify which lane + * matches the requested byte. + */ + movw tmp, #0x0201 + movt tmp, #0x0804 + lsl soff, tmp, #4 + vmov vrepmask0, tmp, soff + vmov vrepmask1, tmp, soff + /* Work with aligned 32-byte chunks */ + bic src, srcin, #31 + ands soff, srcin, #31 + beq .Lloopintro /* Go straight to main loop if it's aligned. */ + + /* + * Input string is not 32-byte aligned. We calculate the syndrome + * value for the aligned 32 bytes block containing the first bytes + * and mask the irrelevant part. + */ + vld1.8 {vdata0, vdata1}, [src:256]! + sub tmp, soff, #32 + adds cntin, cntin, tmp + vceq.i8 vdata0, vdata0, vrepchr + vceq.i8 vdata1, vdata1, vrepchr + vand vdata0, vdata0, vrepmask + vand vdata1, vdata1, vrepmask + vpadd.i8 vdata0_0, vdata0_0, vdata0_1 + vpadd.i8 vdata1_0, vdata1_0, vdata1_1 + vpadd.i8 vdata0_0, vdata0_0, vdata1_0 + vpadd.i8 vdata0_0, vdata0_0, vdata0_0 + vmov synd, vdata0_0[0] + + /* Clear the soff lower bits */ + lsr synd, synd, soff + lsl synd, synd, soff + /* The first block can also be the last */ + bls .Lmasklast + /* Have we found something already? */ +#ifndef NO_THUMB + cbnz synd, .Ltail +#else + cmp synd, #0 + bne .Ltail #endif -#define MEMCHR_NEON -#include "memchr_impl.S" + +.Lloopintro: + vpush {vend} + /* 264/265 correspond to d8/d9 for q4 */ + cfi_adjust_cfa_offset (16) + cfi_rel_offset (264, 0) + cfi_rel_offset (265, 8) + .p2align 3,,7 +.Lloop: + vld1.8 {vdata0, vdata1}, [src:256]! + subs cntin, cntin, #32 + vceq.i8 vdata0, vdata0, vrepchr + vceq.i8 vdata1, vdata1, vrepchr + /* If we're out of data we finish regardless of the result. */ + bls .Lend + /* Use a fast check for the termination condition. */ + vorr vend, vdata0, vdata1 + vorr vend0, vend0, vend1 + vmov synd, tmp, vend0 + orrs synd, synd, tmp + /* We're not out of data, loop if we haven't found the character. */ + beq .Lloop + +.Lend: + vpop {vend} + cfi_adjust_cfa_offset (-16) + cfi_restore (264) + cfi_restore (265) + + /* Termination condition found, let's calculate the syndrome value. */ + vand vdata0, vdata0, vrepmask + vand vdata1, vdata1, vrepmask + vpadd.i8 vdata0_0, vdata0_0, vdata0_1 + vpadd.i8 vdata1_0, vdata1_0, vdata1_1 + vpadd.i8 vdata0_0, vdata0_0, vdata1_0 + vpadd.i8 vdata0_0, vdata0_0, vdata0_0 + vmov synd, vdata0_0[0] +#ifndef NO_THUMB + cbz synd, .Lnotfound + bhi .Ltail /* Uses the condition code from + subs cntin, cntin, #32 above. */ +#else + cmp synd, #0 + beq .Lnotfound + cmp cntin, #0 + bhi .Ltail +#endif + + +.Lmasklast: + /* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */ + neg cntin, cntin + lsl synd, synd, cntin + lsrs synd, synd, cntin + it eq + moveq src, #0 /* If no match, set src to 0 so the retval is 0. */ + + +.Ltail: + /* Count the trailing zeros using bit reversing */ + rbit synd, synd + /* Compensate the last post-increment */ + sub src, src, #32 + /* Count the leading zeros */ + clz synd, synd + /* Compute the potential result and return */ + add result, src, synd + bx lr + + +.Lnotfound: + /* Set result to NULL if not found and return */ + mov result, #0 + bx lr + +END(memchr) +libc_hidden_builtin_def (memchr) diff --git a/sysdeps/arm/armv7/multiarch/memchr_noneon.S b/sysdeps/arm/armv7/multiarch/memchr_noneon.S new file mode 100644 index 0000000..e13be13 --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/memchr_noneon.S @@ -0,0 +1,8 @@ +/* For __ARM_NEON__ memchr_neon defines memchr. */ +#ifndef __ARM_NEON__ +# define memchr __memchr_noneon +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) + +# include +#endif diff --git a/sysdeps/arm/armv7/multiarch/rtld-memchr.S b/sysdeps/arm/armv7/multiarch/rtld-memchr.S new file mode 100644 index 0000000..ae8e5f0 --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/rtld-memchr.S @@ -0,0 +1 @@ +#include