From patchwork Thu Jun 30 01:56:16 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Noah Goldstein X-Patchwork-Id: 1650431 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: bilbo.ozlabs.org; dkim=pass (1024-bit key; secure) header.d=sourceware.org header.i=@sourceware.org header.a=rsa-sha256 header.s=default header.b=CkTqWXUp; dkim-atps=neutral Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=sourceware.org (client-ip=8.43.85.97; helo=sourceware.org; envelope-from=libc-alpha-bounces+incoming=patchwork.ozlabs.org@sourceware.org; receiver=) Received: from sourceware.org (server2.sourceware.org [8.43.85.97]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4LYLzX1xn3z9s07 for ; Thu, 30 Jun 2022 11:56:40 +1000 (AEST) Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 18D90386F0D3 for ; Thu, 30 Jun 2022 01:56:38 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 18D90386F0D3 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1656554198; bh=Q7oPgT25OI1lg+zlc4gOo5Ukp/tdsY2CrmyjLKv+hww=; h=To:Subject:Date:List-Id:List-Unsubscribe:List-Archive:List-Post: List-Help:List-Subscribe:From:Reply-To:From; b=CkTqWXUp6uua+xlG0//I1VimXiQ86Wr+EE8RP8mJtyfSr36goHsr66aQ+RDRVxhMw nlfPXsahttdbzOlw1Qh+RGQopPEB96DISG9pqOLzNNULlALpTewYbbgaMSyHqdT92f yNJXaca1+EJucO/+DpvLegthefTL9CO6DYGxH390= X-Original-To: libc-alpha@sourceware.org Delivered-To: libc-alpha@sourceware.org Received: from mail-pj1-x102b.google.com (mail-pj1-x102b.google.com [IPv6:2607:f8b0:4864:20::102b]) by sourceware.org (Postfix) with ESMTPS id 813543847814 for ; Thu, 30 Jun 
2022 01:56:23 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 813543847814 Received: by mail-pj1-x102b.google.com with SMTP id m14-20020a17090a668e00b001ee6ece8368so1292331pjj.3 for ; Wed, 29 Jun 2022 18:56:23 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:mime-version :content-transfer-encoding; bh=Q7oPgT25OI1lg+zlc4gOo5Ukp/tdsY2CrmyjLKv+hww=; b=RZhbEIVCpRtgor/KEVXbSg7gL0pIXklhZ16hu/Ntu7dW1cTB1QKJ2l28tVdWXD/eON 00fWgwa0pwz4D5Gu29FBBW2i8jgBGTIqun9h6jMf9W/Ohiv0a3o5xZfb26H6pFGMISlC uhbgvao/BjpIsbisYiOsQoIrUA0DZ+d9dLdnitCAjtd931rZpveyfKaKbn0DbHzHYOaI jk/NHMPvFYRwZuS0fIFLPVTlfdJ3gbuSkO3tzhEC1vaoi+XkCH3tpPnOpiL0WWsnIrnr dfBr9GRUSZ7/vM4TvgfSC27kNwriknMaiTK1JL60+Hq4Yws9/m3PqxXmMSlZzVT8x9VJ lS/g== X-Gm-Message-State: AJIora/TEjw5QR4oChVD2WnOVPZr7TuRKglf02dJtdOXx3fTnTk9PdTx ghcIFTLzXyrgCIcZGvQvSd1Hgwksq2c= X-Google-Smtp-Source: AGRyM1tLOkFG7vZuj3zqc1jgx8iuTDRcqOCVz0vrdJGRRy6atUbRD8tAL0nCgYRAjf5bmsr4cTpx/w== X-Received: by 2002:a17:90a:3fc7:b0:1ec:fcbf:be06 with SMTP id u7-20020a17090a3fc700b001ecfcbfbe06mr7035616pjm.197.1656554182367; Wed, 29 Jun 2022 18:56:22 -0700 (PDT) Received: from noah-tgl.. 
([2600:1010:b00a:11cb:54c6:67b8:c3a4:f835]) by smtp.gmail.com with ESMTPSA id h15-20020a170902680f00b00161e50e2245sm11991259plk.178.2022.06.29.18.56.21 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 29 Jun 2022 18:56:21 -0700 (PDT) To: libc-alpha@sourceware.org Subject: [PATCH v1] x86: Add missing IS_IN (libc) check to memmove-ssse3.S Date: Wed, 29 Jun 2022 18:56:16 -0700 Message-Id: <20220630015618.3586787-1-goldstein.w.n@gmail.com> X-Mailer: git-send-email 2.34.1 MIME-Version: 1.0 X-Spam-Status: No, score=-12.1 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: Noah Goldstein via Libc-alpha From: Noah Goldstein Reply-To: Noah Goldstein Errors-To: libc-alpha-bounces+incoming=patchwork.ozlabs.org@sourceware.org Sender: "Libc-alpha" The IS_IN (libc) check was missing from memmove-ssse3.S, so in the multiarch build rtld-memmove-ssse3.os was being built and was exporting symbols: >$ nm string/rtld-memmove-ssse3.os U __GI___chk_fail 0000000000000020 T __memcpy_chk_ssse3 0000000000000040 T __memcpy_ssse3 0000000000000020 T __memmove_chk_ssse3 0000000000000040 T __memmove_ssse3 0000000000000000 T __mempcpy_chk_ssse3 0000000000000010 T __mempcpy_ssse3 U __x86_shared_cache_size_half Introduced after 2.35 in: commit 26b2478322db94edc9e0e8f577b2f71d291e5acb Author: Noah Goldstein Date: Thu Apr 14 11:47:40 2022 -0500 x86: Reduce code size of mem{move|pcpy|cpy}-ssse3 --- sysdeps/x86_64/multiarch/memmove-ssse3.S | 60 +++++++++++++++++------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git 
a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S index 310ff62b86..a88fde4a8f 100644 --- a/sysdeps/x86_64/multiarch/memmove-ssse3.S +++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S @@ -1,19 +1,42 @@ -#include - -#ifndef MEMMOVE -# define MEMMOVE __memmove_ssse3 -# define MEMMOVE_CHK __memmove_chk_ssse3 -# define MEMCPY __memcpy_ssse3 -# define MEMCPY_CHK __memcpy_chk_ssse3 -# define MEMPCPY __mempcpy_ssse3 -# define MEMPCPY_CHK __mempcpy_chk_ssse3 -#endif +/* memmove/memcpy/mempcpy optimized for aligned access with SSSE3. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + + +#if IS_IN (libc) + +# include +# ifndef MEMMOVE +# define MEMMOVE __memmove_ssse3 +# define MEMMOVE_CHK __memmove_chk_ssse3 +# define MEMCPY __memcpy_ssse3 +# define MEMCPY_CHK __memcpy_chk_ssse3 +# define MEMPCPY __mempcpy_ssse3 +# define MEMPCPY_CHK __mempcpy_chk_ssse3 +# endif .section .text.ssse3, "ax", @progbits +# if defined SHARED ENTRY(MEMPCPY_CHK) cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET(__chk_fail) END(MEMPCPY_CHK) +# endif ENTRY(MEMPCPY) mov %RDI_LP, %RAX_LP @@ -21,10 +44,12 @@ ENTRY(MEMPCPY) jmp L(start) END(MEMPCPY) +# if defined SHARED ENTRY(MEMMOVE_CHK) cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET(__chk_fail) END(MEMMOVE_CHK) +# endif ENTRY_P2ALIGN(MEMMOVE, 6) # ifdef __ILP32__ @@ -124,11 +149,11 @@ L(more_2x_vec): loop. */ movups %xmm0, (%rdi) -#ifdef SHARED_CACHE_SIZE_HALF +# ifdef SHARED_CACHE_SIZE_HALF cmp $SHARED_CACHE_SIZE_HALF, %RDX_LP -#else +# else cmp __x86_shared_cache_size_half(%rip), %rdx -#endif +# endif ja L(large_memcpy) leaq -64(%rdi, %rdx), %r8 @@ -206,7 +231,7 @@ L(end_loop_fwd): /* Extactly 64 bytes if `jmp L(end_loop_fwd)` is long encoding. 60 bytes otherwise. */ -#define ALIGNED_LOOP_FWD(align_by); \ +# define ALIGNED_LOOP_FWD(align_by); \ .p2align 6; \ L(loop_fwd_ ## align_by): \ movaps 16(%rsi), %xmm0; \ @@ -275,7 +300,7 @@ L(end_large_loop_fwd): /* Size > 64 bytes and <= 96 bytes. 32-byte align between ensure 96-byte spacing between each. */ -#define ALIGNED_LARGE_LOOP_FWD(align_by); \ +# define ALIGNED_LARGE_LOOP_FWD(align_by); \ .p2align 5; \ L(large_loop_fwd_ ## align_by): \ movaps 16(%rsi), %xmm0; \ @@ -343,7 +368,7 @@ L(end_loop_bkwd): /* Extactly 64 bytes if `jmp L(end_loop_bkwd)` is long encoding. 60 bytes otherwise. 
*/ -#define ALIGNED_LOOP_BKWD(align_by); \ +# define ALIGNED_LOOP_BKWD(align_by); \ .p2align 6; \ L(loop_bkwd_ ## align_by): \ movaps 32(%rsi), %xmm1; \ @@ -381,4 +406,7 @@ L(loop_bkwd_ ## align_by): \ END(MEMMOVE) strong_alias (MEMMOVE, MEMCPY) +# if defined SHARED strong_alias (MEMMOVE_CHK, MEMCPY_CHK) +# endif +#endif