From patchwork Wed Sep 27 05:59:11 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vijay Kumar X-Patchwork-Id: 818936 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=sparclinux-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3y26bQ1Cs6z9t33 for ; Wed, 27 Sep 2017 15:59:30 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752299AbdI0F73 (ORCPT ); Wed, 27 Sep 2017 01:59:29 -0400 Received: from aserp1040.oracle.com ([141.146.126.69]:38409 "EHLO aserp1040.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752292AbdI0F71 (ORCPT ); Wed, 27 Sep 2017 01:59:27 -0400 Received: from aserv0022.oracle.com (aserv0022.oracle.com [141.146.126.234]) by aserp1040.oracle.com (Sentrion-MTA-4.3.2/Sentrion-MTA-4.3.2) with ESMTP id v8R5xPo1006088 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK); Wed, 27 Sep 2017 05:59:25 GMT Received: from userv0121.oracle.com (userv0121.oracle.com [156.151.31.72]) by aserv0022.oracle.com (8.14.4/8.14.4) with ESMTP id v8R5xOS0021742 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK); Wed, 27 Sep 2017 05:59:24 GMT Received: from abhmp0012.oracle.com (abhmp0012.oracle.com [141.146.116.18]) by userv0121.oracle.com (8.14.4/8.13.8) with ESMTP id v8R5xO8M005231; Wed, 27 Sep 2017 05:59:24 GMT Received: from ca-sparc60.us.oracle.com (/10.147.24.150) by default (Oracle Beehive Gateway v4.0) with ESMTP ; Tue, 26 Sep 2017 22:59:23 -0700 From: Vijay Kumar To: davem@davemloft.net Cc: linux-kernel@vger.kernel.org, sparclinux@vger.kernel.org, babu.moger@oracle.com 
Subject: [PATCH 1/2] sparc64: Define SPARC default fls and __fls Date: Tue, 26 Sep 2017 23:59:11 -0600 Message-Id: <1506491952-9659-2-git-send-email-vijay.ac.kumar@oracle.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1506491952-9659-1-git-send-email-vijay.ac.kumar@oracle.com> References: <1506491952-9659-1-git-send-email-vijay.ac.kumar@oracle.com> X-Source-IP: aserv0022.oracle.com [141.146.126.234] Sender: sparclinux-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: sparclinux@vger.kernel.org fls and __fls will now require boot time patching on T4 and above. Redefining these functions under arch/sparc/lib. Signed-off-by: Vijay Kumar Reviewed-by: Babu Moger --- arch/sparc/include/asm/bitops_64.h | 7 +- arch/sparc/lib/Makefile | 1 + arch/sparc/lib/fls.S | 126 ++++++++++++++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 3 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 2d52240..946c236 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -22,11 +22,12 @@ void clear_bit(unsigned long nr, volatile unsigned long *addr); void change_bit(unsigned long nr, volatile unsigned long *addr); +#define fls64(word) (((word)?(__fls(word) + 1):0)) +int fls(unsigned int word); +int __fls(unsigned long word); + #include -#include -#include -#include #ifdef __KERNEL__ diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 07c03e7..eefbb9c 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o +lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S new file mode 100644 index 0000000..a19bff2 
--- /dev/null +++ b/arch/sparc/lib/fls.S @@ -0,0 +1,126 @@ +/* fls.S: SPARC default fls and __fls definitions. + * + * SPARC default fls and __fls definitions, which follows the same + * algorithm as in generic fls() and __fls(). These functions will + * be boot time patched on T4 and onward. + */ + +#include +#include + + .text + .align 32 + + .global fls, __fls + .type fls, #function + .type __fls, #function + + .register %g2, #scratch + .register %g3, #scratch + +EXPORT_SYMBOL(__fls) +EXPORT_SYMBOL(fls) + +fls: /* in: %o0 = 32-bit word; out: %o0 = 1-based position of its most significant set bit, 0 when %o0 is zero; scratch: %o1 (result), %g2, %g3 */ + brz,pn %o0, 6f /* word == 0: go straight to the return at 6 */ + mov 0, %o1 /* delay slot: result = 0 */ + sethi %hi(0xffff0000), %g3 + mov %o0, %g2 + andcc %o0, %g3, %g0 /* test bits 31..16 */ + be,pt %icc, 8f /* all clear: handle the low 16 bits at 8 */ + mov 32, %o1 /* delay slot: result = 32 */ + sethi %hi(0xff000000), %g3 + andcc %g2, %g3, %g0 /* test bits 31..24 */ + bne,pt %icc, 3f + sethi %hi(0xf0000000), %g3 /* delay slot: mask for the next test */ + sll %o0, 8, %o0 +1: + add %o1, -8, %o1 /* top byte was clear: result -= 8 */ + sra %o0, 0, %o0 + mov %o0, %g2 +2: + sethi %hi(0xf0000000), %g3 +3: + andcc %g2, %g3, %g0 /* test the top 4 bits */ + bne,pt %icc, 4f + sethi %hi(0xc0000000), %g3 /* delay slot: mask for the next test */ + sll %o0, 4, %o0 + add %o1, -4, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +4: + andcc %g2, %g3, %g0 /* test the top 2 bits */ + be,a,pt %icc, 7f + sll %o0, 2, %o0 /* annulled delay slot: executed only when the branch is taken */ +5: + xnor %g0, %o0, %o0 /* %o0 = ~%o0 */ + srl %o0, 31, %o0 /* 1 if bit 31 was clear, else 0 */ + sub %o1, %o0, %o1 +6: + jmp %o7 + 8 /* return to caller */ + sra %o1, 0, %o0 /* delay slot: return value = result */ +7: + add %o1, -2, %o1 + ba,pt %xcc, 5b + sra %o0, 0, %o0 +8: + sll %o0, 16, %o0 /* move the low 16 bits to the top of the word */ + sethi %hi(0xff000000), %g3 + sra %o0, 0, %o0 + mov %o0, %g2 + andcc %g2, %g3, %g0 + bne,pt %icc, 2b + mov 16, %o1 /* delay slot: result = 16 */ + ba,pt %xcc, 1b + sll %o0, 8, %o0 + .size fls, .-fls + +__fls: /* in: %o0 = word; out: %o0 = 0-based index of its most significant set bit; no check for word == 0 is made; scratch: %g1 (result), %g2, %g3 */ +#if BITS_PER_LONG == 64 + mov -1, %g2 + sllx %g2, 32, %g2 /* mask of the upper 32 bits */ + and %o0, %g2, %g2 + brnz,pt %g2, 1f /* an upper bit is set: keep result = 63 */ + mov 63, %g1 /* delay slot: result = 63 */ + sllx %o0, 32, %o0 /* upper half clear: shift the lower half up */ +#endif + mov 31, %g1 +1: + mov -1, %g2 + sllx %g2, (BITS_PER_LONG-16), %g2 /* mask of the top 16 bits */ + and %o0, %g2, %g2 + brnz,pt %g2, 2f + mov -1, %g2 /* delay slot: reload all-ones for the next mask */ + sllx %o0, 16, %o0 + add %g1, -16, %g1 +2: + mov -1, %g2 + sllx %g2, (BITS_PER_LONG-8), %g2 /* mask of the top 8 bits */ + and %o0, %g2, %g2 + brnz,pt %g2, 3f + mov -1, %g2 /* delay slot: reload all-ones for the next mask */ + sllx %o0, 8, %o0 + add %g1, -8, %g1 +3: + sllx %g2, (BITS_PER_LONG-4), %g2 /* mask of the top 4 bits */ + and %o0, %g2, %g2 + brnz,pt %g2, 4f + mov -1, %g2 /* delay slot: reload all-ones for the next mask */ + sllx %o0, 4, %o0 + add %g1, -4, %g1 +4: + 
sllx %g2, (BITS_PER_LONG-2), %g2 /* mask of the top 2 bits */ + and %o0, %g2, %g2 + brnz,pt %g2, 5f + mov -1, %g3 /* delay slot: all-ones for the final mask */ + sllx %o0, 2, %o0 + add %g1, -2, %g1 +5: + mov 0, %g2 + sllx %g3, (BITS_PER_LONG-1), %g3 /* mask of the top bit */ + and %o0, %g3, %o0 + movre %o0, 1, %g2 /* %g2 = 1 when the top bit is clear */ + sub %g1, %g2, %g1 + jmp %o7+8 /* return to caller */ + sra %g1, 0, %o0 /* delay slot: return value = result */ + .size __fls, .-__fls From patchwork Wed Sep 27 05:59:12 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vijay Kumar X-Patchwork-Id: 818937 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=sparclinux-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3y26c42Mnsz9t33 for ; Wed, 27 Sep 2017 16:00:04 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752292AbdI0F7b (ORCPT ); Wed, 27 Sep 2017 01:59:31 -0400 Received: from aserp1040.oracle.com ([141.146.126.69]:38410 "EHLO aserp1040.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752294AbdI0F72 (ORCPT ); Wed, 27 Sep 2017 01:59:28 -0400 Received: from aserv0021.oracle.com (aserv0021.oracle.com [141.146.126.233]) by aserp1040.oracle.com (Sentrion-MTA-4.3.2/Sentrion-MTA-4.3.2) with ESMTP id v8R5xOKn006057 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK); Wed, 27 Sep 2017 05:59:25 GMT Received: from aserv0121.oracle.com (aserv0121.oracle.com [141.146.126.235]) by aserv0021.oracle.com (8.14.4/8.14.4) with ESMTP id v8R5xOv2020244 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK); Wed, 27 Sep 2017 05:59:24 GMT Received: from abhmp0012.oracle.com (abhmp0012.oracle.com [141.146.116.18]) by aserv0121.oracle.com (8.14.4/8.13.8) with ESMTP id v8R5xOHn028059; Wed, 27 
Sep 2017 05:59:24 GMT Received: from ca-sparc60.us.oracle.com (/10.147.24.150) by default (Oracle Beehive Gateway v4.0) with ESMTP ; Tue, 26 Sep 2017 22:59:24 -0700 From: Vijay Kumar To: davem@davemloft.net Cc: linux-kernel@vger.kernel.org, sparclinux@vger.kernel.org, babu.moger@oracle.com Subject: [PATCH 2/2] sparc64: Use lzcnt instruction for fls and __fls Date: Tue, 26 Sep 2017 23:59:12 -0600 Message-Id: <1506491952-9659-3-git-send-email-vijay.ac.kumar@oracle.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1506491952-9659-1-git-send-email-vijay.ac.kumar@oracle.com> References: <1506491952-9659-1-git-send-email-vijay.ac.kumar@oracle.com> X-Source-IP: aserv0021.oracle.com [141.146.126.233] Sender: sparclinux-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: sparclinux@vger.kernel.org For T4 and above, patch fls and __fls functions at the boot time to use lzcnt instruction. Signed-off-by: Vijay Kumar Reviewed-by: Babu Moger --- arch/sparc/Makefile | 1 + arch/sparc/kernel/head_64.S | 2 ++ arch/sparc/lib/Makefile | 3 +++ arch/sparc/lib/NG4fls.S | 31 +++++++++++++++++++++++++++++++ arch/sparc/lib/NG4patch.S | 9 +++++++++ 5 files changed, 46 insertions(+), 0 deletions(-) diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 8496a07..0763cd8 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare KBUILD_CFLAGS += -Wa,--undeclared-regs KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3) KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs +KBUILD_AFLAGS += -Wa,-Asparc4 ifeq ($(CONFIG_MCOUNT),y) KBUILD_CFLAGS += -pg diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 78e0211..1165254 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -628,6 +628,8 @@ niagara4_patch: nop call niagara4_patch_pageops nop + call niagara4_patch_fls + nop ba,a,pt %xcc, 80f nop diff --git a/arch/sparc/lib/Makefile 
b/arch/sparc/lib/Makefile index eefbb9c..72d2d8c 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o obj-$(CONFIG_SPARC64) += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o obj-$(CONFIG_SPARC64) += PeeCeeI.o + +obj-$(CONFIG_SPARC64) += fls.o +obj-$(CONFIG_SPARC64) += NG4fls.o diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S new file mode 100644 index 0000000..7c2cfb3 --- /dev/null +++ b/arch/sparc/lib/NG4fls.S @@ -0,0 +1,31 @@ +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above. + * + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + */ + + .text + .align 32 + + .globl NG4fls + .globl __NG4fls + .type NG4fls, #function + .type __NG4fls, #function + +NG4fls: /* in: %o0 = word; out: %o0 = 64 - (leading zero count of %o0), i.e. 0 when %o0 is zero; scratch: %o1, %o2 */ + lzcnt %o0, %o1 /* %o1 = number of leading zero bits in %o0 */ + mov 64, %o2 + retl /* return; the following sub fills the delay slot */ + sub %o2, %o1, %o0 /* delay slot: must stay directly after retl so the return never runs into __NG4fls */ + .size NG4fls, .-NG4fls + +__NG4fls: /* in: %o0 = word; out: %o0 = 63 - (leading zero count of word); a zero word is returned unchanged; scratch: %o1, %o2 */ + brz,pn %o0, 1f /* word == 0: skip the computation */ + mov %o0, %o1 /* delay slot: copy word to %o1 */ + lzcnt %o1, %o0 /* %o0 = number of leading zero bits in word */ + mov 63, %o2 + sub %o2, %o0, %o0 +1: + retl + nop /* delay slot of retl */ + nop + .size __NG4fls, .-__NG4fls diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S index 3cc0f8c..1010d53 100644 --- a/arch/sparc/lib/NG4patch.S +++ b/arch/sparc/lib/NG4patch.S @@ -52,3 +52,12 @@ niagara4_patch_pageops: retl nop .size niagara4_patch_pageops,.-niagara4_patch_pageops + + .globl niagara4_patch_fls + .type niagara4_patch_fls,#function +niagara4_patch_fls: /* applies NG_DO_PATCH to the (fls, NG4fls) and (__fls, __NG4fls) pairs; NG_DO_PATCH is defined outside this hunk - presumably it redirects the first symbol to the second, confirm against the macro */ + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl + nop /* delay slot of retl */ + .size niagara4_patch_fls,.-niagara4_patch_fls