diff mbox series

[v2] powerpc/64s/radix: Enable huge vmalloc mappings

Message ID 20210502110050.324953-1-npiggin@gmail.com (mailing list archive)
State Superseded
Headers show
Series [v2] powerpc/64s/radix: Enable huge vmalloc mappings | expand
Related show

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (e3a9b9d6a03f5fbf99b540e863b001d46ba1735c)
snowpatch_ozlabs/build-ppc64le fail Build failed!
snowpatch_ozlabs/build-ppc64be fail Build failed!
snowpatch_ozlabs/build-ppc64e fail Build failed!
snowpatch_ozlabs/build-pmac32 fail Build failed!
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 56 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Nicholas Piggin May 2, 2021, 11 a.m. UTC
This reduces TLB misses by nearly 30x on a `git diff` workload on a
2-node POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%, due
to vfs hashes being allocated with 2MB pages.

Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
Since v1:
- Don't define MODULES_VADDR which has some other side effect (e.g.,
  ptdump).
- Fixed (hopefully) kbuild warning.
- Keep __vmalloc_node_range call on 3 lines.

 .../admin-guide/kernel-parameters.txt          |  2 ++
 arch/powerpc/Kconfig                           |  1 +
 arch/powerpc/kernel/module.c                   | 18 +++++++++++++-----
 3 files changed, 16 insertions(+), 5 deletions(-)

Comments

kernel test robot May 2, 2021, 1:12 p.m. UTC | #1
Hi Nicholas,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on linus/master next-20210430]
[cannot apply to scottwood/next v5.12]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-64s-radix-Enable-huge-vmalloc-mappings/20210502-190249
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-akebono_defconfig (attached as .config)
compiler: powerpc-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/897024543f33f779ce158849e73999b9d1271f57
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Nicholas-Piggin/powerpc-64s-radix-Enable-huge-vmalloc-mappings/20210502-190249
        git checkout 897024543f33f779ce158849e73999b9d1271f57
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross W=1 ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   arch/powerpc/kernel/module.c: In function '__module_alloc':
   arch/powerpc/kernel/module.c:101:32: error: 'VM_NO_HUGE_VMAP' undeclared (first use in this function)
     101 |         VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
         |                                ^~~~~~~~~~~~~~~
   arch/powerpc/kernel/module.c:101:32: note: each undeclared identifier is reported only once for each function it appears in
   In file included from <command-line>:
   arch/powerpc/kernel/module.c: In function 'module_alloc':
>> arch/powerpc/kernel/module.c:111:27: error: 'MODULES_VADDR' undeclared (first use in this function)
     111 |  BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
         |                           ^~~~~~~~~~~~~
   include/linux/compiler_types.h:300:9: note: in definition of macro '__compiletime_assert'
     300 |   if (!(condition))     \
         |         ^~~~~~~~~
   include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert'
     320 |  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |  ^~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
      39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
         |                                     ^~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:50:2: note: in expansion of macro 'BUILD_BUG_ON_MSG'
      50 |  BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
         |  ^~~~~~~~~~~~~~~~
   arch/powerpc/kernel/module.c:111:2: note: in expansion of macro 'BUILD_BUG_ON'
     111 |  BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
         |  ^~~~~~~~~~~~
>> arch/powerpc/kernel/module.c:114:37: error: 'MODULES_END' undeclared (first use in this function)
     114 |  if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit)
         |                                     ^~~~~~~~~~~


vim +/MODULES_VADDR +111 arch/powerpc/kernel/module.c

2ec13df167040c Christophe Leroy 2021-04-01  104  
7fbc22ce299316 Christophe Leroy 2020-06-29  105  void *module_alloc(unsigned long size)
7fbc22ce299316 Christophe Leroy 2020-06-29  106  {
897024543f33f7 Nicholas Piggin  2021-05-02  107  #ifdef CONFIG_PPC32
2ec13df167040c Christophe Leroy 2021-04-01  108  	unsigned long limit = (unsigned long)_etext - SZ_32M;
2ec13df167040c Christophe Leroy 2021-04-01  109  	void *ptr = NULL;
2ec13df167040c Christophe Leroy 2021-04-01  110  
7fbc22ce299316 Christophe Leroy 2020-06-29 @111  	BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
7fbc22ce299316 Christophe Leroy 2020-06-29  112  
2ec13df167040c Christophe Leroy 2021-04-01  113  	/* First try within 32M limit from _etext to avoid branch trampolines */
2ec13df167040c Christophe Leroy 2021-04-01 @114  	if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit)

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Christophe Leroy May 2, 2021, 5:11 p.m. UTC | #2
Le 02/05/2021 à 13:00, Nicholas Piggin a écrit :
> This reduces TLB misses by nearly 30x on a `git diff` workload on a
> 2-node POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%, due
> to vfs hashes being allocated with 2MB pages.
> 
> Acked-by: Michael Ellerman <mpe@ellerman.id.au>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> Since v1:
> - Don't define MODULES_VADDR which has some other side effect (e.g.,
>    ptdump).
> - Fixed (hopefully) kbuild warning.
> - Keep __vmalloc_node_range call on 3 lines.
> 
>   .../admin-guide/kernel-parameters.txt          |  2 ++
>   arch/powerpc/Kconfig                           |  1 +
>   arch/powerpc/kernel/module.c                   | 18 +++++++++++++-----
>   3 files changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 1c0a3cf6fcc9..1be38b25c485 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -3250,6 +3250,8 @@
>   
>   	nohugeiomap	[KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
>   
> +	nohugevmalloc	[PPC] Disable kernel huge vmalloc mappings.
> +
>   	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
>   			Equivalent to smt=1.
>   
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 1e6230bea09d..c547a9d6a2dd 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -185,6 +185,7 @@ config PPC
>   	select GENERIC_VDSO_TIME_NS
>   	select HAVE_ARCH_AUDITSYSCALL
>   	select HAVE_ARCH_HUGE_VMAP		if PPC_BOOK3S_64 && PPC_RADIX_MMU
> +	select HAVE_ARCH_HUGE_VMALLOC		if HAVE_ARCH_HUGE_VMAP
>   	select HAVE_ARCH_JUMP_LABEL
>   	select HAVE_ARCH_JUMP_LABEL_RELATIVE
>   	select HAVE_ARCH_KASAN			if PPC32 && PPC_PAGE_SHIFT <= 14
> diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
> index fab84024650c..ea1fa55a6897 100644
> --- a/arch/powerpc/kernel/module.c
> +++ b/arch/powerpc/kernel/module.c
> @@ -8,6 +8,7 @@
>   #include <linux/moduleloader.h>
>   #include <linux/err.h>
>   #include <linux/vmalloc.h>
> +#include <linux/mm.h>
>   #include <linux/bug.h>
>   #include <asm/module.h>
>   #include <linux/uaccess.h>
> @@ -88,17 +89,22 @@ int module_finalize(const Elf_Ehdr *hdr,
>   	return 0;
>   }
>   
> -#ifdef MODULES_VADDR
>   static __always_inline void *
>   __module_alloc(unsigned long size, unsigned long start, unsigned long end)
>   {
> -	return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL,
> -				    PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
> -				    __builtin_return_address(0));
> +	/*
> +	 * Don't do huge page allocations for modules yet until more testing
> +	 * is done. STRICT_MODULE_RWX may require extra work to support this
> +	 * too.
> +	 */
> +	return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC,
> +				    VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
> +				    NUMA_NO_NODE, __builtin_return_address(0));
>   }
>   
>   void *module_alloc(unsigned long size)
>   {
> +#ifdef CONFIG_PPC32

What then happens to PPC32 platforms that doesn't define MODULES_VADDR, for instance 4xx or booke ?

I think it should be:

#ifdef MODULES_VADDR

>   	unsigned long limit = (unsigned long)_etext - SZ_32M;
>   	void *ptr = NULL;
>   
> @@ -112,5 +118,7 @@ void *module_alloc(unsigned long size)
>   		ptr = __module_alloc(size, MODULES_VADDR, MODULES_END);
>   
>   	return ptr;
> -}
> +#else
> +	return __module_alloc(size, VMALLOC_START, VMALLOC_END);
>   #endif
> +}
>
Nicholas Piggin May 3, 2021, 12:43 a.m. UTC | #3
Excerpts from Christophe Leroy's message of May 3, 2021 3:11 am:
> 
> 
> Le 02/05/2021 à 13:00, Nicholas Piggin a écrit :
>> This reduces TLB misses by nearly 30x on a `git diff` workload on a
>> 2-node POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%, due
>> to vfs hashes being allocated with 2MB pages.
>> 
>> Acked-by: Michael Ellerman <mpe@ellerman.id.au>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> Since v1:
>> - Don't define MODULES_VADDR which has some other side effect (e.g.,
>>    ptdump).
>> - Fixed (hopefully) kbuild warning.
>> - Keep __vmalloc_node_range call on 3 lines.
>> 
>>   .../admin-guide/kernel-parameters.txt          |  2 ++
>>   arch/powerpc/Kconfig                           |  1 +
>>   arch/powerpc/kernel/module.c                   | 18 +++++++++++++-----
>>   3 files changed, 16 insertions(+), 5 deletions(-)
>> 
>> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
>> index 1c0a3cf6fcc9..1be38b25c485 100644
>> --- a/Documentation/admin-guide/kernel-parameters.txt
>> +++ b/Documentation/admin-guide/kernel-parameters.txt
>> @@ -3250,6 +3250,8 @@
>>   
>>   	nohugeiomap	[KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
>>   
>> +	nohugevmalloc	[PPC] Disable kernel huge vmalloc mappings.
>> +
>>   	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
>>   			Equivalent to smt=1.
>>   
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index 1e6230bea09d..c547a9d6a2dd 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -185,6 +185,7 @@ config PPC
>>   	select GENERIC_VDSO_TIME_NS
>>   	select HAVE_ARCH_AUDITSYSCALL
>>   	select HAVE_ARCH_HUGE_VMAP		if PPC_BOOK3S_64 && PPC_RADIX_MMU
>> +	select HAVE_ARCH_HUGE_VMALLOC		if HAVE_ARCH_HUGE_VMAP
>>   	select HAVE_ARCH_JUMP_LABEL
>>   	select HAVE_ARCH_JUMP_LABEL_RELATIVE
>>   	select HAVE_ARCH_KASAN			if PPC32 && PPC_PAGE_SHIFT <= 14
>> diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
>> index fab84024650c..ea1fa55a6897 100644
>> --- a/arch/powerpc/kernel/module.c
>> +++ b/arch/powerpc/kernel/module.c
>> @@ -8,6 +8,7 @@
>>   #include <linux/moduleloader.h>
>>   #include <linux/err.h>
>>   #include <linux/vmalloc.h>
>> +#include <linux/mm.h>
>>   #include <linux/bug.h>
>>   #include <asm/module.h>
>>   #include <linux/uaccess.h>
>> @@ -88,17 +89,22 @@ int module_finalize(const Elf_Ehdr *hdr,
>>   	return 0;
>>   }
>>   
>> -#ifdef MODULES_VADDR
>>   static __always_inline void *
>>   __module_alloc(unsigned long size, unsigned long start, unsigned long end)
>>   {
>> -	return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL,
>> -				    PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
>> -				    __builtin_return_address(0));
>> +	/*
>> +	 * Don't do huge page allocations for modules yet until more testing
>> +	 * is done. STRICT_MODULE_RWX may require extra work to support this
>> +	 * too.
>> +	 */
>> +	return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC,
>> +				    VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
>> +				    NUMA_NO_NODE, __builtin_return_address(0));
>>   }
>>   
>>   void *module_alloc(unsigned long size)
>>   {
>> +#ifdef CONFIG_PPC32
> 
> What then happens to PPC32 platforms that doesn't define MODULES_VADDR, for instance 4xx or booke ?
> 
> I think it should be:
> 
> #ifdef MODULES_VADDR

Yes the kernel build robot agrees with you. I'll respin.

Thanks,
Nick

> 
>>   	unsigned long limit = (unsigned long)_etext - SZ_32M;
>>   	void *ptr = NULL;
>>   
>> @@ -112,5 +118,7 @@ void *module_alloc(unsigned long size)
>>   		ptr = __module_alloc(size, MODULES_VADDR, MODULES_END);
>>   
>>   	return ptr;
>> -}
>> +#else
>> +	return __module_alloc(size, VMALLOC_START, VMALLOC_END);
>>   #endif
>> +}
>> 
>
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 1c0a3cf6fcc9..1be38b25c485 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3250,6 +3250,8 @@ 
 
 	nohugeiomap	[KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
 
+	nohugevmalloc	[PPC] Disable kernel huge vmalloc mappings.
+
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1e6230bea09d..c547a9d6a2dd 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -185,6 +185,7 @@  config PPC
 	select GENERIC_VDSO_TIME_NS
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_HUGE_VMAP		if PPC_BOOK3S_64 && PPC_RADIX_MMU
+	select HAVE_ARCH_HUGE_VMALLOC		if HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
 	select HAVE_ARCH_KASAN			if PPC32 && PPC_PAGE_SHIFT <= 14
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index fab84024650c..ea1fa55a6897 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -8,6 +8,7 @@ 
 #include <linux/moduleloader.h>
 #include <linux/err.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include <linux/bug.h>
 #include <asm/module.h>
 #include <linux/uaccess.h>
@@ -88,17 +89,22 @@  int module_finalize(const Elf_Ehdr *hdr,
 	return 0;
 }
 
-#ifdef MODULES_VADDR
 static __always_inline void *
 __module_alloc(unsigned long size, unsigned long start, unsigned long end)
 {
-	return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL,
-				    PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-				    __builtin_return_address(0));
+	/*
+	 * Don't do huge page allocations for modules yet until more testing
+	 * is done. STRICT_MODULE_RWX may require extra work to support this
+	 * too.
+	 */
+	return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC,
+				    VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
+				    NUMA_NO_NODE, __builtin_return_address(0));
 }
 
 void *module_alloc(unsigned long size)
 {
+#ifdef CONFIG_PPC32
 	unsigned long limit = (unsigned long)_etext - SZ_32M;
 	void *ptr = NULL;
 
@@ -112,5 +118,7 @@  void *module_alloc(unsigned long size)
 		ptr = __module_alloc(size, MODULES_VADDR, MODULES_END);
 
 	return ptr;
-}
+#else
+	return __module_alloc(size, VMALLOC_START, VMALLOC_END);
 #endif
+}