diff mbox series

powerpc/vdso: Fix multiple issues with sys_call_table

Message ID 20200306135705.7f80fcad@kryten.localdomain (mailing list archive)
State Superseded
Headers show
Series powerpc/vdso: Fix multiple issues with sys_call_table | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (ab326587bb5fb91cc97df9b9f48e9e1469f04621)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-pmac32 fail build failed!
snowpatch_ozlabs/checkpatch warning total: 0 errors, 1 warnings, 0 checks, 37 lines checked
snowpatch_ozlabs/needsstable success Patch is tagged for stable

Commit Message

Anton Blanchard March 6, 2020, 2:57 a.m. UTC
The VDSO exports a bitmap of valid syscalls. vdso_setup_syscall_map()
sets this up, but there are both little and big endian bugs. The issue
is with:

       if (sys_call_table[i] != sys_ni_syscall)

On little endian, instead of comparing pointers to the two functions,
we compare the first two instructions of each function. If a function
happens to have the same first two instructions as sys_ni_syscall, then
we have a spurious match and mark the instruction as not implemented.
Fix this by removing the inline declarations.

On big endian we have a further issue where sys_ni_syscall is a function
descriptor and sys_call_table[] holds pointers to the instruction text.
Fix this by using dereference_kernel_function_descriptor().

Cc: stable@vger.kernel.org
Signed-off-by: Anton Blanchard <anton@ozlabs.org>

---

Comments

kernel test robot March 18, 2020, 5:06 p.m. UTC | #1
Hi Anton,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on powerpc/next]
[also build test WARNING on linux/master linus/master v5.6-rc6 next-20200317]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Anton-Blanchard/powerpc-vdso-Fix-multiple-issues-with-sys_call_table/20200306-134043
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-a001-20200318 (attached as .config)
compiler: powerpc-linux-gcc (GCC) 9.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=9.2.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   arch/powerpc/kernel/vdso.c: In function 'vdso_setup_syscall_map':
>> arch/powerpc/kernel/vdso.c:662:25: warning: comparison between pointer and integer
     662 |   if (sys_call_table[i] != sys_ni_syscall)
         |                         ^~

vim +662 arch/powerpc/kernel/vdso.c

a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  641  
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  642  /*
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  643   * Called from setup_arch to initialize the bitmap of available
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  644   * syscalls in the systemcfg page
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  645   */
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  646  static void __init vdso_setup_syscall_map(void)
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  647  {
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  648  	unsigned int i;
457c1792bcc570 Anton Blanchard        2020-03-06  649  	unsigned long ni_syscall;
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  650  
457c1792bcc570 Anton Blanchard        2020-03-06  651  	ni_syscall = (unsigned long)dereference_kernel_function_descriptor(sys_ni_syscall);
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  652  
f43194e45852b0 Rashmica Gupta         2015-11-19  653  	for (i = 0; i < NR_syscalls; i++) {
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  654  #ifdef CONFIG_PPC64
457c1792bcc570 Anton Blanchard        2020-03-06  655  		if (sys_call_table[i] != ni_syscall)
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  656  			vdso_data->syscall_map_64[i >> 5] |=
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  657  				0x80000000UL >> (i & 0x1f);
457c1792bcc570 Anton Blanchard        2020-03-06  658  		if (compat_sys_call_table[i] != ni_syscall)
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  659  			vdso_data->syscall_map_32[i >> 5] |=
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  660  				0x80000000UL >> (i & 0x1f);
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  661  #else /* CONFIG_PPC64 */
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11 @662  		if (sys_call_table[i] != sys_ni_syscall)
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  663  			vdso_data->syscall_map_32[i >> 5] |=
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  664  				0x80000000UL >> (i & 0x1f);
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  665  #endif /* CONFIG_PPC64 */
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  666  	}
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  667  }
a7f290dad32ee3 Benjamin Herrenschmidt 2005-11-11  668  

:::::: The code at line 662 was first introduced by commit
:::::: a7f290dad32ee34d931561b7943c858fe2aae503 [PATCH] powerpc: Merge vdso's and add vdso support to 32 bits kernel

:::::: TO: Benjamin Herrenschmidt <benh@kernel.crashing.org>
:::::: CC: Paul Mackerras <paulus@samba.org>

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Michael Ellerman March 19, 2020, 1:10 a.m. UTC | #2
Anton Blanchard <anton@ozlabs.org> writes:
> The VDSO exports a bitmap of valid syscalls. vdso_setup_syscall_map()
> sets this up, but there are both little and big endian bugs. The issue
> is with:
>
>        if (sys_call_table[i] != sys_ni_syscall)
>
> On little endian, instead of comparing pointers to the two functions,
> we compare the first two instructions of each function. If a function
> happens to have the same first two instructions as sys_ni_syscall, then
> we have a spurious match and mark the instruction as not implemented.
> Fix this by removing the inline declarations.
>
> On big endian we have a further issue where sys_ni_syscall is a function
> descriptor and sys_call_table[] holds pointers to the instruction text.
> Fix this by using dereference_kernel_function_descriptor().
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Anton Blanchard <anton@ozlabs.org>

That's some pretty epic breakage.

Is it even worth keeping, or should we just rip it out and declare that
the syscall map is junk? Userspace can hardly rely on it given it's been
this broken for so long.

If not it would be really nice to have a selftest of this stuff so we
can verify it works and not break it again in future.

cheers

> ---
> diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
> index b9a108411c0d..d186b729026e 100644
> --- a/arch/powerpc/kernel/vdso.c
> +++ b/arch/powerpc/kernel/vdso.c
> @@ -17,6 +17,7 @@
>  #include <linux/elf.h>
>  #include <linux/security.h>
>  #include <linux/memblock.h>
> +#include <linux/syscalls.h>
>  
>  #include <asm/pgtable.h>
>  #include <asm/processor.h>
> @@ -30,6 +31,7 @@
>  #include <asm/vdso.h>
>  #include <asm/vdso_datapage.h>
>  #include <asm/setup.h>
> +#include <asm/syscall.h>
>  
>  #undef DEBUG
>  
> @@ -644,19 +646,16 @@ static __init int vdso_setup(void)
>  static void __init vdso_setup_syscall_map(void)
>  {
>  	unsigned int i;
> -	extern unsigned long *sys_call_table;
> -#ifdef CONFIG_PPC64
> -	extern unsigned long *compat_sys_call_table;
> -#endif
> -	extern unsigned long sys_ni_syscall;
> +	unsigned long ni_syscall;
>  
> +	ni_syscall = (unsigned long)dereference_kernel_function_descriptor(sys_ni_syscall);
>  
>  	for (i = 0; i < NR_syscalls; i++) {
>  #ifdef CONFIG_PPC64
> -		if (sys_call_table[i] != sys_ni_syscall)
> +		if (sys_call_table[i] != ni_syscall)
>  			vdso_data->syscall_map_64[i >> 5] |=
>  				0x80000000UL >> (i & 0x1f);
> -		if (compat_sys_call_table[i] != sys_ni_syscall)
> +		if (compat_sys_call_table[i] != ni_syscall)
>  			vdso_data->syscall_map_32[i >> 5] |=
>  				0x80000000UL >> (i & 0x1f);
>  #else /* CONFIG_PPC64 */
Christophe Leroy June 10, 2021, 11:36 a.m. UTC | #3
Le 19/03/2020 à 02:10, Michael Ellerman a écrit :
> Anton Blanchard <anton@ozlabs.org> writes:
>> The VDSO exports a bitmap of valid syscalls. vdso_setup_syscall_map()
>> sets this up, but there are both little and big endian bugs. The issue
>> is with:
>>
>>         if (sys_call_table[i] != sys_ni_syscall)
>>
>> On little endian, instead of comparing pointers to the two functions,
>> we compare the first two instructions of each function. If a function
>> happens to have the same first two instructions as sys_ni_syscall, then
>> we have a spurious match and mark the instruction as not implemented.
>> Fix this by removing the inline declarations.
>>
>> On big endian we have a further issue where sys_ni_syscall is a function
>> descriptor and sys_call_table[] holds pointers to the instruction text.
>> Fix this by using dereference_kernel_function_descriptor().
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Anton Blanchard <anton@ozlabs.org>
> 
> That's some pretty epic breakage.
> 
> Is it even worth keeping, or should we just rip it out and declare that
> the syscall map is junk? Userspace can hardly rely on it given it's been
> this broken for so long.
> 
> If not it would be really nice to have a selftest of this stuff so we
> can verify it works and not break it again in future.
> 

The problem on little endian is fixed by https://github.com/linuxppc/linux/commit/bc9d5bfc4 I think.

On big endian, I can't see any problem. Looks like sys_call_table in a vmlinux generated with 
ppc64_defconfig contains addresses of items in the opd. So it should be ok, shoudln't it ?

[root@po9473vm linux-powerpc]# powerpc64-linux-objdump -x vmlinux | grep -e " sys_call_table" -e 
ni_syscall
c000000000fc0748 g       .rodata	0000000000000000 sys_call_table
c00000000019fd90 g     F .text	0000000000000028 .sys_ni_syscall
c000000001cc3678 g     F .opd	0000000000000018 sys_ni_syscall

[root@po9473vm linux-powerpc]# powerpc64-linux-objdump -s -j .rodata vmlinux
Contents of section .rodata:
...
  c000000000fc0740 a610e9ee a3f43156 c0000000 01cc0888  ......1V........
  c000000000fc0750 c0000000 01cbf5c8 c0000000 01cbe788  ................
  c000000000fc0760 c0000000 01cf6768 c0000000 01cf6798  ......gh......g.
  c000000000fc0770 c0000000 01cf6240 c0000000 01cf5dd8  ......b@......].
  c000000000fc0780 c0000000 01cbf670 c0000000 01cf61e0  .......p......a.
  c000000000fc0790 c0000000 01cf8490 c0000000 01cf8580  ................
  c000000000fc07a0 c0000000 01cf7890 c0000000 01cf5e50  ......x.......^P
  c000000000fc07b0 c0000000 01ccf120 c0000000 01cf8358  ....... .......X
  c000000000fc07c0 c0000000 01cf6060 c0000000 01cf6108  ......``......a.
  c000000000fc07d0 c0000000 01cc3678 c0000000 01cc3678  ......6x......6x
  c000000000fc07e0 c0000000 01cf63a8 c0000000 01cc1680  ......c.........
  c000000000fc07f0 c0000000 01cfac50 c0000000 01cc3678  .......P......6x
...


Do you agree ?

Christophe
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index b9a108411c0d..d186b729026e 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -17,6 +17,7 @@ 
 #include <linux/elf.h>
 #include <linux/security.h>
 #include <linux/memblock.h>
+#include <linux/syscalls.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -30,6 +31,7 @@ 
 #include <asm/vdso.h>
 #include <asm/vdso_datapage.h>
 #include <asm/setup.h>
+#include <asm/syscall.h>
 
 #undef DEBUG
 
@@ -644,19 +646,16 @@  static __init int vdso_setup(void)
 static void __init vdso_setup_syscall_map(void)
 {
 	unsigned int i;
-	extern unsigned long *sys_call_table;
-#ifdef CONFIG_PPC64
-	extern unsigned long *compat_sys_call_table;
-#endif
-	extern unsigned long sys_ni_syscall;
+	unsigned long ni_syscall;
 
+	ni_syscall = (unsigned long)dereference_kernel_function_descriptor(sys_ni_syscall);
 
 	for (i = 0; i < NR_syscalls; i++) {
 #ifdef CONFIG_PPC64
-		if (sys_call_table[i] != sys_ni_syscall)
+		if (sys_call_table[i] != ni_syscall)
 			vdso_data->syscall_map_64[i >> 5] |=
 				0x80000000UL >> (i & 0x1f);
-		if (compat_sys_call_table[i] != sys_ni_syscall)
+		if (compat_sys_call_table[i] != ni_syscall)
 			vdso_data->syscall_map_32[i >> 5] |=
 				0x80000000UL >> (i & 0x1f);
 #else /* CONFIG_PPC64 */