diff mbox series

[SRU,CVE-2020-25285,BIONIC,1/1] mm/hugetlb: fix a race between hugetlb sysctl handlers

Message ID 20200928190802.37397-2-william.gray@canonical.com
State New
Headers show
Series mm/hugetlb: fix a race between hugetlb sysctl handlers | expand

Commit Message

William Breathitt Gray Sept. 28, 2020, 7:08 p.m. UTC
From: Muchun Song <songmuchun@bytedance.com>

There is a race between the assignment of `table->data` and write value
to the pointer of `table->data` in the __do_proc_doulongvec_minmax() on
the other thread.

  CPU0:                                 CPU1:
                                        proc_sys_write
  hugetlb_sysctl_handler                  proc_sys_call_handler
  hugetlb_sysctl_handler_common             hugetlb_sysctl_handler
    table->data = &tmp;                       hugetlb_sysctl_handler_common
                                                table->data = &tmp;
      proc_doulongvec_minmax
        do_proc_doulongvec_minmax           sysctl_head_finish
          __do_proc_doulongvec_minmax         unuse_table
            i = table->data;
            *i = val;  // corrupt CPU1's stack

Fix this by duplicating the `table`, and only update the duplicate of
it.  And introduce a helper of proc_hugetlb_doulongvec_minmax() to
simplify the code.

The following oops was seen:

    BUG: kernel NULL pointer dereference, address: 0000000000000000
    #PF: supervisor instruction fetch in kernel mode
    #PF: error_code(0x0010) - not-present page
    Code: Bad RIP value.
    ...
    Call Trace:
     ? set_max_huge_pages+0x3da/0x4f0
     ? alloc_pool_huge_page+0x150/0x150
     ? proc_doulongvec_minmax+0x46/0x60
     ? hugetlb_sysctl_handler_common+0x1c7/0x200
     ? nr_hugepages_store+0x20/0x20
     ? copy_fd_bitmaps+0x170/0x170
     ? hugetlb_sysctl_handler+0x1e/0x20
     ? proc_sys_call_handler+0x2f1/0x300
     ? unregister_sysctl_table+0xb0/0xb0
     ? __fd_install+0x78/0x100
     ? proc_sys_write+0x14/0x20
     ? __vfs_write+0x4d/0x90
     ? vfs_write+0xef/0x240
     ? ksys_write+0xc0/0x160
     ? __ia32_sys_read+0x50/0x50
     ? __close_fd+0x129/0x150
     ? __x64_sys_write+0x43/0x50
     ? do_syscall_64+0x6c/0x200
     ? entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: e5ff215941d5 ("hugetlb: multiple hstates for multiple page sizes")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/20200828031146.43035-1-songmuchun@bytedance.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

CVE-2020-25285

(backported from commit 17743798d81238ab13050e8e2833699b54e15467)
[ vilhelmgray: context adjustments ]
Signed-off-by: William Breathitt Gray <william.gray@canonical.com>
---
 mm/hugetlb.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

Comments

Stefan Bader Sept. 29, 2020, 7:13 a.m. UTC | #1
On 28.09.20 21:08, William Breathitt Gray wrote:
> From: Muchun Song <songmuchun@bytedance.com>
> 
> There is a race between the assignment of `table->data` and write value
> to the pointer of `table->data` in the __do_proc_doulongvec_minmax() on
> the other thread.
> 
>   CPU0:                                 CPU1:
>                                         proc_sys_write
>   hugetlb_sysctl_handler                  proc_sys_call_handler
>   hugetlb_sysctl_handler_common             hugetlb_sysctl_handler
>     table->data = &tmp;                       hugetlb_sysctl_handler_common
>                                                 table->data = &tmp;
>       proc_doulongvec_minmax
>         do_proc_doulongvec_minmax           sysctl_head_finish
>           __do_proc_doulongvec_minmax         unuse_table
>             i = table->data;
>             *i = val;  // corrupt CPU1's stack
> 
> Fix this by duplicating the `table`, and only update the duplicate of
> it.  And introduce a helper of proc_hugetlb_doulongvec_minmax() to
> simplify the code.
> 
> The following oops was seen:
> 
>     BUG: kernel NULL pointer dereference, address: 0000000000000000
>     #PF: supervisor instruction fetch in kernel mode
>     #PF: error_code(0x0010) - not-present page
>     Code: Bad RIP value.
>     ...
>     Call Trace:
>      ? set_max_huge_pages+0x3da/0x4f0
>      ? alloc_pool_huge_page+0x150/0x150
>      ? proc_doulongvec_minmax+0x46/0x60
>      ? hugetlb_sysctl_handler_common+0x1c7/0x200
>      ? nr_hugepages_store+0x20/0x20
>      ? copy_fd_bitmaps+0x170/0x170
>      ? hugetlb_sysctl_handler+0x1e/0x20
>      ? proc_sys_call_handler+0x2f1/0x300
>      ? unregister_sysctl_table+0xb0/0xb0
>      ? __fd_install+0x78/0x100
>      ? proc_sys_write+0x14/0x20
>      ? __vfs_write+0x4d/0x90
>      ? vfs_write+0xef/0x240
>      ? ksys_write+0xc0/0x160
>      ? __ia32_sys_read+0x50/0x50
>      ? __close_fd+0x129/0x150
>      ? __x64_sys_write+0x43/0x50
>      ? do_syscall_64+0x6c/0x200
>      ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
> 
> Fixes: e5ff215941d5 ("hugetlb: multiple hstates for multiple page sizes")
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
> Cc: Andi Kleen <ak@linux.intel.com>
> Link: http://lkml.kernel.org/r/20200828031146.43035-1-songmuchun@bytedance.com
> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> 
> CVE-2020-25285
> 
> (backported from commit 17743798d81238ab13050e8e2833699b54e15467)
> [ vilhelmgray: context adjustments ]
> Signed-off-by: William Breathitt Gray <william.gray@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
> ---
>  mm/hugetlb.c | 26 ++++++++++++++++++++------
>  1 file changed, 20 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index a6787cafb181..8a0d79eac03e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2911,6 +2911,22 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
>  }
>  
>  #ifdef CONFIG_SYSCTL
> +static int proc_hugetlb_doulongvec_minmax(struct ctl_table *table, int write,
> +					  void *buffer, size_t *length,
> +					  loff_t *ppos, unsigned long *out)
> +{
> +	struct ctl_table dup_table;
> +
> +	/*
> +	 * In order to avoid races with __do_proc_doulongvec_minmax(), we
> +	 * can duplicate the @table and alter the duplicate of it.
> +	 */
> +	dup_table = *table;
> +	dup_table.data = out;
> +
> +	return proc_doulongvec_minmax(&dup_table, write, buffer, length, ppos);
> +}
> +
>  static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
>  			 struct ctl_table *table, int write,
>  			 void __user *buffer, size_t *length, loff_t *ppos)
> @@ -2922,9 +2938,8 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
>  	if (!hugepages_supported())
>  		return -EOPNOTSUPP;
>  
> -	table->data = &tmp;
> -	table->maxlen = sizeof(unsigned long);
> -	ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
> +	ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos,
> +					     &tmp);
>  	if (ret)
>  		goto out;
>  
> @@ -2968,9 +2983,8 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
>  	if (write && hstate_is_gigantic(h))
>  		return -EINVAL;
>  
> -	table->data = &tmp;
> -	table->maxlen = sizeof(unsigned long);
> -	ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
> +	ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos,
> +					     &tmp);
>  	if (ret)
>  		goto out;
>  
>
Andrea Righi Sept. 29, 2020, 11:06 a.m. UTC | #2
On Mon, Sep 28, 2020 at 03:08:02PM -0400, William Breathitt Gray wrote:
> From: Muchun Song <songmuchun@bytedance.com>
> 
> There is a race between the assignment of `table->data` and write value
> to the pointer of `table->data` in the __do_proc_doulongvec_minmax() on
> the other thread.
> 
>   CPU0:                                 CPU1:
>                                         proc_sys_write
>   hugetlb_sysctl_handler                  proc_sys_call_handler
>   hugetlb_sysctl_handler_common             hugetlb_sysctl_handler
>     table->data = &tmp;                       hugetlb_sysctl_handler_common
>                                                 table->data = &tmp;
>       proc_doulongvec_minmax
>         do_proc_doulongvec_minmax           sysctl_head_finish
>           __do_proc_doulongvec_minmax         unuse_table
>             i = table->data;
>             *i = val;  // corrupt CPU1's stack
> 
> Fix this by duplicating the `table`, and only update the duplicate of
> it.  And introduce a helper of proc_hugetlb_doulongvec_minmax() to
> simplify the code.
> 
> The following oops was seen:
> 
>     BUG: kernel NULL pointer dereference, address: 0000000000000000
>     #PF: supervisor instruction fetch in kernel mode
>     #PF: error_code(0x0010) - not-present page
>     Code: Bad RIP value.
>     ...
>     Call Trace:
>      ? set_max_huge_pages+0x3da/0x4f0
>      ? alloc_pool_huge_page+0x150/0x150
>      ? proc_doulongvec_minmax+0x46/0x60
>      ? hugetlb_sysctl_handler_common+0x1c7/0x200
>      ? nr_hugepages_store+0x20/0x20
>      ? copy_fd_bitmaps+0x170/0x170
>      ? hugetlb_sysctl_handler+0x1e/0x20
>      ? proc_sys_call_handler+0x2f1/0x300
>      ? unregister_sysctl_table+0xb0/0xb0
>      ? __fd_install+0x78/0x100
>      ? proc_sys_write+0x14/0x20
>      ? __vfs_write+0x4d/0x90
>      ? vfs_write+0xef/0x240
>      ? ksys_write+0xc0/0x160
>      ? __ia32_sys_read+0x50/0x50
>      ? __close_fd+0x129/0x150
>      ? __x64_sys_write+0x43/0x50
>      ? do_syscall_64+0x6c/0x200
>      ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
> 
> Fixes: e5ff215941d5 ("hugetlb: multiple hstates for multiple page sizes")
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
> Cc: Andi Kleen <ak@linux.intel.com>
> Link: http://lkml.kernel.org/r/20200828031146.43035-1-songmuchun@bytedance.com
> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> 
> CVE-2020-25285
> 
> (backported from commit 17743798d81238ab13050e8e2833699b54e15467)
> [ vilhelmgray: context adjustments ]
> Signed-off-by: William Breathitt Gray <william.gray@canonical.com>

Acked-by: Andrea Righi <andrea.righi@canonical.com>
Ian May Oct. 1, 2020, 9:52 p.m. UTC | #3
This patch was applied in the following patchset:

        Bionic update: upstream stable patchset 2020-09-23
	Ported from the following upstream stable releases:
			v4.14.197, v4.19.144,
			v4.14.198, v4.19.145
	https://bugs.launchpad.net/bugs/1896817

Thanks,
Ian

On 2020-09-28 15:08:02 , William Breathitt Gray wrote:
> From: Muchun Song <songmuchun@bytedance.com>
> 
> There is a race between the assignment of `table->data` and write value
> to the pointer of `table->data` in the __do_proc_doulongvec_minmax() on
> the other thread.
> 
>   CPU0:                                 CPU1:
>                                         proc_sys_write
>   hugetlb_sysctl_handler                  proc_sys_call_handler
>   hugetlb_sysctl_handler_common             hugetlb_sysctl_handler
>     table->data = &tmp;                       hugetlb_sysctl_handler_common
>                                                 table->data = &tmp;
>       proc_doulongvec_minmax
>         do_proc_doulongvec_minmax           sysctl_head_finish
>           __do_proc_doulongvec_minmax         unuse_table
>             i = table->data;
>             *i = val;  // corrupt CPU1's stack
> 
> Fix this by duplicating the `table`, and only update the duplicate of
> it.  And introduce a helper of proc_hugetlb_doulongvec_minmax() to
> simplify the code.
> 
> The following oops was seen:
> 
>     BUG: kernel NULL pointer dereference, address: 0000000000000000
>     #PF: supervisor instruction fetch in kernel mode
>     #PF: error_code(0x0010) - not-present page
>     Code: Bad RIP value.
>     ...
>     Call Trace:
>      ? set_max_huge_pages+0x3da/0x4f0
>      ? alloc_pool_huge_page+0x150/0x150
>      ? proc_doulongvec_minmax+0x46/0x60
>      ? hugetlb_sysctl_handler_common+0x1c7/0x200
>      ? nr_hugepages_store+0x20/0x20
>      ? copy_fd_bitmaps+0x170/0x170
>      ? hugetlb_sysctl_handler+0x1e/0x20
>      ? proc_sys_call_handler+0x2f1/0x300
>      ? unregister_sysctl_table+0xb0/0xb0
>      ? __fd_install+0x78/0x100
>      ? proc_sys_write+0x14/0x20
>      ? __vfs_write+0x4d/0x90
>      ? vfs_write+0xef/0x240
>      ? ksys_write+0xc0/0x160
>      ? __ia32_sys_read+0x50/0x50
>      ? __close_fd+0x129/0x150
>      ? __x64_sys_write+0x43/0x50
>      ? do_syscall_64+0x6c/0x200
>      ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
> 
> Fixes: e5ff215941d5 ("hugetlb: multiple hstates for multiple page sizes")
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
> Cc: Andi Kleen <ak@linux.intel.com>
> Link: http://lkml.kernel.org/r/20200828031146.43035-1-songmuchun@bytedance.com
> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> 
> CVE-2020-25285
> 
> (backported from commit 17743798d81238ab13050e8e2833699b54e15467)
> [ vilhelmgray: context adjustments ]
> Signed-off-by: William Breathitt Gray <william.gray@canonical.com>
> ---
>  mm/hugetlb.c | 26 ++++++++++++++++++++------
>  1 file changed, 20 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index a6787cafb181..8a0d79eac03e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2911,6 +2911,22 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
>  }
>  
>  #ifdef CONFIG_SYSCTL
> +static int proc_hugetlb_doulongvec_minmax(struct ctl_table *table, int write,
> +					  void *buffer, size_t *length,
> +					  loff_t *ppos, unsigned long *out)
> +{
> +	struct ctl_table dup_table;
> +
> +	/*
> +	 * In order to avoid races with __do_proc_doulongvec_minmax(), we
> +	 * can duplicate the @table and alter the duplicate of it.
> +	 */
> +	dup_table = *table;
> +	dup_table.data = out;
> +
> +	return proc_doulongvec_minmax(&dup_table, write, buffer, length, ppos);
> +}
> +
>  static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
>  			 struct ctl_table *table, int write,
>  			 void __user *buffer, size_t *length, loff_t *ppos)
> @@ -2922,9 +2938,8 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
>  	if (!hugepages_supported())
>  		return -EOPNOTSUPP;
>  
> -	table->data = &tmp;
> -	table->maxlen = sizeof(unsigned long);
> -	ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
> +	ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos,
> +					     &tmp);
>  	if (ret)
>  		goto out;
>  
> @@ -2968,9 +2983,8 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
>  	if (write && hstate_is_gigantic(h))
>  		return -EINVAL;
>  
> -	table->data = &tmp;
> -	table->maxlen = sizeof(unsigned long);
> -	ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
> +	ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos,
> +					     &tmp);
>  	if (ret)
>  		goto out;
>  
> -- 
> 2.25.1
> 
> 
> -- 
> kernel-team mailing list
> kernel-team@lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
diff mbox series

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a6787cafb181..8a0d79eac03e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2911,6 +2911,22 @@  static unsigned int cpuset_mems_nr(unsigned int *array)
 }
 
 #ifdef CONFIG_SYSCTL
+static int proc_hugetlb_doulongvec_minmax(struct ctl_table *table, int write,
+					  void *buffer, size_t *length,
+					  loff_t *ppos, unsigned long *out)
+{
+	struct ctl_table dup_table;
+
+	/*
+	 * In order to avoid races with __do_proc_doulongvec_minmax(), we
+	 * can duplicate the @table and alter the duplicate of it.
+	 */
+	dup_table = *table;
+	dup_table.data = out;
+
+	return proc_doulongvec_minmax(&dup_table, write, buffer, length, ppos);
+}
+
 static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
 			 struct ctl_table *table, int write,
 			 void __user *buffer, size_t *length, loff_t *ppos)
@@ -2922,9 +2938,8 @@  static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
 	if (!hugepages_supported())
 		return -EOPNOTSUPP;
 
-	table->data = &tmp;
-	table->maxlen = sizeof(unsigned long);
-	ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
+	ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos,
+					     &tmp);
 	if (ret)
 		goto out;
 
@@ -2968,9 +2983,8 @@  int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 	if (write && hstate_is_gigantic(h))
 		return -EINVAL;
 
-	table->data = &tmp;
-	table->maxlen = sizeof(unsigned long);
-	ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
+	ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos,
+					     &tmp);
 	if (ret)
 		goto out;