diff mbox

[3.8,107/116] hugetlb: fix copy_hugetlb_page_range() to handle migration/hwpoisoned entry

Message ID 1406067727-19683-108-git-send-email-kamal@canonical.com
State New
Headers show

Commit Message

Kamal Mostafa July 22, 2014, 10:21 p.m. UTC
3.8.13.27 -stable review patch.  If anyone has any objections, please let me know.

------------------

From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>

commit 4a705fef986231a3e7a6b1a6d3c37025f021f49f upstream.

There's a race between fork() and hugepage migration; as a result, we
try to "dereference" a swap entry as a normal pte, causing a kernel
panic.  The cause of the problem is that copy_hugetlb_page_range() can't
handle the "swap entry" family (migration entry and hwpoisoned entry),
so let's fix it.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
---
 mm/hugetlb.c | 71 ++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 43 insertions(+), 28 deletions(-)

Comments

Hugh Dickins July 22, 2014, 11:08 p.m. UTC | #1
On Tue, 22 Jul 2014, Kamal Mostafa wrote:

> 3.8.13.27 -stable review patch.  If anyone has any objections, please let me know.
> 
> ------------------
> 
> From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
> 
> commit 4a705fef986231a3e7a6b1a6d3c37025f021f49f upstream.
> 
> There's a race between fork() and hugepage migration, as a result we try
> to "dereference" a swap entry as a normal pte, causing kernel panic.
> The cause of the problem is that copy_hugetlb_page_range() can't handle
> "swap entry" family (migration entry and hwpoisoned entry) so let's fix
> it.
> 
> [akpm@linux-foundation.org: coding-style fixes]
> Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
> Acked-by: Hugh Dickins <hughd@google.com>
> Cc: Christoph Lameter <cl@linux.com>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Kamal Mostafa <kamal@canonical.com>

Please drop this one for now: other -stables have carried it, but it
was found last week to contain a bug of its own, arguably worse than
what it's fixing.  Naoya-san has done the fix for that, it's in mmotm
and should make its way to Linus probably this week: so please hold
this back until that can join it - thanks.

Hugh

> ---
>  mm/hugetlb.c | 71 ++++++++++++++++++++++++++++++++++++------------------------
>  1 file changed, 43 insertions(+), 28 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 35fc5eb..7b180d7 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2291,6 +2291,31 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
>  		update_mmu_cache(vma, address, ptep);
>  }
>  
> +static int is_hugetlb_entry_migration(pte_t pte)
> +{
> +	swp_entry_t swp;
> +
> +	if (huge_pte_none(pte) || pte_present(pte))
> +		return 0;
> +	swp = pte_to_swp_entry(pte);
> +	if (non_swap_entry(swp) && is_migration_entry(swp))
> +		return 1;
> +	else
> +		return 0;
> +}
> +
> +static int is_hugetlb_entry_hwpoisoned(pte_t pte)
> +{
> +	swp_entry_t swp;
> +
> +	if (huge_pte_none(pte) || pte_present(pte))
> +		return 0;
> +	swp = pte_to_swp_entry(pte);
> +	if (non_swap_entry(swp) && is_hwpoison_entry(swp))
> +		return 1;
> +	else
> +		return 0;
> +}
>  
>  int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
>  			    struct vm_area_struct *vma)
> @@ -2318,10 +2343,26 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
>  
>  		spin_lock(&dst->page_table_lock);
>  		spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING);
> -		if (!huge_pte_none(huge_ptep_get(src_pte))) {
> +		entry = huge_ptep_get(src_pte);
> +		if (huge_pte_none(entry)) { /* skip none entry */
> +			;
> +		} else if (unlikely(is_hugetlb_entry_migration(entry) ||
> +				    is_hugetlb_entry_hwpoisoned(entry))) {
> +			swp_entry_t swp_entry = pte_to_swp_entry(entry);
> +
> +			if (is_write_migration_entry(swp_entry) && cow) {
> +				/*
> +				 * COW mappings require pages in both
> +				 * parent and child to be set to read.
> +				 */
> +				make_migration_entry_read(&swp_entry);
> +				entry = swp_entry_to_pte(swp_entry);
> +				set_huge_pte_at(src, addr, src_pte, entry);
> +			}
> +			set_huge_pte_at(dst, addr, dst_pte, entry);
> +		} else {
>  			if (cow)
>  				huge_ptep_set_wrprotect(src, addr, src_pte);
> -			entry = huge_ptep_get(src_pte);
>  			ptepage = pte_page(entry);
>  			get_page(ptepage);
>  			page_dup_rmap(ptepage);
> @@ -2336,32 +2377,6 @@ nomem:
>  	return -ENOMEM;
>  }
>  
> -static int is_hugetlb_entry_migration(pte_t pte)
> -{
> -	swp_entry_t swp;
> -
> -	if (huge_pte_none(pte) || pte_present(pte))
> -		return 0;
> -	swp = pte_to_swp_entry(pte);
> -	if (non_swap_entry(swp) && is_migration_entry(swp))
> -		return 1;
> -	else
> -		return 0;
> -}
> -
> -static int is_hugetlb_entry_hwpoisoned(pte_t pte)
> -{
> -	swp_entry_t swp;
> -
> -	if (huge_pte_none(pte) || pte_present(pte))
> -		return 0;
> -	swp = pte_to_swp_entry(pte);
> -	if (non_swap_entry(swp) && is_hwpoison_entry(swp))
> -		return 1;
> -	else
> -		return 0;
> -}
> -
>  void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  			    unsigned long start, unsigned long end,
>  			    struct page *ref_page)
> -- 
> 1.9.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

Kamal Mostafa July 23, 2014, 9:02 p.m. UTC | #2
On Tue, 2014-07-22 at 16:08 -0700, Hugh Dickins wrote:
> On Tue, 22 Jul 2014, Kamal Mostafa wrote:
> 
> > 3.8.13.27 -stable review patch.  If anyone has any objections, please let me know.
> > 
> > ------------------
> > 
> > From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
> > 
> > commit 4a705fef986231a3e7a6b1a6d3c37025f021f49f upstream.
> > 
> > There's a race between fork() and hugepage migration, as a result we try
> > [...]
> 
> Please drop this one for now: other -stables have carried it, but it
> was found last week to contain a bug of its own, arguably worse than
> what it's fixing.  Naoya-san has done the fix for that, it's in mmotm
> and should make its way to Linus probably this week: so please hold
> this back until that can join it - thanks.
> 
> Hugh

OK, I've dropped it from the 3.8-stable queue, and will watch for the
fix to land.  Thanks very much, Hugh!

 -Kamal

Hugh Dickins July 24, 2014, 12:07 a.m. UTC | #3
On Wed, 23 Jul 2014, Kamal Mostafa wrote:
> On Tue, 2014-07-22 at 16:08 -0700, Hugh Dickins wrote:
> > On Tue, 22 Jul 2014, Kamal Mostafa wrote:
> > 
> > > 3.8.13.27 -stable review patch.  If anyone has any objections, please let me know.
> > > 
> > > ------------------
> > > 
> > > From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
> > > 
> > > commit 4a705fef986231a3e7a6b1a6d3c37025f021f49f upstream.
> > > 
> > > There's a race between fork() and hugepage migration, as a result we try
> > > [...]
> > 
> > Please drop this one for now: other -stables have carried it, but it
> > was found last week to contain a bug of its own, arguably worse than
> > what it's fixing.  Naoya-san has done the fix for that, it's in mmotm
> > and should make its way to Linus probably this week: so please hold
> > this back until that can join it - thanks.
> > 
> > Hugh
> 
> OK, I've dropped it from the 3.8-stable queue, and will watch for the
> fix to land.  Thanks very much, Hugh!

commit 0253d634e0803a8376a0d88efee0bf523d8673f9
Author: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date:   Wed Jul 23 14:00:19 2014 -0700
mm: hugetlb: fix copy_hugetlb_page_range()

is now in Linus's tree: so the original patch is good to go into
your -stables, so long as you add 0253d634e080 on top.

Hugh

Luis Henriques July 24, 2014, 8:54 a.m. UTC | #4
On Wed, Jul 23, 2014 at 05:07:16PM -0700, Hugh Dickins wrote:
> On Wed, 23 Jul 2014, Kamal Mostafa wrote:
> > On Tue, 2014-07-22 at 16:08 -0700, Hugh Dickins wrote:
> > > On Tue, 22 Jul 2014, Kamal Mostafa wrote:
> > > 
> > > > 3.8.13.27 -stable review patch.  If anyone has any objections, please let me know.
> > > > 
> > > > ------------------
> > > > 
> > > > From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
> > > > 
> > > > commit 4a705fef986231a3e7a6b1a6d3c37025f021f49f upstream.
> > > > 
> > > > There's a race between fork() and hugepage migration, as a result we try
> > > > [...]
> > > 
> > > Please drop this one for now: other -stables have carried it, but it
> > > was found last week to contain a bug of its own, arguably worse than
> > > what it's fixing.  Naoya-san has done the fix for that, it's in mmotm
> > > and should make its way to Linus probably this week: so please hold
> > > this back until that can join it - thanks.
> > > 
> > > Hugh
> > 
> > OK, I've dropped it from the 3.8-stable queue, and will watch for the
> > fix to land.  Thanks very much, Hugh!
> 
> commit 0253d634e0803a8376a0d88efee0bf523d8673f9
> Author: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
> Date:   Wed Jul 23 14:00:19 2014 -0700
> mm: hugetlb: fix copy_hugetlb_page_range()
> 
> is now in Linus's tree: so the original patch is good to go into
> your -stables, so long as you add 0253d634e080 on top.
> 

Awesome, I'll queue both for the 3.11 kernel as well.  Thanks Hugh!

Cheers,
--
Luís

Kamal Mostafa July 24, 2014, 5:12 p.m. UTC | #5
On Wed, 2014-07-23 at 17:07 -0700, Hugh Dickins wrote:
> 
> commit 0253d634e0803a8376a0d88efee0bf523d8673f9
> Author: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
> Date:   Wed Jul 23 14:00:19 2014 -0700
> mm: hugetlb: fix copy_hugetlb_page_range()
> 
> is now in Linus's tree: so the original patch is good to go into
> your -stables, so long as you add 0253d634e080 on top.
> 
> Hugh

OK, I have queued both for the imminent 3.8-stable release:
        
        4a705fe hugetlb: fix copy_hugetlb_page_range() to handle migration/hwpoisoned entry
        0253d63 mm: hugetlb: fix copy_hugetlb_page_range()

The first patch was already released in the previous 3.13-stable cycle;
I've now queued up the second for the next 3.13 cycle.

Thanks again, Hugh and Naoya-san!

 -Kamal

Ben Hutchings Aug. 4, 2014, 12:07 a.m. UTC | #6
On Wed, 2014-07-23 at 17:07 -0700, Hugh Dickins wrote:
> On Wed, 23 Jul 2014, Kamal Mostafa wrote:
> > On Tue, 2014-07-22 at 16:08 -0700, Hugh Dickins wrote:
> > > On Tue, 22 Jul 2014, Kamal Mostafa wrote:
> > > 
> > > > 3.8.13.27 -stable review patch.  If anyone has any objections, please let me know.
> > > > 
> > > > ------------------
> > > > 
> > > > From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
> > > > 
> > > > commit 4a705fef986231a3e7a6b1a6d3c37025f021f49f upstream.
> > > > 
> > > > There's a race between fork() and hugepage migration, as a result we try
> > > > [...]
> > > 
> > > Please drop this one for now: other -stables have carried it, but it
> > > was found last week to contain a bug of its own, arguably worse than
> > > what it's fixing.  Naoya-san has done the fix for that, it's in mmotm
> > > and should make its way to Linus probably this week: so please hold
> > > this back until that can join it - thanks.
> > > 
> > > Hugh
> > 
> > OK, I've dropped it from the 3.8-stable queue, and will watch for the
> > fix to land.  Thanks very much, Hugh!
> 
> commit 0253d634e0803a8376a0d88efee0bf523d8673f9
> Author: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
> Date:   Wed Jul 23 14:00:19 2014 -0700
> mm: hugetlb: fix copy_hugetlb_page_range()
> 
> is now in Linus's tree: so the original patch is good to go into
> your -stables, so long as you add 0253d634e080 on top.

I've now queued up "mm: hugetlb: fix copy_hugetlb_page_range()" for 3.2.

Ben.

Jiri Slaby Aug. 19, 2014, 3:45 p.m. UTC | #7
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 07/24/2014, 02:07 AM, Hugh Dickins wrote:
> On Wed, 23 Jul 2014, Kamal Mostafa wrote:
>> On Tue, 2014-07-22 at 16:08 -0700, Hugh Dickins wrote:
>>> On Tue, 22 Jul 2014, Kamal Mostafa wrote:
>>> 
>>>> 3.8.13.27 -stable review patch.  If anyone has any
>>>> objections, please let me know.
>>>> 
>>>> ------------------
>>>> 
>>>> From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
>>>> 
>>>> commit 4a705fef986231a3e7a6b1a6d3c37025f021f49f upstream.
>>>> 
>>>> There's a race between fork() and hugepage migration, as a
>>>> result we try [...]
>>> 
>>> Please drop this one for now: other -stables have carried it,
>>> but it was found last week to contain a bug of its own,
>>> arguably worse than what it's fixing.  Naoya-san has done the
>>> fix for that, it's in mmotm and should make its way to Linus
>>> probably this week: so please hold this back until that can
>>> join it - thanks.
>>> 
>>> Hugh
>> 
>> OK, I've dropped it from the 3.8-stable queue, and will watch for
>> the fix to land.  Thanks very much, Hugh!
> 
> commit 0253d634e0803a8376a0d88efee0bf523d8673f9 Author: Naoya
> Horiguchi <n-horiguchi@ah.jp.nec.com> Date:   Wed Jul 23 14:00:19
> 2014 -0700 mm: hugetlb: fix copy_hugetlb_page_range()
> 
> is now in Linus's tree: so the original patch is good to go into 
> your -stables, so long as you add 0253d634e080 on top.

So I have queued both to 3.12 now. Thanks.

- -- 
js
suse labs
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQIcBAEBAgAGBQJT83EFAAoJEL0lsQQGtHBJMfAP/jkMSZK9NI6ApH7RB+YREGgI
l3+0qdjYsknE/RlC7exmSh3WY+EfMc4iPcBomiQ3n26FTCEYo00xeqjSXLXcdVIK
AtUawzDRuQvKHBDFZvqgtBVpHRT89WkqoMSchzBDeFx3M6XvLWW2waUoI0aPu4s+
PYc2cqlgz2lAP9u8Ic1qNu3vznbWt+p96/H9b4wb3/UE9IxU1PCfkvq66ep9w2cv
0vq6VqszQwCQsP0Jb75b2EV+1woqCvlz6pOcf3qZlpkzLwcZ3UQHDha0USwMAHMa
L354ZxdKbyqZJrYwH6PDWWIEkMQ5mULHoesHjZg7oVIAsE3kJsqAghf6o+tK9Li7
RqXy2SxhV2/08AAk8QzAuJGKZp4rtZFc0O5eB6V9JWag2+5xLV5EmSWstkQAbGNc
IkVmGZuZWsiaIDj/eTOD4+p7iU51nj4GPxKEe/S51B7iFAjwenczIaSDFqDcLxU7
HiSDGxRMb16TQtBgX9v+sZDDlhf/y4gG9dUYNgAhteQklriXfaF6fiH5MhsCUmNW
tOIiyQV6NQ2XXCzZvGvGVBHutxKNQhQQtiiTMamp5U1sLcg9iAjyFemRK49W+Q3q
5gumMp+1ObY0XgqjRA6b7zVYYqIMtrLVTlWvTVB+UsQ9cS8stYfC0EYgSD/thvZ6
k3x6oxbVkZMrZDJCHWMP
=72iL
-----END PGP SIGNATURE-----

diff mbox

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 35fc5eb..7b180d7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2291,6 +2291,31 @@  static void set_huge_ptep_writable(struct vm_area_struct *vma,
 		update_mmu_cache(vma, address, ptep);
 }
 
+static int is_hugetlb_entry_migration(pte_t pte)
+{
+	swp_entry_t swp;
+
+	if (huge_pte_none(pte) || pte_present(pte))
+		return 0;
+	swp = pte_to_swp_entry(pte);
+	if (non_swap_entry(swp) && is_migration_entry(swp))
+		return 1;
+	else
+		return 0;
+}
+
+static int is_hugetlb_entry_hwpoisoned(pte_t pte)
+{
+	swp_entry_t swp;
+
+	if (huge_pte_none(pte) || pte_present(pte))
+		return 0;
+	swp = pte_to_swp_entry(pte);
+	if (non_swap_entry(swp) && is_hwpoison_entry(swp))
+		return 1;
+	else
+		return 0;
+}
 
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			    struct vm_area_struct *vma)
@@ -2318,10 +2343,26 @@  int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 
 		spin_lock(&dst->page_table_lock);
 		spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING);
-		if (!huge_pte_none(huge_ptep_get(src_pte))) {
+		entry = huge_ptep_get(src_pte);
+		if (huge_pte_none(entry)) { /* skip none entry */
+			;
+		} else if (unlikely(is_hugetlb_entry_migration(entry) ||
+				    is_hugetlb_entry_hwpoisoned(entry))) {
+			swp_entry_t swp_entry = pte_to_swp_entry(entry);
+
+			if (is_write_migration_entry(swp_entry) && cow) {
+				/*
+				 * COW mappings require pages in both
+				 * parent and child to be set to read.
+				 */
+				make_migration_entry_read(&swp_entry);
+				entry = swp_entry_to_pte(swp_entry);
+				set_huge_pte_at(src, addr, src_pte, entry);
+			}
+			set_huge_pte_at(dst, addr, dst_pte, entry);
+		} else {
 			if (cow)
 				huge_ptep_set_wrprotect(src, addr, src_pte);
-			entry = huge_ptep_get(src_pte);
 			ptepage = pte_page(entry);
 			get_page(ptepage);
 			page_dup_rmap(ptepage);
@@ -2336,32 +2377,6 @@  nomem:
 	return -ENOMEM;
 }
 
-static int is_hugetlb_entry_migration(pte_t pte)
-{
-	swp_entry_t swp;
-
-	if (huge_pte_none(pte) || pte_present(pte))
-		return 0;
-	swp = pte_to_swp_entry(pte);
-	if (non_swap_entry(swp) && is_migration_entry(swp))
-		return 1;
-	else
-		return 0;
-}
-
-static int is_hugetlb_entry_hwpoisoned(pte_t pte)
-{
-	swp_entry_t swp;
-
-	if (huge_pte_none(pte) || pte_present(pte))
-		return 0;
-	swp = pte_to_swp_entry(pte);
-	if (non_swap_entry(swp) && is_hwpoison_entry(swp))
-		return 1;
-	else
-		return 0;
-}
-
 void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			    unsigned long start, unsigned long end,
 			    struct page *ref_page)