diff mbox

mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups - CVE-2011-2479

Message ID 1318601457-4304-2-git-send-email-paolo.pisati@canonical.com
State New
Headers show

Commit Message

Paolo Pisati Oct. 14, 2011, 2:10 p.m. UTC
From: Andrea Arcangeli <aarcange@redhat.com>

CVE-2011-2479

BugLink: http://bugs.launchpad.net/bugs/775809

commit 78f11a255749d09025f54d4e2df4fbcb031530e2 upstream.

The huge_memory.c THP page fault was allowed to run if vm_ops was null
(which would succeed for /dev/zero MAP_PRIVATE, as the f_op->mmap wouldn't
setup a special vma->vm_ops and it would fallback to regular anonymous
memory) but other THP logics weren't fully activated for vmas with vm_file
not NULL (/dev/zero has a not NULL vma->vm_file).

So this removes the vm_file checks so that /dev/zero also can safely use
THP (the other albeit safer approach to fix this bug would have been to
prevent the THP initial page fault to run if vm_file was set).

After removing the vm_file checks, this also makes huge_memory.c stricter
in khugepaged for the DEBUG_VM=y case.  It doesn't replace the vm_file
check with a is_pfn_mapping check (but it keeps checking for VM_PFNMAP
under VM_BUG_ON) because for a is_cow_mapping() mapping VM_PFNMAP should
only be allowed to exist before the first page fault, and in turn when
vma->anon_vma is null (so preventing khugepaged registration).  So I tend
to think the previous comment saying if vm_file was set, VM_PFNMAP might
have been set and we could still be registered in khugepaged (despite
anon_vma was not NULL to be registered in khugepaged) was too paranoid.
The is_linear_pfn_mapping check is also I think superfluous (as described
by comment) but under DEBUG_VM it is safe to stay.

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33682

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reported-by: Caspar Zhang <bugs@casparzhang.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
(cherry picked from commit 3a7ecd0f369bc80f3647941224c3c24b7dcb9621)
---
 include/linux/huge_mm.h |    2 +-
 include/linux/mm.h      |    3 ++-
 mm/huge_memory.c        |   43 ++++++++++++++++++++++++-------------------
 3 files changed, 27 insertions(+), 21 deletions(-)

Comments

Andy Whitcroft Oct. 14, 2011, 2:40 p.m. UTC | #1
On Fri, Oct 14, 2011 at 04:10:57PM +0200, Paolo Pisati wrote:
> From: Andrea Arcangeli <aarcange@redhat.com>
> 
> CVE-2011-2479
> 
> BugLink: http://bugs.launchpad.net/bugs/775809
> 
> commit 78f11a255749d09025f54d4e2df4fbcb031530e2 upstream.
> 
> The huge_memory.c THP page fault was allowed to run if vm_ops was null
> (which would succeed for /dev/zero MAP_PRIVATE, as the f_op->mmap wouldn't
> setup a special vma->vm_ops and it would fallback to regular anonymous
> memory) but other THP logics weren't fully activated for vmas with vm_file
> not NULL (/dev/zero has a not NULL vma->vm_file).
> 
> So this removes the vm_file checks so that /dev/zero also can safely use
> THP (the other albeit safer approach to fix this bug would have been to
> prevent the THP initial page fault to run if vm_file was set).
> 
> After removing the vm_file checks, this also makes huge_memory.c stricter
> in khugepaged for the DEBUG_VM=y case.  It doesn't replace the vm_file
> check with a is_pfn_mapping check (but it keeps checking for VM_PFNMAP
> under VM_BUG_ON) because for a is_cow_mapping() mapping VM_PFNMAP should
> only be allowed to exist before the first page fault, and in turn when
> vma->anon_vma is null (so preventing khugepaged registration).  So I tend
> to think the previous comment saying if vm_file was set, VM_PFNMAP might
> have been set and we could still be registered in khugepaged (despite
> anon_vma was not NULL to be registered in khugepaged) was too paranoid.
> The is_linear_pfn_mapping check is also I think superfluous (as described
> by comment) but under DEBUG_VM it is safe to stay.
> 
> Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33682
> 
> Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
> Reported-by: Caspar Zhang <bugs@casparzhang.com>
> Acked-by: Mel Gorman <mel@csn.ul.ie>
> Acked-by: Rik van Riel <riel@redhat.com>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
> Signed-off-by: Tim Gardner <tim.gardner@canonical.com>

<blank space>

> (cherry picked from commit 3a7ecd0f369bc80f3647941224c3c24b7dcb9621)

Signed-off-by: ??

> ---
>  include/linux/huge_mm.h |    2 +-
>  include/linux/mm.h      |    3 ++-
>  mm/huge_memory.c        |   43 ++++++++++++++++++++++++-------------------
>  3 files changed, 27 insertions(+), 21 deletions(-)
> 
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index df29c8f..8847c8c 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -117,7 +117,7 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
>  					 unsigned long end,
>  					 long adjust_next)
>  {
> -	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
> +	if (!vma->anon_vma || vma->vm_ops)
>  		return;
>  	__vma_adjust_trans_huge(vma, start, end, adjust_next);
>  }
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 4e43460..72674d4 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -137,7 +137,8 @@ extern unsigned int kobjsize(const void *objp);
>  #define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
>  
>  /*
> - * special vmas that are non-mergable, non-mlock()able
> + * Special vmas that are non-mergable, non-mlock()able.
> + * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
>   */
>  #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
>  
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 8f76561..56cac93 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -1400,6 +1400,9 @@ out:
>  	return ret;
>  }
>  
> +#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
> +		   VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
> +
>  int hugepage_madvise(struct vm_area_struct *vma,
>  		     unsigned long *vm_flags, int advice)
>  {
> @@ -1408,11 +1411,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
>  		/*
>  		 * Be somewhat over-protective like KSM for now!
>  		 */
> -		if (*vm_flags & (VM_HUGEPAGE |
> -				 VM_SHARED   | VM_MAYSHARE   |
> -				 VM_PFNMAP   | VM_IO      | VM_DONTEXPAND |
> -				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
> -				 VM_MIXEDMAP | VM_SAO))
> +		if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
>  			return -EINVAL;
>  		*vm_flags &= ~VM_NOHUGEPAGE;
>  		*vm_flags |= VM_HUGEPAGE;
> @@ -1428,11 +1427,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
>  		/*
>  		 * Be somewhat over-protective like KSM for now!
>  		 */
> -		if (*vm_flags & (VM_NOHUGEPAGE |
> -				 VM_SHARED   | VM_MAYSHARE   |
> -				 VM_PFNMAP   | VM_IO      | VM_DONTEXPAND |
> -				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
> -				 VM_MIXEDMAP | VM_SAO))
> +		if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
>  			return -EINVAL;
>  		*vm_flags &= ~VM_HUGEPAGE;
>  		*vm_flags |= VM_NOHUGEPAGE;
> @@ -1566,10 +1561,14 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
>  		 * page fault if needed.
>  		 */
>  		return 0;
> -	if (vma->vm_file || vma->vm_ops)
> +	if (vma->vm_ops)
>  		/* khugepaged not yet working on file or special mappings */
>  		return 0;
> -	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
> +	/*
> +	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
> +	 * true too, verify it here.
> +	 */
> +	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
>  	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
>  	hend = vma->vm_end & HPAGE_PMD_MASK;
>  	if (hstart < hend)
> @@ -1818,12 +1817,15 @@ static void collapse_huge_page(struct mm_struct *mm,
>  	    (vma->vm_flags & VM_NOHUGEPAGE))
>  		goto out;
>  
> -	/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
> -	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
> +	if (!vma->anon_vma || vma->vm_ops)
>  		goto out;
>  	if (is_vma_temporary_stack(vma))
>  		goto out;
> -	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
> +	/*
> +	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
> +	 * true too, verify it here.
> +	 */
> +	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
>  
>  	pgd = pgd_offset(mm, address);
>  	if (!pgd_present(*pgd))
> @@ -2056,13 +2058,16 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
>  			progress++;
>  			continue;
>  		}
> -		/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
> -		if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
> +		if (!vma->anon_vma || vma->vm_ops)
>  			goto skip;
>  		if (is_vma_temporary_stack(vma))
>  			goto skip;
> -
> -		VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
> +		/*
> +		 * If is_pfn_mapping() is true is_learn_pfn_mapping()
> +		 * must be true too, verify it here.
> +		 */
> +		VM_BUG_ON(is_linear_pfn_mapping(vma) ||
> +			  vma->vm_flags & VM_NO_THP);
>  
>  		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
>  		hend = vma->vm_end & HPAGE_PMD_MASK;

Other than the attribution this looks fine.

Acked-by: Andy Whitcroft <apw@canonical.com>

-apw
diff mbox

Patch

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index df29c8f..8847c8c 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -117,7 +117,7 @@  static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
 					 unsigned long end,
 					 long adjust_next)
 {
-	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+	if (!vma->anon_vma || vma->vm_ops)
 		return;
 	__vma_adjust_trans_huge(vma, start, end, adjust_next);
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4e43460..72674d4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -137,7 +137,8 @@  extern unsigned int kobjsize(const void *objp);
 #define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
 
 /*
- * special vmas that are non-mergable, non-mlock()able
+ * Special vmas that are non-mergable, non-mlock()able.
+ * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
  */
 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8f76561..56cac93 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1400,6 +1400,9 @@  out:
 	return ret;
 }
 
+#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
+		   VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
+
 int hugepage_madvise(struct vm_area_struct *vma,
 		     unsigned long *vm_flags, int advice)
 {
@@ -1408,11 +1411,7 @@  int hugepage_madvise(struct vm_area_struct *vma,
 		/*
 		 * Be somewhat over-protective like KSM for now!
 		 */
-		if (*vm_flags & (VM_HUGEPAGE |
-				 VM_SHARED   | VM_MAYSHARE   |
-				 VM_PFNMAP   | VM_IO      | VM_DONTEXPAND |
-				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
-				 VM_MIXEDMAP | VM_SAO))
+		if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
 			return -EINVAL;
 		*vm_flags &= ~VM_NOHUGEPAGE;
 		*vm_flags |= VM_HUGEPAGE;
@@ -1428,11 +1427,7 @@  int hugepage_madvise(struct vm_area_struct *vma,
 		/*
 		 * Be somewhat over-protective like KSM for now!
 		 */
-		if (*vm_flags & (VM_NOHUGEPAGE |
-				 VM_SHARED   | VM_MAYSHARE   |
-				 VM_PFNMAP   | VM_IO      | VM_DONTEXPAND |
-				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
-				 VM_MIXEDMAP | VM_SAO))
+		if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
 			return -EINVAL;
 		*vm_flags &= ~VM_HUGEPAGE;
 		*vm_flags |= VM_NOHUGEPAGE;
@@ -1566,10 +1561,14 @@  int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
 		 * page fault if needed.
 		 */
 		return 0;
-	if (vma->vm_file || vma->vm_ops)
+	if (vma->vm_ops)
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+	/*
+	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
+	 * true too, verify it here.
+	 */
+	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
@@ -1818,12 +1817,15 @@  static void collapse_huge_page(struct mm_struct *mm,
 	    (vma->vm_flags & VM_NOHUGEPAGE))
 		goto out;
 
-	/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+	if (!vma->anon_vma || vma->vm_ops)
 		goto out;
 	if (is_vma_temporary_stack(vma))
 		goto out;
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+	/*
+	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
+	 * true too, verify it here.
+	 */
+	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
@@ -2056,13 +2058,16 @@  static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 			progress++;
 			continue;
 		}
-		/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-		if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+		if (!vma->anon_vma || vma->vm_ops)
 			goto skip;
 		if (is_vma_temporary_stack(vma))
 			goto skip;
-
-		VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+		/*
+		 * If is_pfn_mapping() is true is_learn_pfn_mapping()
+		 * must be true too, verify it here.
+		 */
+		VM_BUG_ON(is_linear_pfn_mapping(vma) ||
+			  vma->vm_flags & VM_NO_THP);
 
 		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 		hend = vma->vm_end & HPAGE_PMD_MASK;