Patchwork [3/5] powerpc/mm: Fix bugs in huge page hashing

login
register
mail settings
Submitter Benjamin Herrenschmidt
Date July 23, 2010, 12:41 a.m.
Message ID <1279845704-14848-3-git-send-email-benh@kernel.crashing.org>
Download mbox | patch
Permalink /patch/59712/
State Superseded
Headers show

Comments

Benjamin Herrenschmidt - July 23, 2010, 12:41 a.m.
There's a couple of nasty bugs lurking in our huge page hashing code.

First, we don't check the access permission atomically with setting
the _PAGE_BUSY bit, which means that the PTE value we end up using
for the hashing might be different than the one we have checked
the access permissions for.

We've seen cases where that leads us to try to use an invalidated
PTE for hashing, causing all sort of "interesting" issues.

Then, we also failed to set _PAGE_DIRTY on a write access.

This fixes both, while also simplifying the code a bit.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
---
 arch/powerpc/mm/hugetlbpage-hash64.c |   31 +++++++++++++------------------
 1 files changed, 13 insertions(+), 18 deletions(-)
Benjamin Herrenschmidt - July 23, 2010, 3:03 a.m.
On Fri, 2010-07-23 at 10:41 +1000, Benjamin Herrenschmidt wrote:
> There's a couple of nasty bugs lurking in our huge page hashing code.
> 
> First, we don't check the access permission atomically with setting
> the _PAGE_BUSY bit, which means that the PTE value we end up using
> for the hashing might be different than the one we have checked
> the access permissions for.
> 
> We've seen cases where that leads us to try to use an invalidated
> PTE for hashing, causing all sort of "interesting" issues.
> 
> Then, we also failed to set _PAGE_DIRTY on a write access.
> 
> This fixes both, while also simplifying the code a bit.

The changeset lacks a comment about a change to the return value
when hitting _PAGE_BUSY and ...

>  	do {
>  		old_pte = pte_val(*ptep);
> -		if (old_pte & _PAGE_BUSY)
> -			goto out;
> +		/* If PTE busy, retry the access */
> +		if (unlikely(old_pte & _PAGE_BUSY))
> +			return 0;
> +		/* If PTE permissions don't match, take page fault */
> +		if (unlikely(access & ~pte_val(*ptep)))
> +			return 1;

old_pte will do just fine instead of reloading it

Thanks Michael !

I'll respin that one.

Cheers,
Ben.

> +		/* Try to lock the PTE, add ACCESSED and DIRTY if it was
> +		 * a write access */
>  		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
> +		if (access & _PAGE_RW)
> +			new_pte |= _PAGE_DIRTY;
>  	} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
>  					 old_pte, new_pte));
>  
> @@ -127,8 +127,7 @@ repeat:
>  		 */
>  		if (unlikely(slot == -2)) {
>  			*ptep = __pte(old_pte);
> -			err = -1;
> -			goto out;
> +			return -1;
>  		}
>  
>  		new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
> @@ -138,9 +137,5 @@ repeat:
>  	 * No need to use ldarx/stdcx here
>  	 */
>  	*ptep = __pte(new_pte & ~_PAGE_BUSY);
> -
> -	err = 0;
> -
> - out:
> -	return err;
> +	return 0;
>  }

Patch

diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index c9acd79..fd0a15e 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -21,21 +21,13 @@  int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	unsigned long old_pte, new_pte;
 	unsigned long va, rflags, pa, sz;
 	long slot;
-	int err = 1;
 
 	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
 
 	/* Search the Linux page table for a match with va */
 	va = hpt_va(ea, vsid, ssize);
 
-	/*
-	 * Check the user's access rights to the page.  If access should be
-	 * prevented then send the problem up to do_page_fault.
-	 */
-	if (unlikely(access & ~pte_val(*ptep)))
-		goto out;
-	/*
-	 * At this point, we have a pte (old_pte) which can be used to build
+	/* At this point, we have a pte (old_pte) which can be used to build
 	 * or update an HPTE. There are 2 cases:
 	 *
 	 * 1. There is a valid (present) pte with no associated HPTE (this is
@@ -49,9 +41,17 @@  int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 
 	do {
 		old_pte = pte_val(*ptep);
-		if (old_pte & _PAGE_BUSY)
-			goto out;
+		/* If PTE busy, retry the access */
+		if (unlikely(old_pte & _PAGE_BUSY))
+			return 0;
+		/* If PTE permissions don't match, take page fault */
+		if (unlikely(access & ~pte_val(*ptep)))
+			return 1;
+		/* Try to lock the PTE, add ACCESSED and DIRTY if it was
+		 * a write access */
 		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
+		if (access & _PAGE_RW)
+			new_pte |= _PAGE_DIRTY;
 	} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
 					 old_pte, new_pte));
 
@@ -127,8 +127,7 @@  repeat:
 		 */
 		if (unlikely(slot == -2)) {
 			*ptep = __pte(old_pte);
-			err = -1;
-			goto out;
+			return -1;
 		}
 
 		new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
@@ -138,9 +137,5 @@  repeat:
 	 * No need to use ldarx/stdcx here
 	 */
 	*ptep = __pte(new_pte & ~_PAGE_BUSY);
-
-	err = 0;
-
- out:
-	return err;
+	return 0;
 }