
[03/10] mm: Convert p[te|md]_numa users to p[te|md]_protnone_numa

Message ID 1416578268-19597-4-git-send-email-mgorman@suse.de (mailing list archive)
State Not Applicable

Commit Message

Mel Gorman Nov. 21, 2014, 1:57 p.m. UTC
Convert existing users of pte_numa and friends to the new helper. Note
that the kernel is broken after this patch is applied until the other
page table modifiers are also altered. This patch layout is to make
review easier.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/kvm/book3s_hv_rm_mmu.c |  2 +-
 arch/powerpc/mm/fault.c             |  5 -----
 arch/powerpc/mm/pgtable.c           | 11 ++++++++---
 arch/powerpc/mm/pgtable_64.c        |  3 ++-
 arch/x86/mm/gup.c                   |  4 ++--
 include/uapi/linux/mempolicy.h      |  2 +-
 mm/gup.c                            | 10 +++++-----
 mm/huge_memory.c                    | 16 +++++++--------
 mm/memory.c                         |  4 ++--
 mm/mprotect.c                       | 39 ++++++++++---------------------------
 mm/pgtable-generic.c                |  2 +-
 11 files changed, 40 insertions(+), 58 deletions(-)
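
For reference, a rough sketch of the p[te|md]_protnone helpers this patch
converts to, modelled on the asm-generic and x86 definitions; this is not
copied from the series as posted, so details may differ:

	#ifdef CONFIG_NUMA_BALANCING
	/* x86: a NUMA hinting entry has _PAGE_PROTNONE set, _PAGE_PRESENT clear */
	static inline int pte_protnone(pte_t pte)
	{
		return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT))
			== _PAGE_PROTNONE;
	}

	static inline int pmd_protnone(pmd_t pmd)
	{
		return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT))
			== _PAGE_PROTNONE;
	}
	#else
	/* Without NUMA balancing the core mm never sees such entries */
	static inline int pte_protnone(pte_t pte)  { return 0; }
	static inline int pmd_protnone(pmd_t pmd)  { return 0; }
	#endif

The point of the conversion is that the check is made against the ordinary
protection bits of the entry rather than a dedicated _PAGE_NUMA encoding.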

Comments

Benjamin Herrenschmidt Dec. 1, 2014, 10:42 p.m. UTC | #1
On Fri, 2014-11-21 at 13:57 +0000, Mel Gorman wrote:
> Convert existing users of pte_numa and friends to the new helper. Note
> that the kernel is broken after this patch is applied until the other
> page table modifiers are also altered. This patch layout is to make
> review easier.

Aneesh, regarding the removal of the DSISR_PROTFAULT checks: I wonder
if we might break something here ... (I know, I asked for them to be
removed :-)

I.e., we basically bounce all protection checks to the "normal" VMA
protection checking; so far so good...

But what about the subpage protection stuff? Will that still work?

Cheers,
Ben.

Benjamin Herrenschmidt Dec. 1, 2014, 10:44 p.m. UTC | #2
On Fri, 2014-11-21 at 13:57 +0000, Mel Gorman wrote:
> void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
>                 pte_t pte)
>  {
> -#ifdef CONFIG_DEBUG_VM
> -       WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
> -#endif
> +       /*
> +        * When handling numa faults, we already have the pte marked
> +        * _PAGE_PRESENT, but we can be sure that it is not in hpte.
> +        * Hence we can use set_pte_at for them.
> +        */
> +       VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
> +               (_PAGE_PRESENT | _PAGE_USER));
> +

How is that going to fare with set_pte_at() called for kernel pages?

Cheers,
Ben.
Aneesh Kumar K.V Dec. 2, 2014, 7:27 a.m. UTC | #3
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Fri, 2014-11-21 at 13:57 +0000, Mel Gorman wrote:
>> Convert existing users of pte_numa and friends to the new helper. Note
>> that the kernel is broken after this patch is applied until the other
>> page table modifiers are also altered. This patch layout is to make
>> review easier.
>
> Aneesh, regarding the removal of the DSISR_PROTFAULT checks: I wonder
> if we might break something here ... (I know, I asked for them to be
> removed :-)
>

That is the reason I converted that to a WARN_ON in a later patch.

> I.e., we basically bounce all protection checks to the "normal" VMA
> protection checking; so far so good...
>
> But what about the subpage protection stuff? Will that still work?
>

I did look at that before. If we have subpage access limited, then when we
take a fault for that subpage we bail out early in hash_page_mm (with
rc = 2). low_hash_fault handles that case directly; we will not end up
calling do_page_fault.

Now, hash_preload can possibly insert an hpte into the hash page table even
if the access is not allowed by the pte permissions. But I guess even that
is ok, because we will fault again and end up calling hash_page_mm, where
we handle that part correctly.

-aneesh
Aneesh Kumar K.V Dec. 2, 2014, 7:31 a.m. UTC | #4
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Fri, 2014-11-21 at 13:57 +0000, Mel Gorman wrote:
>> void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
>>                 pte_t pte)
>>  {
>> -#ifdef CONFIG_DEBUG_VM
>> -       WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
>> -#endif
>> +       /*
>> +        * When handling numa faults, we already have the pte marked
>> +        * _PAGE_PRESENT, but we can be sure that it is not in hpte.
>> +        * Hence we can use set_pte_at for them.
>> +        */
>> +       VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
>> +               (_PAGE_PRESENT | _PAGE_USER));
>> +
>
> How is that going to fare with set_pte_at() called for kernel pages?
>

Yes, we won't capture those errors now. But is there any other debug
check I could use to catch wrong usage of set_pte_at?

-aneesh
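
As an aside on the debug check being discussed: VM_WARN_ON() is the
CONFIG_DEBUG_VM-gated variant, which is why the open-coded #ifdef around
the old WARN_ON can be dropped. Roughly (a sketch of the mmdebug.h
definitions; the exact form varies by kernel version):

	#ifdef CONFIG_DEBUG_VM
	#define VM_WARN_ON(cond)	WARN_ON(cond)
	#else
	/* still type-checks 'cond' but generates no code */
	#define VM_WARN_ON(cond)	BUILD_BUG_ON_INVALID(cond)
	#endif
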
Benjamin Herrenschmidt Dec. 2, 2014, 8:11 p.m. UTC | #5
On Tue, 2014-12-02 at 12:57 +0530, Aneesh Kumar K.V wrote:
> Now, hash_preload can possibly insert an hpte into the hash page table even
> if the access is not allowed by the pte permissions. But I guess even that
> is ok, because we will fault again and end up calling hash_page_mm, where
> we handle that part correctly.

I think we need a test case...

Cheers,
Ben.
Benjamin Herrenschmidt Dec. 2, 2014, 8:13 p.m. UTC | #6
On Tue, 2014-12-02 at 13:01 +0530, Aneesh Kumar K.V wrote:
> Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:
> 
> > On Fri, 2014-11-21 at 13:57 +0000, Mel Gorman wrote:
> >> void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
> >>                 pte_t pte)
> >>  {
> >> -#ifdef CONFIG_DEBUG_VM
> >> -       WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
> >> -#endif
> >> +       /*
> >> +        * When handling numa faults, we already have the pte marked
> >> +        * _PAGE_PRESENT, but we can be sure that it is not in hpte.
> >> +        * Hence we can use set_pte_at for them.
> >> +        */
> >> +       VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
> >> +               (_PAGE_PRESENT | _PAGE_USER));
> >> +
> >
> > How is that going to fare with set_pte_at() called for kernel pages?
> >
> 
> Yes, we won't capture those errors now. But is there any other debug
> check I could use to catch wrong usage of set_pte_at?

Actually the above is fine; for some reason I misread the test as
blowing up on kernel pages, which it doesn't.

We probably do need to make sure, however, that protnone isn't used for
kernel pages.

Cheers,
Ben.

> -aneesh
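
A note on Ben's last point: a powerpc pte_protnone() would naturally be
encoded as "present but not user-accessible", which cannot distinguish a
NUMA hinting pte from an ordinary kernel mapping. A sketch of such a
definition (illustrative only, not part of this patch):

	static inline int pte_protnone(pte_t pte)
	{
		/*
		 * A NUMA hinting pte keeps _PAGE_PRESENT but drops
		 * _PAGE_USER.  Kernel ptes also lack _PAGE_USER, so this
		 * test is only meaningful for user mappings; hence the
		 * requirement that protnone never be applied to kernel
		 * pages.
		 */
		return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_USER)) ==
			_PAGE_PRESENT;
	}

	static inline int pmd_protnone(pmd_t pmd)
	{
		return pte_protnone(pmd_pte(pmd));
	}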

Patch

diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 084ad54..3e6ad3f 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -235,7 +235,7 @@  long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		pte_size = psize;
 		pte = lookup_linux_pte_and_update(pgdir, hva, writing,
 						  &pte_size);
-		if (pte_present(pte) && !pte_numa(pte)) {
+		if (pte_present(pte) && !pte_protnone(pte)) {
 			if (writing && !pte_write(pte))
 				/* make the actual HPTE be read-only */
 				ptel = hpte_make_readonly(ptel);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index eb79907..b434153 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -398,8 +398,6 @@  good_area:
 		 * processors use the same I/D cache coherency mechanism
 		 * as embedded.
 		 */
-		if (error_code & DSISR_PROTFAULT)
-			goto bad_area;
 #endif /* CONFIG_PPC_STD_MMU */
 
 		/*
@@ -423,9 +421,6 @@  good_area:
 		flags |= FAULT_FLAG_WRITE;
 	/* a read */
 	} else {
-		/* protection fault */
-		if (error_code & 0x08000000)
-			goto bad_area;
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
 			goto bad_area;
 	}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index c90e602..83dfcb5 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -172,9 +172,14 @@  static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
 void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 		pte_t pte)
 {
-#ifdef CONFIG_DEBUG_VM
-	WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
-#endif
+	/*
+	 * When handling numa faults, we already have the pte marked
+	 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
+	 * Hence we can use set_pte_at for them.
+	 */
+	VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
+		(_PAGE_PRESENT | _PAGE_USER));
+
 	/* Note: mm->context.id might not yet have been assigned as
 	 * this context might not have been activated yet when this
 	 * is called.
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 87ff0c1..435ebf7 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -718,7 +718,8 @@  void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		pmd_t *pmdp, pmd_t pmd)
 {
 #ifdef CONFIG_DEBUG_VM
-	WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT);
+	WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) ==
+		(_PAGE_PRESENT | _PAGE_USER));
 	assert_spin_locked(&mm->page_table_lock);
 	WARN_ON(!pmd_trans_huge(pmd));
 #endif
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 207d9aef..f32e12c 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -84,7 +84,7 @@  static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		struct page *page;
 
 		/* Similar to the PMD case, NUMA hinting must take slow path */
-		if (pte_numa(pte)) {
+		if (pte_protnone(pte)) {
 			pte_unmap(ptep);
 			return 0;
 		}
@@ -178,7 +178,7 @@  static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 			 * slowpath for accounting purposes and so that they
 			 * can be serialised against THP migration.
 			 */
-			if (pmd_numa(pmd))
+			if (pmd_protnone(pmd))
 				return 0;
 			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
 				return 0;
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 0d11c3d..9cd8b21 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -67,7 +67,7 @@  enum mpol_rebind_step {
 #define MPOL_F_LOCAL   (1 << 1)	/* preferred local allocation */
 #define MPOL_F_REBINDING (1 << 2)	/* identify policies in rebinding */
 #define MPOL_F_MOF	(1 << 3) /* this policy wants migrate on fault */
-#define MPOL_F_MORON	(1 << 4) /* Migrate On pte_numa Reference On Node */
+#define MPOL_F_MORON	(1 << 4) /* Migrate On protnone Reference On Node */
 
 
 #endif /* _UAPI_LINUX_MEMPOLICY_H */
diff --git a/mm/gup.c b/mm/gup.c
index 0ca1df9..e5dab89 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -64,7 +64,7 @@  retry:
 		migration_entry_wait(mm, pmd, address);
 		goto retry;
 	}
-	if ((flags & FOLL_NUMA) && pte_numa(pte))
+	if ((flags & FOLL_NUMA) && pte_protnone(pte))
 		goto no_page;
 	if ((flags & FOLL_WRITE) && !pte_write(pte)) {
 		pte_unmap_unlock(ptep, ptl);
@@ -193,7 +193,7 @@  struct page *follow_page_mask(struct vm_area_struct *vma,
 		}
 		return page;
 	}
-	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+	if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
 		return no_page_table(vma, flags);
 	if (pmd_trans_huge(*pmd)) {
 		if (flags & FOLL_SPLIT) {
@@ -740,10 +740,10 @@  static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 
 		/*
 		 * Similar to the PMD case below, NUMA hinting must take slow
-		 * path
+		 * path using the pte_protnone check.
 		 */
 		if (!pte_present(pte) || pte_special(pte) ||
-			pte_numa(pte) || (write && !pte_write(pte)))
+			pte_protnone(pte) || (write && !pte_write(pte)))
 			goto pte_unmap;
 
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -938,7 +938,7 @@  static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 			 * slowpath for accounting purposes and so that they
 			 * can be serialised against THP migration.
 			 */
-			if (pmd_numa(pmd))
+			if (pmd_protnone(pmd))
 				return 0;
 
 			if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a2cd021..f81fddf 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1222,7 +1222,7 @@  struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 		return ERR_PTR(-EFAULT);
 
 	/* Full NUMA hinting faults to serialise migration in fault paths */
-	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+	if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
 		goto out;
 
 	page = pmd_page(*pmd);
@@ -1353,7 +1353,7 @@  int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/*
 	 * Migrate the THP to the requested node, returns with page unlocked
-	 * and pmd_numa cleared.
+	 * and access rights restored.
 	 */
 	spin_unlock(ptl);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
@@ -1368,7 +1368,7 @@  clear_pmdnuma:
 	BUG_ON(!PageLocked(page));
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
-	VM_BUG_ON(pmd_numa(*pmdp));
+	VM_BUG_ON(pmd_protnone(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
 	unlock_page(page);
 out_unlock:
@@ -1514,7 +1514,7 @@  int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		ret = 1;
 		if (!prot_numa) {
 			entry = pmdp_get_and_clear_notify(mm, addr, pmd);
-			if (pmd_numa(entry))
+			if (pmd_protnone(entry))
 				entry = pmd_mknonnuma(entry);
 			entry = pmd_modify(entry, newprot);
 			ret = HPAGE_PMD_NR;
@@ -1530,7 +1530,7 @@  int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			 * local vs remote hits on the zero page.
 			 */
 			if (!is_huge_zero_page(page) &&
-			    !pmd_numa(*pmd)) {
+			    !pmd_protnone(*pmd)) {
 				pmdp_set_numa(mm, addr, pmd);
 				ret = HPAGE_PMD_NR;
 			}
@@ -1798,9 +1798,9 @@  static int __split_huge_page_map(struct page *page,
 			pte_t *pte, entry;
 			BUG_ON(PageCompound(page+i));
 			/*
-			 * Note that pmd_numa is not transferred deliberately
-			 * to avoid any possibility that pte_numa leaks to
-			 * a PROT_NONE VMA by accident.
+			 * Note that NUMA hinting access restrictions are not
+			 * transferred to avoid any possibility of altering
+			 * permissions across VMAs.
 			 */
 			entry = mk_pte(page + i, vma->vm_page_prot);
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
diff --git a/mm/memory.c b/mm/memory.c
index ae923f5..eaa46f1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3214,7 +3214,7 @@  static int handle_pte_fault(struct mm_struct *mm,
 					pte, pmd, flags, entry);
 	}
 
-	if (pte_numa(entry))
+	if (pte_protnone(entry))
 		return do_numa_page(mm, vma, address, entry, pte, pmd);
 
 	ptl = pte_lockptr(mm, pmd);
@@ -3292,7 +3292,7 @@  static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			if (pmd_trans_splitting(orig_pmd))
 				return 0;
 
-			if (pmd_numa(orig_pmd))
+			if (pmd_protnone(orig_pmd))
 				return do_huge_pmd_numa_page(mm, vma, address,
 							     orig_pmd, pmd);
 
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ace9345..e93ddac 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -75,36 +75,17 @@  static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		oldpte = *pte;
 		if (pte_present(oldpte)) {
 			pte_t ptent;
-			bool updated = false;
-
-			if (!prot_numa) {
-				ptent = ptep_modify_prot_start(mm, addr, pte);
-				if (pte_numa(ptent))
-					ptent = pte_mknonnuma(ptent);
-				ptent = pte_modify(ptent, newprot);
-				/*
-				 * Avoid taking write faults for pages we
-				 * know to be dirty.
-				 */
-				if (dirty_accountable && pte_dirty(ptent) &&
-				    (pte_soft_dirty(ptent) ||
-				     !(vma->vm_flags & VM_SOFTDIRTY)))
-					ptent = pte_mkwrite(ptent);
-				ptep_modify_prot_commit(mm, addr, pte, ptent);
-				updated = true;
-			} else {
-				struct page *page;
-
-				page = vm_normal_page(vma, addr, oldpte);
-				if (page && !PageKsm(page)) {
-					if (!pte_numa(oldpte)) {
-						ptep_set_numa(mm, addr, pte);
-						updated = true;
-					}
-				}
+			ptent = ptep_modify_prot_start(mm, addr, pte);
+			ptent = pte_modify(ptent, newprot);
+
+			/* Avoid taking write faults for known dirty pages */
+			if (dirty_accountable && pte_dirty(ptent) &&
+					(pte_soft_dirty(ptent) ||
+					 !(vma->vm_flags & VM_SOFTDIRTY))) {
+				ptent = pte_mkwrite(ptent);
 			}
-			if (updated)
-				pages++;
+			ptep_modify_prot_commit(mm, addr, pte, ptent);
+			pages++;
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index dfb79e0..4b8ad76 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -193,7 +193,7 @@  void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
 	pmd_t entry = *pmdp;
-	if (pmd_numa(entry))
+	if (pmd_protnone(entry))
 		entry = pmd_mknonnuma(entry);
 	set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry));
 	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);