diff mbox

[1/2] mm/autonuma: Let architecture override how the write bit should be stashed in a protnone pte.

Message ID 1486609259-6796-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

Aneesh Kumar K.V Feb. 9, 2017, 3 a.m. UTC
Autonuma preserves the write permission across a NUMA fault to avoid taking
a write fault after a NUMA fault (commit b191f9b106ea "mm: numa: preserve PTE
write permissions across a NUMA hinting fault"). Architectures can implement
protnone in different ways, and some may choose to implement it by clearing the
Read/Write/Exec bits of the pte. Setting the write bit on such a pte can result
in wrong behaviour. Fix this up by allowing the arch to override how the write
bit is saved on a protnone pte.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 include/asm-generic/pgtable.h | 16 ++++++++++++++++
 mm/huge_memory.c              |  4 ++--
 mm/memory.c                   |  2 +-
 mm/mprotect.c                 |  4 ++--
 4 files changed, 21 insertions(+), 5 deletions(-)

Comments

Aneesh Kumar K.V Feb. 9, 2017, 3:16 a.m. UTC | #1
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> writes:

> Autonuma preserves the write permission across numa fault to avoid taking
> a writefault after a numa fault (Commit: b191f9b106ea " mm: numa: preserve PTE
> write permissions across a NUMA hinting fault"). Architecture can implement
> protnone in different ways and some may choose to implement that by clearing Read/
> Write/Exec bit of pte. Setting the write bit on such pte can result in wrong
> behaviour. Fix this up by allowing arch to override how to save the write bit
> on a protnone pte.
>

The problem we are trying to fix here is w.r.t. autonuma-related THP
migration. migrate_misplaced_transhuge_page() cannot deal with
concurrent modification of the page. It does a page copy without
following the migration pte sequence. IIUC, this was done to keep the
migration simpler, and at the time of implementation we didn't have a THP
page cache, which would have required a more elaborate migration scheme
[1]. That means THP autonuma migration expects the protnone
with saved write to be done such that neither the kernel nor userspace can
update the page content. This patch series enables archs like ppc64 to do that.
We are good with the hash translation mode with the current code,
because we never create a hardware page table entry for a protnone pte.


-aneesh
Michael Neuling Feb. 14, 2017, 3:58 a.m. UTC | #2
On Thu, 2017-02-09 at 08:30 +0530, Aneesh Kumar K.V wrote:
> Autonuma preserves the write permission across numa fault to avoid taking
> a writefault after a numa fault (Commit: b191f9b106ea " mm: numa: preserve PTE
> write permissions across a NUMA hinting fault"). Architecture can implement
> protnone in different ways and some may choose to implement that by clearing
> Read/
> Write/Exec bit of pte. Setting the write bit on such pte can result in wrong
> behaviour. Fix this up by allowing arch to override how to save the write bit
> on a protnone pte.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

FWIW this is pretty simple and helps us on powerpc...

Acked-By: Michael Neuling <mikey@neuling.org>

> ---
>  include/asm-generic/pgtable.h | 16 ++++++++++++++++
>  mm/huge_memory.c              |  4 ++--
>  mm/memory.c                   |  2 +-
>  mm/mprotect.c                 |  4 ++--
>  4 files changed, 21 insertions(+), 5 deletions(-)
> 
> diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
> index 18af2bcefe6a..b6f3a8a4b738 100644
> --- a/include/asm-generic/pgtable.h
> +++ b/include/asm-generic/pgtable.h
> @@ -192,6 +192,22 @@ static inline void ptep_set_wrprotect(struct mm_struct
> *mm, unsigned long addres
>  }
>  #endif
>  
> +#ifndef pte_savedwrite
> +#define pte_savedwrite pte_write
> +#endif
> +
> +#ifndef pte_mk_savedwrite
> +#define pte_mk_savedwrite pte_mkwrite
> +#endif
> +
> +#ifndef pmd_savedwrite
> +#define pmd_savedwrite pmd_write
> +#endif
> +
> +#ifndef pmd_mk_savedwrite
> +#define pmd_mk_savedwrite pmd_mkwrite
> +#endif
> +
>  #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  static inline void pmdp_set_wrprotect(struct mm_struct *mm,
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 9a6bd6c8d55a..2f0f855ec911 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -1300,7 +1300,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t
> pmd)
>  	goto out;
>  clear_pmdnuma:
>  	BUG_ON(!PageLocked(page));
> -	was_writable = pmd_write(pmd);
> +	was_writable = pmd_savedwrite(pmd);
>  	pmd = pmd_modify(pmd, vma->vm_page_prot);
>  	pmd = pmd_mkyoung(pmd);
>  	if (was_writable)
> @@ -1555,7 +1555,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t
> *pmd,
>  			entry = pmdp_huge_get_and_clear_notify(mm, addr,
> pmd);
>  			entry = pmd_modify(entry, newprot);
>  			if (preserve_write)
> -				entry = pmd_mkwrite(entry);
> +				entry = pmd_mk_savedwrite(entry);
>  			ret = HPAGE_PMD_NR;
>  			set_pmd_at(mm, addr, pmd, entry);
>  			BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
> diff --git a/mm/memory.c b/mm/memory.c
> index e78bf72f30dd..88c24f89d6d3 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3388,7 +3388,7 @@ static int do_numa_page(struct vm_fault *vmf)
>  	int target_nid;
>  	bool migrated = false;
>  	pte_t pte;
> -	bool was_writable = pte_write(vmf->orig_pte);
> +	bool was_writable = pte_savedwrite(vmf->orig_pte);
>  	int flags = 0;
>  
>  	/*
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index f9c07f54dd62..15f5c174a7c1 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -113,13 +113,13 @@ static unsigned long change_pte_range(struct
> vm_area_struct *vma, pmd_t *pmd,
>  			ptent = ptep_modify_prot_start(mm, addr, pte);
>  			ptent = pte_modify(ptent, newprot);
>  			if (preserve_write)
> -				ptent = pte_mkwrite(ptent);
> +				ptent = pte_mk_savedwrite(ptent);
>  
>  			/* Avoid taking write faults for known dirty pages */
>  			if (dirty_accountable && pte_dirty(ptent) &&
>  					(pte_soft_dirty(ptent) ||
>  					 !(vma->vm_flags & VM_SOFTDIRTY))) {
> -				ptent = pte_mkwrite(ptent);
> +				ptent = pte_mk_savedwrite(ptent);
>  			}
>  			ptep_modify_prot_commit(mm, addr, pte, ptent);
>  			pages++;
diff mbox

Patch

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 18af2bcefe6a..b6f3a8a4b738 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -192,6 +192,22 @@  static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 }
 #endif
 
+#ifndef pte_savedwrite
+#define pte_savedwrite pte_write
+#endif
+
+#ifndef pte_mk_savedwrite
+#define pte_mk_savedwrite pte_mkwrite
+#endif
+
+#ifndef pmd_savedwrite
+#define pmd_savedwrite pmd_write
+#endif
+
+#ifndef pmd_mk_savedwrite
+#define pmd_mk_savedwrite pmd_mkwrite
+#endif
+
 #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9a6bd6c8d55a..2f0f855ec911 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1300,7 +1300,7 @@  int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 	goto out;
 clear_pmdnuma:
 	BUG_ON(!PageLocked(page));
-	was_writable = pmd_write(pmd);
+	was_writable = pmd_savedwrite(pmd);
 	pmd = pmd_modify(pmd, vma->vm_page_prot);
 	pmd = pmd_mkyoung(pmd);
 	if (was_writable)
@@ -1555,7 +1555,7 @@  int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
 			entry = pmd_modify(entry, newprot);
 			if (preserve_write)
-				entry = pmd_mkwrite(entry);
+				entry = pmd_mk_savedwrite(entry);
 			ret = HPAGE_PMD_NR;
 			set_pmd_at(mm, addr, pmd, entry);
 			BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
diff --git a/mm/memory.c b/mm/memory.c
index e78bf72f30dd..88c24f89d6d3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3388,7 +3388,7 @@  static int do_numa_page(struct vm_fault *vmf)
 	int target_nid;
 	bool migrated = false;
 	pte_t pte;
-	bool was_writable = pte_write(vmf->orig_pte);
+	bool was_writable = pte_savedwrite(vmf->orig_pte);
 	int flags = 0;
 
 	/*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index f9c07f54dd62..15f5c174a7c1 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -113,13 +113,13 @@  static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			ptent = ptep_modify_prot_start(mm, addr, pte);
 			ptent = pte_modify(ptent, newprot);
 			if (preserve_write)
-				ptent = pte_mkwrite(ptent);
+				ptent = pte_mk_savedwrite(ptent);
 
 			/* Avoid taking write faults for known dirty pages */
 			if (dirty_accountable && pte_dirty(ptent) &&
 					(pte_soft_dirty(ptent) ||
 					 !(vma->vm_flags & VM_SOFTDIRTY))) {
-				ptent = pte_mkwrite(ptent);
+				ptent = pte_mk_savedwrite(ptent);
 			}
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
 			pages++;