diff mbox series

[V5,2/5] mm: update ptep_modify_prot_commit to take old pte value as arg

Message ID 20190116085035.29729-3-aneesh.kumar@linux.ibm.com (mailing list archive)
State Awaiting Upstream
Headers show
Series NestMMU pte upgrade workaround for mprotect | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success next/apply_patch Successfully applied
snowpatch_ozlabs/checkpatch warning total: 0 errors, 6 warnings, 0 checks, 89 lines checked

Commit Message

Aneesh Kumar K V Jan. 16, 2019, 8:50 a.m. UTC
Architectures like ppc64 require to do a conditional tlb flush based on the old
and new value of pte. Enable that by passing old pte value as the arg.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/s390/include/asm/pgtable.h | 3 ++-
 arch/s390/mm/pgtable.c          | 2 +-
 arch/x86/include/asm/paravirt.h | 2 +-
 fs/proc/task_mmu.c              | 8 +++++---
 include/asm-generic/pgtable.h   | 2 +-
 mm/memory.c                     | 8 ++++----
 mm/mprotect.c                   | 6 +++---
 7 files changed, 17 insertions(+), 14 deletions(-)

Comments

Michael Ellerman Jan. 30, 2019, 10:46 a.m. UTC | #1
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:

> Architectures like ppc64 require to do a conditional tlb flush based on the old
> and new value of pte. Enable that by passing old pte value as the arg.

It's not actually the architecture, it's to work around a specific bug
on Power9.

> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index c89ce07923c8..028c724dcb1a 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -110,8 +110,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
>  					continue;
>  			}
>  
> -			ptent = ptep_modify_prot_start(vma, addr, pte);
> -			ptent = pte_modify(ptent, newprot);
> +			oldpte = ptep_modify_prot_start(vma, addr, pte);
> +			ptent = pte_modify(oldpte, newprot);
>  			if (preserve_write)
>  				ptent = pte_mk_savedwrite(ptent);

Is it OK to reuse oldpte here?

It was set at the top of the loop with:

		oldpte = *pte;

Is it guaranteed that ptep_modify_prot_start() returns the old value
unmodified, or could an implementation conceivably filter some bits out?

If so then it could be confusing for oldpte to have its value change
half way through the loop.


cheers
Aneesh Kumar K V Jan. 31, 2019, 5:03 a.m. UTC | #2
Michael Ellerman <mpe@ellerman.id.au> writes:

> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>
>> Architectures like ppc64 require to do a conditional tlb flush based on the old
>> and new value of pte. Enable that by passing old pte value as the arg.
>
> It's not actually the architecture, it's to work around a specific bug
> on Power9.
>
>> diff --git a/mm/mprotect.c b/mm/mprotect.c
>> index c89ce07923c8..028c724dcb1a 100644
>> --- a/mm/mprotect.c
>> +++ b/mm/mprotect.c
>> @@ -110,8 +110,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
>>  					continue;
>>  			}
>>  
>> -			ptent = ptep_modify_prot_start(vma, addr, pte);
>> -			ptent = pte_modify(ptent, newprot);
>> +			oldpte = ptep_modify_prot_start(vma, addr, pte);
>> +			ptent = pte_modify(oldpte, newprot);
>>  			if (preserve_write)
>>  				ptent = pte_mk_savedwrite(ptent);
>
> Is it OK to reuse oldpte here?
>
> It was set at the top of the loop with:
>
> 		oldpte = *pte;
>
> Is it guaranteed that ptep_modify_prot_start() returns the old value
> unmodified, or could an implementation conceivably filter some bits out?
>
> If so then it could be confusing for oldpte to have its value change
> half way through the loop.
>

ptep_modify_prot_start and ptep_modify_prot_commit is the sequence that
we can safely use to do read/modify/update of a pte entry. Now w.r.t old
pte, we can't update the pte bits from software because we are holding
the page table lock(ptl). Now we could definitely end up having updated
reference and change bit. But we make sure we don't lose those by using
prot_start and prot_commit sequence.

-aneesh
diff mbox series

Patch

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5d730199e37b..76dc344edb8c 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1070,7 +1070,8 @@  static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
-void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long, pte_t *, pte_t);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+			     pte_t *, pte_t, pte_t);
 
 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 29c0a21cd34a..b283b92722cc 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -322,7 +322,7 @@  pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
 EXPORT_SYMBOL(ptep_modify_prot_start);
 
 void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
-			     pte_t *ptep, pte_t pte)
+			     pte_t *ptep, pte_t old_pte, pte_t pte)
 {
 	pgste_t pgste;
 	struct mm_struct *mm = vma->vm_mm;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c5a7f18cce7e..c25c38a05c1c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -433,7 +433,7 @@  static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned
 }
 
 static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
-					   pte_t *ptep, pte_t pte)
+					   pte_t *ptep, pte_t old_pte, pte_t pte)
 {
 
 	if (sizeof(pteval_t) > sizeof(long))
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 0adcaf338d64..f367121c747e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -942,10 +942,12 @@  static inline void clear_soft_dirty(struct vm_area_struct *vma,
 	pte_t ptent = *pte;
 
 	if (pte_present(ptent)) {
-		ptent = ptep_modify_prot_start(vma, addr, pte);
-		ptent = pte_wrprotect(ptent);
+		pte_t old_pte;
+
+		old_pte = ptep_modify_prot_start(vma, addr, pte);
+		ptent = pte_wrprotect(old_pte);
 		ptent = pte_clear_soft_dirty(ptent);
-		ptep_modify_prot_commit(vma, addr, pte, ptent);
+		ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
 	} else if (is_swap_pte(ptent)) {
 		ptent = pte_swp_clear_soft_dirty(ptent);
 		set_pte_at(vma->vm_mm, addr, pte, ptent);
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 8b0e933efe26..fa782fba51ee 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -657,7 +657,7 @@  static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
  */
 static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
 					   unsigned long addr,
-					   pte_t *ptep, pte_t pte)
+					   pte_t *ptep, pte_t old_pte, pte_t pte)
 {
 	__ptep_modify_prot_commit(vma, addr, ptep, pte);
 }
diff --git a/mm/memory.c b/mm/memory.c
index fb742f3237ad..b5b77fb5ba2c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3590,7 +3590,7 @@  static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	int last_cpupid;
 	int target_nid;
 	bool migrated = false;
-	pte_t pte;
+	pte_t pte, old_pte;
 	bool was_writable = pte_savedwrite(vmf->orig_pte);
 	int flags = 0;
 
@@ -3610,12 +3610,12 @@  static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	 * Make it present again, Depending on how arch implementes non
 	 * accessible ptes, some can allow access by kernel mode.
 	 */
-	pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
-	pte = pte_modify(pte, vma->vm_page_prot);
+	old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
+	pte = pte_modify(old_pte, vma->vm_page_prot);
 	pte = pte_mkyoung(pte);
 	if (was_writable)
 		pte = pte_mkwrite(pte);
-	ptep_modify_prot_commit(vma, vmf->address, vmf->pte, pte);
+	ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
 	update_mmu_cache(vma, vmf->address, vmf->pte);
 
 	page = vm_normal_page(vma, vmf->address, pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c89ce07923c8..028c724dcb1a 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -110,8 +110,8 @@  static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 					continue;
 			}
 
-			ptent = ptep_modify_prot_start(vma, addr, pte);
-			ptent = pte_modify(ptent, newprot);
+			oldpte = ptep_modify_prot_start(vma, addr, pte);
+			ptent = pte_modify(oldpte, newprot);
 			if (preserve_write)
 				ptent = pte_mk_savedwrite(ptent);
 
@@ -121,7 +121,7 @@  static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 					 !(vma->vm_flags & VM_SOFTDIRTY))) {
 				ptent = pte_mkwrite(ptent);
 			}
-			ptep_modify_prot_commit(vma, addr, pte, ptent);
+			ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
 			pages++;
 		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);