[v9,14/24] mm: Introduce __maybe_mkwrite()

Message ID: 1520963994-28477-15-git-send-email-ldufour@linux.vnet.ibm.com
State: Not Applicable
Series: Speculative page faults

Commit Message

Laurent Dufour March 13, 2018, 5:59 p.m. UTC
The current maybe_mkwrite() is passed a pointer to the vma structure in
order to fetch the vm_flags field.

When dealing with the speculative page fault handler, it is better to
rely on the cached vm_flags value stored in the vm_fault structure.

This patch introduces a __maybe_mkwrite() service which can be called by
passing the vm_flags value directly.

No functional change is expected for the other callers of
maybe_mkwrite().

Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
---
 include/linux/mm.h | 9 +++++++--
 mm/memory.c        | 6 +++---
 2 files changed, 10 insertions(+), 5 deletions(-)
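
[Editor's note: the vmf->vma_flags and vmf->vma_page_prot fields used below
are not part of the mainline struct vm_fault at this point; they are assumed
to have been added earlier in this series by patch 11 ("mm: Cache some VMA
fields in the vm_fault structure"). A sketch of the assumed layout, not the
exact patch 11 hunk:

/*
 * Sketch only: fields assumed cached in struct vm_fault by patch 11 of
 * this series, so the speculative fault handler can avoid dereferencing
 * the vma while it may be concurrently modified.
 */
struct vm_fault {
	struct vm_area_struct *vma;	/* Target VMA */
	unsigned long vma_flags;	/* Cached copy of vma->vm_flags */
	pgprot_t vma_page_prot;		/* Cached copy of vma->vm_page_prot */
	/* ... remaining fields unchanged ... */
};
]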

Comments

David Rientjes April 2, 2018, 11:12 p.m. UTC | #1
On Tue, 13 Mar 2018, Laurent Dufour wrote:

> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index dfa81a638b7c..a84ddc218bbd 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -684,13 +684,18 @@ void free_compound_page(struct page *page);
>   * pte_mkwrite.  But get_user_pages can cause write faults for mappings
>   * that do not have writing enabled, when used by access_process_vm.
>   */
> -static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
> +static inline pte_t __maybe_mkwrite(pte_t pte, unsigned long vma_flags)
>  {
> -	if (likely(vma->vm_flags & VM_WRITE))
> +	if (likely(vma_flags & VM_WRITE))
>  		pte = pte_mkwrite(pte);
>  	return pte;
>  }
>  
> +static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
> +{
> +	return __maybe_mkwrite(pte, vma->vm_flags);
> +}
> +
>  int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
>  		struct page *page);
>  int finish_fault(struct vm_fault *vmf);
> diff --git a/mm/memory.c b/mm/memory.c
> index 0a0a483d9a65..af0338fbc34d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2472,7 +2472,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
>  
>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>  	entry = pte_mkyoung(vmf->orig_pte);
> -	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> +	entry = __maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
>  	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
>  		update_mmu_cache(vma, vmf->address, vmf->pte);
>  	pte_unmap_unlock(vmf->pte, vmf->ptl);
> @@ -2549,8 +2549,8 @@ static int wp_page_copy(struct vm_fault *vmf)
>  			inc_mm_counter_fast(mm, MM_ANONPAGES);
>  		}
>  		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
> -		entry = mk_pte(new_page, vma->vm_page_prot);
> -		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> +		entry = mk_pte(new_page, vmf->vma_page_prot);
> +		entry = __maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
>  		/*
>  		 * Clear the pte entry and flush it first, before updating the
>  		 * pte with the new entry. This will avoid a race condition

Don't you also need to do this in do_swap_page()?

diff --git a/mm/memory.c b/mm/memory.c
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3067,9 +3067,9 @@ int do_swap_page(struct vm_fault *vmf)
 
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
-	pte = mk_pte(page, vma->vm_page_prot);
+	pte = mk_pte(page, vmf->vma_page_prot);
 	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
-		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
+		pte = __maybe_mkwrite(pte_mkdirty(pte), vmf->vma_flags);
 		vmf->flags &= ~FAULT_FLAG_WRITE;
 		ret |= VM_FAULT_WRITE;
 		exclusive = RMAP_EXCLUSIVE;
Laurent Dufour April 4, 2018, 3:56 p.m. UTC | #2
On 03/04/2018 01:12, David Rientjes wrote:
> On Tue, 13 Mar 2018, Laurent Dufour wrote:
> 
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index dfa81a638b7c..a84ddc218bbd 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -684,13 +684,18 @@ void free_compound_page(struct page *page);
>>   * pte_mkwrite.  But get_user_pages can cause write faults for mappings
>>   * that do not have writing enabled, when used by access_process_vm.
>>   */
>> -static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
>> +static inline pte_t __maybe_mkwrite(pte_t pte, unsigned long vma_flags)
>>  {
>> -	if (likely(vma->vm_flags & VM_WRITE))
>> +	if (likely(vma_flags & VM_WRITE))
>>  		pte = pte_mkwrite(pte);
>>  	return pte;
>>  }
>>  
>> +static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
>> +{
>> +	return __maybe_mkwrite(pte, vma->vm_flags);
>> +}
>> +
>>  int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
>>  		struct page *page);
>>  int finish_fault(struct vm_fault *vmf);
>> diff --git a/mm/memory.c b/mm/memory.c
>> index 0a0a483d9a65..af0338fbc34d 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -2472,7 +2472,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
>>  
>>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>>  	entry = pte_mkyoung(vmf->orig_pte);
>> -	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>> +	entry = __maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
>>  	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
>>  		update_mmu_cache(vma, vmf->address, vmf->pte);
>>  	pte_unmap_unlock(vmf->pte, vmf->ptl);
>> @@ -2549,8 +2549,8 @@ static int wp_page_copy(struct vm_fault *vmf)
>>  			inc_mm_counter_fast(mm, MM_ANONPAGES);
>>  		}
>>  		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>> -		entry = mk_pte(new_page, vma->vm_page_prot);
>> -		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>> +		entry = mk_pte(new_page, vmf->vma_page_prot);
>> +		entry = __maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
>>  		/*
>>  		 * Clear the pte entry and flush it first, before updating the
>>  		 * pte with the new entry. This will avoid a race condition
> 
> Don't you also need to do this in do_swap_page()?

Indeed, I'll drop this patch since all the changes are now done in patch 11,
"mm: Cache some VMA fields in the vm_fault structure", where, as you
suggested, maybe_mkwrite() is passed the vm_flags value directly (see the
sketch after the quoted hunk below).

> diff --git a/mm/memory.c b/mm/memory.c
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3067,9 +3067,9 @@ int do_swap_page(struct vm_fault *vmf)
> 
>  	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
>  	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
> -	pte = mk_pte(page, vma->vm_page_prot);
> +	pte = mk_pte(page, vmf->vma_page_prot);
>  	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
> -		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
> +		pte = __maybe_mkwrite(pte_mkdirty(pte), vmf->vma_flags);
>  		vmf->flags &= ~FAULT_FLAG_WRITE;
>  		ret |= VM_FAULT_WRITE;
>  		exclusive = RMAP_EXCLUSIVE;
>
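
[Editor's note: a minimal sketch of the folded-in form referred to above,
assuming maybe_mkwrite() simply takes the flags value so that no separate
__maybe_mkwrite() wrapper is needed; the authoritative version lives in
patch 11 of the series and is not shown here:

/*
 * Hypothetical folded-in variant: callers on the speculative path pass
 * the cached vmf->vma_flags, regular callers pass vma->vm_flags.
 */
static inline pte_t maybe_mkwrite(pte_t pte, unsigned long vma_flags)
{
	if (likely(vma_flags & VM_WRITE))
		pte = pte_mkwrite(pte);
	return pte;
}
]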

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dfa81a638b7c..a84ddc218bbd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -684,13 +684,18 @@ void free_compound_page(struct page *page);
  * pte_mkwrite.  But get_user_pages can cause write faults for mappings
  * that do not have writing enabled, when used by access_process_vm.
  */
-static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
+static inline pte_t __maybe_mkwrite(pte_t pte, unsigned long vma_flags)
 {
-	if (likely(vma->vm_flags & VM_WRITE))
+	if (likely(vma_flags & VM_WRITE))
 		pte = pte_mkwrite(pte);
 	return pte;
 }
 
+static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+	return __maybe_mkwrite(pte, vma->vm_flags);
+}
+
 int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page);
 int finish_fault(struct vm_fault *vmf);
diff --git a/mm/memory.c b/mm/memory.c
index 0a0a483d9a65..af0338fbc34d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2472,7 +2472,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
 
 	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 	entry = pte_mkyoung(vmf->orig_pte);
-	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	entry = __maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
 	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
 		update_mmu_cache(vma, vmf->address, vmf->pte);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2549,8 +2549,8 @@ static int wp_page_copy(struct vm_fault *vmf)
 			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		}
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
-		entry = mk_pte(new_page, vma->vm_page_prot);
-		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		entry = mk_pte(new_page, vmf->vma_page_prot);
+		entry = __maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
 		/*
 		 * Clear the pte entry and flush it first, before updating the
 		 * pte with the new entry. This will avoid a race condition