diff mbox series

[v3,12/15] mm/memory: pass PTE to copy_present_pte()

Message ID 20240129124649.189745-13-david@redhat.com
State New
Headers show
Series mm/memory: optimize fork() with PTE-mapped THP | expand

Commit Message

David Hildenbrand Jan. 29, 2024, 12:46 p.m. UTC
We already read it, let's just forward it.

This patch is based on work by Ryan Roberts.

Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 mm/memory.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

Comments

Mike Rapoport Feb. 8, 2024, 6:27 a.m. UTC | #1
On Mon, Jan 29, 2024 at 01:46:46PM +0100, David Hildenbrand wrote:
> We already read it, let's just forward it.
> 
> This patch is based on work by Ryan Roberts.
> 
> Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>

Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>

> ---
>  mm/memory.c | 7 +++----
>  1 file changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/memory.c b/mm/memory.c
> index a3bdb25f4c8d..41b24da5be38 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -959,10 +959,9 @@ static inline void __copy_present_pte(struct vm_area_struct *dst_vma,
>   */
>  static inline int
>  copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
> -		 pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
> -		 struct folio **prealloc)
> +		 pte_t *dst_pte, pte_t *src_pte, pte_t pte, unsigned long addr,
> +		 int *rss, struct folio **prealloc)
>  {
> -	pte_t pte = ptep_get(src_pte);
>  	struct page *page;
>  	struct folio *folio;
>  
> @@ -1103,7 +1102,7 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
>  		}
>  		/* copy_present_pte() will clear `*prealloc' if consumed */
>  		ret = copy_present_pte(dst_vma, src_vma, dst_pte, src_pte,
> -				       addr, rss, &prealloc);
> +				       ptent, addr, rss, &prealloc);
>  		/*
>  		 * If we need a pre-allocated page for this pte, drop the
>  		 * locks, allocate, and try again.
> -- 
> 2.43.0
> 
>
David Hildenbrand Feb. 14, 2024, 10:40 p.m. UTC | #2
On 29.01.24 13:46, David Hildenbrand wrote:
> We already read it, let's just forward it.
> 
> This patch is based on work by Ryan Roberts.
> 
> Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>   mm/memory.c | 7 +++----
>   1 file changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/memory.c b/mm/memory.c
> index a3bdb25f4c8d..41b24da5be38 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -959,10 +959,9 @@ static inline void __copy_present_pte(struct vm_area_struct *dst_vma,
>    */
>   static inline int
>   copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
> -		 pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
> -		 struct folio **prealloc)
> +		 pte_t *dst_pte, pte_t *src_pte, pte_t pte, unsigned long addr,
> +		 int *rss, struct folio **prealloc)
>   {
> -	pte_t pte = ptep_get(src_pte);
>   	struct page *page;
>   	struct folio *folio;
>   
> @@ -1103,7 +1102,7 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
>   		}
>   		/* copy_present_pte() will clear `*prealloc' if consumed */
>   		ret = copy_present_pte(dst_vma, src_vma, dst_pte, src_pte,
> -				       addr, rss, &prealloc);
> +				       ptent, addr, rss, &prealloc);
>   		/*
>   		 * If we need a pre-allocated page for this pte, drop the
>   		 * locks, allocate, and try again.

The following fixup for that device-exclusive thingy on top (fixing a hmm
selftest I just discovered to be broken).


 From 8f9e44f25087dc71890b8d9bd680375691232e85 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 14 Feb 2024 23:09:29 +0100
Subject: [PATCH] fixup: mm/memory: pass PTE to copy_present_pte()

For device-exclusive nonswp entries (is_device_exclusive_entry()),
copy_nonpresent_pte() can turn the PTEs into actual present PTEs while
holding the page table lock.

We hae to re-read the PTE after that operation, such that we won't be
working on the stale non-present PTE, assuming it would be present.

This fixes the hmm "exclusive_cow" selftest.

  ./run_vmtests.sh -t hmm
  # #  RUN           hmm.hmm_device_private.exclusive_cow ...
  # #            OK  hmm.hmm_device_private.exclusive_cow
  # ok 23 hmm.hmm_device_private.exclusive_cow

Signed-off-by: David Hildenbrand <david@redhat.com>
---
  mm/memory.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/mm/memory.c b/mm/memory.c
index 3b8e56eb08a3..29a75f38df7c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1208,6 +1208,8 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
  				progress += 8;
  				continue;
  			}
+			ptent = ptep_get(src_pte);
+			VM_WARN_ON_ONCE(!pte_present(ptent));
  
  			/*
  			 * Device exclusive entry restored, continue by copying
diff mbox series

Patch

diff --git a/mm/memory.c b/mm/memory.c
index a3bdb25f4c8d..41b24da5be38 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -959,10 +959,9 @@  static inline void __copy_present_pte(struct vm_area_struct *dst_vma,
  */
 static inline int
 copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
-		 pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
-		 struct folio **prealloc)
+		 pte_t *dst_pte, pte_t *src_pte, pte_t pte, unsigned long addr,
+		 int *rss, struct folio **prealloc)
 {
-	pte_t pte = ptep_get(src_pte);
 	struct page *page;
 	struct folio *folio;
 
@@ -1103,7 +1102,7 @@  copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 		}
 		/* copy_present_pte() will clear `*prealloc' if consumed */
 		ret = copy_present_pte(dst_vma, src_vma, dst_pte, src_pte,
-				       addr, rss, &prealloc);
+				       ptent, addr, rss, &prealloc);
 		/*
 		 * If we need a pre-allocated page for this pte, drop the
 		 * locks, allocate, and try again.