
[4/4] mm: check the device private page owner in hmm_range_fault

Message ID 20200316193216.920734-5-hch@lst.de
State Not Applicable
Series [1/4] memremap: add an owner field to struct dev_pagemap

Commit Message

Christoph Hellwig March 16, 2020, 7:32 p.m. UTC
hmm_range_fault() will succeed for any kind of device private memory,
even if it doesn't belong to the calling entity.  While nouveau
has some crude checks for that, they are broken because they assume
nouveau is the only user of device private memory.  Fix this by
passing in an expected pgmap owner in struct hmm_range.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Fixes: 4ef589dc9b10 ("mm/hmm/devmem: device memory hotplug using ZONE_DEVICE")
---
 drivers/gpu/drm/nouveau/nouveau_dmem.c | 12 ------------
 include/linux/hmm.h                    |  2 ++
 mm/hmm.c                               | 10 +++++++++-
 3 files changed, 11 insertions(+), 13 deletions(-)
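
For drivers, the new field slots into the existing hmm_range setup.  A
minimal sketch of the intended usage, where my_driver stands in for any
pointer unique to the calling driver (the names here are illustrative,
not taken from this patch):

	/* at pagemap registration time (owner was added in patch 1/4): */
	pgmap->type = MEMORY_DEVICE_PRIVATE;
	pgmap->owner = my_driver;	/* opaque cookie, only ever compared */

	/*
	 * At fault time only device private pages whose pgmap->owner
	 * matches are reported, replacing nouveau's old
	 * nouveau_dmem_page() check with a check in the core walker:
	 */
	struct hmm_range range = {
		.notifier		= &notifier,
		.start			= start,
		.end			= end,
		.pfns			= pfns,
		/* ... flags, values, pfn_shift as before ... */
		.dev_private_owner	= my_driver,
	};
	ret = hmm_range_fault(&range, 0);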

Comments

Jason Gunthorpe March 16, 2020, 7:49 p.m. UTC | #1
On Mon, Mar 16, 2020 at 08:32:16PM +0100, Christoph Hellwig wrote:
> hmm_range_fault() will succeed for any kind of device private memory,
> even if it doesn't belong to the calling entity.  While nouveau
> has some crude checks for that, they are broken because they assume
> nouveau is the only user of device private memory.  Fix this by
> passing in an expected pgmap owner in struct hmm_range.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Fixes: 4ef589dc9b10 ("mm/hmm/devmem: device memory hotplug using ZONE_DEVICE")
> ---
>  drivers/gpu/drm/nouveau/nouveau_dmem.c | 12 ------------
>  include/linux/hmm.h                    |  2 ++
>  mm/hmm.c                               | 10 +++++++++-
>  3 files changed, 11 insertions(+), 13 deletions(-)

Nice

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason
Ralph Campbell March 16, 2020, 11:11 p.m. UTC | #2
On 3/16/20 12:32 PM, Christoph Hellwig wrote:
> hmm_range_fault() will succeed for any kind of device private memory,
> even if it doesn't belong to the calling entity.  While nouveau
> has some crude checks for that, they are broken because they assume
> nouveau is the only user of device private memory.  Fix this by
> passing in an expected pgmap owner in struct hmm_range.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Fixes: 4ef589dc9b10 ("mm/hmm/devmem: device memory hotplug using ZONE_DEVICE")

Looks good.
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>

> ---
>   drivers/gpu/drm/nouveau/nouveau_dmem.c | 12 ------------
>   include/linux/hmm.h                    |  2 ++
>   mm/hmm.c                               | 10 +++++++++-
>   3 files changed, 11 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> index edfd0805fba4..ad89e09a0be3 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> @@ -672,12 +672,6 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
>   	return ret;
>   }
>   
> -static inline bool
> -nouveau_dmem_page(struct nouveau_drm *drm, struct page *page)
> -{
> -	return is_device_private_page(page) && drm->dmem == page_to_dmem(page);
> -}
> -
>   void
>   nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
>   			 struct hmm_range *range)
> @@ -696,12 +690,6 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
>   		if (!is_device_private_page(page))
>   			continue;
>   
> -		if (!nouveau_dmem_page(drm, page)) {
> -			WARN(1, "Some unknown device memory !\n");
> -			range->pfns[i] = 0;
> -			continue;
> -		}
> -
>   		addr = nouveau_dmem_page_addr(page);
>   		range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
>   		range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 5e6034f105c3..bb6be4428633 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h
> @@ -132,6 +132,7 @@ enum hmm_pfn_value_e {
>    * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
>    * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT)
>    * @valid: pfns array did not change since it has been fill by an HMM function
> + * @dev_private_owner: owner of device private pages
>    */
>   struct hmm_range {
>   	struct mmu_interval_notifier *notifier;
> @@ -144,6 +145,7 @@ struct hmm_range {
>   	uint64_t		default_flags;
>   	uint64_t		pfn_flags_mask;
>   	uint8_t			pfn_shift;
> +	void			*dev_private_owner;
>   };
>   
>   /*
> diff --git a/mm/hmm.c b/mm/hmm.c
> index cfad65f6a67b..b75b3750e03d 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -216,6 +216,14 @@ int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
>   		unsigned long end, uint64_t *pfns, pmd_t pmd);
>   #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>   
> +static inline bool hmm_is_device_private_entry(struct hmm_range *range,
> +		swp_entry_t entry)
> +{
> +	return is_device_private_entry(entry) &&
> +		device_private_entry_to_page(entry)->pgmap->owner ==
> +		range->dev_private_owner;
> +}
> +
>   static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
>   {
>   	if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte))
> @@ -254,7 +262,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
>   		 * Never fault in device private pages pages, but just report
>   		 * the PFN even if not present.
>   		 */
> -		if (is_device_private_entry(entry)) {
> +		if (hmm_is_device_private_entry(range, entry)) {
>   			*pfn = hmm_device_entry_from_pfn(range,
>   					    swp_offset(entry));
>   			*pfn |= range->flags[HMM_PFN_VALID];
>
Jason Gunthorpe March 20, 2020, 1:41 p.m. UTC | #3
On Mon, Mar 16, 2020 at 08:32:16PM +0100, Christoph Hellwig wrote:
> diff --git a/mm/hmm.c b/mm/hmm.c
> index cfad65f6a67b..b75b3750e03d 100644
> +++ b/mm/hmm.c
> @@ -216,6 +216,14 @@ int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
>  		unsigned long end, uint64_t *pfns, pmd_t pmd);
>  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>  
> +static inline bool hmm_is_device_private_entry(struct hmm_range *range,
> +		swp_entry_t entry)
> +{
> +	return is_device_private_entry(entry) &&
> +		device_private_entry_to_page(entry)->pgmap->owner ==
> +		range->dev_private_owner;
> +}

Thinking about this some more, does the locking work out here?

hmm_range_fault() runs with mmap_sem in read, and does not lock any of
the page table levels.

So it relies on accessing stale pte data being safe, and here we
introduce for the first time a page pointer dereference and a pgmap
dereference without any locking/refcounting.

The get_dev_pagemap() worked on the PFN and obtained a refcount, so it
created safety.

Is there some tricky reason this is safe, eg a DEVICE_PRIVATE page
cannot be removed from the vma without holding mmap_sem in write or
something?

Jason
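
(For reference, the get_dev_pagemap() pattern referred to here looks
roughly like the following - a sketch, not code from this series:)

	struct dev_pagemap *pgmap;

	/*
	 * get_dev_pagemap() looks up the pagemap hosting @pfn and takes
	 * a reference on it, returning NULL if the pagemap is already
	 * being torn down.  While the reference is held the struct pages
	 * backing the pfn (and thus pgmap->owner) cannot go away.
	 */
	pgmap = get_dev_pagemap(pfn, NULL);
	if (!pgmap)
		return -EBUSY;
	/* ... safe to look at pfn_to_page(pfn)->pgmap here ... */
	put_dev_pagemap(pgmap);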
Christoph Hellwig March 21, 2020, 8:22 a.m. UTC | #4
On Fri, Mar 20, 2020 at 10:41:09AM -0300, Jason Gunthorpe wrote:
> Thinking about this some more, does the locking work out here?
> 
> hmm_range_fault() runs with mmap_sem in read, and does not lock any of
> the page table levels.
> 
> So it relies on accessing stale pte data being safe, and here we
> introduce for the first time a page pointer dereference and a pgmap
> dereference without any locking/refcounting.
> 
> The get_dev_pagemap() worked on the PFN and obtained a refcount, so it
> created safety.
> 
> Is there some tricky reason this is safe, eg a DEVICE_PRIVATE page
> cannot be removed from the vma without holding mmap_sem in write or
> something?

I don't think there is any specific protection.  Let me see if we
can throw in a get_dev_pagemap here - note that current mainline doesn't
even use it for this path..
Jason Gunthorpe March 21, 2020, 12:38 p.m. UTC | #5
On Sat, Mar 21, 2020 at 09:22:36AM +0100, Christoph Hellwig wrote:
> On Fri, Mar 20, 2020 at 10:41:09AM -0300, Jason Gunthorpe wrote:
> > Thinking about this some more, does the locking work out here?
> > 
> > hmm_range_fault() runs with mmap_sem in read, and does not lock any of
> > the page table levels.
> > 
> > So it relies on accessing stale pte data being safe, and here we
> > introduce for the first time a page pointer dereference and a pgmap
> > dereference without any locking/refcounting.
> > 
> > The get_dev_pagemap() worked on the PFN and obtained a refcount, so it
> > created safety.
> > 
> > Is there some tricky reason this is safe, eg a DEVICE_PRIVATE page
> > cannot be removed from the vma without holding mmap_sem in write or
> > something?
> 
> I don't think there is any specific protection.  Let me see if we
> can throw in a get_dev_pagemap here

The page tables are RCU protected right? could we do something like

 if (is_device_private_entry(entry)) {
       rcu_read_lock();
       if (READ_ONCE(*ptep) != pte) {
           rcu_read_unlock();
           return -EBUSY;
       }
       hmm_is_device_private_entry(range, entry);
       rcu_read_unlock();
 }

?

Then pgmap needs a synchronize_rcu before the struct pages are
destroyed (possibly gup_fast already requires this?)

I've got some other patches trying to close some of these styles of
bugs, but 

> note that current mainline doesn't even use it for this path..

Don't follow?

Jason
Christoph Hellwig March 21, 2020, 3:18 p.m. UTC | #6
On Sat, Mar 21, 2020 at 09:38:04AM -0300, Jason Gunthorpe wrote:
> > I don't think there is any specific protection.  Let me see if we
> > can throw in a get_dev_pagemap here
> 
> The page tables are RCU protected right? could we do something like
> 
>  if (is_device_private_entry(entry)) {
>        rcu_read_lock();
>        if (READ_ONCE(*ptep) != pte) {
>            rcu_read_unlock();
>            return -EBUSY;
>        }
>        hmm_is_device_private_entry(range, entry);
>        rcu_read_unlock();
>  }
> 
> ?

Are they everywhere?  I'd really love to hear from people who really
know this area.

> 
> Then pgmap needs a synchronize_rcu before the struct pages are
> destroyed (possibly gup_fast already requires this?)
> 
> I've got some other patches trying to close some of these styles of
> bugs, but 
> 
> > note that current mainline doesn't even use it for this path..
> 
> Don't follow?

If you look at mainline (or any other tree), we only do a
get_dev_pagemap for devmap ptes.  But device private pages are encoded
as non-present swap ptes.
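
(A simplified sketch of the two cases in hmm_vma_handle_pte(), to
illustrate the point - not verbatim mainline code:)

	if (!pte_present(pte)) {
		swp_entry_t entry = pte_to_swp_entry(pte);

		/*
		 * Device private memory shows up as a non-present swap
		 * pte: the PFN is taken from the swap offset, and
		 * get_dev_pagemap() is never called on this path.
		 */
		if (is_device_private_entry(entry)) {
			/* ... report hmm_device_entry_from_pfn() ... */
		}
	} else if (pte_devmap(pte)) {
		/* only present devmap ptes take a pagemap reference */
		pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
	}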

Patch

diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index edfd0805fba4..ad89e09a0be3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -672,12 +672,6 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
 	return ret;
 }
 
-static inline bool
-nouveau_dmem_page(struct nouveau_drm *drm, struct page *page)
-{
-	return is_device_private_page(page) && drm->dmem == page_to_dmem(page);
-}
-
 void
 nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
 			 struct hmm_range *range)
@@ -696,12 +690,6 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
 		if (!is_device_private_page(page))
 			continue;
 
-		if (!nouveau_dmem_page(drm, page)) {
-			WARN(1, "Some unknown device memory !\n");
-			range->pfns[i] = 0;
-			continue;
-		}
-
 		addr = nouveau_dmem_page_addr(page);
 		range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
 		range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 5e6034f105c3..bb6be4428633 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -132,6 +132,7 @@ enum hmm_pfn_value_e {
  * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
  * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT)
  * @valid: pfns array did not change since it has been fill by an HMM function
+ * @dev_private_owner: owner of device private pages
  */
 struct hmm_range {
 	struct mmu_interval_notifier *notifier;
@@ -144,6 +145,7 @@ struct hmm_range {
 	uint64_t		default_flags;
 	uint64_t		pfn_flags_mask;
 	uint8_t			pfn_shift;
+	void			*dev_private_owner;
 };
 
 /*
diff --git a/mm/hmm.c b/mm/hmm.c
index cfad65f6a67b..b75b3750e03d 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -216,6 +216,14 @@ int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
 		unsigned long end, uint64_t *pfns, pmd_t pmd);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static inline bool hmm_is_device_private_entry(struct hmm_range *range,
+		swp_entry_t entry)
+{
+	return is_device_private_entry(entry) &&
+		device_private_entry_to_page(entry)->pgmap->owner ==
+		range->dev_private_owner;
+}
+
 static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
 {
 	if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte))
@@ -254,7 +262,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 		 * Never fault in device private pages pages, but just report
 		 * the PFN even if not present.
 		 */
-		if (is_device_private_entry(entry)) {
+		if (hmm_is_device_private_entry(range, entry)) {
 			*pfn = hmm_device_entry_from_pfn(range,
 					    swp_offset(entry));
 			*pfn |= range->flags[HMM_PFN_VALID];