
drm/nouveau: Fixup gk20a instobj hierarchy

Message ID 20231208104653.1917055-1-thierry.reding@gmail.com
State Handled Elsewhere
Series drm/nouveau: Fixup gk20a instobj hierarchy

Commit Message

Thierry Reding Dec. 8, 2023, 10:46 a.m. UTC
From: Thierry Reding <treding@nvidia.com>

Commit 12c9b05da918 ("drm/nouveau/imem: support allocations not
preserved across suspend") uses container_of() to cast from struct
nvkm_memory to struct nvkm_instobj, assuming that all instance objects
are derived from struct nvkm_instobj. For the gk20a family that's not
the case: its objects derive directly from struct nvkm_memory instead.
This causes subtle data corruption (nvkm_instobj.preserve ends up
overlapping gk20a_instobj.vaddr), leading to a NULL pointer dereference
in gk20a_instobj_acquire_iommu() (and possibly elsewhere), and also
prevents suspend/resume from working.

Fix this by making struct gk20a_instobj derive from struct nvkm_instobj
instead.

Fixes: 12c9b05da918 ("drm/nouveau/imem: support allocations not preserved across suspend")
Reported-by: Jonathan Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
Note that this was probably already subtly wrong before the
above-mentioned commit, but I don't think we've seen any reports
indicating actual failures related to this before, so applying this fix
for v6.7 should be good enough. The next closest candidate for the
Fixes: tag would be commit d8e83994aaf6 ("drm/nouveau/imem: improve
management of instance memory"), but that's 8 years old (Linux v4.3)...
---
 .../drm/nouveau/nvkm/subdev/instmem/gk20a.c    | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)
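
For illustration, here is a minimal user-space sketch of the
container_of() pitfall described above. The struct names mirror the
nouveau ones, but the field layouts are simplified assumptions made for
this example, not the real definitions:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's container_of(). */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct nvkm_memory { const void *func; };

/* What the generic instmem code assumes every object looks like. */
struct nvkm_instobj {
	struct nvkm_memory memory;
	bool preserve;
};

/* What gk20a actually allocated before the fix. */
struct gk20a_instobj {
	struct nvkm_memory memory;
	unsigned int *vaddr;	/* occupies the offset written as "preserve" */
};

int main(void)
{
	struct gk20a_instobj obj = {
		.vaddr = (unsigned int *)(uintptr_t)0xdeadbeef,
	};
	struct nvkm_memory *memory = &obj.memory;

	/* The generic code path casts unconditionally... */
	struct nvkm_instobj *iobj =
		container_of(memory, struct nvkm_instobj, memory);

	/* ...so this store clobbers part of gk20a_instobj.vaddr. */
	iobj->preserve = true;

	printf("vaddr after stray write: %p\n", (void *)obj.vaddr);
	return 0;
}

On a typical little-endian 64-bit build this prints a pointer whose low
byte has been overwritten, the same class of corruption that the commit
message describes for gk20a_instobj.vaddr.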

Comments

Jon Hunter Dec. 14, 2023, 9:25 a.m. UTC | #1
On 08/12/2023 10:46, Thierry Reding wrote:
> From: Thierry Reding <treding@nvidia.com>
> 
> Commit 12c9b05da918 ("drm/nouveau/imem: support allocations not
> preserved across suspend") uses container_of() to cast from struct
> nvkm_memory to struct nvkm_instobj, assuming that all instance objects
> are derived from struct nvkm_instobj. For the gk20a family that's not
> the case: its objects derive directly from struct nvkm_memory instead.
> This causes subtle data corruption (nvkm_instobj.preserve ends up
> overlapping gk20a_instobj.vaddr), leading to a NULL pointer dereference
> in gk20a_instobj_acquire_iommu() (and possibly elsewhere), and also
> prevents suspend/resume from working.
> 
> Fix this by making struct gk20a_instobj derive from struct nvkm_instobj
> instead.
> 
> Fixes: 12c9b05da918 ("drm/nouveau/imem: support allocations not preserved across suspend")
> Reported-by: Jonathan Hunter <jonathanh@nvidia.com>
> Signed-off-by: Thierry Reding <treding@nvidia.com>
> [...]

Tested-by: Jon Hunter <jonathanh@nvidia.com>

Thanks!
Jon
Dave Airlie Dec. 15, 2023, 4:14 a.m. UTC | #2
On Thu, 14 Dec 2023 at 19:26, Jon Hunter <jonathanh@nvidia.com> wrote:
>
> On 08/12/2023 10:46, Thierry Reding wrote:
> > From: Thierry Reding <treding@nvidia.com>
> >
> > Commit 12c9b05da918 ("drm/nouveau/imem: support allocations not
> > preserved across suspend") uses container_of() to cast from struct
> > nvkm_memory to struct nvkm_instobj, assuming that all instance objects
> > are derived from struct nvkm_instobj. For the gk20a family that's not
> > the case: its objects derive directly from struct nvkm_memory instead.
> > This causes subtle data corruption (nvkm_instobj.preserve ends up
> > overlapping gk20a_instobj.vaddr), leading to a NULL pointer dereference
> > in gk20a_instobj_acquire_iommu() (and possibly elsewhere), and also
> > prevents suspend/resume from working.
> >
> > Fix this by making struct gk20a_instobj derive from struct nvkm_instobj
> > instead.
> >
> > Fixes: 12c9b05da918 ("drm/nouveau/imem: support allocations not preserved across suspend")
> > Reported-by: Jonathan Hunter <jonathanh@nvidia.com>
> > Signed-off-by: Thierry Reding <treding@nvidia.com>

I've applied this to drm-fixes.

Dave.

Patch

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index 1b811d6972a1..201022ae9214 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -49,14 +49,14 @@ 
 #include <subdev/mmu.h>
 
 struct gk20a_instobj {
-	struct nvkm_memory memory;
+	struct nvkm_instobj base;
 	struct nvkm_mm_node *mn;
 	struct gk20a_instmem *imem;
 
 	/* CPU mapping */
 	u32 *vaddr;
 };
-#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory)
+#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, base.memory)
 
 /*
  * Used for objects allocated using the DMA API
@@ -148,7 +148,7 @@  gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj)
 	list_del(&obj->vaddr_node);
 	vunmap(obj->base.vaddr);
 	obj->base.vaddr = NULL;
-	imem->vaddr_use -= nvkm_memory_size(&obj->base.memory);
+	imem->vaddr_use -= nvkm_memory_size(&obj->base.base.memory);
 	nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use,
 		   imem->vaddr_max);
 }
@@ -283,7 +283,7 @@  gk20a_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm,
 {
 	struct gk20a_instobj *node = gk20a_instobj(memory);
 	struct nvkm_vmm_map map = {
-		.memory = &node->memory,
+		.memory = &node->base.memory,
 		.offset = offset,
 		.mem = node->mn,
 	};
@@ -391,8 +391,8 @@  gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
 		return -ENOMEM;
 	*_node = &node->base;
 
-	nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory);
-	node->base.memory.ptrs = &gk20a_instobj_ptrs;
+	nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.base.memory);
+	node->base.base.memory.ptrs = &gk20a_instobj_ptrs;
 
 	node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
 					   &node->handle, GFP_KERNEL,
@@ -438,8 +438,8 @@  gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
 	*_node = &node->base;
 	node->dma_addrs = (void *)(node->pages + npages);
 
-	nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory);
-	node->base.memory.ptrs = &gk20a_instobj_ptrs;
+	nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.base.memory);
+	node->base.base.memory.ptrs = &gk20a_instobj_ptrs;
 
 	/* Allocate backing memory */
 	for (i = 0; i < npages; i++) {
@@ -533,7 +533,7 @@  gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero,
 	else
 		ret = gk20a_instobj_ctor_dma(imem, size >> PAGE_SHIFT,
 					     align, &node);
-	*pmemory = node ? &node->memory : NULL;
+	*pmemory = node ? &node->base.memory : NULL;
 	if (ret)
 		return ret;
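
As a reading aid for the ->base.base chains above, here is a simplified
sketch of the embedding hierarchy that results from this patch. Field
lists are trimmed to members visible in the diff; the real structs
carry more:

typedef unsigned int u32;

struct nvkm_memory { const void *func; /* ptrs, ... */ };
struct nvkm_mm_node;
struct gk20a_instmem;

struct nvkm_instobj {
	struct nvkm_memory memory;
	/* preserve, ... */
};

struct gk20a_instobj {			/* common gk20a part */
	struct nvkm_instobj base;	/* was: struct nvkm_memory memory */
	struct nvkm_mm_node *mn;
	struct gk20a_instmem *imem;
	u32 *vaddr;			/* CPU mapping */
};

struct gk20a_instobj_iommu {		/* IOMMU-backed variant */
	struct gk20a_instobj base;
	/* vaddr_node, pages, dma_addrs, ... */
};

/*
 * From a gk20a_instobj_iommu (or _dma) node:
 *   node->base              is the struct gk20a_instobj
 *   node->base.base         is the struct nvkm_instobj
 *   node->base.base.memory  is the struct nvkm_memory
 * which is why the constructors now pass &node->base.base.memory to
 * nvkm_memory_ctor(), and why generic code can safely container_of()
 * from the nvkm_memory back to the nvkm_instobj.
 */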