drm/tegra: Prevent BOs from being freed during job submission

Submitted by Thierry Reding on Aug. 11, 2017, 5:59 p.m.

Details

Message ID 20170811175926.24317-1-thierry.reding@gmail.com
State New
Headers show

Commit Message

Thierry Reding Aug. 11, 2017, 5:59 p.m.
From: Dmitry Osipenko <digetx@gmail.com>

Since DRM IOCTLs are lockless, there is a chance that BOs could be
released while a job submission is in progress. To avoid that, keep the
GEM reference until the job has been pinned, part of which will be to
take another reference.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 42 +++++++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 9 deletions(-)

Comments

Dmitry Osipenko Aug. 11, 2017, 6:16 p.m.
On 11.08.2017 20:59, Thierry Reding wrote:
> From: Dmitry Osipenko <digetx@gmail.com>
> 
> Since DRM IOCTL's are lockless, there is a chance that BOs could be
> released while a job submission is in progress. To avoid that, keep the
> GEM reference until the job has been pinned, part of which will be to
> take another reference.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> Signed-off-by: Thierry Reding <treding@nvidia.com>
> ---
>  drivers/gpu/drm/tegra/drm.c | 42 +++++++++++++++++++++++++++++++++---------
>  1 file changed, 33 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index f01db33fa20f..e5d19e1c9bc8 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -320,8 +320,6 @@ host1x_bo_lookup(struct drm_file *file, u32 handle)
>  	if (!gem)
>  		return NULL;
>  
> -	drm_gem_object_unreference_unlocked(gem);
> -
>  	bo = to_tegra_bo(gem);
>  	return &bo->base;
>  }
> @@ -410,8 +408,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  		(void __user *)(uintptr_t)args->waitchks;
>  	struct drm_tegra_syncpt syncpt;
>  	struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
> +	struct drm_gem_object **refs;
>  	struct host1x_syncpt *sp;
>  	struct host1x_job *job;
> +	unsigned int num_refs;
>  	int err;
>  
>  	/* We don't yet support other than one syncpt_incr struct per submit */
> @@ -433,6 +433,26 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  	job->class = context->client->base.class;
>  	job->serialize = true;
>  
> +	/*
> +	 * Track referenced BOs so that they can be unreferenced after the
> +	 * submission is complete.
> +	 */
> +	num_refs = num_cmdbufs + num_relocs * 2 + num_waitchks;
> +
> +	if (sizeof(*refs) * num_refs > ULONG_MAX) {

Thierry, since you've omitted (u64) here (compared to the original patch), I
think it would be better to write it as:

	if (num_refs > ULONG_MAX / sizeof(*refs)) {

To avoid integer overflow in the check.

> +		err = -EINVAL;
> +		goto fail;
> +	}
> +
> +	refs = kmalloc_array(num_refs, sizeof(*refs), GFP_KERNEL);
> +	if (!refs) {
> +		err = -ENOMEM;
> +		goto fail;
> +	}
> +
> +	/* reuse as an iterator later */
> +	num_refs = 0;
> +
>  	while (num_cmdbufs) {
>  		struct drm_tegra_cmdbuf cmdbuf;
>  		struct host1x_bo *bo;
> @@ -461,6 +481,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  
>  		offset = (u64)cmdbuf.offset + (u64)cmdbuf.words * sizeof(u32);
>  		obj = host1x_to_tegra_bo(bo);
> +		refs[num_refs++] = &obj->gem;
>  
>  		/*
>  		 * Gather buffer base address must be 4-bytes aligned,
> @@ -490,6 +511,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  
>  		reloc = &job->relocarray[num_relocs];
>  		obj = host1x_to_tegra_bo(reloc->cmdbuf.bo);
> +		refs[num_refs++] = &obj->gem;
>  
>  		/*
>  		 * The unaligned cmdbuf offset will cause an unaligned write
> @@ -503,6 +525,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  		}
>  
>  		obj = host1x_to_tegra_bo(reloc->target.bo);
> +		refs[num_refs++] = &obj->gem;
>  
>  		if (reloc->target.offset >= obj->gem.size) {
>  			err = -EINVAL;
> @@ -522,6 +545,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  			goto fail;
>  
>  		obj = host1x_to_tegra_bo(wait->bo);
> +		refs[num_refs++] = &obj->gem;
>  
>  		/*
>  		 * The unaligned offset will cause an unaligned write during
> @@ -561,17 +585,17 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  		goto fail;
>  
>  	err = host1x_job_submit(job);
> -	if (err)
> -		goto fail_submit;
> +	if (err) {
> +		host1x_job_unpin(job);
> +		goto fail;
> +	}
>  
>  	args->fence = job->syncpt_end;
>  
> -	host1x_job_put(job);
> -	return 0;
> -
> -fail_submit:
> -	host1x_job_unpin(job);
>  fail:
> +	while (num_refs--)
> +		drm_gem_object_put_unlocked(refs[num_refs]);
> +
>  	host1x_job_put(job);
>  	return err;
>  }
>
Dmitry Osipenko Aug. 12, 2017, 3:24 p.m.
On 11.08.2017 21:16, Dmitry Osipenko wrote:
> On 11.08.2017 20:59, Thierry Reding wrote:
>> From: Dmitry Osipenko <digetx@gmail.com>
>>
>> Since DRM IOCTL's are lockless, there is a chance that BOs could be
>> released while a job submission is in progress. To avoid that, keep the
>> GEM reference until the job has been pinned, part of which will be to
>> take another reference.
>>
>> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
>> Signed-off-by: Thierry Reding <treding@nvidia.com>
>> ---
>>  drivers/gpu/drm/tegra/drm.c | 42 +++++++++++++++++++++++++++++++++---------
>>  1 file changed, 33 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
>> index f01db33fa20f..e5d19e1c9bc8 100644
>> --- a/drivers/gpu/drm/tegra/drm.c
>> +++ b/drivers/gpu/drm/tegra/drm.c
>> @@ -320,8 +320,6 @@ host1x_bo_lookup(struct drm_file *file, u32 handle)
>>  	if (!gem)
>>  		return NULL;
>>  
>> -	drm_gem_object_unreference_unlocked(gem);
>> -
>>  	bo = to_tegra_bo(gem);
>>  	return &bo->base;
>>  }
>> @@ -410,8 +408,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>>  		(void __user *)(uintptr_t)args->waitchks;
>>  	struct drm_tegra_syncpt syncpt;
>>  	struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
>> +	struct drm_gem_object **refs;
>>  	struct host1x_syncpt *sp;
>>  	struct host1x_job *job;
>> +	unsigned int num_refs;
>>  	int err;
>>  
>>  	/* We don't yet support other than one syncpt_incr struct per submit */
>> @@ -433,6 +433,26 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>>  	job->class = context->client->base.class;
>>  	job->serialize = true;
>>  
>> +	/*
>> +	 * Track referenced BOs so that they can be unreferenced after the
>> +	 * submission is complete.
>> +	 */
>> +	num_refs = num_cmdbufs + num_relocs * 2 + num_waitchks;
>> +
>> +	if (sizeof(*refs) * num_refs > ULONG_MAX) {
> 
> Thierry, since you've omitted (u64) here (comparing to the original patch), I
> think it should be better to write as:
> 
> 	if (num_refs > ULONG_MAX / sizeof(*refs)) {
> 
> To avoid integer overflow in the check.
> 

Moreover, this check isn't needed at all because it duplicates the same check
done by kmalloc_array().

>> +		err = -EINVAL;
>> +		goto fail;
>> +	}
>> +
>> +	refs = kmalloc_array(num_refs, sizeof(*refs), GFP_KERNEL);
>> +	if (!refs) {
>> +		err = -ENOMEM;
>> +		goto fail;
>> +	}
>> +
>> +	/* reuse as an iterator later */
>> +	num_refs = 0;
>> +
>>  	while (num_cmdbufs) {
>>  		struct drm_tegra_cmdbuf cmdbuf;
>>  		struct host1x_bo *bo;
>> @@ -461,6 +481,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>>  
>>  		offset = (u64)cmdbuf.offset + (u64)cmdbuf.words * sizeof(u32);
>>  		obj = host1x_to_tegra_bo(bo);
>> +		refs[num_refs++] = &obj->gem;
>>  
>>  		/*
>>  		 * Gather buffer base address must be 4-bytes aligned,
>> @@ -490,6 +511,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>>  
>>  		reloc = &job->relocarray[num_relocs];
>>  		obj = host1x_to_tegra_bo(reloc->cmdbuf.bo);
>> +		refs[num_refs++] = &obj->gem;
>>  
>>  		/*
>>  		 * The unaligned cmdbuf offset will cause an unaligned write
>> @@ -503,6 +525,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>>  		}
>>  
>>  		obj = host1x_to_tegra_bo(reloc->target.bo);
>> +		refs[num_refs++] = &obj->gem;
>>  
>>  		if (reloc->target.offset >= obj->gem.size) {
>>  			err = -EINVAL;
>> @@ -522,6 +545,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>>  			goto fail;
>>  
>>  		obj = host1x_to_tegra_bo(wait->bo);
>> +		refs[num_refs++] = &obj->gem;
>>  
>>  		/*
>>  		 * The unaligned offset will cause an unaligned write during
>> @@ -561,17 +585,17 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>>  		goto fail;
>>  
>>  	err = host1x_job_submit(job);
>> -	if (err)
>> -		goto fail_submit;
>> +	if (err) {
>> +		host1x_job_unpin(job);
>> +		goto fail;
>> +	}
>>  
>>  	args->fence = job->syncpt_end;
>>  
>> -	host1x_job_put(job);
>> -	return 0;
>> -
>> -fail_submit:
>> -	host1x_job_unpin(job);
>>  fail:
>> +	while (num_refs--)
>> +		drm_gem_object_put_unlocked(refs[num_refs]);
>> +
>>  	host1x_job_put(job);
>>  	return err;
>>  }
>>
> 
>
Dmitry Osipenko Aug. 12, 2017, 3:48 p.m.
On 11.08.2017 20:59, Thierry Reding wrote:
> From: Dmitry Osipenko <digetx@gmail.com>
> 
> Since DRM IOCTL's are lockless, there is a chance that BOs could be
> released while a job submission is in progress. To avoid that, keep the
> GEM reference until the job has been pinned, part of which will be to
> take another reference.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> Signed-off-by: Thierry Reding <treding@nvidia.com>
> ---
>  drivers/gpu/drm/tegra/drm.c | 42 +++++++++++++++++++++++++++++++++---------
>  1 file changed, 33 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index f01db33fa20f..e5d19e1c9bc8 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -320,8 +320,6 @@ host1x_bo_lookup(struct drm_file *file, u32 handle)
>  	if (!gem)
>  		return NULL;
>  
> -	drm_gem_object_unreference_unlocked(gem);
> -
>  	bo = to_tegra_bo(gem);
>  	return &bo->base;
>  }
> @@ -410,8 +408,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  		(void __user *)(uintptr_t)args->waitchks;
>  	struct drm_tegra_syncpt syncpt;
>  	struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
> +	struct drm_gem_object **refs;
>  	struct host1x_syncpt *sp;
>  	struct host1x_job *job;
> +	unsigned int num_refs;

This definition of 'num_refs' could be moved to the other "unsigned int"
definitions for consistency.

>  	int err;
>  
>  	/* We don't yet support other than one syncpt_incr struct per submit */
> @@ -433,6 +433,26 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  	job->class = context->client->base.class;
>  	job->serialize = true;
>  
> +	/*
> +	 * Track referenced BOs so that they can be unreferenced after the
> +	 * submission is complete.
> +	 */
> +	num_refs = num_cmdbufs + num_relocs * 2 + num_waitchks;
> +

And 'num_refs' could be assigned directly in its definition, solely for
consistency as well.

> +	if (sizeof(*refs) * num_refs > ULONG_MAX) {
> +		err = -EINVAL;
> +		goto fail;
> +	}
> +
> +	refs = kmalloc_array(num_refs, sizeof(*refs), GFP_KERNEL);
> +	if (!refs) {
> +		err = -ENOMEM;
> +		goto fail;
> +	}
> +
> +	/* reuse as an iterator later */
> +	num_refs = 0;
> +
>  	while (num_cmdbufs) {
>  		struct drm_tegra_cmdbuf cmdbuf;
>  		struct host1x_bo *bo;
> @@ -461,6 +481,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  
>  		offset = (u64)cmdbuf.offset + (u64)cmdbuf.words * sizeof(u32);
>  		obj = host1x_to_tegra_bo(bo);
> +		refs[num_refs++] = &obj->gem;
>  
>  		/*
>  		 * Gather buffer base address must be 4-bytes aligned,
> @@ -490,6 +511,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  
>  		reloc = &job->relocarray[num_relocs];
>  		obj = host1x_to_tegra_bo(reloc->cmdbuf.bo);
> +		refs[num_refs++] = &obj->gem;
>  
>  		/*
>  		 * The unaligned cmdbuf offset will cause an unaligned write
> @@ -503,6 +525,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  		}
>  
>  		obj = host1x_to_tegra_bo(reloc->target.bo);
> +		refs[num_refs++] = &obj->gem;
>  
>  		if (reloc->target.offset >= obj->gem.size) {
>  			err = -EINVAL;
> @@ -522,6 +545,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  			goto fail;
>  
>  		obj = host1x_to_tegra_bo(wait->bo);
> +		refs[num_refs++] = &obj->gem;
>  
>  		/*
>  		 * The unaligned offset will cause an unaligned write during
> @@ -561,17 +585,17 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  		goto fail;
>  
>  	err = host1x_job_submit(job);
> -	if (err)
> -		goto fail_submit;
> +	if (err) {
> +		host1x_job_unpin(job);
> +		goto fail;
> +	}
>  
>  	args->fence = job->syncpt_end;
>  
> -	host1x_job_put(job);
> -	return 0;
> -
> -fail_submit:
> -	host1x_job_unpin(job);
>  fail:
> +	while (num_refs--)
> +		drm_gem_object_put_unlocked(refs[num_refs]);
> +
>  	host1x_job_put(job);
>  	return err;
>  }
>
Dmitry Osipenko Aug. 12, 2017, 4:01 p.m.
On 11.08.2017 20:59, Thierry Reding wrote:
> From: Dmitry Osipenko <digetx@gmail.com>
> 
> Since DRM IOCTL's are lockless, there is a chance that BOs could be
> released while a job submission is in progress. To avoid that, keep the
> GEM reference until the job has been pinned, part of which will be to
> take another reference.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> Signed-off-by: Thierry Reding <treding@nvidia.com>
> ---
>  drivers/gpu/drm/tegra/drm.c | 42 +++++++++++++++++++++++++++++++++---------
>  1 file changed, 33 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index f01db33fa20f..e5d19e1c9bc8 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -320,8 +320,6 @@ host1x_bo_lookup(struct drm_file *file, u32 handle)
>  	if (!gem)
>  		return NULL;
>  
> -	drm_gem_object_unreference_unlocked(gem);
> -
>  	bo = to_tegra_bo(gem);
>  	return &bo->base;
>  }
> @@ -410,8 +408,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  		(void __user *)(uintptr_t)args->waitchks;
>  	struct drm_tegra_syncpt syncpt;
>  	struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
> +	struct drm_gem_object **refs;

Should be "**refs = NULL" in conjunction with a missing kfree() below.

>  	struct host1x_syncpt *sp;
>  	struct host1x_job *job;
> +	unsigned int num_refs;
>  	int err;
>  
>  	/* We don't yet support other than one syncpt_incr struct per submit */
> @@ -433,6 +433,26 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  	job->class = context->client->base.class;
>  	job->serialize = true;
>  
> +	/*
> +	 * Track referenced BOs so that they can be unreferenced after the
> +	 * submission is complete.
> +	 */
> +	num_refs = num_cmdbufs + num_relocs * 2 + num_waitchks;
> +
> +	if (sizeof(*refs) * num_refs > ULONG_MAX) {
> +		err = -EINVAL;
> +		goto fail;
> +	}
> +
> +	refs = kmalloc_array(num_refs, sizeof(*refs), GFP_KERNEL);
> +	if (!refs) {
> +		err = -ENOMEM;
> +		goto fail;
> +	}
> +
> +	/* reuse as an iterator later */
> +	num_refs = 0;
> +
>  	while (num_cmdbufs) {
>  		struct drm_tegra_cmdbuf cmdbuf;
>  		struct host1x_bo *bo;
> @@ -461,6 +481,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  
>  		offset = (u64)cmdbuf.offset + (u64)cmdbuf.words * sizeof(u32);
>  		obj = host1x_to_tegra_bo(bo);
> +		refs[num_refs++] = &obj->gem;
>  
>  		/*
>  		 * Gather buffer base address must be 4-bytes aligned,
> @@ -490,6 +511,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  
>  		reloc = &job->relocarray[num_relocs];
>  		obj = host1x_to_tegra_bo(reloc->cmdbuf.bo);
> +		refs[num_refs++] = &obj->gem;
>  
>  		/*
>  		 * The unaligned cmdbuf offset will cause an unaligned write
> @@ -503,6 +525,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  		}
>  
>  		obj = host1x_to_tegra_bo(reloc->target.bo);
> +		refs[num_refs++] = &obj->gem;
>  
>  		if (reloc->target.offset >= obj->gem.size) {
>  			err = -EINVAL;
> @@ -522,6 +545,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  			goto fail;
>  
>  		obj = host1x_to_tegra_bo(wait->bo);
> +		refs[num_refs++] = &obj->gem;
>  
>  		/*
>  		 * The unaligned offset will cause an unaligned write during
> @@ -561,17 +585,17 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>  		goto fail;
>  
>  	err = host1x_job_submit(job);
> -	if (err)
> -		goto fail_submit;
> +	if (err) {
> +		host1x_job_unpin(job);
> +		goto fail;
> +	}
>  
>  	args->fence = job->syncpt_end;
>  
> -	host1x_job_put(job);
> -	return 0;
> -
> -fail_submit:
> -	host1x_job_unpin(job);
>  fail:
> +	while (num_refs--)
> +		drm_gem_object_put_unlocked(refs[num_refs]);
> +

kfree(refs) is missing here.

>  	host1x_job_put(job);
>  	return err;
>  }
>

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index f01db33fa20f..e5d19e1c9bc8 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -320,8 +320,6 @@  host1x_bo_lookup(struct drm_file *file, u32 handle)
 	if (!gem)
 		return NULL;
 
-	drm_gem_object_unreference_unlocked(gem);
-
 	bo = to_tegra_bo(gem);
 	return &bo->base;
 }
@@ -410,8 +408,10 @@  int tegra_drm_submit(struct tegra_drm_context *context,
 		(void __user *)(uintptr_t)args->waitchks;
 	struct drm_tegra_syncpt syncpt;
 	struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
+	struct drm_gem_object **refs;
 	struct host1x_syncpt *sp;
 	struct host1x_job *job;
+	unsigned int num_refs;
 	int err;
 
 	/* We don't yet support other than one syncpt_incr struct per submit */
@@ -433,6 +433,26 @@  int tegra_drm_submit(struct tegra_drm_context *context,
 	job->class = context->client->base.class;
 	job->serialize = true;
 
+	/*
+	 * Track referenced BOs so that they can be unreferenced after the
+	 * submission is complete.
+	 */
+	num_refs = num_cmdbufs + num_relocs * 2 + num_waitchks;
+
+	if (sizeof(*refs) * num_refs > ULONG_MAX) {
+		err = -EINVAL;
+		goto fail;
+	}
+
+	refs = kmalloc_array(num_refs, sizeof(*refs), GFP_KERNEL);
+	if (!refs) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	/* reuse as an iterator later */
+	num_refs = 0;
+
 	while (num_cmdbufs) {
 		struct drm_tegra_cmdbuf cmdbuf;
 		struct host1x_bo *bo;
@@ -461,6 +481,7 @@  int tegra_drm_submit(struct tegra_drm_context *context,
 
 		offset = (u64)cmdbuf.offset + (u64)cmdbuf.words * sizeof(u32);
 		obj = host1x_to_tegra_bo(bo);
+		refs[num_refs++] = &obj->gem;
 
 		/*
 		 * Gather buffer base address must be 4-bytes aligned,
@@ -490,6 +511,7 @@  int tegra_drm_submit(struct tegra_drm_context *context,
 
 		reloc = &job->relocarray[num_relocs];
 		obj = host1x_to_tegra_bo(reloc->cmdbuf.bo);
+		refs[num_refs++] = &obj->gem;
 
 		/*
 		 * The unaligned cmdbuf offset will cause an unaligned write
@@ -503,6 +525,7 @@  int tegra_drm_submit(struct tegra_drm_context *context,
 		}
 
 		obj = host1x_to_tegra_bo(reloc->target.bo);
+		refs[num_refs++] = &obj->gem;
 
 		if (reloc->target.offset >= obj->gem.size) {
 			err = -EINVAL;
@@ -522,6 +545,7 @@  int tegra_drm_submit(struct tegra_drm_context *context,
 			goto fail;
 
 		obj = host1x_to_tegra_bo(wait->bo);
+		refs[num_refs++] = &obj->gem;
 
 		/*
 		 * The unaligned offset will cause an unaligned write during
@@ -561,17 +585,17 @@  int tegra_drm_submit(struct tegra_drm_context *context,
 		goto fail;
 
 	err = host1x_job_submit(job);
-	if (err)
-		goto fail_submit;
+	if (err) {
+		host1x_job_unpin(job);
+		goto fail;
+	}
 
 	args->fence = job->syncpt_end;
 
-	host1x_job_put(job);
-	return 0;
-
-fail_submit:
-	host1x_job_unpin(job);
 fail:
+	while (num_refs--)
+		drm_gem_object_put_unlocked(refs[num_refs]);
+
 	host1x_job_put(job);
 	return err;
 }