diff mbox series

[SRU,Bionic,1/1] drm/i915: Flush TLBs before releasing backing store

Message ID 20220126165845.22813-2-cascardo@canonical.com
State New
Headers show
Series CVE-2022-0330 | expand

Commit Message

Thadeu Lima de Souza Cascardo Jan. 26, 2022, 4:58 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We need to flush TLBs before releasing backing store otherwise userspace
is able to encounter stale entries if a) it is not declaring GPU access to
certain buffers and b) this GPU execution then races with the backing
store release getting triggered asynchronously.

Approach taken is to mark any buffer objects which were ever bound to the
GPU and triggering a serialized TLB flush when their backing store is
released.

Alternatively the flushing could be done on VMA unbind, at which point we
would be able to ascertain whether there is potential parallel GPU
execution (which could race), but choice essentially boils down to paying
the cost of TLB flushes maybe needlessly at VMA unbind time (when the
backing store is not known to be definitely going away, so flushing not
always required for safety), versus potentially needlessly at backing
store relase time since at that point cannot tell whether there is a
parallel GPU execution happening.

Therefore simplicity of implementation has been chosen for now, with scope
to benchmark and refine later as required.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reported-by: Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: stable@vger.kernel.org
(backported from commit 7938d61591d33394a21bdd7797a245b65428f44c)
[cascardo: confict cleanup because of extra i915_gemfs_init]
CVE-2022-0330
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
---
 drivers/gpu/drm/i915/i915_drv.h        |  2 +
 drivers/gpu/drm/i915/i915_gem.c        | 84 +++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_object.h |  1 +
 drivers/gpu/drm/i915/i915_reg.h        |  6 ++
 drivers/gpu/drm/i915/i915_vma.c        |  4 ++
 5 files changed, 96 insertions(+), 1 deletion(-)

Comments

Stefan Bader Jan. 27, 2022, 1:57 p.m. UTC | #1
On 26.01.22 17:58, Thadeu Lima de Souza Cascardo wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> We need to flush TLBs before releasing backing store otherwise userspace
> is able to encounter stale entries if a) it is not declaring GPU access to
> certain buffers and b) this GPU execution then races with the backing
> store release getting triggered asynchronously.
> 
> Approach taken is to mark any buffer objects which were ever bound to the
> GPU and triggering a serialized TLB flush when their backing store is
> released.
> 
> Alternatively the flushing could be done on VMA unbind, at which point we
> would be able to ascertain whether there is potential parallel GPU
> execution (which could race), but choice essentially boils down to paying
> the cost of TLB flushes maybe needlessly at VMA unbind time (when the
> backing store is not known to be definitely going away, so flushing not
> always required for safety), versus potentially needlessly at backing
> store relase time since at that point cannot tell whether there is a
> parallel GPU execution happening.
> 
> Therefore simplicity of implementation has been chosen for now, with scope
> to benchmark and refine later as required.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reported-by: Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Jani Nikula <jani.nikula@intel.com>
> Cc: stable@vger.kernel.org
> (backported from commit 7938d61591d33394a21bdd7797a245b65428f44c)
> [cascardo: confict cleanup because of extra i915_gemfs_init]
> CVE-2022-0330
> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
> ---

Applied to bionic:linux/master-next. Thanks.

-Stefan

>   drivers/gpu/drm/i915/i915_drv.h        |  2 +
>   drivers/gpu/drm/i915/i915_gem.c        | 84 +++++++++++++++++++++++++-
>   drivers/gpu/drm/i915/i915_gem_object.h |  1 +
>   drivers/gpu/drm/i915/i915_reg.h        |  6 ++
>   drivers/gpu/drm/i915/i915_vma.c        |  4 ++
>   5 files changed, 96 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a7b32ee8549e..0efcb09c3306 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2245,6 +2245,8 @@ struct drm_i915_private {
>   
>   	struct intel_uncore uncore;
>   
> +	struct mutex tlb_invalidate_lock;
> +
>   	struct i915_virtual_gpu vgpu;
>   
>   	struct intel_gvt *gvt;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8356adc9a67a..32974cf96f73 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2284,6 +2284,76 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
>   	rcu_read_unlock();
>   }
>   
> +struct reg_and_bit {
> +	i915_reg_t reg;
> +	u32 bit;
> +};
> +
> +static struct reg_and_bit
> +get_reg_and_bit(const struct intel_engine_cs *engine,
> +		const i915_reg_t *regs, const unsigned int num)
> +{
> +	const unsigned int class = engine->class;
> +	struct reg_and_bit rb = { .bit = 1 };
> +
> +	if (WARN_ON_ONCE(class >= num || !regs[class].reg))
> +		return rb;
> +
> +	rb.reg = regs[class];
> +	if (class == VIDEO_DECODE_CLASS)
> +		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
> +
> +	return rb;
> +}
> +
> +static void invalidate_tlbs(struct drm_i915_private *dev_priv)
> +{
> +	static const i915_reg_t gen8_regs[] = {
> +		[RENDER_CLASS]                  = GEN8_RTCR,
> +		[VIDEO_DECODE_CLASS]            = GEN8_M1TCR, /* , GEN8_M2TCR */
> +		[VIDEO_ENHANCEMENT_CLASS]       = GEN8_VTCR,
> +		[COPY_ENGINE_CLASS]             = GEN8_BTCR,
> +	};
> +	const unsigned int num = ARRAY_SIZE(gen8_regs);
> +	const i915_reg_t *regs = gen8_regs;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	if (INTEL_GEN(dev_priv) < 8)
> +		return;
> +
> +	assert_rpm_wakelock_held(dev_priv);
> +
> +	mutex_lock(&dev_priv->tlb_invalidate_lock);
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	for_each_engine(engine, dev_priv, id) {
> +		/*
> +		 * HW architecture suggest typical invalidation time at 40us,
> +		 * with pessimistic cases up to 100us and a recommendation to
> +		 * cap at 1ms. We go a bit higher just in case.
> +		 */
> +		const unsigned int timeout_us = 100;
> +		const unsigned int timeout_ms = 4;
> +		struct reg_and_bit rb;
> +
> +		rb = get_reg_and_bit(engine, regs, num);
> +		if (!i915_mmio_reg_offset(rb.reg))
> +			continue;
> +
> +		I915_WRITE_FW(rb.reg, rb.bit);
> +		if (__intel_wait_for_register_fw(dev_priv,
> +						 rb.reg, rb.bit, 0,
> +						 timeout_us, timeout_ms,
> +						 NULL))
> +			DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n",
> +					      engine->name, timeout_ms);
> +	}
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +	mutex_unlock(&dev_priv->tlb_invalidate_lock);
> +}
> +
>   void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>   				 enum i915_mm_subclass subclass)
>   {
> @@ -2326,8 +2396,18 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>   
>   	__i915_gem_object_reset_page_iter(obj);
>   
> -	if (!IS_ERR(pages))
> +	if (!IS_ERR(pages)) {
> +		if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
> +			struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +
> +			if (intel_runtime_pm_get_if_in_use(i915)) {
> +				invalidate_tlbs(i915);
> +				intel_runtime_pm_put(i915);
> +			}
> +		}
> +
>   		obj->ops->put_pages(obj, pages);
> +	}
>   
>   	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
>   
> @@ -5288,6 +5368,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
>   
>   	spin_lock_init(&dev_priv->fb_tracking.lock);
>   
> +	mutex_init(&dev_priv->tlb_invalidate_lock);
> +
>   	err = i915_gemfs_init(dev_priv);
>   	if (err)
>   		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index 6b1de7942349..e3c0a897ca19 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -135,6 +135,7 @@ struct drm_i915_gem_object {
>   	 * activity?
>   	 */
>   #define I915_BO_ACTIVE_REF 0
> +#define I915_BO_WAS_BOUND_BIT    1
>   
>   	/*
>   	 * Is the object to be mapped as read-only to the GPU
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 814b4c66c6e3..ae1f77e3a824 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -2384,6 +2384,12 @@ enum i915_power_well_id {
>   #define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING	(1<<28)
>   #define   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT	(1<<24)
>   
> +#define GEN8_RTCR	_MMIO(0x4260)
> +#define GEN8_M1TCR	_MMIO(0x4264)
> +#define GEN8_M2TCR	_MMIO(0x4268)
> +#define GEN8_BTCR	_MMIO(0x426c)
> +#define GEN8_VTCR	_MMIO(0x4270)
> +
>   #if 0
>   #define PRB0_TAIL	_MMIO(0x2030)
>   #define PRB0_HEAD	_MMIO(0x2034)
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 7c4ab4263d33..19475637ba2d 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -283,6 +283,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>   		return ret;
>   
>   	vma->flags |= bind_flags;
> +
> +	if (vma->obj)
> +		set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
> +
>   	return 0;
>   }
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a7b32ee8549e..0efcb09c3306 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2245,6 +2245,8 @@  struct drm_i915_private {
 
 	struct intel_uncore uncore;
 
+	struct mutex tlb_invalidate_lock;
+
 	struct i915_virtual_gpu vgpu;
 
 	struct intel_gvt *gvt;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8356adc9a67a..32974cf96f73 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2284,6 +2284,76 @@  static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 	rcu_read_unlock();
 }
 
+struct reg_and_bit {
+	i915_reg_t reg;
+	u32 bit;
+};
+
+static struct reg_and_bit
+get_reg_and_bit(const struct intel_engine_cs *engine,
+		const i915_reg_t *regs, const unsigned int num)
+{
+	const unsigned int class = engine->class;
+	struct reg_and_bit rb = { .bit = 1 };
+
+	if (WARN_ON_ONCE(class >= num || !regs[class].reg))
+		return rb;
+
+	rb.reg = regs[class];
+	if (class == VIDEO_DECODE_CLASS)
+		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+
+	return rb;
+}
+
+static void invalidate_tlbs(struct drm_i915_private *dev_priv)
+{
+	static const i915_reg_t gen8_regs[] = {
+		[RENDER_CLASS]                  = GEN8_RTCR,
+		[VIDEO_DECODE_CLASS]            = GEN8_M1TCR, /* , GEN8_M2TCR */
+		[VIDEO_ENHANCEMENT_CLASS]       = GEN8_VTCR,
+		[COPY_ENGINE_CLASS]             = GEN8_BTCR,
+	};
+	const unsigned int num = ARRAY_SIZE(gen8_regs);
+	const i915_reg_t *regs = gen8_regs;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (INTEL_GEN(dev_priv) < 8)
+		return;
+
+	assert_rpm_wakelock_held(dev_priv);
+
+	mutex_lock(&dev_priv->tlb_invalidate_lock);
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	for_each_engine(engine, dev_priv, id) {
+		/*
+		 * HW architecture suggest typical invalidation time at 40us,
+		 * with pessimistic cases up to 100us and a recommendation to
+		 * cap at 1ms. We go a bit higher just in case.
+		 */
+		const unsigned int timeout_us = 100;
+		const unsigned int timeout_ms = 4;
+		struct reg_and_bit rb;
+
+		rb = get_reg_and_bit(engine, regs, num);
+		if (!i915_mmio_reg_offset(rb.reg))
+			continue;
+
+		I915_WRITE_FW(rb.reg, rb.bit);
+		if (__intel_wait_for_register_fw(dev_priv,
+						 rb.reg, rb.bit, 0,
+						 timeout_us, timeout_ms,
+						 NULL))
+			DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n",
+					      engine->name, timeout_ms);
+	}
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	mutex_unlock(&dev_priv->tlb_invalidate_lock);
+}
+
 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 				 enum i915_mm_subclass subclass)
 {
@@ -2326,8 +2396,18 @@  void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 
 	__i915_gem_object_reset_page_iter(obj);
 
-	if (!IS_ERR(pages))
+	if (!IS_ERR(pages)) {
+		if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+			struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+			if (intel_runtime_pm_get_if_in_use(i915)) {
+				invalidate_tlbs(i915);
+				intel_runtime_pm_put(i915);
+			}
+		}
+
 		obj->ops->put_pages(obj, pages);
+	}
 
 	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
 
@@ -5288,6 +5368,8 @@  i915_gem_load_init(struct drm_i915_private *dev_priv)
 
 	spin_lock_init(&dev_priv->fb_tracking.lock);
 
+	mutex_init(&dev_priv->tlb_invalidate_lock);
+
 	err = i915_gemfs_init(dev_priv);
 	if (err)
 		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 6b1de7942349..e3c0a897ca19 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -135,6 +135,7 @@  struct drm_i915_gem_object {
 	 * activity?
 	 */
 #define I915_BO_ACTIVE_REF 0
+#define I915_BO_WAS_BOUND_BIT    1
 
 	/*
 	 * Is the object to be mapped as read-only to the GPU
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 814b4c66c6e3..ae1f77e3a824 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2384,6 +2384,12 @@  enum i915_power_well_id {
 #define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING	(1<<28)
 #define   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT	(1<<24)
 
+#define GEN8_RTCR	_MMIO(0x4260)
+#define GEN8_M1TCR	_MMIO(0x4264)
+#define GEN8_M2TCR	_MMIO(0x4268)
+#define GEN8_BTCR	_MMIO(0x426c)
+#define GEN8_VTCR	_MMIO(0x4270)
+
 #if 0
 #define PRB0_TAIL	_MMIO(0x2030)
 #define PRB0_HEAD	_MMIO(0x2034)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 7c4ab4263d33..19475637ba2d 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -283,6 +283,10 @@  int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		return ret;
 
 	vma->flags |= bind_flags;
+
+	if (vma->obj)
+		set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+
 	return 0;
 }