diff mbox series

[v3,6/7] video: Only dcache flush damaged lines

Message ID 20221230195828.88134-7-agraf@csgraf.de
State Superseded
Delegated to: Anatolij Gustschin
Headers show
Series Add video damage tracking | expand

Commit Message

Alexander Graf Dec. 30, 2022, 7:58 p.m. UTC
Now that we have a damage area tells us which parts of the frame buffer
actually need updating, let's only dcache flush those on video_sync()
calls. With this optimization in place, frame buffer updates - especially
on large screen such as 4k displays - speed up significantly.

Signed-off-by: Alexander Graf <agraf@csgraf.de>
Reported-by: Da Xue <da@libre.computer>

---

v1 -> v2:

  - Fix dcache range; we were flushing too much before
  - Remove ifdefs
---
 drivers/video/video-uclass.c | 51 ++++++++++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 8 deletions(-)

Comments

Heinrich Schuchardt Dec. 30, 2022, 8:47 p.m. UTC | #1
On 12/30/22 20:58, Alexander Graf wrote:
> Now that we have a damage area tells us which parts of the frame buffer
> actually need updating, let's only dcache flush those on video_sync()
> calls. With this optimization in place, frame buffer updates - especially
> on large screen such as 4k displays - speed up significantly.
>
> Signed-off-by: Alexander Graf <agraf@csgraf.de>
> Reported-by: Da Xue <da@libre.computer>
>
> ---
>
> v1 -> v2:
>
>    - Fix dcache range; we were flushing too much before
>    - Remove ifdefs
> ---
>   drivers/video/video-uclass.c | 51 ++++++++++++++++++++++++++++++------
>   1 file changed, 43 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c
> index f1fbeaadcb..bf93f524c7 100644
> --- a/drivers/video/video-uclass.c
> +++ b/drivers/video/video-uclass.c
> @@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int y, int width, int height)
>   	return 0;
>   }
>
> +#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
> +static void video_flush_dcache(struct udevice *vid)
> +{
> +	struct video_priv *priv = dev_get_uclass_priv(vid);
> +
> +	if (!priv->flush_dcache)
> +		return;
> +
> +	if (!CONFIG_IS_ENABLED(VIDEO_DAMAGE)) {
> +		flush_dcache_range((ulong)priv->fb,
> +				   ALIGN((ulong)priv->fb + priv->fb_size,
> +					 CONFIG_SYS_CACHELINE_SIZE));
> +
> +		return;
> +	}
> +
> +	if (priv->damage.endx && priv->damage.endy) {
> +		int lstart = priv->damage.x * VNBYTES(priv->bpix);
> +		int lend = priv->damage.endx * VNBYTES(priv->bpix);
> +		int y;
> +
> +		for (y = priv->damage.y; y < priv->damage.endy; y++) {
> +			ulong fb = (ulong)priv->fb;
> +			ulong start = fb + (y * priv->line_length) + lstart;
> +			ulong end = start + lend - lstart;
> +
> +			start = ALIGN_DOWN(start, CONFIG_SYS_CACHELINE_SIZE);
> +			end = ALIGN(end, CONFIG_SYS_CACHELINE_SIZE);
> +
> +			flush_dcache_range(start, end);
> +		}
> +	}
> +}
> +#endif
> +
>   /* Flush video activity to the caches */
>   int video_sync(struct udevice *vid, bool force)
>   {
> +	struct video_priv *priv = dev_get_uclass_priv(vid);
>   	struct video_ops *ops = video_get_ops(vid);
>   	int ret;
>
> @@ -315,15 +351,8 @@ int video_sync(struct udevice *vid, bool force)
>   	 * out whether it exists? For now, ARM is safe.
>   	 */
>   #if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
> -	struct video_priv *priv = dev_get_uclass_priv(vid);
> -
> -	if (priv->flush_dcache) {
> -		flush_dcache_range((ulong)priv->fb,
> -				   ALIGN((ulong)priv->fb + priv->fb_size,
> -					 CONFIG_SYS_CACHELINE_SIZE));
> -	}
> +	video_flush_dcache(vid);
>   #elif defined(CONFIG_VIDEO_SANDBOX_SDL)
> -	struct video_priv *priv = dev_get_uclass_priv(vid);
>   	static ulong last_sync;
>
>   	if (force || get_timer(last_sync) > 100) {
> @@ -331,6 +360,12 @@ int video_sync(struct udevice *vid, bool force)
>   		last_sync = get_timer(0);
>   	}
>   #endif
> +
> +	if (CONFIG_IS_ENABLED(VIDEO_DAMAGE)) {
> +		priv->damage.endx = 0;
> +		priv->damage.endy = 0;

priv->damage.x = priv->xsize;
priv->damage.y = priv->ysize;

With this you can avoid differentiating between first damage and later
damage in video_damage().

Best regards

Heinrich

> +	}
> +
>   	return 0;
>   }
>
Heinrich Schuchardt Dec. 30, 2022, 9:12 p.m. UTC | #2
On 12/30/22 20:58, Alexander Graf wrote:
> Now that we have a damage area tells us which parts of the frame buffer
> actually need updating, let's only dcache flush those on video_sync()
> calls. With this optimization in place, frame buffer updates - especially
> on large screen such as 4k displays - speed up significantly.
>
> Signed-off-by: Alexander Graf <agraf@csgraf.de>
> Reported-by: Da Xue <da@libre.computer>
>
> ---
>
> v1 -> v2:
>
>    - Fix dcache range; we were flushing too much before
>    - Remove ifdefs
> ---
>   drivers/video/video-uclass.c | 51 ++++++++++++++++++++++++++++++------
>   1 file changed, 43 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c
> index f1fbeaadcb..bf93f524c7 100644
> --- a/drivers/video/video-uclass.c
> +++ b/drivers/video/video-uclass.c
> @@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int y, int width, int height)
>   	return 0;
>   }
>
> +#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)

ARM isn't the only architecture implementing flush_dcache_range().
So this condition needs to be fixed as well as the one in video_sync.

Why don't you simply rely on priv->flash_dcache irrespective of the
architecture? Let the video drivers decide if they need it.

Best regards

Heinrich

> +static void video_flush_dcache(struct udevice *vid)
> +{
> +	struct video_priv *priv = dev_get_uclass_priv(vid);
> +
> +	if (!priv->flush_dcache)
> +		return;
> +
> +	if (!CONFIG_IS_ENABLED(VIDEO_DAMAGE)) {
> +		flush_dcache_range((ulong)priv->fb,
> +				   ALIGN((ulong)priv->fb + priv->fb_size,
> +					 CONFIG_SYS_CACHELINE_SIZE));
> +
> +		return;
> +	}
> +
> +	if (priv->damage.endx && priv->damage.endy) {
> +		int lstart = priv->damage.x * VNBYTES(priv->bpix);
> +		int lend = priv->damage.endx * VNBYTES(priv->bpix);
> +		int y;
> +
> +		for (y = priv->damage.y; y < priv->damage.endy; y++) {
> +			ulong fb = (ulong)priv->fb;
> +			ulong start = fb + (y * priv->line_length) + lstart;
> +			ulong end = start + lend - lstart;
> +
> +			start = ALIGN_DOWN(start, CONFIG_SYS_CACHELINE_SIZE);
> +			end = ALIGN(end, CONFIG_SYS_CACHELINE_SIZE);
> +
> +			flush_dcache_range(start, end);
> +		}
> +	}
> +}
> +#endif
> +
>   /* Flush video activity to the caches */
>   int video_sync(struct udevice *vid, bool force)
>   {
> +	struct video_priv *priv = dev_get_uclass_priv(vid);
>   	struct video_ops *ops = video_get_ops(vid);
>   	int ret;
>
> @@ -315,15 +351,8 @@ int video_sync(struct udevice *vid, bool force)
>   	 * out whether it exists? For now, ARM is safe.
>   	 */
>   #if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
> -	struct video_priv *priv = dev_get_uclass_priv(vid);
> -
> -	if (priv->flush_dcache) {
> -		flush_dcache_range((ulong)priv->fb,
> -				   ALIGN((ulong)priv->fb + priv->fb_size,
> -					 CONFIG_SYS_CACHELINE_SIZE));
> -	}
> +	video_flush_dcache(vid);
>   #elif defined(CONFIG_VIDEO_SANDBOX_SDL)
> -	struct video_priv *priv = dev_get_uclass_priv(vid);
>   	static ulong last_sync;
>
>   	if (force || get_timer(last_sync) > 100) {
> @@ -331,6 +360,12 @@ int video_sync(struct udevice *vid, bool force)
>   		last_sync = get_timer(0);
>   	}
>   #endif
> +
> +	if (CONFIG_IS_ENABLED(VIDEO_DAMAGE)) {
> +		priv->damage.endx = 0;
> +		priv->damage.endy = 0;
> +	}
> +
>   	return 0;
>   }
>
Alexander Graf Jan. 3, 2023, 8:25 p.m. UTC | #3
On 30.12.22 22:12, Heinrich Schuchardt wrote:
> On 12/30/22 20:58, Alexander Graf wrote:
>> Now that we have a damage area tells us which parts of the frame buffer
>> actually need updating, let's only dcache flush those on video_sync()
>> calls. With this optimization in place, frame buffer updates - 
>> especially
>> on large screen such as 4k displays - speed up significantly.
>>
>> Signed-off-by: Alexander Graf <agraf@csgraf.de>
>> Reported-by: Da Xue <da@libre.computer>
>>
>> ---
>>
>> v1 -> v2:
>>
>>    - Fix dcache range; we were flushing too much before
>>    - Remove ifdefs
>> ---
>>   drivers/video/video-uclass.c | 51 ++++++++++++++++++++++++++++++------
>>   1 file changed, 43 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c
>> index f1fbeaadcb..bf93f524c7 100644
>> --- a/drivers/video/video-uclass.c
>> +++ b/drivers/video/video-uclass.c
>> @@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int 
>> y, int width, int height)
>>       return 0;
>>   }
>>
>> +#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
>
> ARM isn't the only architecture implementing flush_dcache_range().
> So this condition needs to be fixed as well as the one in video_sync.
>
> Why don't you simply rely on priv->flash_dcache irrespective of the
> architecture? Let the video drivers decide if they need it.


This is code that already was #ifdef'ed before. The way I understand its 
history, we couldn't decisively ensure that flush_dcache_range() exists, 
so we couldn't build the code always.

I agree that it looks weird and error prone though. I believe you're 
trying to say we should just make sure there's always a 
flush_dcache_range() function and keep the rest to the compiler?



Alex
Simon Glass Jan. 7, 2023, 12:13 a.m. UTC | #4
Hi,

On Tue, 3 Jan 2023 at 13:25, Alexander Graf <agraf@csgraf.de> wrote:
>
>
> On 30.12.22 22:12, Heinrich Schuchardt wrote:
> > On 12/30/22 20:58, Alexander Graf wrote:
> >> Now that we have a damage area tells us which parts of the frame buffer
> >> actually need updating, let's only dcache flush those on video_sync()
> >> calls. With this optimization in place, frame buffer updates -
> >> especially
> >> on large screen such as 4k displays - speed up significantly.
> >>
> >> Signed-off-by: Alexander Graf <agraf@csgraf.de>
> >> Reported-by: Da Xue <da@libre.computer>
> >>
> >> ---
> >>
> >> v1 -> v2:
> >>
> >>    - Fix dcache range; we were flushing too much before
> >>    - Remove ifdefs
> >> ---
> >>   drivers/video/video-uclass.c | 51 ++++++++++++++++++++++++++++++------
> >>   1 file changed, 43 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c
> >> index f1fbeaadcb..bf93f524c7 100644
> >> --- a/drivers/video/video-uclass.c
> >> +++ b/drivers/video/video-uclass.c
> >> @@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int
> >> y, int width, int height)
> >>       return 0;
> >>   }
> >>
> >> +#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
> >
> > ARM isn't the only architecture implementing flush_dcache_range().
> > So this condition needs to be fixed as well as the one in video_sync.
> >
> > Why don't you simply rely on priv->flash_dcache irrespective of the
> > architecture? Let the video drivers decide if they need it.
>
>
> This is code that already was #ifdef'ed before. The way I understand its
> history, we couldn't decisively ensure that flush_dcache_range() exists,
> so we couldn't build the code always.
>
> I agree that it looks weird and error prone though. I believe you're
> trying to say we should just make sure there's always a
> flush_dcache_range() function and keep the rest to the compiler?
>
Can  / should we use the dma_map stuff here?

Regards,
Simon
diff mbox series

Patch

diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c
index f1fbeaadcb..bf93f524c7 100644
--- a/drivers/video/video-uclass.c
+++ b/drivers/video/video-uclass.c
@@ -297,9 +297,45 @@  int video_damage(struct udevice *vid, int x, int y, int width, int height)
 	return 0;
 }
 
+#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
+static void video_flush_dcache(struct udevice *vid)
+{
+	struct video_priv *priv = dev_get_uclass_priv(vid);
+
+	if (!priv->flush_dcache)
+		return;
+
+	if (!CONFIG_IS_ENABLED(VIDEO_DAMAGE)) {
+		flush_dcache_range((ulong)priv->fb,
+				   ALIGN((ulong)priv->fb + priv->fb_size,
+					 CONFIG_SYS_CACHELINE_SIZE));
+
+		return;
+	}
+
+	if (priv->damage.endx && priv->damage.endy) {
+		int lstart = priv->damage.x * VNBYTES(priv->bpix);
+		int lend = priv->damage.endx * VNBYTES(priv->bpix);
+		int y;
+
+		for (y = priv->damage.y; y < priv->damage.endy; y++) {
+			ulong fb = (ulong)priv->fb;
+			ulong start = fb + (y * priv->line_length) + lstart;
+			ulong end = start + lend - lstart;
+
+			start = ALIGN_DOWN(start, CONFIG_SYS_CACHELINE_SIZE);
+			end = ALIGN(end, CONFIG_SYS_CACHELINE_SIZE);
+
+			flush_dcache_range(start, end);
+		}
+	}
+}
+#endif
+
 /* Flush video activity to the caches */
 int video_sync(struct udevice *vid, bool force)
 {
+	struct video_priv *priv = dev_get_uclass_priv(vid);
 	struct video_ops *ops = video_get_ops(vid);
 	int ret;
 
@@ -315,15 +351,8 @@  int video_sync(struct udevice *vid, bool force)
 	 * out whether it exists? For now, ARM is safe.
 	 */
 #if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
-	struct video_priv *priv = dev_get_uclass_priv(vid);
-
-	if (priv->flush_dcache) {
-		flush_dcache_range((ulong)priv->fb,
-				   ALIGN((ulong)priv->fb + priv->fb_size,
-					 CONFIG_SYS_CACHELINE_SIZE));
-	}
+	video_flush_dcache(vid);
 #elif defined(CONFIG_VIDEO_SANDBOX_SDL)
-	struct video_priv *priv = dev_get_uclass_priv(vid);
 	static ulong last_sync;
 
 	if (force || get_timer(last_sync) > 100) {
@@ -331,6 +360,12 @@  int video_sync(struct udevice *vid, bool force)
 		last_sync = get_timer(0);
 	}
 #endif
+
+	if (CONFIG_IS_ENABLED(VIDEO_DAMAGE)) {
+		priv->damage.endx = 0;
+		priv->damage.endy = 0;
+	}
+
 	return 0;
 }