Message ID | 20221230195828.88134-7-agraf@csgraf.de |
---|---|
State | Superseded |
Delegated to: | Anatolij Gustschin |
Headers | show |
Series | Add video damage tracking | expand |
On 12/30/22 20:58, Alexander Graf wrote: > Now that we have a damage area tells us which parts of the frame buffer > actually need updating, let's only dcache flush those on video_sync() > calls. With this optimization in place, frame buffer updates - especially > on large screen such as 4k displays - speed up significantly. > > Signed-off-by: Alexander Graf <agraf@csgraf.de> > Reported-by: Da Xue <da@libre.computer> > > --- > > v1 -> v2: > > - Fix dcache range; we were flushing too much before > - Remove ifdefs > --- > drivers/video/video-uclass.c | 51 ++++++++++++++++++++++++++++++------ > 1 file changed, 43 insertions(+), 8 deletions(-) > > diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c > index f1fbeaadcb..bf93f524c7 100644 > --- a/drivers/video/video-uclass.c > +++ b/drivers/video/video-uclass.c > @@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int y, int width, int height) > return 0; > } > > +#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) > +static void video_flush_dcache(struct udevice *vid) > +{ > + struct video_priv *priv = dev_get_uclass_priv(vid); > + > + if (!priv->flush_dcache) > + return; > + > + if (!CONFIG_IS_ENABLED(VIDEO_DAMAGE)) { > + flush_dcache_range((ulong)priv->fb, > + ALIGN((ulong)priv->fb + priv->fb_size, > + CONFIG_SYS_CACHELINE_SIZE)); > + > + return; > + } > + > + if (priv->damage.endx && priv->damage.endy) { > + int lstart = priv->damage.x * VNBYTES(priv->bpix); > + int lend = priv->damage.endx * VNBYTES(priv->bpix); > + int y; > + > + for (y = priv->damage.y; y < priv->damage.endy; y++) { > + ulong fb = (ulong)priv->fb; > + ulong start = fb + (y * priv->line_length) + lstart; > + ulong end = start + lend - lstart; > + > + start = ALIGN_DOWN(start, CONFIG_SYS_CACHELINE_SIZE); > + end = ALIGN(end, CONFIG_SYS_CACHELINE_SIZE); > + > + flush_dcache_range(start, end); > + } > + } > +} > +#endif > + > /* Flush video activity to the caches */ > int video_sync(struct udevice *vid, bool force) > { > + struct video_priv *priv = dev_get_uclass_priv(vid); > struct video_ops *ops = video_get_ops(vid); > int ret; > > @@ -315,15 +351,8 @@ int video_sync(struct udevice *vid, bool force) > * out whether it exists? For now, ARM is safe. > */ > #if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) > - struct video_priv *priv = dev_get_uclass_priv(vid); > - > - if (priv->flush_dcache) { > - flush_dcache_range((ulong)priv->fb, > - ALIGN((ulong)priv->fb + priv->fb_size, > - CONFIG_SYS_CACHELINE_SIZE)); > - } > + video_flush_dcache(vid); > #elif defined(CONFIG_VIDEO_SANDBOX_SDL) > - struct video_priv *priv = dev_get_uclass_priv(vid); > static ulong last_sync; > > if (force || get_timer(last_sync) > 100) { > @@ -331,6 +360,12 @@ int video_sync(struct udevice *vid, bool force) > last_sync = get_timer(0); > } > #endif > + > + if (CONFIG_IS_ENABLED(VIDEO_DAMAGE)) { > + priv->damage.endx = 0; > + priv->damage.endy = 0; priv->damage.x = priv->xsize; priv->damage.y = priv->ysize; With this you can avoid differentiating between first damage and later damage in video_damage(). Best regards Heinrich > + } > + > return 0; > } >
On 12/30/22 20:58, Alexander Graf wrote: > Now that we have a damage area tells us which parts of the frame buffer > actually need updating, let's only dcache flush those on video_sync() > calls. With this optimization in place, frame buffer updates - especially > on large screen such as 4k displays - speed up significantly. > > Signed-off-by: Alexander Graf <agraf@csgraf.de> > Reported-by: Da Xue <da@libre.computer> > > --- > > v1 -> v2: > > - Fix dcache range; we were flushing too much before > - Remove ifdefs > --- > drivers/video/video-uclass.c | 51 ++++++++++++++++++++++++++++++------ > 1 file changed, 43 insertions(+), 8 deletions(-) > > diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c > index f1fbeaadcb..bf93f524c7 100644 > --- a/drivers/video/video-uclass.c > +++ b/drivers/video/video-uclass.c > @@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int y, int width, int height) > return 0; > } > > +#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) ARM isn't the only architecture implementing flush_dcache_range(). So this condition needs to be fixed as well as the one in video_sync. Why don't you simply rely on priv->flash_dcache irrespective of the architecture? Let the video drivers decide if they need it. Best regards Heinrich > +static void video_flush_dcache(struct udevice *vid) > +{ > + struct video_priv *priv = dev_get_uclass_priv(vid); > + > + if (!priv->flush_dcache) > + return; > + > + if (!CONFIG_IS_ENABLED(VIDEO_DAMAGE)) { > + flush_dcache_range((ulong)priv->fb, > + ALIGN((ulong)priv->fb + priv->fb_size, > + CONFIG_SYS_CACHELINE_SIZE)); > + > + return; > + } > + > + if (priv->damage.endx && priv->damage.endy) { > + int lstart = priv->damage.x * VNBYTES(priv->bpix); > + int lend = priv->damage.endx * VNBYTES(priv->bpix); > + int y; > + > + for (y = priv->damage.y; y < priv->damage.endy; y++) { > + ulong fb = (ulong)priv->fb; > + ulong start = fb + (y * priv->line_length) + lstart; > + ulong end = start + lend - lstart; > + > + start = ALIGN_DOWN(start, CONFIG_SYS_CACHELINE_SIZE); > + end = ALIGN(end, CONFIG_SYS_CACHELINE_SIZE); > + > + flush_dcache_range(start, end); > + } > + } > +} > +#endif > + > /* Flush video activity to the caches */ > int video_sync(struct udevice *vid, bool force) > { > + struct video_priv *priv = dev_get_uclass_priv(vid); > struct video_ops *ops = video_get_ops(vid); > int ret; > > @@ -315,15 +351,8 @@ int video_sync(struct udevice *vid, bool force) > * out whether it exists? For now, ARM is safe. > */ > #if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) > - struct video_priv *priv = dev_get_uclass_priv(vid); > - > - if (priv->flush_dcache) { > - flush_dcache_range((ulong)priv->fb, > - ALIGN((ulong)priv->fb + priv->fb_size, > - CONFIG_SYS_CACHELINE_SIZE)); > - } > + video_flush_dcache(vid); > #elif defined(CONFIG_VIDEO_SANDBOX_SDL) > - struct video_priv *priv = dev_get_uclass_priv(vid); > static ulong last_sync; > > if (force || get_timer(last_sync) > 100) { > @@ -331,6 +360,12 @@ int video_sync(struct udevice *vid, bool force) > last_sync = get_timer(0); > } > #endif > + > + if (CONFIG_IS_ENABLED(VIDEO_DAMAGE)) { > + priv->damage.endx = 0; > + priv->damage.endy = 0; > + } > + > return 0; > } >
On 30.12.22 22:12, Heinrich Schuchardt wrote: > On 12/30/22 20:58, Alexander Graf wrote: >> Now that we have a damage area tells us which parts of the frame buffer >> actually need updating, let's only dcache flush those on video_sync() >> calls. With this optimization in place, frame buffer updates - >> especially >> on large screen such as 4k displays - speed up significantly. >> >> Signed-off-by: Alexander Graf <agraf@csgraf.de> >> Reported-by: Da Xue <da@libre.computer> >> >> --- >> >> v1 -> v2: >> >> - Fix dcache range; we were flushing too much before >> - Remove ifdefs >> --- >> drivers/video/video-uclass.c | 51 ++++++++++++++++++++++++++++++------ >> 1 file changed, 43 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c >> index f1fbeaadcb..bf93f524c7 100644 >> --- a/drivers/video/video-uclass.c >> +++ b/drivers/video/video-uclass.c >> @@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int >> y, int width, int height) >> return 0; >> } >> >> +#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) > > ARM isn't the only architecture implementing flush_dcache_range(). > So this condition needs to be fixed as well as the one in video_sync. > > Why don't you simply rely on priv->flash_dcache irrespective of the > architecture? Let the video drivers decide if they need it. This is code that already was #ifdef'ed before. The way I understand its history, we couldn't decisively ensure that flush_dcache_range() exists, so we couldn't build the code always. I agree that it looks weird and error prone though. I believe you're trying to say we should just make sure there's always a flush_dcache_range() function and keep the rest to the compiler? Alex
Hi, On Tue, 3 Jan 2023 at 13:25, Alexander Graf <agraf@csgraf.de> wrote: > > > On 30.12.22 22:12, Heinrich Schuchardt wrote: > > On 12/30/22 20:58, Alexander Graf wrote: > >> Now that we have a damage area tells us which parts of the frame buffer > >> actually need updating, let's only dcache flush those on video_sync() > >> calls. With this optimization in place, frame buffer updates - > >> especially > >> on large screen such as 4k displays - speed up significantly. > >> > >> Signed-off-by: Alexander Graf <agraf@csgraf.de> > >> Reported-by: Da Xue <da@libre.computer> > >> > >> --- > >> > >> v1 -> v2: > >> > >> - Fix dcache range; we were flushing too much before > >> - Remove ifdefs > >> --- > >> drivers/video/video-uclass.c | 51 ++++++++++++++++++++++++++++++------ > >> 1 file changed, 43 insertions(+), 8 deletions(-) > >> > >> diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c > >> index f1fbeaadcb..bf93f524c7 100644 > >> --- a/drivers/video/video-uclass.c > >> +++ b/drivers/video/video-uclass.c > >> @@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int > >> y, int width, int height) > >> return 0; > >> } > >> > >> +#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) > > > > ARM isn't the only architecture implementing flush_dcache_range(). > > So this condition needs to be fixed as well as the one in video_sync. > > > > Why don't you simply rely on priv->flash_dcache irrespective of the > > architecture? Let the video drivers decide if they need it. > > > This is code that already was #ifdef'ed before. The way I understand its > history, we couldn't decisively ensure that flush_dcache_range() exists, > so we couldn't build the code always. > > I agree that it looks weird and error prone though. I believe you're > trying to say we should just make sure there's always a > flush_dcache_range() function and keep the rest to the compiler? > Can / should we use the dma_map stuff here? Regards, Simon
diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c index f1fbeaadcb..bf93f524c7 100644 --- a/drivers/video/video-uclass.c +++ b/drivers/video/video-uclass.c @@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int y, int width, int height) return 0; } +#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) +static void video_flush_dcache(struct udevice *vid) +{ + struct video_priv *priv = dev_get_uclass_priv(vid); + + if (!priv->flush_dcache) + return; + + if (!CONFIG_IS_ENABLED(VIDEO_DAMAGE)) { + flush_dcache_range((ulong)priv->fb, + ALIGN((ulong)priv->fb + priv->fb_size, + CONFIG_SYS_CACHELINE_SIZE)); + + return; + } + + if (priv->damage.endx && priv->damage.endy) { + int lstart = priv->damage.x * VNBYTES(priv->bpix); + int lend = priv->damage.endx * VNBYTES(priv->bpix); + int y; + + for (y = priv->damage.y; y < priv->damage.endy; y++) { + ulong fb = (ulong)priv->fb; + ulong start = fb + (y * priv->line_length) + lstart; + ulong end = start + lend - lstart; + + start = ALIGN_DOWN(start, CONFIG_SYS_CACHELINE_SIZE); + end = ALIGN(end, CONFIG_SYS_CACHELINE_SIZE); + + flush_dcache_range(start, end); + } + } +} +#endif + /* Flush video activity to the caches */ int video_sync(struct udevice *vid, bool force) { + struct video_priv *priv = dev_get_uclass_priv(vid); struct video_ops *ops = video_get_ops(vid); int ret; @@ -315,15 +351,8 @@ int video_sync(struct udevice *vid, bool force) * out whether it exists? For now, ARM is safe. */ #if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) - struct video_priv *priv = dev_get_uclass_priv(vid); - - if (priv->flush_dcache) { - flush_dcache_range((ulong)priv->fb, - ALIGN((ulong)priv->fb + priv->fb_size, - CONFIG_SYS_CACHELINE_SIZE)); - } + video_flush_dcache(vid); #elif defined(CONFIG_VIDEO_SANDBOX_SDL) - struct video_priv *priv = dev_get_uclass_priv(vid); static ulong last_sync; if (force || get_timer(last_sync) > 100) { @@ -331,6 +360,12 @@ int video_sync(struct udevice *vid, bool force) last_sync = get_timer(0); } #endif + + if (CONFIG_IS_ENABLED(VIDEO_DAMAGE)) { + priv->damage.endx = 0; + priv->damage.endy = 0; + } + return 0; }
Now that we have a damage area tells us which parts of the frame buffer actually need updating, let's only dcache flush those on video_sync() calls. With this optimization in place, frame buffer updates - especially on large screen such as 4k displays - speed up significantly. Signed-off-by: Alexander Graf <agraf@csgraf.de> Reported-by: Da Xue <da@libre.computer> --- v1 -> v2: - Fix dcache range; we were flushing too much before - Remove ifdefs --- drivers/video/video-uclass.c | 51 ++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 8 deletions(-)