diff mbox series

[v3,9/9] sm501: Fix and optimize overlap check

Message ID f0b64bf047e343f8b2e91baeccb4753bc26b17cc.1592686588.git.balaton@eik.bme.hu
State New
Headers show
Series More sm501 fixes and optimisations | expand

Commit Message

BALATON Zoltan June 20, 2020, 8:56 p.m. UTC
When doing reverse blit we need to check if source and dest overlap
but it is not trivial due to possible different base and pitch of
source and dest. Do rectangle overlap if base and pitch match,
otherwise just check if memory area containing the rects overlaps so
rects could possibly overlap.

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
---
 hw/display/sm501.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

Comments

Peter Maydell June 22, 2020, 7:38 p.m. UTC | #1
On Sat, 20 Jun 2020 at 22:04, BALATON Zoltan <balaton@eik.bme.hu> wrote:
>
> When doing reverse blit we need to check if source and dest overlap
> but it is not trivial due to possible different base and pitch of
> source and dest. Do rectangle overlap if base and pitch match,
> otherwise just check if memory area containing the rects overlaps so
> rects could possibly overlap.
>
> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
> ---
>  hw/display/sm501.c | 26 ++++++++++++++++----------
>  1 file changed, 16 insertions(+), 10 deletions(-)
>
> diff --git a/hw/display/sm501.c b/hw/display/sm501.c
> index 2db347dcbc..e7c69bf7fd 100644
> --- a/hw/display/sm501.c
> +++ b/hw/display/sm501.c
> @@ -690,6 +690,7 @@ static void sm501_2d_operation(SM501State *s)
>      unsigned int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF;
>      int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0;
>      int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt);
> +    bool overlap = false;
>
>      if ((s->twoD_stretch >> 16) & 0xF) {
>          qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n");
> @@ -784,16 +785,21 @@ static void sm501_2d_operation(SM501State *s)
>                           ldn_he_p(&s->local_mem[src_base + si], bypp));
>                  break;
>              }
> -            /* Check for overlaps, this could be made more exact */
> -            uint32_t sb, se, db, de;
> -            sb = src_base + src_x + src_y * (width + src_pitch);
> -            se = sb + width + height * (width + src_pitch);
> -            db = dst_base + dst_x + dst_y * (width + dst_pitch);
> -            de = db + width + height * (width + dst_pitch);
> -            if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) {
> -                /* regions may overlap: copy via temporary */
> -                int llb = width * bypp;
> -                int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
> +            /* If reverse blit do simple check for overlaps */
> +            if (rtl && src_base == dst_base && src_pitch == dst_pitch) {
> +                overlap = (src_x < dst_x + width && src_x + width > dst_x &&
> +                           src_y < dst_y + height && src_y + height > dst_y);

This part looks good...

> +            } else if (rtl) {
> +                unsigned int sb, se, db, de;
> +                sb = src_base + (src_x + src_y * src_pitch) * bypp;
> +                se = sb + (width + height * src_pitch) * bypp;
> +                db = dst_base + (dst_x + dst_y * dst_pitch) * bypp;
> +                de = db + (width + height * dst_pitch) * bypp;
> +                overlap = (db >= sb && db <= se) || (de >= sb && de <= se);

...but this part I think the overlap calculation isn't right. Consider
 db=5, de=15, sb=10, se=12. This gives overlap=false but
the two regions do overlap because [sb,se] is entirely inside [db,de].
I think you want
  overlap = (db < se && sb < de);
(this is the same logic as each of the x/y range checks in the rectangle
overlap test. put another way, if !(db<se) then we can't have an overlap
because the dest range starts after the source range ends; similarly if
!(sb<de) then the source range begins after the dest range ends and
there's no overlap. So for an overlap to be possible we must have both
db<se && sb<de.)
Here I'm using a definition of the "end" de and se which is that they point
to the byte *after* the last one used (ie that we're really working with
"half-open" ranges [db, de)  and [sb, se) where de and se aren't in the
range), because that's easier to calculate given that we need to account
for bypp and it's more natural when dealing with "start, length" pairs.

Also and less importantly (because it's wrong in the "safe" direction) I think
your se and de are overestimates, because one-past-the-last-used-byte in each
case is
   sb + (width + (height-1) * src_pitch) * bypp
(consider width=1 height=1, where one-past-the-last-used-byte is sb + bypp
because there's only one pixel involved).

> +            }
> +            if (overlap) {
> +                /* pixman can't do reverse blit: copy via temporary */
> +                int tmp_stride = DIV_ROUND_UP(width * bypp, sizeof(uint32_t));
>                  uint32_t *tmp = tmp_buf;
>
>                  if (tmp_stride * sizeof(uint32_t) * height > sizeof(tmp_buf)) {

PS: why do we care about overlap only for right-to-left blits and not
left-to-right blits ?

thanks
-- PMM
BALATON Zoltan June 22, 2020, 11:07 p.m. UTC | #2
On Mon, 22 Jun 2020, Peter Maydell wrote:
> On Sat, 20 Jun 2020 at 22:04, BALATON Zoltan <balaton@eik.bme.hu> wrote:
>>
>> When doing reverse blit we need to check if source and dest overlap
>> but it is not trivial due to possible different base and pitch of
>> source and dest. Do rectangle overlap if base and pitch match,
>> otherwise just check if memory area containing the rects overlaps so
>> rects could possibly overlap.
>>
>> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
>> ---
>>  hw/display/sm501.c | 26 ++++++++++++++++----------
>>  1 file changed, 16 insertions(+), 10 deletions(-)
>>
>> diff --git a/hw/display/sm501.c b/hw/display/sm501.c
>> index 2db347dcbc..e7c69bf7fd 100644
>> --- a/hw/display/sm501.c
>> +++ b/hw/display/sm501.c
>> @@ -690,6 +690,7 @@ static void sm501_2d_operation(SM501State *s)
>>      unsigned int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF;
>>      int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0;
>>      int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt);
>> +    bool overlap = false;
>>
>>      if ((s->twoD_stretch >> 16) & 0xF) {
>>          qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n");
>> @@ -784,16 +785,21 @@ static void sm501_2d_operation(SM501State *s)
>>                           ldn_he_p(&s->local_mem[src_base + si], bypp));
>>                  break;
>>              }
>> -            /* Check for overlaps, this could be made more exact */
>> -            uint32_t sb, se, db, de;
>> -            sb = src_base + src_x + src_y * (width + src_pitch);
>> -            se = sb + width + height * (width + src_pitch);
>> -            db = dst_base + dst_x + dst_y * (width + dst_pitch);
>> -            de = db + width + height * (width + dst_pitch);
>> -            if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) {
>> -                /* regions may overlap: copy via temporary */
>> -                int llb = width * bypp;
>> -                int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
>> +            /* If reverse blit do simple check for overlaps */
>> +            if (rtl && src_base == dst_base && src_pitch == dst_pitch) {
>> +                overlap = (src_x < dst_x + width && src_x + width > dst_x &&
>> +                           src_y < dst_y + height && src_y + height > dst_y);
>
> This part looks good...
>
>> +            } else if (rtl) {
>> +                unsigned int sb, se, db, de;
>> +                sb = src_base + (src_x + src_y * src_pitch) * bypp;
>> +                se = sb + (width + height * src_pitch) * bypp;
>> +                db = dst_base + (dst_x + dst_y * dst_pitch) * bypp;
>> +                de = db + (width + height * dst_pitch) * bypp;
>> +                overlap = (db >= sb && db <= se) || (de >= sb && de <= se);
>
> ...but this part I think the overlap calculation isn't right. Consider
> db=5, de=15, sb=10, se=12. This gives overlap=false but
> the two regions do overlap because [sb,se] is entirely inside [db,de].
> I think you want
>  overlap = (db < se && sb < de);
> (this is the same logic as each of the x/y range checks in the rectangle
> overlap test. put another way, if !(db<se) then we can't have an overlap
> because the dest range starts after the source range ends; similarly if
> !(sb<de) then the source range begins after the dest range ends and
> there's no overlap. So for an overlap to be possible we must have both
> db<se && sb<de.)

Thanks for checking it, you're right, I'll need to think about this again 
for a bit longer.

> Here I'm using a definition of the "end" de and se which is that they point
> to the byte *after* the last one used (ie that we're really working with
> "half-open" ranges [db, de)  and [sb, se) where de and se aren't in the
> range), because that's easier to calculate given that we need to account
> for bypp and it's more natural when dealing with "start, length" pairs.
>
> Also and less importantly (because it's wrong in the "safe" direction) I think
> your se and de are overestimates, because one-past-the-last-used-byte in each
> case is
>   sb + (width + (height-1) * src_pitch) * bypp
> (consider width=1 height=1, where one-past-the-last-used-byte is sb + bypp
> because there's only one pixel involved).

I'm not sure I follow the above but I had >= in the check to account for 
this although I've got the check wrong so that doesn't mean much.

What I've used is something like:

base               pitch
+------------------------------------------+
|        ^                                 |
|      y |                                 |
|   x    v      w                          |
|< - - ->+-------------+                   |
|        |             |                   |
|       h|             |                   |
|        |             |                   |
|        +-------------+                   |
|                                          |
+------------------------------------------+

then y * pitch * bypp is the index of the line + x * bypp is index of 
first point which gives:

sb = base + (x + y * pitch) * bypp

then add width * bypp to get past the rect (index of byte after as you say 
above), then moved h lines down that's h * pitch * bypp but maybe that's 
too much because we only want to get to the last line not past that. Then 
using h - 1 is enough.

>> +            }
>> +            if (overlap) {
>> +                /* pixman can't do reverse blit: copy via temporary */
>> +                int tmp_stride = DIV_ROUND_UP(width * bypp, sizeof(uint32_t));
>>                  uint32_t *tmp = tmp_buf;
>>
>>                  if (tmp_stride * sizeof(uint32_t) * height > sizeof(tmp_buf)) {
>
> PS: why do we care about overlap only for right-to-left blits and not
> left-to-right blits ?

I think I've already answered that here:

https://lists.nongnu.org/archive/html/qemu-devel/2020-05/msg07650.html

since pixman does left to right that should match the intended operation 
even with overlap when rtl is not set. (Rtl also seems to mean bottom to 
top as well because there's only this one bit for direction.)

Regards,
BALATON Zoltan
diff mbox series

Patch

diff --git a/hw/display/sm501.c b/hw/display/sm501.c
index 2db347dcbc..e7c69bf7fd 100644
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
@@ -690,6 +690,7 @@  static void sm501_2d_operation(SM501State *s)
     unsigned int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF;
     int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0;
     int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt);
+    bool overlap = false;
 
     if ((s->twoD_stretch >> 16) & 0xF) {
         qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n");
@@ -784,16 +785,21 @@  static void sm501_2d_operation(SM501State *s)
                          ldn_he_p(&s->local_mem[src_base + si], bypp));
                 break;
             }
-            /* Check for overlaps, this could be made more exact */
-            uint32_t sb, se, db, de;
-            sb = src_base + src_x + src_y * (width + src_pitch);
-            se = sb + width + height * (width + src_pitch);
-            db = dst_base + dst_x + dst_y * (width + dst_pitch);
-            de = db + width + height * (width + dst_pitch);
-            if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) {
-                /* regions may overlap: copy via temporary */
-                int llb = width * bypp;
-                int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
+            /* If reverse blit do simple check for overlaps */
+            if (rtl && src_base == dst_base && src_pitch == dst_pitch) {
+                overlap = (src_x < dst_x + width && src_x + width > dst_x &&
+                           src_y < dst_y + height && src_y + height > dst_y);
+            } else if (rtl) {
+                unsigned int sb, se, db, de;
+                sb = src_base + (src_x + src_y * src_pitch) * bypp;
+                se = sb + (width + height * src_pitch) * bypp;
+                db = dst_base + (dst_x + dst_y * dst_pitch) * bypp;
+                de = db + (width + height * dst_pitch) * bypp;
+                overlap = (db >= sb && db <= se) || (de >= sb && de <= se);
+            }
+            if (overlap) {
+                /* pixman can't do reverse blit: copy via temporary */
+                int tmp_stride = DIV_ROUND_UP(width * bypp, sizeof(uint32_t));
                 uint32_t *tmp = tmp_buf;
 
                 if (tmp_stride * sizeof(uint32_t) * height > sizeof(tmp_buf)) {