Message ID | f0b64bf047e343f8b2e91baeccb4753bc26b17cc.1592686588.git.balaton@eik.bme.hu |
---|---|
State | New |
Headers | show |
Series | More sm501 fixes and optimisations | expand |
On Sat, 20 Jun 2020 at 22:04, BALATON Zoltan <balaton@eik.bme.hu> wrote: > > When doing reverse blit we need to check if source and dest overlap > but it is not trivial due to possible different base and pitch of > source and dest. Do rectangle overlap if base and pitch match, > otherwise just check if memory area containing the rects overlaps so > rects could possibly overlap. > > Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> > --- > hw/display/sm501.c | 26 ++++++++++++++++---------- > 1 file changed, 16 insertions(+), 10 deletions(-) > > diff --git a/hw/display/sm501.c b/hw/display/sm501.c > index 2db347dcbc..e7c69bf7fd 100644 > --- a/hw/display/sm501.c > +++ b/hw/display/sm501.c > @@ -690,6 +690,7 @@ static void sm501_2d_operation(SM501State *s) > unsigned int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; > int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0; > int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); > + bool overlap = false; > > if ((s->twoD_stretch >> 16) & 0xF) { > qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n"); > @@ -784,16 +785,21 @@ static void sm501_2d_operation(SM501State *s) > ldn_he_p(&s->local_mem[src_base + si], bypp)); > break; > } > - /* Check for overlaps, this could be made more exact */ > - uint32_t sb, se, db, de; > - sb = src_base + src_x + src_y * (width + src_pitch); > - se = sb + width + height * (width + src_pitch); > - db = dst_base + dst_x + dst_y * (width + dst_pitch); > - de = db + width + height * (width + dst_pitch); > - if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) { > - /* regions may overlap: copy via temporary */ > - int llb = width * bypp; > - int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t)); > + /* If reverse blit do simple check for overlaps */ > + if (rtl && src_base == dst_base && src_pitch == dst_pitch) { > + overlap = (src_x < dst_x + width && src_x + width > dst_x && > + src_y < dst_y + height && src_y + height > dst_y); This part looks good... > + } else if (rtl) { > + unsigned int sb, se, db, de; > + sb = src_base + (src_x + src_y * src_pitch) * bypp; > + se = sb + (width + height * src_pitch) * bypp; > + db = dst_base + (dst_x + dst_y * dst_pitch) * bypp; > + de = db + (width + height * dst_pitch) * bypp; > + overlap = (db >= sb && db <= se) || (de >= sb && de <= se); ...but this part I think the overlap calculation isn't right. Consider db=5, de=15, sb=10, se=12. This gives overlap=false but the two regions do overlap because [sb,se] is entirely inside [db,de]. I think you want overlap = (db < se && sb < de); (this is the same logic as each of the x/y range checks in the rectangle overlap test. put another way, if !(db<se) then we can't have an overlap because the dest range starts after the source range ends; similarly if !(sb<de) then the source range begins after the dest range ends and there's no overlap. So for an overlap to be possible we must have both db<se && sb<de.) Here I'm using a definition of the "end" de and se which is that they point to the byte *after* the last one used (ie that we're really working with "half-open" ranges [db, de) and [sb, se) where de and se aren't in the range), because that's easier to calculate given that we need to account for bypp and it's more natural when dealing with "start, length" pairs. Also and less importantly (because it's wrong in the "safe" direction) I think your se and de are overestimates, because one-past-the-last-used-byte in each case is sb + (width + (height-1) * src_pitch) * bypp (consider width=1 height=1, where one-past-the-last-used-byte is sb + bypp because there's only one pixel involved). > + } > + if (overlap) { > + /* pixman can't do reverse blit: copy via temporary */ > + int tmp_stride = DIV_ROUND_UP(width * bypp, sizeof(uint32_t)); > uint32_t *tmp = tmp_buf; > > if (tmp_stride * sizeof(uint32_t) * height > sizeof(tmp_buf)) { PS: why do we care about overlap only for right-to-left blits and not left-to-right blits ? thanks -- PMM
On Mon, 22 Jun 2020, Peter Maydell wrote: > On Sat, 20 Jun 2020 at 22:04, BALATON Zoltan <balaton@eik.bme.hu> wrote: >> >> When doing reverse blit we need to check if source and dest overlap >> but it is not trivial due to possible different base and pitch of >> source and dest. Do rectangle overlap if base and pitch match, >> otherwise just check if memory area containing the rects overlaps so >> rects could possibly overlap. >> >> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> >> --- >> hw/display/sm501.c | 26 ++++++++++++++++---------- >> 1 file changed, 16 insertions(+), 10 deletions(-) >> >> diff --git a/hw/display/sm501.c b/hw/display/sm501.c >> index 2db347dcbc..e7c69bf7fd 100644 >> --- a/hw/display/sm501.c >> +++ b/hw/display/sm501.c >> @@ -690,6 +690,7 @@ static void sm501_2d_operation(SM501State *s) >> unsigned int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; >> int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0; >> int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); >> + bool overlap = false; >> >> if ((s->twoD_stretch >> 16) & 0xF) { >> qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n"); >> @@ -784,16 +785,21 @@ static void sm501_2d_operation(SM501State *s) >> ldn_he_p(&s->local_mem[src_base + si], bypp)); >> break; >> } >> - /* Check for overlaps, this could be made more exact */ >> - uint32_t sb, se, db, de; >> - sb = src_base + src_x + src_y * (width + src_pitch); >> - se = sb + width + height * (width + src_pitch); >> - db = dst_base + dst_x + dst_y * (width + dst_pitch); >> - de = db + width + height * (width + dst_pitch); >> - if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) { >> - /* regions may overlap: copy via temporary */ >> - int llb = width * bypp; >> - int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t)); >> + /* If reverse blit do simple check for overlaps */ >> + if (rtl && src_base == dst_base && src_pitch == dst_pitch) { >> + overlap = (src_x < dst_x + width && src_x + width > dst_x && >> + src_y < dst_y + height && src_y + height > dst_y); > > This part looks good... > >> + } else if (rtl) { >> + unsigned int sb, se, db, de; >> + sb = src_base + (src_x + src_y * src_pitch) * bypp; >> + se = sb + (width + height * src_pitch) * bypp; >> + db = dst_base + (dst_x + dst_y * dst_pitch) * bypp; >> + de = db + (width + height * dst_pitch) * bypp; >> + overlap = (db >= sb && db <= se) || (de >= sb && de <= se); > > ...but this part I think the overlap calculation isn't right. Consider > db=5, de=15, sb=10, se=12. This gives overlap=false but > the two regions do overlap because [sb,se] is entirely inside [db,de]. > I think you want > overlap = (db < se && sb < de); > (this is the same logic as each of the x/y range checks in the rectangle > overlap test. put another way, if !(db<se) then we can't have an overlap > because the dest range starts after the source range ends; similarly if > !(sb<de) then the source range begins after the dest range ends and > there's no overlap. So for an overlap to be possible we must have both > db<se && sb<de.) Thanks for checking it, you're right, I'll need to think about this again for a bit longer. > Here I'm using a definition of the "end" de and se which is that they point > to the byte *after* the last one used (ie that we're really working with > "half-open" ranges [db, de) and [sb, se) where de and se aren't in the > range), because that's easier to calculate given that we need to account > for bypp and it's more natural when dealing with "start, length" pairs. > > Also and less importantly (because it's wrong in the "safe" direction) I think > your se and de are overestimates, because one-past-the-last-used-byte in each > case is > sb + (width + (height-1) * src_pitch) * bypp > (consider width=1 height=1, where one-past-the-last-used-byte is sb + bypp > because there's only one pixel involved). I'm not sure I follow the above but I had >= in the check to account for this although I've got the check wrong so that doesn't mean much. What I've used is something like: base pitch +------------------------------------------+ | ^ | | y | | | x v w | |< - - ->+-------------+ | | | | | | h| | | | | | | | +-------------+ | | | +------------------------------------------+ then y * pitch * bypp is the index of the line + x * bypp is index of first point which gives: sb = base + (x + y * pitch) * bypp then add width * bypp to get past the rect (index of byte after as you say above), then moved h lines down that's h * pitch * bypp but maybe that's too much because we only want to get to the last line not past that. Then using h - 1 is enough. >> + } >> + if (overlap) { >> + /* pixman can't do reverse blit: copy via temporary */ >> + int tmp_stride = DIV_ROUND_UP(width * bypp, sizeof(uint32_t)); >> uint32_t *tmp = tmp_buf; >> >> if (tmp_stride * sizeof(uint32_t) * height > sizeof(tmp_buf)) { > > PS: why do we care about overlap only for right-to-left blits and not > left-to-right blits ? I think I've already answered that here: https://lists.nongnu.org/archive/html/qemu-devel/2020-05/msg07650.html since pixman does left to right that should match the intended operation even with overlap when rtl is not set. (Rtl also seems to mean bottom to top as well because there's only this one bit for direction.) Regards, BALATON Zoltan
diff --git a/hw/display/sm501.c b/hw/display/sm501.c index 2db347dcbc..e7c69bf7fd 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -690,6 +690,7 @@ static void sm501_2d_operation(SM501State *s) unsigned int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0; int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); + bool overlap = false; if ((s->twoD_stretch >> 16) & 0xF) { qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n"); @@ -784,16 +785,21 @@ static void sm501_2d_operation(SM501State *s) ldn_he_p(&s->local_mem[src_base + si], bypp)); break; } - /* Check for overlaps, this could be made more exact */ - uint32_t sb, se, db, de; - sb = src_base + src_x + src_y * (width + src_pitch); - se = sb + width + height * (width + src_pitch); - db = dst_base + dst_x + dst_y * (width + dst_pitch); - de = db + width + height * (width + dst_pitch); - if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) { - /* regions may overlap: copy via temporary */ - int llb = width * bypp; - int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t)); + /* If reverse blit do simple check for overlaps */ + if (rtl && src_base == dst_base && src_pitch == dst_pitch) { + overlap = (src_x < dst_x + width && src_x + width > dst_x && + src_y < dst_y + height && src_y + height > dst_y); + } else if (rtl) { + unsigned int sb, se, db, de; + sb = src_base + (src_x + src_y * src_pitch) * bypp; + se = sb + (width + height * src_pitch) * bypp; + db = dst_base + (dst_x + dst_y * dst_pitch) * bypp; + de = db + (width + height * dst_pitch) * bypp; + overlap = (db >= sb && db <= se) || (de >= sb && de <= se); + } + if (overlap) { + /* pixman can't do reverse blit: copy via temporary */ + int tmp_stride = DIV_ROUND_UP(width * bypp, sizeof(uint32_t)); uint32_t *tmp = tmp_buf; if (tmp_stride * sizeof(uint32_t) * height > sizeof(tmp_buf)) {
When doing reverse blit we need to check if source and dest overlap but it is not trivial due to possible different base and pitch of source and dest. Do rectangle overlap if base and pitch match, otherwise just check if memory area containing the rects overlaps so rects could possibly overlap. Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> --- hw/display/sm501.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-)