diff mbox series

[v2,4/4] ati-vga: Implement fallback for pixman routines

Message ID ed0fba3f74e48143f02228b83bf8796ca49f3e7d.1698871239.git.balaton@eik.bme.hu
State New
Headers show
Series Misc ati-vga patches | expand

Commit Message

BALATON Zoltan Nov. 1, 2023, 8:45 p.m. UTC
Pixman routines can fail if no implementation is available and it will
become optional soon so add fallbacks when pixman does not work.

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
---
 hw/display/ati.c     |  8 +++++
 hw/display/ati_2d.c  | 75 +++++++++++++++++++++++++++++++-------------
 hw/display/ati_int.h |  1 +
 3 files changed, 62 insertions(+), 22 deletions(-)

Comments

Marc-André Lureau Nov. 6, 2023, 10:41 a.m. UTC | #1
Hi

On Thu, Nov 2, 2023 at 12:46 AM BALATON Zoltan <balaton@eik.bme.hu> wrote:
>
> Pixman routines can fail if no implementation is available and it will
> become optional soon so add fallbacks when pixman does not work.
>
> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
> ---
>  hw/display/ati.c     |  8 +++++
>  hw/display/ati_2d.c  | 75 +++++++++++++++++++++++++++++++-------------
>  hw/display/ati_int.h |  1 +
>  3 files changed, 62 insertions(+), 22 deletions(-)
>
> diff --git a/hw/display/ati.c b/hw/display/ati.c
> index 5e38d2c3de..f911fbc327 100644
> --- a/hw/display/ati.c
> +++ b/hw/display/ati.c
> @@ -1047,6 +1047,7 @@ static Property ati_vga_properties[] = {
>      DEFINE_PROP_UINT16("x-device-id", ATIVGAState, dev_id,
>                         PCI_DEVICE_ID_ATI_RAGE128_PF),
>      DEFINE_PROP_BOOL("guest_hwcursor", ATIVGAState, cursor_guest_mode, false),
> +    DEFINE_PROP_UINT8("x-pixman", ATIVGAState, use_pixman, 3),
>      DEFINE_PROP_END_OF_LIST()
>  };
>
> @@ -1068,11 +1069,18 @@ static void ati_vga_class_init(ObjectClass *klass, void *data)
>      k->exit = ati_vga_exit;
>  }
>
> +static void ati_vga_init(Object *o)
> +{
> +    object_property_set_description(o, "x-pixman", "Use pixman for: "
> +                                    "1: fill, 2: blit");
> +}
> +
>  static const TypeInfo ati_vga_info = {
>      .name = TYPE_ATI_VGA,
>      .parent = TYPE_PCI_DEVICE,
>      .instance_size = sizeof(ATIVGAState),
>      .class_init = ati_vga_class_init,
> +    .instance_init = ati_vga_init,
>      .interfaces = (InterfaceInfo[]) {
>            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
>            { },
> diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
> index 7d786653e8..0e6b8e4367 100644
> --- a/hw/display/ati_2d.c
> +++ b/hw/display/ati_2d.c
> @@ -92,6 +92,7 @@ void ati_2d_blt(ATIVGAState *s)
>      switch (s->regs.dp_mix & GMC_ROP3_MASK) {
>      case ROP3_SRCCOPY:
>      {
> +        bool fallback = false;
>          unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
>                         s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
>          unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
> @@ -122,27 +123,50 @@ void ati_2d_blt(ATIVGAState *s)
>                  src_bits, dst_bits, src_stride, dst_stride, bpp, bpp,
>                  src_x, src_y, dst_x, dst_y,
>                  s->regs.dst_width, s->regs.dst_height);
> -        if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
> +        if ((s->use_pixman & BIT(1)) &&
> +            s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
>              s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
> -            pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
> -                       src_stride, dst_stride, bpp, bpp,
> -                       src_x, src_y, dst_x, dst_y,
> -                       s->regs.dst_width, s->regs.dst_height);
> -        } else {
> +            fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
> +                                   src_stride, dst_stride, bpp, bpp,
> +                                   src_x, src_y, dst_x, dst_y,
> +                                   s->regs.dst_width, s->regs.dst_height);
> +        } else if (s->use_pixman & BIT(1)) {
>              /* FIXME: We only really need a temporary if src and dst overlap */
>              int llb = s->regs.dst_width * (bpp / 8);
>              int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
>              uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
>                                       s->regs.dst_height);
> -            pixman_blt((uint32_t *)src_bits, tmp,
> -                       src_stride, tmp_stride, bpp, bpp,
> -                       src_x, src_y, 0, 0,
> -                       s->regs.dst_width, s->regs.dst_height);
> -            pixman_blt(tmp, (uint32_t *)dst_bits,
> -                       tmp_stride, dst_stride, bpp, bpp,
> -                       0, 0, dst_x, dst_y,
> -                       s->regs.dst_width, s->regs.dst_height);
> +            fallback = !pixman_blt((uint32_t *)src_bits, tmp,
> +                                   src_stride, tmp_stride, bpp, bpp,
> +                                   src_x, src_y, 0, 0,
> +                                   s->regs.dst_width, s->regs.dst_height);
> +            if (!fallback) {
> +                fallback = !pixman_blt(tmp, (uint32_t *)dst_bits,
> +                                       tmp_stride, dst_stride, bpp, bpp,
> +                                       0, 0, dst_x, dst_y,
> +                                       s->regs.dst_width, s->regs.dst_height);
> +            }
>              g_free(tmp);
> +        } else {
> +            fallback = true;
> +        }
> +        if (fallback) {
> +            unsigned int y, i, j, bypp = bpp / 8;
> +            unsigned int src_pitch = src_stride * sizeof(uint32_t);
> +            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
> +
> +            for (y = 0; y < s->regs.dst_height; y++) {
> +                i = dst_x * bypp;
> +                j = src_x * bypp;
> +                if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
> +                    i += (dst_y + y) * dst_pitch;
> +                    j += (src_y + y) * src_pitch;
> +                } else {
> +                    i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch;
> +                    j += (src_y + s->regs.dst_height - 1 - y) * src_pitch;
> +                }
> +                memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp);

This doesn't seem to handle overlapping regions the same as the
pixman-version. Or am I missing something?

> +            }
>          }
>          if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
>              dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
> @@ -180,14 +204,21 @@ void ati_2d_blt(ATIVGAState *s)
>
>          dst_stride /= sizeof(uint32_t);
>          DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n",
> -                dst_bits, dst_stride, bpp,
> -                dst_x, dst_y,
> -                s->regs.dst_width, s->regs.dst_height,
> -                filler);
> -        pixman_fill((uint32_t *)dst_bits, dst_stride, bpp,
> -                    dst_x, dst_y,
> -                    s->regs.dst_width, s->regs.dst_height,
> -                    filler);
> +                dst_bits, dst_stride, bpp, dst_x, dst_y,
> +                s->regs.dst_width, s->regs.dst_height, filler);
> +        if (!(s->use_pixman & BIT(0)) ||
> +            !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y,
> +                    s->regs.dst_width, s->regs.dst_height, filler)) {
> +            /* fallback when pixman failed or we don't want to call it */
> +            unsigned int x, y, i, bypp = bpp / 8;
> +            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
> +            for (y = 0; y < s->regs.dst_height; y++) {
> +                i = dst_x * bypp + (dst_y + y) * dst_pitch;
> +                for (x = 0; x < s->regs.dst_width; x++, i += bypp) {
> +                    stn_he_p(&dst_bits[i], bypp, filler);
> +                }
> +            }
> +        }
>          if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
>              dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
>              s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
> diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h
> index 8abb873f01..f5a47b82b0 100644
> --- a/hw/display/ati_int.h
> +++ b/hw/display/ati_int.h
> @@ -90,6 +90,7 @@ struct ATIVGAState {
>      char *model;
>      uint16_t dev_id;
>      uint8_t mode;
> +    uint8_t use_pixman;
>      bool cursor_guest_mode;
>      uint16_t cursor_size;
>      uint32_t cursor_offset;
> --
> 2.30.9
>
>
BALATON Zoltan Nov. 6, 2023, 11:02 a.m. UTC | #2
On Mon, 6 Nov 2023, Marc-André Lureau wrote:
> On Thu, Nov 2, 2023 at 12:46 AM BALATON Zoltan <balaton@eik.bme.hu> wrote:
>>
>> Pixman routines can fail if no implementation is available and it will
>> become optional soon so add fallbacks when pixman does not work.
>>
>> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
>> ---
>>  hw/display/ati.c     |  8 +++++
>>  hw/display/ati_2d.c  | 75 +++++++++++++++++++++++++++++++-------------
>>  hw/display/ati_int.h |  1 +
>>  3 files changed, 62 insertions(+), 22 deletions(-)
>>
>> diff --git a/hw/display/ati.c b/hw/display/ati.c
>> index 5e38d2c3de..f911fbc327 100644
>> --- a/hw/display/ati.c
>> +++ b/hw/display/ati.c
>> @@ -1047,6 +1047,7 @@ static Property ati_vga_properties[] = {
>>      DEFINE_PROP_UINT16("x-device-id", ATIVGAState, dev_id,
>>                         PCI_DEVICE_ID_ATI_RAGE128_PF),
>>      DEFINE_PROP_BOOL("guest_hwcursor", ATIVGAState, cursor_guest_mode, false),
>> +    DEFINE_PROP_UINT8("x-pixman", ATIVGAState, use_pixman, 3),
>>      DEFINE_PROP_END_OF_LIST()
>>  };
>>
>> @@ -1068,11 +1069,18 @@ static void ati_vga_class_init(ObjectClass *klass, void *data)
>>      k->exit = ati_vga_exit;
>>  }
>>
>> +static void ati_vga_init(Object *o)
>> +{
>> +    object_property_set_description(o, "x-pixman", "Use pixman for: "
>> +                                    "1: fill, 2: blit");
>> +}
>> +
>>  static const TypeInfo ati_vga_info = {
>>      .name = TYPE_ATI_VGA,
>>      .parent = TYPE_PCI_DEVICE,
>>      .instance_size = sizeof(ATIVGAState),
>>      .class_init = ati_vga_class_init,
>> +    .instance_init = ati_vga_init,
>>      .interfaces = (InterfaceInfo[]) {
>>            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
>>            { },
>> diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
>> index 7d786653e8..0e6b8e4367 100644
>> --- a/hw/display/ati_2d.c
>> +++ b/hw/display/ati_2d.c
>> @@ -92,6 +92,7 @@ void ati_2d_blt(ATIVGAState *s)
>>      switch (s->regs.dp_mix & GMC_ROP3_MASK) {
>>      case ROP3_SRCCOPY:
>>      {
>> +        bool fallback = false;
>>          unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
>>                         s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
>>          unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
>> @@ -122,27 +123,50 @@ void ati_2d_blt(ATIVGAState *s)
>>                  src_bits, dst_bits, src_stride, dst_stride, bpp, bpp,
>>                  src_x, src_y, dst_x, dst_y,
>>                  s->regs.dst_width, s->regs.dst_height);
>> -        if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
>> +        if ((s->use_pixman & BIT(1)) &&
>> +            s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
>>              s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
>> -            pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
>> -                       src_stride, dst_stride, bpp, bpp,
>> -                       src_x, src_y, dst_x, dst_y,
>> -                       s->regs.dst_width, s->regs.dst_height);
>> -        } else {
>> +            fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
>> +                                   src_stride, dst_stride, bpp, bpp,
>> +                                   src_x, src_y, dst_x, dst_y,
>> +                                   s->regs.dst_width, s->regs.dst_height);
>> +        } else if (s->use_pixman & BIT(1)) {
>>              /* FIXME: We only really need a temporary if src and dst overlap */
>>              int llb = s->regs.dst_width * (bpp / 8);
>>              int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
>>              uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
>>                                       s->regs.dst_height);
>> -            pixman_blt((uint32_t *)src_bits, tmp,
>> -                       src_stride, tmp_stride, bpp, bpp,
>> -                       src_x, src_y, 0, 0,
>> -                       s->regs.dst_width, s->regs.dst_height);
>> -            pixman_blt(tmp, (uint32_t *)dst_bits,
>> -                       tmp_stride, dst_stride, bpp, bpp,
>> -                       0, 0, dst_x, dst_y,
>> -                       s->regs.dst_width, s->regs.dst_height);
>> +            fallback = !pixman_blt((uint32_t *)src_bits, tmp,
>> +                                   src_stride, tmp_stride, bpp, bpp,
>> +                                   src_x, src_y, 0, 0,
>> +                                   s->regs.dst_width, s->regs.dst_height);
>> +            if (!fallback) {
>> +                fallback = !pixman_blt(tmp, (uint32_t *)dst_bits,
>> +                                       tmp_stride, dst_stride, bpp, bpp,
>> +                                       0, 0, dst_x, dst_y,
>> +                                       s->regs.dst_width, s->regs.dst_height);
>> +            }
>>              g_free(tmp);
>> +        } else {
>> +            fallback = true;
>> +        }
>> +        if (fallback) {
>> +            unsigned int y, i, j, bypp = bpp / 8;
>> +            unsigned int src_pitch = src_stride * sizeof(uint32_t);
>> +            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
>> +
>> +            for (y = 0; y < s->regs.dst_height; y++) {
>> +                i = dst_x * bypp;
>> +                j = src_x * bypp;
>> +                if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
>> +                    i += (dst_y + y) * dst_pitch;
>> +                    j += (src_y + y) * src_pitch;
>> +                } else {
>> +                    i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch;
>> +                    j += (src_y + s->regs.dst_height - 1 - y) * src_pitch;
>> +                }
>> +                memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp);
>
> This doesn't seem to handle overlapping regions the same as the
> pixman-version. Or am I missing something?

memmove (as opposed to memcpy) allows overlapping regions and handles them 
correctly so no temporary needed for this. I've tested it with MorphOS and 
still got correct picture.

Regards,
BALATON Zoltan

>> +            }
>>          }
>>          if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
>>              dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
>> @@ -180,14 +204,21 @@ void ati_2d_blt(ATIVGAState *s)
>>
>>          dst_stride /= sizeof(uint32_t);
>>          DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n",
>> -                dst_bits, dst_stride, bpp,
>> -                dst_x, dst_y,
>> -                s->regs.dst_width, s->regs.dst_height,
>> -                filler);
>> -        pixman_fill((uint32_t *)dst_bits, dst_stride, bpp,
>> -                    dst_x, dst_y,
>> -                    s->regs.dst_width, s->regs.dst_height,
>> -                    filler);
>> +                dst_bits, dst_stride, bpp, dst_x, dst_y,
>> +                s->regs.dst_width, s->regs.dst_height, filler);
>> +        if (!(s->use_pixman & BIT(0)) ||
>> +            !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y,
>> +                    s->regs.dst_width, s->regs.dst_height, filler)) {
>> +            /* fallback when pixman failed or we don't want to call it */
>> +            unsigned int x, y, i, bypp = bpp / 8;
>> +            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
>> +            for (y = 0; y < s->regs.dst_height; y++) {
>> +                i = dst_x * bypp + (dst_y + y) * dst_pitch;
>> +                for (x = 0; x < s->regs.dst_width; x++, i += bypp) {
>> +                    stn_he_p(&dst_bits[i], bypp, filler);
>> +                }
>> +            }
>> +        }
>>          if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
>>              dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
>>              s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
>> diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h
>> index 8abb873f01..f5a47b82b0 100644
>> --- a/hw/display/ati_int.h
>> +++ b/hw/display/ati_int.h
>> @@ -90,6 +90,7 @@ struct ATIVGAState {
>>      char *model;
>>      uint16_t dev_id;
>>      uint8_t mode;
>> +    uint8_t use_pixman;
>>      bool cursor_guest_mode;
>>      uint16_t cursor_size;
>>      uint32_t cursor_offset;
>> --
>> 2.30.9
>>
>>
>
>
>
Marc-André Lureau Nov. 6, 2023, 11:10 a.m. UTC | #3
Hi

On Mon, Nov 6, 2023 at 3:02 PM BALATON Zoltan <balaton@eik.bme.hu> wrote:
>
> On Mon, 6 Nov 2023, Marc-André Lureau wrote:
> > On Thu, Nov 2, 2023 at 12:46 AM BALATON Zoltan <balaton@eik.bme.hu> wrote:
> >>
> >> Pixman routines can fail if no implementation is available and it will
> >> become optional soon so add fallbacks when pixman does not work.
> >>
> >> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
> >> ---
> >>  hw/display/ati.c     |  8 +++++
> >>  hw/display/ati_2d.c  | 75 +++++++++++++++++++++++++++++++-------------
> >>  hw/display/ati_int.h |  1 +
> >>  3 files changed, 62 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/hw/display/ati.c b/hw/display/ati.c
> >> index 5e38d2c3de..f911fbc327 100644
> >> --- a/hw/display/ati.c
> >> +++ b/hw/display/ati.c
> >> @@ -1047,6 +1047,7 @@ static Property ati_vga_properties[] = {
> >>      DEFINE_PROP_UINT16("x-device-id", ATIVGAState, dev_id,
> >>                         PCI_DEVICE_ID_ATI_RAGE128_PF),
> >>      DEFINE_PROP_BOOL("guest_hwcursor", ATIVGAState, cursor_guest_mode, false),
> >> +    DEFINE_PROP_UINT8("x-pixman", ATIVGAState, use_pixman, 3),
> >>      DEFINE_PROP_END_OF_LIST()
> >>  };
> >>
> >> @@ -1068,11 +1069,18 @@ static void ati_vga_class_init(ObjectClass *klass, void *data)
> >>      k->exit = ati_vga_exit;
> >>  }
> >>
> >> +static void ati_vga_init(Object *o)
> >> +{
> >> +    object_property_set_description(o, "x-pixman", "Use pixman for: "
> >> +                                    "1: fill, 2: blit");
> >> +}
> >> +
> >>  static const TypeInfo ati_vga_info = {
> >>      .name = TYPE_ATI_VGA,
> >>      .parent = TYPE_PCI_DEVICE,
> >>      .instance_size = sizeof(ATIVGAState),
> >>      .class_init = ati_vga_class_init,
> >> +    .instance_init = ati_vga_init,
> >>      .interfaces = (InterfaceInfo[]) {
> >>            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
> >>            { },
> >> diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
> >> index 7d786653e8..0e6b8e4367 100644
> >> --- a/hw/display/ati_2d.c
> >> +++ b/hw/display/ati_2d.c
> >> @@ -92,6 +92,7 @@ void ati_2d_blt(ATIVGAState *s)
> >>      switch (s->regs.dp_mix & GMC_ROP3_MASK) {
> >>      case ROP3_SRCCOPY:
> >>      {
> >> +        bool fallback = false;
> >>          unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
> >>                         s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
> >>          unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
> >> @@ -122,27 +123,50 @@ void ati_2d_blt(ATIVGAState *s)
> >>                  src_bits, dst_bits, src_stride, dst_stride, bpp, bpp,
> >>                  src_x, src_y, dst_x, dst_y,
> >>                  s->regs.dst_width, s->regs.dst_height);
> >> -        if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
> >> +        if ((s->use_pixman & BIT(1)) &&
> >> +            s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
> >>              s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
> >> -            pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
> >> -                       src_stride, dst_stride, bpp, bpp,
> >> -                       src_x, src_y, dst_x, dst_y,
> >> -                       s->regs.dst_width, s->regs.dst_height);
> >> -        } else {
> >> +            fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
> >> +                                   src_stride, dst_stride, bpp, bpp,
> >> +                                   src_x, src_y, dst_x, dst_y,
> >> +                                   s->regs.dst_width, s->regs.dst_height);
> >> +        } else if (s->use_pixman & BIT(1)) {
> >>              /* FIXME: We only really need a temporary if src and dst overlap */
> >>              int llb = s->regs.dst_width * (bpp / 8);
> >>              int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
> >>              uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
> >>                                       s->regs.dst_height);
> >> -            pixman_blt((uint32_t *)src_bits, tmp,
> >> -                       src_stride, tmp_stride, bpp, bpp,
> >> -                       src_x, src_y, 0, 0,
> >> -                       s->regs.dst_width, s->regs.dst_height);
> >> -            pixman_blt(tmp, (uint32_t *)dst_bits,
> >> -                       tmp_stride, dst_stride, bpp, bpp,
> >> -                       0, 0, dst_x, dst_y,
> >> -                       s->regs.dst_width, s->regs.dst_height);
> >> +            fallback = !pixman_blt((uint32_t *)src_bits, tmp,
> >> +                                   src_stride, tmp_stride, bpp, bpp,
> >> +                                   src_x, src_y, 0, 0,
> >> +                                   s->regs.dst_width, s->regs.dst_height);
> >> +            if (!fallback) {
> >> +                fallback = !pixman_blt(tmp, (uint32_t *)dst_bits,
> >> +                                       tmp_stride, dst_stride, bpp, bpp,
> >> +                                       0, 0, dst_x, dst_y,
> >> +                                       s->regs.dst_width, s->regs.dst_height);
> >> +            }
> >>              g_free(tmp);
> >> +        } else {
> >> +            fallback = true;
> >> +        }
> >> +        if (fallback) {
> >> +            unsigned int y, i, j, bypp = bpp / 8;
> >> +            unsigned int src_pitch = src_stride * sizeof(uint32_t);
> >> +            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
> >> +
> >> +            for (y = 0; y < s->regs.dst_height; y++) {
> >> +                i = dst_x * bypp;
> >> +                j = src_x * bypp;
> >> +                if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
> >> +                    i += (dst_y + y) * dst_pitch;
> >> +                    j += (src_y + y) * src_pitch;
> >> +                } else {
> >> +                    i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch;
> >> +                    j += (src_y + s->regs.dst_height - 1 - y) * src_pitch;
> >> +                }
> >> +                memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp);
> >
> > This doesn't seem to handle overlapping regions the same as the
> > pixman-version. Or am I missing something?
>
> memmove (as opposed to memcpy) allows overlapping regions and handles them
> correctly so no temporary needed for this. I've tested it with MorphOS and
> still got correct picture.

But it is calling memmove() for each line, you may have overlapping
rectangles. Having a temporary like above should solve this issue,
assuming it's the correct behaviour.

>
> Regards,
> BALATON Zoltan
>
> >> +            }
> >>          }
> >>          if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
> >>              dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
> >> @@ -180,14 +204,21 @@ void ati_2d_blt(ATIVGAState *s)
> >>
> >>          dst_stride /= sizeof(uint32_t);
> >>          DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n",
> >> -                dst_bits, dst_stride, bpp,
> >> -                dst_x, dst_y,
> >> -                s->regs.dst_width, s->regs.dst_height,
> >> -                filler);
> >> -        pixman_fill((uint32_t *)dst_bits, dst_stride, bpp,
> >> -                    dst_x, dst_y,
> >> -                    s->regs.dst_width, s->regs.dst_height,
> >> -                    filler);
> >> +                dst_bits, dst_stride, bpp, dst_x, dst_y,
> >> +                s->regs.dst_width, s->regs.dst_height, filler);
> >> +        if (!(s->use_pixman & BIT(0)) ||
> >> +            !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y,
> >> +                    s->regs.dst_width, s->regs.dst_height, filler)) {
> >> +            /* fallback when pixman failed or we don't want to call it */
> >> +            unsigned int x, y, i, bypp = bpp / 8;
> >> +            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
> >> +            for (y = 0; y < s->regs.dst_height; y++) {
> >> +                i = dst_x * bypp + (dst_y + y) * dst_pitch;
> >> +                for (x = 0; x < s->regs.dst_width; x++, i += bypp) {
> >> +                    stn_he_p(&dst_bits[i], bypp, filler);
> >> +                }
> >> +            }
> >> +        }
> >>          if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
> >>              dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
> >>              s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
> >> diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h
> >> index 8abb873f01..f5a47b82b0 100644
> >> --- a/hw/display/ati_int.h
> >> +++ b/hw/display/ati_int.h
> >> @@ -90,6 +90,7 @@ struct ATIVGAState {
> >>      char *model;
> >>      uint16_t dev_id;
> >>      uint8_t mode;
> >> +    uint8_t use_pixman;
> >>      bool cursor_guest_mode;
> >>      uint16_t cursor_size;
> >>      uint32_t cursor_offset;
> >> --
> >> 2.30.9
> >>
> >>
> >
> >
> >
BALATON Zoltan Nov. 6, 2023, 11:17 a.m. UTC | #4
On Mon, 6 Nov 2023, Marc-André Lureau wrote:
> On Mon, Nov 6, 2023 at 3:02 PM BALATON Zoltan <balaton@eik.bme.hu> wrote:
>>
>> On Mon, 6 Nov 2023, Marc-André Lureau wrote:
>>> On Thu, Nov 2, 2023 at 12:46 AM BALATON Zoltan <balaton@eik.bme.hu> wrote:
>>>>
>>>> Pixman routines can fail if no implementation is available and it will
>>>> become optional soon so add fallbacks when pixman does not work.
>>>>
>>>> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
>>>> ---
>>>>  hw/display/ati.c     |  8 +++++
>>>>  hw/display/ati_2d.c  | 75 +++++++++++++++++++++++++++++++-------------
>>>>  hw/display/ati_int.h |  1 +
>>>>  3 files changed, 62 insertions(+), 22 deletions(-)
>>>>
>>>> diff --git a/hw/display/ati.c b/hw/display/ati.c
>>>> index 5e38d2c3de..f911fbc327 100644
>>>> --- a/hw/display/ati.c
>>>> +++ b/hw/display/ati.c
>>>> @@ -1047,6 +1047,7 @@ static Property ati_vga_properties[] = {
>>>>      DEFINE_PROP_UINT16("x-device-id", ATIVGAState, dev_id,
>>>>                         PCI_DEVICE_ID_ATI_RAGE128_PF),
>>>>      DEFINE_PROP_BOOL("guest_hwcursor", ATIVGAState, cursor_guest_mode, false),
>>>> +    DEFINE_PROP_UINT8("x-pixman", ATIVGAState, use_pixman, 3),
>>>>      DEFINE_PROP_END_OF_LIST()
>>>>  };
>>>>
>>>> @@ -1068,11 +1069,18 @@ static void ati_vga_class_init(ObjectClass *klass, void *data)
>>>>      k->exit = ati_vga_exit;
>>>>  }
>>>>
>>>> +static void ati_vga_init(Object *o)
>>>> +{
>>>> +    object_property_set_description(o, "x-pixman", "Use pixman for: "
>>>> +                                    "1: fill, 2: blit");
>>>> +}
>>>> +
>>>>  static const TypeInfo ati_vga_info = {
>>>>      .name = TYPE_ATI_VGA,
>>>>      .parent = TYPE_PCI_DEVICE,
>>>>      .instance_size = sizeof(ATIVGAState),
>>>>      .class_init = ati_vga_class_init,
>>>> +    .instance_init = ati_vga_init,
>>>>      .interfaces = (InterfaceInfo[]) {
>>>>            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
>>>>            { },
>>>> diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
>>>> index 7d786653e8..0e6b8e4367 100644
>>>> --- a/hw/display/ati_2d.c
>>>> +++ b/hw/display/ati_2d.c
>>>> @@ -92,6 +92,7 @@ void ati_2d_blt(ATIVGAState *s)
>>>>      switch (s->regs.dp_mix & GMC_ROP3_MASK) {
>>>>      case ROP3_SRCCOPY:
>>>>      {
>>>> +        bool fallback = false;
>>>>          unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
>>>>                         s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
>>>>          unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
>>>> @@ -122,27 +123,50 @@ void ati_2d_blt(ATIVGAState *s)
>>>>                  src_bits, dst_bits, src_stride, dst_stride, bpp, bpp,
>>>>                  src_x, src_y, dst_x, dst_y,
>>>>                  s->regs.dst_width, s->regs.dst_height);
>>>> -        if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
>>>> +        if ((s->use_pixman & BIT(1)) &&
>>>> +            s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
>>>>              s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
>>>> -            pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
>>>> -                       src_stride, dst_stride, bpp, bpp,
>>>> -                       src_x, src_y, dst_x, dst_y,
>>>> -                       s->regs.dst_width, s->regs.dst_height);
>>>> -        } else {
>>>> +            fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
>>>> +                                   src_stride, dst_stride, bpp, bpp,
>>>> +                                   src_x, src_y, dst_x, dst_y,
>>>> +                                   s->regs.dst_width, s->regs.dst_height);
>>>> +        } else if (s->use_pixman & BIT(1)) {
>>>>              /* FIXME: We only really need a temporary if src and dst overlap */
>>>>              int llb = s->regs.dst_width * (bpp / 8);
>>>>              int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
>>>>              uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
>>>>                                       s->regs.dst_height);
>>>> -            pixman_blt((uint32_t *)src_bits, tmp,
>>>> -                       src_stride, tmp_stride, bpp, bpp,
>>>> -                       src_x, src_y, 0, 0,
>>>> -                       s->regs.dst_width, s->regs.dst_height);
>>>> -            pixman_blt(tmp, (uint32_t *)dst_bits,
>>>> -                       tmp_stride, dst_stride, bpp, bpp,
>>>> -                       0, 0, dst_x, dst_y,
>>>> -                       s->regs.dst_width, s->regs.dst_height);
>>>> +            fallback = !pixman_blt((uint32_t *)src_bits, tmp,
>>>> +                                   src_stride, tmp_stride, bpp, bpp,
>>>> +                                   src_x, src_y, 0, 0,
>>>> +                                   s->regs.dst_width, s->regs.dst_height);
>>>> +            if (!fallback) {
>>>> +                fallback = !pixman_blt(tmp, (uint32_t *)dst_bits,
>>>> +                                       tmp_stride, dst_stride, bpp, bpp,
>>>> +                                       0, 0, dst_x, dst_y,
>>>> +                                       s->regs.dst_width, s->regs.dst_height);
>>>> +            }
>>>>              g_free(tmp);
>>>> +        } else {
>>>> +            fallback = true;
>>>> +        }
>>>> +        if (fallback) {
>>>> +            unsigned int y, i, j, bypp = bpp / 8;
>>>> +            unsigned int src_pitch = src_stride * sizeof(uint32_t);
>>>> +            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
>>>> +
>>>> +            for (y = 0; y < s->regs.dst_height; y++) {
>>>> +                i = dst_x * bypp;
>>>> +                j = src_x * bypp;
>>>> +                if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
>>>> +                    i += (dst_y + y) * dst_pitch;
>>>> +                    j += (src_y + y) * src_pitch;
>>>> +                } else {
>>>> +                    i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch;
>>>> +                    j += (src_y + s->regs.dst_height - 1 - y) * src_pitch;
>>>> +                }
>>>> +                memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp);
>>>
>>> This doesn't seem to handle overlapping regions the same as the
>>> pixman-version. Or am I missing something?
>>
>> memmove (as opposed to memcpy) allows overlapping regions and handles them
>> correctly so no temporary needed for this. I've tested it with MorphOS and
>> still got correct picture.
>
> But it is calling memmove() for each line, you may have overlapping
> rectangles. Having a temporary like above should solve this issue,
> assuming it's the correct behaviour.

Lines are handled by DST_Y_TOP_TO_BOTTOM so they are processed either top 
down or bottom up to avoid overlap. We only need temporary for pixman 
because the only does top down left to right but here vertical direction 
is handled by going through lines in the right order and horisontal 
overlap is handled by memmove. No need for temporary in this case. This is 
doing the same as sm501 so if you accept that works this is exactly the 
same only the coords calculation is a bit different due to ati-vga storing 
values differently.

Regards,
BALATON Zoltan

>>>> +            }
>>>>          }
>>>>          if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
>>>>              dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
>>>> @@ -180,14 +204,21 @@ void ati_2d_blt(ATIVGAState *s)
>>>>
>>>>          dst_stride /= sizeof(uint32_t);
>>>>          DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n",
>>>> -                dst_bits, dst_stride, bpp,
>>>> -                dst_x, dst_y,
>>>> -                s->regs.dst_width, s->regs.dst_height,
>>>> -                filler);
>>>> -        pixman_fill((uint32_t *)dst_bits, dst_stride, bpp,
>>>> -                    dst_x, dst_y,
>>>> -                    s->regs.dst_width, s->regs.dst_height,
>>>> -                    filler);
>>>> +                dst_bits, dst_stride, bpp, dst_x, dst_y,
>>>> +                s->regs.dst_width, s->regs.dst_height, filler);
>>>> +        if (!(s->use_pixman & BIT(0)) ||
>>>> +            !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y,
>>>> +                    s->regs.dst_width, s->regs.dst_height, filler)) {
>>>> +            /* fallback when pixman failed or we don't want to call it */
>>>> +            unsigned int x, y, i, bypp = bpp / 8;
>>>> +            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
>>>> +            for (y = 0; y < s->regs.dst_height; y++) {
>>>> +                i = dst_x * bypp + (dst_y + y) * dst_pitch;
>>>> +                for (x = 0; x < s->regs.dst_width; x++, i += bypp) {
>>>> +                    stn_he_p(&dst_bits[i], bypp, filler);
>>>> +                }
>>>> +            }
>>>> +        }
>>>>          if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
>>>>              dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
>>>>              s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
>>>> diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h
>>>> index 8abb873f01..f5a47b82b0 100644
>>>> --- a/hw/display/ati_int.h
>>>> +++ b/hw/display/ati_int.h
>>>> @@ -90,6 +90,7 @@ struct ATIVGAState {
>>>>      char *model;
>>>>      uint16_t dev_id;
>>>>      uint8_t mode;
>>>> +    uint8_t use_pixman;
>>>>      bool cursor_guest_mode;
>>>>      uint16_t cursor_size;
>>>>      uint32_t cursor_offset;
>>>> --
>>>> 2.30.9
>>>>
>>>>
>>>
>>>
>>>
>
>
>
>
Marc-André Lureau Nov. 6, 2023, 11:32 a.m. UTC | #5
Hi

On Mon, Nov 6, 2023 at 3:17 PM BALATON Zoltan <balaton@eik.bme.hu> wrote:
>
> On Mon, 6 Nov 2023, Marc-André Lureau wrote:
> > On Mon, Nov 6, 2023 at 3:02 PM BALATON Zoltan <balaton@eik.bme.hu> wrote:
> >>
> >> On Mon, 6 Nov 2023, Marc-André Lureau wrote:
> >>> On Thu, Nov 2, 2023 at 12:46 AM BALATON Zoltan <balaton@eik.bme.hu> wrote:
> >>>>
> >>>> Pixman routines can fail if no implementation is available and it will
> >>>> become optional soon so add fallbacks when pixman does not work.
> >>>>
> >>>> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
> >>>> ---
> >>>>  hw/display/ati.c     |  8 +++++
> >>>>  hw/display/ati_2d.c  | 75 +++++++++++++++++++++++++++++++-------------
> >>>>  hw/display/ati_int.h |  1 +
> >>>>  3 files changed, 62 insertions(+), 22 deletions(-)
> >>>>
> >>>> diff --git a/hw/display/ati.c b/hw/display/ati.c
> >>>> index 5e38d2c3de..f911fbc327 100644
> >>>> --- a/hw/display/ati.c
> >>>> +++ b/hw/display/ati.c
> >>>> @@ -1047,6 +1047,7 @@ static Property ati_vga_properties[] = {
> >>>>      DEFINE_PROP_UINT16("x-device-id", ATIVGAState, dev_id,
> >>>>                         PCI_DEVICE_ID_ATI_RAGE128_PF),
> >>>>      DEFINE_PROP_BOOL("guest_hwcursor", ATIVGAState, cursor_guest_mode, false),
> >>>> +    DEFINE_PROP_UINT8("x-pixman", ATIVGAState, use_pixman, 3),
> >>>>      DEFINE_PROP_END_OF_LIST()
> >>>>  };
> >>>>
> >>>> @@ -1068,11 +1069,18 @@ static void ati_vga_class_init(ObjectClass *klass, void *data)
> >>>>      k->exit = ati_vga_exit;
> >>>>  }
> >>>>
> >>>> +static void ati_vga_init(Object *o)
> >>>> +{
> >>>> +    object_property_set_description(o, "x-pixman", "Use pixman for: "
> >>>> +                                    "1: fill, 2: blit");
> >>>> +}
> >>>> +
> >>>>  static const TypeInfo ati_vga_info = {
> >>>>      .name = TYPE_ATI_VGA,
> >>>>      .parent = TYPE_PCI_DEVICE,
> >>>>      .instance_size = sizeof(ATIVGAState),
> >>>>      .class_init = ati_vga_class_init,
> >>>> +    .instance_init = ati_vga_init,
> >>>>      .interfaces = (InterfaceInfo[]) {
> >>>>            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
> >>>>            { },
> >>>> diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
> >>>> index 7d786653e8..0e6b8e4367 100644
> >>>> --- a/hw/display/ati_2d.c
> >>>> +++ b/hw/display/ati_2d.c
> >>>> @@ -92,6 +92,7 @@ void ati_2d_blt(ATIVGAState *s)
> >>>>      switch (s->regs.dp_mix & GMC_ROP3_MASK) {
> >>>>      case ROP3_SRCCOPY:
> >>>>      {
> >>>> +        bool fallback = false;
> >>>>          unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
> >>>>                         s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
> >>>>          unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
> >>>> @@ -122,27 +123,50 @@ void ati_2d_blt(ATIVGAState *s)
> >>>>                  src_bits, dst_bits, src_stride, dst_stride, bpp, bpp,
> >>>>                  src_x, src_y, dst_x, dst_y,
> >>>>                  s->regs.dst_width, s->regs.dst_height);
> >>>> -        if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
> >>>> +        if ((s->use_pixman & BIT(1)) &&
> >>>> +            s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
> >>>>              s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
> >>>> -            pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
> >>>> -                       src_stride, dst_stride, bpp, bpp,
> >>>> -                       src_x, src_y, dst_x, dst_y,
> >>>> -                       s->regs.dst_width, s->regs.dst_height);
> >>>> -        } else {
> >>>> +            fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
> >>>> +                                   src_stride, dst_stride, bpp, bpp,
> >>>> +                                   src_x, src_y, dst_x, dst_y,
> >>>> +                                   s->regs.dst_width, s->regs.dst_height);
> >>>> +        } else if (s->use_pixman & BIT(1)) {
> >>>>              /* FIXME: We only really need a temporary if src and dst overlap */
> >>>>              int llb = s->regs.dst_width * (bpp / 8);
> >>>>              int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
> >>>>              uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
> >>>>                                       s->regs.dst_height);
> >>>> -            pixman_blt((uint32_t *)src_bits, tmp,
> >>>> -                       src_stride, tmp_stride, bpp, bpp,
> >>>> -                       src_x, src_y, 0, 0,
> >>>> -                       s->regs.dst_width, s->regs.dst_height);
> >>>> -            pixman_blt(tmp, (uint32_t *)dst_bits,
> >>>> -                       tmp_stride, dst_stride, bpp, bpp,
> >>>> -                       0, 0, dst_x, dst_y,
> >>>> -                       s->regs.dst_width, s->regs.dst_height);
> >>>> +            fallback = !pixman_blt((uint32_t *)src_bits, tmp,
> >>>> +                                   src_stride, tmp_stride, bpp, bpp,
> >>>> +                                   src_x, src_y, 0, 0,
> >>>> +                                   s->regs.dst_width, s->regs.dst_height);
> >>>> +            if (!fallback) {
> >>>> +                fallback = !pixman_blt(tmp, (uint32_t *)dst_bits,
> >>>> +                                       tmp_stride, dst_stride, bpp, bpp,
> >>>> +                                       0, 0, dst_x, dst_y,
> >>>> +                                       s->regs.dst_width, s->regs.dst_height);
> >>>> +            }
> >>>>              g_free(tmp);
> >>>> +        } else {
> >>>> +            fallback = true;
> >>>> +        }
> >>>> +        if (fallback) {
> >>>> +            unsigned int y, i, j, bypp = bpp / 8;
> >>>> +            unsigned int src_pitch = src_stride * sizeof(uint32_t);
> >>>> +            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
> >>>> +
> >>>> +            for (y = 0; y < s->regs.dst_height; y++) {
> >>>> +                i = dst_x * bypp;
> >>>> +                j = src_x * bypp;
> >>>> +                if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
> >>>> +                    i += (dst_y + y) * dst_pitch;
> >>>> +                    j += (src_y + y) * src_pitch;
> >>>> +                } else {
> >>>> +                    i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch;
> >>>> +                    j += (src_y + s->regs.dst_height - 1 - y) * src_pitch;
> >>>> +                }
> >>>> +                memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp);
> >>>
> >>> This doesn't seem to handle overlapping regions the same as the
> >>> pixman-version. Or am I missing something?
> >>
> >> memmove (as opposed to memcpy) allows overlapping regions and handles them
> >> correctly so no temporary needed for this. I've tested it with MorphOS and
> >> still got correct picture.
> >
> > But it is calling memmove() for each line, you may have overlapping
> > rectangles. Having a temporary like above should solve this issue,
> > assuming it's the correct behaviour.
>
> Lines are handled by DST_Y_TOP_TO_BOTTOM so they are processed either top
> down or bottom up to avoid overlap. We only need temporary for pixman
> because the only does top down left to right but here vertical direction
> is handled by going through lines in the right order and horisontal
> overlap is handled by memmove. No need for temporary in this case. This is
> doing the same as sm501 so if you accept that works this is exactly the
> same only the coords calculation is a bit different due to ati-vga storing
> values differently.

ok, thanks
Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>

>
> Regards,
> BALATON Zoltan
>
> >>>> +            }
> >>>>          }
> >>>>          if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
> >>>>              dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
> >>>> @@ -180,14 +204,21 @@ void ati_2d_blt(ATIVGAState *s)
> >>>>
> >>>>          dst_stride /= sizeof(uint32_t);
> >>>>          DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n",
> >>>> -                dst_bits, dst_stride, bpp,
> >>>> -                dst_x, dst_y,
> >>>> -                s->regs.dst_width, s->regs.dst_height,
> >>>> -                filler);
> >>>> -        pixman_fill((uint32_t *)dst_bits, dst_stride, bpp,
> >>>> -                    dst_x, dst_y,
> >>>> -                    s->regs.dst_width, s->regs.dst_height,
> >>>> -                    filler);
> >>>> +                dst_bits, dst_stride, bpp, dst_x, dst_y,
> >>>> +                s->regs.dst_width, s->regs.dst_height, filler);
> >>>> +        if (!(s->use_pixman & BIT(0)) ||
> >>>> +            !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y,
> >>>> +                    s->regs.dst_width, s->regs.dst_height, filler)) {
> >>>> +            /* fallback when pixman failed or we don't want to call it */
> >>>> +            unsigned int x, y, i, bypp = bpp / 8;
> >>>> +            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
> >>>> +            for (y = 0; y < s->regs.dst_height; y++) {
> >>>> +                i = dst_x * bypp + (dst_y + y) * dst_pitch;
> >>>> +                for (x = 0; x < s->regs.dst_width; x++, i += bypp) {
> >>>> +                    stn_he_p(&dst_bits[i], bypp, filler);
> >>>> +                }
> >>>> +            }
> >>>> +        }
> >>>>          if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
> >>>>              dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
> >>>>              s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
> >>>> diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h
> >>>> index 8abb873f01..f5a47b82b0 100644
> >>>> --- a/hw/display/ati_int.h
> >>>> +++ b/hw/display/ati_int.h
> >>>> @@ -90,6 +90,7 @@ struct ATIVGAState {
> >>>>      char *model;
> >>>>      uint16_t dev_id;
> >>>>      uint8_t mode;
> >>>> +    uint8_t use_pixman;
> >>>>      bool cursor_guest_mode;
> >>>>      uint16_t cursor_size;
> >>>>      uint32_t cursor_offset;
> >>>> --
> >>>> 2.30.9
> >>>>
> >>>>
> >>>
> >>>
> >>>
> >
> >
> >
> >
diff mbox series

Patch

diff --git a/hw/display/ati.c b/hw/display/ati.c
index 5e38d2c3de..f911fbc327 100644
--- a/hw/display/ati.c
+++ b/hw/display/ati.c
@@ -1047,6 +1047,7 @@  static Property ati_vga_properties[] = {
     DEFINE_PROP_UINT16("x-device-id", ATIVGAState, dev_id,
                        PCI_DEVICE_ID_ATI_RAGE128_PF),
     DEFINE_PROP_BOOL("guest_hwcursor", ATIVGAState, cursor_guest_mode, false),
+    DEFINE_PROP_UINT8("x-pixman", ATIVGAState, use_pixman, 3),
     DEFINE_PROP_END_OF_LIST()
 };
 
@@ -1068,11 +1069,18 @@  static void ati_vga_class_init(ObjectClass *klass, void *data)
     k->exit = ati_vga_exit;
 }
 
+static void ati_vga_init(Object *o)
+{
+    object_property_set_description(o, "x-pixman", "Use pixman for: "
+                                    "1: fill, 2: blit");
+}
+
 static const TypeInfo ati_vga_info = {
     .name = TYPE_ATI_VGA,
     .parent = TYPE_PCI_DEVICE,
     .instance_size = sizeof(ATIVGAState),
     .class_init = ati_vga_class_init,
+    .instance_init = ati_vga_init,
     .interfaces = (InterfaceInfo[]) {
           { INTERFACE_CONVENTIONAL_PCI_DEVICE },
           { },
diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
index 7d786653e8..0e6b8e4367 100644
--- a/hw/display/ati_2d.c
+++ b/hw/display/ati_2d.c
@@ -92,6 +92,7 @@  void ati_2d_blt(ATIVGAState *s)
     switch (s->regs.dp_mix & GMC_ROP3_MASK) {
     case ROP3_SRCCOPY:
     {
+        bool fallback = false;
         unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
                        s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
         unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
@@ -122,27 +123,50 @@  void ati_2d_blt(ATIVGAState *s)
                 src_bits, dst_bits, src_stride, dst_stride, bpp, bpp,
                 src_x, src_y, dst_x, dst_y,
                 s->regs.dst_width, s->regs.dst_height);
-        if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
+        if ((s->use_pixman & BIT(1)) &&
+            s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
             s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
-            pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
-                       src_stride, dst_stride, bpp, bpp,
-                       src_x, src_y, dst_x, dst_y,
-                       s->regs.dst_width, s->regs.dst_height);
-        } else {
+            fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
+                                   src_stride, dst_stride, bpp, bpp,
+                                   src_x, src_y, dst_x, dst_y,
+                                   s->regs.dst_width, s->regs.dst_height);
+        } else if (s->use_pixman & BIT(1)) {
             /* FIXME: We only really need a temporary if src and dst overlap */
             int llb = s->regs.dst_width * (bpp / 8);
             int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
             uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
                                      s->regs.dst_height);
-            pixman_blt((uint32_t *)src_bits, tmp,
-                       src_stride, tmp_stride, bpp, bpp,
-                       src_x, src_y, 0, 0,
-                       s->regs.dst_width, s->regs.dst_height);
-            pixman_blt(tmp, (uint32_t *)dst_bits,
-                       tmp_stride, dst_stride, bpp, bpp,
-                       0, 0, dst_x, dst_y,
-                       s->regs.dst_width, s->regs.dst_height);
+            fallback = !pixman_blt((uint32_t *)src_bits, tmp,
+                                   src_stride, tmp_stride, bpp, bpp,
+                                   src_x, src_y, 0, 0,
+                                   s->regs.dst_width, s->regs.dst_height);
+            if (!fallback) {
+                fallback = !pixman_blt(tmp, (uint32_t *)dst_bits,
+                                       tmp_stride, dst_stride, bpp, bpp,
+                                       0, 0, dst_x, dst_y,
+                                       s->regs.dst_width, s->regs.dst_height);
+            }
             g_free(tmp);
+        } else {
+            fallback = true;
+        }
+        if (fallback) {
+            unsigned int y, i, j, bypp = bpp / 8;
+            unsigned int src_pitch = src_stride * sizeof(uint32_t);
+            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
+
+            for (y = 0; y < s->regs.dst_height; y++) {
+                i = dst_x * bypp;
+                j = src_x * bypp;
+                if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
+                    i += (dst_y + y) * dst_pitch;
+                    j += (src_y + y) * src_pitch;
+                } else {
+                    i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch;
+                    j += (src_y + s->regs.dst_height - 1 - y) * src_pitch;
+                }
+                memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp);
+            }
         }
         if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
             dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
@@ -180,14 +204,21 @@  void ati_2d_blt(ATIVGAState *s)
 
         dst_stride /= sizeof(uint32_t);
         DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n",
-                dst_bits, dst_stride, bpp,
-                dst_x, dst_y,
-                s->regs.dst_width, s->regs.dst_height,
-                filler);
-        pixman_fill((uint32_t *)dst_bits, dst_stride, bpp,
-                    dst_x, dst_y,
-                    s->regs.dst_width, s->regs.dst_height,
-                    filler);
+                dst_bits, dst_stride, bpp, dst_x, dst_y,
+                s->regs.dst_width, s->regs.dst_height, filler);
+        if (!(s->use_pixman & BIT(0)) ||
+            !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y,
+                    s->regs.dst_width, s->regs.dst_height, filler)) {
+            /* fallback when pixman failed or we don't want to call it */
+            unsigned int x, y, i, bypp = bpp / 8;
+            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
+            for (y = 0; y < s->regs.dst_height; y++) {
+                i = dst_x * bypp + (dst_y + y) * dst_pitch;
+                for (x = 0; x < s->regs.dst_width; x++, i += bypp) {
+                    stn_he_p(&dst_bits[i], bypp, filler);
+                }
+            }
+        }
         if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
             dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
             s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h
index 8abb873f01..f5a47b82b0 100644
--- a/hw/display/ati_int.h
+++ b/hw/display/ati_int.h
@@ -90,6 +90,7 @@  struct ATIVGAState {
     char *model;
     uint16_t dev_id;
     uint8_t mode;
+    uint8_t use_pixman;
     bool cursor_guest_mode;
     uint16_t cursor_size;
     uint32_t cursor_offset;