diff mbox

[v2,07/14] sm501: Fix device endianness

Message ID 6b70a9dfe56c3a0dc8e874c45d70612aff7b8e3a.1488068248.git.balaton@eik.bme.hu
State New
Headers show

Commit Message

BALATON Zoltan Feb. 25, 2017, 6:46 p.m. UTC
Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
---

v2: Split off small clean up to other patch

 hw/display/sm501.c          |  6 +++---
 hw/display/sm501_template.h | 23 ++++++++++++++---------
 2 files changed, 17 insertions(+), 12 deletions(-)

Comments

Peter Maydell March 2, 2017, 9:04 p.m. UTC | #1
On 25 February 2017 at 18:46, BALATON Zoltan <balaton@eik.bme.hu> wrote:
> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
> ---
>
> v2: Split off small clean up to other patch
>
>  hw/display/sm501.c          |  6 +++---
>  hw/display/sm501_template.h | 23 ++++++++++++++---------
>  2 files changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/hw/display/sm501.c b/hw/display/sm501.c
> index b977094..2694081 100644
> --- a/hw/display/sm501.c
> +++ b/hw/display/sm501.c
> @@ -849,7 +849,7 @@ static const MemoryRegionOps sm501_system_config_ops = {
>          .min_access_size = 4,
>          .max_access_size = 4,
>      },
> -    .endianness = DEVICE_NATIVE_ENDIAN,
> +    .endianness = DEVICE_LITTLE_ENDIAN,
>  };
>
>  static uint32_t sm501_palette_read(void *opaque, hwaddr addr)
> @@ -1085,7 +1085,7 @@ static const MemoryRegionOps sm501_disp_ctrl_ops = {
>          .min_access_size = 4,
>          .max_access_size = 4,
>      },
> -    .endianness = DEVICE_NATIVE_ENDIAN,
> +    .endianness = DEVICE_LITTLE_ENDIAN,
>  };
>
>  static uint64_t sm501_2d_engine_read(void *opaque, hwaddr addr,
> @@ -1173,7 +1173,7 @@ static const MemoryRegionOps sm501_2d_engine_ops = {
>          .min_access_size = 4,
>          .max_access_size = 4,
>      },
> -    .endianness = DEVICE_NATIVE_ENDIAN,
> +    .endianness = DEVICE_LITTLE_ENDIAN,
>  };
>
>  /* draw line functions for all console modes */
> diff --git a/hw/display/sm501_template.h b/hw/display/sm501_template.h
> index 832ee61..6e56baf 100644
> --- a/hw/display/sm501_template.h
> +++ b/hw/display/sm501_template.h
> @@ -64,10 +64,16 @@ static void glue(draw_line16_, PIXEL_NAME)(
>      uint8_t r, g, b;
>
>      do {
> -        rgb565 = lduw_p(s);
> -        r = ((rgb565 >> 11) & 0x1f) << 3;
> -        g = ((rgb565 >>  5) & 0x3f) << 2;
> -        b = ((rgb565 >>  0) & 0x1f) << 3;
> +        rgb565 = lduw_le_p(s);
> +#if defined(TARGET_WORDS_BIGENDIAN)
> +        r = (rgb565 >> 8) & 0xf8;
> +        g = (rgb565 >> 3) & 0xfc;
> +        b = (rgb565 << 3) & 0xf8;
> +#else
> +        b = (rgb565 >> 8) & 0xf8;
> +        g = (rgb565 >> 3) & 0xfc;
> +        r = (rgb565 << 3) & 0xf8;
> +#endif
>          *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
>          s += 2;
>          d += BPP;
> @@ -80,15 +86,14 @@ static void glue(draw_line32_, PIXEL_NAME)(
>      uint8_t r, g, b;
>
>      do {
> -        ldub_p(s);
>  #if defined(TARGET_WORDS_BIGENDIAN)
> +        r = s[0];
> +        g = s[1];
> +        b = s[2];
> +#else
>          r = s[1];
>          g = s[2];
>          b = s[3];
> -#else
> -        b = s[0];
> -        g = s[1];
> -        r = s[2];
>  #endif
>          *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
>          s += 4;

I still suspect this is not correct. In particular, if you
look at the Linux driver:

http://lxr.free-electrons.com/source/drivers/video/fbdev/sm501fb.c#L341

we have for 32 bit two layouts within a 32-bit word:
 BGRX (if "swap fb endian" is set)
 XRGB (the usual)
(plus the cross-product with "32-bit load is BE or LE",
since we've coded this as byte accesses rather than a ldl_*_p()).

and for 16 bit we have either RGB or BGR ordering.
I'm not completely sure this lines up with the code you have.

Looking at how the SWAP_FB_ENDIAN flag gets set:
 * for the r2d board it is set always
 * for PCI devices, it is set only if big-endian

I suspect that what we actually have here is something
like:
 * for the PCI device it's always little endian (and the
   code ought not to need to depend on TARGET_WORDS_BIGENDIAN)
 * sysbus device may be more complicated

Also I note there's an SM501_ENDIAN_CONTROL register on the
device, which presumably if written changes the behaviour.

Plus for the sysbus device if we change the settings to
DEVICE_LITTLE_ENDIAN for the various control register
regions that suggests we should be consistent about the
serial_mm_init() endian order and also the USB controller.

It's late evening here though so I can't investigate any
further right now.

thanks
-- PMM
BALATON Zoltan March 3, 2017, 2:15 a.m. UTC | #2
On Thu, 2 Mar 2017, Peter Maydell wrote:
> On 25 February 2017 at 18:46, BALATON Zoltan <balaton@eik.bme.hu> wrote:
>> Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
>> ---
>>
>> v2: Split off small clean up to other patch
>>
>>  hw/display/sm501.c          |  6 +++---
>>  hw/display/sm501_template.h | 23 ++++++++++++++---------
>>  2 files changed, 17 insertions(+), 12 deletions(-)
>>
>> diff --git a/hw/display/sm501.c b/hw/display/sm501.c
>> index b977094..2694081 100644
>> --- a/hw/display/sm501.c
>> +++ b/hw/display/sm501.c
>> @@ -849,7 +849,7 @@ static const MemoryRegionOps sm501_system_config_ops = {
>>          .min_access_size = 4,
>>          .max_access_size = 4,
>>      },
>> -    .endianness = DEVICE_NATIVE_ENDIAN,
>> +    .endianness = DEVICE_LITTLE_ENDIAN,
>>  };
>>
>>  static uint32_t sm501_palette_read(void *opaque, hwaddr addr)
>> @@ -1085,7 +1085,7 @@ static const MemoryRegionOps sm501_disp_ctrl_ops = {
>>          .min_access_size = 4,
>>          .max_access_size = 4,
>>      },
>> -    .endianness = DEVICE_NATIVE_ENDIAN,
>> +    .endianness = DEVICE_LITTLE_ENDIAN,
>>  };
>>
>>  static uint64_t sm501_2d_engine_read(void *opaque, hwaddr addr,
>> @@ -1173,7 +1173,7 @@ static const MemoryRegionOps sm501_2d_engine_ops = {
>>          .min_access_size = 4,
>>          .max_access_size = 4,
>>      },
>> -    .endianness = DEVICE_NATIVE_ENDIAN,
>> +    .endianness = DEVICE_LITTLE_ENDIAN,
>>  };
>>
>>  /* draw line functions for all console modes */
>> diff --git a/hw/display/sm501_template.h b/hw/display/sm501_template.h
>> index 832ee61..6e56baf 100644
>> --- a/hw/display/sm501_template.h
>> +++ b/hw/display/sm501_template.h
>> @@ -64,10 +64,16 @@ static void glue(draw_line16_, PIXEL_NAME)(
>>      uint8_t r, g, b;
>>
>>      do {
>> -        rgb565 = lduw_p(s);
>> -        r = ((rgb565 >> 11) & 0x1f) << 3;
>> -        g = ((rgb565 >>  5) & 0x3f) << 2;
>> -        b = ((rgb565 >>  0) & 0x1f) << 3;
>> +        rgb565 = lduw_le_p(s);
>> +#if defined(TARGET_WORDS_BIGENDIAN)
>> +        r = (rgb565 >> 8) & 0xf8;
>> +        g = (rgb565 >> 3) & 0xfc;
>> +        b = (rgb565 << 3) & 0xf8;
>> +#else
>> +        b = (rgb565 >> 8) & 0xf8;
>> +        g = (rgb565 >> 3) & 0xfc;
>> +        r = (rgb565 << 3) & 0xf8;
>> +#endif
>>          *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
>>          s += 2;
>>          d += BPP;
>> @@ -80,15 +86,14 @@ static void glue(draw_line32_, PIXEL_NAME)(
>>      uint8_t r, g, b;
>>
>>      do {
>> -        ldub_p(s);
>>  #if defined(TARGET_WORDS_BIGENDIAN)
>> +        r = s[0];
>> +        g = s[1];
>> +        b = s[2];
>> +#else
>>          r = s[1];
>>          g = s[2];
>>          b = s[3];
>> -#else
>> -        b = s[0];
>> -        g = s[1];
>> -        r = s[2];
>>  #endif
>>          *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
>>          s += 4;
>
> I still suspect this is not correct. In particular, if you
> look at the Linux driver:
>
> http://lxr.free-electrons.com/source/drivers/video/fbdev/sm501fb.c#L341
>
> we have for 32 bit two layouts within a 32-bit word:
> BGRX (if "swap fb endian" is set)
> XRGB (the usual)
> (plus the cross-product with "32-bit load is BE or LE",
> since we've coded this as byte accesses rather than a ldl_*_p()).
>
> and for 16 bit we have either RGB or BGR ordering.
> I'm not completely sure this lines up with the code you have.

Maybe it's not correct but works for everything I could test better than 
the original code (which was broken even for the SH images one can find) 
so I think we could just go with this until someone complains and provides 
a test case. I've given up on trying to understand it because I don't 
really know this device and SH so I could only go by the images I could 
find and they seem to like it this way.

> Looking at how the SWAP_FB_ENDIAN flag gets set:
> * for the r2d board it is set always
> * for PCI devices, it is set only if big-endian
>
> I suspect that what we actually have here is something
> like:
> * for the PCI device it's always little endian (and the
>   code ought not to need to depend on TARGET_WORDS_BIGENDIAN)
> * sysbus device may be more complicated
>
> Also I note there's an SM501_ENDIAN_CONTROL register on the
> device, which presumably if written changes the behaviour.

I've seen this but it's not emulated so it can be ignored for now. The 
spec also says that default is little endian so for regs at least 
DEVICE_LITTLE_ENDIAN should be OK.

> Plus for the sysbus device if we change the settings to
> DEVICE_LITTLE_ENDIAN for the various control register
> regions that suggests we should be consistent about the
> serial_mm_init() endian order and also the USB controller.
>
> It's late evening here though so I can't investigate any
> further right now.
>
> thanks
> -- PMM
>
>
Peter Maydell March 3, 2017, 6:49 p.m. UTC | #3
On 3 March 2017 at 02:15, BALATON Zoltan <balaton@eik.bme.hu> wrote:
> Maybe it's not correct but works for everything I could test better than the
> original code (which was broken even for the SH images one can find) so I
> think we could just go with this until someone complains and provides a test
> case. I've given up on trying to understand it because I don't really know
> this device and SH so I could only go by the images I could find and they
> seem to like it this way.

So what cases have you tested? The Linux kernel seems to support:
 * sh embedded device, little endian
 * PCI card, little endian host
 * PCI card, big endian host
and there are also
 * 16 bit pixel format
 * 32 bit pixel format

which makes 6 different combinations.
(It's a shame there's no sh4 BE code to run on this.)

>> Looking at how the SWAP_FB_ENDIAN flag gets set:
>> * for the r2d board it is set always
>> * for PCI devices, it is set only if big-endian
>>
>> I suspect that what we actually have here is something
>> like:
>> * for the PCI device it's always little endian (and the
>>   code ought not to need to depend on TARGET_WORDS_BIGENDIAN)
>> * sysbus device may be more complicated
>>
>> Also I note there's an SM501_ENDIAN_CONTROL register on the
>> device, which presumably if written changes the behaviour.

> I've seen this but it's not emulated so it can be ignored for now. The spec
> also says that default is little endian so for regs at least
> DEVICE_LITTLE_ENDIAN should be OK.

Yes, that makes sense. So we should be modelling this as an "always
little endian device".

The changes you have to the memory region ops achieve this for
the registers implemented in this device itself. It looks like
SYSBUS_OHCI is already always little endian. You also need to
change the argument to the serial_mm_init() call to pass
DEVICE_LITTLE_ENDIAN.

thanks
-- PMM
BALATON Zoltan March 3, 2017, 8:11 p.m. UTC | #4
On Fri, 3 Mar 2017, Peter Maydell wrote:
> On 3 March 2017 at 02:15, BALATON Zoltan <balaton@eik.bme.hu> wrote:
>> Maybe it's not correct but works for everything I could test better than the
>> original code (which was broken even for the SH images one can find) so I
>> think we could just go with this until someone complains and provides a test
>> case. I've given up on trying to understand it because I don't really know
>> this device and SH so I could only go by the images I could find and they
>> seem to like it this way.
>
> So what cases have you tested? The Linux kernel seems to support:
> * sh embedded device, little endian
> * PCI card, little endian host
> * PCI card, big endian host
> and there are also
> * 16 bit pixel format
> * 32 bit pixel format
>
> which makes 6 different combinations.
> (It's a shame there's no sh4 BE code to run on this.)

As mentioned before I've used this image to test SH:

https://people.debian.org/~aurel32/qemu/sh4/

with video=800x600-16 kernel parameter where changing -16 to different bit 
depths (8, 32) reproduces the problem. On ppc I've tested with MorphOS 
which uses 16 bpp, Linux and U-boot which I think uses 8 bit. These run on 
real hardware so I think if they work the emulation should be OK.

You've said before that PCI is likely always little endian and SH embedded 
is LE too so unless something uses the device in BE mode (which we don't 
emulate) setting it to little endian should be correct. For frame buffer 
endianness I can only go with what I've seen clients doing and my patch 
does what worked for the above on SH and PPC. I can't prove it's correct 
but works for the systems I've targeted and also fixes the SH case which 
seemed to be broken.

>>> Looking at how the SWAP_FB_ENDIAN flag gets set:
>>> * for the r2d board it is set always
>>> * for PCI devices, it is set only if big-endian
>>>
>>> I suspect that what we actually have here is something
>>> like:
>>> * for the PCI device it's always little endian (and the
>>>   code ought not to need to depend on TARGET_WORDS_BIGENDIAN)
>>> * sysbus device may be more complicated
>>>
>>> Also I note there's an SM501_ENDIAN_CONTROL register on the
>>> device, which presumably if written changes the behaviour.
>
>> I've seen this but it's not emulated so it can be ignored for now. The spec
>> also says that default is little endian so for regs at least
>> DEVICE_LITTLE_ENDIAN should be OK.
>
> Yes, that makes sense. So we should be modelling this as an "always
> little endian device".
>
> The changes you have to the memory region ops achieve this for
> the registers implemented in this device itself. It looks like
> SYSBUS_OHCI is already always little endian. You also need to
> change the argument to the serial_mm_init() call to pass
> DEVICE_LITTLE_ENDIAN.

OK, will do that.
Peter Maydell March 4, 2017, 12:40 p.m. UTC | #5
On 3 March 2017 at 20:11, BALATON Zoltan <balaton@eik.bme.hu> wrote:
> On Fri, 3 Mar 2017, Peter Maydell wrote:
>> So what cases have you tested? The Linux kernel seems to support:
>> * sh embedded device, little endian
>> * PCI card, little endian host
>> * PCI card, big endian host
>> and there are also
>> * 16 bit pixel format
>> * 32 bit pixel format
>>
>> which makes 6 different combinations.
>> (It's a shame there's no sh4 BE code to run on this.)
>
>
> As mentioned before I've used this image to test SH:
>
> https://people.debian.org/~aurel32/qemu/sh4/
>
> with video=800x600-16 kernel parameter where changing -16 to different bit
> depths (8, 32) reproduces the problem. On ppc I've tested with MorphOS which
> uses 16 bpp, Linux and U-boot which I think uses 8 bit. These run on real
> hardware so I think if they work the emulation should be OK.
>
> You've said before that PCI is likely always little endian and SH embedded
> is LE too so unless something uses the device in BE mode (which we don't
> emulate) setting it to little endian should be correct. For frame buffer
> endianness I can only go with what I've seen clients doing and my patch does
> what worked for the above on SH and PPC. I can't prove it's correct but
> works for the systems I've targeted and also fixes the SH case which seemed
> to be broken.

Right, but we should test the PCI-on-a-little-endian CPU
case, because right now your code has #ifdef TARGET_WORDS_BIGENDIAN
in it which means that the PCI device will behave differently
on big and little endian CPUs, and you have a case that you
haven't tested.

(At the moment with the set of cases you've tested the
sh embedded device ones will always be using the "not
TARGET_WORDS_BIGENDIAN" code and the PCI-on-PPC will
always use the "is TARGET_WORDS_BIGENDIAN" code. But
TARGET_WORDS_BIGENDIAN set/not set is orthogonal to
PCI-vs-embedded.)

thanks
-- PMM
BALATON Zoltan March 4, 2017, 10:58 p.m. UTC | #6
On Sat, 4 Mar 2017, Peter Maydell wrote:
> On 3 March 2017 at 20:11, BALATON Zoltan <balaton@eik.bme.hu> wrote:
>> On Fri, 3 Mar 2017, Peter Maydell wrote:
>>> So what cases have you tested? The Linux kernel seems to support:
>>> * sh embedded device, little endian
>>> * PCI card, little endian host
>>> * PCI card, big endian host
>>> and there are also
>>> * 16 bit pixel format
>>> * 32 bit pixel format
>>>
>>> which makes 6 different combinations.
>>> (It's a shame there's no sh4 BE code to run on this.)
>>
>>
>> As mentioned before I've used this image to test SH:
>>
>> https://people.debian.org/~aurel32/qemu/sh4/
>>
>> with video=800x600-16 kernel parameter where changing -16 to different bit
>> depths (8, 32) reproduces the problem. On ppc I've tested with MorphOS which
>> uses 16 bpp, Linux and U-boot which I think uses 8 bit. These run on real
>> hardware so I think if they work the emulation should be OK.
>>
>> You've said before that PCI is likely always little endian and SH embedded
>> is LE too so unless something uses the device in BE mode (which we don't
>> emulate) setting it to little endian should be correct. For frame buffer
>> endianness I can only go with what I've seen clients doing and my patch does
>> what worked for the above on SH and PPC. I can't prove it's correct but
>> works for the systems I've targeted and also fixes the SH case which seemed
>> to be broken.
>
> Right, but we should test the PCI-on-a-little-endian CPU
> case, because right now your code has #ifdef TARGET_WORDS_BIGENDIAN
> in it which means that the PCI device will behave differently
> on big and little endian CPUs, and you have a case that you
> haven't tested.

I could not find an image to test this. I've tried removing the sysbus 
version from SH and add a PCI one instead but then Linux did not seem to 
find the card. I could not find any other OS images that would have this 
combination that is also known to work on any real hardware so I think if 
someone ever finds a problem with such combination then we can fix it when 
having a test case, since I could not figure out theoretically what should 
be the right way.

> (At the moment with the set of cases you've tested the
> sh embedded device ones will always be using the "not
> TARGET_WORDS_BIGENDIAN" code and the PCI-on-PPC will
> always use the "is TARGET_WORDS_BIGENDIAN" code. But
> TARGET_WORDS_BIGENDIAN set/not set is orthogonal to
> PCI-vs-embedded.)

Why do you think that framebuffer endianness is dependent on 
PCI-vs-embedded as opposed to target endianness? The latter seems more 
plausible to me. The TARGET_WORDS_BIGENDIAN was already there for 32 bit 
case. My patch only adds it to the 16 bit too and fixing the bytes for 
both (which seemed to be already wrong even on SH). So I think previously 
it was broken, my patch fixes it to be better (or at most broken in a
different way) so I don't think it's worse than before thus, an 
improvement.

> thanks
> -- PMM
>
>
Peter Maydell March 6, 2017, 10:32 a.m. UTC | #7
On 4 March 2017 at 22:58, BALATON Zoltan <balaton@eik.bme.hu> wrote:
> On Sat, 4 Mar 2017, Peter Maydell wrote:
>> Right, but we should test the PCI-on-a-little-endian CPU
>> case, because right now your code has #ifdef TARGET_WORDS_BIGENDIAN
>> in it which means that the PCI device will behave differently
>> on big and little endian CPUs, and you have a case that you
>> haven't tested.
>
>
> I could not find an image to test this. I've tried removing the sysbus
> version from SH and add a PCI one instead but then Linux did not seem to
> find the card.

I was thinking about testing the PCI card on x86.

> I could not find any other OS images that would have this
> combination that is also known to work on any real hardware so I think if
> someone ever finds a problem with such combination then we can fix it when
> having a test case, since I could not figure out theoretically what should
> be the right way.
>
>> (At the moment with the set of cases you've tested the
>> sh embedded device ones will always be using the "not
>> TARGET_WORDS_BIGENDIAN" code and the PCI-on-PPC will
>> always use the "is TARGET_WORDS_BIGENDIAN" code. But
>> TARGET_WORDS_BIGENDIAN set/not set is orthogonal to
>> PCI-vs-embedded.)
>
>
> Why do you think that framebuffer endianness is dependent on PCI-vs-embedded
> as opposed to target endianness? The latter seems more plausible to me.

The PCI card should behave the same way whether you plug it
into a PPC system or an x86 system (since it's the same
hardware). So code in the handling of the PCI card that
looks at TARGET_WORDS_BIGENDIAN is suspicious.

thanks
-- PMM
BALATON Zoltan March 6, 2017, 6:46 p.m. UTC | #8
On Mon, 6 Mar 2017, Peter Maydell wrote:
> On 4 March 2017 at 22:58, BALATON Zoltan <balaton@eik.bme.hu> wrote:
>> On Sat, 4 Mar 2017, Peter Maydell wrote:
>>> Right, but we should test the PCI-on-a-little-endian CPU
>>> case, because right now your code has #ifdef TARGET_WORDS_BIGENDIAN
>>> in it which means that the PCI device will behave differently
>>> on big and little endian CPUs, and you have a case that you
>>> haven't tested.
>>
>>
>> I could not find an image to test this. I've tried removing the sysbus
>> version from SH and add a PCI one instead but then Linux did not seem to
>> find the card.
>
> I was thinking about testing the PCI card on x86.
>
>> I could not find any other OS images that would have this
>> combination that is also known to work on any real hardware so I think if
>> someone ever finds a problem with such combination then we can fix it when
>> having a test case, since I could not figure out theoretically what should
>> be the right way.
>>
>>> (At the moment with the set of cases you've tested the
>>> sh embedded device ones will always be using the "not
>>> TARGET_WORDS_BIGENDIAN" code and the PCI-on-PPC will
>>> always use the "is TARGET_WORDS_BIGENDIAN" code. But
>>> TARGET_WORDS_BIGENDIAN set/not set is orthogonal to
>>> PCI-vs-embedded.)
>>
>>
>> Why do you think that framebuffer endianness is dependent on PCI-vs-embedded
>> as opposed to target endianness? The latter seems more plausible to me.
>
> The PCI card should behave the same way whether you plug it
> into a PPC system or an x86 system (since it's the same
> hardware). So code in the handling of the PCI card that
> looks at TARGET_WORDS_BIGENDIAN is suspicious.

I've searched the web for this and read about possible cards having this 
chip working under Linux on x86 (or not working as these were mostly bug 
reports) but could not find an image that is known to be working with such 
card. I've tried a few recent Linux live CDs (Ubuntu and Fedora) with 
x86_64 and sm501 PCI emulation on QEMU but these did not seem to boot and 
find the card, only with the default VGA. Probably a card would have some 
BIOS and some VESA modes that the drivers would use so I'm not sure a PCI 
card would really behave the same on PPC and x64 (other video cards 
usually don't work between PC and PPC Macs because of the BIOS on them). 
Also on PPC this chip is mostly found on embedded boards not on pluggable 
cards.

Regards,
BALATON Zoltan
diff mbox

Patch

diff --git a/hw/display/sm501.c b/hw/display/sm501.c
index b977094..2694081 100644
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
@@ -849,7 +849,7 @@  static const MemoryRegionOps sm501_system_config_ops = {
         .min_access_size = 4,
         .max_access_size = 4,
     },
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static uint32_t sm501_palette_read(void *opaque, hwaddr addr)
@@ -1085,7 +1085,7 @@  static const MemoryRegionOps sm501_disp_ctrl_ops = {
         .min_access_size = 4,
         .max_access_size = 4,
     },
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static uint64_t sm501_2d_engine_read(void *opaque, hwaddr addr,
@@ -1173,7 +1173,7 @@  static const MemoryRegionOps sm501_2d_engine_ops = {
         .min_access_size = 4,
         .max_access_size = 4,
     },
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 /* draw line functions for all console modes */
diff --git a/hw/display/sm501_template.h b/hw/display/sm501_template.h
index 832ee61..6e56baf 100644
--- a/hw/display/sm501_template.h
+++ b/hw/display/sm501_template.h
@@ -64,10 +64,16 @@  static void glue(draw_line16_, PIXEL_NAME)(
     uint8_t r, g, b;
 
     do {
-        rgb565 = lduw_p(s);
-        r = ((rgb565 >> 11) & 0x1f) << 3;
-        g = ((rgb565 >>  5) & 0x3f) << 2;
-        b = ((rgb565 >>  0) & 0x1f) << 3;
+        rgb565 = lduw_le_p(s);
+#if defined(TARGET_WORDS_BIGENDIAN)
+        r = (rgb565 >> 8) & 0xf8;
+        g = (rgb565 >> 3) & 0xfc;
+        b = (rgb565 << 3) & 0xf8;
+#else
+        b = (rgb565 >> 8) & 0xf8;
+        g = (rgb565 >> 3) & 0xfc;
+        r = (rgb565 << 3) & 0xf8;
+#endif
         *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
         s += 2;
         d += BPP;
@@ -80,15 +86,14 @@  static void glue(draw_line32_, PIXEL_NAME)(
     uint8_t r, g, b;
 
     do {
-        ldub_p(s);
 #if defined(TARGET_WORDS_BIGENDIAN)
+        r = s[0];
+        g = s[1];
+        b = s[2];
+#else
         r = s[1];
         g = s[2];
         b = s[3];
-#else
-        b = s[0];
-        g = s[1];
-        r = s[2];
 #endif
         *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
         s += 4;