diff mbox

[2/9] eepro100: Fix endianness issues

Message ID 1301603611-7964-3-git-send-email-weil@mail.berlios.de
State Superseded
Headers show

Commit Message

Stefan Weil March 31, 2011, 8:33 p.m. UTC
Like other Intel devices, e100 (eepro100) uses little endian byte order.

This patch was tested with these combinations:

i386 host, i386 + mipsel guests (le-le)
mipsel host, i386 guest (le-le)
i386 host, mips + ppc guests (le-be)
mips host, i386 guest (be-le)

mips and mipsel hosts were emulated machines.

Signed-off-by: Stefan Weil <weil@mail.berlios.de>
---
 hw/eepro100.c |  113 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 files changed, 80 insertions(+), 33 deletions(-)

Comments

Michael S. Tsirkin March 31, 2011, 9:52 p.m. UTC | #1
On Thu, Mar 31, 2011 at 10:33:24PM +0200, Stefan Weil wrote:
> Like other Intel devices, e100 (eepro100) uses little endian byte order.
> 
> This patch was tested with these combinations:
> 
> i386 host, i386 + mipsel guests (le-le)
> mipsel host, i386 guest (le-le)
> i386 host, mips + ppc guests (le-be)
> mips host, i386 guest (be-le)
> 
> mips and mipsel hosts were emulated machines.
> 
> Signed-off-by: Stefan Weil <weil@mail.berlios.de>
> ---
>  hw/eepro100.c |  113 ++++++++++++++++++++++++++++++++++++++++-----------------
>  1 files changed, 80 insertions(+), 33 deletions(-)
> 
> diff --git a/hw/eepro100.c b/hw/eepro100.c
> index f89ff17..c789767 100644
> --- a/hw/eepro100.c
> +++ b/hw/eepro100.c
> @@ -20,11 +20,10 @@
>   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   *
>   * Tested features (i82559):
> - *      PXE boot (i386) ok
> + *      PXE boot (i386 guest, i386 / mips / mipsel / ppc host) ok
>   *      Linux networking (i386) ok
>   *
>   * Untested:
> - *      non-i386 platforms
>   *      Windows networking
>   *
>   * References:
> @@ -130,7 +129,7 @@ typedef struct {
>  
>  /* Offsets to the various registers.
>     All accesses need not be longword aligned. */
> -enum speedo_offsets {
> +typedef enum {
>      SCBStatus = 0,              /* Status Word. */
>      SCBAck = 1,
>      SCBCmd = 2,                 /* Rx/Command Unit command and status. */
> @@ -145,7 +144,7 @@ enum speedo_offsets {
>      SCBpmdr = 27,               /* Power Management Driver. */
>      SCBgctrl = 28,              /* General Control. */
>      SCBgstat = 29,              /* General Status. */
> -};
> +} E100RegisterOffset;
>  
>  /* A speedo3 transmit buffer descriptor with two buffers... */
>  typedef struct {
> @@ -307,7 +306,32 @@ static const uint16_t eepro100_mdi_mask[] = {
>      0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
>  };
>  
> -/* XXX: optimize */
> +/* Read a 16 bit little endian value from physical memory. */
> +static uint16_t lduw_le_phys(target_phys_addr_t addr)
> +{
> +    /* Load 16 bit (little endian) word from emulated hardware. */
> +    uint16_t val;
> +    cpu_physical_memory_read(addr, (uint8_t *)&val, sizeof(val));
> +    return le16_to_cpu(val);
> +}
> +
> +/* Read a 32 bit little endian value from physical memory. */
> +static uint32_t ldl_le_phys(target_phys_addr_t addr)
> +{
> +    /* Load 32 bit (little endian) word from emulated hardware. */
> +    uint32_t val;
> +    cpu_physical_memory_read(addr, (uint8_t *)&val, sizeof(val));
> +    return le32_to_cpu(val);
> +}
> +
> +/* Write a 16 bit little endian value to physical memory. */
> +static void stw_le_phys(target_phys_addr_t addr, uint16_t val)
> +{
> +    val = cpu_to_le16(val);
> +    cpu_physical_memory_write(addr, (const uint8_t *)&val, sizeof(val));
> +}
> +
> +/* Write a 32 bit little endian value to physical memory. */

So why not opencode e.g.
	le32_to_cpu(ldl_phys(addr))

wrappers really worth it? What do I miss?

If you insist on these online wrappers, pls prefix
them with eepro100_.
Also, why not use lduw_phys and friends internally?
cpu_physical_  is slower ...

>  static void stl_le_phys(target_phys_addr_t addr, uint32_t val)
>  {
>      val = cpu_to_le32(val);
> @@ -339,6 +363,32 @@ static unsigned compute_mcast_idx(const uint8_t * ep)
>      return (crc & BITS(7, 2)) >> 2;
>  }
>  
> +/* Read a 16 bit control/status (CSR) register. */
> +static uint16_t e100_read_reg2(EEPRO100State *s, E100RegisterOffset addr)
> +{
> +    return le16_to_cpup((uint16_t *)&s->mem[addr]);
> +}
> +
> +/* Read a 32 bit control/status (CSR) register. */
> +static uint32_t e100_read_reg4(EEPRO100State *s, E100RegisterOffset addr)
> +{
> +    return le32_to_cpup((uint32_t *)&s->mem[addr]);
> +}
> +
> +/* Write a 16 bit control/status (CSR) register. */
> +static void e100_write_reg2(EEPRO100State *s, E100RegisterOffset addr,
> +                            uint16_t val)
> +{
> +    cpu_to_le16w((uint16_t *)&s->mem[addr], val);
> +}
> +
> +/* Read a 32 bit control/status (CSR) register. */
> +static void e100_write_reg4(EEPRO100State *s, E100RegisterOffset addr,
> +                            uint32_t val)
> +{
> +    cpu_to_le32w((uint32_t *)&s->mem[addr], val);
> +}
> +

Note that cpu_to_le32w requires an aligned address, unlike
memcpy, and there's no guarantee
addr is aligned apparently?

If true you need to memcpy to a 32 bit variable, then
cpu_to_le32w ther result.

>  #if defined(DEBUG_EEPRO100)
>  static const char *nic_dump(const uint8_t * buf, unsigned size)
>  {
> @@ -590,8 +640,7 @@ static void nic_selective_reset(EEPRO100State * s)
>      TRACE(EEPROM, logout("checksum=0x%04x\n", eeprom_contents[EEPROM_SIZE - 1]));
>  
>      memset(s->mem, 0, sizeof(s->mem));
> -    uint32_t val = BIT(21);
> -    memcpy(&s->mem[SCBCtrlMDI], &val, sizeof(val));
> +    e100_write_reg4(s, SCBCtrlMDI, BIT(21));
>  
>      assert(sizeof(s->mdimem) == sizeof(eepro100_mdi_default));
>      memcpy(&s->mdimem[0], &eepro100_mdi_default[0], sizeof(s->mdimem));
> @@ -739,10 +788,10 @@ static void tx_command(EEPRO100State *s)
>      }
>      assert(tcb_bytes <= sizeof(buf));
>      while (size < tcb_bytes) {
> -        uint32_t tx_buffer_address = ldl_phys(tbd_address);
> -        uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
> +        uint32_t tx_buffer_address = ldl_le_phys(tbd_address);
> +        uint16_t tx_buffer_size = lduw_le_phys(tbd_address + 4);
>  #if 0
> -        uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
> +        uint16_t tx_buffer_el = lduw_le_phys(tbd_address + 6);
>  #endif
>          tbd_address += 8;
>          TRACE(RXTX, logout
> @@ -761,9 +810,9 @@ static void tx_command(EEPRO100State *s)
>          if (s->has_extended_tcb_support && !(s->configuration[6] & BIT(4))) {
>              /* Extended Flexible TCB. */
>              for (; tbd_count < 2; tbd_count++) {
> -                uint32_t tx_buffer_address = ldl_phys(tbd_address);
> -                uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
> -                uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
> +                uint32_t tx_buffer_address = ldl_le_phys(tbd_address);
> +                uint16_t tx_buffer_size = lduw_le_phys(tbd_address + 4);
> +                uint16_t tx_buffer_el = lduw_le_phys(tbd_address + 6);
>                  tbd_address += 8;
>                  TRACE(RXTX, logout
>                      ("TBD (extended flexible mode): buffer address 0x%08x, size 0x%04x\n",
> @@ -779,9 +828,9 @@ static void tx_command(EEPRO100State *s)
>          }
>          tbd_address = tbd_array;
>          for (; tbd_count < s->tx.tbd_count; tbd_count++) {
> -            uint32_t tx_buffer_address = ldl_phys(tbd_address);
> -            uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
> -            uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
> +            uint32_t tx_buffer_address = ldl_le_phys(tbd_address);
> +            uint16_t tx_buffer_size = lduw_le_phys(tbd_address + 4);
> +            uint16_t tx_buffer_el = lduw_le_phys(tbd_address + 6);
>              tbd_address += 8;
>              TRACE(RXTX, logout
>                  ("TBD (flexible mode): buffer address 0x%08x, size 0x%04x\n",
> @@ -889,7 +938,7 @@ static void action_command(EEPRO100State *s)
>              break;
>          }
>          /* Write new status. */
> -        stw_phys(s->cb_address, s->tx.status | ok_status | STATUS_C);
> +        stw_le_phys(s->cb_address, s->tx.status | ok_status | STATUS_C);
>          if (bit_i) {
>              /* CU completed action. */
>              eepro100_cx_interrupt(s);
> @@ -1050,8 +1099,7 @@ static void eepro100_write_command(EEPRO100State * s, uint8_t val)
>  
>  static uint16_t eepro100_read_eeprom(EEPRO100State * s)
>  {
> -    uint16_t val;
> -    memcpy(&val, &s->mem[SCBeeprom], sizeof(val));
> +    uint16_t val = e100_read_reg4(s, SCBeeprom);
>      if (eeprom93xx_read(s->eeprom)) {
>          val |= EEPROM_DO;
>      } else {
> @@ -1121,8 +1169,7 @@ static const char *reg2name(uint8_t reg)
>  
>  static uint32_t eepro100_read_mdi(EEPRO100State * s)
>  {
> -    uint32_t val;
> -    memcpy(&val, &s->mem[0x10], sizeof(val));
> +    uint32_t val = e100_read_reg4(s, SCBCtrlMDI);
>  
>  #ifdef DEBUG_EEPRO100
>      uint8_t raiseint = (val & BIT(29)) >> 29;
> @@ -1231,7 +1278,7 @@ static void eepro100_write_mdi(EEPRO100State * s, uint32_t val)
>          }
>      }
>      val = (val & 0xffff0000) + data;
> -    memcpy(&s->mem[0x10], &val, sizeof(val));
> +    e100_write_reg4(s, SCBCtrlMDI, val);
>  }
>  
>  /*****************************************************************************
> @@ -1258,7 +1305,6 @@ static uint32_t eepro100_read_port(EEPRO100State * s)
>  
>  static void eepro100_write_port(EEPRO100State * s, uint32_t val)
>  {
> -    val = le32_to_cpu(val);
>      uint32_t address = (val & ~PORT_SELECTION_MASK);
>      uint8_t selection = (val & PORT_SELECTION_MASK);
>      switch (selection) {
> @@ -1293,7 +1339,7 @@ static uint8_t eepro100_read1(EEPRO100State * s, uint32_t addr)
>  {
>      uint8_t val = 0;
>      if (addr <= sizeof(s->mem) - sizeof(val)) {
> -        memcpy(&val, &s->mem[addr], sizeof(val));
> +        val = s->mem[addr];
>      }
>  
>      switch (addr) {
> @@ -1336,7 +1382,7 @@ static uint16_t eepro100_read2(EEPRO100State * s, uint32_t addr)
>  {
>      uint16_t val = 0;
>      if (addr <= sizeof(s->mem) - sizeof(val)) {
> -        memcpy(&val, &s->mem[addr], sizeof(val));
> +        val = e100_read_reg2(s, addr);
>      }
>  
>      switch (addr) {
> @@ -1359,7 +1405,7 @@ static uint32_t eepro100_read4(EEPRO100State * s, uint32_t addr)
>  {
>      uint32_t val = 0;
>      if (addr <= sizeof(s->mem) - sizeof(val)) {
> -        memcpy(&val, &s->mem[addr], sizeof(val));
> +        val = e100_read_reg4(s, addr);
>      }
>  
>      switch (addr) {
> @@ -1390,7 +1436,7 @@ static void eepro100_write1(EEPRO100State * s, uint32_t addr, uint8_t val)
>  {
>      /* SCBStatus is readonly. */
>      if (addr > SCBStatus && addr <= sizeof(s->mem) - sizeof(val)) {
> -        memcpy(&s->mem[addr], &val, sizeof(val));
> +        s->mem[addr] = val;
>      }
>  
>      switch (addr) {
> @@ -1433,7 +1479,7 @@ static void eepro100_write2(EEPRO100State * s, uint32_t addr, uint16_t val)
>  {
>      /* SCBStatus is readonly. */
>      if (addr > SCBStatus && addr <= sizeof(s->mem) - sizeof(val)) {
> -        memcpy(&s->mem[addr], &val, sizeof(val));
> +        e100_write_reg2(s, addr, val);
>      }
>  
>      switch (addr) {
> @@ -1460,7 +1506,7 @@ static void eepro100_write2(EEPRO100State * s, uint32_t addr, uint16_t val)
>  static void eepro100_write4(EEPRO100State * s, uint32_t addr, uint32_t val)
>  {
>      if (addr <= sizeof(s->mem) - sizeof(val)) {
> -        memcpy(&s->mem[addr], &val, sizeof(val));
> +        e100_write_reg4(s, addr, val);
>      }
>  
>      switch (addr) {
> @@ -1753,9 +1799,10 @@ static ssize_t nic_receive(VLANClientState *nc, const uint8_t * buf, size_t size
>      }
>      TRACE(OTHER, logout("command 0x%04x, link 0x%08x, addr 0x%08x, size %u\n",
>            rfd_command, rx.link, rx.rx_buf_addr, rfd_size));
> -    stw_phys(s->ru_base + s->ru_offset + offsetof(eepro100_rx_t, status),
> -             rfd_status);
> -    stw_phys(s->ru_base + s->ru_offset + offsetof(eepro100_rx_t, count), size);
> +    stw_le_phys(s->ru_base + s->ru_offset + offsetof(eepro100_rx_t, status),
> +                rfd_status);
> +    stw_le_phys(s->ru_base + s->ru_offset + offsetof(eepro100_rx_t, count),
> +                size);
>      /* Early receive interrupt not supported. */
>  #if 0
>      eepro100_er_interrupt(s);
> @@ -1884,7 +1931,7 @@ static int e100_nic_init(PCIDevice *pci_dev)
>      /* Handler for memory-mapped I/O */
>      s->mmio_index =
>          cpu_register_io_memory(pci_mmio_read, pci_mmio_write, s,
> -                               DEVICE_NATIVE_ENDIAN);
> +                               DEVICE_LITTLE_ENDIAN);
>  
>      pci_register_bar(&s->dev, 0, PCI_MEM_SIZE,
>                             PCI_BASE_ADDRESS_SPACE_MEMORY |
> -- 
> 1.7.2.5
Stefan Weil April 1, 2011, 5:52 p.m. UTC | #2
Am 31.03.2011 23:52, schrieb Michael S. Tsirkin:
> On Thu, Mar 31, 2011 at 10:33:24PM +0200, Stefan Weil wrote:
>> Like other Intel devices, e100 (eepro100) uses little endian byte order.
>>
>> This patch was tested with these combinations:
>>
>> i386 host, i386 + mipsel guests (le-le)
>> mipsel host, i386 guest (le-le)
>> i386 host, mips + ppc guests (le-be)
>> mips host, i386 guest (be-le)
>>
>> mips and mipsel hosts were emulated machines.
>>
>> Signed-off-by: Stefan Weil <weil@mail.berlios.de>
>> ---
>> hw/eepro100.c | 113 
>> ++++++++++++++++++++++++++++++++++++++++-----------------
>> 1 files changed, 80 insertions(+), 33 deletions(-)
>>
>> diff --git a/hw/eepro100.c b/hw/eepro100.c
>> index f89ff17..c789767 100644
>> --- a/hw/eepro100.c
>> +++ b/hw/eepro100.c
>> @@ -20,11 +20,10 @@
>> * along with this program. If not, see <http://www.gnu.org/licenses/>.
>> *
>> * Tested features (i82559):
>> - * PXE boot (i386) ok
>> + * PXE boot (i386 guest, i386 / mips / mipsel / ppc host) ok
>> * Linux networking (i386) ok
>> *
>> * Untested:
>> - * non-i386 platforms
>> * Windows networking
>> *
>> * References:
>> @@ -130,7 +129,7 @@ typedef struct {
>>
>> /* Offsets to the various registers.
>> All accesses need not be longword aligned. */
>> -enum speedo_offsets {
>> +typedef enum {
>> SCBStatus = 0, /* Status Word. */
>> SCBAck = 1,
>> SCBCmd = 2, /* Rx/Command Unit command and status. */
>> @@ -145,7 +144,7 @@ enum speedo_offsets {
>> SCBpmdr = 27, /* Power Management Driver. */
>> SCBgctrl = 28, /* General Control. */
>> SCBgstat = 29, /* General Status. */
>> -};
>> +} E100RegisterOffset;
>>
>> /* A speedo3 transmit buffer descriptor with two buffers... */
>> typedef struct {
>> @@ -307,7 +306,32 @@ static const uint16_t eepro100_mdi_mask[] = {
>> 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
>> };
>>
>> -/* XXX: optimize */
>> +/* Read a 16 bit little endian value from physical memory. */
>> +static uint16_t lduw_le_phys(target_phys_addr_t addr)
>> +{
>> + /* Load 16 bit (little endian) word from emulated hardware. */
>> + uint16_t val;
>> + cpu_physical_memory_read(addr, (uint8_t *)&val, sizeof(val));
>> + return le16_to_cpu(val);
>> +}
>> +
>> +/* Read a 32 bit little endian value from physical memory. */
>> +static uint32_t ldl_le_phys(target_phys_addr_t addr)
>> +{
>> + /* Load 32 bit (little endian) word from emulated hardware. */
>> + uint32_t val;
>> + cpu_physical_memory_read(addr, (uint8_t *)&val, sizeof(val));
>> + return le32_to_cpu(val);
>> +}
>> +
>> +/* Write a 16 bit little endian value to physical memory. */
>> +static void stw_le_phys(target_phys_addr_t addr, uint16_t val)
>> +{
>> + val = cpu_to_le16(val);
>> + cpu_physical_memory_write(addr, (const uint8_t *)&val, sizeof(val));
>> +}
>> +
>> +/* Write a 32 bit little endian value to physical memory. */
>
> So why not opencode e.g.
> le32_to_cpu(ldl_phys(addr))
>
> wrappers really worth it? What do I miss?
>
> If you insist on these online wrappers, pls prefix
> them with eepro100_.
> Also, why not use lduw_phys and friends internally?
> cpu_physical_ is slower ...
>
>> static void stl_le_phys(target_phys_addr_t addr, uint32_t val)
>> {
>> val = cpu_to_le32(val);
>> @@ -339,6 +363,32 @@ static unsigned compute_mcast_idx(const uint8_t 
>> * ep)
>> return (crc & BITS(7, 2)) >> 2;
>> }
>>
>> +/* Read a 16 bit control/status (CSR) register. */
>> +static uint16_t e100_read_reg2(EEPRO100State *s, E100RegisterOffset 
>> addr)
>> +{
>> + return le16_to_cpup((uint16_t *)&s->mem[addr]);
>> +}
>> +
>> +/* Read a 32 bit control/status (CSR) register. */
>> +static uint32_t e100_read_reg4(EEPRO100State *s, E100RegisterOffset 
>> addr)
>> +{
>> + return le32_to_cpup((uint32_t *)&s->mem[addr]);
>> +}
>> +
>> +/* Write a 16 bit control/status (CSR) register. */
>> +static void e100_write_reg2(EEPRO100State *s, E100RegisterOffset addr,
>> + uint16_t val)
>> +{
>> + cpu_to_le16w((uint16_t *)&s->mem[addr], val);
>> +}
>> +
>> +/* Read a 32 bit control/status (CSR) register. */
>> +static void e100_write_reg4(EEPRO100State *s, E100RegisterOffset addr,
>> + uint32_t val)
>> +{
>> + cpu_to_le32w((uint32_t *)&s->mem[addr], val);
>> +}
>> +
>
> Note that cpu_to_le32w requires an aligned address, unlike
> memcpy, and there's no guarantee
> addr is aligned apparently?
>
> If true you need to memcpy to a 32 bit variable, then
> cpu_to_le32w ther result.
>

[snip]

Thank you for your review, especially for the hints at lduw_phys
and potential alignment issues. I'll apply them to a new version
of this patch.

There was already a function without prefix (stl_le_phys),
and the new ones belong to the same "family". There is nothing
e100 specific in them, so they might be added to qemu-common.h
as well. That was (and is) the reason why I did not add a prefix.
Michael S. Tsirkin April 3, 2011, 11:40 a.m. UTC | #3
On Fri, Apr 01, 2011 at 07:52:52PM +0200, Stefan Weil wrote:
> [snip]
> 
> Thank you for your review, especially for the hints at lduw_phys
> and potential alignment issues. I'll apply them to a new version
> of this patch.
> 
> There was already a function without prefix (stl_le_phys),
> and the new ones belong to the same "family". There is nothing
> e100 specific in them, so they might be added to qemu-common.h
> as well. That was (and is) the reason why I did not add a prefix.

Yes but they don't seem to do much useful,
IMO open-coding will be clearer, they are unlikely
to be generally useful. But if you like to
keep them pls add a eepro100 prefix.
diff mbox

Patch

diff --git a/hw/eepro100.c b/hw/eepro100.c
index f89ff17..c789767 100644
--- a/hw/eepro100.c
+++ b/hw/eepro100.c
@@ -20,11 +20,10 @@ 
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  * Tested features (i82559):
- *      PXE boot (i386) ok
+ *      PXE boot (i386 guest, i386 / mips / mipsel / ppc host) ok
  *      Linux networking (i386) ok
  *
  * Untested:
- *      non-i386 platforms
  *      Windows networking
  *
  * References:
@@ -130,7 +129,7 @@  typedef struct {
 
 /* Offsets to the various registers.
    All accesses need not be longword aligned. */
-enum speedo_offsets {
+typedef enum {
     SCBStatus = 0,              /* Status Word. */
     SCBAck = 1,
     SCBCmd = 2,                 /* Rx/Command Unit command and status. */
@@ -145,7 +144,7 @@  enum speedo_offsets {
     SCBpmdr = 27,               /* Power Management Driver. */
     SCBgctrl = 28,              /* General Control. */
     SCBgstat = 29,              /* General Status. */
-};
+} E100RegisterOffset;
 
 /* A speedo3 transmit buffer descriptor with two buffers... */
 typedef struct {
@@ -307,7 +306,32 @@  static const uint16_t eepro100_mdi_mask[] = {
     0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
 };
 
-/* XXX: optimize */
+/* Read a 16 bit little endian value from physical memory. */
+static uint16_t lduw_le_phys(target_phys_addr_t addr)
+{
+    /* Load 16 bit (little endian) word from emulated hardware. */
+    uint16_t val;
+    cpu_physical_memory_read(addr, (uint8_t *)&val, sizeof(val));
+    return le16_to_cpu(val);
+}
+
+/* Read a 32 bit little endian value from physical memory. */
+static uint32_t ldl_le_phys(target_phys_addr_t addr)
+{
+    /* Load 32 bit (little endian) word from emulated hardware. */
+    uint32_t val;
+    cpu_physical_memory_read(addr, (uint8_t *)&val, sizeof(val));
+    return le32_to_cpu(val);
+}
+
+/* Write a 16 bit little endian value to physical memory. */
+static void stw_le_phys(target_phys_addr_t addr, uint16_t val)
+{
+    val = cpu_to_le16(val);
+    cpu_physical_memory_write(addr, (const uint8_t *)&val, sizeof(val));
+}
+
+/* Write a 32 bit little endian value to physical memory. */
 static void stl_le_phys(target_phys_addr_t addr, uint32_t val)
 {
     val = cpu_to_le32(val);
@@ -339,6 +363,32 @@  static unsigned compute_mcast_idx(const uint8_t * ep)
     return (crc & BITS(7, 2)) >> 2;
 }
 
+/* Read a 16 bit control/status (CSR) register. */
+static uint16_t e100_read_reg2(EEPRO100State *s, E100RegisterOffset addr)
+{
+    return le16_to_cpup((uint16_t *)&s->mem[addr]);
+}
+
+/* Read a 32 bit control/status (CSR) register. */
+static uint32_t e100_read_reg4(EEPRO100State *s, E100RegisterOffset addr)
+{
+    return le32_to_cpup((uint32_t *)&s->mem[addr]);
+}
+
+/* Write a 16 bit control/status (CSR) register. */
+static void e100_write_reg2(EEPRO100State *s, E100RegisterOffset addr,
+                            uint16_t val)
+{
+    cpu_to_le16w((uint16_t *)&s->mem[addr], val);
+}
+
+/* Read a 32 bit control/status (CSR) register. */
+static void e100_write_reg4(EEPRO100State *s, E100RegisterOffset addr,
+                            uint32_t val)
+{
+    cpu_to_le32w((uint32_t *)&s->mem[addr], val);
+}
+
 #if defined(DEBUG_EEPRO100)
 static const char *nic_dump(const uint8_t * buf, unsigned size)
 {
@@ -590,8 +640,7 @@  static void nic_selective_reset(EEPRO100State * s)
     TRACE(EEPROM, logout("checksum=0x%04x\n", eeprom_contents[EEPROM_SIZE - 1]));
 
     memset(s->mem, 0, sizeof(s->mem));
-    uint32_t val = BIT(21);
-    memcpy(&s->mem[SCBCtrlMDI], &val, sizeof(val));
+    e100_write_reg4(s, SCBCtrlMDI, BIT(21));
 
     assert(sizeof(s->mdimem) == sizeof(eepro100_mdi_default));
     memcpy(&s->mdimem[0], &eepro100_mdi_default[0], sizeof(s->mdimem));
@@ -739,10 +788,10 @@  static void tx_command(EEPRO100State *s)
     }
     assert(tcb_bytes <= sizeof(buf));
     while (size < tcb_bytes) {
-        uint32_t tx_buffer_address = ldl_phys(tbd_address);
-        uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
+        uint32_t tx_buffer_address = ldl_le_phys(tbd_address);
+        uint16_t tx_buffer_size = lduw_le_phys(tbd_address + 4);
 #if 0
-        uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
+        uint16_t tx_buffer_el = lduw_le_phys(tbd_address + 6);
 #endif
         tbd_address += 8;
         TRACE(RXTX, logout
@@ -761,9 +810,9 @@  static void tx_command(EEPRO100State *s)
         if (s->has_extended_tcb_support && !(s->configuration[6] & BIT(4))) {
             /* Extended Flexible TCB. */
             for (; tbd_count < 2; tbd_count++) {
-                uint32_t tx_buffer_address = ldl_phys(tbd_address);
-                uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
-                uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
+                uint32_t tx_buffer_address = ldl_le_phys(tbd_address);
+                uint16_t tx_buffer_size = lduw_le_phys(tbd_address + 4);
+                uint16_t tx_buffer_el = lduw_le_phys(tbd_address + 6);
                 tbd_address += 8;
                 TRACE(RXTX, logout
                     ("TBD (extended flexible mode): buffer address 0x%08x, size 0x%04x\n",
@@ -779,9 +828,9 @@  static void tx_command(EEPRO100State *s)
         }
         tbd_address = tbd_array;
         for (; tbd_count < s->tx.tbd_count; tbd_count++) {
-            uint32_t tx_buffer_address = ldl_phys(tbd_address);
-            uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
-            uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
+            uint32_t tx_buffer_address = ldl_le_phys(tbd_address);
+            uint16_t tx_buffer_size = lduw_le_phys(tbd_address + 4);
+            uint16_t tx_buffer_el = lduw_le_phys(tbd_address + 6);
             tbd_address += 8;
             TRACE(RXTX, logout
                 ("TBD (flexible mode): buffer address 0x%08x, size 0x%04x\n",
@@ -889,7 +938,7 @@  static void action_command(EEPRO100State *s)
             break;
         }
         /* Write new status. */
-        stw_phys(s->cb_address, s->tx.status | ok_status | STATUS_C);
+        stw_le_phys(s->cb_address, s->tx.status | ok_status | STATUS_C);
         if (bit_i) {
             /* CU completed action. */
             eepro100_cx_interrupt(s);
@@ -1050,8 +1099,7 @@  static void eepro100_write_command(EEPRO100State * s, uint8_t val)
 
 static uint16_t eepro100_read_eeprom(EEPRO100State * s)
 {
-    uint16_t val;
-    memcpy(&val, &s->mem[SCBeeprom], sizeof(val));
+    uint16_t val = e100_read_reg4(s, SCBeeprom);
     if (eeprom93xx_read(s->eeprom)) {
         val |= EEPROM_DO;
     } else {
@@ -1121,8 +1169,7 @@  static const char *reg2name(uint8_t reg)
 
 static uint32_t eepro100_read_mdi(EEPRO100State * s)
 {
-    uint32_t val;
-    memcpy(&val, &s->mem[0x10], sizeof(val));
+    uint32_t val = e100_read_reg4(s, SCBCtrlMDI);
 
 #ifdef DEBUG_EEPRO100
     uint8_t raiseint = (val & BIT(29)) >> 29;
@@ -1231,7 +1278,7 @@  static void eepro100_write_mdi(EEPRO100State * s, uint32_t val)
         }
     }
     val = (val & 0xffff0000) + data;
-    memcpy(&s->mem[0x10], &val, sizeof(val));
+    e100_write_reg4(s, SCBCtrlMDI, val);
 }
 
 /*****************************************************************************
@@ -1258,7 +1305,6 @@  static uint32_t eepro100_read_port(EEPRO100State * s)
 
 static void eepro100_write_port(EEPRO100State * s, uint32_t val)
 {
-    val = le32_to_cpu(val);
     uint32_t address = (val & ~PORT_SELECTION_MASK);
     uint8_t selection = (val & PORT_SELECTION_MASK);
     switch (selection) {
@@ -1293,7 +1339,7 @@  static uint8_t eepro100_read1(EEPRO100State * s, uint32_t addr)
 {
     uint8_t val = 0;
     if (addr <= sizeof(s->mem) - sizeof(val)) {
-        memcpy(&val, &s->mem[addr], sizeof(val));
+        val = s->mem[addr];
     }
 
     switch (addr) {
@@ -1336,7 +1382,7 @@  static uint16_t eepro100_read2(EEPRO100State * s, uint32_t addr)
 {
     uint16_t val = 0;
     if (addr <= sizeof(s->mem) - sizeof(val)) {
-        memcpy(&val, &s->mem[addr], sizeof(val));
+        val = e100_read_reg2(s, addr);
     }
 
     switch (addr) {
@@ -1359,7 +1405,7 @@  static uint32_t eepro100_read4(EEPRO100State * s, uint32_t addr)
 {
     uint32_t val = 0;
     if (addr <= sizeof(s->mem) - sizeof(val)) {
-        memcpy(&val, &s->mem[addr], sizeof(val));
+        val = e100_read_reg4(s, addr);
     }
 
     switch (addr) {
@@ -1390,7 +1436,7 @@  static void eepro100_write1(EEPRO100State * s, uint32_t addr, uint8_t val)
 {
     /* SCBStatus is readonly. */
     if (addr > SCBStatus && addr <= sizeof(s->mem) - sizeof(val)) {
-        memcpy(&s->mem[addr], &val, sizeof(val));
+        s->mem[addr] = val;
     }
 
     switch (addr) {
@@ -1433,7 +1479,7 @@  static void eepro100_write2(EEPRO100State * s, uint32_t addr, uint16_t val)
 {
     /* SCBStatus is readonly. */
     if (addr > SCBStatus && addr <= sizeof(s->mem) - sizeof(val)) {
-        memcpy(&s->mem[addr], &val, sizeof(val));
+        e100_write_reg2(s, addr, val);
     }
 
     switch (addr) {
@@ -1460,7 +1506,7 @@  static void eepro100_write2(EEPRO100State * s, uint32_t addr, uint16_t val)
 static void eepro100_write4(EEPRO100State * s, uint32_t addr, uint32_t val)
 {
     if (addr <= sizeof(s->mem) - sizeof(val)) {
-        memcpy(&s->mem[addr], &val, sizeof(val));
+        e100_write_reg4(s, addr, val);
     }
 
     switch (addr) {
@@ -1753,9 +1799,10 @@  static ssize_t nic_receive(VLANClientState *nc, const uint8_t * buf, size_t size
     }
     TRACE(OTHER, logout("command 0x%04x, link 0x%08x, addr 0x%08x, size %u\n",
           rfd_command, rx.link, rx.rx_buf_addr, rfd_size));
-    stw_phys(s->ru_base + s->ru_offset + offsetof(eepro100_rx_t, status),
-             rfd_status);
-    stw_phys(s->ru_base + s->ru_offset + offsetof(eepro100_rx_t, count), size);
+    stw_le_phys(s->ru_base + s->ru_offset + offsetof(eepro100_rx_t, status),
+                rfd_status);
+    stw_le_phys(s->ru_base + s->ru_offset + offsetof(eepro100_rx_t, count),
+                size);
     /* Early receive interrupt not supported. */
 #if 0
     eepro100_er_interrupt(s);
@@ -1884,7 +1931,7 @@  static int e100_nic_init(PCIDevice *pci_dev)
     /* Handler for memory-mapped I/O */
     s->mmio_index =
         cpu_register_io_memory(pci_mmio_read, pci_mmio_write, s,
-                               DEVICE_NATIVE_ENDIAN);
+                               DEVICE_LITTLE_ENDIAN);
 
     pci_register_bar(&s->dev, 0, PCI_MEM_SIZE,
                            PCI_BASE_ADDRESS_SPACE_MEMORY |