Patchwork virtio-pci: replace byte swap hack

login
register
mail settings
Submitter Blue Swirl
Date Dec. 30, 2012, 12:55 p.m.
Message ID <89fdb096d0f60d59e45f78d5b4ef26a743ffbc31.1356872111.git.blauwirbel@gmail.com>
Download mbox | patch
Permalink /patch/208752/
State New
Headers show

Comments

Blue Swirl - Dec. 30, 2012, 12:55 p.m.
Remove byte swaps by declaring the config space
as native endian.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 exec.c          |   18 ------------------
 hw/virtio-pci.c |   17 +----------------
 2 files changed, 1 insertions(+), 34 deletions(-)
Alexander Graf - Jan. 6, 2013, 1:17 p.m.
On 30.12.2012, at 13:55, Blue Swirl wrote:

> Remove byte swaps by declaring the config space
> as native endian.

This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian.


Alex

> 
> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
> ---
> exec.c          |   18 ------------------
> hw/virtio-pci.c |   17 +----------------
> 2 files changed, 1 insertions(+), 34 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index a6923ad..140eb56 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2587,24 +2587,6 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
> }
> #endif
> 
> -#if !defined(CONFIG_USER_ONLY)
> -
> -/*
> - * A helper function for the _utterly broken_ virtio device model to find out if
> - * it's running on a big endian machine. Don't do this at home kids!
> - */
> -bool virtio_is_big_endian(void);
> -bool virtio_is_big_endian(void)
> -{
> -#if defined(TARGET_WORDS_BIGENDIAN)
> -    return true;
> -#else
> -    return false;
> -#endif
> -}
> -
> -#endif
> -
> #ifndef CONFIG_USER_ONLY
> bool cpu_physical_memory_is_io(hwaddr phys_addr)
> {
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index d2d2454..df78a3b 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -92,9 +92,6 @@
>  */
> #define wmb() do { } while (0)
> 
> -/* HACK for virtio to determine if it's running a big endian guest */
> -bool virtio_is_big_endian(void);
> -
> /* virtio device */
> 
> static void virtio_pci_notify(void *opaque, uint16_t vector)
> @@ -390,15 +387,9 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
>         break;
>     case 2:
>         val = virtio_config_readw(proxy->vdev, addr);
> -        if (virtio_is_big_endian()) {
> -            val = bswap16(val);
> -        }
>         break;
>     case 4:
>         val = virtio_config_readl(proxy->vdev, addr);
> -        if (virtio_is_big_endian()) {
> -            val = bswap32(val);
> -        }
>         break;
>     }
>     return val;
> @@ -423,15 +414,9 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr,
>         virtio_config_writeb(proxy->vdev, addr, val);
>         break;
>     case 2:
> -        if (virtio_is_big_endian()) {
> -            val = bswap16(val);
> -        }
>         virtio_config_writew(proxy->vdev, addr, val);
>         break;
>     case 4:
> -        if (virtio_is_big_endian()) {
> -            val = bswap32(val);
> -        }
>         virtio_config_writel(proxy->vdev, addr, val);
>         break;
>     }
> @@ -444,7 +429,7 @@ static const MemoryRegionOps virtio_pci_config_ops = {
>         .min_access_size = 1,
>         .max_access_size = 4,
>     },
> -    .endianness = DEVICE_LITTLE_ENDIAN,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> };
> 
> static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
> -- 
> 1.7.2.5
> 
>
Andreas Färber - Jan. 6, 2013, 6:25 p.m.
Am 06.01.2013 14:17, schrieb Alexander Graf:
> 
> On 30.12.2012, at 13:55, Blue Swirl wrote:
> 
>> Remove byte swaps by declaring the config space
>> as native endian.
> 
> This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian.

Can that MemoryRegion be split in two?

Andreas
Blue Swirl - Jan. 6, 2013, 6:26 p.m.
On Sun, Jan 6, 2013 at 1:17 PM, Alexander Graf <agraf@suse.de> wrote:
>
> On 30.12.2012, at 13:55, Blue Swirl wrote:
>
>> Remove byte swaps by declaring the config space
>> as native endian.
>
> This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian.

True. I must say that this is poor architectural design in multiple
ways. The endianness problem is aggravated by device being also
instantiated by other devices, so we can't (at least easily) pass an
endianness flag from above, or make two devices that are selected by
the board. Another problem is that the offset changes dynamically
depending on if MSIX is enabled or not. The design is probably also
fixed by the guest driver assumptions and can't ever be improved.
Paravirtual devices should have cleaner design than what for example
original PC devices have, not more insane. :-(

Anyway, the real fix is that the memory region should contain two
subregions, native and LE. The offset of the second region should be
adjusted if MSIX bit is toggled. This may need some kind of
(nontrivial) callback system.

The original design could be improved to avoid the checks and calls to
virtio_is_big_endian() from the config handler by introducing two sets
of config functions (LE+LE, BE+LE) and selecting the correct one at
init time (based on result of virtio_is_big_endian() which does not
change). This would not help the real fix.

Both need more thought and hopefully there are even better options, so
I'll revert the commit.

>
> Alex
>
>>
>> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
>> ---
>> exec.c          |   18 ------------------
>> hw/virtio-pci.c |   17 +----------------
>> 2 files changed, 1 insertions(+), 34 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index a6923ad..140eb56 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -2587,24 +2587,6 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
>> }
>> #endif
>>
>> -#if !defined(CONFIG_USER_ONLY)
>> -
>> -/*
>> - * A helper function for the _utterly broken_ virtio device model to find out if
>> - * it's running on a big endian machine. Don't do this at home kids!
>> - */
>> -bool virtio_is_big_endian(void);
>> -bool virtio_is_big_endian(void)
>> -{
>> -#if defined(TARGET_WORDS_BIGENDIAN)
>> -    return true;
>> -#else
>> -    return false;
>> -#endif
>> -}
>> -
>> -#endif
>> -
>> #ifndef CONFIG_USER_ONLY
>> bool cpu_physical_memory_is_io(hwaddr phys_addr)
>> {
>> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
>> index d2d2454..df78a3b 100644
>> --- a/hw/virtio-pci.c
>> +++ b/hw/virtio-pci.c
>> @@ -92,9 +92,6 @@
>>  */
>> #define wmb() do { } while (0)
>>
>> -/* HACK for virtio to determine if it's running a big endian guest */
>> -bool virtio_is_big_endian(void);
>> -
>> /* virtio device */
>>
>> static void virtio_pci_notify(void *opaque, uint16_t vector)
>> @@ -390,15 +387,9 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
>>         break;
>>     case 2:
>>         val = virtio_config_readw(proxy->vdev, addr);
>> -        if (virtio_is_big_endian()) {
>> -            val = bswap16(val);
>> -        }
>>         break;
>>     case 4:
>>         val = virtio_config_readl(proxy->vdev, addr);
>> -        if (virtio_is_big_endian()) {
>> -            val = bswap32(val);
>> -        }
>>         break;
>>     }
>>     return val;
>> @@ -423,15 +414,9 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr,
>>         virtio_config_writeb(proxy->vdev, addr, val);
>>         break;
>>     case 2:
>> -        if (virtio_is_big_endian()) {
>> -            val = bswap16(val);
>> -        }
>>         virtio_config_writew(proxy->vdev, addr, val);
>>         break;
>>     case 4:
>> -        if (virtio_is_big_endian()) {
>> -            val = bswap32(val);
>> -        }
>>         virtio_config_writel(proxy->vdev, addr, val);
>>         break;
>>     }
>> @@ -444,7 +429,7 @@ static const MemoryRegionOps virtio_pci_config_ops = {
>>         .min_access_size = 1,
>>         .max_access_size = 4,
>>     },
>> -    .endianness = DEVICE_LITTLE_ENDIAN,
>> +    .endianness = DEVICE_NATIVE_ENDIAN,
>> };
>>
>> static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
>> --
>> 1.7.2.5
>>
>>
>
Alexander Graf - Jan. 6, 2013, 6:50 p.m.
Am 06.01.2013 um 19:26 schrieb Blue Swirl <blauwirbel@gmail.com>:

> On Sun, Jan 6, 2013 at 1:17 PM, Alexander Graf <agraf@suse.de> wrote:
>> 
>> On 30.12.2012, at 13:55, Blue Swirl wrote:
>> 
>>> Remove byte swaps by declaring the config space
>>> as native endian.
>> 
>> This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian.
> 
> True. I must say that this is poor architectural design in multiple
> ways. The endianness problem is aggravated by device being also
> instantiated by other devices, so we can't (at least easily) pass an
> endianness flag from above, or make two devices that are selected by
> the board. Another problem is that the offset changes dynamically
> depending on if MSIX is enabled or not. The design is probably also
> fixed by the guest driver assumptions and can't ever be improved.
> Paravirtual devices should have cleaner design than what for example
> original PC devices have, not more insane. :-(

I guess now you understand my wording in the comment that your patch removes a bit better :).


Alex
Blue Swirl - Jan. 6, 2013, 8:04 p.m.
On Sun, Jan 6, 2013 at 6:25 PM, Andreas Färber <afaerber@suse.de> wrote:
> Am 06.01.2013 14:17, schrieb Alexander Graf:
>>
>> On 30.12.2012, at 13:55, Blue Swirl wrote:
>>
>>> Remove byte swaps by declaring the config space
>>> as native endian.
>>
>> This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian.
>
> Can that MemoryRegion be split in two?

Yes, but unfortunately the offset for the second region depends on if
MSIX is enabled or not. PCI layer manages these bits without the
device seeing any changes.

This could be handled by introducing a callback at PCI layer to inform
interested devices about changes to MSIX setup, or even generalized:
inform devices about changes within any set of bits specified by the
device.

>
> Andreas
>
> --
> SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
> GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
Michael S. Tsirkin - Jan. 7, 2013, 4:11 p.m.
On Sun, Jan 06, 2013 at 08:04:39PM +0000, Blue Swirl wrote:
> On Sun, Jan 6, 2013 at 6:25 PM, Andreas Färber <afaerber@suse.de> wrote:
> > Am 06.01.2013 14:17, schrieb Alexander Graf:
> >>
> >> On 30.12.2012, at 13:55, Blue Swirl wrote:
> >>
> >>> Remove byte swaps by declaring the config space
> >>> as native endian.
> >>
> >> This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian.
> >
> > Can that MemoryRegion be split in two?
> 
> Yes, but unfortunately the offset for the second region depends on if
> MSIX is enabled or not. PCI layer manages these bits without the
> device seeing any changes.
> 
> This could be handled by introducing a callback at PCI layer to inform
> interested devices about changes to MSIX setup, or even generalized:
> inform devices about changes within any set of bits specified by the
> device.

We already have a generic config_write callback and even use it in
virtio pci: virtio_write_config.  So you could simply do there:

	if (region size != VIRTIO_PCI_CONFIG(dev)) {
		resize regions
	}

We would also have to resize to the default setup on
vm load and on vm reset.

Overall not sure whether this would make the code cleaner or uglier.

> >
> > Andreas
> >
> > --
> > SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
> > GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
Blue Swirl - Jan. 9, 2013, 8:39 p.m.
On Mon, Jan 7, 2013 at 4:11 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Sun, Jan 06, 2013 at 08:04:39PM +0000, Blue Swirl wrote:
>> On Sun, Jan 6, 2013 at 6:25 PM, Andreas Färber <afaerber@suse.de> wrote:
>> > Am 06.01.2013 14:17, schrieb Alexander Graf:
>> >>
>> >> On 30.12.2012, at 13:55, Blue Swirl wrote:
>> >>
>> >>> Remove byte swaps by declaring the config space
>> >>> as native endian.
>> >>
>> >> This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian.
>> >
>> > Can that MemoryRegion be split in two?
>>
>> Yes, but unfortunately the offset for the second region depends on if
>> MSIX is enabled or not. PCI layer manages these bits without the
>> device seeing any changes.
>>
>> This could be handled by introducing a callback at PCI layer to inform
>> interested devices about changes to MSIX setup, or even generalized:
>> inform devices about changes within any set of bits specified by the
>> device.
>
> We already have a generic config_write callback and even use it in
> virtio pci: virtio_write_config.  So you could simply do there:
>
>         if (region size != VIRTIO_PCI_CONFIG(dev)) {
>                 resize regions
>         }
>
> We would also have to resize to the default setup on
> vm load and on vm reset.
>
> Overall not sure whether this would make the code cleaner or uglier.

I think it would be a net cleanup. Most of the ugliness comes from the
poor device architecture.

There could be (unmeasurably) small performance gains since accesses
to the two regions would be dispatched directly to the handlers. But
if the MSIX mode bit is toggled very often compared to the accesses to
config registers, it could actually cause some slow down due to
adjustment to the offset with the memory API. How often does that
happen, once per boot or more often? Are these registers accessed very
often by the guests?

>
>> >
>> > Andreas
>> >
>> > --
>> > SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
>> > GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
Michael S. Tsirkin - Jan. 9, 2013, 10:06 p.m.
On Wed, Jan 09, 2013 at 08:39:08PM +0000, Blue Swirl wrote:
> On Mon, Jan 7, 2013 at 4:11 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > On Sun, Jan 06, 2013 at 08:04:39PM +0000, Blue Swirl wrote:
> >> On Sun, Jan 6, 2013 at 6:25 PM, Andreas Färber <afaerber@suse.de> wrote:
> >> > Am 06.01.2013 14:17, schrieb Alexander Graf:
> >> >>
> >> >> On 30.12.2012, at 13:55, Blue Swirl wrote:
> >> >>
> >> >>> Remove byte swaps by declaring the config space
> >> >>> as native endian.
> >> >>
> >> >> This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian.
> >> >
> >> > Can that MemoryRegion be split in two?
> >>
> >> Yes, but unfortunately the offset for the second region depends on if
> >> MSIX is enabled or not. PCI layer manages these bits without the
> >> device seeing any changes.
> >>
> >> This could be handled by introducing a callback at PCI layer to inform
> >> interested devices about changes to MSIX setup, or even generalized:
> >> inform devices about changes within any set of bits specified by the
> >> device.
> >
> > We already have a generic config_write callback and even use it in
> > virtio pci: virtio_write_config.  So you could simply do there:
> >
> >         if (region size != VIRTIO_PCI_CONFIG(dev)) {
> >                 resize regions
> >         }
> >
> > We would also have to resize to the default setup on
> > vm load and on vm reset.
> >
> > Overall not sure whether this would make the code cleaner or uglier.
> 
> I think it would be a net cleanup. Most of the ugliness comes from the
> poor device architecture.
> 
> There could be (unmeasurably) small performance gains since accesses
> to the two regions would be dispatched directly to the handlers. But
> if the MSIX mode bit is toggled very often compared to the accesses to
> config registers, it could actually cause some slow down due to
> adjustment to the offset with the memory API. How often does that
> happen, once per boot or more often? Are these registers accessed very
> often by the guests?

datapath accesses the memory a lot, while OTOH mode change happens once
per boot normally, so yes, in theory it's a minor optimization.  Likely
not measureable by itself but if others think it's cleaner to
structure code that way I sure won't object to a patch like that.

> >
> >> >
> >> > Andreas
> >> >
> >> > --
> >> > SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
> >> > GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

Patch

diff --git a/exec.c b/exec.c
index a6923ad..140eb56 100644
--- a/exec.c
+++ b/exec.c
@@ -2587,24 +2587,6 @@  int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
 }
 #endif
 
-#if !defined(CONFIG_USER_ONLY)
-
-/*
- * A helper function for the _utterly broken_ virtio device model to find out if
- * it's running on a big endian machine. Don't do this at home kids!
- */
-bool virtio_is_big_endian(void);
-bool virtio_is_big_endian(void)
-{
-#if defined(TARGET_WORDS_BIGENDIAN)
-    return true;
-#else
-    return false;
-#endif
-}
-
-#endif
-
 #ifndef CONFIG_USER_ONLY
 bool cpu_physical_memory_is_io(hwaddr phys_addr)
 {
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index d2d2454..df78a3b 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -92,9 +92,6 @@ 
  */
 #define wmb() do { } while (0)
 
-/* HACK for virtio to determine if it's running a big endian guest */
-bool virtio_is_big_endian(void);
-
 /* virtio device */
 
 static void virtio_pci_notify(void *opaque, uint16_t vector)
@@ -390,15 +387,9 @@  static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
         break;
     case 2:
         val = virtio_config_readw(proxy->vdev, addr);
-        if (virtio_is_big_endian()) {
-            val = bswap16(val);
-        }
         break;
     case 4:
         val = virtio_config_readl(proxy->vdev, addr);
-        if (virtio_is_big_endian()) {
-            val = bswap32(val);
-        }
         break;
     }
     return val;
@@ -423,15 +414,9 @@  static void virtio_pci_config_write(void *opaque, hwaddr addr,
         virtio_config_writeb(proxy->vdev, addr, val);
         break;
     case 2:
-        if (virtio_is_big_endian()) {
-            val = bswap16(val);
-        }
         virtio_config_writew(proxy->vdev, addr, val);
         break;
     case 4:
-        if (virtio_is_big_endian()) {
-            val = bswap32(val);
-        }
         virtio_config_writel(proxy->vdev, addr, val);
         break;
     }
@@ -444,7 +429,7 @@  static const MemoryRegionOps virtio_pci_config_ops = {
         .min_access_size = 1,
         .max_access_size = 4,
     },
-    .endianness = DEVICE_LITTLE_ENDIAN,
+    .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
 static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,