Message ID | 89fdb096d0f60d59e45f78d5b4ef26a743ffbc31.1356872111.git.blauwirbel@gmail.com |
---|---|
State | New |
Headers | show |
On 30.12.2012, at 13:55, Blue Swirl wrote: > Remove byte swaps by declaring the config space > as native endian. This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian. Alex > > Signed-off-by: Blue Swirl <blauwirbel@gmail.com> > --- > exec.c | 18 ------------------ > hw/virtio-pci.c | 17 +---------------- > 2 files changed, 1 insertions(+), 34 deletions(-) > > diff --git a/exec.c b/exec.c > index a6923ad..140eb56 100644 > --- a/exec.c > +++ b/exec.c > @@ -2587,24 +2587,6 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr, > } > #endif > > -#if !defined(CONFIG_USER_ONLY) > - > -/* > - * A helper function for the _utterly broken_ virtio device model to find out if > - * it's running on a big endian machine. Don't do this at home kids! > - */ > -bool virtio_is_big_endian(void); > -bool virtio_is_big_endian(void) > -{ > -#if defined(TARGET_WORDS_BIGENDIAN) > - return true; > -#else > - return false; > -#endif > -} > - > -#endif > - > #ifndef CONFIG_USER_ONLY > bool cpu_physical_memory_is_io(hwaddr phys_addr) > { > diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c > index d2d2454..df78a3b 100644 > --- a/hw/virtio-pci.c > +++ b/hw/virtio-pci.c > @@ -92,9 +92,6 @@ > */ > #define wmb() do { } while (0) > > -/* HACK for virtio to determine if it's running a big endian guest */ > -bool virtio_is_big_endian(void); > - > /* virtio device */ > > static void virtio_pci_notify(void *opaque, uint16_t vector) > @@ -390,15 +387,9 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > break; > case 2: > val = virtio_config_readw(proxy->vdev, addr); > - if (virtio_is_big_endian()) { > - val = bswap16(val); > - } > break; > case 4: > val = virtio_config_readl(proxy->vdev, addr); > - if (virtio_is_big_endian()) { > - val = bswap32(val); > - } > break; > } > return val; > @@ -423,15 +414,9 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > virtio_config_writeb(proxy->vdev, addr, val); > break; > case 2: > - if (virtio_is_big_endian()) { > - val = bswap16(val); > - } > virtio_config_writew(proxy->vdev, addr, val); > break; > case 4: > - if (virtio_is_big_endian()) { > - val = bswap32(val); > - } > virtio_config_writel(proxy->vdev, addr, val); > break; > } > @@ -444,7 +429,7 @@ static const MemoryRegionOps virtio_pci_config_ops = { > .min_access_size = 1, > .max_access_size = 4, > }, > - .endianness = DEVICE_LITTLE_ENDIAN, > + .endianness = DEVICE_NATIVE_ENDIAN, > }; > > static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, > -- > 1.7.2.5 > >
Am 06.01.2013 14:17, schrieb Alexander Graf: > > On 30.12.2012, at 13:55, Blue Swirl wrote: > >> Remove byte swaps by declaring the config space >> as native endian. > > This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian. Can that MemoryRegion be split in two? Andreas
On Sun, Jan 6, 2013 at 1:17 PM, Alexander Graf <agraf@suse.de> wrote: > > On 30.12.2012, at 13:55, Blue Swirl wrote: > >> Remove byte swaps by declaring the config space >> as native endian. > > This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian. True. I must say that this is poor architectural design in multiple ways. The endianness problem is aggravated by device being also instantiated by other devices, so we can't (at least easily) pass an endianness flag from above, or make two devices that are selected by the board. Another problem is that the offset changes dynamically depending on if MSIX is enabled or not. The design is probably also fixed by the guest driver assumptions and can't ever be improved. Paravirtual devices should have cleaner design than what for example original PC devices have, not more insane. :-( Anyway, the real fix is that the memory region should contain two subregions, native and LE. The offset of the second region should be adjusted if MSIX bit is toggled. This may need some kind of (nontrivial) callback system. The original design could be improved to avoid the checks and calls to virtio_is_big_endian() from the config handler by introducing two sets of config functions (LE+LE, BE+LE) and selecting the correct one at init time (based on result of virtio_is_big_endian() which does not change). This would not help the real fix. Both need more thought and hopefully there are even better options, so I'll revert the commit. > > Alex > >> >> Signed-off-by: Blue Swirl <blauwirbel@gmail.com> >> --- >> exec.c | 18 ------------------ >> hw/virtio-pci.c | 17 +---------------- >> 2 files changed, 1 insertions(+), 34 deletions(-) >> >> diff --git a/exec.c b/exec.c >> index a6923ad..140eb56 100644 >> --- a/exec.c >> +++ b/exec.c >> @@ -2587,24 +2587,6 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr, >> } >> #endif >> >> -#if !defined(CONFIG_USER_ONLY) >> - >> -/* >> - * A helper function for the _utterly broken_ virtio device model to find out if >> - * it's running on a big endian machine. Don't do this at home kids! >> - */ >> -bool virtio_is_big_endian(void); >> -bool virtio_is_big_endian(void) >> -{ >> -#if defined(TARGET_WORDS_BIGENDIAN) >> - return true; >> -#else >> - return false; >> -#endif >> -} >> - >> -#endif >> - >> #ifndef CONFIG_USER_ONLY >> bool cpu_physical_memory_is_io(hwaddr phys_addr) >> { >> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c >> index d2d2454..df78a3b 100644 >> --- a/hw/virtio-pci.c >> +++ b/hw/virtio-pci.c >> @@ -92,9 +92,6 @@ >> */ >> #define wmb() do { } while (0) >> >> -/* HACK for virtio to determine if it's running a big endian guest */ >> -bool virtio_is_big_endian(void); >> - >> /* virtio device */ >> >> static void virtio_pci_notify(void *opaque, uint16_t vector) >> @@ -390,15 +387,9 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, >> break; >> case 2: >> val = virtio_config_readw(proxy->vdev, addr); >> - if (virtio_is_big_endian()) { >> - val = bswap16(val); >> - } >> break; >> case 4: >> val = virtio_config_readl(proxy->vdev, addr); >> - if (virtio_is_big_endian()) { >> - val = bswap32(val); >> - } >> break; >> } >> return val; >> @@ -423,15 +414,9 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, >> virtio_config_writeb(proxy->vdev, addr, val); >> break; >> case 2: >> - if (virtio_is_big_endian()) { >> - val = bswap16(val); >> - } >> virtio_config_writew(proxy->vdev, addr, val); >> break; >> case 4: >> - if (virtio_is_big_endian()) { >> - val = bswap32(val); >> - } >> virtio_config_writel(proxy->vdev, addr, val); >> break; >> } >> @@ -444,7 +429,7 @@ static const MemoryRegionOps virtio_pci_config_ops = { >> .min_access_size = 1, >> .max_access_size = 4, >> }, >> - .endianness = DEVICE_LITTLE_ENDIAN, >> + .endianness = DEVICE_NATIVE_ENDIAN, >> }; >> >> static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, >> -- >> 1.7.2.5 >> >> >
Am 06.01.2013 um 19:26 schrieb Blue Swirl <blauwirbel@gmail.com>: > On Sun, Jan 6, 2013 at 1:17 PM, Alexander Graf <agraf@suse.de> wrote: >> >> On 30.12.2012, at 13:55, Blue Swirl wrote: >> >>> Remove byte swaps by declaring the config space >>> as native endian. >> >> This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian. > > True. I must say that this is poor architectural design in multiple > ways. The endianness problem is aggravated by device being also > instantiated by other devices, so we can't (at least easily) pass an > endianness flag from above, or make two devices that are selected by > the board. Another problem is that the offset changes dynamically > depending on if MSIX is enabled or not. The design is probably also > fixed by the guest driver assumptions and can't ever be improved. > Paravirtual devices should have cleaner design than what for example > original PC devices have, not more insane. :-( I guess now you understand my wording in the comment that your patch removes a bit better :). Alex
On Sun, Jan 6, 2013 at 6:25 PM, Andreas Färber <afaerber@suse.de> wrote: > Am 06.01.2013 14:17, schrieb Alexander Graf: >> >> On 30.12.2012, at 13:55, Blue Swirl wrote: >> >>> Remove byte swaps by declaring the config space >>> as native endian. >> >> This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian. > > Can that MemoryRegion be split in two? Yes, but unfortunately the offset for the second region depends on if MSIX is enabled or not. PCI layer manages these bits without the device seeing any changes. This could be handled by introducing a callback at PCI layer to inform interested devices about changes to MSIX setup, or even generalized: inform devices about changes within any set of bits specified by the device. > > Andreas > > -- > SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany > GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
On Sun, Jan 06, 2013 at 08:04:39PM +0000, Blue Swirl wrote: > On Sun, Jan 6, 2013 at 6:25 PM, Andreas Färber <afaerber@suse.de> wrote: > > Am 06.01.2013 14:17, schrieb Alexander Graf: > >> > >> On 30.12.2012, at 13:55, Blue Swirl wrote: > >> > >>> Remove byte swaps by declaring the config space > >>> as native endian. > >> > >> This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian. > > > > Can that MemoryRegion be split in two? > > Yes, but unfortunately the offset for the second region depends on if > MSIX is enabled or not. PCI layer manages these bits without the > device seeing any changes. > > This could be handled by introducing a callback at PCI layer to inform > interested devices about changes to MSIX setup, or even generalized: > inform devices about changes within any set of bits specified by the > device. We already have a generic config_write callback and even use it in virtio pci: virtio_write_config. So you could simply do there: if (region size != VIRTIO_PCI_CONFIG(dev)) { resize regions } We would also have to resize to the default setup on vm load and on vm reset. Overall not sure whether this would make the code cleaner or uglier. > > > > Andreas > > > > -- > > SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany > > GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
On Mon, Jan 7, 2013 at 4:11 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > On Sun, Jan 06, 2013 at 08:04:39PM +0000, Blue Swirl wrote: >> On Sun, Jan 6, 2013 at 6:25 PM, Andreas Färber <afaerber@suse.de> wrote: >> > Am 06.01.2013 14:17, schrieb Alexander Graf: >> >> >> >> On 30.12.2012, at 13:55, Blue Swirl wrote: >> >> >> >>> Remove byte swaps by declaring the config space >> >>> as native endian. >> >> >> >> This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian. >> > >> > Can that MemoryRegion be split in two? >> >> Yes, but unfortunately the offset for the second region depends on if >> MSIX is enabled or not. PCI layer manages these bits without the >> device seeing any changes. >> >> This could be handled by introducing a callback at PCI layer to inform >> interested devices about changes to MSIX setup, or even generalized: >> inform devices about changes within any set of bits specified by the >> device. > > We already have a generic config_write callback and even use it in > virtio pci: virtio_write_config. So you could simply do there: > > if (region size != VIRTIO_PCI_CONFIG(dev)) { > resize regions > } > > We would also have to resize to the default setup on > vm load and on vm reset. > > Overall not sure whether this would make the code cleaner or uglier. I think it would be a net cleanup. Most of the ugliness comes from the poor device architecture. There could be (unmeasurably) small performance gains since accesses to the two regions would be dispatched directly to the handlers. But if the MSIX mode bit is toggled very often compared to the accesses to config registers, it could actually cause some slow down due to adjustment to the offset with the memory API. How often does that happen, once per boot or more often? Are these registers accessed very often by the guests? > >> > >> > Andreas >> > >> > -- >> > SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany >> > GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
On Wed, Jan 09, 2013 at 08:39:08PM +0000, Blue Swirl wrote: > On Mon, Jan 7, 2013 at 4:11 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > > On Sun, Jan 06, 2013 at 08:04:39PM +0000, Blue Swirl wrote: > >> On Sun, Jan 6, 2013 at 6:25 PM, Andreas Färber <afaerber@suse.de> wrote: > >> > Am 06.01.2013 14:17, schrieb Alexander Graf: > >> >> > >> >> On 30.12.2012, at 13:55, Blue Swirl wrote: > >> >> > >> >>> Remove byte swaps by declaring the config space > >> >>> as native endian. > >> >> > >> >> This is wrong. Virtio-pci config space is split into 2 regions. One with native endianness, the other one with little endian. > >> > > >> > Can that MemoryRegion be split in two? > >> > >> Yes, but unfortunately the offset for the second region depends on if > >> MSIX is enabled or not. PCI layer manages these bits without the > >> device seeing any changes. > >> > >> This could be handled by introducing a callback at PCI layer to inform > >> interested devices about changes to MSIX setup, or even generalized: > >> inform devices about changes within any set of bits specified by the > >> device. > > > > We already have a generic config_write callback and even use it in > > virtio pci: virtio_write_config. So you could simply do there: > > > > if (region size != VIRTIO_PCI_CONFIG(dev)) { > > resize regions > > } > > > > We would also have to resize to the default setup on > > vm load and on vm reset. > > > > Overall not sure whether this would make the code cleaner or uglier. > > I think it would be a net cleanup. Most of the ugliness comes from the > poor device architecture. > > There could be (unmeasurably) small performance gains since accesses > to the two regions would be dispatched directly to the handlers. But > if the MSIX mode bit is toggled very often compared to the accesses to > config registers, it could actually cause some slow down due to > adjustment to the offset with the memory API. How often does that > happen, once per boot or more often? Are these registers accessed very > often by the guests? datapath accesses the memory a lot, while OTOH mode change happens once per boot normally, so yes, in theory it's a minor optimization. Likely not measureable by itself but if others think it's cleaner to structure code that way I sure won't object to a patch like that. > > > >> > > >> > Andreas > >> > > >> > -- > >> > SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany > >> > GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
diff --git a/exec.c b/exec.c index a6923ad..140eb56 100644 --- a/exec.c +++ b/exec.c @@ -2587,24 +2587,6 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr, } #endif -#if !defined(CONFIG_USER_ONLY) - -/* - * A helper function for the _utterly broken_ virtio device model to find out if - * it's running on a big endian machine. Don't do this at home kids! - */ -bool virtio_is_big_endian(void); -bool virtio_is_big_endian(void) -{ -#if defined(TARGET_WORDS_BIGENDIAN) - return true; -#else - return false; -#endif -} - -#endif - #ifndef CONFIG_USER_ONLY bool cpu_physical_memory_is_io(hwaddr phys_addr) { diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index d2d2454..df78a3b 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -92,9 +92,6 @@ */ #define wmb() do { } while (0) -/* HACK for virtio to determine if it's running a big endian guest */ -bool virtio_is_big_endian(void); - /* virtio device */ static void virtio_pci_notify(void *opaque, uint16_t vector) @@ -390,15 +387,9 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, break; case 2: val = virtio_config_readw(proxy->vdev, addr); - if (virtio_is_big_endian()) { - val = bswap16(val); - } break; case 4: val = virtio_config_readl(proxy->vdev, addr); - if (virtio_is_big_endian()) { - val = bswap32(val); - } break; } return val; @@ -423,15 +414,9 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, virtio_config_writeb(proxy->vdev, addr, val); break; case 2: - if (virtio_is_big_endian()) { - val = bswap16(val); - } virtio_config_writew(proxy->vdev, addr, val); break; case 4: - if (virtio_is_big_endian()) { - val = bswap32(val); - } virtio_config_writel(proxy->vdev, addr, val); break; } @@ -444,7 +429,7 @@ static const MemoryRegionOps virtio_pci_config_ops = { .min_access_size = 1, .max_access_size = 4, }, - .endianness = DEVICE_LITTLE_ENDIAN, + .endianness = DEVICE_NATIVE_ENDIAN, }; static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
Remove byte swaps by declaring the config space as native endian. Signed-off-by: Blue Swirl <blauwirbel@gmail.com> --- exec.c | 18 ------------------ hw/virtio-pci.c | 17 +---------------- 2 files changed, 1 insertions(+), 34 deletions(-)