Message ID | 20140414115311.32507.17082.stgit@bahia.local |
---|---|
State | New |
Headers | show |
On 14.04.14 13:58, Greg Kurz wrote: > From: Rusty Russell <rusty@rustcorp.com.au> > > virtio data structures are defined as "target endian", which assumes > that's a fixed value. In fact, that actually means it's platform-specific. > The OASIS virtio 1.0 spec will fix this, by making it all little endian. > > We introduce memory accessors to be used accross the virtio code where > needed. These accessors should support both legacy and 1.0 devices. > A good way to do it is to introduce a per-device property to store the > endianness. We choose to set this flag at device reset time because it > is reasonnable to assume the endianness won't change unless we reboot or > kexec another kernel. And it is also reasonnable to assume the new kernel > will reset the devices before using them (otherwise it will break). > > We reuse the virtio_is_big_endian() helper since it provides the right > value for legacy devices with most of the targets, that have fixed > endianness. It can then be overriden to support endian-ambivalent targets. > > To support migration, we need to set the flag in virtio_load() as well. > > (a) One solution would be to add it to the stream, but it have some > drawbacks: > - since this only affects a few targets, the field should be put into a > subsection > - virtio migration code should be ported to vmstate to be able to introduce > such a subsection > > (b) If we assume the following to be true: > - target endianness falls under some cpu state > - cpu state is always restored before virtio devices state because they > get initialized in this order in main(). > Then an alternative is to rely on virtio_is_big_endian() again at > load time. No need to mess around with the migration stream in this case. > > This patch implements (b). > > Note that the tswap helpers are implemented in virtio.c so that > virtio-access.h stays platform independant. Most of the virtio code > will be buildable under common-obj instead of obj then, and spare > some cycles when building for multiple targets. > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > [ ldq_phys() API change, > relicensed virtio-access.h to GPLv2+ on Rusty's request, > introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > add VirtIODevice * arg to virtio helpers, > use the existing virtio_is_big_endian() helper, > virtio-pci: use the device is_big_endian flag, > introduce virtio tswap16 and tswap64 helpers, > move calls to tswap* out of virtio-access.h to make it platform independant, > migration support, > Greg Kurz <gkurz@linux.vnet.ibm.com> ] > Cc: Cédric Le Goater <clg@fr.ibm.com> > Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > --- > > Changes since v6: > - merge the virtio_needs_byteswap() helper from v6 and existing > virtio_is_big_endian() > - virtio-pci: now supports endianness changes > - virtio-access.h fixes (target independant) > > exec.c | 2 - > hw/virtio/Makefile.objs | 2 - > hw/virtio/virtio-pci.c | 11 +-- > hw/virtio/virtio.c | 35 +++++++++ > include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > include/hw/virtio/virtio.h | 2 + > vl.c | 4 + > 7 files changed, 185 insertions(+), 9 deletions(-) > create mode 100644 include/hw/virtio/virtio-access.h > > diff --git a/exec.c b/exec.c > index 91513c6..e6777d0 100644 > --- a/exec.c > +++ b/exec.c > @@ -42,6 +42,7 @@ > #else /* !CONFIG_USER_ONLY */ > #include "sysemu/xen-mapcache.h" > #include "trace.h" > +#include "hw/virtio/virtio.h" > #endif > #include "exec/cpu-all.h" > > @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > * A helper function for the _utterly broken_ virtio device model to find out if > * it's running on a big endian machine. Don't do this at home kids! > */ > -bool virtio_is_big_endian(void); > bool virtio_is_big_endian(void) > { > #if defined(TARGET_WORDS_BIGENDIAN) > diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > index 1ba53d9..68c3064 100644 > --- a/hw/virtio/Makefile.objs > +++ b/hw/virtio/Makefile.objs > @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > common-obj-y += virtio-mmio.o > common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > > -obj-y += virtio.o virtio-balloon.o > +obj-y += virtio.o virtio-balloon.o > obj-$(CONFIG_LINUX) += vhost.o > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > index ce97514..82a1689 100644 > --- a/hw/virtio/virtio-pci.c > +++ b/hw/virtio/virtio-pci.c > @@ -89,9 +89,6 @@ > /* Flags track per-device state like workarounds for quirks in older guests. */ > #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > > -/* HACK for virtio to determine if it's running a big endian guest */ > -bool virtio_is_big_endian(void); > - > static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > VirtIOPCIProxy *dev); > > @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > break; > case 2: > val = virtio_config_readw(vdev, addr); > - if (virtio_is_big_endian()) { > + if (vdev->is_big_endian) { > val = bswap16(val); > } > break; > case 4: > val = virtio_config_readl(vdev, addr); > - if (virtio_is_big_endian()) { > + if (vdev->is_big_endian) { > val = bswap32(val); > } > break; > @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > virtio_config_writeb(vdev, addr, val); > break; > case 2: > - if (virtio_is_big_endian()) { > + if (vdev->is_big_endian) { > val = bswap16(val); > } > virtio_config_writew(vdev, addr, val); > break; > case 4: > - if (virtio_is_big_endian()) { > + if (vdev->is_big_endian) { > val = bswap32(val); > } > virtio_config_writel(vdev, addr, val); > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > index aeabf3a..bb646f0 100644 > --- a/hw/virtio/virtio.c > +++ b/hw/virtio/virtio.c > @@ -19,6 +19,7 @@ > #include "hw/virtio/virtio.h" > #include "qemu/atomic.h" > #include "hw/virtio/virtio-bus.h" > +#include "hw/virtio/virtio-access.h" > > /* > * The alignment to use between consumer and producer parts of vring. > @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > > virtio_set_status(vdev, 0); > > + vdev->is_big_endian = virtio_is_big_endian(); > + > if (k->reset) { > k->reset(vdev); > } > @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > > + /* NOTE: we assume that endianness is a cpu state AND > + * cpu state is restored before virtio devices. > + */ > + vdev->is_big_endian = virtio_is_big_endian(); > + > if (k->load_config) { > ret = k->load_config(qbus->parent, f); > if (ret) > @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > } > } > > +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > +{ > + if (vdev->is_big_endian) { > + return tswap16(s); > + } else { > + return bswap16(tswap16(s)); > + } This looks pretty bogus. When virtio wants to do "tswap" what it means is "give me a host endianness value in virtio endianness". How about something like #ifdef HOST_WORDS_BIGENDIAN return vdev->is_big_endian ? s : bswap16(s); #else return vdev->is_big_endian ? bswap16(s) : s; #endif That should work pretty well inline as well, so you don't need to compile virtio.c as target dependent. Alex
On 14.04.14 13:58, Greg Kurz wrote: > From: Rusty Russell <rusty@rustcorp.com.au> > > virtio data structures are defined as "target endian", which assumes > that's a fixed value. In fact, that actually means it's platform-specific. > The OASIS virtio 1.0 spec will fix this, by making it all little endian. > > We introduce memory accessors to be used accross the virtio code where > needed. These accessors should support both legacy and 1.0 devices. > A good way to do it is to introduce a per-device property to store the > endianness. We choose to set this flag at device reset time because it > is reasonnable to assume the endianness won't change unless we reboot or > kexec another kernel. And it is also reasonnable to assume the new kernel > will reset the devices before using them (otherwise it will break). > > We reuse the virtio_is_big_endian() helper since it provides the right > value for legacy devices with most of the targets, that have fixed > endianness. It can then be overriden to support endian-ambivalent targets. > > To support migration, we need to set the flag in virtio_load() as well. > > (a) One solution would be to add it to the stream, but it have some > drawbacks: > - since this only affects a few targets, the field should be put into a > subsection > - virtio migration code should be ported to vmstate to be able to introduce > such a subsection > > (b) If we assume the following to be true: > - target endianness falls under some cpu state > - cpu state is always restored before virtio devices state because they > get initialized in this order in main(). > Then an alternative is to rely on virtio_is_big_endian() again at > load time. No need to mess around with the migration stream in this case. > > This patch implements (b). > > Note that the tswap helpers are implemented in virtio.c so that > virtio-access.h stays platform independant. Most of the virtio code > will be buildable under common-obj instead of obj then, and spare > some cycles when building for multiple targets. > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > [ ldq_phys() API change, > relicensed virtio-access.h to GPLv2+ on Rusty's request, > introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > add VirtIODevice * arg to virtio helpers, > use the existing virtio_is_big_endian() helper, > virtio-pci: use the device is_big_endian flag, > introduce virtio tswap16 and tswap64 helpers, > move calls to tswap* out of virtio-access.h to make it platform independant, > migration support, > Greg Kurz <gkurz@linux.vnet.ibm.com> ] > Cc: Cédric Le Goater <clg@fr.ibm.com> > Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > --- > > Changes since v6: > - merge the virtio_needs_byteswap() helper from v6 and existing > virtio_is_big_endian() > - virtio-pci: now supports endianness changes > - virtio-access.h fixes (target independant) > > exec.c | 2 - > hw/virtio/Makefile.objs | 2 - > hw/virtio/virtio-pci.c | 11 +-- > hw/virtio/virtio.c | 35 +++++++++ > include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > include/hw/virtio/virtio.h | 2 + > vl.c | 4 + > 7 files changed, 185 insertions(+), 9 deletions(-) > create mode 100644 include/hw/virtio/virtio-access.h > > diff --git a/exec.c b/exec.c > index 91513c6..e6777d0 100644 > --- a/exec.c > +++ b/exec.c > @@ -42,6 +42,7 @@ > #else /* !CONFIG_USER_ONLY */ > #include "sysemu/xen-mapcache.h" > #include "trace.h" > +#include "hw/virtio/virtio.h" > #endif > #include "exec/cpu-all.h" > > @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > * A helper function for the _utterly broken_ virtio device model to find out if > * it's running on a big endian machine. Don't do this at home kids! > */ > -bool virtio_is_big_endian(void); > bool virtio_is_big_endian(void) > { > #if defined(TARGET_WORDS_BIGENDIAN) > diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > index 1ba53d9..68c3064 100644 > --- a/hw/virtio/Makefile.objs > +++ b/hw/virtio/Makefile.objs > @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > common-obj-y += virtio-mmio.o > common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > > -obj-y += virtio.o virtio-balloon.o > +obj-y += virtio.o virtio-balloon.o > obj-$(CONFIG_LINUX) += vhost.o > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > index ce97514..82a1689 100644 > --- a/hw/virtio/virtio-pci.c > +++ b/hw/virtio/virtio-pci.c > @@ -89,9 +89,6 @@ > /* Flags track per-device state like workarounds for quirks in older guests. */ > #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > > -/* HACK for virtio to determine if it's running a big endian guest */ > -bool virtio_is_big_endian(void); > - > static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > VirtIOPCIProxy *dev); > > @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > break; > case 2: > val = virtio_config_readw(vdev, addr); > - if (virtio_is_big_endian()) { > + if (vdev->is_big_endian) { > val = bswap16(val); > } > break; > case 4: > val = virtio_config_readl(vdev, addr); > - if (virtio_is_big_endian()) { > + if (vdev->is_big_endian) { > val = bswap32(val); > } > break; > @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > virtio_config_writeb(vdev, addr, val); > break; > case 2: > - if (virtio_is_big_endian()) { > + if (vdev->is_big_endian) { > val = bswap16(val); > } > virtio_config_writew(vdev, addr, val); > break; > case 4: > - if (virtio_is_big_endian()) { > + if (vdev->is_big_endian) { > val = bswap32(val); > } > virtio_config_writel(vdev, addr, val); > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > index aeabf3a..bb646f0 100644 > --- a/hw/virtio/virtio.c > +++ b/hw/virtio/virtio.c > @@ -19,6 +19,7 @@ > #include "hw/virtio/virtio.h" > #include "qemu/atomic.h" > #include "hw/virtio/virtio-bus.h" > +#include "hw/virtio/virtio-access.h" > > /* > * The alignment to use between consumer and producer parts of vring. > @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > > virtio_set_status(vdev, 0); > > + vdev->is_big_endian = virtio_is_big_endian(); > + > if (k->reset) { > k->reset(vdev); > } > @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > > + /* NOTE: we assume that endianness is a cpu state AND > + * cpu state is restored before virtio devices. > + */ > + vdev->is_big_endian = virtio_is_big_endian(); > + > if (k->load_config) { > ret = k->load_config(qbus->parent, f); > if (ret) > @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > } > } > > +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > +{ > + if (vdev->is_big_endian) { > + return tswap16(s); > + } else { > + return bswap16(tswap16(s)); > + } > +} > + > +uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s) > +{ > + if (vdev->is_big_endian) { > + return tswap32(s); > + } else { > + return bswap32(tswap32(s)); > + } > +} > + > +uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s) > +{ > + if (vdev->is_big_endian) { > + return tswap64(s); > + } else { > + return bswap64(tswap64(s)); > + } > +} > + > static void virtio_device_realize(DeviceState *dev, Error **errp) > { > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h > new file mode 100644 > index 0000000..5c9013e > --- /dev/null > +++ b/include/hw/virtio/virtio-access.h > @@ -0,0 +1,138 @@ > +/* > + * Virtio Accessor Support: In case your target can change endian. > + * > + * Copyright IBM, Corp. 2013 > + * > + * Authors: > + * Rusty Russell <rusty@au.ibm.com> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation, either version 2 of the License, or > + * (at your option) any later version. > + * > + */ > +#ifndef _QEMU_VIRTIO_ACCESS_H > +#define _QEMU_VIRTIO_ACCESS_H > +#include "hw/virtio/virtio.h" > +#include "exec/address-spaces.h" > + > +static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa) > +{ > + if (vdev->is_big_endian) { > + return lduw_be_phys(&address_space_memory, pa); > + } > + return lduw_le_phys(&address_space_memory, pa); > +} > + > +static inline uint32_t virtio_ldl_phys(VirtIODevice *vdev, hwaddr pa) > +{ > + if (vdev->is_big_endian) { > + return ldl_be_phys(&address_space_memory, pa); > + } > + return ldl_le_phys(&address_space_memory, pa); > +} > + > +static inline uint64_t virtio_ldq_phys(VirtIODevice *vdev, hwaddr pa) > +{ > + if (vdev->is_big_endian) { > + return ldq_be_phys(&address_space_memory, pa); > + } > + return ldq_le_phys(&address_space_memory, pa); > +} > + > +static inline void virtio_stw_phys(VirtIODevice *vdev, hwaddr pa, > + uint16_t value) > +{ > + if (vdev->is_big_endian) { > + stw_be_phys(&address_space_memory, pa, value); > + } else { > + stw_le_phys(&address_space_memory, pa, value); > + } > +} > + > +static inline void virtio_stl_phys(VirtIODevice *vdev, hwaddr pa, > + uint32_t value) > +{ > + if (vdev->is_big_endian) { > + stl_be_phys(&address_space_memory, pa, value); > + } else { > + stl_le_phys(&address_space_memory, pa, value); > + } > +} > + > +static inline void virtio_stw_p(VirtIODevice *vdev, void *ptr, uint16_t v) > +{ > + if (vdev->is_big_endian) { > + stw_be_p(ptr, v); > + } else { > + stw_le_p(ptr, v); > + } > +} > + > +static inline void virtio_stl_p(VirtIODevice *vdev, void *ptr, uint32_t v) > +{ > + if (vdev->is_big_endian) { > + stl_be_p(ptr, v); > + } else { > + stl_le_p(ptr, v); > + } > +} > + > +static inline void virtio_stq_p(VirtIODevice *vdev, void *ptr, uint64_t v) > +{ > + if (vdev->is_big_endian) { > + stq_be_p(ptr, v); > + } else { > + stq_le_p(ptr, v); > + } > +} > + > +static inline int virtio_lduw_p(VirtIODevice *vdev, const void *ptr) > +{ > + if (vdev->is_big_endian) { > + return lduw_be_p(ptr); > + } else { > + return lduw_le_p(ptr); > + } > +} > + > +static inline int virtio_ldl_p(VirtIODevice *vdev, const void *ptr) > +{ > + if (vdev->is_big_endian) { > + return ldl_be_p(ptr); > + } else { > + return ldl_le_p(ptr); > + } > +} > + > +static inline uint64_t virtio_ldq_p(VirtIODevice *vdev, const void *ptr) > +{ > + if (vdev->is_big_endian) { > + return ldq_be_p(ptr); > + } else { > + return ldq_le_p(ptr); > + } > +} > + > +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s); > + > +static inline void virtio_tswap16s(VirtIODevice *vdev, uint16_t *s) > +{ > + *s = virtio_tswap16(vdev, *s); > +} > + > +uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s); > + > +static inline void virtio_tswap32s(VirtIODevice *vdev, uint32_t *s) > +{ > + *s = virtio_tswap32(vdev, *s); > +} > + > +uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s); > + > +static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s) > +{ > + *s = virtio_tswap64(vdev, *s); > +} Could we try to poison any non-virtio, non-endian-specific memory accessors when this file is included? That way we don't fall into pitfalls where we end up running stl_p in a tiny corner case somewhere still. Alex
On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: > > On 14.04.14 13:58, Greg Kurz wrote: > >From: Rusty Russell <rusty@rustcorp.com.au> > > > >virtio data structures are defined as "target endian", which assumes > >that's a fixed value. In fact, that actually means it's platform-specific. > >The OASIS virtio 1.0 spec will fix this, by making it all little endian. > > > >We introduce memory accessors to be used accross the virtio code where > >needed. These accessors should support both legacy and 1.0 devices. > >A good way to do it is to introduce a per-device property to store the > >endianness. We choose to set this flag at device reset time because it > >is reasonnable to assume the endianness won't change unless we reboot or > >kexec another kernel. And it is also reasonnable to assume the new kernel > >will reset the devices before using them (otherwise it will break). > > > >We reuse the virtio_is_big_endian() helper since it provides the right > >value for legacy devices with most of the targets, that have fixed > >endianness. It can then be overriden to support endian-ambivalent targets. > > > >To support migration, we need to set the flag in virtio_load() as well. > > > >(a) One solution would be to add it to the stream, but it have some > > drawbacks: > >- since this only affects a few targets, the field should be put into a > > subsection > >- virtio migration code should be ported to vmstate to be able to introduce > > such a subsection > > > >(b) If we assume the following to be true: > >- target endianness falls under some cpu state > >- cpu state is always restored before virtio devices state because they > > get initialized in this order in main(). > >Then an alternative is to rely on virtio_is_big_endian() again at > >load time. No need to mess around with the migration stream in this case. > > > >This patch implements (b). > > > >Note that the tswap helpers are implemented in virtio.c so that > >virtio-access.h stays platform independant. Most of the virtio code > >will be buildable under common-obj instead of obj then, and spare > >some cycles when building for multiple targets. > > > >Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > >[ ldq_phys() API change, > > relicensed virtio-access.h to GPLv2+ on Rusty's request, > > introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > > add VirtIODevice * arg to virtio helpers, > > use the existing virtio_is_big_endian() helper, > > virtio-pci: use the device is_big_endian flag, > > introduce virtio tswap16 and tswap64 helpers, > > move calls to tswap* out of virtio-access.h to make it platform independant, > > migration support, > > Greg Kurz <gkurz@linux.vnet.ibm.com> ] > >Cc: Cédric Le Goater <clg@fr.ibm.com> > >Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > >--- > > > >Changes since v6: > >- merge the virtio_needs_byteswap() helper from v6 and existing > > virtio_is_big_endian() > >- virtio-pci: now supports endianness changes > >- virtio-access.h fixes (target independant) > > > > exec.c | 2 - > > hw/virtio/Makefile.objs | 2 - > > hw/virtio/virtio-pci.c | 11 +-- > > hw/virtio/virtio.c | 35 +++++++++ > > include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > > include/hw/virtio/virtio.h | 2 + > > vl.c | 4 + > > 7 files changed, 185 insertions(+), 9 deletions(-) > > create mode 100644 include/hw/virtio/virtio-access.h > > > >diff --git a/exec.c b/exec.c > >index 91513c6..e6777d0 100644 > >--- a/exec.c > >+++ b/exec.c > >@@ -42,6 +42,7 @@ > > #else /* !CONFIG_USER_ONLY */ > > #include "sysemu/xen-mapcache.h" > > #include "trace.h" > >+#include "hw/virtio/virtio.h" > > #endif > > #include "exec/cpu-all.h" > >@@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > > * A helper function for the _utterly broken_ virtio device model to find out if > > * it's running on a big endian machine. Don't do this at home kids! > > */ > >-bool virtio_is_big_endian(void); > > bool virtio_is_big_endian(void) > > { > > #if defined(TARGET_WORDS_BIGENDIAN) > >diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > >index 1ba53d9..68c3064 100644 > >--- a/hw/virtio/Makefile.objs > >+++ b/hw/virtio/Makefile.objs > >@@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > > common-obj-y += virtio-mmio.o > > common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > >-obj-y += virtio.o virtio-balloon.o > >+obj-y += virtio.o virtio-balloon.o > > obj-$(CONFIG_LINUX) += vhost.o > >diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > >index ce97514..82a1689 100644 > >--- a/hw/virtio/virtio-pci.c > >+++ b/hw/virtio/virtio-pci.c > >@@ -89,9 +89,6 @@ > > /* Flags track per-device state like workarounds for quirks in older guests. */ > > #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > >-/* HACK for virtio to determine if it's running a big endian guest */ > >-bool virtio_is_big_endian(void); > >- > > static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > > VirtIOPCIProxy *dev); > >@@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > > break; > > case 2: > > val = virtio_config_readw(vdev, addr); > >- if (virtio_is_big_endian()) { > >+ if (vdev->is_big_endian) { > > val = bswap16(val); > > } > > break; > > case 4: > > val = virtio_config_readl(vdev, addr); > >- if (virtio_is_big_endian()) { > >+ if (vdev->is_big_endian) { > > val = bswap32(val); > > } > > break; > >@@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > > virtio_config_writeb(vdev, addr, val); > > break; > > case 2: > >- if (virtio_is_big_endian()) { > >+ if (vdev->is_big_endian) { > > val = bswap16(val); > > } > > virtio_config_writew(vdev, addr, val); > > break; > > case 4: > >- if (virtio_is_big_endian()) { > >+ if (vdev->is_big_endian) { > > val = bswap32(val); > > } > > virtio_config_writel(vdev, addr, val); > >diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > >index aeabf3a..bb646f0 100644 > >--- a/hw/virtio/virtio.c > >+++ b/hw/virtio/virtio.c > >@@ -19,6 +19,7 @@ > > #include "hw/virtio/virtio.h" > > #include "qemu/atomic.h" > > #include "hw/virtio/virtio-bus.h" > >+#include "hw/virtio/virtio-access.h" > > /* > > * The alignment to use between consumer and producer parts of vring. > >@@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > > virtio_set_status(vdev, 0); > >+ vdev->is_big_endian = virtio_is_big_endian(); > >+ > > if (k->reset) { > > k->reset(vdev); > > } > >@@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > > BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > > VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > >+ /* NOTE: we assume that endianness is a cpu state AND > >+ * cpu state is restored before virtio devices. > >+ */ > >+ vdev->is_big_endian = virtio_is_big_endian(); > >+ > > if (k->load_config) { > > ret = k->load_config(qbus->parent, f); > > if (ret) > >@@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > > } > > } > >+uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > >+{ > >+ if (vdev->is_big_endian) { > >+ return tswap16(s); > >+ } else { > >+ return bswap16(tswap16(s)); > >+ } > > This looks pretty bogus. When virtio wants to do "tswap" what it > means is "give me a host endianness value in virtio endianness". How > about something like > > #ifdef HOST_WORDS_BIGENDIAN > return vdev->is_big_endian ? s : bswap16(s); > #else > return vdev->is_big_endian ? bswap16(s) : s; > #endif > Actually why doesn't this call virtio_is_big_endian? As it is, we get extra branches even if target endian-ness is fixed. > That should work pretty well inline as well, so you don't need to > compile virtio.c as target dependent. > > > Alex Yes but we'll still need to build two variants: fixed endian and dynamic endian platforms. Something along the lines of 32/64 bit split that we have?
On Mon, Apr 14, 2014 at 02:22:36PM +0200, Alexander Graf wrote: > > On 14.04.14 13:58, Greg Kurz wrote: > >From: Rusty Russell <rusty@rustcorp.com.au> > > > >virtio data structures are defined as "target endian", which assumes > >that's a fixed value. In fact, that actually means it's platform-specific. > >The OASIS virtio 1.0 spec will fix this, by making it all little endian. > > > >We introduce memory accessors to be used accross the virtio code where > >needed. These accessors should support both legacy and 1.0 devices. > >A good way to do it is to introduce a per-device property to store the > >endianness. We choose to set this flag at device reset time because it > >is reasonnable to assume the endianness won't change unless we reboot or > >kexec another kernel. And it is also reasonnable to assume the new kernel > >will reset the devices before using them (otherwise it will break). > > > >We reuse the virtio_is_big_endian() helper since it provides the right > >value for legacy devices with most of the targets, that have fixed > >endianness. It can then be overriden to support endian-ambivalent targets. > > > >To support migration, we need to set the flag in virtio_load() as well. > > > >(a) One solution would be to add it to the stream, but it have some > > drawbacks: > >- since this only affects a few targets, the field should be put into a > > subsection > >- virtio migration code should be ported to vmstate to be able to introduce > > such a subsection > > > >(b) If we assume the following to be true: > >- target endianness falls under some cpu state > >- cpu state is always restored before virtio devices state because they > > get initialized in this order in main(). > >Then an alternative is to rely on virtio_is_big_endian() again at > >load time. No need to mess around with the migration stream in this case. > > > >This patch implements (b). > > > >Note that the tswap helpers are implemented in virtio.c so that > >virtio-access.h stays platform independant. Most of the virtio code > >will be buildable under common-obj instead of obj then, and spare > >some cycles when building for multiple targets. > > > >Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > >[ ldq_phys() API change, > > relicensed virtio-access.h to GPLv2+ on Rusty's request, > > introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > > add VirtIODevice * arg to virtio helpers, > > use the existing virtio_is_big_endian() helper, > > virtio-pci: use the device is_big_endian flag, > > introduce virtio tswap16 and tswap64 helpers, > > move calls to tswap* out of virtio-access.h to make it platform independant, > > migration support, > > Greg Kurz <gkurz@linux.vnet.ibm.com> ] > >Cc: Cédric Le Goater <clg@fr.ibm.com> > >Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > >--- > > > >Changes since v6: > >- merge the virtio_needs_byteswap() helper from v6 and existing > > virtio_is_big_endian() > >- virtio-pci: now supports endianness changes > >- virtio-access.h fixes (target independant) > > > > exec.c | 2 - > > hw/virtio/Makefile.objs | 2 - > > hw/virtio/virtio-pci.c | 11 +-- > > hw/virtio/virtio.c | 35 +++++++++ > > include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > > include/hw/virtio/virtio.h | 2 + > > vl.c | 4 + > > 7 files changed, 185 insertions(+), 9 deletions(-) > > create mode 100644 include/hw/virtio/virtio-access.h > > > >diff --git a/exec.c b/exec.c > >index 91513c6..e6777d0 100644 > >--- a/exec.c > >+++ b/exec.c > >@@ -42,6 +42,7 @@ > > #else /* !CONFIG_USER_ONLY */ > > #include "sysemu/xen-mapcache.h" > > #include "trace.h" > >+#include "hw/virtio/virtio.h" > > #endif > > #include "exec/cpu-all.h" > >@@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > > * A helper function for the _utterly broken_ virtio device model to find out if > > * it's running on a big endian machine. Don't do this at home kids! > > */ > >-bool virtio_is_big_endian(void); > > bool virtio_is_big_endian(void) > > { > > #if defined(TARGET_WORDS_BIGENDIAN) > >diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > >index 1ba53d9..68c3064 100644 > >--- a/hw/virtio/Makefile.objs > >+++ b/hw/virtio/Makefile.objs > >@@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > > common-obj-y += virtio-mmio.o > > common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > >-obj-y += virtio.o virtio-balloon.o > >+obj-y += virtio.o virtio-balloon.o > > obj-$(CONFIG_LINUX) += vhost.o > >diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > >index ce97514..82a1689 100644 > >--- a/hw/virtio/virtio-pci.c > >+++ b/hw/virtio/virtio-pci.c > >@@ -89,9 +89,6 @@ > > /* Flags track per-device state like workarounds for quirks in older guests. */ > > #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > >-/* HACK for virtio to determine if it's running a big endian guest */ > >-bool virtio_is_big_endian(void); > >- > > static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > > VirtIOPCIProxy *dev); > >@@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > > break; > > case 2: > > val = virtio_config_readw(vdev, addr); > >- if (virtio_is_big_endian()) { > >+ if (vdev->is_big_endian) { > > val = bswap16(val); > > } > > break; > > case 4: > > val = virtio_config_readl(vdev, addr); > >- if (virtio_is_big_endian()) { > >+ if (vdev->is_big_endian) { > > val = bswap32(val); > > } > > break; > >@@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > > virtio_config_writeb(vdev, addr, val); > > break; > > case 2: > >- if (virtio_is_big_endian()) { > >+ if (vdev->is_big_endian) { > > val = bswap16(val); > > } > > virtio_config_writew(vdev, addr, val); > > break; > > case 4: > >- if (virtio_is_big_endian()) { > >+ if (vdev->is_big_endian) { > > val = bswap32(val); > > } > > virtio_config_writel(vdev, addr, val); > >diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > >index aeabf3a..bb646f0 100644 > >--- a/hw/virtio/virtio.c > >+++ b/hw/virtio/virtio.c > >@@ -19,6 +19,7 @@ > > #include "hw/virtio/virtio.h" > > #include "qemu/atomic.h" > > #include "hw/virtio/virtio-bus.h" > >+#include "hw/virtio/virtio-access.h" > > /* > > * The alignment to use between consumer and producer parts of vring. > >@@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > > virtio_set_status(vdev, 0); > >+ vdev->is_big_endian = virtio_is_big_endian(); > >+ > > if (k->reset) { > > k->reset(vdev); > > } > >@@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > > BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > > VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > >+ /* NOTE: we assume that endianness is a cpu state AND > >+ * cpu state is restored before virtio devices. > >+ */ > >+ vdev->is_big_endian = virtio_is_big_endian(); > >+ > > if (k->load_config) { > > ret = k->load_config(qbus->parent, f); > > if (ret) > >@@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > > } > > } > >+uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > >+{ > >+ if (vdev->is_big_endian) { > >+ return tswap16(s); > >+ } else { > >+ return bswap16(tswap16(s)); > >+ } > >+} > >+ > >+uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s) > >+{ > >+ if (vdev->is_big_endian) { > >+ return tswap32(s); > >+ } else { > >+ return bswap32(tswap32(s)); > >+ } > >+} > >+ > >+uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s) > >+{ > >+ if (vdev->is_big_endian) { > >+ return tswap64(s); > >+ } else { > >+ return bswap64(tswap64(s)); > >+ } > >+} > >+ > > static void virtio_device_realize(DeviceState *dev, Error **errp) > > { > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > >diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h > >new file mode 100644 > >index 0000000..5c9013e > >--- /dev/null > >+++ b/include/hw/virtio/virtio-access.h > >@@ -0,0 +1,138 @@ > >+/* > >+ * Virtio Accessor Support: In case your target can change endian. > >+ * > >+ * Copyright IBM, Corp. 2013 > >+ * > >+ * Authors: > >+ * Rusty Russell <rusty@au.ibm.com> > >+ * > >+ * This program is free software; you can redistribute it and/or modify > >+ * it under the terms of the GNU General Public License as published by > >+ * the Free Software Foundation, either version 2 of the License, or > >+ * (at your option) any later version. > >+ * > >+ */ > >+#ifndef _QEMU_VIRTIO_ACCESS_H > >+#define _QEMU_VIRTIO_ACCESS_H > >+#include "hw/virtio/virtio.h" > >+#include "exec/address-spaces.h" > >+ > >+static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa) > >+{ > >+ if (vdev->is_big_endian) { > >+ return lduw_be_phys(&address_space_memory, pa); > >+ } > >+ return lduw_le_phys(&address_space_memory, pa); > >+} > >+ > >+static inline uint32_t virtio_ldl_phys(VirtIODevice *vdev, hwaddr pa) > >+{ > >+ if (vdev->is_big_endian) { > >+ return ldl_be_phys(&address_space_memory, pa); > >+ } > >+ return ldl_le_phys(&address_space_memory, pa); > >+} > >+ > >+static inline uint64_t virtio_ldq_phys(VirtIODevice *vdev, hwaddr pa) > >+{ > >+ if (vdev->is_big_endian) { > >+ return ldq_be_phys(&address_space_memory, pa); > >+ } > >+ return ldq_le_phys(&address_space_memory, pa); > >+} > >+ > >+static inline void virtio_stw_phys(VirtIODevice *vdev, hwaddr pa, > >+ uint16_t value) > >+{ > >+ if (vdev->is_big_endian) { > >+ stw_be_phys(&address_space_memory, pa, value); > >+ } else { > >+ stw_le_phys(&address_space_memory, pa, value); > >+ } > >+} > >+ > >+static inline void virtio_stl_phys(VirtIODevice *vdev, hwaddr pa, > >+ uint32_t value) > >+{ > >+ if (vdev->is_big_endian) { > >+ stl_be_phys(&address_space_memory, pa, value); > >+ } else { > >+ stl_le_phys(&address_space_memory, pa, value); > >+ } > >+} > >+ > >+static inline void virtio_stw_p(VirtIODevice *vdev, void *ptr, uint16_t v) > >+{ > >+ if (vdev->is_big_endian) { > >+ stw_be_p(ptr, v); > >+ } else { > >+ stw_le_p(ptr, v); > >+ } > >+} > >+ > >+static inline void virtio_stl_p(VirtIODevice *vdev, void *ptr, uint32_t v) > >+{ > >+ if (vdev->is_big_endian) { > >+ stl_be_p(ptr, v); > >+ } else { > >+ stl_le_p(ptr, v); > >+ } > >+} > >+ > >+static inline void virtio_stq_p(VirtIODevice *vdev, void *ptr, uint64_t v) > >+{ > >+ if (vdev->is_big_endian) { > >+ stq_be_p(ptr, v); > >+ } else { > >+ stq_le_p(ptr, v); > >+ } > >+} > >+ > >+static inline int virtio_lduw_p(VirtIODevice *vdev, const void *ptr) > >+{ > >+ if (vdev->is_big_endian) { > >+ return lduw_be_p(ptr); > >+ } else { > >+ return lduw_le_p(ptr); > >+ } > >+} > >+ > >+static inline int virtio_ldl_p(VirtIODevice *vdev, const void *ptr) > >+{ > >+ if (vdev->is_big_endian) { > >+ return ldl_be_p(ptr); > >+ } else { > >+ return ldl_le_p(ptr); > >+ } > >+} > >+ > >+static inline uint64_t virtio_ldq_p(VirtIODevice *vdev, const void *ptr) > >+{ > >+ if (vdev->is_big_endian) { > >+ return ldq_be_p(ptr); > >+ } else { > >+ return ldq_le_p(ptr); > >+ } > >+} > >+ > >+uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s); > >+ > >+static inline void virtio_tswap16s(VirtIODevice *vdev, uint16_t *s) > >+{ > >+ *s = virtio_tswap16(vdev, *s); > >+} > >+ > >+uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s); > >+ > >+static inline void virtio_tswap32s(VirtIODevice *vdev, uint32_t *s) > >+{ > >+ *s = virtio_tswap32(vdev, *s); > >+} > >+ > >+uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s); > >+ > >+static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s) > >+{ > >+ *s = virtio_tswap64(vdev, *s); > >+} > > Could we try to poison any non-virtio, non-endian-specific memory > accessors when this file is included? That way we don't fall into > pitfalls where we end up running stl_p in a tiny corner case > somewhere still. > > > Alex Not sure about all users but it looks like the only file including this is virtio.c right? I'm guessing that's safe there. Or maybe get rid of the header completely ...
On 14.04.14 14:24, Michael S. Tsirkin wrote: > On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: >> On 14.04.14 13:58, Greg Kurz wrote: >>> From: Rusty Russell <rusty@rustcorp.com.au> >>> >>> virtio data structures are defined as "target endian", which assumes >>> that's a fixed value. In fact, that actually means it's platform-specific. >>> The OASIS virtio 1.0 spec will fix this, by making it all little endian. >>> >>> We introduce memory accessors to be used accross the virtio code where >>> needed. These accessors should support both legacy and 1.0 devices. >>> A good way to do it is to introduce a per-device property to store the >>> endianness. We choose to set this flag at device reset time because it >>> is reasonnable to assume the endianness won't change unless we reboot or >>> kexec another kernel. And it is also reasonnable to assume the new kernel >>> will reset the devices before using them (otherwise it will break). >>> >>> We reuse the virtio_is_big_endian() helper since it provides the right >>> value for legacy devices with most of the targets, that have fixed >>> endianness. It can then be overriden to support endian-ambivalent targets. >>> >>> To support migration, we need to set the flag in virtio_load() as well. >>> >>> (a) One solution would be to add it to the stream, but it have some >>> drawbacks: >>> - since this only affects a few targets, the field should be put into a >>> subsection >>> - virtio migration code should be ported to vmstate to be able to introduce >>> such a subsection >>> >>> (b) If we assume the following to be true: >>> - target endianness falls under some cpu state >>> - cpu state is always restored before virtio devices state because they >>> get initialized in this order in main(). >>> Then an alternative is to rely on virtio_is_big_endian() again at >>> load time. No need to mess around with the migration stream in this case. >>> >>> This patch implements (b). >>> >>> Note that the tswap helpers are implemented in virtio.c so that >>> virtio-access.h stays platform independant. Most of the virtio code >>> will be buildable under common-obj instead of obj then, and spare >>> some cycles when building for multiple targets. >>> >>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> >>> [ ldq_phys() API change, >>> relicensed virtio-access.h to GPLv2+ on Rusty's request, >>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) >>> add VirtIODevice * arg to virtio helpers, >>> use the existing virtio_is_big_endian() helper, >>> virtio-pci: use the device is_big_endian flag, >>> introduce virtio tswap16 and tswap64 helpers, >>> move calls to tswap* out of virtio-access.h to make it platform independant, >>> migration support, >>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] >>> Cc: Cédric Le Goater <clg@fr.ibm.com> >>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> >>> --- >>> >>> Changes since v6: >>> - merge the virtio_needs_byteswap() helper from v6 and existing >>> virtio_is_big_endian() >>> - virtio-pci: now supports endianness changes >>> - virtio-access.h fixes (target independant) >>> >>> exec.c | 2 - >>> hw/virtio/Makefile.objs | 2 - >>> hw/virtio/virtio-pci.c | 11 +-- >>> hw/virtio/virtio.c | 35 +++++++++ >>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ >>> include/hw/virtio/virtio.h | 2 + >>> vl.c | 4 + >>> 7 files changed, 185 insertions(+), 9 deletions(-) >>> create mode 100644 include/hw/virtio/virtio-access.h >>> >>> diff --git a/exec.c b/exec.c >>> index 91513c6..e6777d0 100644 >>> --- a/exec.c >>> +++ b/exec.c >>> @@ -42,6 +42,7 @@ >>> #else /* !CONFIG_USER_ONLY */ >>> #include "sysemu/xen-mapcache.h" >>> #include "trace.h" >>> +#include "hw/virtio/virtio.h" >>> #endif >>> #include "exec/cpu-all.h" >>> @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, >>> * A helper function for the _utterly broken_ virtio device model to find out if >>> * it's running on a big endian machine. Don't do this at home kids! >>> */ >>> -bool virtio_is_big_endian(void); >>> bool virtio_is_big_endian(void) >>> { >>> #if defined(TARGET_WORDS_BIGENDIAN) >>> diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs >>> index 1ba53d9..68c3064 100644 >>> --- a/hw/virtio/Makefile.objs >>> +++ b/hw/virtio/Makefile.objs >>> @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o >>> common-obj-y += virtio-mmio.o >>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ >>> -obj-y += virtio.o virtio-balloon.o >>> +obj-y += virtio.o virtio-balloon.o >>> obj-$(CONFIG_LINUX) += vhost.o >>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c >>> index ce97514..82a1689 100644 >>> --- a/hw/virtio/virtio-pci.c >>> +++ b/hw/virtio/virtio-pci.c >>> @@ -89,9 +89,6 @@ >>> /* Flags track per-device state like workarounds for quirks in older guests. */ >>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) >>> -/* HACK for virtio to determine if it's running a big endian guest */ >>> -bool virtio_is_big_endian(void); >>> - >>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, >>> VirtIOPCIProxy *dev); >>> @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, >>> break; >>> case 2: >>> val = virtio_config_readw(vdev, addr); >>> - if (virtio_is_big_endian()) { >>> + if (vdev->is_big_endian) { >>> val = bswap16(val); >>> } >>> break; >>> case 4: >>> val = virtio_config_readl(vdev, addr); >>> - if (virtio_is_big_endian()) { >>> + if (vdev->is_big_endian) { >>> val = bswap32(val); >>> } >>> break; >>> @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, >>> virtio_config_writeb(vdev, addr, val); >>> break; >>> case 2: >>> - if (virtio_is_big_endian()) { >>> + if (vdev->is_big_endian) { >>> val = bswap16(val); >>> } >>> virtio_config_writew(vdev, addr, val); >>> break; >>> case 4: >>> - if (virtio_is_big_endian()) { >>> + if (vdev->is_big_endian) { >>> val = bswap32(val); >>> } >>> virtio_config_writel(vdev, addr, val); >>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c >>> index aeabf3a..bb646f0 100644 >>> --- a/hw/virtio/virtio.c >>> +++ b/hw/virtio/virtio.c >>> @@ -19,6 +19,7 @@ >>> #include "hw/virtio/virtio.h" >>> #include "qemu/atomic.h" >>> #include "hw/virtio/virtio-bus.h" >>> +#include "hw/virtio/virtio-access.h" >>> /* >>> * The alignment to use between consumer and producer parts of vring. >>> @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) >>> virtio_set_status(vdev, 0); >>> + vdev->is_big_endian = virtio_is_big_endian(); >>> + >>> if (k->reset) { >>> k->reset(vdev); >>> } >>> @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) >>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); >>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); >>> + /* NOTE: we assume that endianness is a cpu state AND >>> + * cpu state is restored before virtio devices. >>> + */ >>> + vdev->is_big_endian = virtio_is_big_endian(); >>> + >>> if (k->load_config) { >>> ret = k->load_config(qbus->parent, f); >>> if (ret) >>> @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) >>> } >>> } >>> +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) >>> +{ >>> + if (vdev->is_big_endian) { >>> + return tswap16(s); >>> + } else { >>> + return bswap16(tswap16(s)); >>> + } >> This looks pretty bogus. When virtio wants to do "tswap" what it >> means is "give me a host endianness value in virtio endianness". How >> about something like >> >> #ifdef HOST_WORDS_BIGENDIAN >> return vdev->is_big_endian ? s : bswap16(s); >> #else >> return vdev->is_big_endian ? bswap16(s) : s; >> #endif >> > Actually why doesn't this call virtio_is_big_endian? > As it is, we get extra branches even if target endian-ness > is fixed. Because virtio_is_big_endian() returns the default endianness, not the runtime endianness of a virtio device. > >> That should work pretty well inline as well, so you don't need to >> compile virtio.c as target dependent. >> >> >> Alex > Yes but we'll still need to build two variants: fixed endian and > dynamic endian platforms. > Something along the lines of 32/64 bit split that we have? Why bother? Always make it dynamic and don't change the per-device variable, no? I'd be surprised if the performance difference is measurable. The bulk of the data we transfer gets copied raw anyway. Alex
On 14.04.14 14:28, Michael S. Tsirkin wrote: > On Mon, Apr 14, 2014 at 02:22:36PM +0200, Alexander Graf wrote: >> On 14.04.14 13:58, Greg Kurz wrote: >>> From: Rusty Russell <rusty@rustcorp.com.au> >>> >>> virtio data structures are defined as "target endian", which assumes >>> that's a fixed value. In fact, that actually means it's platform-specific. >>> The OASIS virtio 1.0 spec will fix this, by making it all little endian. >>> >>> We introduce memory accessors to be used accross the virtio code where >>> needed. These accessors should support both legacy and 1.0 devices. >>> A good way to do it is to introduce a per-device property to store the >>> endianness. We choose to set this flag at device reset time because it >>> is reasonnable to assume the endianness won't change unless we reboot or >>> kexec another kernel. And it is also reasonnable to assume the new kernel >>> will reset the devices before using them (otherwise it will break). >>> >>> We reuse the virtio_is_big_endian() helper since it provides the right >>> value for legacy devices with most of the targets, that have fixed >>> endianness. It can then be overriden to support endian-ambivalent targets. >>> >>> To support migration, we need to set the flag in virtio_load() as well. >>> >>> (a) One solution would be to add it to the stream, but it have some >>> drawbacks: >>> - since this only affects a few targets, the field should be put into a >>> subsection >>> - virtio migration code should be ported to vmstate to be able to introduce >>> such a subsection >>> >>> (b) If we assume the following to be true: >>> - target endianness falls under some cpu state >>> - cpu state is always restored before virtio devices state because they >>> get initialized in this order in main(). >>> Then an alternative is to rely on virtio_is_big_endian() again at >>> load time. No need to mess around with the migration stream in this case. >>> >>> This patch implements (b). >>> >>> Note that the tswap helpers are implemented in virtio.c so that >>> virtio-access.h stays platform independant. Most of the virtio code >>> will be buildable under common-obj instead of obj then, and spare >>> some cycles when building for multiple targets. >>> >>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> >>> [ ldq_phys() API change, >>> relicensed virtio-access.h to GPLv2+ on Rusty's request, >>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) >>> add VirtIODevice * arg to virtio helpers, >>> use the existing virtio_is_big_endian() helper, >>> virtio-pci: use the device is_big_endian flag, >>> introduce virtio tswap16 and tswap64 helpers, >>> move calls to tswap* out of virtio-access.h to make it platform independant, >>> migration support, >>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] >>> Cc: Cédric Le Goater <clg@fr.ibm.com> >>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> >>> --- >>> >>> Changes since v6: >>> - merge the virtio_needs_byteswap() helper from v6 and existing >>> virtio_is_big_endian() >>> - virtio-pci: now supports endianness changes >>> - virtio-access.h fixes (target independant) >>> >>> exec.c | 2 - >>> hw/virtio/Makefile.objs | 2 - >>> hw/virtio/virtio-pci.c | 11 +-- >>> hw/virtio/virtio.c | 35 +++++++++ >>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ >>> include/hw/virtio/virtio.h | 2 + >>> vl.c | 4 + >>> 7 files changed, 185 insertions(+), 9 deletions(-) >>> create mode 100644 include/hw/virtio/virtio-access.h >>> >>> diff --git a/exec.c b/exec.c >>> index 91513c6..e6777d0 100644 >>> --- a/exec.c >>> +++ b/exec.c >>> @@ -42,6 +42,7 @@ >>> #else /* !CONFIG_USER_ONLY */ >>> #include "sysemu/xen-mapcache.h" >>> #include "trace.h" >>> +#include "hw/virtio/virtio.h" >>> #endif >>> #include "exec/cpu-all.h" >>> @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, >>> * A helper function for the _utterly broken_ virtio device model to find out if >>> * it's running on a big endian machine. Don't do this at home kids! >>> */ >>> -bool virtio_is_big_endian(void); >>> bool virtio_is_big_endian(void) >>> { >>> #if defined(TARGET_WORDS_BIGENDIAN) >>> diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs >>> index 1ba53d9..68c3064 100644 >>> --- a/hw/virtio/Makefile.objs >>> +++ b/hw/virtio/Makefile.objs >>> @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o >>> common-obj-y += virtio-mmio.o >>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ >>> -obj-y += virtio.o virtio-balloon.o >>> +obj-y += virtio.o virtio-balloon.o >>> obj-$(CONFIG_LINUX) += vhost.o >>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c >>> index ce97514..82a1689 100644 >>> --- a/hw/virtio/virtio-pci.c >>> +++ b/hw/virtio/virtio-pci.c >>> @@ -89,9 +89,6 @@ >>> /* Flags track per-device state like workarounds for quirks in older guests. */ >>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) >>> -/* HACK for virtio to determine if it's running a big endian guest */ >>> -bool virtio_is_big_endian(void); >>> - >>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, >>> VirtIOPCIProxy *dev); >>> @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, >>> break; >>> case 2: >>> val = virtio_config_readw(vdev, addr); >>> - if (virtio_is_big_endian()) { >>> + if (vdev->is_big_endian) { >>> val = bswap16(val); >>> } >>> break; >>> case 4: >>> val = virtio_config_readl(vdev, addr); >>> - if (virtio_is_big_endian()) { >>> + if (vdev->is_big_endian) { >>> val = bswap32(val); >>> } >>> break; >>> @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, >>> virtio_config_writeb(vdev, addr, val); >>> break; >>> case 2: >>> - if (virtio_is_big_endian()) { >>> + if (vdev->is_big_endian) { >>> val = bswap16(val); >>> } >>> virtio_config_writew(vdev, addr, val); >>> break; >>> case 4: >>> - if (virtio_is_big_endian()) { >>> + if (vdev->is_big_endian) { >>> val = bswap32(val); >>> } >>> virtio_config_writel(vdev, addr, val); >>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c >>> index aeabf3a..bb646f0 100644 >>> --- a/hw/virtio/virtio.c >>> +++ b/hw/virtio/virtio.c >>> @@ -19,6 +19,7 @@ >>> #include "hw/virtio/virtio.h" >>> #include "qemu/atomic.h" >>> #include "hw/virtio/virtio-bus.h" >>> +#include "hw/virtio/virtio-access.h" >>> /* >>> * The alignment to use between consumer and producer parts of vring. >>> @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) >>> virtio_set_status(vdev, 0); >>> + vdev->is_big_endian = virtio_is_big_endian(); >>> + >>> if (k->reset) { >>> k->reset(vdev); >>> } >>> @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) >>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); >>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); >>> + /* NOTE: we assume that endianness is a cpu state AND >>> + * cpu state is restored before virtio devices. >>> + */ >>> + vdev->is_big_endian = virtio_is_big_endian(); >>> + >>> if (k->load_config) { >>> ret = k->load_config(qbus->parent, f); >>> if (ret) >>> @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) >>> } >>> } >>> +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) >>> +{ >>> + if (vdev->is_big_endian) { >>> + return tswap16(s); >>> + } else { >>> + return bswap16(tswap16(s)); >>> + } >>> +} >>> + >>> +uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s) >>> +{ >>> + if (vdev->is_big_endian) { >>> + return tswap32(s); >>> + } else { >>> + return bswap32(tswap32(s)); >>> + } >>> +} >>> + >>> +uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s) >>> +{ >>> + if (vdev->is_big_endian) { >>> + return tswap64(s); >>> + } else { >>> + return bswap64(tswap64(s)); >>> + } >>> +} >>> + >>> static void virtio_device_realize(DeviceState *dev, Error **errp) >>> { >>> VirtIODevice *vdev = VIRTIO_DEVICE(dev); >>> diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h >>> new file mode 100644 >>> index 0000000..5c9013e >>> --- /dev/null >>> +++ b/include/hw/virtio/virtio-access.h >>> @@ -0,0 +1,138 @@ >>> +/* >>> + * Virtio Accessor Support: In case your target can change endian. >>> + * >>> + * Copyright IBM, Corp. 2013 >>> + * >>> + * Authors: >>> + * Rusty Russell <rusty@au.ibm.com> >>> + * >>> + * This program is free software; you can redistribute it and/or modify >>> + * it under the terms of the GNU General Public License as published by >>> + * the Free Software Foundation, either version 2 of the License, or >>> + * (at your option) any later version. >>> + * >>> + */ >>> +#ifndef _QEMU_VIRTIO_ACCESS_H >>> +#define _QEMU_VIRTIO_ACCESS_H >>> +#include "hw/virtio/virtio.h" >>> +#include "exec/address-spaces.h" >>> + >>> +static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa) >>> +{ >>> + if (vdev->is_big_endian) { >>> + return lduw_be_phys(&address_space_memory, pa); >>> + } >>> + return lduw_le_phys(&address_space_memory, pa); >>> +} >>> + >>> +static inline uint32_t virtio_ldl_phys(VirtIODevice *vdev, hwaddr pa) >>> +{ >>> + if (vdev->is_big_endian) { >>> + return ldl_be_phys(&address_space_memory, pa); >>> + } >>> + return ldl_le_phys(&address_space_memory, pa); >>> +} >>> + >>> +static inline uint64_t virtio_ldq_phys(VirtIODevice *vdev, hwaddr pa) >>> +{ >>> + if (vdev->is_big_endian) { >>> + return ldq_be_phys(&address_space_memory, pa); >>> + } >>> + return ldq_le_phys(&address_space_memory, pa); >>> +} >>> + >>> +static inline void virtio_stw_phys(VirtIODevice *vdev, hwaddr pa, >>> + uint16_t value) >>> +{ >>> + if (vdev->is_big_endian) { >>> + stw_be_phys(&address_space_memory, pa, value); >>> + } else { >>> + stw_le_phys(&address_space_memory, pa, value); >>> + } >>> +} >>> + >>> +static inline void virtio_stl_phys(VirtIODevice *vdev, hwaddr pa, >>> + uint32_t value) >>> +{ >>> + if (vdev->is_big_endian) { >>> + stl_be_phys(&address_space_memory, pa, value); >>> + } else { >>> + stl_le_phys(&address_space_memory, pa, value); >>> + } >>> +} >>> + >>> +static inline void virtio_stw_p(VirtIODevice *vdev, void *ptr, uint16_t v) >>> +{ >>> + if (vdev->is_big_endian) { >>> + stw_be_p(ptr, v); >>> + } else { >>> + stw_le_p(ptr, v); >>> + } >>> +} >>> + >>> +static inline void virtio_stl_p(VirtIODevice *vdev, void *ptr, uint32_t v) >>> +{ >>> + if (vdev->is_big_endian) { >>> + stl_be_p(ptr, v); >>> + } else { >>> + stl_le_p(ptr, v); >>> + } >>> +} >>> + >>> +static inline void virtio_stq_p(VirtIODevice *vdev, void *ptr, uint64_t v) >>> +{ >>> + if (vdev->is_big_endian) { >>> + stq_be_p(ptr, v); >>> + } else { >>> + stq_le_p(ptr, v); >>> + } >>> +} >>> + >>> +static inline int virtio_lduw_p(VirtIODevice *vdev, const void *ptr) >>> +{ >>> + if (vdev->is_big_endian) { >>> + return lduw_be_p(ptr); >>> + } else { >>> + return lduw_le_p(ptr); >>> + } >>> +} >>> + >>> +static inline int virtio_ldl_p(VirtIODevice *vdev, const void *ptr) >>> +{ >>> + if (vdev->is_big_endian) { >>> + return ldl_be_p(ptr); >>> + } else { >>> + return ldl_le_p(ptr); >>> + } >>> +} >>> + >>> +static inline uint64_t virtio_ldq_p(VirtIODevice *vdev, const void *ptr) >>> +{ >>> + if (vdev->is_big_endian) { >>> + return ldq_be_p(ptr); >>> + } else { >>> + return ldq_le_p(ptr); >>> + } >>> +} >>> + >>> +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s); >>> + >>> +static inline void virtio_tswap16s(VirtIODevice *vdev, uint16_t *s) >>> +{ >>> + *s = virtio_tswap16(vdev, *s); >>> +} >>> + >>> +uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s); >>> + >>> +static inline void virtio_tswap32s(VirtIODevice *vdev, uint32_t *s) >>> +{ >>> + *s = virtio_tswap32(vdev, *s); >>> +} >>> + >>> +uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s); >>> + >>> +static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s) >>> +{ >>> + *s = virtio_tswap64(vdev, *s); >>> +} >> Could we try to poison any non-virtio, non-endian-specific memory >> accessors when this file is included? That way we don't fall into >> pitfalls where we end up running stl_p in a tiny corner case >> somewhere still. >> >> >> Alex > Not sure about all users but it looks like the only file including this > is virtio.c right? > I'm guessing that's safe there. any virtio device implementations, since they need to communicate with the guest :). > Or maybe get rid of the header completely ... and use what instead? Function calls to an external helper .c file? Alex
On Mon, Apr 14, 2014 at 02:31:06PM +0200, Alexander Graf wrote: > > On 14.04.14 14:28, Michael S. Tsirkin wrote: > >On Mon, Apr 14, 2014 at 02:22:36PM +0200, Alexander Graf wrote: > >>On 14.04.14 13:58, Greg Kurz wrote: > >>>From: Rusty Russell <rusty@rustcorp.com.au> > >>> > >>>virtio data structures are defined as "target endian", which assumes > >>>that's a fixed value. In fact, that actually means it's platform-specific. > >>>The OASIS virtio 1.0 spec will fix this, by making it all little endian. > >>> > >>>We introduce memory accessors to be used accross the virtio code where > >>>needed. These accessors should support both legacy and 1.0 devices. > >>>A good way to do it is to introduce a per-device property to store the > >>>endianness. We choose to set this flag at device reset time because it > >>>is reasonnable to assume the endianness won't change unless we reboot or > >>>kexec another kernel. And it is also reasonnable to assume the new kernel > >>>will reset the devices before using them (otherwise it will break). > >>> > >>>We reuse the virtio_is_big_endian() helper since it provides the right > >>>value for legacy devices with most of the targets, that have fixed > >>>endianness. It can then be overriden to support endian-ambivalent targets. > >>> > >>>To support migration, we need to set the flag in virtio_load() as well. > >>> > >>>(a) One solution would be to add it to the stream, but it have some > >>> drawbacks: > >>>- since this only affects a few targets, the field should be put into a > >>> subsection > >>>- virtio migration code should be ported to vmstate to be able to introduce > >>> such a subsection > >>> > >>>(b) If we assume the following to be true: > >>>- target endianness falls under some cpu state > >>>- cpu state is always restored before virtio devices state because they > >>> get initialized in this order in main(). > >>>Then an alternative is to rely on virtio_is_big_endian() again at > >>>load time. No need to mess around with the migration stream in this case. > >>> > >>>This patch implements (b). > >>> > >>>Note that the tswap helpers are implemented in virtio.c so that > >>>virtio-access.h stays platform independant. Most of the virtio code > >>>will be buildable under common-obj instead of obj then, and spare > >>>some cycles when building for multiple targets. > >>> > >>>Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > >>>[ ldq_phys() API change, > >>> relicensed virtio-access.h to GPLv2+ on Rusty's request, > >>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > >>> add VirtIODevice * arg to virtio helpers, > >>> use the existing virtio_is_big_endian() helper, > >>> virtio-pci: use the device is_big_endian flag, > >>> introduce virtio tswap16 and tswap64 helpers, > >>> move calls to tswap* out of virtio-access.h to make it platform independant, > >>> migration support, > >>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] > >>>Cc: Cédric Le Goater <clg@fr.ibm.com> > >>>Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > >>>--- > >>> > >>>Changes since v6: > >>>- merge the virtio_needs_byteswap() helper from v6 and existing > >>> virtio_is_big_endian() > >>>- virtio-pci: now supports endianness changes > >>>- virtio-access.h fixes (target independant) > >>> > >>> exec.c | 2 - > >>> hw/virtio/Makefile.objs | 2 - > >>> hw/virtio/virtio-pci.c | 11 +-- > >>> hw/virtio/virtio.c | 35 +++++++++ > >>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > >>> include/hw/virtio/virtio.h | 2 + > >>> vl.c | 4 + > >>> 7 files changed, 185 insertions(+), 9 deletions(-) > >>> create mode 100644 include/hw/virtio/virtio-access.h > >>> > >>>diff --git a/exec.c b/exec.c > >>>index 91513c6..e6777d0 100644 > >>>--- a/exec.c > >>>+++ b/exec.c > >>>@@ -42,6 +42,7 @@ > >>> #else /* !CONFIG_USER_ONLY */ > >>> #include "sysemu/xen-mapcache.h" > >>> #include "trace.h" > >>>+#include "hw/virtio/virtio.h" > >>> #endif > >>> #include "exec/cpu-all.h" > >>>@@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > >>> * A helper function for the _utterly broken_ virtio device model to find out if > >>> * it's running on a big endian machine. Don't do this at home kids! > >>> */ > >>>-bool virtio_is_big_endian(void); > >>> bool virtio_is_big_endian(void) > >>> { > >>> #if defined(TARGET_WORDS_BIGENDIAN) > >>>diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > >>>index 1ba53d9..68c3064 100644 > >>>--- a/hw/virtio/Makefile.objs > >>>+++ b/hw/virtio/Makefile.objs > >>>@@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > >>> common-obj-y += virtio-mmio.o > >>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > >>>-obj-y += virtio.o virtio-balloon.o > >>>+obj-y += virtio.o virtio-balloon.o > >>> obj-$(CONFIG_LINUX) += vhost.o > >>>diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > >>>index ce97514..82a1689 100644 > >>>--- a/hw/virtio/virtio-pci.c > >>>+++ b/hw/virtio/virtio-pci.c > >>>@@ -89,9 +89,6 @@ > >>> /* Flags track per-device state like workarounds for quirks in older guests. */ > >>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > >>>-/* HACK for virtio to determine if it's running a big endian guest */ > >>>-bool virtio_is_big_endian(void); > >>>- > >>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > >>> VirtIOPCIProxy *dev); > >>>@@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > >>> break; > >>> case 2: > >>> val = virtio_config_readw(vdev, addr); > >>>- if (virtio_is_big_endian()) { > >>>+ if (vdev->is_big_endian) { > >>> val = bswap16(val); > >>> } > >>> break; > >>> case 4: > >>> val = virtio_config_readl(vdev, addr); > >>>- if (virtio_is_big_endian()) { > >>>+ if (vdev->is_big_endian) { > >>> val = bswap32(val); > >>> } > >>> break; > >>>@@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > >>> virtio_config_writeb(vdev, addr, val); > >>> break; > >>> case 2: > >>>- if (virtio_is_big_endian()) { > >>>+ if (vdev->is_big_endian) { > >>> val = bswap16(val); > >>> } > >>> virtio_config_writew(vdev, addr, val); > >>> break; > >>> case 4: > >>>- if (virtio_is_big_endian()) { > >>>+ if (vdev->is_big_endian) { > >>> val = bswap32(val); > >>> } > >>> virtio_config_writel(vdev, addr, val); > >>>diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > >>>index aeabf3a..bb646f0 100644 > >>>--- a/hw/virtio/virtio.c > >>>+++ b/hw/virtio/virtio.c > >>>@@ -19,6 +19,7 @@ > >>> #include "hw/virtio/virtio.h" > >>> #include "qemu/atomic.h" > >>> #include "hw/virtio/virtio-bus.h" > >>>+#include "hw/virtio/virtio-access.h" > >>> /* > >>> * The alignment to use between consumer and producer parts of vring. > >>>@@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > >>> virtio_set_status(vdev, 0); > >>>+ vdev->is_big_endian = virtio_is_big_endian(); > >>>+ > >>> if (k->reset) { > >>> k->reset(vdev); > >>> } > >>>@@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > >>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > >>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > >>>+ /* NOTE: we assume that endianness is a cpu state AND > >>>+ * cpu state is restored before virtio devices. > >>>+ */ > >>>+ vdev->is_big_endian = virtio_is_big_endian(); > >>>+ > >>> if (k->load_config) { > >>> ret = k->load_config(qbus->parent, f); > >>> if (ret) > >>>@@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > >>> } > >>> } > >>>+uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ return tswap16(s); > >>>+ } else { > >>>+ return bswap16(tswap16(s)); > >>>+ } > >>>+} > >>>+ > >>>+uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ return tswap32(s); > >>>+ } else { > >>>+ return bswap32(tswap32(s)); > >>>+ } > >>>+} > >>>+ > >>>+uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ return tswap64(s); > >>>+ } else { > >>>+ return bswap64(tswap64(s)); > >>>+ } > >>>+} > >>>+ > >>> static void virtio_device_realize(DeviceState *dev, Error **errp) > >>> { > >>> VirtIODevice *vdev = VIRTIO_DEVICE(dev); > >>>diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h > >>>new file mode 100644 > >>>index 0000000..5c9013e > >>>--- /dev/null > >>>+++ b/include/hw/virtio/virtio-access.h > >>>@@ -0,0 +1,138 @@ > >>>+/* > >>>+ * Virtio Accessor Support: In case your target can change endian. > >>>+ * > >>>+ * Copyright IBM, Corp. 2013 > >>>+ * > >>>+ * Authors: > >>>+ * Rusty Russell <rusty@au.ibm.com> > >>>+ * > >>>+ * This program is free software; you can redistribute it and/or modify > >>>+ * it under the terms of the GNU General Public License as published by > >>>+ * the Free Software Foundation, either version 2 of the License, or > >>>+ * (at your option) any later version. > >>>+ * > >>>+ */ > >>>+#ifndef _QEMU_VIRTIO_ACCESS_H > >>>+#define _QEMU_VIRTIO_ACCESS_H > >>>+#include "hw/virtio/virtio.h" > >>>+#include "exec/address-spaces.h" > >>>+ > >>>+static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ return lduw_be_phys(&address_space_memory, pa); > >>>+ } > >>>+ return lduw_le_phys(&address_space_memory, pa); > >>>+} > >>>+ > >>>+static inline uint32_t virtio_ldl_phys(VirtIODevice *vdev, hwaddr pa) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ return ldl_be_phys(&address_space_memory, pa); > >>>+ } > >>>+ return ldl_le_phys(&address_space_memory, pa); > >>>+} > >>>+ > >>>+static inline uint64_t virtio_ldq_phys(VirtIODevice *vdev, hwaddr pa) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ return ldq_be_phys(&address_space_memory, pa); > >>>+ } > >>>+ return ldq_le_phys(&address_space_memory, pa); > >>>+} > >>>+ > >>>+static inline void virtio_stw_phys(VirtIODevice *vdev, hwaddr pa, > >>>+ uint16_t value) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ stw_be_phys(&address_space_memory, pa, value); > >>>+ } else { > >>>+ stw_le_phys(&address_space_memory, pa, value); > >>>+ } > >>>+} > >>>+ > >>>+static inline void virtio_stl_phys(VirtIODevice *vdev, hwaddr pa, > >>>+ uint32_t value) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ stl_be_phys(&address_space_memory, pa, value); > >>>+ } else { > >>>+ stl_le_phys(&address_space_memory, pa, value); > >>>+ } > >>>+} > >>>+ > >>>+static inline void virtio_stw_p(VirtIODevice *vdev, void *ptr, uint16_t v) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ stw_be_p(ptr, v); > >>>+ } else { > >>>+ stw_le_p(ptr, v); > >>>+ } > >>>+} > >>>+ > >>>+static inline void virtio_stl_p(VirtIODevice *vdev, void *ptr, uint32_t v) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ stl_be_p(ptr, v); > >>>+ } else { > >>>+ stl_le_p(ptr, v); > >>>+ } > >>>+} > >>>+ > >>>+static inline void virtio_stq_p(VirtIODevice *vdev, void *ptr, uint64_t v) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ stq_be_p(ptr, v); > >>>+ } else { > >>>+ stq_le_p(ptr, v); > >>>+ } > >>>+} > >>>+ > >>>+static inline int virtio_lduw_p(VirtIODevice *vdev, const void *ptr) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ return lduw_be_p(ptr); > >>>+ } else { > >>>+ return lduw_le_p(ptr); > >>>+ } > >>>+} > >>>+ > >>>+static inline int virtio_ldl_p(VirtIODevice *vdev, const void *ptr) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ return ldl_be_p(ptr); > >>>+ } else { > >>>+ return ldl_le_p(ptr); > >>>+ } > >>>+} > >>>+ > >>>+static inline uint64_t virtio_ldq_p(VirtIODevice *vdev, const void *ptr) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ return ldq_be_p(ptr); > >>>+ } else { > >>>+ return ldq_le_p(ptr); > >>>+ } > >>>+} > >>>+ > >>>+uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s); > >>>+ > >>>+static inline void virtio_tswap16s(VirtIODevice *vdev, uint16_t *s) > >>>+{ > >>>+ *s = virtio_tswap16(vdev, *s); > >>>+} > >>>+ > >>>+uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s); > >>>+ > >>>+static inline void virtio_tswap32s(VirtIODevice *vdev, uint32_t *s) > >>>+{ > >>>+ *s = virtio_tswap32(vdev, *s); > >>>+} > >>>+ > >>>+uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s); > >>>+ > >>>+static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s) > >>>+{ > >>>+ *s = virtio_tswap64(vdev, *s); > >>>+} > >>Could we try to poison any non-virtio, non-endian-specific memory > >>accessors when this file is included? That way we don't fall into > >>pitfalls where we end up running stl_p in a tiny corner case > >>somewhere still. > >> > >> > >>Alex > >Not sure about all users but it looks like the only file including this > >is virtio.c right? > >I'm guessing that's safe there. > > any virtio device implementations, since they need to communicate > with the guest :). In that case how can we poison regular memory accesses? Devices normally do more than virtio related things. > >Or maybe get rid of the header completely ... > > and use what instead? Function calls to an external helper .c file? > > > Alex
On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: > > On 14.04.14 14:24, Michael S. Tsirkin wrote: > >On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: > >>On 14.04.14 13:58, Greg Kurz wrote: > >>>From: Rusty Russell <rusty@rustcorp.com.au> > >>> > >>>virtio data structures are defined as "target endian", which assumes > >>>that's a fixed value. In fact, that actually means it's platform-specific. > >>>The OASIS virtio 1.0 spec will fix this, by making it all little endian. > >>> > >>>We introduce memory accessors to be used accross the virtio code where > >>>needed. These accessors should support both legacy and 1.0 devices. > >>>A good way to do it is to introduce a per-device property to store the > >>>endianness. We choose to set this flag at device reset time because it > >>>is reasonnable to assume the endianness won't change unless we reboot or > >>>kexec another kernel. And it is also reasonnable to assume the new kernel > >>>will reset the devices before using them (otherwise it will break). > >>> > >>>We reuse the virtio_is_big_endian() helper since it provides the right > >>>value for legacy devices with most of the targets, that have fixed > >>>endianness. It can then be overriden to support endian-ambivalent targets. > >>> > >>>To support migration, we need to set the flag in virtio_load() as well. > >>> > >>>(a) One solution would be to add it to the stream, but it have some > >>> drawbacks: > >>>- since this only affects a few targets, the field should be put into a > >>> subsection > >>>- virtio migration code should be ported to vmstate to be able to introduce > >>> such a subsection > >>> > >>>(b) If we assume the following to be true: > >>>- target endianness falls under some cpu state > >>>- cpu state is always restored before virtio devices state because they > >>> get initialized in this order in main(). > >>>Then an alternative is to rely on virtio_is_big_endian() again at > >>>load time. No need to mess around with the migration stream in this case. > >>> > >>>This patch implements (b). > >>> > >>>Note that the tswap helpers are implemented in virtio.c so that > >>>virtio-access.h stays platform independant. Most of the virtio code > >>>will be buildable under common-obj instead of obj then, and spare > >>>some cycles when building for multiple targets. > >>> > >>>Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > >>>[ ldq_phys() API change, > >>> relicensed virtio-access.h to GPLv2+ on Rusty's request, > >>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > >>> add VirtIODevice * arg to virtio helpers, > >>> use the existing virtio_is_big_endian() helper, > >>> virtio-pci: use the device is_big_endian flag, > >>> introduce virtio tswap16 and tswap64 helpers, > >>> move calls to tswap* out of virtio-access.h to make it platform independant, > >>> migration support, > >>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] > >>>Cc: Cédric Le Goater <clg@fr.ibm.com> > >>>Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > >>>--- > >>> > >>>Changes since v6: > >>>- merge the virtio_needs_byteswap() helper from v6 and existing > >>> virtio_is_big_endian() > >>>- virtio-pci: now supports endianness changes > >>>- virtio-access.h fixes (target independant) > >>> > >>> exec.c | 2 - > >>> hw/virtio/Makefile.objs | 2 - > >>> hw/virtio/virtio-pci.c | 11 +-- > >>> hw/virtio/virtio.c | 35 +++++++++ > >>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > >>> include/hw/virtio/virtio.h | 2 + > >>> vl.c | 4 + > >>> 7 files changed, 185 insertions(+), 9 deletions(-) > >>> create mode 100644 include/hw/virtio/virtio-access.h > >>> > >>>diff --git a/exec.c b/exec.c > >>>index 91513c6..e6777d0 100644 > >>>--- a/exec.c > >>>+++ b/exec.c > >>>@@ -42,6 +42,7 @@ > >>> #else /* !CONFIG_USER_ONLY */ > >>> #include "sysemu/xen-mapcache.h" > >>> #include "trace.h" > >>>+#include "hw/virtio/virtio.h" > >>> #endif > >>> #include "exec/cpu-all.h" > >>>@@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > >>> * A helper function for the _utterly broken_ virtio device model to find out if > >>> * it's running on a big endian machine. Don't do this at home kids! > >>> */ > >>>-bool virtio_is_big_endian(void); > >>> bool virtio_is_big_endian(void) > >>> { > >>> #if defined(TARGET_WORDS_BIGENDIAN) > >>>diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > >>>index 1ba53d9..68c3064 100644 > >>>--- a/hw/virtio/Makefile.objs > >>>+++ b/hw/virtio/Makefile.objs > >>>@@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > >>> common-obj-y += virtio-mmio.o > >>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > >>>-obj-y += virtio.o virtio-balloon.o > >>>+obj-y += virtio.o virtio-balloon.o > >>> obj-$(CONFIG_LINUX) += vhost.o > >>>diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > >>>index ce97514..82a1689 100644 > >>>--- a/hw/virtio/virtio-pci.c > >>>+++ b/hw/virtio/virtio-pci.c > >>>@@ -89,9 +89,6 @@ > >>> /* Flags track per-device state like workarounds for quirks in older guests. */ > >>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > >>>-/* HACK for virtio to determine if it's running a big endian guest */ > >>>-bool virtio_is_big_endian(void); > >>>- > >>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > >>> VirtIOPCIProxy *dev); > >>>@@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > >>> break; > >>> case 2: > >>> val = virtio_config_readw(vdev, addr); > >>>- if (virtio_is_big_endian()) { > >>>+ if (vdev->is_big_endian) { > >>> val = bswap16(val); > >>> } > >>> break; > >>> case 4: > >>> val = virtio_config_readl(vdev, addr); > >>>- if (virtio_is_big_endian()) { > >>>+ if (vdev->is_big_endian) { > >>> val = bswap32(val); > >>> } > >>> break; > >>>@@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > >>> virtio_config_writeb(vdev, addr, val); > >>> break; > >>> case 2: > >>>- if (virtio_is_big_endian()) { > >>>+ if (vdev->is_big_endian) { > >>> val = bswap16(val); > >>> } > >>> virtio_config_writew(vdev, addr, val); > >>> break; > >>> case 4: > >>>- if (virtio_is_big_endian()) { > >>>+ if (vdev->is_big_endian) { > >>> val = bswap32(val); > >>> } > >>> virtio_config_writel(vdev, addr, val); > >>>diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > >>>index aeabf3a..bb646f0 100644 > >>>--- a/hw/virtio/virtio.c > >>>+++ b/hw/virtio/virtio.c > >>>@@ -19,6 +19,7 @@ > >>> #include "hw/virtio/virtio.h" > >>> #include "qemu/atomic.h" > >>> #include "hw/virtio/virtio-bus.h" > >>>+#include "hw/virtio/virtio-access.h" > >>> /* > >>> * The alignment to use between consumer and producer parts of vring. > >>>@@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > >>> virtio_set_status(vdev, 0); > >>>+ vdev->is_big_endian = virtio_is_big_endian(); > >>>+ > >>> if (k->reset) { > >>> k->reset(vdev); > >>> } > >>>@@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > >>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > >>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > >>>+ /* NOTE: we assume that endianness is a cpu state AND > >>>+ * cpu state is restored before virtio devices. > >>>+ */ > >>>+ vdev->is_big_endian = virtio_is_big_endian(); > >>>+ > >>> if (k->load_config) { > >>> ret = k->load_config(qbus->parent, f); > >>> if (ret) > >>>@@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > >>> } > >>> } > >>>+uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > >>>+{ > >>>+ if (vdev->is_big_endian) { > >>>+ return tswap16(s); > >>>+ } else { > >>>+ return bswap16(tswap16(s)); > >>>+ } > >>This looks pretty bogus. When virtio wants to do "tswap" what it > >>means is "give me a host endianness value in virtio endianness". How > >>about something like > >> > >>#ifdef HOST_WORDS_BIGENDIAN > >> return vdev->is_big_endian ? s : bswap16(s); > >>#else > >> return vdev->is_big_endian ? bswap16(s) : s; > >>#endif > >> > >Actually why doesn't this call virtio_is_big_endian? > >As it is, we get extra branches even if target endian-ness > >is fixed. > > Because virtio_is_big_endian() returns the default endianness, not > the runtime endianness of a virtio device. > > > > >>That should work pretty well inline as well, so you don't need to > >>compile virtio.c as target dependent. > >> > >> > >>Alex > >Yes but we'll still need to build two variants: fixed endian and > >dynamic endian platforms. > >Something along the lines of 32/64 bit split that we have? > > Why bother? Always make it dynamic and don't change the per-device > variable, no? I'd be surprised if the performance difference is > measurable. The bulk of the data we transfer gets copied raw anyway. > > > Alex This will have to be measured and proved by whoever's proposing the patch, not by reviewers. Platforms such as AMD which don't do prediction well would be especially interesting to test on.
On 14.04.14 14:34, Michael S. Tsirkin wrote: > On Mon, Apr 14, 2014 at 02:31:06PM +0200, Alexander Graf wrote: >> On 14.04.14 14:28, Michael S. Tsirkin wrote: >>> On Mon, Apr 14, 2014 at 02:22:36PM +0200, Alexander Graf wrote: >>>> Could we try to poison any non-virtio, non-endian-specific memory >>>> accessors when this file is included? That way we don't fall into >>>> pitfalls where we end up running stl_p in a tiny corner case >>>> somewhere still. >>>> >>>> >>>> Alex >>> Not sure about all users but it looks like the only file including this >>> is virtio.c right? >>> I'm guessing that's safe there. >> any virtio device implementations, since they need to communicate >> with the guest :). > In that case how can we poison regular memory accesses? > Devices normally do more than virtio related things. Device should never do anything based on the guest endianness (because they don't know), so stx_p and ldx_p should always be poisoned. Alex
On Mon, 14 Apr 2014 14:16:03 +0200 Alexander Graf <agraf@suse.de> wrote: > > On 14.04.14 13:58, Greg Kurz wrote: > > From: Rusty Russell <rusty@rustcorp.com.au> > > > > virtio data structures are defined as "target endian", which assumes > > that's a fixed value. In fact, that actually means it's platform-specific. > > The OASIS virtio 1.0 spec will fix this, by making it all little endian. > > > > We introduce memory accessors to be used accross the virtio code where > > needed. These accessors should support both legacy and 1.0 devices. > > A good way to do it is to introduce a per-device property to store the > > endianness. We choose to set this flag at device reset time because it > > is reasonnable to assume the endianness won't change unless we reboot or > > kexec another kernel. And it is also reasonnable to assume the new kernel > > will reset the devices before using them (otherwise it will break). > > > > We reuse the virtio_is_big_endian() helper since it provides the right > > value for legacy devices with most of the targets, that have fixed > > endianness. It can then be overriden to support endian-ambivalent targets. > > > > To support migration, we need to set the flag in virtio_load() as well. > > > > (a) One solution would be to add it to the stream, but it have some > > drawbacks: > > - since this only affects a few targets, the field should be put into a > > subsection > > - virtio migration code should be ported to vmstate to be able to introduce > > such a subsection > > > > (b) If we assume the following to be true: > > - target endianness falls under some cpu state > > - cpu state is always restored before virtio devices state because they > > get initialized in this order in main(). > > Then an alternative is to rely on virtio_is_big_endian() again at > > load time. No need to mess around with the migration stream in this case. > > > > This patch implements (b). > > > > Note that the tswap helpers are implemented in virtio.c so that > > virtio-access.h stays platform independant. Most of the virtio code > > will be buildable under common-obj instead of obj then, and spare > > some cycles when building for multiple targets. > > > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > > [ ldq_phys() API change, > > relicensed virtio-access.h to GPLv2+ on Rusty's request, > > introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > > add VirtIODevice * arg to virtio helpers, > > use the existing virtio_is_big_endian() helper, > > virtio-pci: use the device is_big_endian flag, > > introduce virtio tswap16 and tswap64 helpers, > > move calls to tswap* out of virtio-access.h to make it platform independant, > > migration support, > > Greg Kurz <gkurz@linux.vnet.ibm.com> ] > > Cc: Cédric Le Goater <clg@fr.ibm.com> > > Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > > --- > > > > Changes since v6: > > - merge the virtio_needs_byteswap() helper from v6 and existing > > virtio_is_big_endian() > > - virtio-pci: now supports endianness changes > > - virtio-access.h fixes (target independant) > > > > exec.c | 2 - > > hw/virtio/Makefile.objs | 2 - > > hw/virtio/virtio-pci.c | 11 +-- > > hw/virtio/virtio.c | 35 +++++++++ > > include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > > include/hw/virtio/virtio.h | 2 + > > vl.c | 4 + > > 7 files changed, 185 insertions(+), 9 deletions(-) > > create mode 100644 include/hw/virtio/virtio-access.h > > > > diff --git a/exec.c b/exec.c > > index 91513c6..e6777d0 100644 > > --- a/exec.c > > +++ b/exec.c > > @@ -42,6 +42,7 @@ > > #else /* !CONFIG_USER_ONLY */ > > #include "sysemu/xen-mapcache.h" > > #include "trace.h" > > +#include "hw/virtio/virtio.h" > > #endif > > #include "exec/cpu-all.h" > > > > @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > > * A helper function for the _utterly broken_ virtio device model to find out if > > * it's running on a big endian machine. Don't do this at home kids! > > */ > > -bool virtio_is_big_endian(void); > > bool virtio_is_big_endian(void) > > { > > #if defined(TARGET_WORDS_BIGENDIAN) > > diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > > index 1ba53d9..68c3064 100644 > > --- a/hw/virtio/Makefile.objs > > +++ b/hw/virtio/Makefile.objs > > @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > > common-obj-y += virtio-mmio.o > > common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > > > > -obj-y += virtio.o virtio-balloon.o > > +obj-y += virtio.o virtio-balloon.o > > obj-$(CONFIG_LINUX) += vhost.o > > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > > index ce97514..82a1689 100644 > > --- a/hw/virtio/virtio-pci.c > > +++ b/hw/virtio/virtio-pci.c > > @@ -89,9 +89,6 @@ > > /* Flags track per-device state like workarounds for quirks in older guests. */ > > #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > > > > -/* HACK for virtio to determine if it's running a big endian guest */ > > -bool virtio_is_big_endian(void); > > - > > static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > > VirtIOPCIProxy *dev); > > > > @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > > break; > > case 2: > > val = virtio_config_readw(vdev, addr); > > - if (virtio_is_big_endian()) { > > + if (vdev->is_big_endian) { > > val = bswap16(val); > > } > > break; > > case 4: > > val = virtio_config_readl(vdev, addr); > > - if (virtio_is_big_endian()) { > > + if (vdev->is_big_endian) { > > val = bswap32(val); > > } > > break; > > @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > > virtio_config_writeb(vdev, addr, val); > > break; > > case 2: > > - if (virtio_is_big_endian()) { > > + if (vdev->is_big_endian) { > > val = bswap16(val); > > } > > virtio_config_writew(vdev, addr, val); > > break; > > case 4: > > - if (virtio_is_big_endian()) { > > + if (vdev->is_big_endian) { > > val = bswap32(val); > > } > > virtio_config_writel(vdev, addr, val); > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > index aeabf3a..bb646f0 100644 > > --- a/hw/virtio/virtio.c > > +++ b/hw/virtio/virtio.c > > @@ -19,6 +19,7 @@ > > #include "hw/virtio/virtio.h" > > #include "qemu/atomic.h" > > #include "hw/virtio/virtio-bus.h" > > +#include "hw/virtio/virtio-access.h" > > > > /* > > * The alignment to use between consumer and producer parts of vring. > > @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > > > > virtio_set_status(vdev, 0); > > > > + vdev->is_big_endian = virtio_is_big_endian(); > > + > > if (k->reset) { > > k->reset(vdev); > > } > > @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > > BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > > VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > > > > + /* NOTE: we assume that endianness is a cpu state AND > > + * cpu state is restored before virtio devices. > > + */ > > + vdev->is_big_endian = virtio_is_big_endian(); > > + > > if (k->load_config) { > > ret = k->load_config(qbus->parent, f); > > if (ret) > > @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > > } > > } > > > > +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > > +{ > > + if (vdev->is_big_endian) { > > + return tswap16(s); > > + } else { > > + return bswap16(tswap16(s)); > > + } > > This looks pretty bogus. When virtio wants to do "tswap" what it means > is "give me a host endianness value in virtio endianness". How about > something like > > #ifdef HOST_WORDS_BIGENDIAN > return vdev->is_big_endian ? s : bswap16(s); > #else > return vdev->is_big_endian ? bswap16(s) : s; > #endif > Thanks ! > That should work pretty well inline as well, so you don't need to > compile virtio.c as target dependent. > virtio.c is target dependant... as long as it uses *_phys accessors. I'll make the change in patch 2/8 then. :)
On 14.04.14 14:37, Michael S. Tsirkin wrote: > On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: >> On 14.04.14 14:24, Michael S. Tsirkin wrote: >>> On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: >>>> On 14.04.14 13:58, Greg Kurz wrote: >>>>> From: Rusty Russell <rusty@rustcorp.com.au> >>>>> >>>>> virtio data structures are defined as "target endian", which assumes >>>>> that's a fixed value. In fact, that actually means it's platform-specific. >>>>> The OASIS virtio 1.0 spec will fix this, by making it all little endian. >>>>> >>>>> We introduce memory accessors to be used accross the virtio code where >>>>> needed. These accessors should support both legacy and 1.0 devices. >>>>> A good way to do it is to introduce a per-device property to store the >>>>> endianness. We choose to set this flag at device reset time because it >>>>> is reasonnable to assume the endianness won't change unless we reboot or >>>>> kexec another kernel. And it is also reasonnable to assume the new kernel >>>>> will reset the devices before using them (otherwise it will break). >>>>> >>>>> We reuse the virtio_is_big_endian() helper since it provides the right >>>>> value for legacy devices with most of the targets, that have fixed >>>>> endianness. It can then be overriden to support endian-ambivalent targets. >>>>> >>>>> To support migration, we need to set the flag in virtio_load() as well. >>>>> >>>>> (a) One solution would be to add it to the stream, but it have some >>>>> drawbacks: >>>>> - since this only affects a few targets, the field should be put into a >>>>> subsection >>>>> - virtio migration code should be ported to vmstate to be able to introduce >>>>> such a subsection >>>>> >>>>> (b) If we assume the following to be true: >>>>> - target endianness falls under some cpu state >>>>> - cpu state is always restored before virtio devices state because they >>>>> get initialized in this order in main(). >>>>> Then an alternative is to rely on virtio_is_big_endian() again at >>>>> load time. No need to mess around with the migration stream in this case. >>>>> >>>>> This patch implements (b). >>>>> >>>>> Note that the tswap helpers are implemented in virtio.c so that >>>>> virtio-access.h stays platform independant. Most of the virtio code >>>>> will be buildable under common-obj instead of obj then, and spare >>>>> some cycles when building for multiple targets. >>>>> >>>>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> >>>>> [ ldq_phys() API change, >>>>> relicensed virtio-access.h to GPLv2+ on Rusty's request, >>>>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) >>>>> add VirtIODevice * arg to virtio helpers, >>>>> use the existing virtio_is_big_endian() helper, >>>>> virtio-pci: use the device is_big_endian flag, >>>>> introduce virtio tswap16 and tswap64 helpers, >>>>> move calls to tswap* out of virtio-access.h to make it platform independant, >>>>> migration support, >>>>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] >>>>> Cc: Cédric Le Goater <clg@fr.ibm.com> >>>>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> >>>>> --- >>>>> >>>>> Changes since v6: >>>>> - merge the virtio_needs_byteswap() helper from v6 and existing >>>>> virtio_is_big_endian() >>>>> - virtio-pci: now supports endianness changes >>>>> - virtio-access.h fixes (target independant) >>>>> >>>>> exec.c | 2 - >>>>> hw/virtio/Makefile.objs | 2 - >>>>> hw/virtio/virtio-pci.c | 11 +-- >>>>> hw/virtio/virtio.c | 35 +++++++++ >>>>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ >>>>> include/hw/virtio/virtio.h | 2 + >>>>> vl.c | 4 + >>>>> 7 files changed, 185 insertions(+), 9 deletions(-) >>>>> create mode 100644 include/hw/virtio/virtio-access.h >>>>> >>>>> diff --git a/exec.c b/exec.c >>>>> index 91513c6..e6777d0 100644 >>>>> --- a/exec.c >>>>> +++ b/exec.c >>>>> @@ -42,6 +42,7 @@ >>>>> #else /* !CONFIG_USER_ONLY */ >>>>> #include "sysemu/xen-mapcache.h" >>>>> #include "trace.h" >>>>> +#include "hw/virtio/virtio.h" >>>>> #endif >>>>> #include "exec/cpu-all.h" >>>>> @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, >>>>> * A helper function for the _utterly broken_ virtio device model to find out if >>>>> * it's running on a big endian machine. Don't do this at home kids! >>>>> */ >>>>> -bool virtio_is_big_endian(void); >>>>> bool virtio_is_big_endian(void) >>>>> { >>>>> #if defined(TARGET_WORDS_BIGENDIAN) >>>>> diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs >>>>> index 1ba53d9..68c3064 100644 >>>>> --- a/hw/virtio/Makefile.objs >>>>> +++ b/hw/virtio/Makefile.objs >>>>> @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o >>>>> common-obj-y += virtio-mmio.o >>>>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ >>>>> -obj-y += virtio.o virtio-balloon.o >>>>> +obj-y += virtio.o virtio-balloon.o >>>>> obj-$(CONFIG_LINUX) += vhost.o >>>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c >>>>> index ce97514..82a1689 100644 >>>>> --- a/hw/virtio/virtio-pci.c >>>>> +++ b/hw/virtio/virtio-pci.c >>>>> @@ -89,9 +89,6 @@ >>>>> /* Flags track per-device state like workarounds for quirks in older guests. */ >>>>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) >>>>> -/* HACK for virtio to determine if it's running a big endian guest */ >>>>> -bool virtio_is_big_endian(void); >>>>> - >>>>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, >>>>> VirtIOPCIProxy *dev); >>>>> @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, >>>>> break; >>>>> case 2: >>>>> val = virtio_config_readw(vdev, addr); >>>>> - if (virtio_is_big_endian()) { >>>>> + if (vdev->is_big_endian) { >>>>> val = bswap16(val); >>>>> } >>>>> break; >>>>> case 4: >>>>> val = virtio_config_readl(vdev, addr); >>>>> - if (virtio_is_big_endian()) { >>>>> + if (vdev->is_big_endian) { >>>>> val = bswap32(val); >>>>> } >>>>> break; >>>>> @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, >>>>> virtio_config_writeb(vdev, addr, val); >>>>> break; >>>>> case 2: >>>>> - if (virtio_is_big_endian()) { >>>>> + if (vdev->is_big_endian) { >>>>> val = bswap16(val); >>>>> } >>>>> virtio_config_writew(vdev, addr, val); >>>>> break; >>>>> case 4: >>>>> - if (virtio_is_big_endian()) { >>>>> + if (vdev->is_big_endian) { >>>>> val = bswap32(val); >>>>> } >>>>> virtio_config_writel(vdev, addr, val); >>>>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c >>>>> index aeabf3a..bb646f0 100644 >>>>> --- a/hw/virtio/virtio.c >>>>> +++ b/hw/virtio/virtio.c >>>>> @@ -19,6 +19,7 @@ >>>>> #include "hw/virtio/virtio.h" >>>>> #include "qemu/atomic.h" >>>>> #include "hw/virtio/virtio-bus.h" >>>>> +#include "hw/virtio/virtio-access.h" >>>>> /* >>>>> * The alignment to use between consumer and producer parts of vring. >>>>> @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) >>>>> virtio_set_status(vdev, 0); >>>>> + vdev->is_big_endian = virtio_is_big_endian(); >>>>> + >>>>> if (k->reset) { >>>>> k->reset(vdev); >>>>> } >>>>> @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) >>>>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); >>>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); >>>>> + /* NOTE: we assume that endianness is a cpu state AND >>>>> + * cpu state is restored before virtio devices. >>>>> + */ >>>>> + vdev->is_big_endian = virtio_is_big_endian(); >>>>> + >>>>> if (k->load_config) { >>>>> ret = k->load_config(qbus->parent, f); >>>>> if (ret) >>>>> @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) >>>>> } >>>>> } >>>>> +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) >>>>> +{ >>>>> + if (vdev->is_big_endian) { >>>>> + return tswap16(s); >>>>> + } else { >>>>> + return bswap16(tswap16(s)); >>>>> + } >>>> This looks pretty bogus. When virtio wants to do "tswap" what it >>>> means is "give me a host endianness value in virtio endianness". How >>>> about something like >>>> >>>> #ifdef HOST_WORDS_BIGENDIAN >>>> return vdev->is_big_endian ? s : bswap16(s); >>>> #else >>>> return vdev->is_big_endian ? bswap16(s) : s; >>>> #endif >>>> >>> Actually why doesn't this call virtio_is_big_endian? >>> As it is, we get extra branches even if target endian-ness >>> is fixed. >> Because virtio_is_big_endian() returns the default endianness, not >> the runtime endianness of a virtio device. In fact, we should probably rename it accordingly. >> >>>> That should work pretty well inline as well, so you don't need to >>>> compile virtio.c as target dependent. >>>> >>>> >>>> Alex >>> Yes but we'll still need to build two variants: fixed endian and >>> dynamic endian platforms. >>> Something along the lines of 32/64 bit split that we have? >> Why bother? Always make it dynamic and don't change the per-device >> variable, no? I'd be surprised if the performance difference is >> measurable. The bulk of the data we transfer gets copied raw anyway. >> >> >> Alex > This will have to be measured and proved by whoever's proposing the > patch, not by reviewers. Platforms such as AMD which don't do > prediction well would be especially interesting to test on. Sure, Greg, can you do that? I'm sure Michael has test cases available he can give you to measure performance on this. Speaking of which, how does all of this work with vhost? Alex
On Mon, Apr 14, 2014 at 02:38:14PM +0200, Alexander Graf wrote: > > On 14.04.14 14:34, Michael S. Tsirkin wrote: > >On Mon, Apr 14, 2014 at 02:31:06PM +0200, Alexander Graf wrote: > >>On 14.04.14 14:28, Michael S. Tsirkin wrote: > >>>On Mon, Apr 14, 2014 at 02:22:36PM +0200, Alexander Graf wrote: > >>>>Could we try to poison any non-virtio, non-endian-specific memory > >>>>accessors when this file is included? That way we don't fall into > >>>>pitfalls where we end up running stl_p in a tiny corner case > >>>>somewhere still. > >>>> > >>>> > >>>>Alex > >>>Not sure about all users but it looks like the only file including this > >>>is virtio.c right? > >>>I'm guessing that's safe there. > >>any virtio device implementations, since they need to communicate > >>with the guest :). > >In that case how can we poison regular memory accesses? > >Devices normally do more than virtio related things. > > Device should never do anything based on the guest endianness > (because they don't know), so stx_p and ldx_p should always be > poisoned. > > > Alex Makes sense.
On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: > > On 14.04.14 14:37, Michael S. Tsirkin wrote: > >On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: > >>On 14.04.14 14:24, Michael S. Tsirkin wrote: > >>>On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: > >>>>On 14.04.14 13:58, Greg Kurz wrote: > >>>>>From: Rusty Russell <rusty@rustcorp.com.au> > >>>>> > >>>>>virtio data structures are defined as "target endian", which assumes > >>>>>that's a fixed value. In fact, that actually means it's platform-specific. > >>>>>The OASIS virtio 1.0 spec will fix this, by making it all little endian. > >>>>> > >>>>>We introduce memory accessors to be used accross the virtio code where > >>>>>needed. These accessors should support both legacy and 1.0 devices. > >>>>>A good way to do it is to introduce a per-device property to store the > >>>>>endianness. We choose to set this flag at device reset time because it > >>>>>is reasonnable to assume the endianness won't change unless we reboot or > >>>>>kexec another kernel. And it is also reasonnable to assume the new kernel > >>>>>will reset the devices before using them (otherwise it will break). > >>>>> > >>>>>We reuse the virtio_is_big_endian() helper since it provides the right > >>>>>value for legacy devices with most of the targets, that have fixed > >>>>>endianness. It can then be overriden to support endian-ambivalent targets. > >>>>> > >>>>>To support migration, we need to set the flag in virtio_load() as well. > >>>>> > >>>>>(a) One solution would be to add it to the stream, but it have some > >>>>> drawbacks: > >>>>>- since this only affects a few targets, the field should be put into a > >>>>> subsection > >>>>>- virtio migration code should be ported to vmstate to be able to introduce > >>>>> such a subsection > >>>>> > >>>>>(b) If we assume the following to be true: > >>>>>- target endianness falls under some cpu state > >>>>>- cpu state is always restored before virtio devices state because they > >>>>> get initialized in this order in main(). > >>>>>Then an alternative is to rely on virtio_is_big_endian() again at > >>>>>load time. No need to mess around with the migration stream in this case. > >>>>> > >>>>>This patch implements (b). > >>>>> > >>>>>Note that the tswap helpers are implemented in virtio.c so that > >>>>>virtio-access.h stays platform independant. Most of the virtio code > >>>>>will be buildable under common-obj instead of obj then, and spare > >>>>>some cycles when building for multiple targets. > >>>>> > >>>>>Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > >>>>>[ ldq_phys() API change, > >>>>> relicensed virtio-access.h to GPLv2+ on Rusty's request, > >>>>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > >>>>> add VirtIODevice * arg to virtio helpers, > >>>>> use the existing virtio_is_big_endian() helper, > >>>>> virtio-pci: use the device is_big_endian flag, > >>>>> introduce virtio tswap16 and tswap64 helpers, > >>>>> move calls to tswap* out of virtio-access.h to make it platform independant, > >>>>> migration support, > >>>>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] > >>>>>Cc: Cédric Le Goater <clg@fr.ibm.com> > >>>>>Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > >>>>>--- > >>>>> > >>>>>Changes since v6: > >>>>>- merge the virtio_needs_byteswap() helper from v6 and existing > >>>>> virtio_is_big_endian() > >>>>>- virtio-pci: now supports endianness changes > >>>>>- virtio-access.h fixes (target independant) > >>>>> > >>>>> exec.c | 2 - > >>>>> hw/virtio/Makefile.objs | 2 - > >>>>> hw/virtio/virtio-pci.c | 11 +-- > >>>>> hw/virtio/virtio.c | 35 +++++++++ > >>>>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > >>>>> include/hw/virtio/virtio.h | 2 + > >>>>> vl.c | 4 + > >>>>> 7 files changed, 185 insertions(+), 9 deletions(-) > >>>>> create mode 100644 include/hw/virtio/virtio-access.h > >>>>> > >>>>>diff --git a/exec.c b/exec.c > >>>>>index 91513c6..e6777d0 100644 > >>>>>--- a/exec.c > >>>>>+++ b/exec.c > >>>>>@@ -42,6 +42,7 @@ > >>>>> #else /* !CONFIG_USER_ONLY */ > >>>>> #include "sysemu/xen-mapcache.h" > >>>>> #include "trace.h" > >>>>>+#include "hw/virtio/virtio.h" > >>>>> #endif > >>>>> #include "exec/cpu-all.h" > >>>>>@@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > >>>>> * A helper function for the _utterly broken_ virtio device model to find out if > >>>>> * it's running on a big endian machine. Don't do this at home kids! > >>>>> */ > >>>>>-bool virtio_is_big_endian(void); > >>>>> bool virtio_is_big_endian(void) > >>>>> { > >>>>> #if defined(TARGET_WORDS_BIGENDIAN) > >>>>>diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > >>>>>index 1ba53d9..68c3064 100644 > >>>>>--- a/hw/virtio/Makefile.objs > >>>>>+++ b/hw/virtio/Makefile.objs > >>>>>@@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > >>>>> common-obj-y += virtio-mmio.o > >>>>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > >>>>>-obj-y += virtio.o virtio-balloon.o > >>>>>+obj-y += virtio.o virtio-balloon.o > >>>>> obj-$(CONFIG_LINUX) += vhost.o > >>>>>diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > >>>>>index ce97514..82a1689 100644 > >>>>>--- a/hw/virtio/virtio-pci.c > >>>>>+++ b/hw/virtio/virtio-pci.c > >>>>>@@ -89,9 +89,6 @@ > >>>>> /* Flags track per-device state like workarounds for quirks in older guests. */ > >>>>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > >>>>>-/* HACK for virtio to determine if it's running a big endian guest */ > >>>>>-bool virtio_is_big_endian(void); > >>>>>- > >>>>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > >>>>> VirtIOPCIProxy *dev); > >>>>>@@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > >>>>> break; > >>>>> case 2: > >>>>> val = virtio_config_readw(vdev, addr); > >>>>>- if (virtio_is_big_endian()) { > >>>>>+ if (vdev->is_big_endian) { > >>>>> val = bswap16(val); > >>>>> } > >>>>> break; > >>>>> case 4: > >>>>> val = virtio_config_readl(vdev, addr); > >>>>>- if (virtio_is_big_endian()) { > >>>>>+ if (vdev->is_big_endian) { > >>>>> val = bswap32(val); > >>>>> } > >>>>> break; > >>>>>@@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > >>>>> virtio_config_writeb(vdev, addr, val); > >>>>> break; > >>>>> case 2: > >>>>>- if (virtio_is_big_endian()) { > >>>>>+ if (vdev->is_big_endian) { > >>>>> val = bswap16(val); > >>>>> } > >>>>> virtio_config_writew(vdev, addr, val); > >>>>> break; > >>>>> case 4: > >>>>>- if (virtio_is_big_endian()) { > >>>>>+ if (vdev->is_big_endian) { > >>>>> val = bswap32(val); > >>>>> } > >>>>> virtio_config_writel(vdev, addr, val); > >>>>>diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > >>>>>index aeabf3a..bb646f0 100644 > >>>>>--- a/hw/virtio/virtio.c > >>>>>+++ b/hw/virtio/virtio.c > >>>>>@@ -19,6 +19,7 @@ > >>>>> #include "hw/virtio/virtio.h" > >>>>> #include "qemu/atomic.h" > >>>>> #include "hw/virtio/virtio-bus.h" > >>>>>+#include "hw/virtio/virtio-access.h" > >>>>> /* > >>>>> * The alignment to use between consumer and producer parts of vring. > >>>>>@@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > >>>>> virtio_set_status(vdev, 0); > >>>>>+ vdev->is_big_endian = virtio_is_big_endian(); > >>>>>+ > >>>>> if (k->reset) { > >>>>> k->reset(vdev); > >>>>> } > >>>>>@@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > >>>>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > >>>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > >>>>>+ /* NOTE: we assume that endianness is a cpu state AND > >>>>>+ * cpu state is restored before virtio devices. > >>>>>+ */ > >>>>>+ vdev->is_big_endian = virtio_is_big_endian(); > >>>>>+ > >>>>> if (k->load_config) { > >>>>> ret = k->load_config(qbus->parent, f); > >>>>> if (ret) > >>>>>@@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > >>>>> } > >>>>> } > >>>>>+uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > >>>>>+{ > >>>>>+ if (vdev->is_big_endian) { > >>>>>+ return tswap16(s); > >>>>>+ } else { > >>>>>+ return bswap16(tswap16(s)); > >>>>>+ } > >>>>This looks pretty bogus. When virtio wants to do "tswap" what it > >>>>means is "give me a host endianness value in virtio endianness". How > >>>>about something like > >>>> > >>>>#ifdef HOST_WORDS_BIGENDIAN > >>>> return vdev->is_big_endian ? s : bswap16(s); > >>>>#else > >>>> return vdev->is_big_endian ? bswap16(s) : s; > >>>>#endif > >>>> > >>>Actually why doesn't this call virtio_is_big_endian? > >>>As it is, we get extra branches even if target endian-ness > >>>is fixed. > >>Because virtio_is_big_endian() returns the default endianness, not > >>the runtime endianness of a virtio device. > > In fact, we should probably rename it accordingly. > > >> > >>>>That should work pretty well inline as well, so you don't need to > >>>>compile virtio.c as target dependent. > >>>> > >>>> > >>>>Alex > >>>Yes but we'll still need to build two variants: fixed endian and > >>>dynamic endian platforms. > >>>Something along the lines of 32/64 bit split that we have? > >>Why bother? Always make it dynamic and don't change the per-device > >>variable, no? I'd be surprised if the performance difference is > >>measurable. The bulk of the data we transfer gets copied raw anyway. > >> > >> > >>Alex > >This will have to be measured and proved by whoever's proposing the > >patch, not by reviewers. Platforms such as AMD which don't do > >prediction well would be especially interesting to test on. > > Sure, Greg, can you do that? I'm sure Michael has test cases > available he can give you to measure performance on this. > > Speaking of which, how does all of this work with vhost? > > > Alex I think that's missing. As a first step, we need to disable vhost when host/guest endian-ness do not match. We could also add cross-endian support to vhost. Or just wait a couple more months for virtio 1.0 which is fixed endian-ness.
On 14.04.14 14:46, Michael S. Tsirkin wrote: > On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: >> On 14.04.14 14:37, Michael S. Tsirkin wrote: >>> On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: >>>> On 14.04.14 14:24, Michael S. Tsirkin wrote: >>>> >>> This will have to be measured and proved by whoever's proposing the >>> patch, not by reviewers. Platforms such as AMD which don't do >>> prediction well would be especially interesting to test on. >> Sure, Greg, can you do that? I'm sure Michael has test cases >> available he can give you to measure performance on this. >> >> Speaking of which, how does all of this work with vhost? >> >> >> Alex > I think that's missing. > As a first step, we need to disable vhost when > host/guest endian-ness do not match. > > We could also add cross-endian support to vhost. > > Or just wait a couple more months for virtio 1.0 which is fixed > endian-ness. That won't help for current ppc64le guests, so I suppose we will need cross-endian vhost. Alex
On Mon, 14 Apr 2014 14:22:36 +0200 Alexander Graf <agraf@suse.de> wrote: > > On 14.04.14 13:58, Greg Kurz wrote: > > From: Rusty Russell <rusty@rustcorp.com.au> > > > > virtio data structures are defined as "target endian", which assumes > > that's a fixed value. In fact, that actually means it's platform-specific. > > The OASIS virtio 1.0 spec will fix this, by making it all little endian. > > > > We introduce memory accessors to be used accross the virtio code where > > needed. These accessors should support both legacy and 1.0 devices. > > A good way to do it is to introduce a per-device property to store the > > endianness. We choose to set this flag at device reset time because it > > is reasonnable to assume the endianness won't change unless we reboot or > > kexec another kernel. And it is also reasonnable to assume the new kernel > > will reset the devices before using them (otherwise it will break). > > > > We reuse the virtio_is_big_endian() helper since it provides the right > > value for legacy devices with most of the targets, that have fixed > > endianness. It can then be overriden to support endian-ambivalent targets. > > > > To support migration, we need to set the flag in virtio_load() as well. > > > > (a) One solution would be to add it to the stream, but it have some > > drawbacks: > > - since this only affects a few targets, the field should be put into a > > subsection > > - virtio migration code should be ported to vmstate to be able to introduce > > such a subsection > > > > (b) If we assume the following to be true: > > - target endianness falls under some cpu state > > - cpu state is always restored before virtio devices state because they > > get initialized in this order in main(). > > Then an alternative is to rely on virtio_is_big_endian() again at > > load time. No need to mess around with the migration stream in this case. > > > > This patch implements (b). > > > > Note that the tswap helpers are implemented in virtio.c so that > > virtio-access.h stays platform independant. Most of the virtio code > > will be buildable under common-obj instead of obj then, and spare > > some cycles when building for multiple targets. > > > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > > [ ldq_phys() API change, > > relicensed virtio-access.h to GPLv2+ on Rusty's request, > > introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > > add VirtIODevice * arg to virtio helpers, > > use the existing virtio_is_big_endian() helper, > > virtio-pci: use the device is_big_endian flag, > > introduce virtio tswap16 and tswap64 helpers, > > move calls to tswap* out of virtio-access.h to make it platform independant, > > migration support, > > Greg Kurz <gkurz@linux.vnet.ibm.com> ] > > Cc: Cédric Le Goater <clg@fr.ibm.com> > > Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > > --- > > > > Changes since v6: > > - merge the virtio_needs_byteswap() helper from v6 and existing > > virtio_is_big_endian() > > - virtio-pci: now supports endianness changes > > - virtio-access.h fixes (target independant) > > > > exec.c | 2 - > > hw/virtio/Makefile.objs | 2 - > > hw/virtio/virtio-pci.c | 11 +-- > > hw/virtio/virtio.c | 35 +++++++++ > > include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > > include/hw/virtio/virtio.h | 2 + > > vl.c | 4 + > > 7 files changed, 185 insertions(+), 9 deletions(-) > > create mode 100644 include/hw/virtio/virtio-access.h > > > > diff --git a/exec.c b/exec.c > > index 91513c6..e6777d0 100644 > > --- a/exec.c > > +++ b/exec.c > > @@ -42,6 +42,7 @@ > > #else /* !CONFIG_USER_ONLY */ > > #include "sysemu/xen-mapcache.h" > > #include "trace.h" > > +#include "hw/virtio/virtio.h" > > #endif > > #include "exec/cpu-all.h" > > > > @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > > * A helper function for the _utterly broken_ virtio device model to find out if > > * it's running on a big endian machine. Don't do this at home kids! > > */ > > -bool virtio_is_big_endian(void); > > bool virtio_is_big_endian(void) > > { > > #if defined(TARGET_WORDS_BIGENDIAN) > > diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > > index 1ba53d9..68c3064 100644 > > --- a/hw/virtio/Makefile.objs > > +++ b/hw/virtio/Makefile.objs > > @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > > common-obj-y += virtio-mmio.o > > common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > > > > -obj-y += virtio.o virtio-balloon.o > > +obj-y += virtio.o virtio-balloon.o > > obj-$(CONFIG_LINUX) += vhost.o > > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > > index ce97514..82a1689 100644 > > --- a/hw/virtio/virtio-pci.c > > +++ b/hw/virtio/virtio-pci.c > > @@ -89,9 +89,6 @@ > > /* Flags track per-device state like workarounds for quirks in older guests. */ > > #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > > > > -/* HACK for virtio to determine if it's running a big endian guest */ > > -bool virtio_is_big_endian(void); > > - > > static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > > VirtIOPCIProxy *dev); > > > > @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > > break; > > case 2: > > val = virtio_config_readw(vdev, addr); > > - if (virtio_is_big_endian()) { > > + if (vdev->is_big_endian) { > > val = bswap16(val); > > } > > break; > > case 4: > > val = virtio_config_readl(vdev, addr); > > - if (virtio_is_big_endian()) { > > + if (vdev->is_big_endian) { > > val = bswap32(val); > > } > > break; > > @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > > virtio_config_writeb(vdev, addr, val); > > break; > > case 2: > > - if (virtio_is_big_endian()) { > > + if (vdev->is_big_endian) { > > val = bswap16(val); > > } > > virtio_config_writew(vdev, addr, val); > > break; > > case 4: > > - if (virtio_is_big_endian()) { > > + if (vdev->is_big_endian) { > > val = bswap32(val); > > } > > virtio_config_writel(vdev, addr, val); > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > index aeabf3a..bb646f0 100644 > > --- a/hw/virtio/virtio.c > > +++ b/hw/virtio/virtio.c > > @@ -19,6 +19,7 @@ > > #include "hw/virtio/virtio.h" > > #include "qemu/atomic.h" > > #include "hw/virtio/virtio-bus.h" > > +#include "hw/virtio/virtio-access.h" > > > > /* > > * The alignment to use between consumer and producer parts of vring. > > @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > > > > virtio_set_status(vdev, 0); > > > > + vdev->is_big_endian = virtio_is_big_endian(); > > + > > if (k->reset) { > > k->reset(vdev); > > } > > @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > > BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > > VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > > > > + /* NOTE: we assume that endianness is a cpu state AND > > + * cpu state is restored before virtio devices. > > + */ > > + vdev->is_big_endian = virtio_is_big_endian(); > > + > > if (k->load_config) { > > ret = k->load_config(qbus->parent, f); > > if (ret) > > @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > > } > > } > > > > +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > > +{ > > + if (vdev->is_big_endian) { > > + return tswap16(s); > > + } else { > > + return bswap16(tswap16(s)); > > + } > > +} > > + > > +uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s) > > +{ > > + if (vdev->is_big_endian) { > > + return tswap32(s); > > + } else { > > + return bswap32(tswap32(s)); > > + } > > +} > > + > > +uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s) > > +{ > > + if (vdev->is_big_endian) { > > + return tswap64(s); > > + } else { > > + return bswap64(tswap64(s)); > > + } > > +} > > + > > static void virtio_device_realize(DeviceState *dev, Error **errp) > > { > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h > > new file mode 100644 > > index 0000000..5c9013e > > --- /dev/null > > +++ b/include/hw/virtio/virtio-access.h > > @@ -0,0 +1,138 @@ > > +/* > > + * Virtio Accessor Support: In case your target can change endian. > > + * > > + * Copyright IBM, Corp. 2013 > > + * > > + * Authors: > > + * Rusty Russell <rusty@au.ibm.com> > > + * > > + * This program is free software; you can redistribute it and/or modify > > + * it under the terms of the GNU General Public License as published by > > + * the Free Software Foundation, either version 2 of the License, or > > + * (at your option) any later version. > > + * > > + */ > > +#ifndef _QEMU_VIRTIO_ACCESS_H > > +#define _QEMU_VIRTIO_ACCESS_H > > +#include "hw/virtio/virtio.h" > > +#include "exec/address-spaces.h" > > + > > +static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa) > > +{ > > + if (vdev->is_big_endian) { > > + return lduw_be_phys(&address_space_memory, pa); > > + } > > + return lduw_le_phys(&address_space_memory, pa); > > +} > > + > > +static inline uint32_t virtio_ldl_phys(VirtIODevice *vdev, hwaddr pa) > > +{ > > + if (vdev->is_big_endian) { > > + return ldl_be_phys(&address_space_memory, pa); > > + } > > + return ldl_le_phys(&address_space_memory, pa); > > +} > > + > > +static inline uint64_t virtio_ldq_phys(VirtIODevice *vdev, hwaddr pa) > > +{ > > + if (vdev->is_big_endian) { > > + return ldq_be_phys(&address_space_memory, pa); > > + } > > + return ldq_le_phys(&address_space_memory, pa); > > +} > > + > > +static inline void virtio_stw_phys(VirtIODevice *vdev, hwaddr pa, > > + uint16_t value) > > +{ > > + if (vdev->is_big_endian) { > > + stw_be_phys(&address_space_memory, pa, value); > > + } else { > > + stw_le_phys(&address_space_memory, pa, value); > > + } > > +} > > + > > +static inline void virtio_stl_phys(VirtIODevice *vdev, hwaddr pa, > > + uint32_t value) > > +{ > > + if (vdev->is_big_endian) { > > + stl_be_phys(&address_space_memory, pa, value); > > + } else { > > + stl_le_phys(&address_space_memory, pa, value); > > + } > > +} > > + > > +static inline void virtio_stw_p(VirtIODevice *vdev, void *ptr, uint16_t v) > > +{ > > + if (vdev->is_big_endian) { > > + stw_be_p(ptr, v); > > + } else { > > + stw_le_p(ptr, v); > > + } > > +} > > + > > +static inline void virtio_stl_p(VirtIODevice *vdev, void *ptr, uint32_t v) > > +{ > > + if (vdev->is_big_endian) { > > + stl_be_p(ptr, v); > > + } else { > > + stl_le_p(ptr, v); > > + } > > +} > > + > > +static inline void virtio_stq_p(VirtIODevice *vdev, void *ptr, uint64_t v) > > +{ > > + if (vdev->is_big_endian) { > > + stq_be_p(ptr, v); > > + } else { > > + stq_le_p(ptr, v); > > + } > > +} > > + > > +static inline int virtio_lduw_p(VirtIODevice *vdev, const void *ptr) > > +{ > > + if (vdev->is_big_endian) { > > + return lduw_be_p(ptr); > > + } else { > > + return lduw_le_p(ptr); > > + } > > +} > > + > > +static inline int virtio_ldl_p(VirtIODevice *vdev, const void *ptr) > > +{ > > + if (vdev->is_big_endian) { > > + return ldl_be_p(ptr); > > + } else { > > + return ldl_le_p(ptr); > > + } > > +} > > + > > +static inline uint64_t virtio_ldq_p(VirtIODevice *vdev, const void *ptr) > > +{ > > + if (vdev->is_big_endian) { > > + return ldq_be_p(ptr); > > + } else { > > + return ldq_le_p(ptr); > > + } > > +} > > + > > +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s); > > + > > +static inline void virtio_tswap16s(VirtIODevice *vdev, uint16_t *s) > > +{ > > + *s = virtio_tswap16(vdev, *s); > > +} > > + > > +uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s); > > + > > +static inline void virtio_tswap32s(VirtIODevice *vdev, uint32_t *s) > > +{ > > + *s = virtio_tswap32(vdev, *s); > > +} > > + > > +uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s); > > + > > +static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s) > > +{ > > + *s = virtio_tswap64(vdev, *s); > > +} > > Could we try to poison any non-virtio, non-endian-specific memory > accessors when this file is included? That way we don't fall into > pitfalls where we end up running stl_p in a tiny corner case somewhere > still. > > > Alex > Hmmm... there are still some stl_p users in virtio.c that I could not port to the new virtio memory accessors... These are the virtio_config_* helpers that gets called from virtio-pci and virtio-mmio.
On 14.04.14 14:50, Greg Kurz wrote: > On Mon, 14 Apr 2014 14:22:36 +0200 > Alexander Graf <agraf@suse.de> wrote: > >> On 14.04.14 13:58, Greg Kurz wrote: >>> From: Rusty Russell <rusty@rustcorp.com.au> >>> >>> virtio data structures are defined as "target endian", which assumes >>> that's a fixed value. In fact, that actually means it's platform-specific. >>> The OASIS virtio 1.0 spec will fix this, by making it all little endian. >>> >>> We introduce memory accessors to be used accross the virtio code where >>> needed. These accessors should support both legacy and 1.0 devices. >>> A good way to do it is to introduce a per-device property to store the >>> endianness. We choose to set this flag at device reset time because it >>> is reasonnable to assume the endianness won't change unless we reboot or >>> kexec another kernel. And it is also reasonnable to assume the new kernel >>> will reset the devices before using them (otherwise it will break). >>> >>> We reuse the virtio_is_big_endian() helper since it provides the right >>> value for legacy devices with most of the targets, that have fixed >>> endianness. It can then be overriden to support endian-ambivalent targets. >>> >>> To support migration, we need to set the flag in virtio_load() as well. >>> >>> (a) One solution would be to add it to the stream, but it have some >>> drawbacks: >>> - since this only affects a few targets, the field should be put into a >>> subsection >>> - virtio migration code should be ported to vmstate to be able to introduce >>> such a subsection >>> >>> (b) If we assume the following to be true: >>> - target endianness falls under some cpu state >>> - cpu state is always restored before virtio devices state because they >>> get initialized in this order in main(). >>> Then an alternative is to rely on virtio_is_big_endian() again at >>> load time. No need to mess around with the migration stream in this case. >>> >>> This patch implements (b). >>> >>> Note that the tswap helpers are implemented in virtio.c so that >>> virtio-access.h stays platform independant. Most of the virtio code >>> will be buildable under common-obj instead of obj then, and spare >>> some cycles when building for multiple targets. >>> >>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> >>> [ ldq_phys() API change, >>> relicensed virtio-access.h to GPLv2+ on Rusty's request, >>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) >>> add VirtIODevice * arg to virtio helpers, >>> use the existing virtio_is_big_endian() helper, >>> virtio-pci: use the device is_big_endian flag, >>> introduce virtio tswap16 and tswap64 helpers, >>> move calls to tswap* out of virtio-access.h to make it platform independant, >>> migration support, >>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] >>> Cc: Cédric Le Goater <clg@fr.ibm.com> >>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> >>> --- [...] >>> + >>> +uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s); >>> + >>> +static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s) >>> +{ >>> + *s = virtio_tswap64(vdev, *s); >>> +} >> Could we try to poison any non-virtio, non-endian-specific memory >> accessors when this file is included? That way we don't fall into >> pitfalls where we end up running stl_p in a tiny corner case somewhere >> still. >> >> >> Alex >> > Hmmm... there are still some stl_p users in virtio.c that I could not > port to the new virtio memory accessors... These are the virtio_config_* > helpers that gets called from virtio-pci and virtio-mmio. Why couldn't you port them to the new virtio memory accessors? Are you missing a vdev parameter? Could you add one? IIRC config space is a weird mix of target endianness and little endian. Alex
On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: > > On 14.04.14 14:37, Michael S. Tsirkin wrote: > >On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: > >>On 14.04.14 14:24, Michael S. Tsirkin wrote: > >>>On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: > >>>>On 14.04.14 13:58, Greg Kurz wrote: > >>>>>From: Rusty Russell <rusty@rustcorp.com.au> > >>>>> > >>>>>virtio data structures are defined as "target endian", which assumes > >>>>>that's a fixed value. In fact, that actually means it's platform-specific. > >>>>>The OASIS virtio 1.0 spec will fix this, by making it all little endian. > >>>>> > >>>>>We introduce memory accessors to be used accross the virtio code where > >>>>>needed. These accessors should support both legacy and 1.0 devices. > >>>>>A good way to do it is to introduce a per-device property to store the > >>>>>endianness. We choose to set this flag at device reset time because it > >>>>>is reasonnable to assume the endianness won't change unless we reboot or > >>>>>kexec another kernel. And it is also reasonnable to assume the new kernel > >>>>>will reset the devices before using them (otherwise it will break). > >>>>> > >>>>>We reuse the virtio_is_big_endian() helper since it provides the right > >>>>>value for legacy devices with most of the targets, that have fixed > >>>>>endianness. It can then be overriden to support endian-ambivalent targets. > >>>>> > >>>>>To support migration, we need to set the flag in virtio_load() as well. > >>>>> > >>>>>(a) One solution would be to add it to the stream, but it have some > >>>>> drawbacks: > >>>>>- since this only affects a few targets, the field should be put into a > >>>>> subsection > >>>>>- virtio migration code should be ported to vmstate to be able to introduce > >>>>> such a subsection > >>>>> > >>>>>(b) If we assume the following to be true: > >>>>>- target endianness falls under some cpu state > >>>>>- cpu state is always restored before virtio devices state because they > >>>>> get initialized in this order in main(). > >>>>>Then an alternative is to rely on virtio_is_big_endian() again at > >>>>>load time. No need to mess around with the migration stream in this case. > >>>>> > >>>>>This patch implements (b). > >>>>> > >>>>>Note that the tswap helpers are implemented in virtio.c so that > >>>>>virtio-access.h stays platform independant. Most of the virtio code > >>>>>will be buildable under common-obj instead of obj then, and spare > >>>>>some cycles when building for multiple targets. > >>>>> > >>>>>Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > >>>>>[ ldq_phys() API change, > >>>>> relicensed virtio-access.h to GPLv2+ on Rusty's request, > >>>>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > >>>>> add VirtIODevice * arg to virtio helpers, > >>>>> use the existing virtio_is_big_endian() helper, > >>>>> virtio-pci: use the device is_big_endian flag, > >>>>> introduce virtio tswap16 and tswap64 helpers, > >>>>> move calls to tswap* out of virtio-access.h to make it platform independant, > >>>>> migration support, > >>>>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] > >>>>>Cc: Cédric Le Goater <clg@fr.ibm.com> > >>>>>Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > >>>>>--- > >>>>> > >>>>>Changes since v6: > >>>>>- merge the virtio_needs_byteswap() helper from v6 and existing > >>>>> virtio_is_big_endian() > >>>>>- virtio-pci: now supports endianness changes > >>>>>- virtio-access.h fixes (target independant) > >>>>> > >>>>> exec.c | 2 - > >>>>> hw/virtio/Makefile.objs | 2 - > >>>>> hw/virtio/virtio-pci.c | 11 +-- > >>>>> hw/virtio/virtio.c | 35 +++++++++ > >>>>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > >>>>> include/hw/virtio/virtio.h | 2 + > >>>>> vl.c | 4 + > >>>>> 7 files changed, 185 insertions(+), 9 deletions(-) > >>>>> create mode 100644 include/hw/virtio/virtio-access.h > >>>>> > >>>>>diff --git a/exec.c b/exec.c > >>>>>index 91513c6..e6777d0 100644 > >>>>>--- a/exec.c > >>>>>+++ b/exec.c > >>>>>@@ -42,6 +42,7 @@ > >>>>> #else /* !CONFIG_USER_ONLY */ > >>>>> #include "sysemu/xen-mapcache.h" > >>>>> #include "trace.h" > >>>>>+#include "hw/virtio/virtio.h" > >>>>> #endif > >>>>> #include "exec/cpu-all.h" > >>>>>@@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > >>>>> * A helper function for the _utterly broken_ virtio device model to find out if > >>>>> * it's running on a big endian machine. Don't do this at home kids! > >>>>> */ > >>>>>-bool virtio_is_big_endian(void); > >>>>> bool virtio_is_big_endian(void) > >>>>> { > >>>>> #if defined(TARGET_WORDS_BIGENDIAN) > >>>>>diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > >>>>>index 1ba53d9..68c3064 100644 > >>>>>--- a/hw/virtio/Makefile.objs > >>>>>+++ b/hw/virtio/Makefile.objs > >>>>>@@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > >>>>> common-obj-y += virtio-mmio.o > >>>>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > >>>>>-obj-y += virtio.o virtio-balloon.o > >>>>>+obj-y += virtio.o virtio-balloon.o > >>>>> obj-$(CONFIG_LINUX) += vhost.o > >>>>>diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > >>>>>index ce97514..82a1689 100644 > >>>>>--- a/hw/virtio/virtio-pci.c > >>>>>+++ b/hw/virtio/virtio-pci.c > >>>>>@@ -89,9 +89,6 @@ > >>>>> /* Flags track per-device state like workarounds for quirks in older guests. */ > >>>>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > >>>>>-/* HACK for virtio to determine if it's running a big endian guest */ > >>>>>-bool virtio_is_big_endian(void); > >>>>>- > >>>>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > >>>>> VirtIOPCIProxy *dev); > >>>>>@@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > >>>>> break; > >>>>> case 2: > >>>>> val = virtio_config_readw(vdev, addr); > >>>>>- if (virtio_is_big_endian()) { > >>>>>+ if (vdev->is_big_endian) { > >>>>> val = bswap16(val); > >>>>> } > >>>>> break; > >>>>> case 4: > >>>>> val = virtio_config_readl(vdev, addr); > >>>>>- if (virtio_is_big_endian()) { > >>>>>+ if (vdev->is_big_endian) { > >>>>> val = bswap32(val); > >>>>> } > >>>>> break; > >>>>>@@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > >>>>> virtio_config_writeb(vdev, addr, val); > >>>>> break; > >>>>> case 2: > >>>>>- if (virtio_is_big_endian()) { > >>>>>+ if (vdev->is_big_endian) { > >>>>> val = bswap16(val); > >>>>> } > >>>>> virtio_config_writew(vdev, addr, val); > >>>>> break; > >>>>> case 4: > >>>>>- if (virtio_is_big_endian()) { > >>>>>+ if (vdev->is_big_endian) { > >>>>> val = bswap32(val); > >>>>> } > >>>>> virtio_config_writel(vdev, addr, val); > >>>>>diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > >>>>>index aeabf3a..bb646f0 100644 > >>>>>--- a/hw/virtio/virtio.c > >>>>>+++ b/hw/virtio/virtio.c > >>>>>@@ -19,6 +19,7 @@ > >>>>> #include "hw/virtio/virtio.h" > >>>>> #include "qemu/atomic.h" > >>>>> #include "hw/virtio/virtio-bus.h" > >>>>>+#include "hw/virtio/virtio-access.h" > >>>>> /* > >>>>> * The alignment to use between consumer and producer parts of vring. > >>>>>@@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > >>>>> virtio_set_status(vdev, 0); > >>>>>+ vdev->is_big_endian = virtio_is_big_endian(); > >>>>>+ > >>>>> if (k->reset) { > >>>>> k->reset(vdev); > >>>>> } > >>>>>@@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > >>>>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > >>>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > >>>>>+ /* NOTE: we assume that endianness is a cpu state AND > >>>>>+ * cpu state is restored before virtio devices. > >>>>>+ */ > >>>>>+ vdev->is_big_endian = virtio_is_big_endian(); > >>>>>+ > >>>>> if (k->load_config) { > >>>>> ret = k->load_config(qbus->parent, f); > >>>>> if (ret) > >>>>>@@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > >>>>> } > >>>>> } > >>>>>+uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > >>>>>+{ > >>>>>+ if (vdev->is_big_endian) { > >>>>>+ return tswap16(s); > >>>>>+ } else { > >>>>>+ return bswap16(tswap16(s)); > >>>>>+ } > >>>>This looks pretty bogus. When virtio wants to do "tswap" what it > >>>>means is "give me a host endianness value in virtio endianness". How > >>>>about something like > >>>> > >>>>#ifdef HOST_WORDS_BIGENDIAN > >>>> return vdev->is_big_endian ? s : bswap16(s); > >>>>#else > >>>> return vdev->is_big_endian ? bswap16(s) : s; > >>>>#endif > >>>> > >>>Actually why doesn't this call virtio_is_big_endian? > >>>As it is, we get extra branches even if target endian-ness > >>>is fixed. > >>Because virtio_is_big_endian() returns the default endianness, not > >>the runtime endianness of a virtio device. > > In fact, we should probably rename it accordingly. > > >> > >>>>That should work pretty well inline as well, so you don't need to > >>>>compile virtio.c as target dependent. > >>>> > >>>> > >>>>Alex > >>>Yes but we'll still need to build two variants: fixed endian and > >>>dynamic endian platforms. > >>>Something along the lines of 32/64 bit split that we have? > >>Why bother? Always make it dynamic and don't change the per-device > >>variable, no? I'd be surprised if the performance difference is > >>measurable. The bulk of the data we transfer gets copied raw anyway. > >> > >> > >>Alex > >This will have to be measured and proved by whoever's proposing the > >patch, not by reviewers. Platforms such as AMD which don't do > >prediction well would be especially interesting to test on. > > Sure, Greg, can you do that? I'm sure Michael has test cases > available he can give you to measure performance on this. Measuring performance is hard ATM. Disk io stress when backed by raw ramdisk in host is usually the easiest way to stress userspace virtio. You need to make sure your baseline is repeateable though: pin down everything from cpu to hardware interrupts, disable power management, c states etc Just taking an average and hoping for the best doesn't cut it. We really should write a benchmark device, to measure just the transport overhead. For example, start with virtio-net but fuse TX and RX VQs together, so you'll get right back whatever you send out. So really, virtio is ATM target-specific and I think it's a good idea to get it working as such first, do extra cleanups like getting rid of target specific code separately. > Speaking of which, how does all of this work with vhost? > > > Alex
On Mon, Apr 14, 2014 at 02:49:31PM +0200, Alexander Graf wrote: > > On 14.04.14 14:46, Michael S. Tsirkin wrote: > >On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: > >>On 14.04.14 14:37, Michael S. Tsirkin wrote: > >>>On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: > >>>>On 14.04.14 14:24, Michael S. Tsirkin wrote: > >>>> > >>>This will have to be measured and proved by whoever's proposing the > >>>patch, not by reviewers. Platforms such as AMD which don't do > >>>prediction well would be especially interesting to test on. > >>Sure, Greg, can you do that? I'm sure Michael has test cases > >>available he can give you to measure performance on this. > >> > >>Speaking of which, how does all of this work with vhost? > >> > >> > >>Alex > >I think that's missing. > >As a first step, we need to disable vhost when > >host/guest endian-ness do not match. > > > >We could also add cross-endian support to vhost. > > > >Or just wait a couple more months for virtio 1.0 which is fixed > >endian-ness. > > That won't help for current ppc64le guests, so I suppose we will > need cross-endian vhost. > > > Alex It's kernel level work anyway. Just backport a new driver. Seems less work for more benefit.
On 04/14/2014 02:49 PM, Alexander Graf wrote: > > On 14.04.14 14:46, Michael S. Tsirkin wrote: >> On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: >>> On 14.04.14 14:37, Michael S. Tsirkin wrote: >>>> On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: >>>>> On 14.04.14 14:24, Michael S. Tsirkin wrote: >>>>> >>>> This will have to be measured and proved by whoever's proposing the >>>> patch, not by reviewers. Platforms such as AMD which don't do >>>> prediction well would be especially interesting to test on. >>> Sure, Greg, can you do that? I'm sure Michael has test cases >>> available he can give you to measure performance on this. >>> >>> Speaking of which, how does all of this work with vhost? >>> >>> >>> Alex >> I think that's missing. >> As a first step, we need to disable vhost when >> host/guest endian-ness do not match. >> >> We could also add cross-endian support to vhost. >> >> Or just wait a couple more months for virtio 1.0 which is fixed >> endian-ness. > > That won't help for current ppc64le guests, so I suppose we will > need cross-endian vhost. Yes. For the moment, vhost=off is needed and even with that, qemu needs to byteswap a few attributes (see virtio-net header in patch 3). I am looking for a simple way to propagate the vring endianness to the host without breaking the compatibility with the current virtio drivers. Hopefully this is feasible. C.
On 14.04.14 14:56, Michael S. Tsirkin wrote: > On Mon, Apr 14, 2014 at 02:49:31PM +0200, Alexander Graf wrote: >> On 14.04.14 14:46, Michael S. Tsirkin wrote: >>> On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: >>>> On 14.04.14 14:37, Michael S. Tsirkin wrote: >>>>> On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: >>>>>> On 14.04.14 14:24, Michael S. Tsirkin wrote: >>>>>> >>>>> This will have to be measured and proved by whoever's proposing the >>>>> patch, not by reviewers. Platforms such as AMD which don't do >>>>> prediction well would be especially interesting to test on. >>>> Sure, Greg, can you do that? I'm sure Michael has test cases >>>> available he can give you to measure performance on this. >>>> >>>> Speaking of which, how does all of this work with vhost? >>>> >>>> >>>> Alex >>> I think that's missing. >>> As a first step, we need to disable vhost when >>> host/guest endian-ness do not match. >>> >>> We could also add cross-endian support to vhost. >>> >>> Or just wait a couple more months for virtio 1.0 which is fixed >>> endian-ness. >> That won't help for current ppc64le guests, so I suppose we will >> need cross-endian vhost. It's quite hard to backport a driver onto installation media :). We already have openSUSE isos out there that support virtio and ppc64le. Alex
On Mon, 14 Apr 2014 15:46:34 +0300 "Michael S. Tsirkin" <mst@redhat.com> wrote: > On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: > > > > On 14.04.14 14:37, Michael S. Tsirkin wrote: > > >On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: > > >>On 14.04.14 14:24, Michael S. Tsirkin wrote: > > >>>On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: > > >>>>On 14.04.14 13:58, Greg Kurz wrote: > > >>>>>From: Rusty Russell <rusty@rustcorp.com.au> > > >>>>> > > >>>>>virtio data structures are defined as "target endian", which assumes > > >>>>>that's a fixed value. In fact, that actually means it's platform-specific. > > >>>>>The OASIS virtio 1.0 spec will fix this, by making it all little endian. > > >>>>> > > >>>>>We introduce memory accessors to be used accross the virtio code where > > >>>>>needed. These accessors should support both legacy and 1.0 devices. > > >>>>>A good way to do it is to introduce a per-device property to store the > > >>>>>endianness. We choose to set this flag at device reset time because it > > >>>>>is reasonnable to assume the endianness won't change unless we reboot or > > >>>>>kexec another kernel. And it is also reasonnable to assume the new kernel > > >>>>>will reset the devices before using them (otherwise it will break). > > >>>>> > > >>>>>We reuse the virtio_is_big_endian() helper since it provides the right > > >>>>>value for legacy devices with most of the targets, that have fixed > > >>>>>endianness. It can then be overriden to support endian-ambivalent targets. > > >>>>> > > >>>>>To support migration, we need to set the flag in virtio_load() as well. > > >>>>> > > >>>>>(a) One solution would be to add it to the stream, but it have some > > >>>>> drawbacks: > > >>>>>- since this only affects a few targets, the field should be put into a > > >>>>> subsection > > >>>>>- virtio migration code should be ported to vmstate to be able to introduce > > >>>>> such a subsection > > >>>>> > > >>>>>(b) If we assume the following to be true: > > >>>>>- target endianness falls under some cpu state > > >>>>>- cpu state is always restored before virtio devices state because they > > >>>>> get initialized in this order in main(). > > >>>>>Then an alternative is to rely on virtio_is_big_endian() again at > > >>>>>load time. No need to mess around with the migration stream in this case. > > >>>>> > > >>>>>This patch implements (b). > > >>>>> > > >>>>>Note that the tswap helpers are implemented in virtio.c so that > > >>>>>virtio-access.h stays platform independant. Most of the virtio code > > >>>>>will be buildable under common-obj instead of obj then, and spare > > >>>>>some cycles when building for multiple targets. > > >>>>> > > >>>>>Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > > >>>>>[ ldq_phys() API change, > > >>>>> relicensed virtio-access.h to GPLv2+ on Rusty's request, > > >>>>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > > >>>>> add VirtIODevice * arg to virtio helpers, > > >>>>> use the existing virtio_is_big_endian() helper, > > >>>>> virtio-pci: use the device is_big_endian flag, > > >>>>> introduce virtio tswap16 and tswap64 helpers, > > >>>>> move calls to tswap* out of virtio-access.h to make it platform independant, > > >>>>> migration support, > > >>>>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] > > >>>>>Cc: Cédric Le Goater <clg@fr.ibm.com> > > >>>>>Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > > >>>>>--- > > >>>>> > > >>>>>Changes since v6: > > >>>>>- merge the virtio_needs_byteswap() helper from v6 and existing > > >>>>> virtio_is_big_endian() > > >>>>>- virtio-pci: now supports endianness changes > > >>>>>- virtio-access.h fixes (target independant) > > >>>>> > > >>>>> exec.c | 2 - > > >>>>> hw/virtio/Makefile.objs | 2 - > > >>>>> hw/virtio/virtio-pci.c | 11 +-- > > >>>>> hw/virtio/virtio.c | 35 +++++++++ > > >>>>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > > >>>>> include/hw/virtio/virtio.h | 2 + > > >>>>> vl.c | 4 + > > >>>>> 7 files changed, 185 insertions(+), 9 deletions(-) > > >>>>> create mode 100644 include/hw/virtio/virtio-access.h > > >>>>> > > >>>>>diff --git a/exec.c b/exec.c > > >>>>>index 91513c6..e6777d0 100644 > > >>>>>--- a/exec.c > > >>>>>+++ b/exec.c > > >>>>>@@ -42,6 +42,7 @@ > > >>>>> #else /* !CONFIG_USER_ONLY */ > > >>>>> #include "sysemu/xen-mapcache.h" > > >>>>> #include "trace.h" > > >>>>>+#include "hw/virtio/virtio.h" > > >>>>> #endif > > >>>>> #include "exec/cpu-all.h" > > >>>>>@@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > > >>>>> * A helper function for the _utterly broken_ virtio device model to find out if > > >>>>> * it's running on a big endian machine. Don't do this at home kids! > > >>>>> */ > > >>>>>-bool virtio_is_big_endian(void); > > >>>>> bool virtio_is_big_endian(void) > > >>>>> { > > >>>>> #if defined(TARGET_WORDS_BIGENDIAN) > > >>>>>diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > > >>>>>index 1ba53d9..68c3064 100644 > > >>>>>--- a/hw/virtio/Makefile.objs > > >>>>>+++ b/hw/virtio/Makefile.objs > > >>>>>@@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > > >>>>> common-obj-y += virtio-mmio.o > > >>>>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > > >>>>>-obj-y += virtio.o virtio-balloon.o > > >>>>>+obj-y += virtio.o virtio-balloon.o > > >>>>> obj-$(CONFIG_LINUX) += vhost.o > > >>>>>diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > > >>>>>index ce97514..82a1689 100644 > > >>>>>--- a/hw/virtio/virtio-pci.c > > >>>>>+++ b/hw/virtio/virtio-pci.c > > >>>>>@@ -89,9 +89,6 @@ > > >>>>> /* Flags track per-device state like workarounds for quirks in older guests. */ > > >>>>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > > >>>>>-/* HACK for virtio to determine if it's running a big endian guest */ > > >>>>>-bool virtio_is_big_endian(void); > > >>>>>- > > >>>>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > > >>>>> VirtIOPCIProxy *dev); > > >>>>>@@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > > >>>>> break; > > >>>>> case 2: > > >>>>> val = virtio_config_readw(vdev, addr); > > >>>>>- if (virtio_is_big_endian()) { > > >>>>>+ if (vdev->is_big_endian) { > > >>>>> val = bswap16(val); > > >>>>> } > > >>>>> break; > > >>>>> case 4: > > >>>>> val = virtio_config_readl(vdev, addr); > > >>>>>- if (virtio_is_big_endian()) { > > >>>>>+ if (vdev->is_big_endian) { > > >>>>> val = bswap32(val); > > >>>>> } > > >>>>> break; > > >>>>>@@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > > >>>>> virtio_config_writeb(vdev, addr, val); > > >>>>> break; > > >>>>> case 2: > > >>>>>- if (virtio_is_big_endian()) { > > >>>>>+ if (vdev->is_big_endian) { > > >>>>> val = bswap16(val); > > >>>>> } > > >>>>> virtio_config_writew(vdev, addr, val); > > >>>>> break; > > >>>>> case 4: > > >>>>>- if (virtio_is_big_endian()) { > > >>>>>+ if (vdev->is_big_endian) { > > >>>>> val = bswap32(val); > > >>>>> } > > >>>>> virtio_config_writel(vdev, addr, val); > > >>>>>diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > >>>>>index aeabf3a..bb646f0 100644 > > >>>>>--- a/hw/virtio/virtio.c > > >>>>>+++ b/hw/virtio/virtio.c > > >>>>>@@ -19,6 +19,7 @@ > > >>>>> #include "hw/virtio/virtio.h" > > >>>>> #include "qemu/atomic.h" > > >>>>> #include "hw/virtio/virtio-bus.h" > > >>>>>+#include "hw/virtio/virtio-access.h" > > >>>>> /* > > >>>>> * The alignment to use between consumer and producer parts of vring. > > >>>>>@@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > > >>>>> virtio_set_status(vdev, 0); > > >>>>>+ vdev->is_big_endian = virtio_is_big_endian(); > > >>>>>+ > > >>>>> if (k->reset) { > > >>>>> k->reset(vdev); > > >>>>> } > > >>>>>@@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > > >>>>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > > >>>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > > >>>>>+ /* NOTE: we assume that endianness is a cpu state AND > > >>>>>+ * cpu state is restored before virtio devices. > > >>>>>+ */ > > >>>>>+ vdev->is_big_endian = virtio_is_big_endian(); > > >>>>>+ > > >>>>> if (k->load_config) { > > >>>>> ret = k->load_config(qbus->parent, f); > > >>>>> if (ret) > > >>>>>@@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > > >>>>> } > > >>>>> } > > >>>>>+uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > > >>>>>+{ > > >>>>>+ if (vdev->is_big_endian) { > > >>>>>+ return tswap16(s); > > >>>>>+ } else { > > >>>>>+ return bswap16(tswap16(s)); > > >>>>>+ } > > >>>>This looks pretty bogus. When virtio wants to do "tswap" what it > > >>>>means is "give me a host endianness value in virtio endianness". How > > >>>>about something like > > >>>> > > >>>>#ifdef HOST_WORDS_BIGENDIAN > > >>>> return vdev->is_big_endian ? s : bswap16(s); > > >>>>#else > > >>>> return vdev->is_big_endian ? bswap16(s) : s; > > >>>>#endif > > >>>> > > >>>Actually why doesn't this call virtio_is_big_endian? > > >>>As it is, we get extra branches even if target endian-ness > > >>>is fixed. > > >>Because virtio_is_big_endian() returns the default endianness, not > > >>the runtime endianness of a virtio device. > > > > In fact, we should probably rename it accordingly. > > > > >> > > >>>>That should work pretty well inline as well, so you don't need to > > >>>>compile virtio.c as target dependent. > > >>>> > > >>>> > > >>>>Alex > > >>>Yes but we'll still need to build two variants: fixed endian and > > >>>dynamic endian platforms. > > >>>Something along the lines of 32/64 bit split that we have? > > >>Why bother? Always make it dynamic and don't change the per-device > > >>variable, no? I'd be surprised if the performance difference is > > >>measurable. The bulk of the data we transfer gets copied raw anyway. > > >> > > >> > > >>Alex > > >This will have to be measured and proved by whoever's proposing the > > >patch, not by reviewers. Platforms such as AMD which don't do > > >prediction well would be especially interesting to test on. > > > > Sure, Greg, can you do that? I'm sure Michael has test cases > > available he can give you to measure performance on this. > > > > Speaking of which, how does all of this work with vhost? > > > > > > Alex > > I think that's missing. > As a first step, we need to disable vhost when > host/guest endian-ness do not match. > > We could also add cross-endian support to vhost. > I confirm: vhost has to be fixed to use guest endian virtio rings. > Or just wait a couple more months for virtio 1.0 which is fixed > endian-ness. > Oohhh no... don't say that ! Legacy virtio is not dead yet.
On 14.04.14 14:55, Michael S. Tsirkin wrote: > On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: >> On 14.04.14 14:37, Michael S. Tsirkin wrote: >>> On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: >>>> On 14.04.14 14:24, Michael S. Tsirkin wrote: >>>>> On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: >>>>>> On 14.04.14 13:58, Greg Kurz wrote: >>>>>>> From: Rusty Russell <rusty@rustcorp.com.au> >>>>>>> >>>>>>> virtio data structures are defined as "target endian", which assumes >>>>>>> that's a fixed value. In fact, that actually means it's platform-specific. >>>>>>> The OASIS virtio 1.0 spec will fix this, by making it all little endian. >>>>>>> >>>>>>> We introduce memory accessors to be used accross the virtio code where >>>>>>> needed. These accessors should support both legacy and 1.0 devices. >>>>>>> A good way to do it is to introduce a per-device property to store the >>>>>>> endianness. We choose to set this flag at device reset time because it >>>>>>> is reasonnable to assume the endianness won't change unless we reboot or >>>>>>> kexec another kernel. And it is also reasonnable to assume the new kernel >>>>>>> will reset the devices before using them (otherwise it will break). >>>>>>> >>>>>>> We reuse the virtio_is_big_endian() helper since it provides the right >>>>>>> value for legacy devices with most of the targets, that have fixed >>>>>>> endianness. It can then be overriden to support endian-ambivalent targets. >>>>>>> >>>>>>> To support migration, we need to set the flag in virtio_load() as well. >>>>>>> >>>>>>> (a) One solution would be to add it to the stream, but it have some >>>>>>> drawbacks: >>>>>>> - since this only affects a few targets, the field should be put into a >>>>>>> subsection >>>>>>> - virtio migration code should be ported to vmstate to be able to introduce >>>>>>> such a subsection >>>>>>> >>>>>>> (b) If we assume the following to be true: >>>>>>> - target endianness falls under some cpu state >>>>>>> - cpu state is always restored before virtio devices state because they >>>>>>> get initialized in this order in main(). >>>>>>> Then an alternative is to rely on virtio_is_big_endian() again at >>>>>>> load time. No need to mess around with the migration stream in this case. >>>>>>> >>>>>>> This patch implements (b). >>>>>>> >>>>>>> Note that the tswap helpers are implemented in virtio.c so that >>>>>>> virtio-access.h stays platform independant. Most of the virtio code >>>>>>> will be buildable under common-obj instead of obj then, and spare >>>>>>> some cycles when building for multiple targets. >>>>>>> >>>>>>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> >>>>>>> [ ldq_phys() API change, >>>>>>> relicensed virtio-access.h to GPLv2+ on Rusty's request, >>>>>>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) >>>>>>> add VirtIODevice * arg to virtio helpers, >>>>>>> use the existing virtio_is_big_endian() helper, >>>>>>> virtio-pci: use the device is_big_endian flag, >>>>>>> introduce virtio tswap16 and tswap64 helpers, >>>>>>> move calls to tswap* out of virtio-access.h to make it platform independant, >>>>>>> migration support, >>>>>>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] >>>>>>> Cc: Cédric Le Goater <clg@fr.ibm.com> >>>>>>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> >>>>>>> --- >>>>>>> >>>>>>> Changes since v6: >>>>>>> - merge the virtio_needs_byteswap() helper from v6 and existing >>>>>>> virtio_is_big_endian() >>>>>>> - virtio-pci: now supports endianness changes >>>>>>> - virtio-access.h fixes (target independant) >>>>>>> >>>>>>> exec.c | 2 - >>>>>>> hw/virtio/Makefile.objs | 2 - >>>>>>> hw/virtio/virtio-pci.c | 11 +-- >>>>>>> hw/virtio/virtio.c | 35 +++++++++ >>>>>>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ >>>>>>> include/hw/virtio/virtio.h | 2 + >>>>>>> vl.c | 4 + >>>>>>> 7 files changed, 185 insertions(+), 9 deletions(-) >>>>>>> create mode 100644 include/hw/virtio/virtio-access.h >>>>>>> >>>>>>> diff --git a/exec.c b/exec.c >>>>>>> index 91513c6..e6777d0 100644 >>>>>>> --- a/exec.c >>>>>>> +++ b/exec.c >>>>>>> @@ -42,6 +42,7 @@ >>>>>>> #else /* !CONFIG_USER_ONLY */ >>>>>>> #include "sysemu/xen-mapcache.h" >>>>>>> #include "trace.h" >>>>>>> +#include "hw/virtio/virtio.h" >>>>>>> #endif >>>>>>> #include "exec/cpu-all.h" >>>>>>> @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, >>>>>>> * A helper function for the _utterly broken_ virtio device model to find out if >>>>>>> * it's running on a big endian machine. Don't do this at home kids! >>>>>>> */ >>>>>>> -bool virtio_is_big_endian(void); >>>>>>> bool virtio_is_big_endian(void) >>>>>>> { >>>>>>> #if defined(TARGET_WORDS_BIGENDIAN) >>>>>>> diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs >>>>>>> index 1ba53d9..68c3064 100644 >>>>>>> --- a/hw/virtio/Makefile.objs >>>>>>> +++ b/hw/virtio/Makefile.objs >>>>>>> @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o >>>>>>> common-obj-y += virtio-mmio.o >>>>>>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ >>>>>>> -obj-y += virtio.o virtio-balloon.o >>>>>>> +obj-y += virtio.o virtio-balloon.o >>>>>>> obj-$(CONFIG_LINUX) += vhost.o >>>>>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c >>>>>>> index ce97514..82a1689 100644 >>>>>>> --- a/hw/virtio/virtio-pci.c >>>>>>> +++ b/hw/virtio/virtio-pci.c >>>>>>> @@ -89,9 +89,6 @@ >>>>>>> /* Flags track per-device state like workarounds for quirks in older guests. */ >>>>>>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) >>>>>>> -/* HACK for virtio to determine if it's running a big endian guest */ >>>>>>> -bool virtio_is_big_endian(void); >>>>>>> - >>>>>>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, >>>>>>> VirtIOPCIProxy *dev); >>>>>>> @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, >>>>>>> break; >>>>>>> case 2: >>>>>>> val = virtio_config_readw(vdev, addr); >>>>>>> - if (virtio_is_big_endian()) { >>>>>>> + if (vdev->is_big_endian) { >>>>>>> val = bswap16(val); >>>>>>> } >>>>>>> break; >>>>>>> case 4: >>>>>>> val = virtio_config_readl(vdev, addr); >>>>>>> - if (virtio_is_big_endian()) { >>>>>>> + if (vdev->is_big_endian) { >>>>>>> val = bswap32(val); >>>>>>> } >>>>>>> break; >>>>>>> @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, >>>>>>> virtio_config_writeb(vdev, addr, val); >>>>>>> break; >>>>>>> case 2: >>>>>>> - if (virtio_is_big_endian()) { >>>>>>> + if (vdev->is_big_endian) { >>>>>>> val = bswap16(val); >>>>>>> } >>>>>>> virtio_config_writew(vdev, addr, val); >>>>>>> break; >>>>>>> case 4: >>>>>>> - if (virtio_is_big_endian()) { >>>>>>> + if (vdev->is_big_endian) { >>>>>>> val = bswap32(val); >>>>>>> } >>>>>>> virtio_config_writel(vdev, addr, val); >>>>>>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c >>>>>>> index aeabf3a..bb646f0 100644 >>>>>>> --- a/hw/virtio/virtio.c >>>>>>> +++ b/hw/virtio/virtio.c >>>>>>> @@ -19,6 +19,7 @@ >>>>>>> #include "hw/virtio/virtio.h" >>>>>>> #include "qemu/atomic.h" >>>>>>> #include "hw/virtio/virtio-bus.h" >>>>>>> +#include "hw/virtio/virtio-access.h" >>>>>>> /* >>>>>>> * The alignment to use between consumer and producer parts of vring. >>>>>>> @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) >>>>>>> virtio_set_status(vdev, 0); >>>>>>> + vdev->is_big_endian = virtio_is_big_endian(); >>>>>>> + >>>>>>> if (k->reset) { >>>>>>> k->reset(vdev); >>>>>>> } >>>>>>> @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) >>>>>>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); >>>>>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); >>>>>>> + /* NOTE: we assume that endianness is a cpu state AND >>>>>>> + * cpu state is restored before virtio devices. >>>>>>> + */ >>>>>>> + vdev->is_big_endian = virtio_is_big_endian(); >>>>>>> + >>>>>>> if (k->load_config) { >>>>>>> ret = k->load_config(qbus->parent, f); >>>>>>> if (ret) >>>>>>> @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) >>>>>>> } >>>>>>> } >>>>>>> +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) >>>>>>> +{ >>>>>>> + if (vdev->is_big_endian) { >>>>>>> + return tswap16(s); >>>>>>> + } else { >>>>>>> + return bswap16(tswap16(s)); >>>>>>> + } >>>>>> This looks pretty bogus. When virtio wants to do "tswap" what it >>>>>> means is "give me a host endianness value in virtio endianness". How >>>>>> about something like >>>>>> >>>>>> #ifdef HOST_WORDS_BIGENDIAN >>>>>> return vdev->is_big_endian ? s : bswap16(s); >>>>>> #else >>>>>> return vdev->is_big_endian ? bswap16(s) : s; >>>>>> #endif >>>>>> >>>>> Actually why doesn't this call virtio_is_big_endian? >>>>> As it is, we get extra branches even if target endian-ness >>>>> is fixed. >>>> Because virtio_is_big_endian() returns the default endianness, not >>>> the runtime endianness of a virtio device. >> In fact, we should probably rename it accordingly. >> >>>>>> That should work pretty well inline as well, so you don't need to >>>>>> compile virtio.c as target dependent. >>>>>> >>>>>> >>>>>> Alex >>>>> Yes but we'll still need to build two variants: fixed endian and >>>>> dynamic endian platforms. >>>>> Something along the lines of 32/64 bit split that we have? >>>> Why bother? Always make it dynamic and don't change the per-device >>>> variable, no? I'd be surprised if the performance difference is >>>> measurable. The bulk of the data we transfer gets copied raw anyway. >>>> >>>> >>>> Alex >>> This will have to be measured and proved by whoever's proposing the >>> patch, not by reviewers. Platforms such as AMD which don't do >>> prediction well would be especially interesting to test on. >> Sure, Greg, can you do that? I'm sure Michael has test cases >> available he can give you to measure performance on this. > Measuring performance is hard ATM. > > Disk io stress when backed by raw ramdisk in host is usually the easiest > way to stress userspace virtio. > You need to make sure your baseline is repeateable though: > pin down everything from cpu to hardware interrupts, > disable power management, c states etc > Just taking an average and hoping for the best doesn't cut it. > > > We really should write a benchmark device, > to measure just the transport overhead. > For example, start with virtio-net but fuse TX and RX > VQs together, so you'll get right back whatever you send out. > > So really, virtio is ATM target-specific and I think it's > a good idea to get it working as such first, > do extra cleanups like getting rid of target specific code > separately. How does it help when we keep it target specific? The only way to remove any overhead from this refactoring would be to instead of if (vdev->is_big_endian) write it as if (virtio_is_big_endian_device(vdev)) which we could define as static inline bool virtio_is_big_endian_device(VirtIODevice *vdev) { #if defined(TARGET_PPC) return vdev->is_big_endian #elif defined(TARGET_WORDS_BIGENDIAN) return true; #else return false; #endif } If it makes you happy to do it this way first and move virtio to target-independent files later, we can certainly do this. Alex
On Mon, 14 Apr 2014 14:53:03 +0200 Alexander Graf <agraf@suse.de> wrote: > > On 14.04.14 14:50, Greg Kurz wrote: > > On Mon, 14 Apr 2014 14:22:36 +0200 > > Alexander Graf <agraf@suse.de> wrote: > > > >> On 14.04.14 13:58, Greg Kurz wrote: > >>> From: Rusty Russell <rusty@rustcorp.com.au> > >>> > >>> virtio data structures are defined as "target endian", which assumes > >>> that's a fixed value. In fact, that actually means it's platform-specific. > >>> The OASIS virtio 1.0 spec will fix this, by making it all little endian. > >>> > >>> We introduce memory accessors to be used accross the virtio code where > >>> needed. These accessors should support both legacy and 1.0 devices. > >>> A good way to do it is to introduce a per-device property to store the > >>> endianness. We choose to set this flag at device reset time because it > >>> is reasonnable to assume the endianness won't change unless we reboot or > >>> kexec another kernel. And it is also reasonnable to assume the new kernel > >>> will reset the devices before using them (otherwise it will break). > >>> > >>> We reuse the virtio_is_big_endian() helper since it provides the right > >>> value for legacy devices with most of the targets, that have fixed > >>> endianness. It can then be overriden to support endian-ambivalent targets. > >>> > >>> To support migration, we need to set the flag in virtio_load() as well. > >>> > >>> (a) One solution would be to add it to the stream, but it have some > >>> drawbacks: > >>> - since this only affects a few targets, the field should be put into a > >>> subsection > >>> - virtio migration code should be ported to vmstate to be able to introduce > >>> such a subsection > >>> > >>> (b) If we assume the following to be true: > >>> - target endianness falls under some cpu state > >>> - cpu state is always restored before virtio devices state because they > >>> get initialized in this order in main(). > >>> Then an alternative is to rely on virtio_is_big_endian() again at > >>> load time. No need to mess around with the migration stream in this case. > >>> > >>> This patch implements (b). > >>> > >>> Note that the tswap helpers are implemented in virtio.c so that > >>> virtio-access.h stays platform independant. Most of the virtio code > >>> will be buildable under common-obj instead of obj then, and spare > >>> some cycles when building for multiple targets. > >>> > >>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > >>> [ ldq_phys() API change, > >>> relicensed virtio-access.h to GPLv2+ on Rusty's request, > >>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > >>> add VirtIODevice * arg to virtio helpers, > >>> use the existing virtio_is_big_endian() helper, > >>> virtio-pci: use the device is_big_endian flag, > >>> introduce virtio tswap16 and tswap64 helpers, > >>> move calls to tswap* out of virtio-access.h to make it platform independant, > >>> migration support, > >>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] > >>> Cc: Cédric Le Goater <clg@fr.ibm.com> > >>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > >>> --- > > [...] > > >>> + > >>> +uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s); > >>> + > >>> +static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s) > >>> +{ > >>> + *s = virtio_tswap64(vdev, *s); > >>> +} > >> Could we try to poison any non-virtio, non-endian-specific memory > >> accessors when this file is included? That way we don't fall into > >> pitfalls where we end up running stl_p in a tiny corner case somewhere > >> still. > >> > >> > >> Alex > >> > > Hmmm... there are still some stl_p users in virtio.c that I could not > > port to the new virtio memory accessors... These are the virtio_config_* > > helpers that gets called from virtio-pci and virtio-mmio. > > Why couldn't you port them to the new virtio memory accessors? Are you > missing a vdev parameter? Could you add one? > > IIRC config space is a weird mix of target endianness and little endian. > The helpers in virtio do the stl_p, but it is the caller that does the byteswap (virtio-pci) or not (virtio-mmio)... That's where I got a little confused and did not dare to touch this code :-\ Of course, I can try to focus harder see what can/should be done here. :)
On Mon, Apr 14, 2014 at 03:08:23PM +0200, Alexander Graf wrote: > > On 14.04.14 14:55, Michael S. Tsirkin wrote: > >On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: > >>On 14.04.14 14:37, Michael S. Tsirkin wrote: > >>>On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: > >>>>On 14.04.14 14:24, Michael S. Tsirkin wrote: > >>>>>On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: > >>>>>>On 14.04.14 13:58, Greg Kurz wrote: > >>>>>>>From: Rusty Russell <rusty@rustcorp.com.au> > >>>>>>> > >>>>>>>virtio data structures are defined as "target endian", which assumes > >>>>>>>that's a fixed value. In fact, that actually means it's platform-specific. > >>>>>>>The OASIS virtio 1.0 spec will fix this, by making it all little endian. > >>>>>>> > >>>>>>>We introduce memory accessors to be used accross the virtio code where > >>>>>>>needed. These accessors should support both legacy and 1.0 devices. > >>>>>>>A good way to do it is to introduce a per-device property to store the > >>>>>>>endianness. We choose to set this flag at device reset time because it > >>>>>>>is reasonnable to assume the endianness won't change unless we reboot or > >>>>>>>kexec another kernel. And it is also reasonnable to assume the new kernel > >>>>>>>will reset the devices before using them (otherwise it will break). > >>>>>>> > >>>>>>>We reuse the virtio_is_big_endian() helper since it provides the right > >>>>>>>value for legacy devices with most of the targets, that have fixed > >>>>>>>endianness. It can then be overriden to support endian-ambivalent targets. > >>>>>>> > >>>>>>>To support migration, we need to set the flag in virtio_load() as well. > >>>>>>> > >>>>>>>(a) One solution would be to add it to the stream, but it have some > >>>>>>> drawbacks: > >>>>>>>- since this only affects a few targets, the field should be put into a > >>>>>>> subsection > >>>>>>>- virtio migration code should be ported to vmstate to be able to introduce > >>>>>>> such a subsection > >>>>>>> > >>>>>>>(b) If we assume the following to be true: > >>>>>>>- target endianness falls under some cpu state > >>>>>>>- cpu state is always restored before virtio devices state because they > >>>>>>> get initialized in this order in main(). > >>>>>>>Then an alternative is to rely on virtio_is_big_endian() again at > >>>>>>>load time. No need to mess around with the migration stream in this case. > >>>>>>> > >>>>>>>This patch implements (b). > >>>>>>> > >>>>>>>Note that the tswap helpers are implemented in virtio.c so that > >>>>>>>virtio-access.h stays platform independant. Most of the virtio code > >>>>>>>will be buildable under common-obj instead of obj then, and spare > >>>>>>>some cycles when building for multiple targets. > >>>>>>> > >>>>>>>Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > >>>>>>>[ ldq_phys() API change, > >>>>>>> relicensed virtio-access.h to GPLv2+ on Rusty's request, > >>>>>>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > >>>>>>> add VirtIODevice * arg to virtio helpers, > >>>>>>> use the existing virtio_is_big_endian() helper, > >>>>>>> virtio-pci: use the device is_big_endian flag, > >>>>>>> introduce virtio tswap16 and tswap64 helpers, > >>>>>>> move calls to tswap* out of virtio-access.h to make it platform independant, > >>>>>>> migration support, > >>>>>>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] > >>>>>>>Cc: Cédric Le Goater <clg@fr.ibm.com> > >>>>>>>Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > >>>>>>>--- > >>>>>>> > >>>>>>>Changes since v6: > >>>>>>>- merge the virtio_needs_byteswap() helper from v6 and existing > >>>>>>> virtio_is_big_endian() > >>>>>>>- virtio-pci: now supports endianness changes > >>>>>>>- virtio-access.h fixes (target independant) > >>>>>>> > >>>>>>> exec.c | 2 - > >>>>>>> hw/virtio/Makefile.objs | 2 - > >>>>>>> hw/virtio/virtio-pci.c | 11 +-- > >>>>>>> hw/virtio/virtio.c | 35 +++++++++ > >>>>>>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > >>>>>>> include/hw/virtio/virtio.h | 2 + > >>>>>>> vl.c | 4 + > >>>>>>> 7 files changed, 185 insertions(+), 9 deletions(-) > >>>>>>> create mode 100644 include/hw/virtio/virtio-access.h > >>>>>>> > >>>>>>>diff --git a/exec.c b/exec.c > >>>>>>>index 91513c6..e6777d0 100644 > >>>>>>>--- a/exec.c > >>>>>>>+++ b/exec.c > >>>>>>>@@ -42,6 +42,7 @@ > >>>>>>> #else /* !CONFIG_USER_ONLY */ > >>>>>>> #include "sysemu/xen-mapcache.h" > >>>>>>> #include "trace.h" > >>>>>>>+#include "hw/virtio/virtio.h" > >>>>>>> #endif > >>>>>>> #include "exec/cpu-all.h" > >>>>>>>@@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > >>>>>>> * A helper function for the _utterly broken_ virtio device model to find out if > >>>>>>> * it's running on a big endian machine. Don't do this at home kids! > >>>>>>> */ > >>>>>>>-bool virtio_is_big_endian(void); > >>>>>>> bool virtio_is_big_endian(void) > >>>>>>> { > >>>>>>> #if defined(TARGET_WORDS_BIGENDIAN) > >>>>>>>diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > >>>>>>>index 1ba53d9..68c3064 100644 > >>>>>>>--- a/hw/virtio/Makefile.objs > >>>>>>>+++ b/hw/virtio/Makefile.objs > >>>>>>>@@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > >>>>>>> common-obj-y += virtio-mmio.o > >>>>>>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > >>>>>>>-obj-y += virtio.o virtio-balloon.o > >>>>>>>+obj-y += virtio.o virtio-balloon.o > >>>>>>> obj-$(CONFIG_LINUX) += vhost.o > >>>>>>>diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > >>>>>>>index ce97514..82a1689 100644 > >>>>>>>--- a/hw/virtio/virtio-pci.c > >>>>>>>+++ b/hw/virtio/virtio-pci.c > >>>>>>>@@ -89,9 +89,6 @@ > >>>>>>> /* Flags track per-device state like workarounds for quirks in older guests. */ > >>>>>>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > >>>>>>>-/* HACK for virtio to determine if it's running a big endian guest */ > >>>>>>>-bool virtio_is_big_endian(void); > >>>>>>>- > >>>>>>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > >>>>>>> VirtIOPCIProxy *dev); > >>>>>>>@@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > >>>>>>> break; > >>>>>>> case 2: > >>>>>>> val = virtio_config_readw(vdev, addr); > >>>>>>>- if (virtio_is_big_endian()) { > >>>>>>>+ if (vdev->is_big_endian) { > >>>>>>> val = bswap16(val); > >>>>>>> } > >>>>>>> break; > >>>>>>> case 4: > >>>>>>> val = virtio_config_readl(vdev, addr); > >>>>>>>- if (virtio_is_big_endian()) { > >>>>>>>+ if (vdev->is_big_endian) { > >>>>>>> val = bswap32(val); > >>>>>>> } > >>>>>>> break; > >>>>>>>@@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > >>>>>>> virtio_config_writeb(vdev, addr, val); > >>>>>>> break; > >>>>>>> case 2: > >>>>>>>- if (virtio_is_big_endian()) { > >>>>>>>+ if (vdev->is_big_endian) { > >>>>>>> val = bswap16(val); > >>>>>>> } > >>>>>>> virtio_config_writew(vdev, addr, val); > >>>>>>> break; > >>>>>>> case 4: > >>>>>>>- if (virtio_is_big_endian()) { > >>>>>>>+ if (vdev->is_big_endian) { > >>>>>>> val = bswap32(val); > >>>>>>> } > >>>>>>> virtio_config_writel(vdev, addr, val); > >>>>>>>diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > >>>>>>>index aeabf3a..bb646f0 100644 > >>>>>>>--- a/hw/virtio/virtio.c > >>>>>>>+++ b/hw/virtio/virtio.c > >>>>>>>@@ -19,6 +19,7 @@ > >>>>>>> #include "hw/virtio/virtio.h" > >>>>>>> #include "qemu/atomic.h" > >>>>>>> #include "hw/virtio/virtio-bus.h" > >>>>>>>+#include "hw/virtio/virtio-access.h" > >>>>>>> /* > >>>>>>> * The alignment to use between consumer and producer parts of vring. > >>>>>>>@@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > >>>>>>> virtio_set_status(vdev, 0); > >>>>>>>+ vdev->is_big_endian = virtio_is_big_endian(); > >>>>>>>+ > >>>>>>> if (k->reset) { > >>>>>>> k->reset(vdev); > >>>>>>> } > >>>>>>>@@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > >>>>>>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > >>>>>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > >>>>>>>+ /* NOTE: we assume that endianness is a cpu state AND > >>>>>>>+ * cpu state is restored before virtio devices. > >>>>>>>+ */ > >>>>>>>+ vdev->is_big_endian = virtio_is_big_endian(); > >>>>>>>+ > >>>>>>> if (k->load_config) { > >>>>>>> ret = k->load_config(qbus->parent, f); > >>>>>>> if (ret) > >>>>>>>@@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > >>>>>>> } > >>>>>>> } > >>>>>>>+uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > >>>>>>>+{ > >>>>>>>+ if (vdev->is_big_endian) { > >>>>>>>+ return tswap16(s); > >>>>>>>+ } else { > >>>>>>>+ return bswap16(tswap16(s)); > >>>>>>>+ } > >>>>>>This looks pretty bogus. When virtio wants to do "tswap" what it > >>>>>>means is "give me a host endianness value in virtio endianness". How > >>>>>>about something like > >>>>>> > >>>>>>#ifdef HOST_WORDS_BIGENDIAN > >>>>>> return vdev->is_big_endian ? s : bswap16(s); > >>>>>>#else > >>>>>> return vdev->is_big_endian ? bswap16(s) : s; > >>>>>>#endif > >>>>>> > >>>>>Actually why doesn't this call virtio_is_big_endian? > >>>>>As it is, we get extra branches even if target endian-ness > >>>>>is fixed. > >>>>Because virtio_is_big_endian() returns the default endianness, not > >>>>the runtime endianness of a virtio device. > >>In fact, we should probably rename it accordingly. > >> > >>>>>>That should work pretty well inline as well, so you don't need to > >>>>>>compile virtio.c as target dependent. > >>>>>> > >>>>>> > >>>>>>Alex > >>>>>Yes but we'll still need to build two variants: fixed endian and > >>>>>dynamic endian platforms. > >>>>>Something along the lines of 32/64 bit split that we have? > >>>>Why bother? Always make it dynamic and don't change the per-device > >>>>variable, no? I'd be surprised if the performance difference is > >>>>measurable. The bulk of the data we transfer gets copied raw anyway. > >>>> > >>>> > >>>>Alex > >>>This will have to be measured and proved by whoever's proposing the > >>>patch, not by reviewers. Platforms such as AMD which don't do > >>>prediction well would be especially interesting to test on. > >>Sure, Greg, can you do that? I'm sure Michael has test cases > >>available he can give you to measure performance on this. > >Measuring performance is hard ATM. > > > >Disk io stress when backed by raw ramdisk in host is usually the easiest > >way to stress userspace virtio. > >You need to make sure your baseline is repeateable though: > >pin down everything from cpu to hardware interrupts, > >disable power management, c states etc > >Just taking an average and hoping for the best doesn't cut it. > > > > > >We really should write a benchmark device, > >to measure just the transport overhead. > >For example, start with virtio-net but fuse TX and RX > >VQs together, so you'll get right back whatever you send out. > > > >So really, virtio is ATM target-specific and I think it's > >a good idea to get it working as such first, > >do extra cleanups like getting rid of target specific code > >separately. > > How does it help when we keep it target specific? The only way to > remove any overhead from this refactoring would be to instead of > > if (vdev->is_big_endian) > > write it as > > if (virtio_is_big_endian_device(vdev)) > > which we could define as > > static inline bool virtio_is_big_endian_device(VirtIODevice *vdev) > { > #if defined(TARGET_PPC) > return vdev->is_big_endian > #elif defined(TARGET_WORDS_BIGENDIAN) > return true; > #else > return false; > #endif > } > > If it makes you happy to do it this way first and move virtio to > target-independent files later, we can certainly do this. > > > Alex Yes, that's enough to make me happy.
On 14.04.14 15:12, Greg Kurz wrote: > On Mon, 14 Apr 2014 14:53:03 +0200 > Alexander Graf <agraf@suse.de> wrote: >> On 14.04.14 14:50, Greg Kurz wrote: >>> On Mon, 14 Apr 2014 14:22:36 +0200 >>> Alexander Graf <agraf@suse.de> wrote: >>> >>>> On 14.04.14 13:58, Greg Kurz wrote: >>>>> From: Rusty Russell <rusty@rustcorp.com.au> >>>>> >>>>> virtio data structures are defined as "target endian", which assumes >>>>> that's a fixed value. In fact, that actually means it's platform-specific. >>>>> The OASIS virtio 1.0 spec will fix this, by making it all little endian. >>>>> >>>>> We introduce memory accessors to be used accross the virtio code where >>>>> needed. These accessors should support both legacy and 1.0 devices. >>>>> A good way to do it is to introduce a per-device property to store the >>>>> endianness. We choose to set this flag at device reset time because it >>>>> is reasonnable to assume the endianness won't change unless we reboot or >>>>> kexec another kernel. And it is also reasonnable to assume the new kernel >>>>> will reset the devices before using them (otherwise it will break). >>>>> >>>>> We reuse the virtio_is_big_endian() helper since it provides the right >>>>> value for legacy devices with most of the targets, that have fixed >>>>> endianness. It can then be overriden to support endian-ambivalent targets. >>>>> >>>>> To support migration, we need to set the flag in virtio_load() as well. >>>>> >>>>> (a) One solution would be to add it to the stream, but it have some >>>>> drawbacks: >>>>> - since this only affects a few targets, the field should be put into a >>>>> subsection >>>>> - virtio migration code should be ported to vmstate to be able to introduce >>>>> such a subsection >>>>> >>>>> (b) If we assume the following to be true: >>>>> - target endianness falls under some cpu state >>>>> - cpu state is always restored before virtio devices state because they >>>>> get initialized in this order in main(). >>>>> Then an alternative is to rely on virtio_is_big_endian() again at >>>>> load time. No need to mess around with the migration stream in this case. >>>>> >>>>> This patch implements (b). >>>>> >>>>> Note that the tswap helpers are implemented in virtio.c so that >>>>> virtio-access.h stays platform independant. Most of the virtio code >>>>> will be buildable under common-obj instead of obj then, and spare >>>>> some cycles when building for multiple targets. >>>>> >>>>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> >>>>> [ ldq_phys() API change, >>>>> relicensed virtio-access.h to GPLv2+ on Rusty's request, >>>>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) >>>>> add VirtIODevice * arg to virtio helpers, >>>>> use the existing virtio_is_big_endian() helper, >>>>> virtio-pci: use the device is_big_endian flag, >>>>> introduce virtio tswap16 and tswap64 helpers, >>>>> move calls to tswap* out of virtio-access.h to make it platform independant, >>>>> migration support, >>>>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] >>>>> Cc: Cédric Le Goater <clg@fr.ibm.com> >>>>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> >>>>> --- >> [...] >> >>>>> + >>>>> +uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s); >>>>> + >>>>> +static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s) >>>>> +{ >>>>> + *s = virtio_tswap64(vdev, *s); >>>>> +} >>>> Could we try to poison any non-virtio, non-endian-specific memory >>>> accessors when this file is included? That way we don't fall into >>>> pitfalls where we end up running stl_p in a tiny corner case somewhere >>>> still. >>>> >>>> >>>> Alex >>>> >>> Hmmm... there are still some stl_p users in virtio.c that I could not >>> port to the new virtio memory accessors... These are the virtio_config_* >>> helpers that gets called from virtio-pci and virtio-mmio. >> Why couldn't you port them to the new virtio memory accessors? Are you >> missing a vdev parameter? Could you add one? >> >> IIRC config space is a weird mix of target endianness and little endian. >> > The helpers in virtio do the stl_p, but it is the caller that does the > byteswap (virtio-pci) or not (virtio-mmio)... That's where I got a little > confused and did not dare to touch this code :-\ > > Of course, I can try to focus harder see what can/should be done here. :) Let's make Michael happy with target specific virtio drivers first and then look at this mess ;). Alex
On Mon, 14 Apr 2014 14:40:04 +0200 Alexander Graf <agraf@suse.de> wrote: > > On 14.04.14 14:37, Michael S. Tsirkin wrote: > > On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: > >> On 14.04.14 14:24, Michael S. Tsirkin wrote: > >>> On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: > >>>> On 14.04.14 13:58, Greg Kurz wrote: > >>>>> From: Rusty Russell <rusty@rustcorp.com.au> > >>>>> > >>>>> virtio data structures are defined as "target endian", which assumes > >>>>> that's a fixed value. In fact, that actually means it's platform-specific. > >>>>> The OASIS virtio 1.0 spec will fix this, by making it all little endian. > >>>>> > >>>>> We introduce memory accessors to be used accross the virtio code where > >>>>> needed. These accessors should support both legacy and 1.0 devices. > >>>>> A good way to do it is to introduce a per-device property to store the > >>>>> endianness. We choose to set this flag at device reset time because it > >>>>> is reasonnable to assume the endianness won't change unless we reboot or > >>>>> kexec another kernel. And it is also reasonnable to assume the new kernel > >>>>> will reset the devices before using them (otherwise it will break). > >>>>> > >>>>> We reuse the virtio_is_big_endian() helper since it provides the right > >>>>> value for legacy devices with most of the targets, that have fixed > >>>>> endianness. It can then be overriden to support endian-ambivalent targets. > >>>>> > >>>>> To support migration, we need to set the flag in virtio_load() as well. > >>>>> > >>>>> (a) One solution would be to add it to the stream, but it have some > >>>>> drawbacks: > >>>>> - since this only affects a few targets, the field should be put into a > >>>>> subsection > >>>>> - virtio migration code should be ported to vmstate to be able to introduce > >>>>> such a subsection > >>>>> > >>>>> (b) If we assume the following to be true: > >>>>> - target endianness falls under some cpu state > >>>>> - cpu state is always restored before virtio devices state because they > >>>>> get initialized in this order in main(). > >>>>> Then an alternative is to rely on virtio_is_big_endian() again at > >>>>> load time. No need to mess around with the migration stream in this case. > >>>>> > >>>>> This patch implements (b). > >>>>> > >>>>> Note that the tswap helpers are implemented in virtio.c so that > >>>>> virtio-access.h stays platform independant. Most of the virtio code > >>>>> will be buildable under common-obj instead of obj then, and spare > >>>>> some cycles when building for multiple targets. > >>>>> > >>>>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > >>>>> [ ldq_phys() API change, > >>>>> relicensed virtio-access.h to GPLv2+ on Rusty's request, > >>>>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > >>>>> add VirtIODevice * arg to virtio helpers, > >>>>> use the existing virtio_is_big_endian() helper, > >>>>> virtio-pci: use the device is_big_endian flag, > >>>>> introduce virtio tswap16 and tswap64 helpers, > >>>>> move calls to tswap* out of virtio-access.h to make it platform independant, > >>>>> migration support, > >>>>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] > >>>>> Cc: Cédric Le Goater <clg@fr.ibm.com> > >>>>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > >>>>> --- > >>>>> > >>>>> Changes since v6: > >>>>> - merge the virtio_needs_byteswap() helper from v6 and existing > >>>>> virtio_is_big_endian() > >>>>> - virtio-pci: now supports endianness changes > >>>>> - virtio-access.h fixes (target independant) > >>>>> > >>>>> exec.c | 2 - > >>>>> hw/virtio/Makefile.objs | 2 - > >>>>> hw/virtio/virtio-pci.c | 11 +-- > >>>>> hw/virtio/virtio.c | 35 +++++++++ > >>>>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > >>>>> include/hw/virtio/virtio.h | 2 + > >>>>> vl.c | 4 + > >>>>> 7 files changed, 185 insertions(+), 9 deletions(-) > >>>>> create mode 100644 include/hw/virtio/virtio-access.h > >>>>> > >>>>> diff --git a/exec.c b/exec.c > >>>>> index 91513c6..e6777d0 100644 > >>>>> --- a/exec.c > >>>>> +++ b/exec.c > >>>>> @@ -42,6 +42,7 @@ > >>>>> #else /* !CONFIG_USER_ONLY */ > >>>>> #include "sysemu/xen-mapcache.h" > >>>>> #include "trace.h" > >>>>> +#include "hw/virtio/virtio.h" > >>>>> #endif > >>>>> #include "exec/cpu-all.h" > >>>>> @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > >>>>> * A helper function for the _utterly broken_ virtio device model to find out if > >>>>> * it's running on a big endian machine. Don't do this at home kids! > >>>>> */ > >>>>> -bool virtio_is_big_endian(void); > >>>>> bool virtio_is_big_endian(void) > >>>>> { > >>>>> #if defined(TARGET_WORDS_BIGENDIAN) > >>>>> diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > >>>>> index 1ba53d9..68c3064 100644 > >>>>> --- a/hw/virtio/Makefile.objs > >>>>> +++ b/hw/virtio/Makefile.objs > >>>>> @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > >>>>> common-obj-y += virtio-mmio.o > >>>>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > >>>>> -obj-y += virtio.o virtio-balloon.o > >>>>> +obj-y += virtio.o virtio-balloon.o > >>>>> obj-$(CONFIG_LINUX) += vhost.o > >>>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > >>>>> index ce97514..82a1689 100644 > >>>>> --- a/hw/virtio/virtio-pci.c > >>>>> +++ b/hw/virtio/virtio-pci.c > >>>>> @@ -89,9 +89,6 @@ > >>>>> /* Flags track per-device state like workarounds for quirks in older guests. */ > >>>>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > >>>>> -/* HACK for virtio to determine if it's running a big endian guest */ > >>>>> -bool virtio_is_big_endian(void); > >>>>> - > >>>>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > >>>>> VirtIOPCIProxy *dev); > >>>>> @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > >>>>> break; > >>>>> case 2: > >>>>> val = virtio_config_readw(vdev, addr); > >>>>> - if (virtio_is_big_endian()) { > >>>>> + if (vdev->is_big_endian) { > >>>>> val = bswap16(val); > >>>>> } > >>>>> break; > >>>>> case 4: > >>>>> val = virtio_config_readl(vdev, addr); > >>>>> - if (virtio_is_big_endian()) { > >>>>> + if (vdev->is_big_endian) { > >>>>> val = bswap32(val); > >>>>> } > >>>>> break; > >>>>> @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > >>>>> virtio_config_writeb(vdev, addr, val); > >>>>> break; > >>>>> case 2: > >>>>> - if (virtio_is_big_endian()) { > >>>>> + if (vdev->is_big_endian) { > >>>>> val = bswap16(val); > >>>>> } > >>>>> virtio_config_writew(vdev, addr, val); > >>>>> break; > >>>>> case 4: > >>>>> - if (virtio_is_big_endian()) { > >>>>> + if (vdev->is_big_endian) { > >>>>> val = bswap32(val); > >>>>> } > >>>>> virtio_config_writel(vdev, addr, val); > >>>>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > >>>>> index aeabf3a..bb646f0 100644 > >>>>> --- a/hw/virtio/virtio.c > >>>>> +++ b/hw/virtio/virtio.c > >>>>> @@ -19,6 +19,7 @@ > >>>>> #include "hw/virtio/virtio.h" > >>>>> #include "qemu/atomic.h" > >>>>> #include "hw/virtio/virtio-bus.h" > >>>>> +#include "hw/virtio/virtio-access.h" > >>>>> /* > >>>>> * The alignment to use between consumer and producer parts of vring. > >>>>> @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > >>>>> virtio_set_status(vdev, 0); > >>>>> + vdev->is_big_endian = virtio_is_big_endian(); > >>>>> + > >>>>> if (k->reset) { > >>>>> k->reset(vdev); > >>>>> } > >>>>> @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > >>>>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > >>>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > >>>>> + /* NOTE: we assume that endianness is a cpu state AND > >>>>> + * cpu state is restored before virtio devices. > >>>>> + */ > >>>>> + vdev->is_big_endian = virtio_is_big_endian(); > >>>>> + > >>>>> if (k->load_config) { > >>>>> ret = k->load_config(qbus->parent, f); > >>>>> if (ret) > >>>>> @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > >>>>> } > >>>>> } > >>>>> +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > >>>>> +{ > >>>>> + if (vdev->is_big_endian) { > >>>>> + return tswap16(s); > >>>>> + } else { > >>>>> + return bswap16(tswap16(s)); > >>>>> + } > >>>> This looks pretty bogus. When virtio wants to do "tswap" what it > >>>> means is "give me a host endianness value in virtio endianness". How > >>>> about something like > >>>> > >>>> #ifdef HOST_WORDS_BIGENDIAN > >>>> return vdev->is_big_endian ? s : bswap16(s); > >>>> #else > >>>> return vdev->is_big_endian ? bswap16(s) : s; > >>>> #endif > >>>> > >>> Actually why doesn't this call virtio_is_big_endian? > >>> As it is, we get extra branches even if target endian-ness > >>> is fixed. > >> Because virtio_is_big_endian() returns the default endianness, not > >> the runtime endianness of a virtio device. > > In fact, we should probably rename it accordingly. > > >> > >>>> That should work pretty well inline as well, so you don't need to > >>>> compile virtio.c as target dependent. > >>>> > >>>> > >>>> Alex > >>> Yes but we'll still need to build two variants: fixed endian and > >>> dynamic endian platforms. > >>> Something along the lines of 32/64 bit split that we have? > >> Why bother? Always make it dynamic and don't change the per-device > >> variable, no? I'd be surprised if the performance difference is > >> measurable. The bulk of the data we transfer gets copied raw anyway. > >> > >> > >> Alex > > This will have to be measured and proved by whoever's proposing the > > patch, not by reviewers. Platforms such as AMD which don't do > > prediction well would be especially interesting to test on. > > Sure, Greg, can you do that? I'm sure Michael has test cases available > he can give you to measure performance on this. > Sure... if I had access to AMD hardware ;) > Speaking of which, how does all of this work with vhost? > > > Alex >
On Mon, 14 Apr 2014 15:08:23 +0200 Alexander Graf <agraf@suse.de> wrote: > > On 14.04.14 14:55, Michael S. Tsirkin wrote: > > On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: > >> On 14.04.14 14:37, Michael S. Tsirkin wrote: > >>> On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: > >>>> On 14.04.14 14:24, Michael S. Tsirkin wrote: > >>>>> On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: > >>>>>> On 14.04.14 13:58, Greg Kurz wrote: > >>>>>>> From: Rusty Russell <rusty@rustcorp.com.au> > >>>>>>> > >>>>>>> virtio data structures are defined as "target endian", which assumes > >>>>>>> that's a fixed value. In fact, that actually means it's platform-specific. > >>>>>>> The OASIS virtio 1.0 spec will fix this, by making it all little endian. > >>>>>>> > >>>>>>> We introduce memory accessors to be used accross the virtio code where > >>>>>>> needed. These accessors should support both legacy and 1.0 devices. > >>>>>>> A good way to do it is to introduce a per-device property to store the > >>>>>>> endianness. We choose to set this flag at device reset time because it > >>>>>>> is reasonnable to assume the endianness won't change unless we reboot or > >>>>>>> kexec another kernel. And it is also reasonnable to assume the new kernel > >>>>>>> will reset the devices before using them (otherwise it will break). > >>>>>>> > >>>>>>> We reuse the virtio_is_big_endian() helper since it provides the right > >>>>>>> value for legacy devices with most of the targets, that have fixed > >>>>>>> endianness. It can then be overriden to support endian-ambivalent targets. > >>>>>>> > >>>>>>> To support migration, we need to set the flag in virtio_load() as well. > >>>>>>> > >>>>>>> (a) One solution would be to add it to the stream, but it have some > >>>>>>> drawbacks: > >>>>>>> - since this only affects a few targets, the field should be put into a > >>>>>>> subsection > >>>>>>> - virtio migration code should be ported to vmstate to be able to introduce > >>>>>>> such a subsection > >>>>>>> > >>>>>>> (b) If we assume the following to be true: > >>>>>>> - target endianness falls under some cpu state > >>>>>>> - cpu state is always restored before virtio devices state because they > >>>>>>> get initialized in this order in main(). > >>>>>>> Then an alternative is to rely on virtio_is_big_endian() again at > >>>>>>> load time. No need to mess around with the migration stream in this case. > >>>>>>> > >>>>>>> This patch implements (b). > >>>>>>> > >>>>>>> Note that the tswap helpers are implemented in virtio.c so that > >>>>>>> virtio-access.h stays platform independant. Most of the virtio code > >>>>>>> will be buildable under common-obj instead of obj then, and spare > >>>>>>> some cycles when building for multiple targets. > >>>>>>> > >>>>>>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > >>>>>>> [ ldq_phys() API change, > >>>>>>> relicensed virtio-access.h to GPLv2+ on Rusty's request, > >>>>>>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > >>>>>>> add VirtIODevice * arg to virtio helpers, > >>>>>>> use the existing virtio_is_big_endian() helper, > >>>>>>> virtio-pci: use the device is_big_endian flag, > >>>>>>> introduce virtio tswap16 and tswap64 helpers, > >>>>>>> move calls to tswap* out of virtio-access.h to make it platform independant, > >>>>>>> migration support, > >>>>>>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] > >>>>>>> Cc: Cédric Le Goater <clg@fr.ibm.com> > >>>>>>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > >>>>>>> --- > >>>>>>> > >>>>>>> Changes since v6: > >>>>>>> - merge the virtio_needs_byteswap() helper from v6 and existing > >>>>>>> virtio_is_big_endian() > >>>>>>> - virtio-pci: now supports endianness changes > >>>>>>> - virtio-access.h fixes (target independant) > >>>>>>> > >>>>>>> exec.c | 2 - > >>>>>>> hw/virtio/Makefile.objs | 2 - > >>>>>>> hw/virtio/virtio-pci.c | 11 +-- > >>>>>>> hw/virtio/virtio.c | 35 +++++++++ > >>>>>>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > >>>>>>> include/hw/virtio/virtio.h | 2 + > >>>>>>> vl.c | 4 + > >>>>>>> 7 files changed, 185 insertions(+), 9 deletions(-) > >>>>>>> create mode 100644 include/hw/virtio/virtio-access.h > >>>>>>> > >>>>>>> diff --git a/exec.c b/exec.c > >>>>>>> index 91513c6..e6777d0 100644 > >>>>>>> --- a/exec.c > >>>>>>> +++ b/exec.c > >>>>>>> @@ -42,6 +42,7 @@ > >>>>>>> #else /* !CONFIG_USER_ONLY */ > >>>>>>> #include "sysemu/xen-mapcache.h" > >>>>>>> #include "trace.h" > >>>>>>> +#include "hw/virtio/virtio.h" > >>>>>>> #endif > >>>>>>> #include "exec/cpu-all.h" > >>>>>>> @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > >>>>>>> * A helper function for the _utterly broken_ virtio device model to find out if > >>>>>>> * it's running on a big endian machine. Don't do this at home kids! > >>>>>>> */ > >>>>>>> -bool virtio_is_big_endian(void); > >>>>>>> bool virtio_is_big_endian(void) > >>>>>>> { > >>>>>>> #if defined(TARGET_WORDS_BIGENDIAN) > >>>>>>> diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > >>>>>>> index 1ba53d9..68c3064 100644 > >>>>>>> --- a/hw/virtio/Makefile.objs > >>>>>>> +++ b/hw/virtio/Makefile.objs > >>>>>>> @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > >>>>>>> common-obj-y += virtio-mmio.o > >>>>>>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > >>>>>>> -obj-y += virtio.o virtio-balloon.o > >>>>>>> +obj-y += virtio.o virtio-balloon.o > >>>>>>> obj-$(CONFIG_LINUX) += vhost.o > >>>>>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > >>>>>>> index ce97514..82a1689 100644 > >>>>>>> --- a/hw/virtio/virtio-pci.c > >>>>>>> +++ b/hw/virtio/virtio-pci.c > >>>>>>> @@ -89,9 +89,6 @@ > >>>>>>> /* Flags track per-device state like workarounds for quirks in older guests. */ > >>>>>>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > >>>>>>> -/* HACK for virtio to determine if it's running a big endian guest */ > >>>>>>> -bool virtio_is_big_endian(void); > >>>>>>> - > >>>>>>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > >>>>>>> VirtIOPCIProxy *dev); > >>>>>>> @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > >>>>>>> break; > >>>>>>> case 2: > >>>>>>> val = virtio_config_readw(vdev, addr); > >>>>>>> - if (virtio_is_big_endian()) { > >>>>>>> + if (vdev->is_big_endian) { > >>>>>>> val = bswap16(val); > >>>>>>> } > >>>>>>> break; > >>>>>>> case 4: > >>>>>>> val = virtio_config_readl(vdev, addr); > >>>>>>> - if (virtio_is_big_endian()) { > >>>>>>> + if (vdev->is_big_endian) { > >>>>>>> val = bswap32(val); > >>>>>>> } > >>>>>>> break; > >>>>>>> @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > >>>>>>> virtio_config_writeb(vdev, addr, val); > >>>>>>> break; > >>>>>>> case 2: > >>>>>>> - if (virtio_is_big_endian()) { > >>>>>>> + if (vdev->is_big_endian) { > >>>>>>> val = bswap16(val); > >>>>>>> } > >>>>>>> virtio_config_writew(vdev, addr, val); > >>>>>>> break; > >>>>>>> case 4: > >>>>>>> - if (virtio_is_big_endian()) { > >>>>>>> + if (vdev->is_big_endian) { > >>>>>>> val = bswap32(val); > >>>>>>> } > >>>>>>> virtio_config_writel(vdev, addr, val); > >>>>>>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > >>>>>>> index aeabf3a..bb646f0 100644 > >>>>>>> --- a/hw/virtio/virtio.c > >>>>>>> +++ b/hw/virtio/virtio.c > >>>>>>> @@ -19,6 +19,7 @@ > >>>>>>> #include "hw/virtio/virtio.h" > >>>>>>> #include "qemu/atomic.h" > >>>>>>> #include "hw/virtio/virtio-bus.h" > >>>>>>> +#include "hw/virtio/virtio-access.h" > >>>>>>> /* > >>>>>>> * The alignment to use between consumer and producer parts of vring. > >>>>>>> @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > >>>>>>> virtio_set_status(vdev, 0); > >>>>>>> + vdev->is_big_endian = virtio_is_big_endian(); > >>>>>>> + > >>>>>>> if (k->reset) { > >>>>>>> k->reset(vdev); > >>>>>>> } > >>>>>>> @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > >>>>>>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > >>>>>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > >>>>>>> + /* NOTE: we assume that endianness is a cpu state AND > >>>>>>> + * cpu state is restored before virtio devices. > >>>>>>> + */ > >>>>>>> + vdev->is_big_endian = virtio_is_big_endian(); > >>>>>>> + > >>>>>>> if (k->load_config) { > >>>>>>> ret = k->load_config(qbus->parent, f); > >>>>>>> if (ret) > >>>>>>> @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > >>>>>>> } > >>>>>>> } > >>>>>>> +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > >>>>>>> +{ > >>>>>>> + if (vdev->is_big_endian) { > >>>>>>> + return tswap16(s); > >>>>>>> + } else { > >>>>>>> + return bswap16(tswap16(s)); > >>>>>>> + } > >>>>>> This looks pretty bogus. When virtio wants to do "tswap" what it > >>>>>> means is "give me a host endianness value in virtio endianness". How > >>>>>> about something like > >>>>>> > >>>>>> #ifdef HOST_WORDS_BIGENDIAN > >>>>>> return vdev->is_big_endian ? s : bswap16(s); > >>>>>> #else > >>>>>> return vdev->is_big_endian ? bswap16(s) : s; > >>>>>> #endif > >>>>>> > >>>>> Actually why doesn't this call virtio_is_big_endian? > >>>>> As it is, we get extra branches even if target endian-ness > >>>>> is fixed. > >>>> Because virtio_is_big_endian() returns the default endianness, not > >>>> the runtime endianness of a virtio device. > >> In fact, we should probably rename it accordingly. > >> > >>>>>> That should work pretty well inline as well, so you don't need to > >>>>>> compile virtio.c as target dependent. > >>>>>> > >>>>>> > >>>>>> Alex > >>>>> Yes but we'll still need to build two variants: fixed endian and > >>>>> dynamic endian platforms. > >>>>> Something along the lines of 32/64 bit split that we have? > >>>> Why bother? Always make it dynamic and don't change the per-device > >>>> variable, no? I'd be surprised if the performance difference is > >>>> measurable. The bulk of the data we transfer gets copied raw anyway. > >>>> > >>>> > >>>> Alex > >>> This will have to be measured and proved by whoever's proposing the > >>> patch, not by reviewers. Platforms such as AMD which don't do > >>> prediction well would be especially interesting to test on. > >> Sure, Greg, can you do that? I'm sure Michael has test cases > >> available he can give you to measure performance on this. > > Measuring performance is hard ATM. > > > > Disk io stress when backed by raw ramdisk in host is usually the easiest > > way to stress userspace virtio. > > You need to make sure your baseline is repeateable though: > > pin down everything from cpu to hardware interrupts, > > disable power management, c states etc > > Just taking an average and hoping for the best doesn't cut it. > > > > > > We really should write a benchmark device, > > to measure just the transport overhead. > > For example, start with virtio-net but fuse TX and RX > > VQs together, so you'll get right back whatever you send out. > > > > So really, virtio is ATM target-specific and I think it's > > a good idea to get it working as such first, > > do extra cleanups like getting rid of target specific code > > separately. > > How does it help when we keep it target specific? The only way to remove > any overhead from this refactoring would be to instead of > > if (vdev->is_big_endian) > > write it as > > if (virtio_is_big_endian_device(vdev)) > > which we could define as > > static inline bool virtio_is_big_endian_device(VirtIODevice *vdev) > { > #if defined(TARGET_PPC) > return vdev->is_big_endian > #elif defined(TARGET_WORDS_BIGENDIAN) > return true; > #else > return false; > #endif > } > > If it makes you happy to do it this way first and move virtio to > target-independent files later, we can certainly do this. > > This would end up defined in all virtio files with the consequence of hitting TARGET_WORDS_BIGENDIAN poison for the target independent ones. Should we kick them out of common-obj variables in makefiles as well ? Or would it be acceptable to have this helper not inlined ? > Alex > Thanks. -- Greg
On 04/15/2014 10:40 AM, Greg Kurz wrote: > On Mon, 14 Apr 2014 15:08:23 +0200 > Alexander Graf <agraf@suse.de> wrote: > >> On 14.04.14 14:55, Michael S. Tsirkin wrote: >>> On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: >>>> On 14.04.14 14:37, Michael S. Tsirkin wrote: >>>>> On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: >>>>>> On 14.04.14 14:24, Michael S. Tsirkin wrote: >>>>>>> On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: >>>>>>>> On 14.04.14 13:58, Greg Kurz wrote: >>>>>>>>> From: Rusty Russell <rusty@rustcorp.com.au> >>>>>>>>> >>>>>>>>> virtio data structures are defined as "target endian", which assumes >>>>>>>>> that's a fixed value. In fact, that actually means it's platform-specific. >>>>>>>>> The OASIS virtio 1.0 spec will fix this, by making it all little endian. >>>>>>>>> >>>>>>>>> We introduce memory accessors to be used accross the virtio code where >>>>>>>>> needed. These accessors should support both legacy and 1.0 devices. >>>>>>>>> A good way to do it is to introduce a per-device property to store the >>>>>>>>> endianness. We choose to set this flag at device reset time because it >>>>>>>>> is reasonnable to assume the endianness won't change unless we reboot or >>>>>>>>> kexec another kernel. And it is also reasonnable to assume the new kernel >>>>>>>>> will reset the devices before using them (otherwise it will break). >>>>>>>>> >>>>>>>>> We reuse the virtio_is_big_endian() helper since it provides the right >>>>>>>>> value for legacy devices with most of the targets, that have fixed >>>>>>>>> endianness. It can then be overriden to support endian-ambivalent targets. >>>>>>>>> >>>>>>>>> To support migration, we need to set the flag in virtio_load() as well. >>>>>>>>> >>>>>>>>> (a) One solution would be to add it to the stream, but it have some >>>>>>>>> drawbacks: >>>>>>>>> - since this only affects a few targets, the field should be put into a >>>>>>>>> subsection >>>>>>>>> - virtio migration code should be ported to vmstate to be able to introduce >>>>>>>>> such a subsection >>>>>>>>> >>>>>>>>> (b) If we assume the following to be true: >>>>>>>>> - target endianness falls under some cpu state >>>>>>>>> - cpu state is always restored before virtio devices state because they >>>>>>>>> get initialized in this order in main(). >>>>>>>>> Then an alternative is to rely on virtio_is_big_endian() again at >>>>>>>>> load time. No need to mess around with the migration stream in this case. >>>>>>>>> >>>>>>>>> This patch implements (b). >>>>>>>>> >>>>>>>>> Note that the tswap helpers are implemented in virtio.c so that >>>>>>>>> virtio-access.h stays platform independant. Most of the virtio code >>>>>>>>> will be buildable under common-obj instead of obj then, and spare >>>>>>>>> some cycles when building for multiple targets. >>>>>>>>> >>>>>>>>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> >>>>>>>>> [ ldq_phys() API change, >>>>>>>>> relicensed virtio-access.h to GPLv2+ on Rusty's request, >>>>>>>>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) >>>>>>>>> add VirtIODevice * arg to virtio helpers, >>>>>>>>> use the existing virtio_is_big_endian() helper, >>>>>>>>> virtio-pci: use the device is_big_endian flag, >>>>>>>>> introduce virtio tswap16 and tswap64 helpers, >>>>>>>>> move calls to tswap* out of virtio-access.h to make it platform independant, >>>>>>>>> migration support, >>>>>>>>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] >>>>>>>>> Cc: Cédric Le Goater <clg@fr.ibm.com> >>>>>>>>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> >>>>>>>>> --- >>>>>>>>> >>>>>>>>> Changes since v6: >>>>>>>>> - merge the virtio_needs_byteswap() helper from v6 and existing >>>>>>>>> virtio_is_big_endian() >>>>>>>>> - virtio-pci: now supports endianness changes >>>>>>>>> - virtio-access.h fixes (target independant) >>>>>>>>> >>>>>>>>> exec.c | 2 - >>>>>>>>> hw/virtio/Makefile.objs | 2 - >>>>>>>>> hw/virtio/virtio-pci.c | 11 +-- >>>>>>>>> hw/virtio/virtio.c | 35 +++++++++ >>>>>>>>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ >>>>>>>>> include/hw/virtio/virtio.h | 2 + >>>>>>>>> vl.c | 4 + >>>>>>>>> 7 files changed, 185 insertions(+), 9 deletions(-) >>>>>>>>> create mode 100644 include/hw/virtio/virtio-access.h >>>>>>>>> >>>>>>>>> diff --git a/exec.c b/exec.c >>>>>>>>> index 91513c6..e6777d0 100644 >>>>>>>>> --- a/exec.c >>>>>>>>> +++ b/exec.c >>>>>>>>> @@ -42,6 +42,7 @@ >>>>>>>>> #else /* !CONFIG_USER_ONLY */ >>>>>>>>> #include "sysemu/xen-mapcache.h" >>>>>>>>> #include "trace.h" >>>>>>>>> +#include "hw/virtio/virtio.h" >>>>>>>>> #endif >>>>>>>>> #include "exec/cpu-all.h" >>>>>>>>> @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, >>>>>>>>> * A helper function for the _utterly broken_ virtio device model to find out if >>>>>>>>> * it's running on a big endian machine. Don't do this at home kids! >>>>>>>>> */ >>>>>>>>> -bool virtio_is_big_endian(void); >>>>>>>>> bool virtio_is_big_endian(void) >>>>>>>>> { >>>>>>>>> #if defined(TARGET_WORDS_BIGENDIAN) >>>>>>>>> diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs >>>>>>>>> index 1ba53d9..68c3064 100644 >>>>>>>>> --- a/hw/virtio/Makefile.objs >>>>>>>>> +++ b/hw/virtio/Makefile.objs >>>>>>>>> @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o >>>>>>>>> common-obj-y += virtio-mmio.o >>>>>>>>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ >>>>>>>>> -obj-y += virtio.o virtio-balloon.o >>>>>>>>> +obj-y += virtio.o virtio-balloon.o >>>>>>>>> obj-$(CONFIG_LINUX) += vhost.o >>>>>>>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c >>>>>>>>> index ce97514..82a1689 100644 >>>>>>>>> --- a/hw/virtio/virtio-pci.c >>>>>>>>> +++ b/hw/virtio/virtio-pci.c >>>>>>>>> @@ -89,9 +89,6 @@ >>>>>>>>> /* Flags track per-device state like workarounds for quirks in older guests. */ >>>>>>>>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) >>>>>>>>> -/* HACK for virtio to determine if it's running a big endian guest */ >>>>>>>>> -bool virtio_is_big_endian(void); >>>>>>>>> - >>>>>>>>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, >>>>>>>>> VirtIOPCIProxy *dev); >>>>>>>>> @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, >>>>>>>>> break; >>>>>>>>> case 2: >>>>>>>>> val = virtio_config_readw(vdev, addr); >>>>>>>>> - if (virtio_is_big_endian()) { >>>>>>>>> + if (vdev->is_big_endian) { >>>>>>>>> val = bswap16(val); >>>>>>>>> } >>>>>>>>> break; >>>>>>>>> case 4: >>>>>>>>> val = virtio_config_readl(vdev, addr); >>>>>>>>> - if (virtio_is_big_endian()) { >>>>>>>>> + if (vdev->is_big_endian) { >>>>>>>>> val = bswap32(val); >>>>>>>>> } >>>>>>>>> break; >>>>>>>>> @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, >>>>>>>>> virtio_config_writeb(vdev, addr, val); >>>>>>>>> break; >>>>>>>>> case 2: >>>>>>>>> - if (virtio_is_big_endian()) { >>>>>>>>> + if (vdev->is_big_endian) { >>>>>>>>> val = bswap16(val); >>>>>>>>> } >>>>>>>>> virtio_config_writew(vdev, addr, val); >>>>>>>>> break; >>>>>>>>> case 4: >>>>>>>>> - if (virtio_is_big_endian()) { >>>>>>>>> + if (vdev->is_big_endian) { >>>>>>>>> val = bswap32(val); >>>>>>>>> } >>>>>>>>> virtio_config_writel(vdev, addr, val); >>>>>>>>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c >>>>>>>>> index aeabf3a..bb646f0 100644 >>>>>>>>> --- a/hw/virtio/virtio.c >>>>>>>>> +++ b/hw/virtio/virtio.c >>>>>>>>> @@ -19,6 +19,7 @@ >>>>>>>>> #include "hw/virtio/virtio.h" >>>>>>>>> #include "qemu/atomic.h" >>>>>>>>> #include "hw/virtio/virtio-bus.h" >>>>>>>>> +#include "hw/virtio/virtio-access.h" >>>>>>>>> /* >>>>>>>>> * The alignment to use between consumer and producer parts of vring. >>>>>>>>> @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) >>>>>>>>> virtio_set_status(vdev, 0); >>>>>>>>> + vdev->is_big_endian = virtio_is_big_endian(); >>>>>>>>> + >>>>>>>>> if (k->reset) { >>>>>>>>> k->reset(vdev); >>>>>>>>> } >>>>>>>>> @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) >>>>>>>>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); >>>>>>>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); >>>>>>>>> + /* NOTE: we assume that endianness is a cpu state AND >>>>>>>>> + * cpu state is restored before virtio devices. >>>>>>>>> + */ >>>>>>>>> + vdev->is_big_endian = virtio_is_big_endian(); >>>>>>>>> + >>>>>>>>> if (k->load_config) { >>>>>>>>> ret = k->load_config(qbus->parent, f); >>>>>>>>> if (ret) >>>>>>>>> @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) >>>>>>>>> } >>>>>>>>> } >>>>>>>>> +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) >>>>>>>>> +{ >>>>>>>>> + if (vdev->is_big_endian) { >>>>>>>>> + return tswap16(s); >>>>>>>>> + } else { >>>>>>>>> + return bswap16(tswap16(s)); >>>>>>>>> + } >>>>>>>> This looks pretty bogus. When virtio wants to do "tswap" what it >>>>>>>> means is "give me a host endianness value in virtio endianness". How >>>>>>>> about something like >>>>>>>> >>>>>>>> #ifdef HOST_WORDS_BIGENDIAN >>>>>>>> return vdev->is_big_endian ? s : bswap16(s); >>>>>>>> #else >>>>>>>> return vdev->is_big_endian ? bswap16(s) : s; >>>>>>>> #endif >>>>>>>> >>>>>>> Actually why doesn't this call virtio_is_big_endian? >>>>>>> As it is, we get extra branches even if target endian-ness >>>>>>> is fixed. >>>>>> Because virtio_is_big_endian() returns the default endianness, not >>>>>> the runtime endianness of a virtio device. >>>> In fact, we should probably rename it accordingly. >>>> >>>>>>>> That should work pretty well inline as well, so you don't need to >>>>>>>> compile virtio.c as target dependent. >>>>>>>> >>>>>>>> >>>>>>>> Alex >>>>>>> Yes but we'll still need to build two variants: fixed endian and >>>>>>> dynamic endian platforms. >>>>>>> Something along the lines of 32/64 bit split that we have? >>>>>> Why bother? Always make it dynamic and don't change the per-device >>>>>> variable, no? I'd be surprised if the performance difference is >>>>>> measurable. The bulk of the data we transfer gets copied raw anyway. >>>>>> >>>>>> >>>>>> Alex >>>>> This will have to be measured and proved by whoever's proposing the >>>>> patch, not by reviewers. Platforms such as AMD which don't do >>>>> prediction well would be especially interesting to test on. >>>> Sure, Greg, can you do that? I'm sure Michael has test cases >>>> available he can give you to measure performance on this. >>> Measuring performance is hard ATM. >>> >>> Disk io stress when backed by raw ramdisk in host is usually the easiest >>> way to stress userspace virtio. >>> You need to make sure your baseline is repeateable though: >>> pin down everything from cpu to hardware interrupts, >>> disable power management, c states etc >>> Just taking an average and hoping for the best doesn't cut it. >>> >>> >>> We really should write a benchmark device, >>> to measure just the transport overhead. >>> For example, start with virtio-net but fuse TX and RX >>> VQs together, so you'll get right back whatever you send out. >>> >>> So really, virtio is ATM target-specific and I think it's >>> a good idea to get it working as such first, >>> do extra cleanups like getting rid of target specific code >>> separately. >> How does it help when we keep it target specific? The only way to remove >> any overhead from this refactoring would be to instead of >> >> if (vdev->is_big_endian) >> >> write it as >> >> if (virtio_is_big_endian_device(vdev)) >> >> which we could define as >> >> static inline bool virtio_is_big_endian_device(VirtIODevice *vdev) >> { >> #if defined(TARGET_PPC) >> return vdev->is_big_endian >> #elif defined(TARGET_WORDS_BIGENDIAN) >> return true; >> #else >> return false; >> #endif >> } >> >> If it makes you happy to do it this way first and move virtio to >> target-independent files later, we can certainly do this. >> >> > This would end up defined in all virtio files with the consequence of > hitting TARGET_WORDS_BIGENDIAN poison for the target independent ones. > Should we kick them out of common-obj variables in makefiles as well ? > Or would it be acceptable to have this helper not inlined ? That would defeat the purpose - the reason to have the helper inlined is to remove the conditional branch for x86. Alex
On Tue, 15 Apr 2014 13:35:03 +0200 Alexander Graf <agraf@suse.de> wrote: > On 04/15/2014 10:40 AM, Greg Kurz wrote: > > On Mon, 14 Apr 2014 15:08:23 +0200 > > Alexander Graf <agraf@suse.de> wrote: > > > >> On 14.04.14 14:55, Michael S. Tsirkin wrote: > >>> On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: > >>>> On 14.04.14 14:37, Michael S. Tsirkin wrote: > >>>>> On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: > >>>>>> On 14.04.14 14:24, Michael S. Tsirkin wrote: > >>>>>>> On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: > >>>>>>>> On 14.04.14 13:58, Greg Kurz wrote: > >>>>>>>>> From: Rusty Russell <rusty@rustcorp.com.au> > >>>>>>>>> > >>>>>>>>> virtio data structures are defined as "target endian", which assumes > >>>>>>>>> that's a fixed value. In fact, that actually means it's platform-specific. > >>>>>>>>> The OASIS virtio 1.0 spec will fix this, by making it all little endian. > >>>>>>>>> > >>>>>>>>> We introduce memory accessors to be used accross the virtio code where > >>>>>>>>> needed. These accessors should support both legacy and 1.0 devices. > >>>>>>>>> A good way to do it is to introduce a per-device property to store the > >>>>>>>>> endianness. We choose to set this flag at device reset time because it > >>>>>>>>> is reasonnable to assume the endianness won't change unless we reboot or > >>>>>>>>> kexec another kernel. And it is also reasonnable to assume the new kernel > >>>>>>>>> will reset the devices before using them (otherwise it will break). > >>>>>>>>> > >>>>>>>>> We reuse the virtio_is_big_endian() helper since it provides the right > >>>>>>>>> value for legacy devices with most of the targets, that have fixed > >>>>>>>>> endianness. It can then be overriden to support endian-ambivalent targets. > >>>>>>>>> > >>>>>>>>> To support migration, we need to set the flag in virtio_load() as well. > >>>>>>>>> > >>>>>>>>> (a) One solution would be to add it to the stream, but it have some > >>>>>>>>> drawbacks: > >>>>>>>>> - since this only affects a few targets, the field should be put into a > >>>>>>>>> subsection > >>>>>>>>> - virtio migration code should be ported to vmstate to be able to introduce > >>>>>>>>> such a subsection > >>>>>>>>> > >>>>>>>>> (b) If we assume the following to be true: > >>>>>>>>> - target endianness falls under some cpu state > >>>>>>>>> - cpu state is always restored before virtio devices state because they > >>>>>>>>> get initialized in this order in main(). > >>>>>>>>> Then an alternative is to rely on virtio_is_big_endian() again at > >>>>>>>>> load time. No need to mess around with the migration stream in this case. > >>>>>>>>> > >>>>>>>>> This patch implements (b). > >>>>>>>>> > >>>>>>>>> Note that the tswap helpers are implemented in virtio.c so that > >>>>>>>>> virtio-access.h stays platform independant. Most of the virtio code > >>>>>>>>> will be buildable under common-obj instead of obj then, and spare > >>>>>>>>> some cycles when building for multiple targets. > >>>>>>>>> > >>>>>>>>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > >>>>>>>>> [ ldq_phys() API change, > >>>>>>>>> relicensed virtio-access.h to GPLv2+ on Rusty's request, > >>>>>>>>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > >>>>>>>>> add VirtIODevice * arg to virtio helpers, > >>>>>>>>> use the existing virtio_is_big_endian() helper, > >>>>>>>>> virtio-pci: use the device is_big_endian flag, > >>>>>>>>> introduce virtio tswap16 and tswap64 helpers, > >>>>>>>>> move calls to tswap* out of virtio-access.h to make it platform independant, > >>>>>>>>> migration support, > >>>>>>>>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] > >>>>>>>>> Cc: Cédric Le Goater <clg@fr.ibm.com> > >>>>>>>>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > >>>>>>>>> --- > >>>>>>>>> > >>>>>>>>> Changes since v6: > >>>>>>>>> - merge the virtio_needs_byteswap() helper from v6 and existing > >>>>>>>>> virtio_is_big_endian() > >>>>>>>>> - virtio-pci: now supports endianness changes > >>>>>>>>> - virtio-access.h fixes (target independant) > >>>>>>>>> > >>>>>>>>> exec.c | 2 - > >>>>>>>>> hw/virtio/Makefile.objs | 2 - > >>>>>>>>> hw/virtio/virtio-pci.c | 11 +-- > >>>>>>>>> hw/virtio/virtio.c | 35 +++++++++ > >>>>>>>>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > >>>>>>>>> include/hw/virtio/virtio.h | 2 + > >>>>>>>>> vl.c | 4 + > >>>>>>>>> 7 files changed, 185 insertions(+), 9 deletions(-) > >>>>>>>>> create mode 100644 include/hw/virtio/virtio-access.h > >>>>>>>>> > >>>>>>>>> diff --git a/exec.c b/exec.c > >>>>>>>>> index 91513c6..e6777d0 100644 > >>>>>>>>> --- a/exec.c > >>>>>>>>> +++ b/exec.c > >>>>>>>>> @@ -42,6 +42,7 @@ > >>>>>>>>> #else /* !CONFIG_USER_ONLY */ > >>>>>>>>> #include "sysemu/xen-mapcache.h" > >>>>>>>>> #include "trace.h" > >>>>>>>>> +#include "hw/virtio/virtio.h" > >>>>>>>>> #endif > >>>>>>>>> #include "exec/cpu-all.h" > >>>>>>>>> @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > >>>>>>>>> * A helper function for the _utterly broken_ virtio device model to find out if > >>>>>>>>> * it's running on a big endian machine. Don't do this at home kids! > >>>>>>>>> */ > >>>>>>>>> -bool virtio_is_big_endian(void); > >>>>>>>>> bool virtio_is_big_endian(void) > >>>>>>>>> { > >>>>>>>>> #if defined(TARGET_WORDS_BIGENDIAN) > >>>>>>>>> diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > >>>>>>>>> index 1ba53d9..68c3064 100644 > >>>>>>>>> --- a/hw/virtio/Makefile.objs > >>>>>>>>> +++ b/hw/virtio/Makefile.objs > >>>>>>>>> @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > >>>>>>>>> common-obj-y += virtio-mmio.o > >>>>>>>>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > >>>>>>>>> -obj-y += virtio.o virtio-balloon.o > >>>>>>>>> +obj-y += virtio.o virtio-balloon.o > >>>>>>>>> obj-$(CONFIG_LINUX) += vhost.o > >>>>>>>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > >>>>>>>>> index ce97514..82a1689 100644 > >>>>>>>>> --- a/hw/virtio/virtio-pci.c > >>>>>>>>> +++ b/hw/virtio/virtio-pci.c > >>>>>>>>> @@ -89,9 +89,6 @@ > >>>>>>>>> /* Flags track per-device state like workarounds for quirks in older guests. */ > >>>>>>>>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > >>>>>>>>> -/* HACK for virtio to determine if it's running a big endian guest */ > >>>>>>>>> -bool virtio_is_big_endian(void); > >>>>>>>>> - > >>>>>>>>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > >>>>>>>>> VirtIOPCIProxy *dev); > >>>>>>>>> @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > >>>>>>>>> break; > >>>>>>>>> case 2: > >>>>>>>>> val = virtio_config_readw(vdev, addr); > >>>>>>>>> - if (virtio_is_big_endian()) { > >>>>>>>>> + if (vdev->is_big_endian) { > >>>>>>>>> val = bswap16(val); > >>>>>>>>> } > >>>>>>>>> break; > >>>>>>>>> case 4: > >>>>>>>>> val = virtio_config_readl(vdev, addr); > >>>>>>>>> - if (virtio_is_big_endian()) { > >>>>>>>>> + if (vdev->is_big_endian) { > >>>>>>>>> val = bswap32(val); > >>>>>>>>> } > >>>>>>>>> break; > >>>>>>>>> @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > >>>>>>>>> virtio_config_writeb(vdev, addr, val); > >>>>>>>>> break; > >>>>>>>>> case 2: > >>>>>>>>> - if (virtio_is_big_endian()) { > >>>>>>>>> + if (vdev->is_big_endian) { > >>>>>>>>> val = bswap16(val); > >>>>>>>>> } > >>>>>>>>> virtio_config_writew(vdev, addr, val); > >>>>>>>>> break; > >>>>>>>>> case 4: > >>>>>>>>> - if (virtio_is_big_endian()) { > >>>>>>>>> + if (vdev->is_big_endian) { > >>>>>>>>> val = bswap32(val); > >>>>>>>>> } > >>>>>>>>> virtio_config_writel(vdev, addr, val); > >>>>>>>>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > >>>>>>>>> index aeabf3a..bb646f0 100644 > >>>>>>>>> --- a/hw/virtio/virtio.c > >>>>>>>>> +++ b/hw/virtio/virtio.c > >>>>>>>>> @@ -19,6 +19,7 @@ > >>>>>>>>> #include "hw/virtio/virtio.h" > >>>>>>>>> #include "qemu/atomic.h" > >>>>>>>>> #include "hw/virtio/virtio-bus.h" > >>>>>>>>> +#include "hw/virtio/virtio-access.h" > >>>>>>>>> /* > >>>>>>>>> * The alignment to use between consumer and producer parts of vring. > >>>>>>>>> @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > >>>>>>>>> virtio_set_status(vdev, 0); > >>>>>>>>> + vdev->is_big_endian = virtio_is_big_endian(); > >>>>>>>>> + > >>>>>>>>> if (k->reset) { > >>>>>>>>> k->reset(vdev); > >>>>>>>>> } > >>>>>>>>> @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > >>>>>>>>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > >>>>>>>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > >>>>>>>>> + /* NOTE: we assume that endianness is a cpu state AND > >>>>>>>>> + * cpu state is restored before virtio devices. > >>>>>>>>> + */ > >>>>>>>>> + vdev->is_big_endian = virtio_is_big_endian(); > >>>>>>>>> + > >>>>>>>>> if (k->load_config) { > >>>>>>>>> ret = k->load_config(qbus->parent, f); > >>>>>>>>> if (ret) > >>>>>>>>> @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > >>>>>>>>> } > >>>>>>>>> } > >>>>>>>>> +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > >>>>>>>>> +{ > >>>>>>>>> + if (vdev->is_big_endian) { > >>>>>>>>> + return tswap16(s); > >>>>>>>>> + } else { > >>>>>>>>> + return bswap16(tswap16(s)); > >>>>>>>>> + } > >>>>>>>> This looks pretty bogus. When virtio wants to do "tswap" what it > >>>>>>>> means is "give me a host endianness value in virtio endianness". How > >>>>>>>> about something like > >>>>>>>> > >>>>>>>> #ifdef HOST_WORDS_BIGENDIAN > >>>>>>>> return vdev->is_big_endian ? s : bswap16(s); > >>>>>>>> #else > >>>>>>>> return vdev->is_big_endian ? bswap16(s) : s; > >>>>>>>> #endif > >>>>>>>> > >>>>>>> Actually why doesn't this call virtio_is_big_endian? > >>>>>>> As it is, we get extra branches even if target endian-ness > >>>>>>> is fixed. > >>>>>> Because virtio_is_big_endian() returns the default endianness, not > >>>>>> the runtime endianness of a virtio device. > >>>> In fact, we should probably rename it accordingly. > >>>> > >>>>>>>> That should work pretty well inline as well, so you don't need to > >>>>>>>> compile virtio.c as target dependent. > >>>>>>>> > >>>>>>>> > >>>>>>>> Alex > >>>>>>> Yes but we'll still need to build two variants: fixed endian and > >>>>>>> dynamic endian platforms. > >>>>>>> Something along the lines of 32/64 bit split that we have? > >>>>>> Why bother? Always make it dynamic and don't change the per-device > >>>>>> variable, no? I'd be surprised if the performance difference is > >>>>>> measurable. The bulk of the data we transfer gets copied raw anyway. > >>>>>> > >>>>>> > >>>>>> Alex > >>>>> This will have to be measured and proved by whoever's proposing the > >>>>> patch, not by reviewers. Platforms such as AMD which don't do > >>>>> prediction well would be especially interesting to test on. > >>>> Sure, Greg, can you do that? I'm sure Michael has test cases > >>>> available he can give you to measure performance on this. > >>> Measuring performance is hard ATM. > >>> > >>> Disk io stress when backed by raw ramdisk in host is usually the easiest > >>> way to stress userspace virtio. > >>> You need to make sure your baseline is repeateable though: > >>> pin down everything from cpu to hardware interrupts, > >>> disable power management, c states etc > >>> Just taking an average and hoping for the best doesn't cut it. > >>> > >>> > >>> We really should write a benchmark device, > >>> to measure just the transport overhead. > >>> For example, start with virtio-net but fuse TX and RX > >>> VQs together, so you'll get right back whatever you send out. > >>> > >>> So really, virtio is ATM target-specific and I think it's > >>> a good idea to get it working as such first, > >>> do extra cleanups like getting rid of target specific code > >>> separately. > >> How does it help when we keep it target specific? The only way to remove > >> any overhead from this refactoring would be to instead of > >> > >> if (vdev->is_big_endian) > >> > >> write it as > >> > >> if (virtio_is_big_endian_device(vdev)) > >> > >> which we could define as > >> > >> static inline bool virtio_is_big_endian_device(VirtIODevice *vdev) > >> { > >> #if defined(TARGET_PPC) > >> return vdev->is_big_endian > >> #elif defined(TARGET_WORDS_BIGENDIAN) > >> return true; > >> #else > >> return false; > >> #endif > >> } > >> > >> If it makes you happy to do it this way first and move virtio to > >> target-independent files later, we can certainly do this. > >> > >> > > This would end up defined in all virtio files with the consequence of > > hitting TARGET_WORDS_BIGENDIAN poison for the target independent ones. > > Should we kick them out of common-obj variables in makefiles as well ? > > Or would it be acceptable to have this helper not inlined ? > > That would defeat the purpose - the reason to have the helper inlined is > to remove the conditional branch for x86. > Sure but on the other hand, Peter does not like the idea of moving virtio to compiled-per-target... these look like contradictory requests to me... :-\ Unless I have missed something, we have to settle whether we favor building/testing time or performance of non-{powerpc,arm} targets to have legacy virtio supporting LE powerpc and BE arm... > > Alex >
On 15 April 2014 14:22, Greg Kurz <gkurz@linux.vnet.ibm.com> wrote: > On Tue, 15 Apr 2014 13:35:03 +0200 > Alexander Graf <agraf@suse.de> wrote: >> That would defeat the purpose - the reason to have the helper inlined is >> to remove the conditional branch for x86. >> > > Sure but on the other hand, Peter does not like the idea of moving virtio > to compiled-per-target... these look like contradictory requests to me... :-\ > Unless I have missed something, we have to settle whether we favor building/testing > time or performance of non-{powerpc,arm} targets to have legacy virtio supporting > LE powerpc and BE arm... To repeat my remarks from IRC: this weirdo endianness behaviour was a dumb spec decision, and dumb spec decisions have to be paid for. virtio 1.0 should fix this, so we can just suggest that anybody who really cares about performance down to avoiding conditional branches should switch to that, right? My suggestion was that: * virtio_is_big_endian should be a CPU object method * PPC (and eventually ARM) implement this by checking whatever the state flag is * base class implementation returns true/false based on TARGET_WORDS_BIGENDIAN * virtio devices call this method on current_cpu when the guest first touches/resets the virtio device, and cache the result thanks -- PMM
On Tue, Apr 15, 2014 at 03:22:56PM +0200, Greg Kurz wrote: > On Tue, 15 Apr 2014 13:35:03 +0200 > Alexander Graf <agraf@suse.de> wrote: > > > On 04/15/2014 10:40 AM, Greg Kurz wrote: > > > On Mon, 14 Apr 2014 15:08:23 +0200 > > > Alexander Graf <agraf@suse.de> wrote: > > > > > >> On 14.04.14 14:55, Michael S. Tsirkin wrote: > > >>> On Mon, Apr 14, 2014 at 02:40:04PM +0200, Alexander Graf wrote: > > >>>> On 14.04.14 14:37, Michael S. Tsirkin wrote: > > >>>>> On Mon, Apr 14, 2014 at 02:29:20PM +0200, Alexander Graf wrote: > > >>>>>> On 14.04.14 14:24, Michael S. Tsirkin wrote: > > >>>>>>> On Mon, Apr 14, 2014 at 02:16:03PM +0200, Alexander Graf wrote: > > >>>>>>>> On 14.04.14 13:58, Greg Kurz wrote: > > >>>>>>>>> From: Rusty Russell <rusty@rustcorp.com.au> > > >>>>>>>>> > > >>>>>>>>> virtio data structures are defined as "target endian", which assumes > > >>>>>>>>> that's a fixed value. In fact, that actually means it's platform-specific. > > >>>>>>>>> The OASIS virtio 1.0 spec will fix this, by making it all little endian. > > >>>>>>>>> > > >>>>>>>>> We introduce memory accessors to be used accross the virtio code where > > >>>>>>>>> needed. These accessors should support both legacy and 1.0 devices. > > >>>>>>>>> A good way to do it is to introduce a per-device property to store the > > >>>>>>>>> endianness. We choose to set this flag at device reset time because it > > >>>>>>>>> is reasonnable to assume the endianness won't change unless we reboot or > > >>>>>>>>> kexec another kernel. And it is also reasonnable to assume the new kernel > > >>>>>>>>> will reset the devices before using them (otherwise it will break). > > >>>>>>>>> > > >>>>>>>>> We reuse the virtio_is_big_endian() helper since it provides the right > > >>>>>>>>> value for legacy devices with most of the targets, that have fixed > > >>>>>>>>> endianness. It can then be overriden to support endian-ambivalent targets. > > >>>>>>>>> > > >>>>>>>>> To support migration, we need to set the flag in virtio_load() as well. > > >>>>>>>>> > > >>>>>>>>> (a) One solution would be to add it to the stream, but it have some > > >>>>>>>>> drawbacks: > > >>>>>>>>> - since this only affects a few targets, the field should be put into a > > >>>>>>>>> subsection > > >>>>>>>>> - virtio migration code should be ported to vmstate to be able to introduce > > >>>>>>>>> such a subsection > > >>>>>>>>> > > >>>>>>>>> (b) If we assume the following to be true: > > >>>>>>>>> - target endianness falls under some cpu state > > >>>>>>>>> - cpu state is always restored before virtio devices state because they > > >>>>>>>>> get initialized in this order in main(). > > >>>>>>>>> Then an alternative is to rely on virtio_is_big_endian() again at > > >>>>>>>>> load time. No need to mess around with the migration stream in this case. > > >>>>>>>>> > > >>>>>>>>> This patch implements (b). > > >>>>>>>>> > > >>>>>>>>> Note that the tswap helpers are implemented in virtio.c so that > > >>>>>>>>> virtio-access.h stays platform independant. Most of the virtio code > > >>>>>>>>> will be buildable under common-obj instead of obj then, and spare > > >>>>>>>>> some cycles when building for multiple targets. > > >>>>>>>>> > > >>>>>>>>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > > >>>>>>>>> [ ldq_phys() API change, > > >>>>>>>>> relicensed virtio-access.h to GPLv2+ on Rusty's request, > > >>>>>>>>> introduce a per-device is_big_endian flag (supersedes needs_byteswap global) > > >>>>>>>>> add VirtIODevice * arg to virtio helpers, > > >>>>>>>>> use the existing virtio_is_big_endian() helper, > > >>>>>>>>> virtio-pci: use the device is_big_endian flag, > > >>>>>>>>> introduce virtio tswap16 and tswap64 helpers, > > >>>>>>>>> move calls to tswap* out of virtio-access.h to make it platform independant, > > >>>>>>>>> migration support, > > >>>>>>>>> Greg Kurz <gkurz@linux.vnet.ibm.com> ] > > >>>>>>>>> Cc: Cédric Le Goater <clg@fr.ibm.com> > > >>>>>>>>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com> > > >>>>>>>>> --- > > >>>>>>>>> > > >>>>>>>>> Changes since v6: > > >>>>>>>>> - merge the virtio_needs_byteswap() helper from v6 and existing > > >>>>>>>>> virtio_is_big_endian() > > >>>>>>>>> - virtio-pci: now supports endianness changes > > >>>>>>>>> - virtio-access.h fixes (target independant) > > >>>>>>>>> > > >>>>>>>>> exec.c | 2 - > > >>>>>>>>> hw/virtio/Makefile.objs | 2 - > > >>>>>>>>> hw/virtio/virtio-pci.c | 11 +-- > > >>>>>>>>> hw/virtio/virtio.c | 35 +++++++++ > > >>>>>>>>> include/hw/virtio/virtio-access.h | 138 +++++++++++++++++++++++++++++++++++++ > > >>>>>>>>> include/hw/virtio/virtio.h | 2 + > > >>>>>>>>> vl.c | 4 + > > >>>>>>>>> 7 files changed, 185 insertions(+), 9 deletions(-) > > >>>>>>>>> create mode 100644 include/hw/virtio/virtio-access.h > > >>>>>>>>> > > >>>>>>>>> diff --git a/exec.c b/exec.c > > >>>>>>>>> index 91513c6..e6777d0 100644 > > >>>>>>>>> --- a/exec.c > > >>>>>>>>> +++ b/exec.c > > >>>>>>>>> @@ -42,6 +42,7 @@ > > >>>>>>>>> #else /* !CONFIG_USER_ONLY */ > > >>>>>>>>> #include "sysemu/xen-mapcache.h" > > >>>>>>>>> #include "trace.h" > > >>>>>>>>> +#include "hw/virtio/virtio.h" > > >>>>>>>>> #endif > > >>>>>>>>> #include "exec/cpu-all.h" > > >>>>>>>>> @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, > > >>>>>>>>> * A helper function for the _utterly broken_ virtio device model to find out if > > >>>>>>>>> * it's running on a big endian machine. Don't do this at home kids! > > >>>>>>>>> */ > > >>>>>>>>> -bool virtio_is_big_endian(void); > > >>>>>>>>> bool virtio_is_big_endian(void) > > >>>>>>>>> { > > >>>>>>>>> #if defined(TARGET_WORDS_BIGENDIAN) > > >>>>>>>>> diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > > >>>>>>>>> index 1ba53d9..68c3064 100644 > > >>>>>>>>> --- a/hw/virtio/Makefile.objs > > >>>>>>>>> +++ b/hw/virtio/Makefile.objs > > >>>>>>>>> @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o > > >>>>>>>>> common-obj-y += virtio-mmio.o > > >>>>>>>>> common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ > > >>>>>>>>> -obj-y += virtio.o virtio-balloon.o > > >>>>>>>>> +obj-y += virtio.o virtio-balloon.o > > >>>>>>>>> obj-$(CONFIG_LINUX) += vhost.o > > >>>>>>>>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > > >>>>>>>>> index ce97514..82a1689 100644 > > >>>>>>>>> --- a/hw/virtio/virtio-pci.c > > >>>>>>>>> +++ b/hw/virtio/virtio-pci.c > > >>>>>>>>> @@ -89,9 +89,6 @@ > > >>>>>>>>> /* Flags track per-device state like workarounds for quirks in older guests. */ > > >>>>>>>>> #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) > > >>>>>>>>> -/* HACK for virtio to determine if it's running a big endian guest */ > > >>>>>>>>> -bool virtio_is_big_endian(void); > > >>>>>>>>> - > > >>>>>>>>> static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, > > >>>>>>>>> VirtIOPCIProxy *dev); > > >>>>>>>>> @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, > > >>>>>>>>> break; > > >>>>>>>>> case 2: > > >>>>>>>>> val = virtio_config_readw(vdev, addr); > > >>>>>>>>> - if (virtio_is_big_endian()) { > > >>>>>>>>> + if (vdev->is_big_endian) { > > >>>>>>>>> val = bswap16(val); > > >>>>>>>>> } > > >>>>>>>>> break; > > >>>>>>>>> case 4: > > >>>>>>>>> val = virtio_config_readl(vdev, addr); > > >>>>>>>>> - if (virtio_is_big_endian()) { > > >>>>>>>>> + if (vdev->is_big_endian) { > > >>>>>>>>> val = bswap32(val); > > >>>>>>>>> } > > >>>>>>>>> break; > > >>>>>>>>> @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, > > >>>>>>>>> virtio_config_writeb(vdev, addr, val); > > >>>>>>>>> break; > > >>>>>>>>> case 2: > > >>>>>>>>> - if (virtio_is_big_endian()) { > > >>>>>>>>> + if (vdev->is_big_endian) { > > >>>>>>>>> val = bswap16(val); > > >>>>>>>>> } > > >>>>>>>>> virtio_config_writew(vdev, addr, val); > > >>>>>>>>> break; > > >>>>>>>>> case 4: > > >>>>>>>>> - if (virtio_is_big_endian()) { > > >>>>>>>>> + if (vdev->is_big_endian) { > > >>>>>>>>> val = bswap32(val); > > >>>>>>>>> } > > >>>>>>>>> virtio_config_writel(vdev, addr, val); > > >>>>>>>>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > >>>>>>>>> index aeabf3a..bb646f0 100644 > > >>>>>>>>> --- a/hw/virtio/virtio.c > > >>>>>>>>> +++ b/hw/virtio/virtio.c > > >>>>>>>>> @@ -19,6 +19,7 @@ > > >>>>>>>>> #include "hw/virtio/virtio.h" > > >>>>>>>>> #include "qemu/atomic.h" > > >>>>>>>>> #include "hw/virtio/virtio-bus.h" > > >>>>>>>>> +#include "hw/virtio/virtio-access.h" > > >>>>>>>>> /* > > >>>>>>>>> * The alignment to use between consumer and producer parts of vring. > > >>>>>>>>> @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) > > >>>>>>>>> virtio_set_status(vdev, 0); > > >>>>>>>>> + vdev->is_big_endian = virtio_is_big_endian(); > > >>>>>>>>> + > > >>>>>>>>> if (k->reset) { > > >>>>>>>>> k->reset(vdev); > > >>>>>>>>> } > > >>>>>>>>> @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > > >>>>>>>>> BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); > > >>>>>>>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > > >>>>>>>>> + /* NOTE: we assume that endianness is a cpu state AND > > >>>>>>>>> + * cpu state is restored before virtio devices. > > >>>>>>>>> + */ > > >>>>>>>>> + vdev->is_big_endian = virtio_is_big_endian(); > > >>>>>>>>> + > > >>>>>>>>> if (k->load_config) { > > >>>>>>>>> ret = k->load_config(qbus->parent, f); > > >>>>>>>>> if (ret) > > >>>>>>>>> @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) > > >>>>>>>>> } > > >>>>>>>>> } > > >>>>>>>>> +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) > > >>>>>>>>> +{ > > >>>>>>>>> + if (vdev->is_big_endian) { > > >>>>>>>>> + return tswap16(s); > > >>>>>>>>> + } else { > > >>>>>>>>> + return bswap16(tswap16(s)); > > >>>>>>>>> + } > > >>>>>>>> This looks pretty bogus. When virtio wants to do "tswap" what it > > >>>>>>>> means is "give me a host endianness value in virtio endianness". How > > >>>>>>>> about something like > > >>>>>>>> > > >>>>>>>> #ifdef HOST_WORDS_BIGENDIAN > > >>>>>>>> return vdev->is_big_endian ? s : bswap16(s); > > >>>>>>>> #else > > >>>>>>>> return vdev->is_big_endian ? bswap16(s) : s; > > >>>>>>>> #endif > > >>>>>>>> > > >>>>>>> Actually why doesn't this call virtio_is_big_endian? > > >>>>>>> As it is, we get extra branches even if target endian-ness > > >>>>>>> is fixed. > > >>>>>> Because virtio_is_big_endian() returns the default endianness, not > > >>>>>> the runtime endianness of a virtio device. > > >>>> In fact, we should probably rename it accordingly. > > >>>> > > >>>>>>>> That should work pretty well inline as well, so you don't need to > > >>>>>>>> compile virtio.c as target dependent. > > >>>>>>>> > > >>>>>>>> > > >>>>>>>> Alex > > >>>>>>> Yes but we'll still need to build two variants: fixed endian and > > >>>>>>> dynamic endian platforms. > > >>>>>>> Something along the lines of 32/64 bit split that we have? > > >>>>>> Why bother? Always make it dynamic and don't change the per-device > > >>>>>> variable, no? I'd be surprised if the performance difference is > > >>>>>> measurable. The bulk of the data we transfer gets copied raw anyway. > > >>>>>> > > >>>>>> > > >>>>>> Alex > > >>>>> This will have to be measured and proved by whoever's proposing the > > >>>>> patch, not by reviewers. Platforms such as AMD which don't do > > >>>>> prediction well would be especially interesting to test on. > > >>>> Sure, Greg, can you do that? I'm sure Michael has test cases > > >>>> available he can give you to measure performance on this. > > >>> Measuring performance is hard ATM. > > >>> > > >>> Disk io stress when backed by raw ramdisk in host is usually the easiest > > >>> way to stress userspace virtio. > > >>> You need to make sure your baseline is repeateable though: > > >>> pin down everything from cpu to hardware interrupts, > > >>> disable power management, c states etc > > >>> Just taking an average and hoping for the best doesn't cut it. > > >>> > > >>> > > >>> We really should write a benchmark device, > > >>> to measure just the transport overhead. > > >>> For example, start with virtio-net but fuse TX and RX > > >>> VQs together, so you'll get right back whatever you send out. > > >>> > > >>> So really, virtio is ATM target-specific and I think it's > > >>> a good idea to get it working as such first, > > >>> do extra cleanups like getting rid of target specific code > > >>> separately. > > >> How does it help when we keep it target specific? The only way to remove > > >> any overhead from this refactoring would be to instead of > > >> > > >> if (vdev->is_big_endian) > > >> > > >> write it as > > >> > > >> if (virtio_is_big_endian_device(vdev)) > > >> > > >> which we could define as > > >> > > >> static inline bool virtio_is_big_endian_device(VirtIODevice *vdev) > > >> { > > >> #if defined(TARGET_PPC) > > >> return vdev->is_big_endian > > >> #elif defined(TARGET_WORDS_BIGENDIAN) > > >> return true; > > >> #else > > >> return false; > > >> #endif > > >> } > > >> > > >> If it makes you happy to do it this way first and move virtio to > > >> target-independent files later, we can certainly do this. > > >> > > >> > > > This would end up defined in all virtio files with the consequence of > > > hitting TARGET_WORDS_BIGENDIAN poison for the target independent ones. > > > Should we kick them out of common-obj variables in makefiles as well ? > > > Or would it be acceptable to have this helper not inlined ? > > > > That would defeat the purpose - the reason to have the helper inlined is > > to remove the conditional branch for x86. > > > > Sure but on the other hand, Peter does not like the idea of moving virtio > to compiled-per-target... Whenever I do touch hw/virtio/virtio.c I see: CC alpha-softmmu/hw/virtio/virtio.o CC arm-softmmu/hw/virtio/virtio.o LINK alpha-softmmu/qemu-system-alpha LINK arm-softmmu/qemu-system-arm CC aarch64-softmmu/hw/virtio/virtio.o LINK aarch64-softmmu/qemu-system-aarch64 CC i386-softmmu/hw/virtio/virtio.o LINK i386-softmmu/qemu-system-i386 CC m68k-softmmu/hw/virtio/virtio.o LINK m68k-softmmu/qemu-system-m68k CC mips64el-softmmu/hw/virtio/virtio.o LINK mips64el-softmmu/qemu-system-mips64el CC mips64-softmmu/hw/virtio/virtio.o CC mipsel-softmmu/hw/virtio/virtio.o LINK mips64-softmmu/qemu-system-mips64 LINK mipsel-softmmu/qemu-system-mipsel CC mips-softmmu/hw/virtio/virtio.o LINK mips-softmmu/qemu-system-mips CC ppcemb-softmmu/hw/virtio/virtio.o CC ppc64-softmmu/hw/virtio/virtio.o LINK ppcemb-softmmu/qemu-system-ppcemb LINK ppc64-softmmu/qemu-system-ppc64 CC ppc-softmmu/hw/virtio/virtio.o LINK ppc-softmmu/qemu-system-ppc CC s390x-softmmu/hw/virtio/virtio.o LINK s390x-softmmu/qemu-system-s390x CC sh4eb-softmmu/hw/virtio/virtio.o LINK sh4eb-softmmu/qemu-system-sh4eb CC sh4-softmmu/hw/virtio/virtio.o LINK sh4-softmmu/qemu-system-sh4 CC sparc64-softmmu/hw/virtio/virtio.o LINK sparc64-softmmu/qemu-system-sparc64 CC x86_64-softmmu/hw/virtio/virtio.o LINK x86_64-softmmu/qemu-system-x86_64 so it looks like virtio is currently compiled per-target. So why isn't it reasonable to keep it per-target for purpose of this enhancement? What am I missing? > these look like contradictory requests to me... :-\ > Unless I have missed something, we have to settle whether we favor building/testing > time or performance of non-{powerpc,arm} targets to have legacy virtio supporting > LE powerpc and BE arm... > > > > > Alex > > > > > > -- > Gregory Kurz kurzgreg@fr.ibm.com > gkurz@linux.vnet.ibm.com > Software Engineer @ IBM/Meiosys http://www.ibm.com > Tel +33 (0)562 165 496 > > "Anarchy is about taking complete responsibility for yourself." > Alan Moore.
On 16 April 2014 17:34, Michael S. Tsirkin <mst@redhat.com> wrote: > so it looks like virtio is currently compiled per-target. > So why isn't it reasonable to keep it per-target for > purpose of this enhancement? > What am I missing? "virtio" is more than one C file. Currently per-target: hw/virtio/virtio.c hw/virtio/virtio-balloon.c hw/scsi/virtio-scsi.c hw/block/virtio-blk.c hw/char/virtio-serial-bus.c hw/net/virtio-net.c Currently built once: hw/char/virtio-console.c hw/virtio/virtio-bus.c hw/virtio/virtio-mmio.c hw/virtio/virtio-pci.c hw/virtio/virtio-rng.c If we can move files from the first group to the second that's cool. Moving files from the second to the first is what I'd like to avoid... thanks -- PMM
On Wed, Apr 16, 2014 at 05:42:22PM +0100, Peter Maydell wrote: > On 16 April 2014 17:34, Michael S. Tsirkin <mst@redhat.com> wrote: > > so it looks like virtio is currently compiled per-target. > > So why isn't it reasonable to keep it per-target for > > purpose of this enhancement? > > What am I missing? > > "virtio" is more than one C file. Currently per-target: > hw/virtio/virtio.c > hw/virtio/virtio-balloon.c > hw/scsi/virtio-scsi.c > hw/block/virtio-blk.c > hw/char/virtio-serial-bus.c > hw/net/virtio-net.c > > Currently built once: > hw/char/virtio-console.c > hw/virtio/virtio-bus.c > hw/virtio/virtio-mmio.c > hw/virtio/virtio-pci.c > hw/virtio/virtio-rng.c > > If we can move files from the first group to the second > that's cool. But doesn't have to be part of this series, yes? I would say let's at least have virtio 1.0 out and implemented in qemu and linux guests, then we can start deprecate virtio 0.X in favor of it. > Moving files from the second to the first is > what I'd like to avoid... > > thanks > -- PMM Absolutely. Looks like we are in agreement after all. So as far as I could see the only issue is with config access e.g. in virtio-pci? That one already has a branch around virtio_is_big_endian, so it's not a problem, there's no need to optimize that.
On Wed, 16 Apr 2014 20:32:07 +0300 "Michael S. Tsirkin" <mst@redhat.com> wrote: > On Wed, Apr 16, 2014 at 05:42:22PM +0100, Peter Maydell wrote: > > On 16 April 2014 17:34, Michael S. Tsirkin <mst@redhat.com> wrote: > > > so it looks like virtio is currently compiled per-target. > > > So why isn't it reasonable to keep it per-target for > > > purpose of this enhancement? > > > What am I missing? > > > > "virtio" is more than one C file. Currently per-target: > > hw/virtio/virtio.c > > hw/virtio/virtio-balloon.c > > hw/scsi/virtio-scsi.c > > hw/block/virtio-blk.c > > hw/char/virtio-serial-bus.c > > hw/net/virtio-net.c > > > > Currently built once: > > hw/char/virtio-console.c > > hw/virtio/virtio-bus.c > > hw/virtio/virtio-mmio.c > > hw/virtio/virtio-pci.c > > hw/virtio/virtio-rng.c > > > > If we can move files from the first group to the second > > that's cool. > > But doesn't have to be part of this series, yes? > I would say let's at least have virtio 1.0 out > and implemented in qemu and linux guests, then > we can start deprecate virtio 0.X in favor of it. > > > Moving files from the second to the first is > > what I'd like to avoid... > > > > thanks > > -- PMM > > Absolutely. > > Looks like we are in agreement after all. > > So as far as I could see the only issue is with config access > e.g. in virtio-pci? > That one already has a branch around virtio_is_big_endian, > so it's not a problem, there's no need to optimize that. > It is because you haven't looked at the other patches... When the whole serie is applied, all virtio files use inlined helpers from virtio-access.h to access memory and fix endianness. That is why we'd rather not add target dependency here... This being said, if you have an insight to have branch-less code for fixed endian targets, and dedicated code for ambivalent endian targets, please send something. Cheers. -- Greg
On Thu, Apr 17, 2014 at 08:54:12AM +0200, Greg Kurz wrote: > On Wed, 16 Apr 2014 20:32:07 +0300 > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > On Wed, Apr 16, 2014 at 05:42:22PM +0100, Peter Maydell wrote: > > > On 16 April 2014 17:34, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > so it looks like virtio is currently compiled per-target. > > > > So why isn't it reasonable to keep it per-target for > > > > purpose of this enhancement? > > > > What am I missing? > > > > > > "virtio" is more than one C file. Currently per-target: > > > hw/virtio/virtio.c > > > hw/virtio/virtio-balloon.c > > > hw/scsi/virtio-scsi.c > > > hw/block/virtio-blk.c > > > hw/char/virtio-serial-bus.c > > > hw/net/virtio-net.c > > > > > > Currently built once: > > > hw/char/virtio-console.c > > > hw/virtio/virtio-bus.c > > > hw/virtio/virtio-mmio.c > > > hw/virtio/virtio-pci.c > > > hw/virtio/virtio-rng.c > > > > > > If we can move files from the first group to the second > > > that's cool. > > > > But doesn't have to be part of this series, yes? > > I would say let's at least have virtio 1.0 out > > and implemented in qemu and linux guests, then > > we can start deprecate virtio 0.X in favor of it. > > > > > Moving files from the second to the first is > > > what I'd like to avoid... > > > > > > thanks > > > -- PMM > > > > Absolutely. > > > > Looks like we are in agreement after all. > > > > So as far as I could see the only issue is with config access > > e.g. in virtio-pci? > > That one already has a branch around virtio_is_big_endian, > > so it's not a problem, there's no need to optimize that. > > > > It is because you haven't looked at the other patches... When > the whole serie is applied, all virtio files use inlined helpers > from virtio-access.h to access memory and fix endianness. That is > why we'd rather not add target dependency here... Right, so what I'm saying is very simple: there are two types of helpers: - per target ones for guest memory access - generic ones for config access One way to implement generic ones is as out of line wrappers for inline per target ones, another way is a separate implementation. > This being said, if you have an insight to have branch-less code > for fixed endian targets, and dedicated code for ambivalent endian > targets, please send something. > > Cheers. > > -- > Greg Sure but I'm a bit busy otherwise - if above hints are insufficient, expect something by Tuesday.
On Wed, 16 Apr 2014 17:42:22 +0100 Peter Maydell <peter.maydell@linaro.org> wrote: > On 16 April 2014 17:34, Michael S. Tsirkin <mst@redhat.com> wrote: > > so it looks like virtio is currently compiled per-target. > > So why isn't it reasonable to keep it per-target for > > purpose of this enhancement? > > What am I missing? > > "virtio" is more than one C file. Currently per-target: > hw/virtio/virtio.c > hw/virtio/virtio-balloon.c > hw/scsi/virtio-scsi.c > hw/block/virtio-blk.c > hw/char/virtio-serial-bus.c > hw/net/virtio-net.c > > Currently built once: > hw/char/virtio-console.c > hw/virtio/virtio-bus.c > hw/virtio/virtio-mmio.c > hw/virtio/virtio-pci.c > hw/virtio/virtio-rng.c > > If we can move files from the first group to the second > that's cool. Moving files from the second to the first is > what I'd like to avoid... > > thanks > -- PMM > I had got at a point where I could move nearly all the code to common-obj, and the cool benefits distracted me from that unwanted branch for fixed endian targets... Sorry for the confusion and noise. :)
On Thu, 17 Apr 2014 11:00:26 +0300 "Michael S. Tsirkin" <mst@redhat.com> wrote: > On Thu, Apr 17, 2014 at 08:54:12AM +0200, Greg Kurz wrote: > > On Wed, 16 Apr 2014 20:32:07 +0300 > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > On Wed, Apr 16, 2014 at 05:42:22PM +0100, Peter Maydell wrote: > > > > On 16 April 2014 17:34, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > so it looks like virtio is currently compiled per-target. > > > > > So why isn't it reasonable to keep it per-target for > > > > > purpose of this enhancement? > > > > > What am I missing? > > > > > > > > "virtio" is more than one C file. Currently per-target: > > > > hw/virtio/virtio.c > > > > hw/virtio/virtio-balloon.c > > > > hw/scsi/virtio-scsi.c > > > > hw/block/virtio-blk.c > > > > hw/char/virtio-serial-bus.c > > > > hw/net/virtio-net.c > > > > > > > > Currently built once: > > > > hw/char/virtio-console.c > > > > hw/virtio/virtio-bus.c > > > > hw/virtio/virtio-mmio.c > > > > hw/virtio/virtio-pci.c > > > > hw/virtio/virtio-rng.c > > > > > > > > If we can move files from the first group to the second > > > > that's cool. > > > > > > But doesn't have to be part of this series, yes? > > > I would say let's at least have virtio 1.0 out > > > and implemented in qemu and linux guests, then > > > we can start deprecate virtio 0.X in favor of it. > > > > > > > Moving files from the second to the first is > > > > what I'd like to avoid... > > > > > > > > thanks > > > > -- PMM > > > > > > Absolutely. > > > > > > Looks like we are in agreement after all. > > > > > > So as far as I could see the only issue is with config access > > > e.g. in virtio-pci? > > > That one already has a branch around virtio_is_big_endian, > > > so it's not a problem, there's no need to optimize that. > > > > > > > It is because you haven't looked at the other patches... When > > the whole serie is applied, all virtio files use inlined helpers > > from virtio-access.h to access memory and fix endianness. That is > > why we'd rather not add target dependency here... > > Right, so what I'm saying is very simple: there are two types of helpers: > - per target ones for guest memory access > - generic ones for config access > > > One way to implement generic ones is as out of line wrappers > for inline per target ones, another way is a separate implementation. > Then you want something like this ? static inline void virtio_stl_p(VirtIODevice *vdev, void *ptr, uint32_t v) { #ifdef TARGET_BIENDIAN if (virtio_is_big_endian(vdev)) { stl_be_p(ptr, v); } else { stl_le_p(ptr, v); } #else stl_p(ptr, v); #endif } > > > This being said, if you have an insight to have branch-less code > > for fixed endian targets, and dedicated code for ambivalent endian > > targets, please send something. > > > > Cheers. > > > > -- > > Greg > > Sure but I'm a bit busy otherwise - if above hints are insufficient, > expect something by Tuesday. > >
On Thu, Apr 17, 2014 at 02:29:13PM +0200, Greg Kurz wrote: > On Thu, 17 Apr 2014 11:00:26 +0300 > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > On Thu, Apr 17, 2014 at 08:54:12AM +0200, Greg Kurz wrote: > > > On Wed, 16 Apr 2014 20:32:07 +0300 > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > On Wed, Apr 16, 2014 at 05:42:22PM +0100, Peter Maydell wrote: > > > > > On 16 April 2014 17:34, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > > so it looks like virtio is currently compiled per-target. > > > > > > So why isn't it reasonable to keep it per-target for > > > > > > purpose of this enhancement? > > > > > > What am I missing? > > > > > > > > > > "virtio" is more than one C file. Currently per-target: > > > > > hw/virtio/virtio.c > > > > > hw/virtio/virtio-balloon.c > > > > > hw/scsi/virtio-scsi.c > > > > > hw/block/virtio-blk.c > > > > > hw/char/virtio-serial-bus.c > > > > > hw/net/virtio-net.c > > > > > > > > > > Currently built once: > > > > > hw/char/virtio-console.c > > > > > hw/virtio/virtio-bus.c > > > > > hw/virtio/virtio-mmio.c > > > > > hw/virtio/virtio-pci.c > > > > > hw/virtio/virtio-rng.c > > > > > > > > > > If we can move files from the first group to the second > > > > > that's cool. > > > > > > > > But doesn't have to be part of this series, yes? > > > > I would say let's at least have virtio 1.0 out > > > > and implemented in qemu and linux guests, then > > > > we can start deprecate virtio 0.X in favor of it. > > > > > > > > > Moving files from the second to the first is > > > > > what I'd like to avoid... > > > > > > > > > > thanks > > > > > -- PMM > > > > > > > > Absolutely. > > > > > > > > Looks like we are in agreement after all. > > > > > > > > So as far as I could see the only issue is with config access > > > > e.g. in virtio-pci? > > > > That one already has a branch around virtio_is_big_endian, > > > > so it's not a problem, there's no need to optimize that. > > > > > > > > > > It is because you haven't looked at the other patches... When > > > the whole serie is applied, all virtio files use inlined helpers > > > from virtio-access.h to access memory and fix endianness. That is > > > why we'd rather not add target dependency here... > > > > Right, so what I'm saying is very simple: there are two types of helpers: > > - per target ones for guest memory access > > - generic ones for config access > > > > > > One way to implement generic ones is as out of line wrappers > > for inline per target ones, another way is a separate implementation. > > > > Then you want something like this ? > > static inline void virtio_stl_p(VirtIODevice *vdev, void *ptr, uint32_t v) > { > #ifdef TARGET_BIENDIAN > if (virtio_is_big_endian(vdev)) { > stl_be_p(ptr, v); > } else { > stl_le_p(ptr, v); > } > #else > stl_p(ptr, v); > #endif > } I'm fine with this. And I'm not against a runtime switch to get rid of per-target build of virtio. I am merely asking for separatable patchsets, e.g. structured like this: 1. implement bi-endian support with no data path overhead for fixed endian targets 2. cleanup getting rid of per-target build adding minor data path overhead This way down the line if we see performance regressions it's easy to revert and verify what causes it. An alternative is accompanying patchset with performance benchmarking results. > > > > > This being said, if you have an insight to have branch-less code > > > for fixed endian targets, and dedicated code for ambivalent endian > > > targets, please send something. > > > > > > Cheers. > > > > > > -- > > > Greg > > > > Sure but I'm a bit busy otherwise - if above hints are insufficient, > > expect something by Tuesday. > > > > > > -- > Gregory Kurz kurzgreg@fr.ibm.com > gkurz@linux.vnet.ibm.com > Software Engineer @ IBM/Meiosys http://www.ibm.com > Tel +33 (0)562 165 496 > > "Anarchy is about taking complete responsibility for yourself." > Alan Moore. >
Il 17/04/2014 15:44, Michael S. Tsirkin ha scritto: > I'm fine with this. > And I'm not against a runtime switch to get rid of per-target build of virtio. > I am merely asking for separatable patchsets, e.g. structured like this: > > 1. implement bi-endian support with no data path overhead for fixed endian targets > 2. cleanup getting rid of per-target build adding minor data path overhead > > This way down the line if we see performance regressions it's easy to > revert and verify what causes it. > > An alternative is accompanying patchset with performance benchmarking > results. There is no such thing as complete benchmarks, and splitting the series in two as you suggest is cleaner (in addition to more clearly bisectable), so I'd really go that way. Paolo
diff --git a/exec.c b/exec.c index 91513c6..e6777d0 100644 --- a/exec.c +++ b/exec.c @@ -42,6 +42,7 @@ #else /* !CONFIG_USER_ONLY */ #include "sysemu/xen-mapcache.h" #include "trace.h" +#include "hw/virtio/virtio.h" #endif #include "exec/cpu-all.h" @@ -2745,7 +2746,6 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, * A helper function for the _utterly broken_ virtio device model to find out if * it's running on a big endian machine. Don't do this at home kids! */ -bool virtio_is_big_endian(void); bool virtio_is_big_endian(void) { #if defined(TARGET_WORDS_BIGENDIAN) diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index 1ba53d9..68c3064 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -4,5 +4,5 @@ common-obj-y += virtio-bus.o common-obj-y += virtio-mmio.o common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ -obj-y += virtio.o virtio-balloon.o +obj-y += virtio.o virtio-balloon.o obj-$(CONFIG_LINUX) += vhost.o diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index ce97514..82a1689 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -89,9 +89,6 @@ /* Flags track per-device state like workarounds for quirks in older guests. */ #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) -/* HACK for virtio to determine if it's running a big endian guest */ -bool virtio_is_big_endian(void); - static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, VirtIOPCIProxy *dev); @@ -409,13 +406,13 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, break; case 2: val = virtio_config_readw(vdev, addr); - if (virtio_is_big_endian()) { + if (vdev->is_big_endian) { val = bswap16(val); } break; case 4: val = virtio_config_readl(vdev, addr); - if (virtio_is_big_endian()) { + if (vdev->is_big_endian) { val = bswap32(val); } break; @@ -443,13 +440,13 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, virtio_config_writeb(vdev, addr, val); break; case 2: - if (virtio_is_big_endian()) { + if (vdev->is_big_endian) { val = bswap16(val); } virtio_config_writew(vdev, addr, val); break; case 4: - if (virtio_is_big_endian()) { + if (vdev->is_big_endian) { val = bswap32(val); } virtio_config_writel(vdev, addr, val); diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index aeabf3a..bb646f0 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -19,6 +19,7 @@ #include "hw/virtio/virtio.h" #include "qemu/atomic.h" #include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" /* * The alignment to use between consumer and producer parts of vring. @@ -546,6 +547,8 @@ void virtio_reset(void *opaque) virtio_set_status(vdev, 0); + vdev->is_big_endian = virtio_is_big_endian(); + if (k->reset) { k->reset(vdev); } @@ -897,6 +900,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + /* NOTE: we assume that endianness is a cpu state AND + * cpu state is restored before virtio devices. + */ + vdev->is_big_endian = virtio_is_big_endian(); + if (k->load_config) { ret = k->load_config(qbus->parent, f); if (ret) @@ -1153,6 +1161,33 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) } } +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) +{ + if (vdev->is_big_endian) { + return tswap16(s); + } else { + return bswap16(tswap16(s)); + } +} + +uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s) +{ + if (vdev->is_big_endian) { + return tswap32(s); + } else { + return bswap32(tswap32(s)); + } +} + +uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s) +{ + if (vdev->is_big_endian) { + return tswap64(s); + } else { + return bswap64(tswap64(s)); + } +} + static void virtio_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h new file mode 100644 index 0000000..5c9013e --- /dev/null +++ b/include/hw/virtio/virtio-access.h @@ -0,0 +1,138 @@ +/* + * Virtio Accessor Support: In case your target can change endian. + * + * Copyright IBM, Corp. 2013 + * + * Authors: + * Rusty Russell <rusty@au.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + */ +#ifndef _QEMU_VIRTIO_ACCESS_H +#define _QEMU_VIRTIO_ACCESS_H +#include "hw/virtio/virtio.h" +#include "exec/address-spaces.h" + +static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa) +{ + if (vdev->is_big_endian) { + return lduw_be_phys(&address_space_memory, pa); + } + return lduw_le_phys(&address_space_memory, pa); +} + +static inline uint32_t virtio_ldl_phys(VirtIODevice *vdev, hwaddr pa) +{ + if (vdev->is_big_endian) { + return ldl_be_phys(&address_space_memory, pa); + } + return ldl_le_phys(&address_space_memory, pa); +} + +static inline uint64_t virtio_ldq_phys(VirtIODevice *vdev, hwaddr pa) +{ + if (vdev->is_big_endian) { + return ldq_be_phys(&address_space_memory, pa); + } + return ldq_le_phys(&address_space_memory, pa); +} + +static inline void virtio_stw_phys(VirtIODevice *vdev, hwaddr pa, + uint16_t value) +{ + if (vdev->is_big_endian) { + stw_be_phys(&address_space_memory, pa, value); + } else { + stw_le_phys(&address_space_memory, pa, value); + } +} + +static inline void virtio_stl_phys(VirtIODevice *vdev, hwaddr pa, + uint32_t value) +{ + if (vdev->is_big_endian) { + stl_be_phys(&address_space_memory, pa, value); + } else { + stl_le_phys(&address_space_memory, pa, value); + } +} + +static inline void virtio_stw_p(VirtIODevice *vdev, void *ptr, uint16_t v) +{ + if (vdev->is_big_endian) { + stw_be_p(ptr, v); + } else { + stw_le_p(ptr, v); + } +} + +static inline void virtio_stl_p(VirtIODevice *vdev, void *ptr, uint32_t v) +{ + if (vdev->is_big_endian) { + stl_be_p(ptr, v); + } else { + stl_le_p(ptr, v); + } +} + +static inline void virtio_stq_p(VirtIODevice *vdev, void *ptr, uint64_t v) +{ + if (vdev->is_big_endian) { + stq_be_p(ptr, v); + } else { + stq_le_p(ptr, v); + } +} + +static inline int virtio_lduw_p(VirtIODevice *vdev, const void *ptr) +{ + if (vdev->is_big_endian) { + return lduw_be_p(ptr); + } else { + return lduw_le_p(ptr); + } +} + +static inline int virtio_ldl_p(VirtIODevice *vdev, const void *ptr) +{ + if (vdev->is_big_endian) { + return ldl_be_p(ptr); + } else { + return ldl_le_p(ptr); + } +} + +static inline uint64_t virtio_ldq_p(VirtIODevice *vdev, const void *ptr) +{ + if (vdev->is_big_endian) { + return ldq_be_p(ptr); + } else { + return ldq_le_p(ptr); + } +} + +uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s); + +static inline void virtio_tswap16s(VirtIODevice *vdev, uint16_t *s) +{ + *s = virtio_tswap16(vdev, *s); +} + +uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s); + +static inline void virtio_tswap32s(VirtIODevice *vdev, uint32_t *s) +{ + *s = virtio_tswap32(vdev, *s); +} + +uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s); + +static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s) +{ + *s = virtio_tswap64(vdev, *s); +} +#endif /* _QEMU_VIRTIO_ACCESS_H */ diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 3e54e90..baab794 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -121,6 +121,7 @@ struct VirtIODevice bool vm_running; VMChangeStateEntry *vmstate; char *bus_name; + bool is_big_endian; }; typedef struct VirtioDeviceClass { @@ -253,4 +254,5 @@ void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, bool set_handler); void virtio_queue_notify_vq(VirtQueue *vq); void virtio_irq(VirtQueue *vq); +bool virtio_is_big_endian(void); #endif diff --git a/vl.c b/vl.c index 9975e5a..8918eb6 100644 --- a/vl.c +++ b/vl.c @@ -4379,6 +4379,10 @@ int main(int argc, char **argv, char **envp) .initrd_filename = initrd_filename, .cpu_model = cpu_model }; + /* CAUTION: the virtio migration code assumes that cpus are initialized + * before generic devices. This means that machine->init() MUST be called + * before qemu_opts_foreach(...device_init_func...). + */ current_machine->init_args = args; machine->init(¤t_machine->init_args);