Patchwork virtio-pci: Fix endianness of virtio config

login
register
mail settings
Submitter Benjamin Herrenschmidt
Date Jan. 10, 2012, 11:35 a.m.
Message ID <1326195311.23910.59.camel@pasglop>
Download mbox | patch
Permalink /patch/135400/
State New
Headers show

Comments

Benjamin Herrenschmidt - Jan. 10, 2012, 11:35 a.m.
The virtio config area in PIO space is a bit special. The initial
header is little endian but the rest (device specific) is guest
native endian.

The PIO accessors for PCI on machines that don't have native IO ports
assume that all PIO is little endian, which works fine for everything
except the above.

A complicated way to fix it would be to split the BAR into two memory
regions with different endianness settings, but this isn't practical
to do; besides, the PIO code doesn't honor region endianness anyway
(I have a patch for that too but it isn't necessary at this stage).

So I decided to go for the quick fix instead which consists of
reverting the swap in virtio-pci in selected places, hoping that when
we eventually do a "v2" of the virtio protocols, we sort that out once
and for all using a fixed endian setting for everything.

Unfortunately, that means moving virtio-pci from Makefile.objs to
Makefile.target so we can use TARGET_WORDS_BIGENDIAN which would
otherwise be poisoned.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 Makefile.objs   |    1 -
 Makefile.target |    1 +
 hw/virtio-pci.c |   24 ++++++++++++++++++++++--
 3 files changed, 23 insertions(+), 3 deletions(-)
Benjamin Herrenschmidt - Jan. 10, 2012, 9:04 p.m.
On Tue, 2012-01-10 at 21:46 +0100, Alexander Graf wrote:
> On 10.01.2012, at 21:35, Andreas Färber wrote:
> 
> > Am 10.01.2012 21:30, schrieb Alexander Graf:
> >> Maybe the RTAS callbacks really want you to return stuff in little
> endian?
> > 
> > IIRC all RTAS callbacks need to be in the same bitness and
> endianness
> > (MSR LE+SB) as when instantiating RTAS from OF.
> 
> Sure, the question is how the PCI controller is wired up usually. Just
> because RTAS works in native endianness doesn't mean that endianness
> of the actual device access isn't defined differently.

Eugh... you guys don't get it :-) This is -not- about the PCI
configuration space, but the virtio configuration space which is a
slightly different thing (PCI config space works with what's there at
the moment).

So this has nothing to do with e1000 or any other emulated device like
that, but it does have to do with virtio-blk for example.

The way virtio works is that the PIO BAR is split into two parts. The
first part (header) is a set of what's basically pseudo MMIO registers
that are little endian. The second part is device-specific and is ...
guest native endian. (Yeah it's horrible, I know).

This is how it's specified in the virtio spec and how Linux expects it.
With this patch, I can boot an existing Fedora 16 with virtio-blk, for
example.

Cheers,
Ben.
Anthony Liguori - Jan. 10, 2012, 10:02 p.m.
On 01/10/2012 05:35 AM, Benjamin Herrenschmidt wrote:
> The virtio config area in PIO space is a bit special. The initial
> header is little endian but the rest (device specific) is guest
> native endian.
>
> The PIO accessors for PCI on machines that don't have native IO ports
> assume that all PIO is little endian, which works fine for everything
> except the above.
>
> A complicated way to fix it would be to split the BAR into two memory
> regions with different endianess settings, but this isn't practical
> to do, besides, the PIO code doesn't honor region endianness anyway
> (I have a patch for that too but it isn't necessary at this stage).
>
> So I decided to go for the quick fix instead which consists of
> reverting the swap in virtio-pci in selected places, hoping that when
> we eventually do a "v2" of the virtio protocols, we sort that out once
> and for all using a fixed endian setting for everything.
>
> Unfortunately, that mean moving virtio-pci from Makefile.objs to
> Makefile.target so we can use TARGET_WORDS_BIGENDIAN which would
> otherwise be poisoned.
>
> Signed-off-by: Benjamin Herrenschmidt<benh@kernel.crashing.org>
> ---
>   Makefile.objs   |    1 -
>   Makefile.target |    1 +
>   hw/virtio-pci.c |   24 ++++++++++++++++++++++--
>   3 files changed, 23 insertions(+), 3 deletions(-)
>
> diff --git a/Makefile.objs b/Makefile.objs
> index 4f6d26c..b721fca 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -186,7 +186,6 @@ hw-obj-y =
>   hw-obj-y += vl.o loader.o
>   hw-obj-$(CONFIG_VIRTIO) += virtio-console.o
>   hw-obj-y += usb-libhw.o
> -hw-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
>   hw-obj-y += fw_cfg.o
>   hw-obj-$(CONFIG_PCI) += pci.o pci_bridge.o
>   hw-obj-$(CONFIG_PCI) += msix.o msi.o
> diff --git a/Makefile.target b/Makefile.target
> index ef6834b..03d44c3 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -191,6 +191,7 @@ obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o
>   # need to fix this properly
>   obj-$(CONFIG_NO_PCI) += pci-stub.o
>   obj-$(CONFIG_VIRTIO) += virtio.o virtio-blk.o virtio-balloon.o virtio-net.o virtio-serial-bus.o
> +obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
>   obj-y += vhost_net.o
>   obj-$(CONFIG_VHOST_NET) += vhost.o
>   obj-$(CONFIG_REALLY_VIRTFS) += 9pfs/virtio-9p-device.o
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index 77b75bc..ca70e42 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -412,20 +412,34 @@ static uint32_t virtio_pci_config_readw(void *opaque, uint32_t addr)
>   {
>       VirtIOPCIProxy *proxy = opaque;
>       uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
> +    uint16_t val;
>       if (addr<  config)
>           return virtio_ioport_read(proxy, addr);
>       addr -= config;
> -    return virtio_config_readw(proxy->vdev, addr);
> +    val = virtio_config_readw(proxy->vdev, addr);
> +#ifdef TARGET_WORDS_BIGENDIAN
> +    /* virtio is odd, ioports are LE but config space is target native
> +     * endian. However, in qemu, all PIO is LE, so we need to re-swap
> +     * on BE targets
> +     */
> +    val = bswap16(val);
> +#endif

I think this is the only reasonable way to do it, but I'd suggest adding 
target_is_bigendian() to arch_init.c that way we wouldn't have to move the 
object from libhw.

Regards,

Anthony Liguori


> +    return val;
>   }
>
>   static uint32_t virtio_pci_config_readl(void *opaque, uint32_t addr)
>   {
>       VirtIOPCIProxy *proxy = opaque;
>       uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
> +    uint32_t val;
>       if (addr<  config)
>           return virtio_ioport_read(proxy, addr);
>       addr -= config;
> -    return virtio_config_readl(proxy->vdev, addr);
> +    val = virtio_config_readl(proxy->vdev, addr);
> +#ifdef TARGET_WORDS_BIGENDIAN
> +    val = bswap32(val);
> +#endif
> +    return val;
>   }
>
>   static void virtio_pci_config_writeb(void *opaque, uint32_t addr, uint32_t val)
> @@ -449,6 +463,9 @@ static void virtio_pci_config_writew(void *opaque, uint32_t addr, uint32_t val)
>           return;
>       }
>       addr -= config;
> +#ifdef TARGET_WORDS_BIGENDIAN
> +    val = bswap16(val);
> +#endif
>       virtio_config_writew(proxy->vdev, addr, val);
>   }
>
> @@ -461,6 +478,9 @@ static void virtio_pci_config_writel(void *opaque, uint32_t addr, uint32_t val)
>           return;
>       }
>       addr -= config;
> +#ifdef TARGET_WORDS_BIGENDIAN
> +    val = bswap32(val);
> +#endif
>       virtio_config_writel(proxy->vdev, addr, val);
>   }
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt - Jan. 10, 2012, 10:04 p.m.
On Tue, 2012-01-10 at 22:45 +0100, Alexander Graf wrote:
> Here's the thing that I don't understand. What exactly is breaking for
> you? I tried -M pseries on a ppc box and on an x86 box and both times
> was able to see /dev/vda.

And mount it and use it ? Here I get the capacity wrong if I don't have
my patch and can't actually boot a distro off it.

Cheers,
Ben.
Benjamin Herrenschmidt - Jan. 10, 2012, 10:10 p.m.
On Wed, 2012-01-11 at 09:04 +1100, Benjamin Herrenschmidt wrote:
> On Tue, 2012-01-10 at 22:45 +0100, Alexander Graf wrote:
> > Here's the thing that I don't understand. What exactly is breaking for
> > you? I tried -M pseries on a ppc box and on an x86 box and both times
> > was able to see /dev/vda.
> 
> And mount it and use it ? Here I get the capacity wrong if I don't have
> my patch and can't actually boot a distro off it.

Hrm actually, it might well work with Linux regardless because Linux
only ever does 8 bit accesses to the virtio config space.

It's SLOF that breaks because SLOF uses 2 and 4 byte accesses (which are
allowed per spec as far as I can tell) but the spurious swapping
happening in that case means we lose the byte address invariance.

Cheers,
Ben.
Benjamin Herrenschmidt - Jan. 10, 2012, 10:49 p.m.
On Tue, 2012-01-10 at 23:41 +0100, Alexander Graf wrote:
> 
> No. Libhw shouldn't be able to know anything about target endianness.
> If a device is as brokenly spec'ed as virtio and is coupled to the
> "main CPU endianness", it clearly belongs with the CPU, not into
> libhw.

Ok, can you guys solve this and tell me what I should do ? :-)

Cheers,
Ben.

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 4f6d26c..b721fca 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -186,7 +186,6 @@  hw-obj-y =
 hw-obj-y += vl.o loader.o
 hw-obj-$(CONFIG_VIRTIO) += virtio-console.o
 hw-obj-y += usb-libhw.o
-hw-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
 hw-obj-y += fw_cfg.o
 hw-obj-$(CONFIG_PCI) += pci.o pci_bridge.o
 hw-obj-$(CONFIG_PCI) += msix.o msi.o
diff --git a/Makefile.target b/Makefile.target
index ef6834b..03d44c3 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -191,6 +191,7 @@  obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o
 # need to fix this properly
 obj-$(CONFIG_NO_PCI) += pci-stub.o
 obj-$(CONFIG_VIRTIO) += virtio.o virtio-blk.o virtio-balloon.o virtio-net.o virtio-serial-bus.o
+obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
 obj-y += vhost_net.o
 obj-$(CONFIG_VHOST_NET) += vhost.o
 obj-$(CONFIG_REALLY_VIRTFS) += 9pfs/virtio-9p-device.o
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 77b75bc..ca70e42 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -412,20 +412,34 @@  static uint32_t virtio_pci_config_readw(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
     uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    uint16_t val;
     if (addr < config)
         return virtio_ioport_read(proxy, addr);
     addr -= config;
-    return virtio_config_readw(proxy->vdev, addr);
+    val = virtio_config_readw(proxy->vdev, addr);
+#ifdef TARGET_WORDS_BIGENDIAN
+    /* virtio is odd, ioports are LE but config space is target native
+     * endian. However, in qemu, all PIO is LE, so we need to re-swap
+     * on BE targets
+     */
+    val = bswap16(val);
+#endif
+    return val;
 }
 
 static uint32_t virtio_pci_config_readl(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
     uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    uint32_t val;
     if (addr < config)
         return virtio_ioport_read(proxy, addr);
     addr -= config;
-    return virtio_config_readl(proxy->vdev, addr);
+    val = virtio_config_readl(proxy->vdev, addr);
+#ifdef TARGET_WORDS_BIGENDIAN
+    val = bswap32(val);
+#endif
+    return val;
 }
 
 static void virtio_pci_config_writeb(void *opaque, uint32_t addr, uint32_t val)
@@ -449,6 +463,9 @@  static void virtio_pci_config_writew(void *opaque, uint32_t addr, uint32_t val)
         return;
     }
     addr -= config;
+#ifdef TARGET_WORDS_BIGENDIAN
+    val = bswap16(val);
+#endif
     virtio_config_writew(proxy->vdev, addr, val);
 }
 
@@ -461,6 +478,9 @@  static void virtio_pci_config_writel(void *opaque, uint32_t addr, uint32_t val)
         return;
     }
     addr -= config;
+#ifdef TARGET_WORDS_BIGENDIAN
+    val = bswap32(val);
+#endif
     virtio_config_writel(proxy->vdev, addr, val);
 }