diff mbox

[2/2] memory: fix the eventfd data endianness according to the host

Message ID 20150313081125.4669.92074.stgit@bahia.local
State New
Headers show

Commit Message

Greg Kurz March 13, 2015, 8:11 a.m. UTC
The data argument is a host entity. It is not related to the target
endianness. Let's introduce a HOST_WORDS_BIGENDIAN based helper for
that.

This patch fixes ioeventfd and vhost for a ppc64le host running a ppc64le
guest (only virtqueue 0 was handled, all others being byteswapped because
of TARGET_WORDS_BIGENDIAN). It doesn't change functionnality for fixed
endian architectures (i.e. doesn't break x86).

Reported-by: Cédric Le Goater <clg@fr.ibm.com>
Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
---
 memory.c |   13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

Comments

Paolo Bonzini March 13, 2015, 11:06 a.m. UTC | #1
On 13/03/2015 09:11, Greg Kurz wrote:
> The data argument is a host entity. It is not related to the target
> endianness. Let's introduce a HOST_WORDS_BIGENDIAN based helper for
> that.
> 
> This patch fixes ioeventfd and vhost for a ppc64le host running a ppc64le
> guest (only virtqueue 0 was handled, all others being byteswapped because
> of TARGET_WORDS_BIGENDIAN). It doesn't change functionnality for fixed
> endian architectures (i.e. doesn't break x86).
> 
> Reported-by: Cédric Le Goater <clg@fr.ibm.com>
> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
> ---
>  memory.c |   13 +++++++++++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/memory.c b/memory.c
> index 6291cc0..1e29d40 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -1549,6 +1549,15 @@ void memory_region_clear_flush_coalesced(MemoryRegion *mr)
>      }
>  }
>  
> +static bool eventfd_wrong_endianness(MemoryRegion *mr)
> +{
> +#ifdef HOST_WORDS_BIGENDIAN
> +    return mr->ops->endianness == DEVICE_LITTLE_ENDIAN;
> +#else
> +    return mr->ops->endianness == DEVICE_BIG_ENDIAN;
> +#endif
> +}
> +
>  void memory_region_add_eventfd(MemoryRegion *mr,
>                                 hwaddr addr,
>                                 unsigned size,
> @@ -1565,7 +1574,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>      };
>      unsigned i;
>  
> -    adjust_endianness(&mrfd.data, size, memory_region_wrong_endianness(mr));
> +    adjust_endianness(&mrfd.data, size, eventfd_wrong_endianness(mr));

Strictly speaking, the place to do this would be kvm_set_ioeventfd_mmio.
 A hypothetical userspace ioeventfd emulation would not need the swap.

I can accept the patch, but it's better to add a comment.

Paolo

>      memory_region_transaction_begin();
>      for (i = 0; i < mr->ioeventfd_nb; ++i) {
>          if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
> @@ -1598,7 +1607,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
>      };
>      unsigned i;
>  
> -    adjust_endianness(&mrfd.data, size, memory_region_wrong_endianness(mr));
> +    adjust_endianness(&mrfd.data, size, eventfd_wrong_endianness(mr));
>      memory_region_transaction_begin();
>      for (i = 0; i < mr->ioeventfd_nb; ++i) {
>          if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
>
Greg Kurz March 13, 2015, 11:32 a.m. UTC | #2
On Fri, 13 Mar 2015 12:06:06 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> 
> On 13/03/2015 09:11, Greg Kurz wrote:
> > The data argument is a host entity. It is not related to the target
> > endianness. Let's introduce a HOST_WORDS_BIGENDIAN based helper for
> > that.
> > 
> > This patch fixes ioeventfd and vhost for a ppc64le host running a ppc64le
> > guest (only virtqueue 0 was handled, all others being byteswapped because
> > of TARGET_WORDS_BIGENDIAN). It doesn't change functionnality for fixed
> > endian architectures (i.e. doesn't break x86).
> > 
> > Reported-by: Cédric Le Goater <clg@fr.ibm.com>
> > Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
> > ---
> >  memory.c |   13 +++++++++++--
> >  1 file changed, 11 insertions(+), 2 deletions(-)
> > 
> > diff --git a/memory.c b/memory.c
> > index 6291cc0..1e29d40 100644
> > --- a/memory.c
> > +++ b/memory.c
> > @@ -1549,6 +1549,15 @@ void memory_region_clear_flush_coalesced(MemoryRegion *mr)
> >      }
> >  }
> >  
> > +static bool eventfd_wrong_endianness(MemoryRegion *mr)
> > +{
> > +#ifdef HOST_WORDS_BIGENDIAN
> > +    return mr->ops->endianness == DEVICE_LITTLE_ENDIAN;
> > +#else
> > +    return mr->ops->endianness == DEVICE_BIG_ENDIAN;
> > +#endif
> > +}
> > +
> >  void memory_region_add_eventfd(MemoryRegion *mr,
> >                                 hwaddr addr,
> >                                 unsigned size,
> > @@ -1565,7 +1574,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
> >      };
> >      unsigned i;
> >  
> > -    adjust_endianness(&mrfd.data, size, memory_region_wrong_endianness(mr));
> > +    adjust_endianness(&mrfd.data, size, eventfd_wrong_endianness(mr));
> 
> Strictly speaking, the place to do this would be kvm_set_ioeventfd_mmio.

FWIW the swap is being done in memory.c since commit:

commit 28f362be6e7f45ea9b7a57a08555c4c784f36198
Author: Alexander Graf <agraf@suse.de>
Date:   Mon Oct 15 20:30:28 2012 +0200

    memory: Make eventfd adhere to device endianness

Are you asking to revert this commit and to pass the device endianness to
kvm_set_ioeventfd_mmio() so it can fix the ordering ?

>  A hypothetical userspace ioeventfd emulation would not need the swap.
> 

I don't understand why "would not need the swap"...


> I can accept the patch, but it's better to add a comment.
> 

... and so I don't know what to write. :)

Please enlight !

--
Greg

> Paolo
> 
> >      memory_region_transaction_begin();
> >      for (i = 0; i < mr->ioeventfd_nb; ++i) {
> >          if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
> > @@ -1598,7 +1607,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
> >      };
> >      unsigned i;
> >  
> > -    adjust_endianness(&mrfd.data, size, memory_region_wrong_endianness(mr));
> > +    adjust_endianness(&mrfd.data, size, eventfd_wrong_endianness(mr));
> >      memory_region_transaction_begin();
> >      for (i = 0; i < mr->ioeventfd_nb; ++i) {
> >          if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
> > 
>
Michael S. Tsirkin March 13, 2015, 2:23 p.m. UTC | #3
On Fri, Mar 13, 2015 at 12:06:06PM +0100, Paolo Bonzini wrote:
> 
> 
> On 13/03/2015 09:11, Greg Kurz wrote:
> > The data argument is a host entity. It is not related to the target
> > endianness. Let's introduce a HOST_WORDS_BIGENDIAN based helper for
> > that.
> > 
> > This patch fixes ioeventfd and vhost for a ppc64le host running a ppc64le
> > guest (only virtqueue 0 was handled, all others being byteswapped because
> > of TARGET_WORDS_BIGENDIAN). It doesn't change functionnality for fixed
> > endian architectures (i.e. doesn't break x86).
> > 
> > Reported-by: Cédric Le Goater <clg@fr.ibm.com>
> > Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
> > ---
> >  memory.c |   13 +++++++++++--
> >  1 file changed, 11 insertions(+), 2 deletions(-)
> > 
> > diff --git a/memory.c b/memory.c
> > index 6291cc0..1e29d40 100644
> > --- a/memory.c
> > +++ b/memory.c
> > @@ -1549,6 +1549,15 @@ void memory_region_clear_flush_coalesced(MemoryRegion *mr)
> >      }
> >  }
> >  
> > +static bool eventfd_wrong_endianness(MemoryRegion *mr)
> > +{
> > +#ifdef HOST_WORDS_BIGENDIAN
> > +    return mr->ops->endianness == DEVICE_LITTLE_ENDIAN;
> > +#else
> > +    return mr->ops->endianness == DEVICE_BIG_ENDIAN;
> > +#endif
> > +}
> > +
> >  void memory_region_add_eventfd(MemoryRegion *mr,
> >                                 hwaddr addr,
> >                                 unsigned size,
> > @@ -1565,7 +1574,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
> >      };
> >      unsigned i;
> >  
> > -    adjust_endianness(&mrfd.data, size, memory_region_wrong_endianness(mr));
> > +    adjust_endianness(&mrfd.data, size, eventfd_wrong_endianness(mr));
> 
> Strictly speaking, the place to do this would be kvm_set_ioeventfd_mmio.
>  A hypothetical userspace ioeventfd emulation would not need the swap.
> 
> I can accept the patch, but it's better to add a comment.
> 
> Paolo

Better to fix really, making ioeventfd work with tcg seems
something quite reasonable.

> >      for (i = 0; i < mr->ioeventfd_nb; ++i) {
> >          if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
> > @@ -1598,7 +1607,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
> >      };
> >      unsigned i;
> >  
> > -    adjust_endianness(&mrfd.data, size, memory_region_wrong_endianness(mr));
> > +    adjust_endianness(&mrfd.data, size, eventfd_wrong_endianness(mr));
> >      memory_region_transaction_begin();
> >      for (i = 0; i < mr->ioeventfd_nb; ++i) {
> >          if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
> >
Paolo Bonzini March 13, 2015, 2:52 p.m. UTC | #4
On 13/03/2015 12:32, Greg Kurz wrote:
> On Fri, 13 Mar 2015 12:06:06 +0100
> Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>>
>> On 13/03/2015 09:11, Greg Kurz wrote:
>>> The data argument is a host entity. It is not related to the target
>>> endianness. Let's introduce a HOST_WORDS_BIGENDIAN based helper for
>>> that.
>>>
>>> This patch fixes ioeventfd and vhost for a ppc64le host running a ppc64le
>>> guest (only virtqueue 0 was handled, all others being byteswapped because
>>> of TARGET_WORDS_BIGENDIAN). It doesn't change functionnality for fixed
>>> endian architectures (i.e. doesn't break x86).
>>>
>>> Reported-by: Cédric Le Goater <clg@fr.ibm.com>
>>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
>>> ---
>>>  memory.c |   13 +++++++++++--
>>>  1 file changed, 11 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/memory.c b/memory.c
>>> index 6291cc0..1e29d40 100644
>>> --- a/memory.c
>>> +++ b/memory.c
>>> @@ -1549,6 +1549,15 @@ void memory_region_clear_flush_coalesced(MemoryRegion *mr)
>>>      }
>>>  }
>>>  
>>> +static bool eventfd_wrong_endianness(MemoryRegion *mr)
>>> +{
>>> +#ifdef HOST_WORDS_BIGENDIAN
>>> +    return mr->ops->endianness == DEVICE_LITTLE_ENDIAN;
>>> +#else
>>> +    return mr->ops->endianness == DEVICE_BIG_ENDIAN;
>>> +#endif
>>> +}
>>> +
>>>  void memory_region_add_eventfd(MemoryRegion *mr,
>>>                                 hwaddr addr,
>>>                                 unsigned size,
>>> @@ -1565,7 +1574,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>>>      };
>>>      unsigned i;
>>>  
>>> -    adjust_endianness(&mrfd.data, size, memory_region_wrong_endianness(mr));
>>> +    adjust_endianness(&mrfd.data, size, eventfd_wrong_endianness(mr));
>>
>> Strictly speaking, the place to do this would be kvm_set_ioeventfd_mmio.
> 
> FWIW the swap is being done in memory.c since commit:
> 
> commit 28f362be6e7f45ea9b7a57a08555c4c784f36198
> Author: Alexander Graf <agraf@suse.de>
> Date:   Mon Oct 15 20:30:28 2012 +0200
> 
>     memory: Make eventfd adhere to device endianness
> 
> Are you asking to revert this commit and to pass the device endianness to
> kvm_set_ioeventfd_mmio() so it can fix the ordering ?

Yes.

>>  A hypothetical userspace ioeventfd emulation would not need the swap.
> 
> I don't understand why "would not need the swap"...

Because the userspace ioeventfd emulation would look at the value as it
comes from the target CPU, in target CPU endianness.  So it would have a
swap done at ioeventfd time, and a swap done at access time.  Host
endianness doesn't matter.

Here, QEMU believes that the target's natural endianness is big-endian,
but the kernel believes that the target's natural endianness is
little-endian (based on MSR_KERNEL).  So there is a different number of
swaps in memory_region_add_eventfd and in the kernel's kvmppc_handle_store.

Does something like this work?

#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
    /* The kernel expects ioeventfd values in HOST_WORDS_BIGENDIAN
     * endianness, but the memory core hands them in target endianness.
     * For example, PPC is always treated as big-endian even if running
     * on KVM and on PPC64LE.  Correct here.
     */
    switch(size) {
    case 2:
        val = bswap16(val);
        break;
    case 4:
        val = bswap32(val);
        break;
    }
#endif

in kvm_set_ioeventfd_mmio and kvm_set_ioeventfd_pio?

Paolo

> 
>> I can accept the patch, but it's better to add a comment.
>>
> 
> ... and so I don't know what to write. :)
> 
> Please enlight !
> 
> --
> Greg
> 
>> Paolo
>>
>>>      memory_region_transaction_begin();
>>>      for (i = 0; i < mr->ioeventfd_nb; ++i) {
>>>          if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
>>> @@ -1598,7 +1607,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
>>>      };
>>>      unsigned i;
>>>  
>>> -    adjust_endianness(&mrfd.data, size, memory_region_wrong_endianness(mr));
>>> +    adjust_endianness(&mrfd.data, size, eventfd_wrong_endianness(mr));
>>>      memory_region_transaction_begin();
>>>      for (i = 0; i < mr->ioeventfd_nb; ++i) {
>>>          if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
>>>
>>
>
Michael S. Tsirkin March 13, 2015, 3:24 p.m. UTC | #5
On Fri, Mar 13, 2015 at 03:52:50PM +0100, Paolo Bonzini wrote:
> 
> 
> On 13/03/2015 12:32, Greg Kurz wrote:
> > On Fri, 13 Mar 2015 12:06:06 +0100
> > Paolo Bonzini <pbonzini@redhat.com> wrote:
> >>
> >>
> >> On 13/03/2015 09:11, Greg Kurz wrote:
> >>> The data argument is a host entity. It is not related to the target
> >>> endianness. Let's introduce a HOST_WORDS_BIGENDIAN based helper for
> >>> that.
> >>>
> >>> This patch fixes ioeventfd and vhost for a ppc64le host running a ppc64le
> >>> guest (only virtqueue 0 was handled, all others being byteswapped because
> >>> of TARGET_WORDS_BIGENDIAN). It doesn't change functionnality for fixed
> >>> endian architectures (i.e. doesn't break x86).
> >>>
> >>> Reported-by: Cédric Le Goater <clg@fr.ibm.com>
> >>> Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
> >>> ---
> >>>  memory.c |   13 +++++++++++--
> >>>  1 file changed, 11 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/memory.c b/memory.c
> >>> index 6291cc0..1e29d40 100644
> >>> --- a/memory.c
> >>> +++ b/memory.c
> >>> @@ -1549,6 +1549,15 @@ void memory_region_clear_flush_coalesced(MemoryRegion *mr)
> >>>      }
> >>>  }
> >>>  
> >>> +static bool eventfd_wrong_endianness(MemoryRegion *mr)
> >>> +{
> >>> +#ifdef HOST_WORDS_BIGENDIAN
> >>> +    return mr->ops->endianness == DEVICE_LITTLE_ENDIAN;
> >>> +#else
> >>> +    return mr->ops->endianness == DEVICE_BIG_ENDIAN;
> >>> +#endif
> >>> +}
> >>> +
> >>>  void memory_region_add_eventfd(MemoryRegion *mr,
> >>>                                 hwaddr addr,
> >>>                                 unsigned size,
> >>> @@ -1565,7 +1574,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
> >>>      };
> >>>      unsigned i;
> >>>  
> >>> -    adjust_endianness(&mrfd.data, size, memory_region_wrong_endianness(mr));
> >>> +    adjust_endianness(&mrfd.data, size, eventfd_wrong_endianness(mr));
> >>
> >> Strictly speaking, the place to do this would be kvm_set_ioeventfd_mmio.
> > 
> > FWIW the swap is being done in memory.c since commit:
> > 
> > commit 28f362be6e7f45ea9b7a57a08555c4c784f36198
> > Author: Alexander Graf <agraf@suse.de>
> > Date:   Mon Oct 15 20:30:28 2012 +0200
> > 
> >     memory: Make eventfd adhere to device endianness
> > 
> > Are you asking to revert this commit and to pass the device endianness to
> > kvm_set_ioeventfd_mmio() so it can fix the ordering ?
> 
> Yes.
> 
> >>  A hypothetical userspace ioeventfd emulation would not need the swap.
> > 
> > I don't understand why "would not need the swap"...
> 
> Because the userspace ioeventfd emulation would look at the value as it
> comes from the target CPU, in target CPU endianness.  So it would have a
> swap done at ioeventfd time, and a swap done at access time.  Host
> endianness doesn't matter.
> 
> Here, QEMU believes that the target's natural endianness is big-endian,
> but the kernel believes that the target's natural endianness is
> little-endian (based on MSR_KERNEL).  So there is a different number of
> swaps in memory_region_add_eventfd and in the kernel's kvmppc_handle_store.
> 
> Does something like this work?
> 
> #if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
>     /* The kernel expects ioeventfd values in HOST_WORDS_BIGENDIAN
>      * endianness, but the memory core hands them in target endianness.
>      * For example, PPC is always treated as big-endian even if running
>      * on KVM and on PPC64LE.  Correct here.
>      */
>     switch(size) {
>     case 2:
>         val = bswap16(val);
>         break;
>     case 4:
>         val = bswap32(val);
>         break;
>     }
> #endif
> 
> in kvm_set_ioeventfd_mmio and kvm_set_ioeventfd_pio?
> 
> Paolo

Maybe you can post a patch? Would be easier ...

> > 
> >> I can accept the patch, but it's better to add a comment.
> >>
> > 
> > ... and so I don't know what to write. :)
> > 
> > Please enlight !
> > 
> > --
> > Greg
> > 
> >> Paolo
> >>
> >>>      memory_region_transaction_begin();
> >>>      for (i = 0; i < mr->ioeventfd_nb; ++i) {
> >>>          if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
> >>> @@ -1598,7 +1607,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
> >>>      };
> >>>      unsigned i;
> >>>  
> >>> -    adjust_endianness(&mrfd.data, size, memory_region_wrong_endianness(mr));
> >>> +    adjust_endianness(&mrfd.data, size, eventfd_wrong_endianness(mr));
> >>>      memory_region_transaction_begin();
> >>>      for (i = 0; i < mr->ioeventfd_nb; ++i) {
> >>>          if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
> >>>
> >>
> >
diff mbox

Patch

diff --git a/memory.c b/memory.c
index 6291cc0..1e29d40 100644
--- a/memory.c
+++ b/memory.c
@@ -1549,6 +1549,15 @@  void memory_region_clear_flush_coalesced(MemoryRegion *mr)
     }
 }
 
+static bool eventfd_wrong_endianness(MemoryRegion *mr)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+    return mr->ops->endianness == DEVICE_LITTLE_ENDIAN;
+#else
+    return mr->ops->endianness == DEVICE_BIG_ENDIAN;
+#endif
+}
+
 void memory_region_add_eventfd(MemoryRegion *mr,
                                hwaddr addr,
                                unsigned size,
@@ -1565,7 +1574,7 @@  void memory_region_add_eventfd(MemoryRegion *mr,
     };
     unsigned i;
 
-    adjust_endianness(&mrfd.data, size, memory_region_wrong_endianness(mr));
+    adjust_endianness(&mrfd.data, size, eventfd_wrong_endianness(mr));
     memory_region_transaction_begin();
     for (i = 0; i < mr->ioeventfd_nb; ++i) {
         if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
@@ -1598,7 +1607,7 @@  void memory_region_del_eventfd(MemoryRegion *mr,
     };
     unsigned i;
 
-    adjust_endianness(&mrfd.data, size, memory_region_wrong_endianness(mr));
+    adjust_endianness(&mrfd.data, size, eventfd_wrong_endianness(mr));
     memory_region_transaction_begin();
     for (i = 0; i < mr->ioeventfd_nb; ++i) {
         if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {