Patchwork [RESENT] msix: allow byte and word reading from mmio

login
register
mail settings
Submitter Bernhard Kohl
Date Aug. 19, 2010, 12:56 p.m.
Message ID <1282222611-21192-1-git-send-email-bernhard.kohl@nsn.com>
Download mbox | patch
Permalink /patch/62152/
State New
Headers show

Comments

Bernhard Kohl - Aug. 19, 2010, 12:56 p.m.
It's legal that the guest reads a single byte or word from mmio.
I have an OS which reads single bytes and it works fine on real
hardware. Maybe this happens due to casting.

Signed-off-by: Bernhard Kohl <bernhard.kohl@nsn.com>
---
 hw/msix.c |   20 ++++++++++++++++----
 1 files changed, 16 insertions(+), 4 deletions(-)
Anthony Liguori - Aug. 22, 2010, 9:34 p.m.
On 08/19/2010 07:56 AM, Bernhard Kohl wrote:
> It's legal that the guest reads a single byte or word from mmio.
> I have an OS which reads single bytes and it works fine on real
> hardware. Maybe this happens due to casting.
>
> Signed-off-by: Bernhard Kohl<bernhard.kohl@nsn.com>
>    

Hi Michael,

I'm looking for this to come through your tree unless you disagree.

Regards,

Anthony Liguori

> ---
>   hw/msix.c |   20 ++++++++++++++++----
>   1 files changed, 16 insertions(+), 4 deletions(-)
>
> diff --git a/hw/msix.c b/hw/msix.c
> index d99403a..7dac7f7 100644
> --- a/hw/msix.c
> +++ b/hw/msix.c
> @@ -100,10 +100,22 @@ static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
>       return pci_get_long(page + offset);
>   }
>
> -static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
> +static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
>   {
> -    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
> -    return 0;
> +    PCIDevice *dev = opaque;
> +    unsigned int offset = addr&  (MSIX_PAGE_SIZE - 1)&  ~0x1;
> +    void *page = dev->msix_table_page;
> +
> +    return pci_get_word(page + offset);
> +}
> +
> +static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
> +{
> +    PCIDevice *dev = opaque;
> +    unsigned int offset = addr&  (MSIX_PAGE_SIZE - 1);
> +    void *page = dev->msix_table_page;
> +
> +    return pci_get_byte(page + offset);
>   }
>
>   static uint8_t msix_pending_mask(int vector)
> @@ -198,7 +210,7 @@ static CPUWriteMemoryFunc * const msix_mmio_write[] = {
>   };
>
>   static CPUReadMemoryFunc * const msix_mmio_read[] = {
> -    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
> +    msix_mmio_readb, msix_mmio_readw, msix_mmio_readl
>   };
>
>   /* Should be called from device's map method. */
>
Bernhard Kohl - Nov. 15, 2010, 9:43 a.m.
Am 22.08.2010 23:34, schrieb ext Anthony Liguori:
> On 08/19/2010 07:56 AM, Bernhard Kohl wrote:
>    
>> It's legal that the guest reads a single byte or word from mmio.
>> I have an OS which reads single bytes and it works fine on real
>> hardware. Maybe this happens due to casting.
>>
>> Signed-off-by: Bernhard Kohl<bernhard.kohl@nsn.com>
>>
>>      
> Hi Michael,
>
> I'm looking for this to come through your tree unless you disagree.
>
> Regards,
>
> Anthony Liguori
>    

Hi,

could you please look at this again?

Thanks
Bernhard

>    
>> ---
>>    hw/msix.c |   20 ++++++++++++++++----
>>    1 files changed, 16 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/msix.c b/hw/msix.c
>> index d99403a..7dac7f7 100644
>> --- a/hw/msix.c
>> +++ b/hw/msix.c
>> @@ -100,10 +100,22 @@ static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
>>        return pci_get_long(page + offset);
>>    }
>>
>> -static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
>> +static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
>>    {
>> -    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
>> -    return 0;
>> +    PCIDevice *dev = opaque;
>> +    unsigned int offset = addr&   (MSIX_PAGE_SIZE - 1)&   ~0x1;
>> +    void *page = dev->msix_table_page;
>> +
>> +    return pci_get_word(page + offset);
>> +}
>> +
>> +static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
>> +{
>> +    PCIDevice *dev = opaque;
>> +    unsigned int offset = addr&   (MSIX_PAGE_SIZE - 1);
>> +    void *page = dev->msix_table_page;
>> +
>> +    return pci_get_byte(page + offset);
>>    }
>>
>>    static uint8_t msix_pending_mask(int vector)
>> @@ -198,7 +210,7 @@ static CPUWriteMemoryFunc * const msix_mmio_write[] = {
>>    };
>>
>>    static CPUReadMemoryFunc * const msix_mmio_read[] = {
>> -    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
>> +    msix_mmio_readb, msix_mmio_readw, msix_mmio_readl
>>    };
>>
>>    /* Should be called from device's map method. */
>>
>>      
>
>
Michael S. Tsirkin - Nov. 15, 2010, 10:42 a.m.
On Thu, Aug 19, 2010 at 02:56:51PM +0200, Bernhard Kohl wrote:
> It's legal that the guest reads a single byte or word from mmio.

Interesting. The spec seems to say this:

	For all accesses to MSI-X Table and MSI-X PBA fields, software must use
	aligned full DWORD or aligned full QWORD transactions; otherwise, the
	result is undefined.

> I have an OS which reads single bytes and it works fine on real
> hardware. Maybe this happens due to casting.

What do you mean by casting?

> 
> Signed-off-by: Bernhard Kohl <bernhard.kohl@nsn.com>

I guess we can merge this, but we need a comment I think since this
seems to contradict the spec, and people were sending patches relying on
this.

Does something like the following describe the situation correctly?

/* Note: PCI spec requires that all MSI-X table accesses
   are either DWORD or QWORD, size aligned.  Some guests seem to violate
   this rule for read accesses, performing single byte reads.
   Since it's easy to support this, let's do so.
   Also support 16 bit size aligned reads, just in case.
 */

Does you guest also do 16 bit reads?  Are accesses at least aligned?

> ---
>  hw/msix.c |   20 ++++++++++++++++----
>  1 files changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/msix.c b/hw/msix.c
> index d99403a..7dac7f7 100644
> --- a/hw/msix.c
> +++ b/hw/msix.c
> @@ -100,10 +100,22 @@ static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
>      return pci_get_long(page + offset);
>  }
>  
> -static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
> +static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
>  {
> -    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
> -    return 0;
> +    PCIDevice *dev = opaque;
> +    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x1;
> +    void *page = dev->msix_table_page;
> +
> +    return pci_get_word(page + offset);
> +}
> +
> +static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
> +{
> +    PCIDevice *dev = opaque;
> +    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
> +    void *page = dev->msix_table_page;
> +
> +    return pci_get_byte(page + offset);
>  }
>  
>  static uint8_t msix_pending_mask(int vector)
> @@ -198,7 +210,7 @@ static CPUWriteMemoryFunc * const msix_mmio_write[] = {
>  };
>  
>  static CPUReadMemoryFunc * const msix_mmio_read[] = {
> -    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
> +    msix_mmio_readb, msix_mmio_readw, msix_mmio_readl
>  };
>  
>  /* Should be called from device's map method. */
> -- 
> 1.7.2.1
Bernhard Kohl - Nov. 15, 2010, 4:40 p.m.
Am 15.11.2010 11:42, schrieb ext Michael S. Tsirkin:
> On Thu, Aug 19, 2010 at 02:56:51PM +0200, Bernhard Kohl wrote:
>    
>> It's legal that the guest reads a single byte or word from mmio.
>>      
> Interesting. The spec seems to say this:
>
> 	For all accesses to MSI-X Table and MSI-X PBA fields, software must use
> 	aligned full DWORD or aligned full QWORD transactions; otherwise, the
> 	result is undefined.
>    

I will remove the first statement from the commit message and add
something like the comment you proposed below.

>    
>> I have an OS which reads single bytes and it works fine on real
>> hardware. Maybe this happens due to casting.
>>      
> What do you mean by casting?
>    

I did not yet locate the line of code where this happens in the guest.
As all other accesses are dword, I assume that there is some masking,
shifting or casting to a byte variable in the source code, and the
compiler generates a byte access from this.

>    
>> Signed-off-by: Bernhard Kohl<bernhard.kohl@nsn.com>
>>      
> I guess we can merge this, but we need a comment I think since this
> seems to contradict the spec, and people were sending patches relying on
> this.
>
> Does something like the following describe the situation correctly?
>
> /* Note: PCI spec requires that all MSI-X table accesses
>     are either DWORD or QWORD, size aligned.  Some guests seem to violate
>     this rule for read accesses, performing single byte reads.
>     Since it's easy to support this, let's do so.
>     Also support 16 bit size aligned reads, just in case.
>   */
>    

Yes, that's is exactly the situation with my guest.
I will add this comment.

> Does you guest also do 16 bit reads?  Are accesses at least aligned?
>    

I will check this with my guest and the readw function disabled
in the patch. This will take some time.

>    
>> ---
>>   hw/msix.c |   20 ++++++++++++++++----
>>   1 files changed, 16 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/msix.c b/hw/msix.c
>> index d99403a..7dac7f7 100644
>> --- a/hw/msix.c
>> +++ b/hw/msix.c
>> @@ -100,10 +100,22 @@ static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
>>       return pci_get_long(page + offset);
>>   }
>>
>> -static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
>> +static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
>>   {
>> -    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
>> -    return 0;
>> +    PCIDevice *dev = opaque;
>> +    unsigned int offset = addr&  (MSIX_PAGE_SIZE - 1)&  ~0x1;
>> +    void *page = dev->msix_table_page;
>> +
>> +    return pci_get_word(page + offset);
>> +}
>> +
>> +static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
>> +{
>> +    PCIDevice *dev = opaque;
>> +    unsigned int offset = addr&  (MSIX_PAGE_SIZE - 1);
>> +    void *page = dev->msix_table_page;
>> +
>> +    return pci_get_byte(page + offset);
>>   }
>>
>>   static uint8_t msix_pending_mask(int vector)
>> @@ -198,7 +210,7 @@ static CPUWriteMemoryFunc * const msix_mmio_write[] = {
>>   };
>>
>>   static CPUReadMemoryFunc * const msix_mmio_read[] = {
>> -    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
>> +    msix_mmio_readb, msix_mmio_readw, msix_mmio_readl
>>   };
>>
>>   /* Should be called from device's map method. */
>> -- 
>> 1.7.2.1
>>      
>
Michael S. Tsirkin - Nov. 15, 2010, 7:51 p.m.
On Mon, Nov 15, 2010 at 05:40:30PM +0100, Bernhard Kohl wrote:
> Am 15.11.2010 11:42, schrieb ext Michael S. Tsirkin:
> >On Thu, Aug 19, 2010 at 02:56:51PM +0200, Bernhard Kohl wrote:
> >>It's legal that the guest reads a single byte or word from mmio.
> >Interesting. The spec seems to say this:
> >
> >	For all accesses to MSI-X Table and MSI-X PBA fields, software must use
> >	aligned full DWORD or aligned full QWORD transactions; otherwise, the
> >	result is undefined.
> 
> I will remove the first statement from the commit message and add
> something like the comment you proposed below.
> 
> >>I have an OS which reads single bytes and it works fine on real
> >>hardware. Maybe this happens due to casting.
> >What do you mean by casting?
> 
> I did not yet locate the line of code where this happens in the guest.
> As all other accesses are dword, I assume that there is some masking,
> shifting or casting to a byte variable in the source code, and the
> compiler generates a byte access from this.

And I presume it's too late to fix the guest in question?

> >>Signed-off-by: Bernhard Kohl<bernhard.kohl@nsn.com>
> >I guess we can merge this, but we need a comment I think since this
> >seems to contradict the spec, and people were sending patches relying on
> >this.
> >
> >Does something like the following describe the situation correctly?
> >
> >/* Note: PCI spec requires that all MSI-X table accesses
> >    are either DWORD or QWORD, size aligned.  Some guests seem to violate
> >    this rule for read accesses, performing single byte reads.
> >    Since it's easy to support this, let's do so.
> >    Also support 16 bit size aligned reads, just in case.
> >  */
> 
> Yes, that's is exactly the situation with my guest.
> I will add this comment.
> 
> >Does you guest also do 16 bit reads?  Are accesses at least aligned?
> 
> I will check this with my guest and the readw function disabled
> in the patch. This will take some time.
> 
> >>---
> >>  hw/msix.c |   20 ++++++++++++++++----
> >>  1 files changed, 16 insertions(+), 4 deletions(-)
> >>
> >>diff --git a/hw/msix.c b/hw/msix.c
> >>index d99403a..7dac7f7 100644
> >>--- a/hw/msix.c
> >>+++ b/hw/msix.c
> >>@@ -100,10 +100,22 @@ static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
> >>      return pci_get_long(page + offset);
> >>  }
> >>
> >>-static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
> >>+static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
> >>  {
> >>-    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
> >>-    return 0;
> >>+    PCIDevice *dev = opaque;
> >>+    unsigned int offset = addr&  (MSIX_PAGE_SIZE - 1)&  ~0x1;
> >>+    void *page = dev->msix_table_page;
> >>+
> >>+    return pci_get_word(page + offset);
> >>+}
> >>+
> >>+static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
> >>+{
> >>+    PCIDevice *dev = opaque;
> >>+    unsigned int offset = addr&  (MSIX_PAGE_SIZE - 1);
> >>+    void *page = dev->msix_table_page;
> >>+
> >>+    return pci_get_byte(page + offset);
> >>  }
> >>
> >>  static uint8_t msix_pending_mask(int vector)
> >>@@ -198,7 +210,7 @@ static CPUWriteMemoryFunc * const msix_mmio_write[] = {
> >>  };
> >>
> >>  static CPUReadMemoryFunc * const msix_mmio_read[] = {
> >>-    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
> >>+    msix_mmio_readb, msix_mmio_readw, msix_mmio_readl
> >>  };
> >>
> >>  /* Should be called from device's map method. */
> >>-- 
> >>1.7.2.1

Patch

diff --git a/hw/msix.c b/hw/msix.c
index d99403a..7dac7f7 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -100,10 +100,22 @@  static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
     return pci_get_long(page + offset);
 }
 
-static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
+static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
 {
-    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
-    return 0;
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x1;
+    void *page = dev->msix_table_page;
+
+    return pci_get_word(page + offset);
+}
+
+static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    void *page = dev->msix_table_page;
+
+    return pci_get_byte(page + offset);
 }
 
 static uint8_t msix_pending_mask(int vector)
@@ -198,7 +210,7 @@  static CPUWriteMemoryFunc * const msix_mmio_write[] = {
 };
 
 static CPUReadMemoryFunc * const msix_mmio_read[] = {
-    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
+    msix_mmio_readb, msix_mmio_readw, msix_mmio_readl
 };
 
 /* Should be called from device's map method. */