diff mbox

msi/msix: added API to set MSI message address and data

Message ID 4FE307DE.5070002@ozlabs.ru
State New, archived
Headers show

Commit Message

Alexey Kardashevskiy June 21, 2012, 11:39 a.m. UTC
Added (msi|msix)_set_message() functions.

Currently msi_notify()/msix_notify() write to these vectors to
signal the guest about an interrupt so the correct values have to
be written there by the guest or QEMU.

For example, POWER guest never initializes MSI/MSIX vectors, instead
it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
POWER we have to initialize MSI/MSIX message from QEMU.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 hw/msi.c  |   13 +++++++++++++
 hw/msi.h  |    1 +
 hw/msix.c |    9 +++++++++
 hw/msix.h |    2 ++
 4 files changed, 25 insertions(+)

Comments

Jan Kiszka June 21, 2012, 11:49 a.m. UTC | #1
On 2012-06-21 13:39, Alexey Kardashevskiy wrote:
> Added (msi|msix)_set_message() functions.
> 
> Currently msi_notify()/msix_notify() write to these vectors to
> signal the guest about an interrupt so the correct values have to
> written there by the guest or QEMU.
> 
> For example, POWER guest never initializes MSI/MSIX vectors, instead
> it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
> POWER we have to initialize MSI/MSIX message from QEMU.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  hw/msi.c  |   13 +++++++++++++
>  hw/msi.h  |    1 +
>  hw/msix.c |    9 +++++++++
>  hw/msix.h |    2 ++
>  4 files changed, 25 insertions(+)
> 
> diff --git a/hw/msi.c b/hw/msi.c
> index 5233204..cc6102f 100644
> --- a/hw/msi.c
> +++ b/hw/msi.c
> @@ -105,6 +105,19 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
>      return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
>  }
>  
> +void msi_set_message(PCIDevice *dev, MSIMessage msg)
> +{
> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> +
> +    if (msi64bit) {
> +        pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address);
> +    } else {
> +        pci_set_long(dev->config + msi_address_lo_off(dev), msg.address);
> +    }
> +    pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
> +}
> +
>  bool msi_enabled(const PCIDevice *dev)
>  {
>      return msi_present(dev) &&
> diff --git a/hw/msi.h b/hw/msi.h
> index 75747ab..6ec1f99 100644
> --- a/hw/msi.h
> +++ b/hw/msi.h
> @@ -31,6 +31,7 @@ struct MSIMessage {
>  
>  extern bool msi_supported;
>  
> +void msi_set_message(PCIDevice *dev, MSIMessage msg);
>  bool msi_enabled(const PCIDevice *dev);
>  int msi_init(struct PCIDevice *dev, uint8_t offset,
>               unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
> diff --git a/hw/msix.c b/hw/msix.c
> index ded3c55..5f7d6d3 100644
> --- a/hw/msix.c
> +++ b/hw/msix.c
> @@ -45,6 +45,15 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
>      return msg;
>  }
>  
> +void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
> +{
> +    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
> +
> +    pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
> +    pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
> +    table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
> +}
> +
>  /* Add MSI-X capability to the config space for the device. */
>  /* Given a bar and its size, add MSI-X table on top of it
>   * and fill MSI-X capability in the config space.
> diff --git a/hw/msix.h b/hw/msix.h
> index 50aee82..26a437e 100644
> --- a/hw/msix.h
> +++ b/hw/msix.h
> @@ -4,6 +4,8 @@
>  #include "qemu-common.h"
>  #include "pci.h"
>  
> +void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
> +
>  int msix_init(PCIDevice *pdev, unsigned short nentries,
>                MemoryRegion *bar,
>                unsigned bar_nr, unsigned bar_size);
> 

Interface looks good as far as I can tell (can't assess the POWER need
for clearing the mask bit on msix_set_message).

> -- 
> 1.7.10
> 
> ps. double '-' and git version is an end-of-patch scissor as I read somewhere, cannot recall where exactly 

Check man git-am.

Jan
Alexey Kardashevskiy June 22, 2012, 1:03 a.m. UTC | #2
On 21/06/12 21:49, Jan Kiszka wrote:
> On 2012-06-21 13:39, Alexey Kardashevskiy wrote:
>> Added (msi|msix)_set_message() functions.
>>
>> Currently msi_notify()/msix_notify() write to these vectors to
>> signal the guest about an interrupt so the correct values have to
>> written there by the guest or QEMU.
>>
>> For example, POWER guest never initializes MSI/MSIX vectors, instead
>> it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
>> POWER we have to initialize MSI/MSIX message from QEMU.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>  hw/msi.c  |   13 +++++++++++++
>>  hw/msi.h  |    1 +
>>  hw/msix.c |    9 +++++++++
>>  hw/msix.h |    2 ++
>>  4 files changed, 25 insertions(+)
>>
>> diff --git a/hw/msi.c b/hw/msi.c
>> index 5233204..cc6102f 100644
>> --- a/hw/msi.c
>> +++ b/hw/msi.c
>> @@ -105,6 +105,19 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
>>      return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
>>  }
>>  
>> +void msi_set_message(PCIDevice *dev, MSIMessage msg)
>> +{
>> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
>> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
>> +
>> +    if (msi64bit) {
>> +        pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address);
>> +    } else {
>> +        pci_set_long(dev->config + msi_address_lo_off(dev), msg.address);
>> +    }
>> +    pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
>> +}
>> +
>>  bool msi_enabled(const PCIDevice *dev)
>>  {
>>      return msi_present(dev) &&
>> diff --git a/hw/msi.h b/hw/msi.h
>> index 75747ab..6ec1f99 100644
>> --- a/hw/msi.h
>> +++ b/hw/msi.h
>> @@ -31,6 +31,7 @@ struct MSIMessage {
>>  
>>  extern bool msi_supported;
>>  
>> +void msi_set_message(PCIDevice *dev, MSIMessage msg);
>>  bool msi_enabled(const PCIDevice *dev);
>>  int msi_init(struct PCIDevice *dev, uint8_t offset,
>>               unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
>> diff --git a/hw/msix.c b/hw/msix.c
>> index ded3c55..5f7d6d3 100644
>> --- a/hw/msix.c
>> +++ b/hw/msix.c
>> @@ -45,6 +45,15 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
>>      return msg;
>>  }
>>  
>> +void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
>> +{
>> +    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
>> +
>> +    pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
>> +    pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
>> +    table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
>> +}
>> +
>>  /* Add MSI-X capability to the config space for the device. */
>>  /* Given a bar and its size, add MSI-X table on top of it
>>   * and fill MSI-X capability in the config space.
>> diff --git a/hw/msix.h b/hw/msix.h
>> index 50aee82..26a437e 100644
>> --- a/hw/msix.h
>> +++ b/hw/msix.h
>> @@ -4,6 +4,8 @@
>>  #include "qemu-common.h"
>>  #include "pci.h"
>>  
>> +void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
>> +
>>  int msix_init(PCIDevice *pdev, unsigned short nentries,
>>                MemoryRegion *bar,
>>                unsigned bar_nr, unsigned bar_size);
>>
> 
> Interface looks good as fas as I can tell (can't asses the POWER need
> for clearing the mask bit on msix_set_message).


I do not know exactly how x86 works (who/how allocates addresses for MSI/MSIX). On POWER at the
moment I did the following thing in QEMU:

- registered memory_region_init_io at some big address which the guest won't use, it is just for QEMU
- put address from the previous step to the MSIX BAR via msix_set_message() when msi is being configured
- then the sequence looks like:
	- vfio_msi_interrupt() calls msix_notify()
	- msix_notify() checks if it is masked via msix_is_masked() - and here PCI_MSIX_ENTRY_CTRL_MASKBIT
must be unset
	- stl_le_phys() - here I get a notification in my MemoryRegionOps::write() and do qemu_irq_pulse()

2 reasons to do that:
1) I did not have to change either msix or vfio - cool for submitting patches;
2) neither the POWER guest nor QEMU changes the msi or msix PCI config (it is done by a different mechanism
called RTAS), so I have to do this myself to support 1) and I do not have to care about someone
breaking my settings


>> -- 
>> 1.7.10
>>
>> ps. double '-' and git version is an end-of-patch scissor as I read somewhere, cannot recall where exactly 
> 
> Check man git-am.

Ahhh. Confused end-of-message with end-of-patch. I'll repost it.
Michael S. Tsirkin July 18, 2012, 12:43 p.m. UTC | #3
On Thu, Jun 21, 2012 at 09:39:10PM +1000, Alexey Kardashevskiy wrote:
> Added (msi|msix)_set_message() functions.
> 
> Currently msi_notify()/msix_notify() write to these vectors to
> signal the guest about an interrupt so the correct values have to
> written there by the guest or QEMU.
> 
> For example, POWER guest never initializes MSI/MSIX vectors, instead
> it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
> POWER we have to initialize MSI/MSIX message from QEMU.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

So guests do enable MSI through config space, but do
not fill in vectors? Very strange. Are you sure it's not
just a guest bug? How does it work for other PCI devices?
Can't we just fix guest drivers to program the vectors properly?

Also pls address the comment below.

Thanks!

> ---
>  hw/msi.c  |   13 +++++++++++++
>  hw/msi.h  |    1 +
>  hw/msix.c |    9 +++++++++
>  hw/msix.h |    2 ++
>  4 files changed, 25 insertions(+)
> 
> diff --git a/hw/msi.c b/hw/msi.c
> index 5233204..cc6102f 100644
> --- a/hw/msi.c
> +++ b/hw/msi.c
> @@ -105,6 +105,19 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
>      return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
>  }
>  
> +void msi_set_message(PCIDevice *dev, MSIMessage msg)
> +{
> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> +
> +    if (msi64bit) {
> +        pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address);
> +    } else {
> +        pci_set_long(dev->config + msi_address_lo_off(dev), msg.address);
> +    }
> +    pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
> +}
> +

Please add documentation. Something like

/*
 * Special API for POWER to configure the vectors through
 * a side channel. Should never be used by devices.
 */

>  bool msi_enabled(const PCIDevice *dev)
>  {
>      return msi_present(dev) &&
> diff --git a/hw/msi.h b/hw/msi.h
> index 75747ab..6ec1f99 100644
> --- a/hw/msi.h
> +++ b/hw/msi.h
> @@ -31,6 +31,7 @@ struct MSIMessage {
>  
>  extern bool msi_supported;
>  
> +void msi_set_message(PCIDevice *dev, MSIMessage msg);
>  bool msi_enabled(const PCIDevice *dev);
>  int msi_init(struct PCIDevice *dev, uint8_t offset,
>               unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
> diff --git a/hw/msix.c b/hw/msix.c
> index ded3c55..5f7d6d3 100644
> --- a/hw/msix.c
> +++ b/hw/msix.c
> @@ -45,6 +45,15 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
>      return msg;
>  }
>  
> +void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
> +{
> +    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
> +
> +    pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
> +    pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
> +    table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
> +}
> +
>  /* Add MSI-X capability to the config space for the device. */
>  /* Given a bar and its size, add MSI-X table on top of it
>   * and fill MSI-X capability in the config space.
> diff --git a/hw/msix.h b/hw/msix.h
> index 50aee82..26a437e 100644
> --- a/hw/msix.h
> +++ b/hw/msix.h
> @@ -4,6 +4,8 @@
>  #include "qemu-common.h"
>  #include "pci.h"
>  
> +void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
> +
>  int msix_init(PCIDevice *pdev, unsigned short nentries,
>                MemoryRegion *bar,
>                unsigned bar_nr, unsigned bar_size);
> -- 
> 1.7.10
> 
> ps. double '-' and git version is an end-of-patch scissor as I read somewhere, cannot recall where exactly :)
> 
> 
> 
> 
> 
> 
> On 21/06/12 20:56, Jan Kiszka wrote:
> > On 2012-06-21 12:50, Alexey Kardashevskiy wrote:
> >> On 21/06/12 20:38, Jan Kiszka wrote:
> >>> On 2012-06-21 12:28, Alexey Kardashevskiy wrote:
> >>>> On 21/06/12 17:39, Jan Kiszka wrote:
> >>>>> On 2012-06-21 09:18, Alexey Kardashevskiy wrote:
> >>>>>>
> >>>>>> agrhhh. sha1 of the patch changed after rebasing :)
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>> Added (msi|msix)_(set|get)_message() function for whoever might
> >>>>>> want to use them.
> >>>>>>
> >>>>>> Currently msi_notify()/msix_notify() write to these vectors to
> >>>>>> signal the guest about an interrupt so the correct values have to
> >>>>>> written there by the guest or QEMU.
> >>>>>>
> >>>>>> For example, POWER guest never initializes MSI/MSIX vectors, instead
> >>>>>> it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
> >>>>>> POWER we have to initialize MSI/MSIX message from QEMU.
> >>>>>>
> >>>>>> As only set* function are required by now, the "get" functions were added
> >>>>>> or made public for a symmetry.
> >>>>>>
> >>>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >>>>>> ---
> >>>>>>  hw/msi.c  |   29 +++++++++++++++++++++++++++++
> >>>>>>  hw/msi.h  |    2 ++
> >>>>>>  hw/msix.c |   11 ++++++++++-
> >>>>>>  hw/msix.h |    3 +++
> >>>>>>  4 files changed, 44 insertions(+), 1 deletion(-)
> >>>>>>
> >>>>>> diff --git a/hw/msi.c b/hw/msi.c
> >>>>>> index 5233204..9ad84a4 100644
> >>>>>> --- a/hw/msi.c
> >>>>>> +++ b/hw/msi.c
> >>>>>> @@ -105,6 +105,35 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
> >>>>>>      return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
> >>>>>>  }
> >>>>>>  
> >>>>>> +MSIMessage msi_get_message(PCIDevice *dev)
> >>>>>
> >>>>> MSIMessage msi_get_message(PCIDevice *dev, unsigned vector)
> >>>>
> >>>>
> >>>> Who/how/why is going to calculate the vector here?
> >>>>
> >>>>>
> >>>>>> +{
> >>>>>> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> >>>>>> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> >>>>>> +    MSIMessage msg;
> >>>>>> +
> >>>>>> +    if (msi64bit) {
> >>>>>> +        msg.address = pci_get_quad(dev->config + msi_address_lo_off(dev));
> >>>>>> +    } else {
> >>>>>> +        msg.address = pci_get_long(dev->config + msi_address_lo_off(dev));
> >>>>>> +    }
> >>>>>> +    msg.data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
> >>>>>
> >>>>> And I have this here in addition:
> >>>>>
> >>>>>     unsigned int nr_vectors = msi_nr_vectors(flags);
> >>>>>     ...
> >>>>>
> >>>>>     if (nr_vectors > 1) {
> >>>>>         msg.data &= ~(nr_vectors - 1);
> >>>>>         msg.data |= vector;
> >>>>>     }
> >>>>>
> >>>>> See PCI spec and existing code.
> >>>>
> >>>>
> >>>> What for? I really do not get it why someone might want to read something but not real value.
> >>>> What PCI code should I look?
> >>>
> >>> I'm not sure what your use case for reading the message is. For KVM
> >>> device assignment it is preparing an alternative message delivery path
> >>> for MSI vectors. And for this we will need vector notifier support for
> >>> MSI as well. You can check the MSI-X code for corresponding use cases of
> >>> msix_get_message.
> >>
> >>> And when we already have msi_get_message, another logical use case is
> >>> msi_notify. See msix.c again.
> >>
> >> Aaaa.
> >>
> >> I have no case for reading the message. All I need is writing. And I want it public as I want to use
> >> it from hw/spapr_pci.c. You suggested to add reading, I added "get" to be _symmetric_ to "set"
> >> ("get" returns what "set" wrote). You want a different thing which I can do but it is not
> >> msi_get_message(), it is something like msi_prepare_message(MSImessage msg) or
> >> msi_set_vector(uint16_t data) or simply internal kitchen of msi_notify().
> >>
> >> Still can do what you suggested, it just does not seem right.
> > 
> > It is right - when looking at it from a different angle. ;)
> > 
> > I don't mind if you add msi_get_message now or leave this to me. Likely
> > the latter is better as you have no use case for msi_get_message (and
> > also msix_get_message!) outside of their modules, thus we should not
> > export those functions anyway.
> > 
> > Jan
> > 
> 
> 
> -- 
> Alexey
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexey Kardashevskiy July 18, 2012, 1:17 p.m. UTC | #4
On 18/07/12 22:43, Michael S. Tsirkin wrote:
> On Thu, Jun 21, 2012 at 09:39:10PM +1000, Alexey Kardashevskiy wrote:
>> Added (msi|msix)_set_message() functions.
>>
>> Currently msi_notify()/msix_notify() write to these vectors to
>> signal the guest about an interrupt so the correct values have to
>> written there by the guest or QEMU.
>>
>> For example, POWER guest never initializes MSI/MSIX vectors, instead
>> it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
>> POWER we have to initialize MSI/MSIX message from QEMU.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> So guests do enable MSI through config space, but do
> not fill in vectors? 

Yes. msix_capability_init() calls arch_setup_msi_irqs() which does everything it needs to do (i.e. calls hypervisor) before msix_capability_init() writes PCI_MSIX_FLAGS_ENABLE to the PCI_MSIX_FLAGS register.

These vectors are the PCI bus addresses, the way they are set is specific for a PCI host controller, I do not see why the current scheme is a bug.


> Very strange. Are you sure it's not
> just a guest bug? How does it work for other PCI devices?

Did not get the question. It works the same for every PCI device under POWER guest.


> Can't we just fix guest drivers to program the vectors properly?
> 
> Also pls address the comment below.

Comment below.

> Thanks!
> 
>> ---
>>  hw/msi.c  |   13 +++++++++++++
>>  hw/msi.h  |    1 +
>>  hw/msix.c |    9 +++++++++
>>  hw/msix.h |    2 ++
>>  4 files changed, 25 insertions(+)
>>
>> diff --git a/hw/msi.c b/hw/msi.c
>> index 5233204..cc6102f 100644
>> --- a/hw/msi.c
>> +++ b/hw/msi.c
>> @@ -105,6 +105,19 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
>>      return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
>>  }
>>  
>> +void msi_set_message(PCIDevice *dev, MSIMessage msg)
>> +{
>> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
>> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
>> +
>> +    if (msi64bit) {
>> +        pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address);
>> +    } else {
>> +        pci_set_long(dev->config + msi_address_lo_off(dev), msg.address);
>> +    }
>> +    pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
>> +}
>> +
> 
> Please add documentation. Something like
> 
> /*
>  * Special API for POWER to configure the vectors through
>  * a side channel. Should never be used by devices.
>  */


It is useful for any para-virtualized environment I believe, isn't it?
For s390 as well. Of course, only if it supports PCI, which I am not sure it does :)



>>  bool msi_enabled(const PCIDevice *dev)
>>  {
>>      return msi_present(dev) &&
>> diff --git a/hw/msi.h b/hw/msi.h
>> index 75747ab..6ec1f99 100644
>> --- a/hw/msi.h
>> +++ b/hw/msi.h
>> @@ -31,6 +31,7 @@ struct MSIMessage {
>>  
>>  extern bool msi_supported;
>>  
>> +void msi_set_message(PCIDevice *dev, MSIMessage msg);
>>  bool msi_enabled(const PCIDevice *dev);
>>  int msi_init(struct PCIDevice *dev, uint8_t offset,
>>               unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
>> diff --git a/hw/msix.c b/hw/msix.c
>> index ded3c55..5f7d6d3 100644
>> --- a/hw/msix.c
>> +++ b/hw/msix.c
>> @@ -45,6 +45,15 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
>>      return msg;
>>  }
>>  
>> +void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
>> +{
>> +    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
>> +
>> +    pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
>> +    pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
>> +    table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
>> +}
>> +
>>  /* Add MSI-X capability to the config space for the device. */
>>  /* Given a bar and its size, add MSI-X table on top of it
>>   * and fill MSI-X capability in the config space.
>> diff --git a/hw/msix.h b/hw/msix.h
>> index 50aee82..26a437e 100644
>> --- a/hw/msix.h
>> +++ b/hw/msix.h
>> @@ -4,6 +4,8 @@
>>  #include "qemu-common.h"
>>  #include "pci.h"
>>  
>> +void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
>> +
>>  int msix_init(PCIDevice *pdev, unsigned short nentries,
>>                MemoryRegion *bar,
>>                unsigned bar_nr, unsigned bar_size);
>> -- 
>> 1.7.10
>>
>> ps. double '-' and git version is an end-of-patch scissor as I read somewhere, cannot recall where exactly :)
>>
>>
>>
>>
>>
>>
>> On 21/06/12 20:56, Jan Kiszka wrote:
>>> On 2012-06-21 12:50, Alexey Kardashevskiy wrote:
>>>> On 21/06/12 20:38, Jan Kiszka wrote:
>>>>> On 2012-06-21 12:28, Alexey Kardashevskiy wrote:
>>>>>> On 21/06/12 17:39, Jan Kiszka wrote:
>>>>>>> On 2012-06-21 09:18, Alexey Kardashevskiy wrote:
>>>>>>>>
>>>>>>>> agrhhh. sha1 of the patch changed after rebasing :)
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> Added (msi|msix)_(set|get)_message() function for whoever might
>>>>>>>> want to use them.
>>>>>>>>
>>>>>>>> Currently msi_notify()/msix_notify() write to these vectors to
>>>>>>>> signal the guest about an interrupt so the correct values have to
>>>>>>>> written there by the guest or QEMU.
>>>>>>>>
>>>>>>>> For example, POWER guest never initializes MSI/MSIX vectors, instead
>>>>>>>> it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
>>>>>>>> POWER we have to initialize MSI/MSIX message from QEMU.
>>>>>>>>
>>>>>>>> As only set* function are required by now, the "get" functions were added
>>>>>>>> or made public for a symmetry.
>>>>>>>>
>>>>>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>>>>>> ---
>>>>>>>>  hw/msi.c  |   29 +++++++++++++++++++++++++++++
>>>>>>>>  hw/msi.h  |    2 ++
>>>>>>>>  hw/msix.c |   11 ++++++++++-
>>>>>>>>  hw/msix.h |    3 +++
>>>>>>>>  4 files changed, 44 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/hw/msi.c b/hw/msi.c
>>>>>>>> index 5233204..9ad84a4 100644
>>>>>>>> --- a/hw/msi.c
>>>>>>>> +++ b/hw/msi.c
>>>>>>>> @@ -105,6 +105,35 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
>>>>>>>>      return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
>>>>>>>>  }
>>>>>>>>  
>>>>>>>> +MSIMessage msi_get_message(PCIDevice *dev)
>>>>>>>
>>>>>>> MSIMessage msi_get_message(PCIDevice *dev, unsigned vector)
>>>>>>
>>>>>>
>>>>>> Who/how/why is going to calculate the vector here?
>>>>>>
>>>>>>>
>>>>>>>> +{
>>>>>>>> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
>>>>>>>> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
>>>>>>>> +    MSIMessage msg;
>>>>>>>> +
>>>>>>>> +    if (msi64bit) {
>>>>>>>> +        msg.address = pci_get_quad(dev->config + msi_address_lo_off(dev));
>>>>>>>> +    } else {
>>>>>>>> +        msg.address = pci_get_long(dev->config + msi_address_lo_off(dev));
>>>>>>>> +    }
>>>>>>>> +    msg.data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
>>>>>>>
>>>>>>> And I have this here in addition:
>>>>>>>
>>>>>>>     unsigned int nr_vectors = msi_nr_vectors(flags);
>>>>>>>     ...
>>>>>>>
>>>>>>>     if (nr_vectors > 1) {
>>>>>>>         msg.data &= ~(nr_vectors - 1);
>>>>>>>         msg.data |= vector;
>>>>>>>     }
>>>>>>>
>>>>>>> See PCI spec and existing code.
>>>>>>
>>>>>>
>>>>>> What for? I really do not get it why someone might want to read something but not real value.
>>>>>> What PCI code should I look?
>>>>>
>>>>> I'm not sure what your use case for reading the message is. For KVM
>>>>> device assignment it is preparing an alternative message delivery path
>>>>> for MSI vectors. And for this we will need vector notifier support for
>>>>> MSI as well. You can check the MSI-X code for corresponding use cases of
>>>>> msix_get_message.
>>>>
>>>>> And when we already have msi_get_message, another logical use case is
>>>>> msi_notify. See msix.c again.
>>>>
>>>> Aaaa.
>>>>
>>>> I have no case for reading the message. All I need is writing. And I want it public as I want to use
>>>> it from hw/spapr_pci.c. You suggested to add reading, I added "get" to be _symmetric_ to "set"
>>>> ("get" returns what "set" wrote). You want a different thing which I can do but it is not
>>>> msi_get_message(), it is something like msi_prepare_message(MSImessage msg) or
>>>> msi_set_vector(uint16_t data) or simply internal kitchen of msi_notify().
>>>>
>>>> Still can do what you suggested, it just does not seem right.
>>>
>>> It is right - when looking at it from a different angle. ;)
>>>
>>> I don't mind if you add msi_get_message now or leave this to me. Likely
>>> the latter is better as you have no use case for msi_get_message (and
>>> also msix_get_message!) outside of their modules, thus we should not
>>> export those functions anyway.
Michael S. Tsirkin July 18, 2012, 3:23 p.m. UTC | #5
On Wed, Jul 18, 2012 at 11:17:12PM +1000, Alexey Kardashevskiy wrote:
> On 18/07/12 22:43, Michael S. Tsirkin wrote:
> > On Thu, Jun 21, 2012 at 09:39:10PM +1000, Alexey Kardashevskiy wrote:
> >> Added (msi|msix)_set_message() functions.
> >>
> >> Currently msi_notify()/msix_notify() write to these vectors to
> >> signal the guest about an interrupt so the correct values have to
> >> written there by the guest or QEMU.
> >>
> >> For example, POWER guest never initializes MSI/MSIX vectors, instead
> >> it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
> >> POWER we have to initialize MSI/MSIX message from QEMU.
> >>
> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> > 
> > So guests do enable MSI through config space, but do
> > not fill in vectors? 
> 
> Yes. msix_capability_init() calls arch_setup_msi_irqs() which does everything it needs to do (i.e. calls hypervisor) before msix_capability_init() writes PCI_MSIX_FLAGS_ENABLE to the PCI_MSIX_FLAGS register.
> 
> These vectors are the PCI bus addresses, the way they are set is specific for a PCI host controller, I do not see why the current scheme is a bug.

It won't work with any real PCI device, will it? Real pci devices expect
vectors to be written into their memory.

> > Very strange. Are you sure it's not
> > just a guest bug? How does it work for other PCI devices?
> 
> Did not get the question. It works the same for every PCI device under POWER guest.

I mean for real PCI devices.

> > Can't we just fix guest drivers to program the vectors properly?
> > 
> > Also pls address the comment below.
> 
> Comment below.
> 
> > Thanks!
> > 
> >> ---
> >>  hw/msi.c  |   13 +++++++++++++
> >>  hw/msi.h  |    1 +
> >>  hw/msix.c |    9 +++++++++
> >>  hw/msix.h |    2 ++
> >>  4 files changed, 25 insertions(+)
> >>
> >> diff --git a/hw/msi.c b/hw/msi.c
> >> index 5233204..cc6102f 100644
> >> --- a/hw/msi.c
> >> +++ b/hw/msi.c
> >> @@ -105,6 +105,19 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
> >>      return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
> >>  }
> >>  
> >> +void msi_set_message(PCIDevice *dev, MSIMessage msg)
> >> +{
> >> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> >> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> >> +
> >> +    if (msi64bit) {
> >> +        pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address);
> >> +    } else {
> >> +        pci_set_long(dev->config + msi_address_lo_off(dev), msg.address);
> >> +    }
> >> +    pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
> >> +}
> >> +
> > 
> > Please add documentation. Something like
> > 
> > /*
> >  * Special API for POWER to configure the vectors through
> >  * a side channel. Should never be used by devices.
> >  */
> 
> 
> It is useful for any para-virtualized environment I believe, is not it?
> For s390 as well. Of course, if it supports PCI, for example, what I am not sure it does though :)

I expect the normal guest to program the address into MSI register using
config accesses, same way that it enables MSI/MSIX.
Why POWER does it differently I did not yet figure out but I hope
this weirdness is not so widespread.

> >>  bool msi_enabled(const PCIDevice *dev)
> >>  {
> >>      return msi_present(dev) &&
> >> diff --git a/hw/msi.h b/hw/msi.h
> >> index 75747ab..6ec1f99 100644
> >> --- a/hw/msi.h
> >> +++ b/hw/msi.h
> >> @@ -31,6 +31,7 @@ struct MSIMessage {
> >>  
> >>  extern bool msi_supported;
> >>  
> >> +void msi_set_message(PCIDevice *dev, MSIMessage msg);
> >>  bool msi_enabled(const PCIDevice *dev);
> >>  int msi_init(struct PCIDevice *dev, uint8_t offset,
> >>               unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
> >> diff --git a/hw/msix.c b/hw/msix.c
> >> index ded3c55..5f7d6d3 100644
> >> --- a/hw/msix.c
> >> +++ b/hw/msix.c
> >> @@ -45,6 +45,15 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
> >>      return msg;
> >>  }
> >>  
> >> +void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
> >> +{
> >> +    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
> >> +
> >> +    pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
> >> +    pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
> >> +    table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
> >> +}
> >> +
> >>  /* Add MSI-X capability to the config space for the device. */
> >>  /* Given a bar and its size, add MSI-X table on top of it
> >>   * and fill MSI-X capability in the config space.
> >> diff --git a/hw/msix.h b/hw/msix.h
> >> index 50aee82..26a437e 100644
> >> --- a/hw/msix.h
> >> +++ b/hw/msix.h
> >> @@ -4,6 +4,8 @@
> >>  #include "qemu-common.h"
> >>  #include "pci.h"
> >>  
> >> +void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
> >> +
> >>  int msix_init(PCIDevice *pdev, unsigned short nentries,
> >>                MemoryRegion *bar,
> >>                unsigned bar_nr, unsigned bar_size);
> >> -- 
> >> 1.7.10
> >>
> >> ps. double '-' and git version is an end-of-patch scissor as I read somewhere, cannot recall where exactly :)
> >>
> >>
> >>
> >>
> >>
> >>
> >> On 21/06/12 20:56, Jan Kiszka wrote:
> >>> On 2012-06-21 12:50, Alexey Kardashevskiy wrote:
> >>>> On 21/06/12 20:38, Jan Kiszka wrote:
> >>>>> On 2012-06-21 12:28, Alexey Kardashevskiy wrote:
> >>>>>> On 21/06/12 17:39, Jan Kiszka wrote:
> >>>>>>> On 2012-06-21 09:18, Alexey Kardashevskiy wrote:
> >>>>>>>>
> >>>>>>>> agrhhh. sha1 of the patch changed after rebasing :)
> >>>>>>>>
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> Added (msi|msix)_(set|get)_message() function for whoever might
> >>>>>>>> want to use them.
> >>>>>>>>
> >>>>>>>> Currently msi_notify()/msix_notify() write to these vectors to
> >>>>>>>> signal the guest about an interrupt so the correct values have to
> >>>>>>>> written there by the guest or QEMU.
> >>>>>>>>
> >>>>>>>> For example, POWER guest never initializes MSI/MSIX vectors, instead
> >>>>>>>> it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
> >>>>>>>> POWER we have to initialize MSI/MSIX message from QEMU.
> >>>>>>>>
> >>>>>>>> As only set* function are required by now, the "get" functions were added
> >>>>>>>> or made public for a symmetry.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >>>>>>>> ---
> >>>>>>>>  hw/msi.c  |   29 +++++++++++++++++++++++++++++
> >>>>>>>>  hw/msi.h  |    2 ++
> >>>>>>>>  hw/msix.c |   11 ++++++++++-
> >>>>>>>>  hw/msix.h |    3 +++
> >>>>>>>>  4 files changed, 44 insertions(+), 1 deletion(-)
> >>>>>>>>
> >>>>>>>> diff --git a/hw/msi.c b/hw/msi.c
> >>>>>>>> index 5233204..9ad84a4 100644
> >>>>>>>> --- a/hw/msi.c
> >>>>>>>> +++ b/hw/msi.c
> >>>>>>>> @@ -105,6 +105,35 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
> >>>>>>>>      return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
> >>>>>>>>  }
> >>>>>>>>  
> >>>>>>>> +MSIMessage msi_get_message(PCIDevice *dev)
> >>>>>>>
> >>>>>>> MSIMessage msi_get_message(PCIDevice *dev, unsigned vector)
> >>>>>>
> >>>>>>
> >>>>>> Who/how/why is going to calculate the vector here?
> >>>>>>
> >>>>>>>
> >>>>>>>> +{
> >>>>>>>> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> >>>>>>>> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> >>>>>>>> +    MSIMessage msg;
> >>>>>>>> +
> >>>>>>>> +    if (msi64bit) {
> >>>>>>>> +        msg.address = pci_get_quad(dev->config + msi_address_lo_off(dev));
> >>>>>>>> +    } else {
> >>>>>>>> +        msg.address = pci_get_long(dev->config + msi_address_lo_off(dev));
> >>>>>>>> +    }
> >>>>>>>> +    msg.data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
> >>>>>>>
> >>>>>>> And I have this here in addition:
> >>>>>>>
> >>>>>>>     unsigned int nr_vectors = msi_nr_vectors(flags);
> >>>>>>>     ...
> >>>>>>>
> >>>>>>>     if (nr_vectors > 1) {
> >>>>>>>         msg.data &= ~(nr_vectors - 1);
> >>>>>>>         msg.data |= vector;
> >>>>>>>     }
> >>>>>>>
> >>>>>>> See PCI spec and existing code.
> >>>>>>
> >>>>>>
> >>>>>> What for? I really do not get it why someone might want to read something but not real value.
> >>>>>> What PCI code should I look?
> >>>>>
> >>>>> I'm not sure what your use case for reading the message is. For KVM
> >>>>> device assignment it is preparing an alternative message delivery path
> >>>>> for MSI vectors. And for this we will need vector notifier support for
> >>>>> MSI as well. You can check the MSI-X code for corresponding use cases of
> >>>>> msix_get_message.
> >>>>
> >>>>> And when we already have msi_get_message, another logical use case is
> >>>>> msi_notify. See msix.c again.
> >>>>
> >>>> Aaaa.
> >>>>
> >>>> I have no case for reading the message. All I need is writing. And I want it public as I want to use
> >>>> it from hw/spapr_pci.c. You suggested to add reading, I added "get" to be _symmetric_ to "set"
> >>>> ("get" returns what "set" wrote). You want a different thing which I can do but it is not
> >>>> msi_get_message(), it is something like msi_prepare_message(MSImessage msg) or
> >>>> msi_set_vector(uint16_t data) or simply internal kitchen of msi_notify().
> >>>>
> >>>> Still can do what you suggested, it just does not seem right.
> >>>
> >>> It is right - when looking at it from a different angle. ;)
> >>>
> >>> I don't mind if you add msi_get_message now or leave this to me. Likely
> >>> the latter is better as you have no use case for msi_get_message (and
> >>> also msix_get_message!) outside of their modules, thus we should not
> >>> export those functions anyway.
> 
> 
> 
> -- 
> Alexey
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexey Kardashevskiy July 19, 2012, 12:32 a.m. UTC | #6
On 19/07/12 01:23, Michael S. Tsirkin wrote:
> On Wed, Jul 18, 2012 at 11:17:12PM +1000, Alexey Kardashevskiy wrote:
>> On 18/07/12 22:43, Michael S. Tsirkin wrote:
>>> On Thu, Jun 21, 2012 at 09:39:10PM +1000, Alexey Kardashevskiy wrote:
>>>> Added (msi|msix)_set_message() functions.
>>>>
>>>> Currently msi_notify()/msix_notify() write to these vectors to
>>>> signal the guest about an interrupt so the correct values have to
>>>> written there by the guest or QEMU.
>>>>
>>>> For example, POWER guest never initializes MSI/MSIX vectors, instead
>>>> it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
>>>> POWER we have to initialize MSI/MSIX message from QEMU.
>>>>
>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>
>>> So guests do enable MSI through config space, but do
>>> not fill in vectors? 
>>
>> Yes. msix_capability_init() calls arch_setup_msi_irqs() which does everything it needs to do (i.e. calls hypervisor) before msix_capability_init() writes PCI_MSIX_FLAGS_ENABLE to the PCI_MSIX_FLAGS register.
>>
>> These vectors are the PCI bus addresses, the way they are set is specific for a PCI host controller, I do not see why the current scheme is a bug.
> 
> I won't work with any real PCI device, will it? Real pci devices expect
> vectors to be written into their memory.


Yes. And the hypervisor does this. On POWER (at least book3s - server powerpc), the whole config space kitchen is hidden behind RTAS (kind of BIOS). For the guest, this RTAS is implemented in the hypervisor; for the host - in the system firmware. So powerpc Linux does not have to have PHB drivers. Kinda cool.

A usual powerpc server runs without a host Linux at all; it runs a hypervisor called pHyp. Every guest knows that it is a guest: there is no full machine emulation, it is para-virtualization. In power-kvm, we replace pHyp with the host Linux, and QEMU now plays the hypervisor role. Some day we will move the hypervisor to the host kernel completely (?), but for now it is in QEMU.


>>> Very strange. Are you sure it's not
>>> just a guest bug? How does it work for other PCI devices?
>>
>> Did not get the question. It works the same for every PCI device under POWER guest.
> 
> I mean for real PCI devices.
> 
>>> Can't we just fix guest drivers to program the vectors properly?
>>>
>>> Also pls address the comment below.
>>
>> Comment below.
>>
>>> Thanks!
>>>
>>>> ---
>>>>  hw/msi.c  |   13 +++++++++++++
>>>>  hw/msi.h  |    1 +
>>>>  hw/msix.c |    9 +++++++++
>>>>  hw/msix.h |    2 ++
>>>>  4 files changed, 25 insertions(+)
>>>>
>>>> diff --git a/hw/msi.c b/hw/msi.c
>>>> index 5233204..cc6102f 100644
>>>> --- a/hw/msi.c
>>>> +++ b/hw/msi.c
>>>> @@ -105,6 +105,19 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
>>>>      return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
>>>>  }
>>>>  
>>>> +void msi_set_message(PCIDevice *dev, MSIMessage msg)
>>>> +{
>>>> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
>>>> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
>>>> +
>>>> +    if (msi64bit) {
>>>> +        pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address);
>>>> +    } else {
>>>> +        pci_set_long(dev->config + msi_address_lo_off(dev), msg.address);
>>>> +    }
>>>> +    pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
>>>> +}
>>>> +
>>>
>>> Please add documentation. Something like
>>>
>>> /*
>>>  * Special API for POWER to configure the vectors through
>>>  * a side channel. Should never be used by devices.
>>>  */
>>
>>
>> It is useful for any para-virtualized environment I believe, is not it?
>> For s390 as well. Of course, if it supports PCI, for example, what I am not sure it does though :)
> 
> I expect the normal guest to program the address into MSI register using
> config accesses, same way that it enables MSI/MSIX.
> Why POWER does it differently I did not yet figure out but I hope
> this weirdness is not so widespread.


In para-virt I would expect the guest not to touch config space at all. At least it should use one interface rather than two but this is how it is.


>>>>  bool msi_enabled(const PCIDevice *dev)
>>>>  {
>>>>      return msi_present(dev) &&
>>>> diff --git a/hw/msi.h b/hw/msi.h
>>>> index 75747ab..6ec1f99 100644
>>>> --- a/hw/msi.h
>>>> +++ b/hw/msi.h
>>>> @@ -31,6 +31,7 @@ struct MSIMessage {
>>>>  
>>>>  extern bool msi_supported;
>>>>  
>>>> +void msi_set_message(PCIDevice *dev, MSIMessage msg);
>>>>  bool msi_enabled(const PCIDevice *dev);
>>>>  int msi_init(struct PCIDevice *dev, uint8_t offset,
>>>>               unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
>>>> diff --git a/hw/msix.c b/hw/msix.c
>>>> index ded3c55..5f7d6d3 100644
>>>> --- a/hw/msix.c
>>>> +++ b/hw/msix.c
>>>> @@ -45,6 +45,15 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
>>>>      return msg;
>>>>  }
>>>>  
>>>> +void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
>>>> +{
>>>> +    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
>>>> +
>>>> +    pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
>>>> +    pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
>>>> +    table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
>>>> +}
>>>> +
>>>>  /* Add MSI-X capability to the config space for the device. */
>>>>  /* Given a bar and its size, add MSI-X table on top of it
>>>>   * and fill MSI-X capability in the config space.
>>>> diff --git a/hw/msix.h b/hw/msix.h
>>>> index 50aee82..26a437e 100644
>>>> --- a/hw/msix.h
>>>> +++ b/hw/msix.h
>>>> @@ -4,6 +4,8 @@
>>>>  #include "qemu-common.h"
>>>>  #include "pci.h"
>>>>  
>>>> +void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
>>>> +
>>>>  int msix_init(PCIDevice *pdev, unsigned short nentries,
>>>>                MemoryRegion *bar,
>>>>                unsigned bar_nr, unsigned bar_size);
>>>> -- 
>>>> 1.7.10
>>>>
>>>> ps. double '-' and git version is an end-of-patch scissor as I read somewhere, cannot recall where exactly :)
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>> On 21/06/12 20:56, Jan Kiszka wrote:
>>>>> On 2012-06-21 12:50, Alexey Kardashevskiy wrote:
>>>>>> On 21/06/12 20:38, Jan Kiszka wrote:
>>>>>>> On 2012-06-21 12:28, Alexey Kardashevskiy wrote:
>>>>>>>> On 21/06/12 17:39, Jan Kiszka wrote:
>>>>>>>>> On 2012-06-21 09:18, Alexey Kardashevskiy wrote:
>>>>>>>>>>
>>>>>>>>>> agrhhh. sha1 of the patch changed after rebasing :)
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Added (msi|msix)_(set|get)_message() function for whoever might
>>>>>>>>>> want to use them.
>>>>>>>>>>
>>>>>>>>>> Currently msi_notify()/msix_notify() write to these vectors to
>>>>>>>>>> signal the guest about an interrupt so the correct values have to
>>>>>>>>>> written there by the guest or QEMU.
>>>>>>>>>>
>>>>>>>>>> For example, POWER guest never initializes MSI/MSIX vectors, instead
>>>>>>>>>> it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
>>>>>>>>>> POWER we have to initialize MSI/MSIX message from QEMU.
>>>>>>>>>>
>>>>>>>>>> As only set* function are required by now, the "get" functions were added
>>>>>>>>>> or made public for a symmetry.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>>>>>>>> ---
>>>>>>>>>>  hw/msi.c  |   29 +++++++++++++++++++++++++++++
>>>>>>>>>>  hw/msi.h  |    2 ++
>>>>>>>>>>  hw/msix.c |   11 ++++++++++-
>>>>>>>>>>  hw/msix.h |    3 +++
>>>>>>>>>>  4 files changed, 44 insertions(+), 1 deletion(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/hw/msi.c b/hw/msi.c
>>>>>>>>>> index 5233204..9ad84a4 100644
>>>>>>>>>> --- a/hw/msi.c
>>>>>>>>>> +++ b/hw/msi.c
>>>>>>>>>> @@ -105,6 +105,35 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
>>>>>>>>>>      return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
>>>>>>>>>>  }
>>>>>>>>>>  
>>>>>>>>>> +MSIMessage msi_get_message(PCIDevice *dev)
>>>>>>>>>
>>>>>>>>> MSIMessage msi_get_message(PCIDevice *dev, unsigned vector)
>>>>>>>>
>>>>>>>>
>>>>>>>> Who/how/why is going to calculate the vector here?
>>>>>>>>
>>>>>>>>>
>>>>>>>>>> +{
>>>>>>>>>> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
>>>>>>>>>> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
>>>>>>>>>> +    MSIMessage msg;
>>>>>>>>>> +
>>>>>>>>>> +    if (msi64bit) {
>>>>>>>>>> +        msg.address = pci_get_quad(dev->config + msi_address_lo_off(dev));
>>>>>>>>>> +    } else {
>>>>>>>>>> +        msg.address = pci_get_long(dev->config + msi_address_lo_off(dev));
>>>>>>>>>> +    }
>>>>>>>>>> +    msg.data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
>>>>>>>>>
>>>>>>>>> And I have this here in addition:
>>>>>>>>>
>>>>>>>>>     unsigned int nr_vectors = msi_nr_vectors(flags);
>>>>>>>>>     ...
>>>>>>>>>
>>>>>>>>>     if (nr_vectors > 1) {
>>>>>>>>>         msg.data &= ~(nr_vectors - 1);
>>>>>>>>>         msg.data |= vector;
>>>>>>>>>     }
>>>>>>>>>
>>>>>>>>> See PCI spec and existing code.
>>>>>>>>
>>>>>>>>
>>>>>>>> What for? I really do not get it why someone might want to read something but not real value.
>>>>>>>> What PCI code should I look?
>>>>>>>
>>>>>>> I'm not sure what your use case for reading the message is. For KVM
>>>>>>> device assignment it is preparing an alternative message delivery path
>>>>>>> for MSI vectors. And for this we will need vector notifier support for
>>>>>>> MSI as well. You can check the MSI-X code for corresponding use cases of
>>>>>>> msix_get_message.
>>>>>>
>>>>>>> And when we already have msi_get_message, another logical use case is
>>>>>>> msi_notify. See msix.c again.
>>>>>>
>>>>>> Aaaa.
>>>>>>
>>>>>> I have no case for reading the message. All I need is writing. And I want it public as I want to use
>>>>>> it from hw/spapr_pci.c. You suggested to add reading, I added "get" to be _symmetric_ to "set"
>>>>>> ("get" returns what "set" wrote). You want a different thing which I can do but it is not
>>>>>> msi_get_message(), it is something like msi_prepare_message(MSImessage msg) or
>>>>>> msi_set_vector(uint16_t data) or simply internal kitchen of msi_notify().
>>>>>>
>>>>>> Still can do what you suggested, it just does not seem right.
>>>>>
>>>>> It is right - when looking at it from a different angle. ;)
>>>>>
>>>>> I don't mind if you add msi_get_message now or leave this to me. Likely
>>>>> the latter is better as you have no use case for msi_get_message (and
>>>>> also msix_get_message!) outside of their modules, thus we should not
>>>>> export those functions anyway.
Michael S. Tsirkin July 19, 2012, 9:27 a.m. UTC | #7
On Thu, Jul 19, 2012 at 10:32:40AM +1000, Alexey Kardashevskiy wrote:
> On 19/07/12 01:23, Michael S. Tsirkin wrote:
> > On Wed, Jul 18, 2012 at 11:17:12PM +1000, Alexey Kardashevskiy wrote:
> >> On 18/07/12 22:43, Michael S. Tsirkin wrote:
> >>> On Thu, Jun 21, 2012 at 09:39:10PM +1000, Alexey Kardashevskiy wrote:
> >>>> Added (msi|msix)_set_message() functions.
> >>>>
> >>>> Currently msi_notify()/msix_notify() write to these vectors to
> >>>> signal the guest about an interrupt so the correct values have to
> >>>> written there by the guest or QEMU.
> >>>>
> >>>> For example, POWER guest never initializes MSI/MSIX vectors, instead
> >>>> it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
> >>>> POWER we have to initialize MSI/MSIX message from QEMU.
> >>>>
> >>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >>>
> >>> So guests do enable MSI through config space, but do
> >>> not fill in vectors? 
> >>
> >> Yes. msix_capability_init() calls arch_setup_msi_irqs() which does everything it needs to do (i.e. calls hypervisor) before msix_capability_init() writes PCI_MSIX_FLAGS_ENABLE to the PCI_MSIX_FLAGS register.
> >>
> >> These vectors are the PCI bus addresses, the way they are set is specific for a PCI host controller, I do not see why the current scheme is a bug.
> > 
> > I won't work with any real PCI device, will it? Real pci devices expect
> > vectors to be written into their memory.
> 
> 
> Yes. And the hypervisor does this. On POWER (at least book3s - server powerpc, the whole config space kitchen is hidden behind RTAS (kind of bios). For the guest, this RTAS is implemented in hypervisor, for the host - in the system firmware. So powerpc linux does not have to have PHB drivers. Kinda cool.
> 
> Usual powerpc server is running without the host linux at all, it is running a hypervisor called pHyp. And every guest knows that it is a guest, there is no full machine emulation, it is para-virtualization. In power-kvm, we replace that pHyp with the host linux and now QEMU plays a hypervisor role. Some day We will move the hypervisor to the host kernel completely (?) but now it is in QEMU.

Okay. So it is a POWER-specific weirdness, as I suspected.
Sure, if this is what real hardware does we pretty much have to
emulate this.

> >>> Very strange. Are you sure it's not
> >>> just a guest bug? How does it work for other PCI devices?
> >>
> >> Did not get the question. It works the same for every PCI device under POWER guest.
> > 
> > I mean for real PCI devices.
> > 
> >>> Can't we just fix guest drivers to program the vectors properly?
> >>>
> >>> Also pls address the comment below.
> >>
> >> Comment below.
> >>
> >>> Thanks!
> >>>
> >>>> ---
> >>>>  hw/msi.c  |   13 +++++++++++++
> >>>>  hw/msi.h  |    1 +
> >>>>  hw/msix.c |    9 +++++++++
> >>>>  hw/msix.h |    2 ++
> >>>>  4 files changed, 25 insertions(+)
> >>>>
> >>>> diff --git a/hw/msi.c b/hw/msi.c
> >>>> index 5233204..cc6102f 100644
> >>>> --- a/hw/msi.c
> >>>> +++ b/hw/msi.c
> >>>> @@ -105,6 +105,19 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
> >>>>      return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
> >>>>  }
> >>>>  
> >>>> +void msi_set_message(PCIDevice *dev, MSIMessage msg)
> >>>> +{
> >>>> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> >>>> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> >>>> +
> >>>> +    if (msi64bit) {
> >>>> +        pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address);
> >>>> +    } else {
> >>>> +        pci_set_long(dev->config + msi_address_lo_off(dev), msg.address);
> >>>> +    }
> >>>> +    pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
> >>>> +}
> >>>> +
> >>>
> >>> Please add documentation. Something like
> >>>
> >>> /*
> >>>  * Special API for POWER to configure the vectors through
> >>>  * a side channel. Should never be used by devices.
> >>>  */
> >>
> >>
> >> It is useful for any para-virtualized environment I believe, is not it?
> >> For s390 as well. Of course, if it supports PCI, for example, what I am not sure it does though :)
> > 
> > I expect the normal guest to program the address into MSI register using
> > config accesses, same way that it enables MSI/MSIX.
> > Why POWER does it differently I did not yet figure out but I hope
> > this weirdness is not so widespread.
> 
> 
> In para-virt I would expect the guest not to touch config space at all. At least it should use one interface rather than two but this is how it is.

It's not new that firmware developers consistently make inconsistent
design decisions :)

> >>>>  bool msi_enabled(const PCIDevice *dev)
> >>>>  {
> >>>>      return msi_present(dev) &&
> >>>> diff --git a/hw/msi.h b/hw/msi.h
> >>>> index 75747ab..6ec1f99 100644
> >>>> --- a/hw/msi.h
> >>>> +++ b/hw/msi.h
> >>>> @@ -31,6 +31,7 @@ struct MSIMessage {
> >>>>  
> >>>>  extern bool msi_supported;
> >>>>  
> >>>> +void msi_set_message(PCIDevice *dev, MSIMessage msg);
> >>>>  bool msi_enabled(const PCIDevice *dev);
> >>>>  int msi_init(struct PCIDevice *dev, uint8_t offset,
> >>>>               unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
> >>>> diff --git a/hw/msix.c b/hw/msix.c
> >>>> index ded3c55..5f7d6d3 100644
> >>>> --- a/hw/msix.c
> >>>> +++ b/hw/msix.c
> >>>> @@ -45,6 +45,15 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
> >>>>      return msg;
> >>>>  }
> >>>>  
> >>>> +void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
> >>>> +{
> >>>> +    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
> >>>> +
> >>>> +    pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
> >>>> +    pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
> >>>> +    table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
> >>>> +}
> >>>> +
> >>>>  /* Add MSI-X capability to the config space for the device. */
> >>>>  /* Given a bar and its size, add MSI-X table on top of it
> >>>>   * and fill MSI-X capability in the config space.
> >>>> diff --git a/hw/msix.h b/hw/msix.h
> >>>> index 50aee82..26a437e 100644
> >>>> --- a/hw/msix.h
> >>>> +++ b/hw/msix.h
> >>>> @@ -4,6 +4,8 @@
> >>>>  #include "qemu-common.h"
> >>>>  #include "pci.h"
> >>>>  
> >>>> +void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
> >>>> +
> >>>>  int msix_init(PCIDevice *pdev, unsigned short nentries,
> >>>>                MemoryRegion *bar,
> >>>>                unsigned bar_nr, unsigned bar_size);
> >>>> -- 
> >>>> 1.7.10
> >>>>
> >>>> ps. double '-' and git version is an end-of-patch scissor as I read somewhere, cannot recall where exactly :)
> >>>>
> >>>>
> >>>>
> >>>>
> >>>>
> >>>>
> >>>> On 21/06/12 20:56, Jan Kiszka wrote:
> >>>>> On 2012-06-21 12:50, Alexey Kardashevskiy wrote:
> >>>>>> On 21/06/12 20:38, Jan Kiszka wrote:
> >>>>>>> On 2012-06-21 12:28, Alexey Kardashevskiy wrote:
> >>>>>>>> On 21/06/12 17:39, Jan Kiszka wrote:
> >>>>>>>>> On 2012-06-21 09:18, Alexey Kardashevskiy wrote:
> >>>>>>>>>>
> >>>>>>>>>> agrhhh. sha1 of the patch changed after rebasing :)
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>> Added (msi|msix)_(set|get)_message() function for whoever might
> >>>>>>>>>> want to use them.
> >>>>>>>>>>
> >>>>>>>>>> Currently msi_notify()/msix_notify() write to these vectors to
> >>>>>>>>>> signal the guest about an interrupt so the correct values have to
> >>>>>>>>>> written there by the guest or QEMU.
> >>>>>>>>>>
> >>>>>>>>>> For example, POWER guest never initializes MSI/MSIX vectors, instead
> >>>>>>>>>> it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
> >>>>>>>>>> POWER we have to initialize MSI/MSIX message from QEMU.
> >>>>>>>>>>
> >>>>>>>>>> As only set* function are required by now, the "get" functions were added
> >>>>>>>>>> or made public for a symmetry.
> >>>>>>>>>>
> >>>>>>>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >>>>>>>>>> ---
> >>>>>>>>>>  hw/msi.c  |   29 +++++++++++++++++++++++++++++
> >>>>>>>>>>  hw/msi.h  |    2 ++
> >>>>>>>>>>  hw/msix.c |   11 ++++++++++-
> >>>>>>>>>>  hw/msix.h |    3 +++
> >>>>>>>>>>  4 files changed, 44 insertions(+), 1 deletion(-)
> >>>>>>>>>>
> >>>>>>>>>> diff --git a/hw/msi.c b/hw/msi.c
> >>>>>>>>>> index 5233204..9ad84a4 100644
> >>>>>>>>>> --- a/hw/msi.c
> >>>>>>>>>> +++ b/hw/msi.c
> >>>>>>>>>> @@ -105,6 +105,35 @@ static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
> >>>>>>>>>>      return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
> >>>>>>>>>>  }
> >>>>>>>>>>  
> >>>>>>>>>> +MSIMessage msi_get_message(PCIDevice *dev)
> >>>>>>>>>
> >>>>>>>>> MSIMessage msi_get_message(PCIDevice *dev, unsigned vector)
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> Who/how/why is going to calculate the vector here?
> >>>>>>>>
> >>>>>>>>>
> >>>>>>>>>> +{
> >>>>>>>>>> +    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
> >>>>>>>>>> +    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
> >>>>>>>>>> +    MSIMessage msg;
> >>>>>>>>>> +
> >>>>>>>>>> +    if (msi64bit) {
> >>>>>>>>>> +        msg.address = pci_get_quad(dev->config + msi_address_lo_off(dev));
> >>>>>>>>>> +    } else {
> >>>>>>>>>> +        msg.address = pci_get_long(dev->config + msi_address_lo_off(dev));
> >>>>>>>>>> +    }
> >>>>>>>>>> +    msg.data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
> >>>>>>>>>
> >>>>>>>>> And I have this here in addition:
> >>>>>>>>>
> >>>>>>>>>     unsigned int nr_vectors = msi_nr_vectors(flags);
> >>>>>>>>>     ...
> >>>>>>>>>
> >>>>>>>>>     if (nr_vectors > 1) {
> >>>>>>>>>         msg.data &= ~(nr_vectors - 1);
> >>>>>>>>>         msg.data |= vector;
> >>>>>>>>>     }
> >>>>>>>>>
> >>>>>>>>> See PCI spec and existing code.
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> What for? I really do not get it why someone might want to read something but not real value.
> >>>>>>>> What PCI code should I look?
> >>>>>>>
> >>>>>>> I'm not sure what your use case for reading the message is. For KVM
> >>>>>>> device assignment it is preparing an alternative message delivery path
> >>>>>>> for MSI vectors. And for this we will need vector notifier support for
> >>>>>>> MSI as well. You can check the MSI-X code for corresponding use cases of
> >>>>>>> msix_get_message.
> >>>>>>
> >>>>>>> And when we already have msi_get_message, another logical use case is
> >>>>>>> msi_notify. See msix.c again.
> >>>>>>
> >>>>>> Aaaa.
> >>>>>>
> >>>>>> I have no case for reading the message. All I need is writing. And I want it public as I want to use
> >>>>>> it from hw/spapr_pci.c. You suggested to add reading, I added "get" to be _symmetric_ to "set"
> >>>>>> ("get" returns what "set" wrote). You want a different thing which I can do but it is not
> >>>>>> msi_get_message(), it is something like msi_prepare_message(MSImessage msg) or
> >>>>>> msi_set_vector(uint16_t data) or simply internal kitchen of msi_notify().
> >>>>>>
> >>>>>> Still can do what you suggested, it just does not seem right.
> >>>>>
> >>>>> It is right - when looking at it from a different angle. ;)
> >>>>>
> >>>>> I don't mind if you add msi_get_message now or leave this to me. Likely
> >>>>> the latter is better as you have no use case for msi_get_message (and
> >>>>> also msix_get_message!) outside of their modules, thus we should not
> >>>>> export those functions anyway.
> 
> 
> -- 
> Alexey
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/hw/msi.c b/hw/msi.c
index 5233204..cc6102f 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -105,6 +105,19 @@  static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
     return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
 }
 
+void msi_set_message(PCIDevice *dev, MSIMessage msg)
+{
+    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
+    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
+
+    if (msi64bit) {
+        pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address);
+    } else {
+        pci_set_long(dev->config + msi_address_lo_off(dev), msg.address);
+    }
+    pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
+}
+
 bool msi_enabled(const PCIDevice *dev)
 {
     return msi_present(dev) &&
diff --git a/hw/msi.h b/hw/msi.h
index 75747ab..6ec1f99 100644
--- a/hw/msi.h
+++ b/hw/msi.h
@@ -31,6 +31,7 @@  struct MSIMessage {
 
 extern bool msi_supported;
 
+void msi_set_message(PCIDevice *dev, MSIMessage msg);
 bool msi_enabled(const PCIDevice *dev);
 int msi_init(struct PCIDevice *dev, uint8_t offset,
              unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
diff --git a/hw/msix.c b/hw/msix.c
index ded3c55..5f7d6d3 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -45,6 +45,15 @@  static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
     return msg;
 }
 
+void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
+{
+    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
+
+    pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
+    pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
+    table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
+}
+
 /* Add MSI-X capability to the config space for the device. */
 /* Given a bar and its size, add MSI-X table on top of it
  * and fill MSI-X capability in the config space.
diff --git a/hw/msix.h b/hw/msix.h
index 50aee82..26a437e 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -4,6 +4,8 @@ 
 #include "qemu-common.h"
 #include "pci.h"
 
+void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
+
 int msix_init(PCIDevice *pdev, unsigned short nentries,
               MemoryRegion *bar,
               unsigned bar_nr, unsigned bar_size);