Patchwork MSI / MSIX injection for Xen HVM

login
register
mail settings
Submitter Wei Liu
Date March 1, 2012, 11:51 a.m.
Message ID <1330602678.10387.73.camel@leeds.uk.xensource.com>
Download mbox | patch
Permalink /patch/143997/
State New
Headers show

Comments

Wei Liu - March 1, 2012, 11:51 a.m.
On Thu, 2012-03-01 at 11:22 +0000, Jan Kiszka wrote:
> On 2012-03-01 11:20, Wei Liu wrote:
> >> However, you know that you miss those (uncommon) messages that are
> >> injected via DMA? They end up directly in apic_deliver_msi (where KVM
> >> will once pick them up as well).
> >>
> > 
> > Thanks for pointing this out. However I cannot find apic_deliver_msi in
> > qemu-kvm or upstream qemu tree. I can only find apic_send_msi and
> > apic_deliver_irq, none of which seems to be KVM-specific. Can you please
> > give me some pointer on this?
> 
> Yes, apic_send_msi. I was accidentally looking at my msi refactoring tree.
> 
> Jan
> 

New version of this patch.

------8<------
From d20edc78c71827da9f8be3308c0d473fec03b705 Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Wed, 29 Feb 2012 16:46:41 +0000
Subject: [PATCH] MSI / MSIX injection for Xen.

This is supposed to be used in conjunction with Xen's
hypercall interface for emualted MSI / MSIX injection.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 hw/apic.c  |   10 ++++++++--
 hw/msi.c   |    7 ++++++-
 hw/msix.c  |    8 +++++++-
 hw/xen.h   |    1 +
 xen-all.c  |    5 +++++
 xen-stub.c |    4 ++++
 6 files changed, 31 insertions(+), 4 deletions(-)
Paolo Bonzini - March 1, 2012, 12:56 p.m.
Il 01/03/2012 12:51, Wei Liu ha scritto:
> On Thu, 2012-03-01 at 11:22 +0000, Jan Kiszka wrote:
>> On 2012-03-01 11:20, Wei Liu wrote:
>>>> However, you know that you miss those (uncommon) messages that are
>>>> injected via DMA? They end up directly in apic_deliver_msi (where KVM
>>>> will once pick them up as well).
>>>>
>>>
>>> Thanks for pointing this out. However I cannot find apic_deliver_msi in
>>> qemu-kvm or upstream qemu tree. I can only find apic_send_msi and
>>> apic_deliver_irq, none of which seems to be KVM-specific. Can you please
>>> give me some pointer on this?
>>
>> Yes, apic_send_msi. I was accidentally looking at my msi refactoring tree.
>>
>> Jan
>>
> 
> New version of this patch.
> 
> ------8<------
> From d20edc78c71827da9f8be3308c0d473fec03b705 Mon Sep 17 00:00:00 2001
> From: Wei Liu <wei.liu2@citrix.com>
> Date: Wed, 29 Feb 2012 16:46:41 +0000
> Subject: [PATCH] MSI / MSIX injection for Xen.
> 
> This is supposed to be used in conjunction with Xen's
> hypercall interface for emualted MSI / MSIX injection.
> 
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> ---
>  hw/apic.c  |   10 ++++++++--
>  hw/msi.c   |    7 ++++++-
>  hw/msix.c  |    8 +++++++-
>  hw/xen.h   |    1 +
>  xen-all.c  |    5 +++++
>  xen-stub.c |    4 ++++
>  6 files changed, 31 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/apic.c b/hw/apic.c
> index ff9d24e..9fc0b17 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -22,6 +22,7 @@
>  #include "host-utils.h"
>  #include "trace.h"
>  #include "pc.h"
> +#include "xen.h"
>  
>  #define MAX_APIC_WORDS 8
>  
> @@ -641,8 +642,13 @@ static void apic_send_msi(target_phys_addr_t addr, uint32_t data)
>      uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
>      uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
>      uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
> -    /* XXX: Ignore redirection hint. */
> -    apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
> +
> +    if (xen_enabled()) {
> +        xen_hvm_inject_msi(addr, data);
> +    } else {
> +        /* XXX: Ignore redirection hint. */
> +        apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
> +    }
>  }
>  
>  static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> diff --git a/hw/msi.c b/hw/msi.c
> index 5d6ceb6..b11eeac 100644
> --- a/hw/msi.c
> +++ b/hw/msi.c
> @@ -20,6 +20,7 @@
>  
>  #include "msi.h"
>  #include "range.h"
> +#include "xen.h"
>  
>  /* Eventually those constants should go to Linux pci_regs.h */
>  #define PCI_MSI_PENDING_32      0x10
> @@ -257,7 +258,11 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
>                     "notify vector 0x%x"
>                     " address: 0x%"PRIx64" data: 0x%"PRIx32"\n",
>                     vector, address, data);
> -    stl_le_phys(address, data);
> +    if (xen_enabled()) {
> +        xen_hvm_inject_msi(address, data);
> +    } else {
> +        stl_le_phys(address, data);
> +    }
>  }
>  
>  /* call this function after updating configs by pci_default_write_config(). */
> diff --git a/hw/msix.c b/hw/msix.c
> index 3835eaa..cba302e 100644
> --- a/hw/msix.c
> +++ b/hw/msix.c
> @@ -19,6 +19,7 @@
>  #include "msix.h"
>  #include "pci.h"
>  #include "range.h"
> +#include "xen.h"
>  
>  #define MSIX_CAP_LENGTH 12
>  
> @@ -365,7 +366,12 @@ void msix_notify(PCIDevice *dev, unsigned vector)
>  
>      address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
>      data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
> -    stl_le_phys(address, data);
> +
> +    if (xen_enabled()) {
> +        xen_hvm_inject_msi(address, data);
> +    } else {
> +        stl_le_phys(address, data);
> +    }
>  }
>  
>  void msix_reset(PCIDevice *dev)
> diff --git a/hw/xen.h b/hw/xen.h
> index b46879c..e5926b7 100644
> --- a/hw/xen.h
> +++ b/hw/xen.h
> @@ -34,6 +34,7 @@ static inline int xen_enabled(void)
>  int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
>  void xen_piix3_set_irq(void *opaque, int irq_num, int level);
>  void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len);
> +void xen_hvm_inject_msi(uint64_t addr, uint32_t data);
>  void xen_cmos_set_s3_resume(void *opaque, int irq, int level);
>  
>  qemu_irq *xen_interrupt_controller_init(void);
> diff --git a/xen-all.c b/xen-all.c
> index b0ed1ed..78c6df3 100644
> --- a/xen-all.c
> +++ b/xen-all.c
> @@ -122,6 +122,11 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
>      }
>  }
>  
> +void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
> +{
> +    xc_hvm_inject_msi(xen_xc, xen_domid, addr, data);
> +}
> +
>  static void xen_suspend_notifier(Notifier *notifier, void *data)
>  {
>      xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
> diff --git a/xen-stub.c b/xen-stub.c
> index 9ea02d4..8ff2b79 100644
> --- a/xen-stub.c
> +++ b/xen-stub.c
> @@ -29,6 +29,10 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
>  {
>  }
>  
> +void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
> +{
> +}
> +
>  void xen_cmos_set_s3_resume(void *opaque, int irq, int level)
>  {
>  }

This is not a NACK, but I can't help asking.  Perhaps the fake Xen
interrupt controller is a bit too simplistic?  You can add a memory
region corresponding to the APICs and trap writes in that region.
Writes coming from QEMU are MSIs and can be injected to the hypervisor,
writes coming from the VM will be trapped by Xen before going out to QEMU.

Paolo
Paolo Bonzini - March 1, 2012, 2:03 p.m.
Il 01/03/2012 15:06, Stefano Stabellini ha scritto:
>> > This is not a NACK, but I can't help asking.  Perhaps the fake Xen
>> > interrupt controller is a bit too simplistic?  You can add a memory
>> > region corresponding to the APICs and trap writes in that region.
>> > Writes coming from QEMU are MSIs and can be injected to the hypervisor,
>> > writes coming from the VM will be trapped by Xen before going out to QEMU.
>  
> That is a good point actually: we already have lapic emulation in Xen,
> it makes sense to have apic-msi in Xen too.
> We would still need the changes to msi_notify and msix_notify though.

Why?  The stores would just go to the Xen interrupt controller MMIO area
which then does the xc_hvm_inject_msi.

Paolo
Stefano Stabellini - March 1, 2012, 2:06 p.m.
On Thu, 1 Mar 2012, Paolo Bonzini wrote:
> Il 01/03/2012 12:51, Wei Liu ha scritto:
> > On Thu, 2012-03-01 at 11:22 +0000, Jan Kiszka wrote:
> >> On 2012-03-01 11:20, Wei Liu wrote:
> >>>> However, you know that you miss those (uncommon) messages that are
> >>>> injected via DMA? They end up directly in apic_deliver_msi (where KVM
> >>>> will once pick them up as well).
> >>>>
> >>>
> >>> Thanks for pointing this out. However I cannot find apic_deliver_msi in
> >>> qemu-kvm or upstream qemu tree. I can only find apic_send_msi and
> >>> apic_deliver_irq, none of which seems to be KVM-specific. Can you please
> >>> give me some pointer on this?
> >>
> >> Yes, apic_send_msi. I was accidentally looking at my msi refactoring tree.
> >>
> >> Jan
> >>
> > 
> > New version of this patch.
> > 
> > ------8<------
> > From d20edc78c71827da9f8be3308c0d473fec03b705 Mon Sep 17 00:00:00 2001
> > From: Wei Liu <wei.liu2@citrix.com>
> > Date: Wed, 29 Feb 2012 16:46:41 +0000
> > Subject: [PATCH] MSI / MSIX injection for Xen.
> > 
> > This is supposed to be used in conjunction with Xen's
> > hypercall interface for emualted MSI / MSIX injection.
> > 
> > Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> > ---
> >  hw/apic.c  |   10 ++++++++--
> >  hw/msi.c   |    7 ++++++-
> >  hw/msix.c  |    8 +++++++-
> >  hw/xen.h   |    1 +
> >  xen-all.c  |    5 +++++
> >  xen-stub.c |    4 ++++
> >  6 files changed, 31 insertions(+), 4 deletions(-)
> > 
> > diff --git a/hw/apic.c b/hw/apic.c
> > index ff9d24e..9fc0b17 100644
> > --- a/hw/apic.c
> > +++ b/hw/apic.c
> > @@ -22,6 +22,7 @@
> >  #include "host-utils.h"
> >  #include "trace.h"
> >  #include "pc.h"
> > +#include "xen.h"
> >  
> >  #define MAX_APIC_WORDS 8
> >  
> > @@ -641,8 +642,13 @@ static void apic_send_msi(target_phys_addr_t addr, uint32_t data)
> >      uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
> >      uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
> >      uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
> > -    /* XXX: Ignore redirection hint. */
> > -    apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
> > +
> > +    if (xen_enabled()) {
> > +        xen_hvm_inject_msi(addr, data);
> > +    } else {
> > +        /* XXX: Ignore redirection hint. */
> > +        apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
> > +    }
> >  }
> >  
> >  static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> > diff --git a/hw/msi.c b/hw/msi.c
> > index 5d6ceb6..b11eeac 100644
> > --- a/hw/msi.c
> > +++ b/hw/msi.c
> > @@ -20,6 +20,7 @@
> >  
> >  #include "msi.h"
> >  #include "range.h"
> > +#include "xen.h"
> >  
> >  /* Eventually those constants should go to Linux pci_regs.h */
> >  #define PCI_MSI_PENDING_32      0x10
> > @@ -257,7 +258,11 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
> >                     "notify vector 0x%x"
> >                     " address: 0x%"PRIx64" data: 0x%"PRIx32"\n",
> >                     vector, address, data);
> > -    stl_le_phys(address, data);
> > +    if (xen_enabled()) {
> > +        xen_hvm_inject_msi(address, data);
> > +    } else {
> > +        stl_le_phys(address, data);
> > +    }
> >  }
> >  
> >  /* call this function after updating configs by pci_default_write_config(). */
> > diff --git a/hw/msix.c b/hw/msix.c
> > index 3835eaa..cba302e 100644
> > --- a/hw/msix.c
> > +++ b/hw/msix.c
> > @@ -19,6 +19,7 @@
> >  #include "msix.h"
> >  #include "pci.h"
> >  #include "range.h"
> > +#include "xen.h"
> >  
> >  #define MSIX_CAP_LENGTH 12
> >  
> > @@ -365,7 +366,12 @@ void msix_notify(PCIDevice *dev, unsigned vector)
> >  
> >      address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
> >      data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
> > -    stl_le_phys(address, data);
> > +
> > +    if (xen_enabled()) {
> > +        xen_hvm_inject_msi(address, data);
> > +    } else {
> > +        stl_le_phys(address, data);
> > +    }
> >  }
> >  
> >  void msix_reset(PCIDevice *dev)
> > diff --git a/hw/xen.h b/hw/xen.h
> > index b46879c..e5926b7 100644
> > --- a/hw/xen.h
> > +++ b/hw/xen.h
> > @@ -34,6 +34,7 @@ static inline int xen_enabled(void)
> >  int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
> >  void xen_piix3_set_irq(void *opaque, int irq_num, int level);
> >  void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len);
> > +void xen_hvm_inject_msi(uint64_t addr, uint32_t data);
> >  void xen_cmos_set_s3_resume(void *opaque, int irq, int level);
> >  
> >  qemu_irq *xen_interrupt_controller_init(void);
> > diff --git a/xen-all.c b/xen-all.c
> > index b0ed1ed..78c6df3 100644
> > --- a/xen-all.c
> > +++ b/xen-all.c
> > @@ -122,6 +122,11 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
> >      }
> >  }
> >  
> > +void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
> > +{
> > +    xc_hvm_inject_msi(xen_xc, xen_domid, addr, data);
> > +}
> > +
> >  static void xen_suspend_notifier(Notifier *notifier, void *data)
> >  {
> >      xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
> > diff --git a/xen-stub.c b/xen-stub.c
> > index 9ea02d4..8ff2b79 100644
> > --- a/xen-stub.c
> > +++ b/xen-stub.c
> > @@ -29,6 +29,10 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
> >  {
> >  }
> >  
> > +void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
> > +{
> > +}
> > +
> >  void xen_cmos_set_s3_resume(void *opaque, int irq, int level)
> >  {
> >  }
> 
> This is not a NACK, but I can't help asking.  Perhaps the fake Xen
> interrupt controller is a bit too simplistic?  You can add a memory
> region corresponding to the APICs and trap writes in that region.
> Writes coming from QEMU are MSIs and can be injected to the hypervisor,
> writes coming from the VM will be trapped by Xen before going out to QEMU.
 
That is a good point actually: we already have lapic emulation in Xen,
it makes sense to have apic-msi in Xen too.
We would still need the changes to msi_notify and msix_notify though.
Stefano Stabellini - March 1, 2012, 2:50 p.m.
On Thu, 1 Mar 2012, Paolo Bonzini wrote:
> Il 01/03/2012 15:06, Stefano Stabellini ha scritto:
> >> > This is not a NACK, but I can't help asking.  Perhaps the fake Xen
> >> > interrupt controller is a bit too simplistic?  You can add a memory
> >> > region corresponding to the APICs and trap writes in that region.
> >> > Writes coming from QEMU are MSIs and can be injected to the hypervisor,
> >> > writes coming from the VM will be trapped by Xen before going out to QEMU.
> >  
> > That is a good point actually: we already have lapic emulation in Xen,
> > it makes sense to have apic-msi in Xen too.
> > We would still need the changes to msi_notify and msix_notify though.
> 
> Why?  The stores would just go to the Xen interrupt controller MMIO area
> which then does the xc_hvm_inject_msi.
 
Because msi(x)_notify is called by QEMU's emulated devices: it is not
possible from QEMU to cause an emulation trap in Xen on behalf of the
guest.
Paolo Bonzini - March 1, 2012, 3:56 p.m.
Il 01/03/2012 15:50, Stefano Stabellini ha scritto:
>>> > > That is a good point actually: we already have lapic emulation in Xen,
>>> > > it makes sense to have apic-msi in Xen too.
>>> > > We would still need the changes to msi_notify and msix_notify though.
>> > 
>> > Why?  The stores would just go to the Xen interrupt controller MMIO area
>> > which then does the xc_hvm_inject_msi.
>  
> Because msi(x)_notify is called by QEMU's emulated devices: it is not
> possible from QEMU to cause an emulation trap in Xen on behalf of the
> guest.

msi{x,}_notify doesn't have to go to Xen MMIO emulation, so in Wei's
patch you don't need anymore the msi{,x}_notify parts, only apic_send_msi.

But you could take it further and create a separate subclass of
apic-common that does absolutely nothing except register an MMIO memory
region and use it to trap MSI writes.  Basically an even more stripped
down version than hw/kvm/apic.c, but using memory_region_init_io rather
than memory_region_init_reservation.  You would also get support for the
monitor command inject-nmi (there is another hypercall for that in Xen
IIRC).

Paolo
Stefano Stabellini - March 1, 2012, 4:06 p.m.
On Thu, 1 Mar 2012, Paolo Bonzini wrote:
> Il 01/03/2012 15:50, Stefano Stabellini ha scritto:
> >>> > > That is a good point actually: we already have lapic emulation in Xen,
> >>> > > it makes sense to have apic-msi in Xen too.
> >>> > > We would still need the changes to msi_notify and msix_notify though.
> >> > 
> >> > Why?  The stores would just go to the Xen interrupt controller MMIO area
> >> > which then does the xc_hvm_inject_msi.
> >  
> > Because msi(x)_notify is called by QEMU's emulated devices: it is not
> > possible from QEMU to cause an emulation trap in Xen on behalf of the
> > guest.
> 
> msi{x,}_notify doesn't have to go to Xen MMIO emulation, so in Wei's
> patch you don't need anymore the msi{,x}_notify parts, only apic_send_msi.
> 
> But you could take it further and create a separate subclass of
> apic-common that does absolutely nothing except register an MMIO memory
> region and use it to trap MSI writes.  Basically an even more stripped
> down version than hw/kvm/apic.c, but using memory_region_init_io rather
> than memory_region_init_reservation.  You would also get support for the
> monitor command inject-nmi (there is another hypercall for that in Xen
> IIRC).
 
Yes, that could work and should result in a good and clean patch.
Wei Liu - April 3, 2012, 4:44 p.m.
On Thu, 2012-03-01 at 15:56 +0000, Paolo Bonzini wrote:
> Il 01/03/2012 15:50, Stefano Stabellini ha scritto:
> >>> > > That is a good point actually: we already have lapic emulation in Xen,
> >>> > > it makes sense to have apic-msi in Xen too.
> >>> > > We would still need the changes to msi_notify and msix_notify though.
> >> > 
> >> > Why?  The stores would just go to the Xen interrupt controller MMIO area
> >> > which then does the xc_hvm_inject_msi.
> >  
> > Because msi(x)_notify is called by QEMU's emulated devices: it is not
> > possible from QEMU to cause an emulation trap in Xen on behalf of the
> > guest.
> 

I'm not a QEMU expert, so following question may be dumb. However I do
care about a cleaner implementation. So please be patient with me. :-)

> msi{x,}_notify doesn't have to go to Xen MMIO emulation, so in Wei's
> patch you don't need anymore the msi{,x}_notify parts, only apic_send_msi.
> 

I don't quite understand "you don't need anymore the msi{,x}_notify
parts". Virtio PCI invokes msi_notify directly. If I don't hook up
msi{,x}_notify, how can I deal with devices like Virtio PCI?


Wei.

> But you could take it further and create a separate subclass of
> apic-common that does absolutely nothing except register an MMIO memory
> region and use it to trap MSI writes.  Basically an even more stripped
> down version than hw/kvm/apic.c, but using memory_region_init_io rather
> than memory_region_init_reservation.  You would also get support for the
> monitor command inject-nmi (there is another hypercall for that in Xen
> IIRC).
> 
> Paolo
Jan Kiszka - April 3, 2012, 4:52 p.m.
On 2012-04-03 18:44, Wei Liu wrote:
> On Thu, 2012-03-01 at 15:56 +0000, Paolo Bonzini wrote:
>> Il 01/03/2012 15:50, Stefano Stabellini ha scritto:
>>>>>>> That is a good point actually: we already have lapic emulation in Xen,
>>>>>>> it makes sense to have apic-msi in Xen too.
>>>>>>> We would still need the changes to msi_notify and msix_notify though.
>>>>>
>>>>> Why?  The stores would just go to the Xen interrupt controller MMIO area
>>>>> which then does the xc_hvm_inject_msi.
>>>  
>>> Because msi(x)_notify is called by QEMU's emulated devices: it is not
>>> possible from QEMU to cause an emulation trap in Xen on behalf of the
>>> guest.
>>
> 
> I'm not a QEMU expert, so following question may be dumb. However I do
> care about a cleaner implementation. So please be patient with me. :-)
> 
>> msi{x,}_notify doesn't have to go to Xen MMIO emulation, so in Wei's
>> patch you don't need anymore the msi{,x}_notify parts, only apic_send_msi.
>>
> 
> I don't quite understand "you don't need anymore the msi{,x}_notify
> parts". Virtio PCI invokes msi_notify directly. If I don't hook up
> msi{,x}_notify, how can I deal with devices like Virtio PCI?

See how KVM will solve this in [1].

Jan

[1] http://permalink.gmane.org/gmane.comp.emulators.kvm.devel/89121
Stefano Stabellini - April 4, 2012, 1:40 p.m.
On Tue, 3 Apr 2012, Wei Liu (Intern) wrote:
> On Thu, 2012-03-01 at 15:56 +0000, Paolo Bonzini wrote:
> > Il 01/03/2012 15:50, Stefano Stabellini ha scritto:
> > >>> > > That is a good point actually: we already have lapic emulation in Xen,
> > >>> > > it makes sense to have apic-msi in Xen too.
> > >>> > > We would still need the changes to msi_notify and msix_notify though.
> > >> > 
> > >> > Why?  The stores would just go to the Xen interrupt controller MMIO area
> > >> > which then does the xc_hvm_inject_msi.
> > >  
> > > Because msi(x)_notify is called by QEMU's emulated devices: it is not
> > > possible from QEMU to cause an emulation trap in Xen on behalf of the
> > > guest.
> > 
> 
> I'm not a QEMU expert, so following question may be dumb. However I do
> care about a cleaner implementation. So please be patient with me. :-)
> 
> > msi{x,}_notify doesn't have to go to Xen MMIO emulation, so in Wei's
> > patch you don't need anymore the msi{,x}_notify parts, only apic_send_msi.
> > 
> 
> I don't quite understand "you don't need anymore the msi{,x}_notify
> parts". Virtio PCI invokes msi_notify directly. If I don't hook up
> msi{,x}_notify, how can I deal with devices like Virtio PCI?

msi_notify calls stl_le_phys that is internal to QEMU and goes to the
emulated local apic in QEMU. So you can just hook into the emulated
local apic like KVM, rather than adding a new hook in msi{,x}_notify.

The fact that the real emulated local apic is in Xen is very confusing
for the sake of this discussion. The emulated local apic in Xen is not
involved in Paolo's suggestion.

Patch

diff --git a/hw/apic.c b/hw/apic.c
index ff9d24e..9fc0b17 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -22,6 +22,7 @@ 
 #include "host-utils.h"
 #include "trace.h"
 #include "pc.h"
+#include "xen.h"
 
 #define MAX_APIC_WORDS 8
 
@@ -641,8 +642,13 @@  static void apic_send_msi(target_phys_addr_t addr, uint32_t data)
     uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
     uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
     uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
-    /* XXX: Ignore redirection hint. */
-    apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
+
+    if (xen_enabled()) {
+        xen_hvm_inject_msi(addr, data);
+    } else {
+        /* XXX: Ignore redirection hint. */
+        apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
+    }
 }
 
 static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
diff --git a/hw/msi.c b/hw/msi.c
index 5d6ceb6..b11eeac 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -20,6 +20,7 @@ 
 
 #include "msi.h"
 #include "range.h"
+#include "xen.h"
 
 /* Eventually those constants should go to Linux pci_regs.h */
 #define PCI_MSI_PENDING_32      0x10
@@ -257,7 +258,11 @@  void msi_notify(PCIDevice *dev, unsigned int vector)
                    "notify vector 0x%x"
                    " address: 0x%"PRIx64" data: 0x%"PRIx32"\n",
                    vector, address, data);
-    stl_le_phys(address, data);
+    if (xen_enabled()) {
+        xen_hvm_inject_msi(address, data);
+    } else {
+        stl_le_phys(address, data);
+    }
 }
 
 /* call this function after updating configs by pci_default_write_config(). */
diff --git a/hw/msix.c b/hw/msix.c
index 3835eaa..cba302e 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -19,6 +19,7 @@ 
 #include "msix.h"
 #include "pci.h"
 #include "range.h"
+#include "xen.h"
 
 #define MSIX_CAP_LENGTH 12
 
@@ -365,7 +366,12 @@  void msix_notify(PCIDevice *dev, unsigned vector)
 
     address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
     data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
-    stl_le_phys(address, data);
+
+    if (xen_enabled()) {
+        xen_hvm_inject_msi(address, data);
+    } else {
+        stl_le_phys(address, data);
+    }
 }
 
 void msix_reset(PCIDevice *dev)
diff --git a/hw/xen.h b/hw/xen.h
index b46879c..e5926b7 100644
--- a/hw/xen.h
+++ b/hw/xen.h
@@ -34,6 +34,7 @@  static inline int xen_enabled(void)
 int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
 void xen_piix3_set_irq(void *opaque, int irq_num, int level);
 void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len);
+void xen_hvm_inject_msi(uint64_t addr, uint32_t data);
 void xen_cmos_set_s3_resume(void *opaque, int irq, int level);
 
 qemu_irq *xen_interrupt_controller_init(void);
diff --git a/xen-all.c b/xen-all.c
index b0ed1ed..78c6df3 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -122,6 +122,11 @@  void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
     }
 }
 
+void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
+{
+    xc_hvm_inject_msi(xen_xc, xen_domid, addr, data);
+}
+
 static void xen_suspend_notifier(Notifier *notifier, void *data)
 {
     xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
diff --git a/xen-stub.c b/xen-stub.c
index 9ea02d4..8ff2b79 100644
--- a/xen-stub.c
+++ b/xen-stub.c
@@ -29,6 +29,10 @@  void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
 {
 }
 
+void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
+{
+}
+
 void xen_cmos_set_s3_resume(void *opaque, int irq, int level)
 {
 }