[for-4.2,v10,11/15] virtio-iommu: Expose the IOAPIC MSI reserved region when relevant
diff mbox series

Message ID 20190730172137.23114-12-eric.auger@redhat.com
State New
Headers show
Series
  • VIRTIO-IOMMU device
Related show

Commit Message

Eric Auger July 30, 2019, 5:21 p.m. UTC
We introduce a new msi_bypass field which indicates whether
the IOAPIC MSI window [0xFEE00000 - 0xFEEFFFFF] must be exposed
as a reserved region. By default the field is set to true at
instantiation time. Later on we will introduce a property at
virtio pci proxy level to turn it off.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

v8 -> v9:
- pass IOAPIC_RANGE_END to virtio_iommu_register_resv_region
- take into account the change in the struct virtio_iommu_probe_resv_mem
  definition
- We just introduce the field here. A property will be introduced later on
  at pci proxy level.
---
 hw/virtio/virtio-iommu.c         | 36 ++++++++++++++++++++++++++++++++
 include/hw/virtio/virtio-iommu.h |  1 +
 2 files changed, 37 insertions(+)

Comments

Michael S. Tsirkin July 30, 2019, 7:38 p.m. UTC | #1
On Tue, Jul 30, 2019 at 07:21:33PM +0200, Eric Auger wrote:
> We introduce a new msi_bypass field which indicates whether
> the IOAPIC MSI window [0xFEE00000 - 0xFEEFFFFF] must be exposed
> as a reserved region. By default the field is set to true at
> instantiation time. Later on we will introduce a property at
> virtio pci proxy level to turn it off.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---
> 
> v8 -> v9:
> - pass IOAPIC_RANGE_END to virtio_iommu_register_resv_region
> - take into account the change in the struct virtio_iommu_probe_resv_mem
>   definition
> - We just introduce the field here. A property will be introduced later on
>   at pci proxy level.
> ---
>  hw/virtio/virtio-iommu.c         | 36 ++++++++++++++++++++++++++++++++
>  include/hw/virtio/virtio-iommu.h |  1 +
>  2 files changed, 37 insertions(+)
> 
> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> index 66be9a4627..74038288b0 100644
> --- a/hw/virtio/virtio-iommu.c
> +++ b/hw/virtio/virtio-iommu.c
> @@ -39,6 +39,9 @@
>  #define VIOMMU_DEFAULT_QUEUE_SIZE 256
>  #define VIOMMU_PROBE_SIZE 512
>  
> +#define IOAPIC_RANGE_START      (0xfee00000)
> +#define IOAPIC_RANGE_END        (0xfeefffff)
> +
>  #define SUPPORTED_PROBE_PROPERTIES (\
>      1 << VIRTIO_IOMMU_PROBE_T_RESV_MEM)
>  

Sorry where are these numbers coming from?
Does this really work on all platforms?
With all guests?

> @@ -100,6 +103,30 @@ static void virtio_iommu_detach_endpoint_from_domain(viommu_endpoint *ep)
>      ep->domain = NULL;
>  }
>  
> +static void virtio_iommu_register_resv_region(viommu_endpoint *ep,
> +                                              uint8_t subtype,
> +                                              uint64_t start, uint64_t end)
> +{
> +    viommu_interval *interval;
> +    struct virtio_iommu_probe_resv_mem *resv_reg_prop;
> +    size_t prop_size = sizeof(struct virtio_iommu_probe_resv_mem);
> +    size_t value_size = prop_size -
> +                sizeof(struct virtio_iommu_probe_property);
> +
> +    interval = g_malloc0(sizeof(*interval));
> +    interval->low = start;
> +    interval->high = end;
> +
> +    resv_reg_prop = g_malloc0(prop_size);
> +    resv_reg_prop->head.type = VIRTIO_IOMMU_PROBE_T_RESV_MEM;
> +    resv_reg_prop->head.length = cpu_to_le64(value_size);
> +    resv_reg_prop->subtype = cpu_to_le64(subtype);
> +    resv_reg_prop->start = cpu_to_le64(start);
> +    resv_reg_prop->end = cpu_to_le64(end);
> +
> +    g_tree_insert(ep->reserved_regions, interval, resv_reg_prop);
> +}
> +
>  static viommu_endpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>                                                    uint32_t ep_id)
>  {
> @@ -117,6 +144,12 @@ static viommu_endpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>      ep->reserved_regions = g_tree_new_full((GCompareDataFunc)interval_cmp,
>                                              NULL, (GDestroyNotify)g_free,
>                                              (GDestroyNotify)g_free);
> +    if (s->msi_bypass) {
> +        virtio_iommu_register_resv_region(ep, VIRTIO_IOMMU_RESV_MEM_T_MSI,
> +                                          IOAPIC_RANGE_START,
> +                                          IOAPIC_RANGE_END);
> +    }
> +
>      return ep;
>  }
>  
> @@ -822,6 +855,9 @@ static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
>  
>  static void virtio_iommu_instance_init(Object *obj)
>  {
> +    VirtIOIOMMU *s = VIRTIO_IOMMU(obj);
> +
> +    s->msi_bypass = true;
>  }
>  
>  static const VMStateDescription vmstate_virtio_iommu = {
> diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
> index f55f48d304..56c8b4e57f 100644
> --- a/include/hw/virtio/virtio-iommu.h
> +++ b/include/hw/virtio/virtio-iommu.h
> @@ -59,6 +59,7 @@ typedef struct VirtIOIOMMU {
>      GTree *domains;
>      QemuMutex mutex;
>      GTree *endpoints;
> +    bool msi_bypass;
>  } VirtIOIOMMU;
>  
>  #endif
> -- 
> 2.20.1
Tian, Kevin July 30, 2019, 11:20 p.m. UTC | #2
> From: Michael S. Tsirkin [mailto:mst@redhat.com]
> Sent: Wednesday, July 31, 2019 3:38 AM
> 
> On Tue, Jul 30, 2019 at 07:21:33PM +0200, Eric Auger wrote:
> > We introduce a new msi_bypass field which indicates whether
> > the IOAPIC MSI window [0xFEE00000 - 0xFEEFFFFF] must be exposed

it's not good to call it IOAPIC MSI window. any write to this range, either
from IOAPIC or PCI device, is interpreted by the platform as interrupt
request. I'd call it "x86 interrupt address range".

> > as a reserved region. By default the field is set to true at
> > instantiation time. Later on we will introduce a property at
> > virtio pci proxy level to turn it off.
> >
> > Signed-off-by: Eric Auger <eric.auger@redhat.com>
> >
> > ---
> >
> > v8 -> v9:
> > - pass IOAPIC_RANGE_END to virtio_iommu_register_resv_region
> > - take into account the change in the struct virtio_iommu_probe_resv_mem
> >   definition
> > - We just introduce the field here. A property will be introduced later on
> >   at pci proxy level.
> > ---
> >  hw/virtio/virtio-iommu.c         | 36 ++++++++++++++++++++++++++++++++
> >  include/hw/virtio/virtio-iommu.h |  1 +
> >  2 files changed, 37 insertions(+)
> >
> > diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> > index 66be9a4627..74038288b0 100644
> > --- a/hw/virtio/virtio-iommu.c
> > +++ b/hw/virtio/virtio-iommu.c
> > @@ -39,6 +39,9 @@
> >  #define VIOMMU_DEFAULT_QUEUE_SIZE 256
> >  #define VIOMMU_PROBE_SIZE 512
> >
> > +#define IOAPIC_RANGE_START      (0xfee00000)
> > +#define IOAPIC_RANGE_END        (0xfeefffff)
> > +
> >  #define SUPPORTED_PROBE_PROPERTIES (\
> >      1 << VIRTIO_IOMMU_PROBE_T_RESV_MEM)
> >
> 
> Sorry where are these numbers coming from?

this is architecturally defined in x86 SDM.

> Does this really work on all platforms?

x86 only. 

> With all guests?

yes.

> 
> > @@ -100,6 +103,30 @@ static void
> virtio_iommu_detach_endpoint_from_domain(viommu_endpoint *ep)
> >      ep->domain = NULL;
> >  }
> >
> > +static void virtio_iommu_register_resv_region(viommu_endpoint *ep,
> > +                                              uint8_t subtype,
> > +                                              uint64_t start, uint64_t end)
> > +{
> > +    viommu_interval *interval;
> > +    struct virtio_iommu_probe_resv_mem *resv_reg_prop;
> > +    size_t prop_size = sizeof(struct virtio_iommu_probe_resv_mem);
> > +    size_t value_size = prop_size -
> > +                sizeof(struct virtio_iommu_probe_property);
> > +
> > +    interval = g_malloc0(sizeof(*interval));
> > +    interval->low = start;
> > +    interval->high = end;
> > +
> > +    resv_reg_prop = g_malloc0(prop_size);
> > +    resv_reg_prop->head.type = VIRTIO_IOMMU_PROBE_T_RESV_MEM;
> > +    resv_reg_prop->head.length = cpu_to_le64(value_size);
> > +    resv_reg_prop->subtype = cpu_to_le64(subtype);
> > +    resv_reg_prop->start = cpu_to_le64(start);
> > +    resv_reg_prop->end = cpu_to_le64(end);
> > +
> > +    g_tree_insert(ep->reserved_regions, interval, resv_reg_prop);
> > +}
> > +
> >  static viommu_endpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
> >                                                    uint32_t ep_id)
> >  {
> > @@ -117,6 +144,12 @@ static viommu_endpoint
> *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
> >      ep->reserved_regions =
> g_tree_new_full((GCompareDataFunc)interval_cmp,
> >                                              NULL, (GDestroyNotify)g_free,
> >                                              (GDestroyNotify)g_free);
> > +    if (s->msi_bypass) {
> > +        virtio_iommu_register_resv_region(ep,
> VIRTIO_IOMMU_RESV_MEM_T_MSI,
> > +                                          IOAPIC_RANGE_START,
> > +                                          IOAPIC_RANGE_END);
> > +    }
> > +
> >      return ep;
> >  }
> >
> > @@ -822,6 +855,9 @@ static void virtio_iommu_set_status(VirtIODevice
> *vdev, uint8_t status)
> >
> >  static void virtio_iommu_instance_init(Object *obj)
> >  {
> > +    VirtIOIOMMU *s = VIRTIO_IOMMU(obj);
> > +
> > +    s->msi_bypass = true;
> >  }
> >
> >  static const VMStateDescription vmstate_virtio_iommu = {
> > diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-
> iommu.h
> > index f55f48d304..56c8b4e57f 100644
> > --- a/include/hw/virtio/virtio-iommu.h
> > +++ b/include/hw/virtio/virtio-iommu.h
> > @@ -59,6 +59,7 @@ typedef struct VirtIOIOMMU {
> >      GTree *domains;
> >      QemuMutex mutex;
> >      GTree *endpoints;
> > +    bool msi_bypass;
> >  } VirtIOIOMMU;
> >
> >  #endif
> > --
> > 2.20.1
Eric Auger July 31, 2019, 9:05 a.m. UTC | #3
Hi Kevin, Michael,

On 7/31/19 1:20 AM, Tian, Kevin wrote:
>> From: Michael S. Tsirkin [mailto:mst@redhat.com]
>> Sent: Wednesday, July 31, 2019 3:38 AM
>>
>> On Tue, Jul 30, 2019 at 07:21:33PM +0200, Eric Auger wrote:
>>> We introduce a new msi_bypass field which indicates whether
>>> the IOAPIC MSI window [0xFEE00000 - 0xFEEFFFFF] must be exposed
> 
> it's not good to call it IOAPIC MSI window. any write to this range, either
> from IOAPIC or PCI device, is interpreted by the platform as interrupt
> request. I'd call it "x86 interrupt address range".
Thank you for the clarification. I will reword the commit message as
suggested.
> 
>>> as a reserved region. By default the field is set to true at
>>> instantiation time. Later on we will introduce a property at
>>> virtio pci proxy level to turn it off.
>>>
>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>
>>> ---
>>>
>>> v8 -> v9:
>>> - pass IOAPIC_RANGE_END to virtio_iommu_register_resv_region
>>> - take into account the change in the struct virtio_iommu_probe_resv_mem
>>>   definition
>>> - We just introduce the field here. A property will be introduced later on
>>>   at pci proxy level.
>>> ---
>>>  hw/virtio/virtio-iommu.c         | 36 ++++++++++++++++++++++++++++++++
>>>  include/hw/virtio/virtio-iommu.h |  1 +
>>>  2 files changed, 37 insertions(+)
>>>
>>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>>> index 66be9a4627..74038288b0 100644
>>> --- a/hw/virtio/virtio-iommu.c
>>> +++ b/hw/virtio/virtio-iommu.c
>>> @@ -39,6 +39,9 @@
>>>  #define VIOMMU_DEFAULT_QUEUE_SIZE 256
>>>  #define VIOMMU_PROBE_SIZE 512
>>>
>>> +#define IOAPIC_RANGE_START      (0xfee00000)
>>> +#define IOAPIC_RANGE_END        (0xfeefffff)
>>> +
>>>  #define SUPPORTED_PROBE_PROPERTIES (\
>>>      1 << VIRTIO_IOMMU_PROBE_T_RESV_MEM)
>>>
>>
>> Sorry where are these numbers coming from?
> 
> this is architecturally defined in x86 SDM.
> 
>> Does this really work on all platforms?
> 
> x86 only. 
Yes, the initial goal was to allow the x86 integration. Maybe I should allow
the machine to pass reserved regions as device properties instead.

As integration with pc/q35 is beyond the scope of this initial series,
maybe I should remove that patch?

Thanks

Eric
> 
>> With all guests?
> 
> yes.
> 
>>
>>> @@ -100,6 +103,30 @@ static void
>> virtio_iommu_detach_endpoint_from_domain(viommu_endpoint *ep)
>>>      ep->domain = NULL;
>>>  }
>>>
>>> +static void virtio_iommu_register_resv_region(viommu_endpoint *ep,
>>> +                                              uint8_t subtype,
>>> +                                              uint64_t start, uint64_t end)
>>> +{
>>> +    viommu_interval *interval;
>>> +    struct virtio_iommu_probe_resv_mem *resv_reg_prop;
>>> +    size_t prop_size = sizeof(struct virtio_iommu_probe_resv_mem);
>>> +    size_t value_size = prop_size -
>>> +                sizeof(struct virtio_iommu_probe_property);
>>> +
>>> +    interval = g_malloc0(sizeof(*interval));
>>> +    interval->low = start;
>>> +    interval->high = end;
>>> +
>>> +    resv_reg_prop = g_malloc0(prop_size);
>>> +    resv_reg_prop->head.type = VIRTIO_IOMMU_PROBE_T_RESV_MEM;
>>> +    resv_reg_prop->head.length = cpu_to_le64(value_size);
>>> +    resv_reg_prop->subtype = cpu_to_le64(subtype);
>>> +    resv_reg_prop->start = cpu_to_le64(start);
>>> +    resv_reg_prop->end = cpu_to_le64(end);
>>> +
>>> +    g_tree_insert(ep->reserved_regions, interval, resv_reg_prop);
>>> +}
>>> +
>>>  static viommu_endpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>>>                                                    uint32_t ep_id)
>>>  {
>>> @@ -117,6 +144,12 @@ static viommu_endpoint
>> *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>>>      ep->reserved_regions =
>> g_tree_new_full((GCompareDataFunc)interval_cmp,
>>>                                              NULL, (GDestroyNotify)g_free,
>>>                                              (GDestroyNotify)g_free);
>>> +    if (s->msi_bypass) {
>>> +        virtio_iommu_register_resv_region(ep,
>> VIRTIO_IOMMU_RESV_MEM_T_MSI,
>>> +                                          IOAPIC_RANGE_START,
>>> +                                          IOAPIC_RANGE_END);
>>> +    }
>>> +
>>>      return ep;
>>>  }
>>>
>>> @@ -822,6 +855,9 @@ static void virtio_iommu_set_status(VirtIODevice
>> *vdev, uint8_t status)
>>>
>>>  static void virtio_iommu_instance_init(Object *obj)
>>>  {
>>> +    VirtIOIOMMU *s = VIRTIO_IOMMU(obj);
>>> +
>>> +    s->msi_bypass = true;
>>>  }
>>>
>>>  static const VMStateDescription vmstate_virtio_iommu = {
>>> diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-
>> iommu.h
>>> index f55f48d304..56c8b4e57f 100644
>>> --- a/include/hw/virtio/virtio-iommu.h
>>> +++ b/include/hw/virtio/virtio-iommu.h
>>> @@ -59,6 +59,7 @@ typedef struct VirtIOIOMMU {
>>>      GTree *domains;
>>>      QemuMutex mutex;
>>>      GTree *endpoints;
>>> +    bool msi_bypass;
>>>  } VirtIOIOMMU;
>>>
>>>  #endif
>>> --
>>> 2.20.1
Michael S. Tsirkin July 31, 2019, 7:25 p.m. UTC | #4
On Tue, Jul 30, 2019 at 11:20:44PM +0000, Tian, Kevin wrote:
> > From: Michael S. Tsirkin [mailto:mst@redhat.com]
> > Sent: Wednesday, July 31, 2019 3:38 AM
> > 
> > On Tue, Jul 30, 2019 at 07:21:33PM +0200, Eric Auger wrote:
> > > We introduce a new msi_bypass field which indicates whether
> > > the IOAPIC MSI window [0xFEE00000 - 0xFEEFFFFF] must be exposed
> 
> it's not good to call it IOAPIC MSI window. any write to this range, either
> from IOAPIC or PCI device, is interpreted by the platform as interrupt
> request. I'd call it "x86 interrupt address range".

Isn't this APIC_DEFAULT_ADDRESS? I'm not sure guests can't change it
even though I'm not sure qemu supports changing it.

And if so I'd say integrating IOAPIC defaults into the device itself is
inelegant.  How about having guest supply the range through config
space? It's a small change that won't be too late for Linux.

> > > as a reserved region. By default the field is set to true at
> > > instantiation time. Later on we will introduce a property at
> > > virtio pci proxy level to turn it off.
> > >
> > > Signed-off-by: Eric Auger <eric.auger@redhat.com>
> > >
> > > ---
> > >
> > > v8 -> v9:
> > > - pass IOAPIC_RANGE_END to virtio_iommu_register_resv_region
> > > - take into account the change in the struct virtio_iommu_probe_resv_mem
> > >   definition
> > > - We just introduce the field here. A property will be introduced later on
> > >   at pci proxy level.
> > > ---
> > >  hw/virtio/virtio-iommu.c         | 36 ++++++++++++++++++++++++++++++++
> > >  include/hw/virtio/virtio-iommu.h |  1 +
> > >  2 files changed, 37 insertions(+)
> > >
> > > diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> > > index 66be9a4627..74038288b0 100644
> > > --- a/hw/virtio/virtio-iommu.c
> > > +++ b/hw/virtio/virtio-iommu.c
> > > @@ -39,6 +39,9 @@
> > >  #define VIOMMU_DEFAULT_QUEUE_SIZE 256
> > >  #define VIOMMU_PROBE_SIZE 512
> > >
> > > +#define IOAPIC_RANGE_START      (0xfee00000)
> > > +#define IOAPIC_RANGE_END        (0xfeefffff)
> > > +
> > >  #define SUPPORTED_PROBE_PROPERTIES (\
> > >      1 << VIRTIO_IOMMU_PROBE_T_RESV_MEM)
> > >
> > 
> > Sorry where are these numbers coming from?
> 
> this is architecturally defined in x86 SDM.
> 
> > Does this really work on all platforms?
> 
> x86 only. 

But you seem to add this code for all platforms:

	@@ -6,6 +6,11 @@ config VIRTIO_RNG
	     default y
	     depends on VIRTIO

	+config VIRTIO_IOMMU
	+    bool
	+    default y
	+    depends on VIRTIO
	+    


> > With all guests?
> 
> yes.
> 
> > 
> > > @@ -100,6 +103,30 @@ static void
> > virtio_iommu_detach_endpoint_from_domain(viommu_endpoint *ep)
> > >      ep->domain = NULL;
> > >  }
> > >
> > > +static void virtio_iommu_register_resv_region(viommu_endpoint *ep,
> > > +                                              uint8_t subtype,
> > > +                                              uint64_t start, uint64_t end)
> > > +{
> > > +    viommu_interval *interval;
> > > +    struct virtio_iommu_probe_resv_mem *resv_reg_prop;
> > > +    size_t prop_size = sizeof(struct virtio_iommu_probe_resv_mem);
> > > +    size_t value_size = prop_size -
> > > +                sizeof(struct virtio_iommu_probe_property);
> > > +
> > > +    interval = g_malloc0(sizeof(*interval));
> > > +    interval->low = start;
> > > +    interval->high = end;
> > > +
> > > +    resv_reg_prop = g_malloc0(prop_size);
> > > +    resv_reg_prop->head.type = VIRTIO_IOMMU_PROBE_T_RESV_MEM;
> > > +    resv_reg_prop->head.length = cpu_to_le64(value_size);
> > > +    resv_reg_prop->subtype = cpu_to_le64(subtype);
> > > +    resv_reg_prop->start = cpu_to_le64(start);
> > > +    resv_reg_prop->end = cpu_to_le64(end);
> > > +
> > > +    g_tree_insert(ep->reserved_regions, interval, resv_reg_prop);
> > > +}
> > > +
> > >  static viommu_endpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
> > >                                                    uint32_t ep_id)
> > >  {
> > > @@ -117,6 +144,12 @@ static viommu_endpoint
> > *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
> > >      ep->reserved_regions =
> > g_tree_new_full((GCompareDataFunc)interval_cmp,
> > >                                              NULL, (GDestroyNotify)g_free,
> > >                                              (GDestroyNotify)g_free);
> > > +    if (s->msi_bypass) {
> > > +        virtio_iommu_register_resv_region(ep,
> > VIRTIO_IOMMU_RESV_MEM_T_MSI,
> > > +                                          IOAPIC_RANGE_START,
> > > +                                          IOAPIC_RANGE_END);
> > > +    }
> > > +
> > >      return ep;
> > >  }
> > >
> > > @@ -822,6 +855,9 @@ static void virtio_iommu_set_status(VirtIODevice
> > *vdev, uint8_t status)
> > >
> > >  static void virtio_iommu_instance_init(Object *obj)
> > >  {
> > > +    VirtIOIOMMU *s = VIRTIO_IOMMU(obj);
> > > +
> > > +    s->msi_bypass = true;
> > >  }
> > >
> > >  static const VMStateDescription vmstate_virtio_iommu = {
> > > diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-
> > iommu.h
> > > index f55f48d304..56c8b4e57f 100644
> > > --- a/include/hw/virtio/virtio-iommu.h
> > > +++ b/include/hw/virtio/virtio-iommu.h
> > > @@ -59,6 +59,7 @@ typedef struct VirtIOIOMMU {
> > >      GTree *domains;
> > >      QemuMutex mutex;
> > >      GTree *endpoints;
> > > +    bool msi_bypass;
> > >  } VirtIOIOMMU;
> > >
> > >  #endif
> > > --
> > > 2.20.1
Eric Auger July 31, 2019, 7:44 p.m. UTC | #5
Hi Michael,

On 7/31/19 9:25 PM, Michael S. Tsirkin wrote:
> On Tue, Jul 30, 2019 at 11:20:44PM +0000, Tian, Kevin wrote:
>>> From: Michael S. Tsirkin [mailto:mst@redhat.com]
>>> Sent: Wednesday, July 31, 2019 3:38 AM
>>>
>>> On Tue, Jul 30, 2019 at 07:21:33PM +0200, Eric Auger wrote:
>>>> We introduce a new msi_bypass field which indicates whether
>>>> the IOAPIC MSI window [0xFEE00000 - 0xFEEFFFFF] must be exposed
>>
>> it's not good to call it IOAPIC MSI window. any write to this range, either
>> from IOAPIC or PCI device, is interpreted by the platform as interrupt
>> request. I'd call it "x86 interrupt address range".
> 
> Isn't this APIC_DEFAULT_ADDRESS? I'm not sure guests can't change it
> even though I'm not sure qemu supports changing it.

That's indeed matching:

#define APIC_DEFAULT_ADDRESS 0xfee00000
#define APIC_SPACE_SIZE      0x100000

> 
> And if so I'd say integrating IOAPIC defaults into the device itself is
> inelegant.

I agree.

  How about having guest supply the range through config
> space? It's a small change that won't be too late for Linux.

Isn't it a property of the platform instead. I mean isn't it the job of
the machine model to set this. The guest driver is arch agnostic if I am
not wrong.


> 
>>>> as a reserved region. By default the field is set to true at
>>>> instantiation time. Later on we will introduce a property at
>>>> virtio pci proxy level to turn it off.
>>>>
>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>>
>>>> ---
>>>>
>>>> v8 -> v9:
>>>> - pass IOAPIC_RANGE_END to virtio_iommu_register_resv_region
>>>> - take into account the change in the struct virtio_iommu_probe_resv_mem
>>>>   definition
>>>> - We just introduce the field here. A property will be introduced later on
>>>>   at pci proxy level.
>>>> ---
>>>>  hw/virtio/virtio-iommu.c         | 36 ++++++++++++++++++++++++++++++++
>>>>  include/hw/virtio/virtio-iommu.h |  1 +
>>>>  2 files changed, 37 insertions(+)
>>>>
>>>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>>>> index 66be9a4627..74038288b0 100644
>>>> --- a/hw/virtio/virtio-iommu.c
>>>> +++ b/hw/virtio/virtio-iommu.c
>>>> @@ -39,6 +39,9 @@
>>>>  #define VIOMMU_DEFAULT_QUEUE_SIZE 256
>>>>  #define VIOMMU_PROBE_SIZE 512
>>>>
>>>> +#define IOAPIC_RANGE_START      (0xfee00000)
>>>> +#define IOAPIC_RANGE_END        (0xfeefffff)
>>>> +
>>>>  #define SUPPORTED_PROBE_PROPERTIES (\
>>>>      1 << VIRTIO_IOMMU_PROBE_T_RESV_MEM)
>>>>
>>>
>>> Sorry where are these numbers coming from?
>>
>> this is architecturally defined in x86 SDM.
>>
>>> Does this really work on all platforms?
>>
>> x86 only. 
> 
> But you seem to add this code for all platforms:
> 
> 	@@ -6,6 +6,11 @@ config VIRTIO_RNG
> 	     default y
> 	     depends on VIRTIO
> 
> 	+config VIRTIO_IOMMU
> 	+    bool
> 	+    default y
> 	+    depends on VIRTIO
> 	+
Actually it was supposed to be integrated with ARM first and then with x86.

Thanks

Eric
> 
> 
>>> With all guests?
>>
>> yes.
>>
>>>
>>>> @@ -100,6 +103,30 @@ static void
>>> virtio_iommu_detach_endpoint_from_domain(viommu_endpoint *ep)
>>>>      ep->domain = NULL;
>>>>  }
>>>>
>>>> +static void virtio_iommu_register_resv_region(viommu_endpoint *ep,
>>>> +                                              uint8_t subtype,
>>>> +                                              uint64_t start, uint64_t end)
>>>> +{
>>>> +    viommu_interval *interval;
>>>> +    struct virtio_iommu_probe_resv_mem *resv_reg_prop;
>>>> +    size_t prop_size = sizeof(struct virtio_iommu_probe_resv_mem);
>>>> +    size_t value_size = prop_size -
>>>> +                sizeof(struct virtio_iommu_probe_property);
>>>> +
>>>> +    interval = g_malloc0(sizeof(*interval));
>>>> +    interval->low = start;
>>>> +    interval->high = end;
>>>> +
>>>> +    resv_reg_prop = g_malloc0(prop_size);
>>>> +    resv_reg_prop->head.type = VIRTIO_IOMMU_PROBE_T_RESV_MEM;
>>>> +    resv_reg_prop->head.length = cpu_to_le64(value_size);
>>>> +    resv_reg_prop->subtype = cpu_to_le64(subtype);
>>>> +    resv_reg_prop->start = cpu_to_le64(start);
>>>> +    resv_reg_prop->end = cpu_to_le64(end);
>>>> +
>>>> +    g_tree_insert(ep->reserved_regions, interval, resv_reg_prop);
>>>> +}
>>>> +
>>>>  static viommu_endpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>>>>                                                    uint32_t ep_id)
>>>>  {
>>>> @@ -117,6 +144,12 @@ static viommu_endpoint
>>> *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>>>>      ep->reserved_regions =
>>> g_tree_new_full((GCompareDataFunc)interval_cmp,
>>>>                                              NULL, (GDestroyNotify)g_free,
>>>>                                              (GDestroyNotify)g_free);
>>>> +    if (s->msi_bypass) {
>>>> +        virtio_iommu_register_resv_region(ep,
>>> VIRTIO_IOMMU_RESV_MEM_T_MSI,
>>>> +                                          IOAPIC_RANGE_START,
>>>> +                                          IOAPIC_RANGE_END);
>>>> +    }
>>>> +
>>>>      return ep;
>>>>  }
>>>>
>>>> @@ -822,6 +855,9 @@ static void virtio_iommu_set_status(VirtIODevice
>>> *vdev, uint8_t status)
>>>>
>>>>  static void virtio_iommu_instance_init(Object *obj)
>>>>  {
>>>> +    VirtIOIOMMU *s = VIRTIO_IOMMU(obj);
>>>> +
>>>> +    s->msi_bypass = true;
>>>>  }
>>>>
>>>>  static const VMStateDescription vmstate_virtio_iommu = {
>>>> diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-
>>> iommu.h
>>>> index f55f48d304..56c8b4e57f 100644
>>>> --- a/include/hw/virtio/virtio-iommu.h
>>>> +++ b/include/hw/virtio/virtio-iommu.h
>>>> @@ -59,6 +59,7 @@ typedef struct VirtIOIOMMU {
>>>>      GTree *domains;
>>>>      QemuMutex mutex;
>>>>      GTree *endpoints;
>>>> +    bool msi_bypass;
>>>>  } VirtIOIOMMU;
>>>>
>>>>  #endif
>>>> --
>>>> 2.20.1
Tian, Kevin July 31, 2019, 11:23 p.m. UTC | #6
> From: Auger Eric [mailto:eric.auger@redhat.com]
> Sent: Thursday, August 1, 2019 3:45 AM
> 
> Hi Michael,
> 
> On 7/31/19 9:25 PM, Michael S. Tsirkin wrote:
> > On Tue, Jul 30, 2019 at 11:20:44PM +0000, Tian, Kevin wrote:
> >>> From: Michael S. Tsirkin [mailto:mst@redhat.com]
> >>> Sent: Wednesday, July 31, 2019 3:38 AM
> >>>
> >>> On Tue, Jul 30, 2019 at 07:21:33PM +0200, Eric Auger wrote:
> >>>> We introduce a new msi_bypass field which indicates whether
> >>>> the IOAPIC MSI window [0xFEE00000 - 0xFEEFFFFF] must be exposed
> >>
> >> it's not good to call it IOAPIC MSI window. any write to this range, either
> >> from IOAPIC or PCI device, is interpreted by the platform as interrupt
> >> request. I'd call it "x86 interrupt address range".
> >
> > Isn't this APIC_DEFAULT_ADDRESS? I'm not sure guests can't change it
> > even though I'm not sure qemu supports changing it.
> 
> That's indeed matching:
> 
> #define APIC_DEFAULT_ADDRESS 0xfee00000
> #define APIC_SPACE_SIZE      0x100000
> 

They are different thing, though value matches. APIC default address
is the memory-mapped region for software to access APIC register. It
can be relocated by the software, with default as 0xfee00000. On the
other hand, the interrupt address range is for root complex to interpret
interrupt message from devices. You can look at Intel SDM 3A, 10.11
Message Signalled Interrupts, where the message address register
format is defined with 0xfee as the hard prefix.

Thanks
Kevin

Patch
diff mbox series

diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 66be9a4627..74038288b0 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -39,6 +39,9 @@ 
 #define VIOMMU_DEFAULT_QUEUE_SIZE 256
 #define VIOMMU_PROBE_SIZE 512
 
+#define IOAPIC_RANGE_START      (0xfee00000)
+#define IOAPIC_RANGE_END        (0xfeefffff)
+
 #define SUPPORTED_PROBE_PROPERTIES (\
     1 << VIRTIO_IOMMU_PROBE_T_RESV_MEM)
 
@@ -100,6 +103,30 @@  static void virtio_iommu_detach_endpoint_from_domain(viommu_endpoint *ep)
     ep->domain = NULL;
 }
 
+static void virtio_iommu_register_resv_region(viommu_endpoint *ep,
+                                              uint8_t subtype,
+                                              uint64_t start, uint64_t end)
+{
+    viommu_interval *interval;
+    struct virtio_iommu_probe_resv_mem *resv_reg_prop;
+    size_t prop_size = sizeof(struct virtio_iommu_probe_resv_mem);
+    size_t value_size = prop_size -
+                sizeof(struct virtio_iommu_probe_property);
+
+    interval = g_malloc0(sizeof(*interval));
+    interval->low = start;
+    interval->high = end;
+
+    resv_reg_prop = g_malloc0(prop_size);
+    resv_reg_prop->head.type = VIRTIO_IOMMU_PROBE_T_RESV_MEM;
+    resv_reg_prop->head.length = cpu_to_le64(value_size);
+    resv_reg_prop->subtype = cpu_to_le64(subtype);
+    resv_reg_prop->start = cpu_to_le64(start);
+    resv_reg_prop->end = cpu_to_le64(end);
+
+    g_tree_insert(ep->reserved_regions, interval, resv_reg_prop);
+}
+
 static viommu_endpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
                                                   uint32_t ep_id)
 {
@@ -117,6 +144,12 @@  static viommu_endpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
     ep->reserved_regions = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                             NULL, (GDestroyNotify)g_free,
                                             (GDestroyNotify)g_free);
+    if (s->msi_bypass) {
+        virtio_iommu_register_resv_region(ep, VIRTIO_IOMMU_RESV_MEM_T_MSI,
+                                          IOAPIC_RANGE_START,
+                                          IOAPIC_RANGE_END);
+    }
+
     return ep;
 }
 
@@ -822,6 +855,9 @@  static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
 
 static void virtio_iommu_instance_init(Object *obj)
 {
+    VirtIOIOMMU *s = VIRTIO_IOMMU(obj);
+
+    s->msi_bypass = true;
 }
 
 static const VMStateDescription vmstate_virtio_iommu = {
diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index f55f48d304..56c8b4e57f 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -59,6 +59,7 @@  typedef struct VirtIOIOMMU {
     GTree *domains;
     QemuMutex mutex;
     GTree *endpoints;
+    bool msi_bypass;
 } VirtIOIOMMU;
 
 #endif