diff mbox

[05/13] acpi: add DMAR scope definition for root IOAPIC

Message ID 1455852618-5224-6-git-send-email-peterx@redhat.com
State New
Headers show

Commit Message

Peter Xu Feb. 19, 2016, 3:30 a.m. UTC
To enable interrupt remapping for intel IOMMU device, each IOAPIC device
in the system reported via ACPI MADT must be explicitly enumerated under
one specific remapping hardware unit. This patch adds the root-complex
IOAPIC into the default DMAR device.

Please refer to VT-d spec 8.3.1.1 for more information.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/i386/acpi-build.c        | 23 +++++++++++++++++++++--
 include/hw/acpi/acpi-defs.h | 15 +++++++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)

Comments

Marcel Apfelbaum Feb. 21, 2016, 11:38 a.m. UTC | #1
On 02/19/2016 05:30 AM, Peter Xu wrote:
> To enable interrupt remapping for intel IOMMU device, each IOAPIC device
> in the system reported via ACPI MADT must be explicitly enumerated under
> one specific remapping hardware unit. This patch adds the root-complex
> IOAPIC into the default DMAR device.
>
> Please refer to VT-d spec 8.3.1.1 for more information.
>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>   hw/i386/acpi-build.c        | 23 +++++++++++++++++++++--
>   include/hw/acpi/acpi-defs.h | 15 +++++++++++++++
>   2 files changed, 36 insertions(+), 2 deletions(-)
>
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index d9e4f91..1cefe43 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -76,6 +76,9 @@
>   #define ACPI_BUILD_DPRINTF(fmt, ...)
>   #endif
>
> +/* Default IOAPIC ID */
> +#define ACPI_BUILD_IOAPIC_ID 0x0
> +
>   typedef struct AcpiCpuInfo {
>       DECLARE_BITMAP(found_cpus, ACPI_CPU_HOTPLUG_ID_LIMIT);
>   } AcpiCpuInfo;
> @@ -392,7 +395,6 @@ build_madt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu)
>       io_apic = acpi_data_push(table_data, sizeof *io_apic);
>       io_apic->type = ACPI_APIC_IO;
>       io_apic->length = sizeof(*io_apic);
> -#define ACPI_BUILD_IOAPIC_ID 0x0
>       io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID;
>       io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS);
>       io_apic->interrupt = cpu_to_le32(0);
> @@ -2511,6 +2513,9 @@ build_dmar_q35(GArray *table_data, GArray *linker)
>       AcpiDmarHardwareUnit *drhd;
>       uint8_t dmar_flags = 0;
>       IntelIOMMUState *intel_iommu = acpi_get_iommu();
> +    AcpiDmarDeviceScope *scope = NULL;
> +    /* Root complex IOAPIC use one path[0] only */
> +    uint16_t scope_size = sizeof(*scope) + sizeof(uint16_t);
>
>       assert(intel_iommu);
>
> @@ -2526,11 +2531,25 @@ build_dmar_q35(GArray *table_data, GArray *linker)
>       /* DMAR Remapping Hardware Unit Definition structure */
>       drhd = acpi_data_push(table_data, sizeof(*drhd));
>       drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
> -    drhd->length = cpu_to_le16(sizeof(*drhd));   /* No device scope now */
> +    drhd->length = cpu_to_le16(sizeof(*drhd) + scope_size);
>       drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
>       drhd->pci_segment = cpu_to_le16(0);
>       drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
>
> +    /* Scope definition for the root-complex IOAPIC */
> +    scope = acpi_data_push(table_data, scope_size);
> +    scope->entry_type = cpu_to_le16(ACPI_DMAR_DEV_SCOPE_TYPE_IOAPIC);
> +    scope->length = scope_size;
> +    /*
> +     * An arbitary but unique bus number, to be used to generate
> +     * source ID for IOAPIC device in BDF format.
> +     */
> +#define ACPI_IOAPIC_BUS_IR         (0xf0)
> +#define ACPI_IOAPIC_DEVFN_IR       (0x00)

Hi,

How do you know for sure there is no bus (or bus & device) having the number 0xf0 in the system?
Now that we support multiple Root Complexes, using the  pxb-pcie device we can simply add a PCI root bus like this:
     -device pxb-pcie,bus_nr=0xf0
or we can add enough switches to get to this number.

You could dynamically query for an unused PCI bus, but the number would change between the runs.
Or, I suppose you can reserve a slot on bus 0 for that. It is interesting how it works on a real machine.

Thanks,
Marcel


> +    scope->enumeration_id = cpu_to_le16(ACPI_BUILD_IOAPIC_ID);
> +    scope->bus = cpu_to_le16(ACPI_IOAPIC_BUS_IR);
> +    scope->path[0] = cpu_to_le16(ACPI_IOAPIC_DEVFN_IR);
> +
>       build_header(linker, table_data, (void *)(table_data->data + dmar_start),
>                    "DMAR", table_data->len - dmar_start, 1, NULL, NULL);
>   }
> diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
> index c7a03d4..2430af6 100644
> --- a/include/hw/acpi/acpi-defs.h
> +++ b/include/hw/acpi/acpi-defs.h
> @@ -556,6 +556,20 @@ enum {
>   /*
>    * Sub-structures for DMAR
>    */
> +
> +#define ACPI_DMAR_DEV_SCOPE_TYPE_IOAPIC     (0x03)
> +
> +/* Device scope structure for DRHD. */
> +struct AcpiDmarDeviceScope {
> +    uint8_t entry_type;
> +    uint8_t length;
> +    uint16_t reserved;
> +    uint8_t enumeration_id;
> +    uint8_t bus;
> +    uint16_t path[0];           /* list of dev:func pairs */
> +} QEMU_PACKED;
> +typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope;
> +
>   /* Type 0: Hardware Unit Definition */
>   struct AcpiDmarHardwareUnit {
>       uint16_t type;
> @@ -564,6 +578,7 @@ struct AcpiDmarHardwareUnit {
>       uint8_t reserved;
>       uint16_t pci_segment;   /* The PCI Segment associated with this unit */
>       uint64_t address;   /* Base address of remapping hardware register-set */
> +    AcpiDmarDeviceScope scope[0];
>   } QEMU_PACKED;
>   typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit;
>
>
Marcel Apfelbaum Feb. 21, 2016, 12:08 p.m. UTC | #2
On 02/21/2016 01:38 PM, Marcel Apfelbaum wrote:
> On 02/19/2016 05:30 AM, Peter Xu wrote:
>> To enable interrupt remapping for intel IOMMU device, each IOAPIC device
>> in the system reported via ACPI MADT must be explicitly enumerated under
>> one specific remapping hardware unit. This patch adds the root-complex
>> IOAPIC into the default DMAR device.
>>
>> Please refer to VT-d spec 8.3.1.1 for more information.
>>
>> Signed-off-by: Peter Xu <peterx@redhat.com>
>> ---
>>   hw/i386/acpi-build.c        | 23 +++++++++++++++++++++--
>>   include/hw/acpi/acpi-defs.h | 15 +++++++++++++++
>>   2 files changed, 36 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>> index d9e4f91..1cefe43 100644
>> --- a/hw/i386/acpi-build.c
>> +++ b/hw/i386/acpi-build.c
>> @@ -76,6 +76,9 @@
>>   #define ACPI_BUILD_DPRINTF(fmt, ...)
>>   #endif
>>
>> +/* Default IOAPIC ID */
>> +#define ACPI_BUILD_IOAPIC_ID 0x0
>> +
>>   typedef struct AcpiCpuInfo {
>>       DECLARE_BITMAP(found_cpus, ACPI_CPU_HOTPLUG_ID_LIMIT);
>>   } AcpiCpuInfo;
>> @@ -392,7 +395,6 @@ build_madt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu)
>>       io_apic = acpi_data_push(table_data, sizeof *io_apic);
>>       io_apic->type = ACPI_APIC_IO;
>>       io_apic->length = sizeof(*io_apic);
>> -#define ACPI_BUILD_IOAPIC_ID 0x0
>>       io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID;
>>       io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS);
>>       io_apic->interrupt = cpu_to_le32(0);
>> @@ -2511,6 +2513,9 @@ build_dmar_q35(GArray *table_data, GArray *linker)
>>       AcpiDmarHardwareUnit *drhd;
>>       uint8_t dmar_flags = 0;
>>       IntelIOMMUState *intel_iommu = acpi_get_iommu();
>> +    AcpiDmarDeviceScope *scope = NULL;
>> +    /* Root complex IOAPIC use one path[0] only */
>> +    uint16_t scope_size = sizeof(*scope) + sizeof(uint16_t);
>>
>>       assert(intel_iommu);
>>
>> @@ -2526,11 +2531,25 @@ build_dmar_q35(GArray *table_data, GArray *linker)
>>       /* DMAR Remapping Hardware Unit Definition structure */
>>       drhd = acpi_data_push(table_data, sizeof(*drhd));
>>       drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
>> -    drhd->length = cpu_to_le16(sizeof(*drhd));   /* No device scope now */
>> +    drhd->length = cpu_to_le16(sizeof(*drhd) + scope_size);
>>       drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
>>       drhd->pci_segment = cpu_to_le16(0);
>>       drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
>>
>> +    /* Scope definition for the root-complex IOAPIC */
>> +    scope = acpi_data_push(table_data, scope_size);
>> +    scope->entry_type = cpu_to_le16(ACPI_DMAR_DEV_SCOPE_TYPE_IOAPIC);
>> +    scope->length = scope_size;
>> +    /*
>> +     * An arbitary but unique bus number, to be used to generate
>> +     * source ID for IOAPIC device in BDF format.
>> +     */
>> +#define ACPI_IOAPIC_BUS_IR         (0xf0)
>> +#define ACPI_IOAPIC_DEVFN_IR       (0x00)
>
> Hi,
>
> How do you know for sure there is no bus (or bus & device) having the number 0xf0 in the system?
> Now that we support multiple Root Complexes, using the  pxb-pcie device we can simply add a PCI root bus like this:
>      -device pxb-pcie,bus_nr=0xf0
> or we can add enough switches to get to this number.
>
> You could dynamically query for an unused PCI bus, but the number would change between the runs.
> Or, I suppose you can reserve a slot on bus 0 for that. It is interesting how it works on a real machine.

thinking about it more, maybe we should let the firmware to assign the bus/dev/fun for the IO APIC?

Thanks,
Marcel

>
> Thanks,
> Marcel
>
>
>> +    scope->enumeration_id = cpu_to_le16(ACPI_BUILD_IOAPIC_ID);
>> +    scope->bus = cpu_to_le16(ACPI_IOAPIC_BUS_IR);
>> +    scope->path[0] = cpu_to_le16(ACPI_IOAPIC_DEVFN_IR);
>> +
>>       build_header(linker, table_data, (void *)(table_data->data + dmar_start),
>>                    "DMAR", table_data->len - dmar_start, 1, NULL, NULL);
>>   }
>> diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
>> index c7a03d4..2430af6 100644
>> --- a/include/hw/acpi/acpi-defs.h
>> +++ b/include/hw/acpi/acpi-defs.h
>> @@ -556,6 +556,20 @@ enum {
>>   /*
>>    * Sub-structures for DMAR
>>    */
>> +
>> +#define ACPI_DMAR_DEV_SCOPE_TYPE_IOAPIC     (0x03)
>> +
>> +/* Device scope structure for DRHD. */
>> +struct AcpiDmarDeviceScope {
>> +    uint8_t entry_type;
>> +    uint8_t length;
>> +    uint16_t reserved;
>> +    uint8_t enumeration_id;
>> +    uint8_t bus;
>> +    uint16_t path[0];           /* list of dev:func pairs */
>> +} QEMU_PACKED;
>> +typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope;
>> +
>>   /* Type 0: Hardware Unit Definition */
>>   struct AcpiDmarHardwareUnit {
>>       uint16_t type;
>> @@ -564,6 +578,7 @@ struct AcpiDmarHardwareUnit {
>>       uint8_t reserved;
>>       uint16_t pci_segment;   /* The PCI Segment associated with this unit */
>>       uint64_t address;   /* Base address of remapping hardware register-set */
>> +    AcpiDmarDeviceScope scope[0];
>>   } QEMU_PACKED;
>>   typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit;
>>
>>
>
Jan Kiszka Feb. 21, 2016, 1:40 p.m. UTC | #3
On 2016-02-21 13:08, Marcel Apfelbaum wrote:
> On 02/21/2016 01:38 PM, Marcel Apfelbaum wrote:
>> On 02/19/2016 05:30 AM, Peter Xu wrote:
>>> To enable interrupt remapping for intel IOMMU device, each IOAPIC device
>>> in the system reported via ACPI MADT must be explicitly enumerated under
>>> one specific remapping hardware unit. This patch adds the root-complex
>>> IOAPIC into the default DMAR device.
>>>
>>> Please refer to VT-d spec 8.3.1.1 for more information.
>>>
>>> Signed-off-by: Peter Xu <peterx@redhat.com>
>>> ---
>>>   hw/i386/acpi-build.c        | 23 +++++++++++++++++++++--
>>>   include/hw/acpi/acpi-defs.h | 15 +++++++++++++++
>>>   2 files changed, 36 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>>> index d9e4f91..1cefe43 100644
>>> --- a/hw/i386/acpi-build.c
>>> +++ b/hw/i386/acpi-build.c
>>> @@ -76,6 +76,9 @@
>>>   #define ACPI_BUILD_DPRINTF(fmt, ...)
>>>   #endif
>>>
>>> +/* Default IOAPIC ID */
>>> +#define ACPI_BUILD_IOAPIC_ID 0x0
>>> +
>>>   typedef struct AcpiCpuInfo {
>>>       DECLARE_BITMAP(found_cpus, ACPI_CPU_HOTPLUG_ID_LIMIT);
>>>   } AcpiCpuInfo;
>>> @@ -392,7 +395,6 @@ build_madt(GArray *table_data, GArray *linker,
>>> AcpiCpuInfo *cpu)
>>>       io_apic = acpi_data_push(table_data, sizeof *io_apic);
>>>       io_apic->type = ACPI_APIC_IO;
>>>       io_apic->length = sizeof(*io_apic);
>>> -#define ACPI_BUILD_IOAPIC_ID 0x0
>>>       io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID;
>>>       io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS);
>>>       io_apic->interrupt = cpu_to_le32(0);
>>> @@ -2511,6 +2513,9 @@ build_dmar_q35(GArray *table_data, GArray *linker)
>>>       AcpiDmarHardwareUnit *drhd;
>>>       uint8_t dmar_flags = 0;
>>>       IntelIOMMUState *intel_iommu = acpi_get_iommu();
>>> +    AcpiDmarDeviceScope *scope = NULL;
>>> +    /* Root complex IOAPIC use one path[0] only */
>>> +    uint16_t scope_size = sizeof(*scope) + sizeof(uint16_t);
>>>
>>>       assert(intel_iommu);
>>>
>>> @@ -2526,11 +2531,25 @@ build_dmar_q35(GArray *table_data, GArray
>>> *linker)
>>>       /* DMAR Remapping Hardware Unit Definition structure */
>>>       drhd = acpi_data_push(table_data, sizeof(*drhd));
>>>       drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
>>> -    drhd->length = cpu_to_le16(sizeof(*drhd));   /* No device scope
>>> now */
>>> +    drhd->length = cpu_to_le16(sizeof(*drhd) + scope_size);
>>>       drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
>>>       drhd->pci_segment = cpu_to_le16(0);
>>>       drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
>>>
>>> +    /* Scope definition for the root-complex IOAPIC */
>>> +    scope = acpi_data_push(table_data, scope_size);
>>> +    scope->entry_type = cpu_to_le16(ACPI_DMAR_DEV_SCOPE_TYPE_IOAPIC);
>>> +    scope->length = scope_size;
>>> +    /*
>>> +     * An arbitary but unique bus number, to be used to generate
>>> +     * source ID for IOAPIC device in BDF format.
>>> +     */
>>> +#define ACPI_IOAPIC_BUS_IR         (0xf0)
>>> +#define ACPI_IOAPIC_DEVFN_IR       (0x00)
>>
>> Hi,
>>
>> How do you know for sure there is no bus (or bus & device) having the
>> number 0xf0 in the system?
>> Now that we support multiple Root Complexes, using the  pxb-pcie
>> device we can simply add a PCI root bus like this:
>>      -device pxb-pcie,bus_nr=0xf0
>> or we can add enough switches to get to this number.
>>
>> You could dynamically query for an unused PCI bus, but the number
>> would change between the runs.
>> Or, I suppose you can reserve a slot on bus 0 for that. It is
>> interesting how it works on a real machine.
> 
> thinking about it more, maybe we should let the firmware to assign the
> bus/dev/fun for the IO APIC?

We have the same problem over with VT-d and IR.

I don't think the firmware is not the right place, otherwise there would
be an interface in hw to adjust that parameters. I think we should
simply make sure that the qemu user cannot assign devices to those
addresses as they are reserved for the platform devices (the HPET
requires another ID), or even reserve the hole bus for the platform.

Jan
Marcel Apfelbaum Feb. 21, 2016, 3:54 p.m. UTC | #4
On 02/21/2016 03:40 PM, Jan Kiszka wrote:
> On 2016-02-21 13:08, Marcel Apfelbaum wrote:
>> On 02/21/2016 01:38 PM, Marcel Apfelbaum wrote:
>>> On 02/19/2016 05:30 AM, Peter Xu wrote:
>>>> To enable interrupt remapping for intel IOMMU device, each IOAPIC device
>>>> in the system reported via ACPI MADT must be explicitly enumerated under
>>>> one specific remapping hardware unit. This patch adds the root-complex
>>>> IOAPIC into the default DMAR device.
>>>>
>>>> Please refer to VT-d spec 8.3.1.1 for more information.
>>>>
>>>> Signed-off-by: Peter Xu <peterx@redhat.com>
>>>> ---
>>>>    hw/i386/acpi-build.c        | 23 +++++++++++++++++++++--
>>>>    include/hw/acpi/acpi-defs.h | 15 +++++++++++++++
>>>>    2 files changed, 36 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>>>> index d9e4f91..1cefe43 100644
>>>> --- a/hw/i386/acpi-build.c
>>>> +++ b/hw/i386/acpi-build.c
>>>> @@ -76,6 +76,9 @@
>>>>    #define ACPI_BUILD_DPRINTF(fmt, ...)
>>>>    #endif
>>>>
>>>> +/* Default IOAPIC ID */
>>>> +#define ACPI_BUILD_IOAPIC_ID 0x0
>>>> +
>>>>    typedef struct AcpiCpuInfo {
>>>>        DECLARE_BITMAP(found_cpus, ACPI_CPU_HOTPLUG_ID_LIMIT);
>>>>    } AcpiCpuInfo;
>>>> @@ -392,7 +395,6 @@ build_madt(GArray *table_data, GArray *linker,
>>>> AcpiCpuInfo *cpu)
>>>>        io_apic = acpi_data_push(table_data, sizeof *io_apic);
>>>>        io_apic->type = ACPI_APIC_IO;
>>>>        io_apic->length = sizeof(*io_apic);
>>>> -#define ACPI_BUILD_IOAPIC_ID 0x0
>>>>        io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID;
>>>>        io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS);
>>>>        io_apic->interrupt = cpu_to_le32(0);
>>>> @@ -2511,6 +2513,9 @@ build_dmar_q35(GArray *table_data, GArray *linker)
>>>>        AcpiDmarHardwareUnit *drhd;
>>>>        uint8_t dmar_flags = 0;
>>>>        IntelIOMMUState *intel_iommu = acpi_get_iommu();
>>>> +    AcpiDmarDeviceScope *scope = NULL;
>>>> +    /* Root complex IOAPIC use one path[0] only */
>>>> +    uint16_t scope_size = sizeof(*scope) + sizeof(uint16_t);
>>>>
>>>>        assert(intel_iommu);
>>>>
>>>> @@ -2526,11 +2531,25 @@ build_dmar_q35(GArray *table_data, GArray
>>>> *linker)
>>>>        /* DMAR Remapping Hardware Unit Definition structure */
>>>>        drhd = acpi_data_push(table_data, sizeof(*drhd));
>>>>        drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
>>>> -    drhd->length = cpu_to_le16(sizeof(*drhd));   /* No device scope
>>>> now */
>>>> +    drhd->length = cpu_to_le16(sizeof(*drhd) + scope_size);
>>>>        drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
>>>>        drhd->pci_segment = cpu_to_le16(0);
>>>>        drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
>>>>
>>>> +    /* Scope definition for the root-complex IOAPIC */
>>>> +    scope = acpi_data_push(table_data, scope_size);
>>>> +    scope->entry_type = cpu_to_le16(ACPI_DMAR_DEV_SCOPE_TYPE_IOAPIC);
>>>> +    scope->length = scope_size;
>>>> +    /*
>>>> +     * An arbitary but unique bus number, to be used to generate
>>>> +     * source ID for IOAPIC device in BDF format.
>>>> +     */
>>>> +#define ACPI_IOAPIC_BUS_IR         (0xf0)
>>>> +#define ACPI_IOAPIC_DEVFN_IR       (0x00)
>>>
>>> Hi,
>>>
>>> How do you know for sure there is no bus (or bus & device) having the
>>> number 0xf0 in the system?
>>> Now that we support multiple Root Complexes, using the  pxb-pcie
>>> device we can simply add a PCI root bus like this:
>>>       -device pxb-pcie,bus_nr=0xf0
>>> or we can add enough switches to get to this number.
>>>
>>> You could dynamically query for an unused PCI bus, but the number
>>> would change between the runs.
>>> Or, I suppose you can reserve a slot on bus 0 for that. It is
>>> interesting how it works on a real machine.
>>
>> thinking about it more, maybe we should let the firmware to assign the
>> bus/dev/fun for the IO APIC?
>
> We have the same problem over with VT-d and IR.
>
> I don't think the firmware is not the right place, otherwise there would
> be an interface in hw to adjust that parameters. I think we should
> simply make sure that the qemu user cannot assign devices to those
> addresses as they are reserved for the platform devices (the HPET
> requires another ID), or even reserve the hole bus for the platform.


I understand, but it is the firmware that assign addresses, not QEMU/user.
We need a way to tell the firmware not to use a certain address range, we can do that
with fw_config I suppose.

Reserving a bus might be easier, we take the bus number out from host bridges CRS
ranges and each platform device can be assigned a slot. We would need to select the bus carefully
to not collide with other PCI hierarchies. Maybe bus 0xFF.

Thanks,
Marcel

>
> Jan
>
Jan Kiszka Feb. 21, 2016, 4:01 p.m. UTC | #5
On 2016-02-21 16:54, Marcel Apfelbaum wrote:
> On 02/21/2016 03:40 PM, Jan Kiszka wrote:
>> On 2016-02-21 13:08, Marcel Apfelbaum wrote:
>>> On 02/21/2016 01:38 PM, Marcel Apfelbaum wrote:
>>>> On 02/19/2016 05:30 AM, Peter Xu wrote:
>>>>> To enable interrupt remapping for intel IOMMU device, each IOAPIC
>>>>> device
>>>>> in the system reported via ACPI MADT must be explicitly enumerated
>>>>> under
>>>>> one specific remapping hardware unit. This patch adds the root-complex
>>>>> IOAPIC into the default DMAR device.
>>>>>
>>>>> Please refer to VT-d spec 8.3.1.1 for more information.
>>>>>
>>>>> Signed-off-by: Peter Xu <peterx@redhat.com>
>>>>> ---
>>>>>    hw/i386/acpi-build.c        | 23 +++++++++++++++++++++--
>>>>>    include/hw/acpi/acpi-defs.h | 15 +++++++++++++++
>>>>>    2 files changed, 36 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>>>>> index d9e4f91..1cefe43 100644
>>>>> --- a/hw/i386/acpi-build.c
>>>>> +++ b/hw/i386/acpi-build.c
>>>>> @@ -76,6 +76,9 @@
>>>>>    #define ACPI_BUILD_DPRINTF(fmt, ...)
>>>>>    #endif
>>>>>
>>>>> +/* Default IOAPIC ID */
>>>>> +#define ACPI_BUILD_IOAPIC_ID 0x0
>>>>> +
>>>>>    typedef struct AcpiCpuInfo {
>>>>>        DECLARE_BITMAP(found_cpus, ACPI_CPU_HOTPLUG_ID_LIMIT);
>>>>>    } AcpiCpuInfo;
>>>>> @@ -392,7 +395,6 @@ build_madt(GArray *table_data, GArray *linker,
>>>>> AcpiCpuInfo *cpu)
>>>>>        io_apic = acpi_data_push(table_data, sizeof *io_apic);
>>>>>        io_apic->type = ACPI_APIC_IO;
>>>>>        io_apic->length = sizeof(*io_apic);
>>>>> -#define ACPI_BUILD_IOAPIC_ID 0x0
>>>>>        io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID;
>>>>>        io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS);
>>>>>        io_apic->interrupt = cpu_to_le32(0);
>>>>> @@ -2511,6 +2513,9 @@ build_dmar_q35(GArray *table_data, GArray
>>>>> *linker)
>>>>>        AcpiDmarHardwareUnit *drhd;
>>>>>        uint8_t dmar_flags = 0;
>>>>>        IntelIOMMUState *intel_iommu = acpi_get_iommu();
>>>>> +    AcpiDmarDeviceScope *scope = NULL;
>>>>> +    /* Root complex IOAPIC use one path[0] only */
>>>>> +    uint16_t scope_size = sizeof(*scope) + sizeof(uint16_t);
>>>>>
>>>>>        assert(intel_iommu);
>>>>>
>>>>> @@ -2526,11 +2531,25 @@ build_dmar_q35(GArray *table_data, GArray
>>>>> *linker)
>>>>>        /* DMAR Remapping Hardware Unit Definition structure */
>>>>>        drhd = acpi_data_push(table_data, sizeof(*drhd));
>>>>>        drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
>>>>> -    drhd->length = cpu_to_le16(sizeof(*drhd));   /* No device scope
>>>>> now */
>>>>> +    drhd->length = cpu_to_le16(sizeof(*drhd) + scope_size);
>>>>>        drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
>>>>>        drhd->pci_segment = cpu_to_le16(0);
>>>>>        drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
>>>>>
>>>>> +    /* Scope definition for the root-complex IOAPIC */
>>>>> +    scope = acpi_data_push(table_data, scope_size);
>>>>> +    scope->entry_type = cpu_to_le16(ACPI_DMAR_DEV_SCOPE_TYPE_IOAPIC);
>>>>> +    scope->length = scope_size;
>>>>> +    /*
>>>>> +     * An arbitary but unique bus number, to be used to generate
>>>>> +     * source ID for IOAPIC device in BDF format.
>>>>> +     */
>>>>> +#define ACPI_IOAPIC_BUS_IR         (0xf0)
>>>>> +#define ACPI_IOAPIC_DEVFN_IR       (0x00)
>>>>
>>>> Hi,
>>>>
>>>> How do you know for sure there is no bus (or bus & device) having the
>>>> number 0xf0 in the system?
>>>> Now that we support multiple Root Complexes, using the  pxb-pcie
>>>> device we can simply add a PCI root bus like this:
>>>>       -device pxb-pcie,bus_nr=0xf0
>>>> or we can add enough switches to get to this number.
>>>>
>>>> You could dynamically query for an unused PCI bus, but the number
>>>> would change between the runs.
>>>> Or, I suppose you can reserve a slot on bus 0 for that. It is
>>>> interesting how it works on a real machine.
>>>
>>> thinking about it more, maybe we should let the firmware to assign the
>>> bus/dev/fun for the IO APIC?
>>
>> We have the same problem over with VT-d and IR.
>>
>> I don't think the firmware is not the right place, otherwise there would
>> be an interface in hw to adjust that parameters. I think we should
>> simply make sure that the qemu user cannot assign devices to those
>> addresses as they are reserved for the platform devices (the HPET
>> requires another ID), or even reserve the hole bus for the platform.
> 
> 
> I understand, but it is the firmware that assign addresses, not QEMU/user.

Right, buses are chosen by the firmware, but device address are
(optionally) under user control.

> We need a way to tell the firmware not to use a certain address range,
> we can do that
> with fw_config I suppose.

The pseudo addresses of IOAPIC and HPET are part of ACPI tables, both on
AMD and Intel. Firmware can evaluate them easily.

> 
> Reserving a bus might be easier, we take the bus number out from host
> bridges CRS
> ranges and each platform device can be assigned a slot. We would need to
> select the bus carefully
> to not collide with other PCI hierarchies. Maybe bus 0xFF.

Yes, that's the bus number I once chose for Intel / Q35.

Jan
Peter Xu April 8, 2016, 9:53 a.m. UTC | #6
On Sun, Feb 21, 2016 at 05:01:02PM +0100, Jan Kiszka wrote:
> >> We have the same problem over with VT-d and IR.
> >>
> >> I don't think the firmware is not the right place, otherwise there would
> >> be an interface in hw to adjust that parameters. I think we should
> >> simply make sure that the qemu user cannot assign devices to those
> >> addresses as they are reserved for the platform devices (the HPET
> >> requires another ID), or even reserve the hole bus for the platform.
> > 
> > 
> > I understand, but it is the firmware that assign addresses, not QEMU/user.
> 
> Right, buses are chosen by the firmware, but device address are
> (optionally) under user control.
> 
> > We need a way to tell the firmware not to use a certain address range,
> > we can do that
> > with fw_config I suppose.
> 
> The pseudo addresses of IOAPIC and HPET are part of ACPI tables, both on
> AMD and Intel. Firmware can evaluate them easily.

Hi, Jan, Marcel,

Could you give me some hint on how to achieve this using fw_config
(restriction on PCI bus numbers)?  I failed to find it out myself.

Thanks in advance.

-- peterx
diff mbox

Patch

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index d9e4f91..1cefe43 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -76,6 +76,9 @@ 
 #define ACPI_BUILD_DPRINTF(fmt, ...)
 #endif
 
+/* Default IOAPIC ID */
+#define ACPI_BUILD_IOAPIC_ID 0x0
+
 typedef struct AcpiCpuInfo {
     DECLARE_BITMAP(found_cpus, ACPI_CPU_HOTPLUG_ID_LIMIT);
 } AcpiCpuInfo;
@@ -392,7 +395,6 @@  build_madt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu)
     io_apic = acpi_data_push(table_data, sizeof *io_apic);
     io_apic->type = ACPI_APIC_IO;
     io_apic->length = sizeof(*io_apic);
-#define ACPI_BUILD_IOAPIC_ID 0x0
     io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID;
     io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS);
     io_apic->interrupt = cpu_to_le32(0);
@@ -2511,6 +2513,9 @@  build_dmar_q35(GArray *table_data, GArray *linker)
     AcpiDmarHardwareUnit *drhd;
     uint8_t dmar_flags = 0;
     IntelIOMMUState *intel_iommu = acpi_get_iommu();
+    AcpiDmarDeviceScope *scope = NULL;
+    /* Root complex IOAPIC use one path[0] only */
+    uint16_t scope_size = sizeof(*scope) + sizeof(uint16_t);
 
     assert(intel_iommu);
 
@@ -2526,11 +2531,25 @@  build_dmar_q35(GArray *table_data, GArray *linker)
     /* DMAR Remapping Hardware Unit Definition structure */
     drhd = acpi_data_push(table_data, sizeof(*drhd));
     drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
-    drhd->length = cpu_to_le16(sizeof(*drhd));   /* No device scope now */
+    drhd->length = cpu_to_le16(sizeof(*drhd) + scope_size);
     drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
     drhd->pci_segment = cpu_to_le16(0);
     drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
 
+    /* Scope definition for the root-complex IOAPIC */
+    scope = acpi_data_push(table_data, scope_size);
+    scope->entry_type = cpu_to_le16(ACPI_DMAR_DEV_SCOPE_TYPE_IOAPIC);
+    scope->length = scope_size;
+    /*
+     * An arbitary but unique bus number, to be used to generate
+     * source ID for IOAPIC device in BDF format.
+     */
+#define ACPI_IOAPIC_BUS_IR         (0xf0)
+#define ACPI_IOAPIC_DEVFN_IR       (0x00)
+    scope->enumeration_id = cpu_to_le16(ACPI_BUILD_IOAPIC_ID);
+    scope->bus = cpu_to_le16(ACPI_IOAPIC_BUS_IR);
+    scope->path[0] = cpu_to_le16(ACPI_IOAPIC_DEVFN_IR);
+
     build_header(linker, table_data, (void *)(table_data->data + dmar_start),
                  "DMAR", table_data->len - dmar_start, 1, NULL, NULL);
 }
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index c7a03d4..2430af6 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -556,6 +556,20 @@  enum {
 /*
  * Sub-structures for DMAR
  */
+
+#define ACPI_DMAR_DEV_SCOPE_TYPE_IOAPIC     (0x03)
+
+/* Device scope structure for DRHD. */
+struct AcpiDmarDeviceScope {
+    uint8_t entry_type;
+    uint8_t length;
+    uint16_t reserved;
+    uint8_t enumeration_id;
+    uint8_t bus;
+    uint16_t path[0];           /* list of dev:func pairs */
+} QEMU_PACKED;
+typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope;
+
 /* Type 0: Hardware Unit Definition */
 struct AcpiDmarHardwareUnit {
     uint16_t type;
@@ -564,6 +578,7 @@  struct AcpiDmarHardwareUnit {
     uint8_t reserved;
     uint16_t pci_segment;   /* The PCI Segment associated with this unit */
     uint64_t address;   /* Base address of remapping hardware register-set */
+    AcpiDmarDeviceScope scope[0];
 } QEMU_PACKED;
 typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit;