diff mbox series

[2/6] x86_iommu/amd: Prepare for interrupt remap support

Message ID 1536684589-11718-3-git-send-email-brijesh.singh@amd.com
State New
Headers show
Series x86_iommu/amd: add interrupt remap support | expand

Commit Message

Brijesh Singh Sept. 11, 2018, 4:49 p.m. UTC
Register the interrupt remapping callback and read/write ops for the
amd-iommu-ir memory region.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Cc: Tom Lendacky <Thomas.Lendacky@amd.com>
Cc: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 hw/i386/amd_iommu.c  | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/i386/amd_iommu.h  |  17 +++++++-
 hw/i386/trace-events |   5 +++
 3 files changed, 127 insertions(+), 2 deletions(-)

Comments

Peter Xu Sept. 12, 2018, 3:52 a.m. UTC | #1
On Tue, Sep 11, 2018 at 11:49:45AM -0500, Brijesh Singh wrote:
>  static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>  {
>      AMDVIState *s = opaque;
> @@ -1055,6 +1151,12 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>          address_space_init(&iommu_as[devfn]->as,
>                             MEMORY_REGION(&iommu_as[devfn]->iommu),
>                             "amd-iommu");
> +        memory_region_init_io(&iommu_as[devfn]->iommu_ir, OBJECT(s),
> +                              &amdvi_ir_ops, s, "amd-iommu-ir",
> +                              AMDVI_INT_ADDR_SIZE);
> +        memory_region_add_subregion(MEMORY_REGION(&iommu_as[devfn]->iommu),
> +                              AMDVI_INT_ADDR_FIRST,
> +                              &iommu_as[devfn]->iommu_ir);

A pure question: just to make sure this IR region won't be masked out
by other memory regions.  Asked since VT-d is explicitly setting a
higher priority of the memory region for interrupts with
memory_region_add_subregion_overlap().

>      }
>      return &iommu_as[devfn]->as;
>  }
> @@ -1172,6 +1274,10 @@ static void amdvi_realize(DeviceState *dev, Error **err)
>          return;
>      }
>  
> +    /* Pseudo address space under root PCI bus. */
> +    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_SB_IOAPIC_ID);
> +    s->intr_enabled = x86_iommu->intr_supported;

So does this mean that AMD IR cannot be disabled if declared support?
For VT-d, IR needs to be explicitly enabled otherwise disabled (even
supported).

Otherwise the patch looks good to me.  Thanks,

> +
>      /* set up MMIO */
>      memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
>                            AMDVI_MMIO_SIZE);
> @@ -1205,6 +1311,7 @@ static void amdvi_class_init(ObjectClass *klass, void* data)
>      dc->vmsd = &vmstate_amdvi;
>      dc->hotpluggable = false;
>      dc_class->realize = amdvi_realize;
> +    dc_class->int_remap = amdvi_int_remap;
>      /* Supported by the pc-q35-* machine types */
>      dc->user_creatable = true;
>  }

Regards,
Brijesh Singh Sept. 12, 2018, 6:59 p.m. UTC | #2
On 09/11/2018 10:52 PM, Peter Xu wrote:
> On Tue, Sep 11, 2018 at 11:49:45AM -0500, Brijesh Singh wrote:
>>   static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>   {
>>       AMDVIState *s = opaque;
>> @@ -1055,6 +1151,12 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>           address_space_init(&iommu_as[devfn]->as,
>>                              MEMORY_REGION(&iommu_as[devfn]->iommu),
>>                              "amd-iommu");
>> +        memory_region_init_io(&iommu_as[devfn]->iommu_ir, OBJECT(s),
>> +                              &amdvi_ir_ops, s, "amd-iommu-ir",
>> +                              AMDVI_INT_ADDR_SIZE);
>> +        memory_region_add_subregion(MEMORY_REGION(&iommu_as[devfn]->iommu),
>> +                              AMDVI_INT_ADDR_FIRST,
>> +                              &iommu_as[devfn]->iommu_ir);
> 
> A pure question: just to make sure this IR region won't be masked out
> by other memory regions.  Asked since VT-d is explicitly setting a
> higher priority of the memory region for interrupts with
> memory_region_add_subregion_overlap().
> 

Hmm, I was hoping that this IR region will not be masked out by other
regions but if it does then we will have trouble. thanks for pointing
this out, I think we can do similar thing as VT-d and make the region
as high priority so that we get memops invoked.


>>       }
>>       return &iommu_as[devfn]->as;
>>   }
>> @@ -1172,6 +1274,10 @@ static void amdvi_realize(DeviceState *dev, Error **err)
>>           return;
>>       }
>>   
>> +    /* Pseudo address space under root PCI bus. */
>> +    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_SB_IOAPIC_ID);
>> +    s->intr_enabled = x86_iommu->intr_supported;
> 
> So does this mean that AMD IR cannot be disabled if declared support?
> For VT-d, IR needs to be explicitly enabled otherwise disabled (even
> supported).
> 


Yes, once its declared as supported then it can not disabled. Its
upto the guest OS to decide whether it want to use the intr remapping
feature by parsing the IVRS. This also brings question, should we
just enable it by default because its guest OS decision whether it
wants to use it or not.
Peter Xu Sept. 13, 2018, 3:15 a.m. UTC | #3
On Wed, Sep 12, 2018 at 01:59:06PM -0500, Brijesh Singh wrote:

[...]

> > >       }
> > >       return &iommu_as[devfn]->as;
> > >   }
> > > @@ -1172,6 +1274,10 @@ static void amdvi_realize(DeviceState *dev, Error **err)
> > >           return;
> > >       }
> > > +    /* Pseudo address space under root PCI bus. */
> > > +    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_SB_IOAPIC_ID);
> > > +    s->intr_enabled = x86_iommu->intr_supported;
> > 
> > So does this mean that AMD IR cannot be disabled if declared support?
> > For VT-d, IR needs to be explicitly enabled otherwise disabled (even
> > supported).
> > 
> 
> 
> Yes, once its declared as supported then it can not disabled. Its
> upto the guest OS to decide whether it want to use the intr remapping
> feature by parsing the IVRS. This also brings question, should we
> just enable it by default because its guest OS decision whether it
> wants to use it or not.

It's by default off?  I mean:

    DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false),

Then it's good to me, thanks.  You could add a comment there
explicitly mentioning that "IR will be enabled as long as declared
support for AMD" since it might confuse some of the people like me...
but it's optional.

Regards,
Suravee Suthikulpanit Sept. 13, 2018, 8:15 a.m. UTC | #4
Brijesh / Peter,

On 9/13/18 10:15 AM, Peter Xu wrote:
> On Wed, Sep 12, 2018 at 01:59:06PM -0500, Brijesh Singh wrote:
> 
> [...]
> 
>>>>        }
>>>>        return &iommu_as[devfn]->as;
>>>>    }
>>>> @@ -1172,6 +1274,10 @@ static void amdvi_realize(DeviceState *dev, Error **err)
>>>>            return;
>>>>        }
>>>> +    /* Pseudo address space under root PCI bus. */
>>>> +    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_SB_IOAPIC_ID);
>>>> +    s->intr_enabled = x86_iommu->intr_supported;
>>>
>>> So does this mean that AMD IR cannot be disabled if declared support?
>>> For VT-d, IR needs to be explicitly enabled otherwise disabled (even
>>> supported).
>>>
>>
>>
>> Yes, once its declared as supported then it can not disabled. Its
>> upto the guest OS to decide whether it want to use the intr remapping
>> feature by parsing the IVRS. This also brings question, should we
>> just enable it by default because its guest OS decision whether it
>> wants to use it or not.
> 
> It's by default off?  I mean:
> 
>      DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false),
> 
> Then it's good to me, thanks.  You could add a comment there
> explicitly mentioning that "IR will be enabled as long as declared
> support for AMD" since it might confuse some of the people like me...
> but it's optional.
> 
> Regards,
> 

Actually, there are two separate knobs here:

* This option basically means the interrupt remapping support is available/unavailable,
   which should default to available unless specified to unavailable at the QEMU command line.
   For example, the AMD IOMMU v1 does not have interrupt remapping support, which means
   the interrupt remapping bit fields in DTE (e.g. IV, IntCtl, EIntPass, INITPass, NMIPass,
   Lint0Pass, Lint1Pass) are ignored.

* If interrupt remapping support is available, the guest OS can decide to enable or disable the feature
   using DTE bit fileds (e.g. Linux option intremap=off would disable interrupt remapping
   by not setting DTE[IV] bit). For Linux AMD IOMMU driver, the default is to enable the interrupt remapping.

In facts, we should not need this option. However, if you prefer to keep this option,
we probably should rename this to "intremap_sup", in which if the default value should be 1.

Regards,
Suravee
Peter Xu Sept. 13, 2018, 8:48 a.m. UTC | #5
On Thu, Sep 13, 2018 at 03:15:27PM +0700, Suravee Suthikulpanit wrote:
> Brijesh / Peter,
> 
> On 9/13/18 10:15 AM, Peter Xu wrote:
> > On Wed, Sep 12, 2018 at 01:59:06PM -0500, Brijesh Singh wrote:
> > 
> > [...]
> > 
> > > > >        }
> > > > >        return &iommu_as[devfn]->as;
> > > > >    }
> > > > > @@ -1172,6 +1274,10 @@ static void amdvi_realize(DeviceState *dev, Error **err)
> > > > >            return;
> > > > >        }
> > > > > +    /* Pseudo address space under root PCI bus. */
> > > > > +    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_SB_IOAPIC_ID);
> > > > > +    s->intr_enabled = x86_iommu->intr_supported;
> > > > 
> > > > So does this mean that AMD IR cannot be disabled if declared support?
> > > > For VT-d, IR needs to be explicitly enabled otherwise disabled (even
> > > > supported).
> > > > 
> > > 
> > > 
> > > Yes, once its declared as supported then it can not disabled. Its
> > > upto the guest OS to decide whether it want to use the intr remapping
> > > feature by parsing the IVRS. This also brings question, should we
> > > just enable it by default because its guest OS decision whether it
> > > wants to use it or not.
> > 
> > It's by default off?  I mean:
> > 
> >      DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false),
> > 
> > Then it's good to me, thanks.  You could add a comment there
> > explicitly mentioning that "IR will be enabled as long as declared
> > support for AMD" since it might confuse some of the people like me...
> > but it's optional.
> > 
> > Regards,
> > 
> 
> Actually, there are two separate knobs here:
> 
> * This option basically means the interrupt remapping support is available/unavailable,
>   which should default to available unless specified to unavailable at the QEMU command line.
>   For example, the AMD IOMMU v1 does not have interrupt remapping support, which means
>   the interrupt remapping bit fields in DTE (e.g. IV, IntCtl, EIntPass, INITPass, NMIPass,
>   Lint0Pass, Lint1Pass) are ignored.
> 
> * If interrupt remapping support is available, the guest OS can decide to enable or disable the feature
>   using DTE bit fileds (e.g. Linux option intremap=off would disable interrupt remapping
>   by not setting DTE[IV] bit). For Linux AMD IOMMU driver, the default is to enable the interrupt remapping.
> 
> In facts, we should not need this option. However, if you prefer to keep this option,
> we probably should rename this to "intremap_sup", in which if the default value should be 1.

I see, thanks.  For me I don't yet see a reason for that intremap_sup
since AFAIU that's exactly what current QEMU's intremap parameter do.

I think Intel is having the similar knobs:

* The "intremap" in QEMU decides whether the hardware we emulate
  supports interrupt remapping, and

* The "intremap" in the guest decides whether the guest kernel will
  use the interrupt remapping feature

Though IMHO the only difference is that Intel has another global knob
to turn that on/off in the GCMD register (VT-d spec 10.4.4, GCMD bit
25) depending on whether the 2nd knob is on, while for AMD it's just
per-device rather than per-iommu.

Regards,
Paolo Bonzini Sept. 13, 2018, 2:47 p.m. UTC | #6
On 13/09/2018 10:15, Suravee Suthikulpanit wrote:
> However, if you prefer to keep this option,
> we probably should rename this to "intremap_sup", in which if the
> default value should be 1.

The main reason to have the property and to leave it off by default is
that it is incompatible with kernel_irqchip=split.  A second reason is
migration support, where you would be able to migrate from QEMU 3.1 to
3.0 and interrupt remapping would disappear.

It's okay to call the property just "intremap" even if it's ultimately
the guest's call to use it.  It's the same for e.g. a PCI device's MSI
support, properties for that are called just "msi" or "msix".

Thanks,

Paolo
diff mbox series

Patch

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 1fd669f..572ba0a 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -26,6 +26,7 @@ 
 #include "amd_iommu.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
+#include "hw/i386/apic_internal.h"
 #include "trace.h"
 
 /* used AMD-Vi MMIO registers */
@@ -1026,6 +1027,101 @@  static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
     return ret;
 }
 
+/* Interrupt remapping for MSI/MSI-X entry */
+static int amdvi_int_remap_msi(AMDVIState *iommu,
+                               MSIMessage *origin,
+                               MSIMessage *translated,
+                               uint16_t sid)
+{
+    int ret;
+
+    assert(origin && translated);
+
+    trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);
+
+    if (!iommu || !iommu->intr_enabled) {
+        memcpy(translated, origin, sizeof(*origin));
+        goto out;
+    }
+
+    if (origin->address & AMDVI_MSI_ADDR_HI_MASK) {
+        trace_amdvi_err("MSI address high 32 bits non-zero when "
+                        "Interrupt Remapping enabled.");
+        return -AMDVI_IR_ERR;
+    }
+
+    if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) {
+        trace_amdvi_err("MSI is not from IOAPIC.");
+        return -AMDVI_IR_ERR;
+    }
+
+out:
+    trace_amdvi_ir_remap_msi(origin->address, origin->data,
+                             translated->address, translated->data);
+    return 0;
+}
+
+static int amdvi_int_remap(X86IOMMUState *iommu,
+                           MSIMessage *origin,
+                           MSIMessage *translated,
+                           uint16_t sid)
+{
+    return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin,
+                               translated, sid);
+}
+
+static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
+                                      uint64_t value, unsigned size,
+                                      MemTxAttrs attrs)
+{
+    int ret;
+    MSIMessage from = { 0, 0 }, to = { 0, 0 };
+    uint16_t sid = AMDVI_SB_IOAPIC_ID;
+
+    from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
+    from.data = (uint32_t) value;
+
+    trace_amdvi_mem_ir_write_req(addr, value, size);
+
+    if (!attrs.unspecified) {
+        /* We have explicit Source ID */
+        sid = attrs.requester_id;
+    }
+
+    ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
+    if (ret < 0) {
+        /* TODO: report error */
+        /* Drop the interrupt */
+        return MEMTX_ERROR;
+    }
+
+    apic_get_class()->send_msi(&to);
+
+    trace_amdvi_mem_ir_write(to.address, to.data);
+    return MEMTX_OK;
+}
+
+static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
+                                     uint64_t *data, unsigned size,
+                                     MemTxAttrs attrs)
+{
+    return MEMTX_OK;
+}
+
+static const MemoryRegionOps amdvi_ir_ops = {
+    .read_with_attrs = amdvi_mem_ir_read,
+    .write_with_attrs = amdvi_mem_ir_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    }
+};
+
 static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 {
     AMDVIState *s = opaque;
@@ -1055,6 +1151,12 @@  static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
         address_space_init(&iommu_as[devfn]->as,
                            MEMORY_REGION(&iommu_as[devfn]->iommu),
                            "amd-iommu");
+        memory_region_init_io(&iommu_as[devfn]->iommu_ir, OBJECT(s),
+                              &amdvi_ir_ops, s, "amd-iommu-ir",
+                              AMDVI_INT_ADDR_SIZE);
+        memory_region_add_subregion(MEMORY_REGION(&iommu_as[devfn]->iommu),
+                              AMDVI_INT_ADDR_FIRST,
+                              &iommu_as[devfn]->iommu_ir);
     }
     return &iommu_as[devfn]->as;
 }
@@ -1172,6 +1274,10 @@  static void amdvi_realize(DeviceState *dev, Error **err)
         return;
     }
 
+    /* Pseudo address space under root PCI bus. */
+    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_SB_IOAPIC_ID);
+    s->intr_enabled = x86_iommu->intr_supported;
+
     /* set up MMIO */
     memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
                           AMDVI_MMIO_SIZE);
@@ -1205,6 +1311,7 @@  static void amdvi_class_init(ObjectClass *klass, void* data)
     dc->vmsd = &vmstate_amdvi;
     dc->hotpluggable = false;
     dc_class->realize = amdvi_realize;
+    dc_class->int_remap = amdvi_int_remap;
     /* Supported by the pc-q35-* machine types */
     dc->user_creatable = true;
 }
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 8740305..74e568b 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -206,8 +206,18 @@ 
 
 #define AMDVI_COMMAND_SIZE   16
 
-#define AMDVI_INT_ADDR_FIRST 0xfee00000
-#define AMDVI_INT_ADDR_LAST  0xfeefffff
+#define AMDVI_INT_ADDR_FIRST    0xfee00000
+#define AMDVI_INT_ADDR_LAST     0xfeefffff
+#define AMDVI_INT_ADDR_SIZE     (AMDVI_INT_ADDR_LAST - AMDVI_INT_ADDR_FIRST + 1)
+#define AMDVI_MSI_ADDR_HI_MASK  (0xffffffff00000000ULL)
+#define AMDVI_MSI_ADDR_LO_MASK  (0x00000000ffffffffULL)
+
+/* Southbridge IOAPIC ID */
+#define AMDVI_SB_IOAPIC_ID      0xa0
+
+/* Interrupt remapping errors */
+#define AMDVI_IR_ERR            0x1
+
 
 #define TYPE_AMD_IOMMU_DEVICE "amd-iommu"
 #define AMD_IOMMU_DEVICE(obj)\
@@ -278,6 +288,9 @@  typedef struct AMDVIState {
 
     /* IOTLB */
     GHashTable *iotlb;
+
+    /* Interrupt remapping */
+    bool intr_enabled;
 } AMDVIState;
 
 #endif
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 9e6fc4d..41d533c 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -101,6 +101,11 @@  amdvi_mode_invalid(uint8_t level, uint64_t addr)"error: translation level 0x%"PR
 amdvi_page_fault(uint64_t addr) "error: page fault accessing guest physical address 0x%"PRIx64
 amdvi_iotlb_hit(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) "hit iotlb devid %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
 amdvi_translation_result(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) "devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
+amdvi_mem_ir_write_req(uint64_t addr, uint64_t val, uint32_t size) "addr 0x%"PRIx64" data 0x%"PRIx64" size 0x%"PRIx32
+amdvi_mem_ir_write(uint64_t addr, uint64_t val) "addr 0x%"PRIx64" data 0x%"PRIx64
+amdvi_ir_remap_msi_req(uint64_t addr, uint64_t data, uint8_t devid) "addr 0x%"PRIx64" data 0x%"PRIx64" devid 0x%"PRIx8
+amdvi_ir_remap_msi(uint64_t addr, uint64_t data, uint64_t addr2, uint64_t data2) "(addr 0x%"PRIx64", data 0x%"PRIx64") -> (addr 0x%"PRIx64", data 0x%"PRIx64")"
+amdvi_err(const char *str) "%s"
 
 # hw/i386/vmport.c
 vmport_register(unsigned char command, void *func, void *opaque) "command: 0x%02x func: %p opaque: %p"