diff mbox

[V1,2/4] hw/iommu: AMD IOMMU interrupt remapping

Message ID 1470858179-27754-3-git-send-email-davidkiarie4@gmail.com
State New
Headers show

Commit Message

David Kiarie Aug. 10, 2016, 7:42 p.m. UTC
Introduce AMD IOMMU interrupt remapping and hook it onto
the existing interrupt remapping infrastructure

Signed-off-by: David Kiarie <davidkiarie4@gmail.com>
---
 hw/i386/amd_iommu.c | 226 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/i386/amd_iommu.h |   2 +
 2 files changed, 227 insertions(+), 1 deletion(-)

Comments

Valentine Sinitsyn Aug. 12, 2016, 8:08 p.m. UTC | #1
On 11.08.2016 00:42, David Kiarie wrote:
> Introduce AMD IOMMU interrupt remapping and hook it onto
> the existing interrupt remapping infrastructure
>
> Signed-off-by: David Kiarie <davidkiarie4@gmail.com>
> ---
>  hw/i386/amd_iommu.c | 226 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/i386/amd_iommu.h |   2 +
>  2 files changed, 227 insertions(+), 1 deletion(-)
>
> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
> index 5fab9aa..825159b 100644
> --- a/hw/i386/amd_iommu.c
> +++ b/hw/i386/amd_iommu.c
> @@ -18,12 +18,14 @@
>   * with this program; if not, see <http://www.gnu.org/licenses/>.
>   *
>   * Cache implementation inspired by hw/i386/intel_iommu.c
> + *
>   */
>  #include "qemu/osdep.h"
>  #include <math.h>
>  #include "hw/pci/msi.h"
>  #include "hw/i386/pc.h"
>  #include "hw/i386/amd_iommu.h"
> +#include "hw/i386/ioapic_internal.h"
>  #include "hw/pci/pci_bus.h"
>  #include "trace.h"
>
> @@ -660,6 +662,11 @@ static void amdvi_inval_inttable(AMDVIState *s, CMDInvalIntrTable *inval)
>          amdvi_log_illegalcom_error(s, inval->type, s->cmdbuf + s->cmdbuf_head);
>          return;
>      }
> +
> +    if (s->ir_cache) {
> +        x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), true, 0, 0);
> +    }
> +
>      trace_amdvi_intr_inval();
>  }
>
> @@ -1221,6 +1228,207 @@ static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr,
>      return ret;
>  }
>
> +static inline int amdvi_ir_pass(MSIMessage *src, MSIMessage *dst, uint8_t bit,
> +                                uint64_t dte)
The name is misleading. Actually, this function handles non-vectored 
interrupts (either passes or target aborts them). Maybe call it 
amdvi_ir_handle_non_vectored() ?

> +{
> +    if ((dte & (1UL << bit))) {
> +        /* passing interrupt enabled */
> +        dst->address = src->address;
> +        dst->data = src->data;
> +    } else {
> +        /* should be target aborted */
> +        return -AMDVI_TARGET_ABORT;
> +    }
> +    return 0;
> +}
> +
> +static int amdvi_remap_ir_intctl(uint64_t dte, uint32_t irte,
> +                                 MSIMessage *src, MSIMessage *dst)
> +{
> +    int ret = 0;
> +
> +    switch ((dte >> AMDVI_DTE_INTCTL ) & 3UL) {
AMDVI_DTE_INTCTL_SHIFT? Yes, I should have mentioned it in a previous 
patch, sorry. Maybe also introduce macros for 3UL and 1, 2, 3 in switch 
branches below.

> +    case 1:
> +        /* pass */
> +        memcpy(dst, src, sizeof(*dst));
> +        break;
> +    case 2:
> +        /* remap */
> +        if (irte & AMDVI_IRTE_REMAP_MASK) {
> +            /* LOCAL APIC address */
> +            dst->address = AMDVI_LOCAL_APIC_ADDR;
> +            /* destination mode */
> +            dst->address |= (((irte & AMDVI_IRTE_DM_MASK) >> 6) <<
> +                    AMDVI_MSI_ADDR_DM_RSHIFT);
> +            /* RH */
> +            dst->address |= ((irte & AMDVI_IRTE_RQEOI_MASK) >> 5) <<
> +                AMDVI_MSI_ADDR_RH_RSHIFT;
> +            /* Destination ID */
> +            dst->address |= ((irte & AMDVI_IRTE_DEST_MASK) >> 8) <<
> +                AMDVI_MSI_ADDR_DEST_RSHIFT;
> +            /* construct data - vector */
> +            dst->data |= (irte & AMDVI_IRTE_VECTOR_MASK) >> 16;
> +            /* Interrupt type */
> +            dst->data |= ((irte & AMDVI_IRTE_INTTYPE_MASK) >> 2) <<
> +                AMDVI_MSI_DATA_DM_RSHIFT;
These bit operations look scary. Did you considered using bitfields or 
wrapping them in macros?
> +        } else  {
> +            ret = -AMDVI_TARGET_ABORT;
> +        }
> +        break;
> +    case 0:
> +    case 3:
In fact, you should report this as event when IR == 1.

> +    default:
> +        ret = -AMDVI_TARGET_ABORT;
> +    }
> +    return ret;
> +}
> +/*
> + * We don't support guest virtual APIC so IRTE size will most likely always be 4
> + */
> +static int amdvi_irte_get(AMDVIState *s, MSIMessage *src, uint32_t *irte,
> +                          uint64_t *dte, uint16_t devid)
> +{
> +    uint64_t irte_root, offset = devid * AMDVI_DEVTAB_ENTRY_SIZE,
> +             irte_size = AMDVI_DEFAULT_IRTE_SIZE,
> +             ir_table_size;
> +
> +    /* check for GASup and if it's enabled */
> +    if ((amdvi_readq(s, AMDVI_EXT_FEATURES) & AMDVI_GASUP)
> +        && (amdvi_readq(s, AMDVI_MMIO_CONTROL) & AMDVI_GAEN)) {
> +        /* set a different IRTE size */
> +        irte_size = AMDVI_IRTE_SIZE_GASUP;
> +    }
As I said, this is likely the only place where we account for Virtual 
APIC. You don't seem to handle Virtual APIC Root in DTE, for instance. 
Maybe drop this incomplete support altogether, and print some warning 
here instead?

> +    if (dma_memory_read(&address_space_memory, s->devtab + offset, dte,
> +                        AMDVI_DEVTAB_ENTRY_SIZE)) {
> +        trace_amdvi_dte_get_fail(s->devtab, offset);
> +        return -AMDVI_DEV_TAB_HW;
> +    }
> +
> +    irte_root = dte[2] & AMDVI_IRTEROOT_MASK;
> +    offset = (src->data & AMDVI_IRTE_INDEX_MASK) << 2;
> +    ir_table_size = pow(2, dte[2] & AMDVI_IR_TABLE_SIZE_MASK);
1 << dte[2] & AMDVI_IR_TABLE_SIZE_MASK ?

> +    /* enforce IR table size */
> +    if (offset > (ir_table_size * irte_size)) {
> +        trace_amdvi_invalid_irte_entry(offset, ir_table_size);
> +        return -AMDVI_TARGET_ABORT;
> +    }
> +    /* read IRTE */
> +    if (dma_memory_read(&address_space_memory, irte_root + offset,
> +        irte, sizeof(*irte))) {
> +        trace_amdvi_irte_get_fail(irte_root, offset);
> +        return -AMDVI_DEV_TAB_HW;
> +    }
> +    return 0;
> +}
> +
> +static int amdvi_int_remap(X86IOMMUState *iommu, MSIMessage *src,
> +                           MSIMessage *dst, uint16_t sid)
> +{
> +    trace_amdvi_ir_request(src->data, src->address, sid);
> +
> +    AMDVIState *s = AMD_IOMMU_DEVICE(iommu);
> +    int ret = 0;
> +    uint64_t dte[4];
> +    uint32_t ir_enabled, irte;
> +
> +    ret = amdvi_irte_get(s, src, &irte, dte, sid);
> +    if (ret < 0) {
> +        goto no_remap;
> +    }
> +    /* interrupt remapping disabled */
> +    if (!(dte[2] & AMDVI_IR_VALID)) {
> +        memcpy(dst, src, sizeof(*src));
> +        return ret;
> +    }
> +    switch (src->data & AMDVI_IR_TYPE_MASK) {
> +    case AMDVI_MT_FIXED:
> +    case AMDVI_MT_ARBIT:
> +        ret = amdvi_remap_ir_intctl(dte[2], irte, src, dst);
> +        if (ret < 0) {
> +            goto no_remap;
> +        } else {
> +            s->ir_cache = true;
> +            trace_amdvi_ir_remap(dst->data, dst->address, sid);
> +            return ret;
> +        }
> +    /* not handling SMI currently */
> +    case AMDVI_MT_SMI:
Maybe also add some warning here so we'd know when to implement SMI Filter.

> +        goto no_remap;
> +    case AMDVI_MT_NMI:
> +        ir_enabled = AMDVI_DTE_NMIPASS;
> +        break;
> +    case AMDVI_MT_INIT:
> +        ir_enabled = AMDVI_DTE_INTPASS;
> +        break;
> +    case AMDVI_MT_EXTINT:
> +        ir_enabled = AMDVI_DTE_EINTPASS;
> +        break;
> +    case AMDVI_MT_LINT1:
> +        ir_enabled = AMDVI_DTE_LINT1PASS;
> +        break;
> +    case AMDVI_MT_LINT0:
> +        ir_enabled = AMDVI_DTE_LINT0PASS;
> +    }
> +
> +    ret = amdvi_ir_pass(src, dst, ir_enabled, dte[2]);
> +    if (ret < 0){
> +        goto no_remap;
> +    }
> +    s->ir_cache = true;
> +    trace_amdvi_ir_remap(dst->data, dst->address, sid);
> +    return ret;
> +no_remap:
> +    memcpy(dst, src, sizeof(*src));
Do you need this memcpy()?  The original request is target aborted as 
soon as this returns.

> +    trace_amdvi_ir_target_abort(dst->data, dst->address, sid);
> +    return ret;
> +}
> +
> +static MemTxResult amdvi_ir_read(void *opaque, hwaddr addr,
> +                                 uint64_t *data, unsigned size,
> +                                 MemTxAttrs attrs)
> +{
> +    return MEMTX_OK;
> +}
> +
> +static MemTxResult amdvi_ir_write(void *opaque, hwaddr addr, uint64_t val,
> +                                  unsigned size, MemTxAttrs attrs)
> +{
> +    AMDVIAddressSpace *as = opaque;
> +    MSIMessage from = { addr + AMDVI_INT_ADDR_FIRST, val }, to = { 0, 0};
> +    uint16_t sid = PCI_BUILD_BDFi(as->bus_num, as->devfn);
'BDFi' reads like a typo

> +    int ret = 0;
> +
> +    ret  = amdvi_int_remap(X86_IOMMU_DEVICE(as->iommu_state), &from, &to,
> +                           attrs.requester_id);
> +
> +    if (ret < 0) {
> +        trace_amdvi_ir_target_abort(from.data, from.address,
> +                                    attrs.requester_id);
> +        return MEMTX_ERROR;
> +    }
> +
> +    if(dma_memory_write(&address_space_memory, to.address, &to.data, size)) {
> +        trace_amdvi_ir_write_fail(to.address, to.data);
> +        return MEMTX_ERROR;
> +    }
> +
> +    return MEMTX_OK;
> +}
> +
> +static const MemoryRegionOps amdvi_ir_ops = {
> +    .read_with_attrs = amdvi_ir_read,
> +    .write_with_attrs = amdvi_ir_write,
> +    .endianness = DEVICE_LITTLE_ENDIAN,
> +    .impl = {
> +        .min_access_size = 4,
> +        .max_access_size = 4,
> +    },
> +    .valid = {
> +        .min_access_size = 4,
> +        .max_access_size = 4,
> +    }
> +};
> +
>  static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>  {
>      AMDVIState *s = opaque;
> @@ -1244,6 +1452,12 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>
>          memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s),
>                                   &s->iommu_ops, "amd-iommu", UINT64_MAX);
> +        memory_region_init_io(&iommu_as[devfn]->iommu_ir, OBJECT(s),
> +                              &amdvi_ir_ops, iommu_as[devfn], "amd-iommu-ir",
> +                              AMDVI_INT_ADDR_SIZE);
> +        memory_region_add_subregion(&iommu_as[devfn]->iommu,
> +                                    AMDVI_INT_ADDR_FIRST,
> +                                    &iommu_as[devfn]->iommu_ir);
>          address_space_init(&iommu_as[devfn]->as, &iommu_as[devfn]->iommu,
>                             "amd-iommu");
>      }
> @@ -1292,6 +1506,7 @@ static void amdvi_init(AMDVIState *s)
>      s->enabled = false;
>      s->ats_enabled = false;
>      s->cmdbuf_enabled = false;
> +    s->ir_cache = false;
>
>      /* reset MMIO */
>      memset(s->mmior, 0, AMDVI_MMIO_SIZE);
> @@ -1331,11 +1546,15 @@ static void amdvi_realize(DeviceState *dev, Error **err)
>      AMDVIState *s = AMD_IOMMU_DEVICE(dev);
>      X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
>      PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus;
> +    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
>      s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
>                                       amdvi_uint64_equal, g_free, g_free);
>
> -    /* This device should take care of IOMMU PCI properties */
> +    /* AMD IOMMU has Interrupt Remapping on by default */
> +    x86_iommu->intr_supported = true;
>      x86_iommu->type = TYPE_AMD;
> +
> +    /* This device should take care of IOMMU PCI properties */
>      qdev_set_parent_bus(DEVICE(&s->pci), &bus->qbus);
>      object_property_set_bool(OBJECT(&s->pci), true, "realized", err);
>      s->capab_offset = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
> @@ -1347,9 +1566,13 @@ static void amdvi_realize(DeviceState *dev, Error **err)
>      memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
>                            AMDVI_MMIO_SIZE);
>
> +    x86_iommu->ioapic_bdf = PCI_BUILD_BDF(AMDVI_BUS_NUM,
> +             AMDVI_DEVFN_IOAPIC);
> +
>      sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
>      sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
>      pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
> +    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_DEVFN_IOAPIC);
>      s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err);
>      msi_init(&s->pci.dev, 0, 1, true, false, err);
>      amdvi_init(s);
> @@ -1376,6 +1599,7 @@ static void amdvi_class_init(ObjectClass *klass, void* data)
>      dc->vmsd = &vmstate_amdvi;
>      dc->hotpluggable = false;
>      dc_class->realize = amdvi_realize;
> +    dc_class->int_remap = amdvi_int_remap;
>  }
>
>  static const TypeInfo amdvi = {
> diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
> index 2a7f19e..f0e23a8 100644
> --- a/hw/i386/amd_iommu.h
> +++ b/hw/i386/amd_iommu.h
> @@ -356,6 +356,8 @@ typedef struct AMDVIState {
>      uint32_t evtlog_len;         /* event log length             */
>      uint32_t evtlog_head;        /* current IOMMU write position */
>      uint32_t evtlog_tail;        /* current Software read position */
> +    /* whether we have remapped any interrupts and hence IR cache */
> +    bool ir_cache;
>
>      /* unused for now */
>      hwaddr excl_base;            /* base DVA - IOMMU exclusion range */
>

Valentine
David Kiarie Aug. 12, 2016, 8:30 p.m. UTC | #2
On Fri, Aug 12, 2016 at 11:08 PM, Valentine Sinitsyn <
valentine.sinitsyn@gmail.com> wrote:

> On 11.08.2016 00:42, David Kiarie wrote:
>
>> Introduce AMD IOMMU interrupt remapping and hook it onto
>
>
>> +static inline int amdvi_ir_pass(MSIMessage *src, MSIMessage *dst,
>> uint8_t bit,
>> +                                uint64_t dte)
>>
> The name is misleading. Actually, this function handles non-vectored
> interrupts (either passes or target aborts them). Maybe call it
> amdvi_ir_handle_non_vectored() ?
>
> +{
>> +    if ((dte & (1UL << bit))) {
>> +        /* passing interrupt enabled */
>> +        dst->address = src->address;
>> +        dst->data = src->data;
>> +    } else {
>> +        /* should be target aborted */
>> +        return -AMDVI_TARGET_ABORT;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int amdvi_remap_ir_intctl(uint64_t dte, uint32_t irte,
>> +                                 MSIMessage *src, MSIMessage *dst)
>> +{
>> +    int ret = 0;
>> +
>> +    switch ((dte >> AMDVI_DTE_INTCTL ) & 3UL) {
>>
> AMDVI_DTE_INTCTL_SHIFT? Yes, I should have mentioned it in a previous
> patch, sorry. Maybe also introduce macros for 3UL and 1, 2, 3 in switch
> branches below.
>
> +    case 1:
>> +        /* pass */
>> +        memcpy(dst, src, sizeof(*dst));
>> +        break;
>> +    case 2:
>> +        /* remap */
>> +        if (irte & AMDVI_IRTE_REMAP_MASK) {
>> +            /* LOCAL APIC address */
>> +            dst->address = AMDVI_LOCAL_APIC_ADDR;
>> +            /* destination mode */
>> +            dst->address |= (((irte & AMDVI_IRTE_DM_MASK) >> 6) <<
>> +                    AMDVI_MSI_ADDR_DM_RSHIFT);
>> +            /* RH */
>> +            dst->address |= ((irte & AMDVI_IRTE_RQEOI_MASK) >> 5) <<
>> +                AMDVI_MSI_ADDR_RH_RSHIFT;
>> +            /* Destination ID */
>> +            dst->address |= ((irte & AMDVI_IRTE_DEST_MASK) >> 8) <<
>> +                AMDVI_MSI_ADDR_DEST_RSHIFT;
>> +            /* construct data - vector */
>> +            dst->data |= (irte & AMDVI_IRTE_VECTOR_MASK) >> 16;
>> +            /* Interrupt type */
>> +            dst->data |= ((irte & AMDVI_IRTE_INTTYPE_MASK) >> 2) <<
>> +                AMDVI_MSI_DATA_DM_RSHIFT;
>>
> These bit operations look scary. Did you considered using bitfields or
> wrapping them in macros?


Will look at introducing macros to cover this comment and the previous one.


>
> +        } else  {
>> +            ret = -AMDVI_TARGET_ABORT;
>> +        }
>> +        break;
>> +    case 0:
>> +    case 3:
>>
> In fact, you should report this as event when IR == 1.
>
> +    default:
>> +        ret = -AMDVI_TARGET_ABORT;
>> +    }
>> +    return ret;
>> +}
>> +/*
>> + * We don't support guest virtual APIC so IRTE size will most likely
>> always be 4
>> + */
>> +static int amdvi_irte_get(AMDVIState *s, MSIMessage *src, uint32_t *irte,
>> +                          uint64_t *dte, uint16_t devid)
>> +{
>> +    uint64_t irte_root, offset = devid * AMDVI_DEVTAB_ENTRY_SIZE,
>> +             irte_size = AMDVI_DEFAULT_IRTE_SIZE,
>> +             ir_table_size;
>> +
>> +    /* check for GASup and if it's enabled */
>> +    if ((amdvi_readq(s, AMDVI_EXT_FEATURES) & AMDVI_GASUP)
>> +        && (amdvi_readq(s, AMDVI_MMIO_CONTROL) & AMDVI_GAEN)) {
>> +        /* set a different IRTE size */
>> +        irte_size = AMDVI_IRTE_SIZE_GASUP;
>> +    }
>>
> As I said, this is likely the only place where we account for Virtual
> APIC. You don't seem to handle Virtual APIC Root in DTE, for instance.
> Maybe drop this incomplete support altogether, and print some warning here
> instead?


I'll drop everything and print a warning instead.


>
>
> +    if (dma_memory_read(&address_space_memory, s->devtab + offset, dte,
>> +                        AMDVI_DEVTAB_ENTRY_SIZE)) {
>> +        trace_amdvi_dte_get_fail(s->devtab, offset);
>> +        return -AMDVI_DEV_TAB_HW;
>> +    }
>> +
>> +    irte_root = dte[2] & AMDVI_IRTEROOT_MASK;
>> +    offset = (src->data & AMDVI_IRTE_INDEX_MASK) << 2;
>> +    ir_table_size = pow(2, dte[2] & AMDVI_IR_TABLE_SIZE_MASK);
>>
> 1 << dte[2] & AMDVI_IR_TABLE_SIZE_MASK ?
>
>
> +    /* enforce IR table size */
>> +    if (offset > (ir_table_size * irte_size)) {
>> +        trace_amdvi_invalid_irte_entry(offset, ir_table_size);
>> +        return -AMDVI_TARGET_ABORT;
>> +    }
>> +    /* read IRTE */
>> +    if (dma_memory_read(&address_space_memory, irte_root + offset,
>> +        irte, sizeof(*irte))) {
>> +        trace_amdvi_irte_get_fail(irte_root, offset);
>> +        return -AMDVI_DEV_TAB_HW;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int amdvi_int_remap(X86IOMMUState *iommu, MSIMessage *src,
>> +                           MSIMessage *dst, uint16_t sid)
>> +{
>> +    trace_amdvi_ir_request(src->data, src->address, sid);
>> +
>> +    AMDVIState *s = AMD_IOMMU_DEVICE(iommu);
>> +    int ret = 0;
>> +    uint64_t dte[4];
>> +    uint32_t ir_enabled, irte;
>> +
>> +    ret = amdvi_irte_get(s, src, &irte, dte, sid);
>> +    if (ret < 0) {
>> +        goto no_remap;
>> +    }
>> +    /* interrupt remapping disabled */
>> +    if (!(dte[2] & AMDVI_IR_VALID)) {
>> +        memcpy(dst, src, sizeof(*src));
>> +        return ret;
>> +    }
>> +    switch (src->data & AMDVI_IR_TYPE_MASK) {
>> +    case AMDVI_MT_FIXED:
>> +    case AMDVI_MT_ARBIT:
>> +        ret = amdvi_remap_ir_intctl(dte[2], irte, src, dst);
>> +        if (ret < 0) {
>> +            goto no_remap;
>> +        } else {
>> +            s->ir_cache = true;
>> +            trace_amdvi_ir_remap(dst->data, dst->address, sid);
>> +            return ret;
>> +        }
>> +    /* not handling SMI currently */
>> +    case AMDVI_MT_SMI:
>>
> Maybe also add some warning here so we'd know when to implement SMI Filter.
>
>
> +        goto no_remap;
>> +    case AMDVI_MT_NMI:
>> +        ir_enabled = AMDVI_DTE_NMIPASS;
>> +        break;
>> +    case AMDVI_MT_INIT:
>> +        ir_enabled = AMDVI_DTE_INTPASS;
>> +        break;
>> +    case AMDVI_MT_EXTINT:
>> +        ir_enabled = AMDVI_DTE_EINTPASS;
>> +        break;
>> +    case AMDVI_MT_LINT1:
>> +        ir_enabled = AMDVI_DTE_LINT1PASS;
>> +        break;
>> +    case AMDVI_MT_LINT0:
>> +        ir_enabled = AMDVI_DTE_LINT0PASS;
>> +    }
>> +
>> +    ret = amdvi_ir_pass(src, dst, ir_enabled, dte[2]);
>> +    if (ret < 0){
>> +        goto no_remap;
>> +    }
>> +    s->ir_cache = true;
>> +    trace_amdvi_ir_remap(dst->data, dst->address, sid);
>> +    return ret;
>> +no_remap:
>> +    memcpy(dst, src, sizeof(*src));
>>
> Do you need this memcpy()?  The original request is target aborted as soon
> as this returns.
>
> +    trace_amdvi_ir_target_abort(dst->data, dst->address, sid);
>> +    return ret;
>> +}
>> +
>> +static MemTxResult amdvi_ir_read(void *opaque, hwaddr addr,
>> +                                 uint64_t *data, unsigned size,
>> +                                 MemTxAttrs attrs)
>> +{
>> +    return MEMTX_OK;
>> +}
>> +
>> +static MemTxResult amdvi_ir_write(void *opaque, hwaddr addr, uint64_t
>> val,
>> +                                  unsigned size, MemTxAttrs attrs)
>> +{
>> +    AMDVIAddressSpace *as = opaque;
>> +    MSIMessage from = { addr + AMDVI_INT_ADDR_FIRST, val }, to = { 0, 0};
>> +    uint16_t sid = PCI_BUILD_BDFi(as->bus_num, as->devfn);
>>
> 'BDFi' reads like a typo


Right, typo even I am not sure how it got this far.

Overall, thanks for review all comments will be covered in next version.


>
>
> +    int ret = 0;
>> +
>> +    ret  = amdvi_int_remap(X86_IOMMU_DEVICE(as->iommu_state), &from,
>> &to,
>> +                           attrs.requester_id);
>> +
>> +    if (ret < 0) {
>> +        trace_amdvi_ir_target_abort(from.data, from.address,
>> +                                    attrs.requester_id);
>> +        return MEMTX_ERROR;
>> +    }
>> +
>> +    if(dma_memory_write(&address_space_memory, to.address, &to.data,
>> size)) {
>> +        trace_amdvi_ir_write_fail(to.address, to.data);
>> +        return MEMTX_ERROR;
>> +    }
>> +
>> +    return MEMTX_OK;
>> +}
>> +
>> +static const MemoryRegionOps amdvi_ir_ops = {
>> +    .read_with_attrs = amdvi_ir_read,
>> +    .write_with_attrs = amdvi_ir_write,
>> +    .endianness = DEVICE_LITTLE_ENDIAN,
>> +    .impl = {
>> +        .min_access_size = 4,
>> +        .max_access_size = 4,
>> +    },
>> +    .valid = {
>> +        .min_access_size = 4,
>> +        .max_access_size = 4,
>> +    }
>> +};
>> +
>>  static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int
>> devfn)
>>  {
>>      AMDVIState *s = opaque;
>> @@ -1244,6 +1452,12 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus
>> *bus, void *opaque, int devfn)
>>
>>          memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s),
>>                                   &s->iommu_ops, "amd-iommu", UINT64_MAX);
>> +        memory_region_init_io(&iommu_as[devfn]->iommu_ir, OBJECT(s),
>> +                              &amdvi_ir_ops, iommu_as[devfn],
>> "amd-iommu-ir",
>> +                              AMDVI_INT_ADDR_SIZE);
>> +        memory_region_add_subregion(&iommu_as[devfn]->iommu,
>> +                                    AMDVI_INT_ADDR_FIRST,
>> +                                    &iommu_as[devfn]->iommu_ir);
>>          address_space_init(&iommu_as[devfn]->as,
>> &iommu_as[devfn]->iommu,
>>                             "amd-iommu");
>>      }
>> @@ -1292,6 +1506,7 @@ static void amdvi_init(AMDVIState *s)
>>      s->enabled = false;
>>      s->ats_enabled = false;
>>      s->cmdbuf_enabled = false;
>> +    s->ir_cache = false;
>>
>>      /* reset MMIO */
>>      memset(s->mmior, 0, AMDVI_MMIO_SIZE);
>> @@ -1331,11 +1546,15 @@ static void amdvi_realize(DeviceState *dev, Error
>> **err)
>>      AMDVIState *s = AMD_IOMMU_DEVICE(dev);
>>      X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
>>      PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus;
>> +    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
>>      s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
>>                                       amdvi_uint64_equal, g_free, g_free);
>>
>> -    /* This device should take care of IOMMU PCI properties */
>> +    /* AMD IOMMU has Interrupt Remapping on by default */
>> +    x86_iommu->intr_supported = true;
>>      x86_iommu->type = TYPE_AMD;
>> +
>> +    /* This device should take care of IOMMU PCI properties */
>>      qdev_set_parent_bus(DEVICE(&s->pci), &bus->qbus);
>>      object_property_set_bool(OBJECT(&s->pci), true, "realized", err);
>>      s->capab_offset = pci_add_capability(&s->pci.dev,
>> AMDVI_CAPAB_ID_SEC, 0,
>> @@ -1347,9 +1566,13 @@ static void amdvi_realize(DeviceState *dev, Error
>> **err)
>>      memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s,
>> "amdvi-mmio",
>>                            AMDVI_MMIO_SIZE);
>>
>> +    x86_iommu->ioapic_bdf = PCI_BUILD_BDF(AMDVI_BUS_NUM,
>> +             AMDVI_DEVFN_IOAPIC);
>> +
>>      sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
>>      sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
>>      pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
>> +    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_DEVFN_IOAPIC);
>>      s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err);
>>      msi_init(&s->pci.dev, 0, 1, true, false, err);
>>      amdvi_init(s);
>> @@ -1376,6 +1599,7 @@ static void amdvi_class_init(ObjectClass *klass,
>> void* data)
>>      dc->vmsd = &vmstate_amdvi;
>>      dc->hotpluggable = false;
>>      dc_class->realize = amdvi_realize;
>> +    dc_class->int_remap = amdvi_int_remap;
>>  }
>>
>>  static const TypeInfo amdvi = {
>> diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
>> index 2a7f19e..f0e23a8 100644
>> --- a/hw/i386/amd_iommu.h
>> +++ b/hw/i386/amd_iommu.h
>> @@ -356,6 +356,8 @@ typedef struct AMDVIState {
>>      uint32_t evtlog_len;         /* event log length             */
>>      uint32_t evtlog_head;        /* current IOMMU write position */
>>      uint32_t evtlog_tail;        /* current Software read position */
>> +    /* whether we have remapped any interrupts and hence IR cache */
>> +    bool ir_cache;
>>
>>      /* unused for now */
>>      hwaddr excl_base;            /* base DVA - IOMMU exclusion range */
>>
>>
> Valentine
>
diff mbox

Patch

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 5fab9aa..825159b 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -18,12 +18,14 @@ 
  * with this program; if not, see <http://www.gnu.org/licenses/>.
  *
  * Cache implementation inspired by hw/i386/intel_iommu.c
+ *
  */
 #include "qemu/osdep.h"
 #include <math.h>
 #include "hw/pci/msi.h"
 #include "hw/i386/pc.h"
 #include "hw/i386/amd_iommu.h"
+#include "hw/i386/ioapic_internal.h"
 #include "hw/pci/pci_bus.h"
 #include "trace.h"
 
@@ -660,6 +662,11 @@  static void amdvi_inval_inttable(AMDVIState *s, CMDInvalIntrTable *inval)
         amdvi_log_illegalcom_error(s, inval->type, s->cmdbuf + s->cmdbuf_head);
         return;
     }
+
+    if (s->ir_cache) {
+        x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), true, 0, 0);
+    }
+
     trace_amdvi_intr_inval();
 }
 
@@ -1221,6 +1228,207 @@  static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr,
     return ret;
 }
 
+static inline int amdvi_ir_pass(MSIMessage *src, MSIMessage *dst, uint8_t bit,
+                                uint64_t dte)
+{
+    if ((dte & (1UL << bit))) {
+        /* passing interrupt enabled */
+        dst->address = src->address;
+        dst->data = src->data;
+    } else {
+        /* should be target aborted */
+        return -AMDVI_TARGET_ABORT;
+    }
+    return 0;
+}
+
+static int amdvi_remap_ir_intctl(uint64_t dte, uint32_t irte,
+                                 MSIMessage *src, MSIMessage *dst)
+{
+    int ret = 0;
+
+    switch ((dte >> AMDVI_DTE_INTCTL ) & 3UL) {
+    case 1:
+        /* pass */
+        memcpy(dst, src, sizeof(*dst));
+        break;
+    case 2:
+        /* remap */
+        if (irte & AMDVI_IRTE_REMAP_MASK) {
+            /* LOCAL APIC address */
+            dst->address = AMDVI_LOCAL_APIC_ADDR;
+            /* destination mode */
+            dst->address |= (((irte & AMDVI_IRTE_DM_MASK) >> 6) <<
+                    AMDVI_MSI_ADDR_DM_RSHIFT);
+            /* RH */
+            dst->address |= ((irte & AMDVI_IRTE_RQEOI_MASK) >> 5) <<
+                AMDVI_MSI_ADDR_RH_RSHIFT;
+            /* Destination ID */
+            dst->address |= ((irte & AMDVI_IRTE_DEST_MASK) >> 8) <<
+                AMDVI_MSI_ADDR_DEST_RSHIFT;
+            /* construct data - vector */
+            dst->data |= (irte & AMDVI_IRTE_VECTOR_MASK) >> 16;
+            /* Interrupt type */
+            dst->data |= ((irte & AMDVI_IRTE_INTTYPE_MASK) >> 2) <<
+                AMDVI_MSI_DATA_DM_RSHIFT;
+        } else  {
+            ret = -AMDVI_TARGET_ABORT;
+        }
+        break;
+    case 0:
+    case 3:
+    default:
+        ret = -AMDVI_TARGET_ABORT;
+    }
+    return ret;
+}
+/*
+ * We don't support guest virtual APIC so IRTE size will most likely always be 4
+ */
+static int amdvi_irte_get(AMDVIState *s, MSIMessage *src, uint32_t *irte,
+                          uint64_t *dte, uint16_t devid)
+{
+    uint64_t irte_root, offset = devid * AMDVI_DEVTAB_ENTRY_SIZE,
+             irte_size = AMDVI_DEFAULT_IRTE_SIZE,
+             ir_table_size;
+
+    /* check for GASup and if it's enabled */
+    if ((amdvi_readq(s, AMDVI_EXT_FEATURES) & AMDVI_GASUP)
+        && (amdvi_readq(s, AMDVI_MMIO_CONTROL) & AMDVI_GAEN)) {
+        /* set a different IRTE size */
+        irte_size = AMDVI_IRTE_SIZE_GASUP;
+    }
+    if (dma_memory_read(&address_space_memory, s->devtab + offset, dte,
+                        AMDVI_DEVTAB_ENTRY_SIZE)) {
+        trace_amdvi_dte_get_fail(s->devtab, offset);
+        return -AMDVI_DEV_TAB_HW;
+    }
+
+    irte_root = dte[2] & AMDVI_IRTEROOT_MASK;
+    offset = (src->data & AMDVI_IRTE_INDEX_MASK) << 2;
+    ir_table_size = pow(2, dte[2] & AMDVI_IR_TABLE_SIZE_MASK);
+    /* enforce IR table size */
+    if (offset > (ir_table_size * irte_size)) {
+        trace_amdvi_invalid_irte_entry(offset, ir_table_size);
+        return -AMDVI_TARGET_ABORT;
+    }
+    /* read IRTE */
+    if (dma_memory_read(&address_space_memory, irte_root + offset,
+        irte, sizeof(*irte))) {
+        trace_amdvi_irte_get_fail(irte_root, offset);
+        return -AMDVI_DEV_TAB_HW;
+    }
+    return 0;
+}
+
+static int amdvi_int_remap(X86IOMMUState *iommu, MSIMessage *src,
+                           MSIMessage *dst, uint16_t sid)
+{
+    trace_amdvi_ir_request(src->data, src->address, sid);
+
+    AMDVIState *s = AMD_IOMMU_DEVICE(iommu);
+    int ret = 0;
+    uint64_t dte[4];
+    uint32_t ir_enabled, irte;
+
+    ret = amdvi_irte_get(s, src, &irte, dte, sid);
+    if (ret < 0) {
+        goto no_remap;
+    }
+    /* interrupt remapping disabled */
+    if (!(dte[2] & AMDVI_IR_VALID)) {
+        memcpy(dst, src, sizeof(*src));
+        return ret;
+    }
+    switch (src->data & AMDVI_IR_TYPE_MASK) {
+    case AMDVI_MT_FIXED:
+    case AMDVI_MT_ARBIT:
+        ret = amdvi_remap_ir_intctl(dte[2], irte, src, dst);
+        if (ret < 0) {
+            goto no_remap;
+        } else {
+            s->ir_cache = true;
+            trace_amdvi_ir_remap(dst->data, dst->address, sid);
+            return ret;
+        }
+    /* not handling SMI currently */
+    case AMDVI_MT_SMI:
+        goto no_remap;
+    case AMDVI_MT_NMI:
+        ir_enabled = AMDVI_DTE_NMIPASS;
+        break;
+    case AMDVI_MT_INIT:
+        ir_enabled = AMDVI_DTE_INTPASS;
+        break;
+    case AMDVI_MT_EXTINT:
+        ir_enabled = AMDVI_DTE_EINTPASS;
+        break;
+    case AMDVI_MT_LINT1:
+        ir_enabled = AMDVI_DTE_LINT1PASS;
+        break;
+    case AMDVI_MT_LINT0:
+        ir_enabled = AMDVI_DTE_LINT0PASS;
+    }
+
+    ret = amdvi_ir_pass(src, dst, ir_enabled, dte[2]);
+    if (ret < 0){
+        goto no_remap;
+    }
+    s->ir_cache = true;
+    trace_amdvi_ir_remap(dst->data, dst->address, sid);
+    return ret;
+no_remap:
+    memcpy(dst, src, sizeof(*src));
+    trace_amdvi_ir_target_abort(dst->data, dst->address, sid);
+    return ret;
+}
+
+static MemTxResult amdvi_ir_read(void *opaque, hwaddr addr,
+                                 uint64_t *data, unsigned size,
+                                 MemTxAttrs attrs)
+{
+    return MEMTX_OK;
+}
+
+static MemTxResult amdvi_ir_write(void *opaque, hwaddr addr, uint64_t val,
+                                  unsigned size, MemTxAttrs attrs)
+{
+    AMDVIAddressSpace *as = opaque;
+    MSIMessage from = { addr + AMDVI_INT_ADDR_FIRST, val }, to = { 0, 0};
+    uint16_t sid = PCI_BUILD_BDFi(as->bus_num, as->devfn);
+    int ret = 0;
+
+    ret  = amdvi_int_remap(X86_IOMMU_DEVICE(as->iommu_state), &from, &to,
+                           attrs.requester_id);
+
+    if (ret < 0) {
+        trace_amdvi_ir_target_abort(from.data, from.address,
+                                    attrs.requester_id);
+        return MEMTX_ERROR;
+    }
+
+    if(dma_memory_write(&address_space_memory, to.address, &to.data, size)) {
+        trace_amdvi_ir_write_fail(to.address, to.data);
+        return MEMTX_ERROR;
+    }
+
+    return MEMTX_OK;
+}
+
+static const MemoryRegionOps amdvi_ir_ops = {
+    .read_with_attrs = amdvi_ir_read,
+    .write_with_attrs = amdvi_ir_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    }
+};
+
 static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 {
     AMDVIState *s = opaque;
@@ -1244,6 +1452,12 @@  static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 
         memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s),
                                  &s->iommu_ops, "amd-iommu", UINT64_MAX);
+        memory_region_init_io(&iommu_as[devfn]->iommu_ir, OBJECT(s),
+                              &amdvi_ir_ops, iommu_as[devfn], "amd-iommu-ir",
+                              AMDVI_INT_ADDR_SIZE);
+        memory_region_add_subregion(&iommu_as[devfn]->iommu,
+                                    AMDVI_INT_ADDR_FIRST,
+                                    &iommu_as[devfn]->iommu_ir);
         address_space_init(&iommu_as[devfn]->as, &iommu_as[devfn]->iommu,
                            "amd-iommu");
     }
@@ -1292,6 +1506,7 @@  static void amdvi_init(AMDVIState *s)
     s->enabled = false;
     s->ats_enabled = false;
     s->cmdbuf_enabled = false;
+    s->ir_cache = false;
 
     /* reset MMIO */
     memset(s->mmior, 0, AMDVI_MMIO_SIZE);
@@ -1331,11 +1546,15 @@  static void amdvi_realize(DeviceState *dev, Error **err)
     AMDVIState *s = AMD_IOMMU_DEVICE(dev);
     X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
     PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus;
+    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
     s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
                                      amdvi_uint64_equal, g_free, g_free);
 
-    /* This device should take care of IOMMU PCI properties */
+    /* AMD IOMMU has Interrupt Remapping on by default */
+    x86_iommu->intr_supported = true;
     x86_iommu->type = TYPE_AMD;
+
+    /* This device should take care of IOMMU PCI properties */
     qdev_set_parent_bus(DEVICE(&s->pci), &bus->qbus);
     object_property_set_bool(OBJECT(&s->pci), true, "realized", err);
     s->capab_offset = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
@@ -1347,9 +1566,13 @@  static void amdvi_realize(DeviceState *dev, Error **err)
     memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
                           AMDVI_MMIO_SIZE);
 
+    x86_iommu->ioapic_bdf = PCI_BUILD_BDF(AMDVI_BUS_NUM,
+             AMDVI_DEVFN_IOAPIC);
+
     sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
     sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
     pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
+    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_DEVFN_IOAPIC);
     s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err);
     msi_init(&s->pci.dev, 0, 1, true, false, err);
     amdvi_init(s);
@@ -1376,6 +1599,7 @@  static void amdvi_class_init(ObjectClass *klass, void* data)
     dc->vmsd = &vmstate_amdvi;
     dc->hotpluggable = false;
     dc_class->realize = amdvi_realize;
+    dc_class->int_remap = amdvi_int_remap;
 }
 
 static const TypeInfo amdvi = {
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 2a7f19e..f0e23a8 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -356,6 +356,8 @@  typedef struct AMDVIState {
     uint32_t evtlog_len;         /* event log length             */
     uint32_t evtlog_head;        /* current IOMMU write position */
     uint32_t evtlog_tail;        /* current Software read position */
+    /* whether we have remapped any interrupts and hence IR cache */
+    bool ir_cache;
 
     /* unused for now */
     hwaddr excl_base;            /* base DVA - IOMMU exclusion range */