diff mbox

[RFC,6/8] intel_iommu: support device iotlb descriptor

Message ID 1458872009-13342-7-git-send-email-jasowang@redhat.com
State New
Headers show

Commit Message

Jason Wang March 25, 2016, 2:13 a.m. UTC
This patch enables device IOTLB support for intel iommu. The major
work is to implement QI device IOTLB descriptor processing and notify
the device through iommu notifier.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/i386/intel_iommu.c          | 81 ++++++++++++++++++++++++++++++++++++++----
 hw/i386/intel_iommu_internal.h | 13 +++++--
 2 files changed, 86 insertions(+), 8 deletions(-)

Comments

Peter Xu March 28, 2016, 3:37 a.m. UTC | #1
On Fri, Mar 25, 2016 at 10:13:27AM +0800, Jason Wang wrote:
> This patch enables device IOTLB support for intel iommu. The major
> work is to implement QI device IOTLB descriptor processing and notify
> the device through iommu notifier.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Richard Henderson <rth@twiddle.net>
> Cc: Eduardo Habkost <ehabkost@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  hw/i386/intel_iommu.c          | 81 ++++++++++++++++++++++++++++++++++++++----
>  hw/i386/intel_iommu_internal.h | 13 +++++--
>  2 files changed, 86 insertions(+), 8 deletions(-)
> 

[...]

> +static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
> +                                          VTDInvDesc *inv_desc)
> +{
> +    VTDAddressSpace *vtd_dev_as;
> +    IOMMUTLBEntry entry;
> +    struct VTDBus *vtd_bus;
> +    hwaddr addr;
> +    uint64_t sz;
> +    uint16_t sid;
> +    uint8_t devfn;
> +    bool size;
> +    uint8_t bus_num;
> +
> +    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
> +    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
> +    devfn = sid & 0xff;
> +    bus_num = sid >> 8;
> +    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
> +
> +    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
> +        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
> +        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
> +                    "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
> +                    inv_desc->hi, inv_desc->lo);
> +        return false;
> +    }
> +
> +    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
> +    if (!vtd_bus) {
> +        goto done;
> +    }
> +
> +    vtd_dev_as = vtd_bus->dev_as[devfn];
> +    if (!vtd_dev_as) {
> +        goto done;
> +    }
> +
> +    if (size) {
> +        sz = ffsll(~(addr >> VTD_PAGE_SHIFT));
> +        addr = addr & ~((1 << (sz + VTD_PAGE_SHIFT)) - 1);
> +        sz = VTD_PAGE_SIZE << sz;

For these three lines, could it be shorter like:

    sz = 1 << ffsll(~addr);
    addr &= ~(sz - 1);

It seems that we can avoid using VTD_PAGE_*.

> +    } else {
> +        sz = VTD_PAGE_SIZE;
> +    }
> +
> +    entry.target_as = &vtd_dev_as->as;
> +    entry.addr_mask = sz - 1;
> +    entry.iova = addr;
> +    memory_region_notify_iommu(entry.target_as->root, entry);

Here, we seems to be posting this invalidation to all registered
notifiers. Since this is a device-tlb invalidation, and we should
know which device (BDF) that we should invalidate, is there any way
that we can directly route this info to that specific device?

E.g., if we enable VFIO with current patch, this notify will
possibly be passed to VFIO devices as well, even it's actually for
vhost devices.  Not sure whether there would be problem.

Another thing totally not related to this patch: I see that the
second parameter for memory_region_notify_iommu() is IOMMUTLBEntry,
rather than its pointer.  While inside of the funccall, it only
passes in the pointer directly:

void memory_region_notify_iommu(MemoryRegion *mr,
                                IOMMUTLBEntry entry)
{
    assert(memory_region_is_iommu(mr));
    notifier_list_notify(&mr->iommu_notify, &entry);
}

Shall we change "entry" into a pointer as well? I found no reason
why we need to keep this IOMMUTLBEntry in stack twice...

Thanks.

-- peterx
Jason Wang March 30, 2016, 5:08 a.m. UTC | #2
On 03/28/2016 11:37 AM, Peter Xu wrote:
> On Fri, Mar 25, 2016 at 10:13:27AM +0800, Jason Wang wrote:
>> This patch enables device IOTLB support for intel iommu. The major
>> work is to implement QI device IOTLB descriptor processing and notify
>> the device through iommu notifier.
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Richard Henderson <rth@twiddle.net>
>> Cc: Eduardo Habkost <ehabkost@redhat.com>
>> Cc: Michael S. Tsirkin <mst@redhat.com>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>  hw/i386/intel_iommu.c          | 81 ++++++++++++++++++++++++++++++++++++++----
>>  hw/i386/intel_iommu_internal.h | 13 +++++--
>>  2 files changed, 86 insertions(+), 8 deletions(-)
>>
> [...]
>
>> +static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
>> +                                          VTDInvDesc *inv_desc)
>> +{
>> +    VTDAddressSpace *vtd_dev_as;
>> +    IOMMUTLBEntry entry;
>> +    struct VTDBus *vtd_bus;
>> +    hwaddr addr;
>> +    uint64_t sz;
>> +    uint16_t sid;
>> +    uint8_t devfn;
>> +    bool size;
>> +    uint8_t bus_num;
>> +
>> +    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
>> +    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
>> +    devfn = sid & 0xff;
>> +    bus_num = sid >> 8;
>> +    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
>> +
>> +    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
>> +        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
>> +        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
>> +                    "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
>> +                    inv_desc->hi, inv_desc->lo);
>> +        return false;
>> +    }
>> +
>> +    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
>> +    if (!vtd_bus) {
>> +        goto done;
>> +    }
>> +
>> +    vtd_dev_as = vtd_bus->dev_as[devfn];
>> +    if (!vtd_dev_as) {
>> +        goto done;
>> +    }
>> +
>> +    if (size) {
>> +        sz = ffsll(~(addr >> VTD_PAGE_SHIFT));
>> +        addr = addr & ~((1 << (sz + VTD_PAGE_SHIFT)) - 1);
>> +        sz = VTD_PAGE_SIZE << sz;
> For these three lines, could it be shorter like:
>
>     sz = 1 << ffsll(~addr);
>     addr &= ~(sz - 1);
>
> It seems that we can avoid using VTD_PAGE_*.

Some lower bits of addr is zero (since it was reserved), so this may not
work. Looks like it could be optimized to

sz = 1 << ffsll(~(addr | (VTD_PAGE_MASK - 1)));
addr &= ~(sz - 1);

>
>> +    } else {
>> +        sz = VTD_PAGE_SIZE;
>> +    }
>> +
>> +    entry.target_as = &vtd_dev_as->as;
>> +    entry.addr_mask = sz - 1;
>> +    entry.iova = addr;
>> +    memory_region_notify_iommu(entry.target_as->root, entry);
> Here, we seems to be posting this invalidation to all registered
> notifiers.

Yes, but only for a device specified address space.

>  Since this is a device-tlb invalidation, and we should
> know which device (BDF) that we should invalidate, is there any way
> that we can directly route this info to that specific device?

Looks like the codes has already done this, the target_as was found by
bus num and devfn.

>
> E.g., if we enable VFIO with current patch, this notify will
> possibly be passed to VFIO devices as well, even it's actually for
> vhost devices.  Not sure whether there would be problem.

Not sure, but if the underlaying device has ATS capability, we probably
need to propagate the invalidation to the device itself too.

>
> Another thing totally not related to this patch: I see that the
> second parameter for memory_region_notify_iommu() is IOMMUTLBEntry,
> rather than its pointer.  While inside of the funccall, it only
> passes in the pointer directly:
>
> void memory_region_notify_iommu(MemoryRegion *mr,
>                                 IOMMUTLBEntry entry)
> {
>     assert(memory_region_is_iommu(mr));
>     notifier_list_notify(&mr->iommu_notify, &entry);
> }
>
> Shall we change "entry" into a pointer as well? I found no reason
> why we need to keep this IOMMUTLBEntry in stack twice...
>
> Thanks.
>
> -- peterx
>

Right, it looks ok to change to use a pointer.
Peter Xu March 30, 2016, 5:21 a.m. UTC | #3
On Wed, Mar 30, 2016 at 01:08:45PM +0800, Jason Wang wrote:

[...]

> >> +    } else {
> >> +        sz = VTD_PAGE_SIZE;
> >> +    }
> >> +
> >> +    entry.target_as = &vtd_dev_as->as;
> >> +    entry.addr_mask = sz - 1;
> >> +    entry.iova = addr;
> >> +    memory_region_notify_iommu(entry.target_as->root, entry);
> > Here, we seems to be posting this invalidation to all registered
> > notifiers.
> 
> Yes, but only for a device specified address space.
> 
> >  Since this is a device-tlb invalidation, and we should
> > know which device (BDF) that we should invalidate, is there any way
> > that we can directly route this info to that specific device?
> 
> Looks like the codes has already done this, the target_as was found by
> bus num and devfn.

Yes, seems you are right. :)

-- peterx
diff mbox

Patch

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 36b2072..e23bf2c 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -728,11 +728,18 @@  static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
                     "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
                     ce->hi, ce->lo);
         return -VTD_FR_CONTEXT_ENTRY_INV;
-    } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) {
-        VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
-                    "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
-                    ce->hi, ce->lo);
-        return -VTD_FR_CONTEXT_ENTRY_INV;
+    } else {
+        switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
+        case VTD_CONTEXT_TT_MULTI_LEVEL:
+            /* fall through */
+        case VTD_CONTEXT_TT_DEV_IOTLB:
+            break;
+        default:
+            VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
+                        "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
+                        ce->hi, ce->lo);
+            return -VTD_FR_CONTEXT_ENTRY_INV;
+        }
     }
     return 0;
 }
@@ -1361,6 +1368,60 @@  static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
     return true;
 }
 
+static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
+                                          VTDInvDesc *inv_desc)
+{
+    VTDAddressSpace *vtd_dev_as;
+    IOMMUTLBEntry entry;
+    struct VTDBus *vtd_bus;
+    hwaddr addr;
+    uint64_t sz;
+    uint16_t sid;
+    uint8_t devfn;
+    bool size;
+    uint8_t bus_num;
+
+    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
+    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
+    devfn = sid & 0xff;
+    bus_num = sid >> 8;
+    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
+
+    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
+        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
+        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
+                    "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
+                    inv_desc->hi, inv_desc->lo);
+        return false;
+    }
+
+    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
+    if (!vtd_bus) {
+        goto done;
+    }
+
+    vtd_dev_as = vtd_bus->dev_as[devfn];
+    if (!vtd_dev_as) {
+        goto done;
+    }
+
+    if (size) {
+        sz = ffsll(~(addr >> VTD_PAGE_SHIFT));
+        addr = addr & ~((1 << (sz + VTD_PAGE_SHIFT)) - 1);
+        sz = VTD_PAGE_SIZE << sz;
+    } else {
+        sz = VTD_PAGE_SIZE;
+    }
+
+    entry.target_as = &vtd_dev_as->as;
+    entry.addr_mask = sz - 1;
+    entry.iova = addr;
+    memory_region_notify_iommu(entry.target_as->root, entry);
+
+done:
+    return true;
+}
+
 static bool vtd_process_inv_desc(IntelIOMMUState *s)
 {
     VTDInvDesc inv_desc;
@@ -1400,6 +1461,14 @@  static bool vtd_process_inv_desc(IntelIOMMUState *s)
         }
         break;
 
+    case VTD_INV_DESC_DEVICE:
+        VTD_DPRINTF(INV, "Device IOTLB Invalidation Descriptor hi 0x%"PRIx64
+                    " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+        if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
+            return false;
+        }
+        break;
+
     default:
         VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type "
                     "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8,
@@ -1953,7 +2022,7 @@  static void vtd_init(IntelIOMMUState *s)
     s->next_frcd_reg = 0;
     s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
              VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
-    s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
+    s->ecap = VTD_ECAP_QI | VTD_ECAP_DT | VTD_ECAP_IRO;
 
     vtd_reset_context_cache(s);
     vtd_reset_iotlb(s);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e5f514c..5b803d5 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -176,6 +176,7 @@ 
 /* (offset >> 4) << 8 */
 #define VTD_ECAP_IRO                (DMAR_IOTLB_REG_OFFSET << 4)
 #define VTD_ECAP_QI                 (1ULL << 1)
+#define VTD_ECAP_DT                 (1ULL << 2)
 
 /* CAP_REG */
 /* (offset >> 4) << 24 */
@@ -286,6 +287,7 @@  typedef struct VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_TYPE               0xf
 #define VTD_INV_DESC_CC                 0x1 /* Context-cache Invalidate Desc */
 #define VTD_INV_DESC_IOTLB              0x2
+#define VTD_INV_DESC_DEVICE             0x3
 #define VTD_INV_DESC_WAIT               0x5 /* Invalidation Wait Descriptor */
 #define VTD_INV_DESC_NONE               0   /* Not an Invalidate Descriptor */
 
@@ -319,6 +321,13 @@  typedef struct VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000ff00ULL
 #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
 
+/* Mask for Device IOTLB Invalidate Descriptor */
+#define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
+#define VTD_INV_DESC_DEVICE_IOTLB_SIZE(val) ((val) & 0x1)
+#define VTD_INV_DESC_DEVICE_IOTLB_SID(val) (((val) >> 32) & 0xFFFFULL)
+#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
+#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
+
 /* Information about page-selective IOTLB invalidate */
 struct VTDIOTLBPageInvInfo {
     uint16_t domain_id;
@@ -357,8 +366,8 @@  typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_CONTEXT_ENTRY_FPD       (1ULL << 1) /* Fault Processing Disable */
 #define VTD_CONTEXT_ENTRY_TT        (3ULL << 2) /* Translation Type */
 #define VTD_CONTEXT_TT_MULTI_LEVEL  0
-#define VTD_CONTEXT_TT_DEV_IOTLB    1
-#define VTD_CONTEXT_TT_PASS_THROUGH 2
+#define VTD_CONTEXT_TT_DEV_IOTLB    (1ULL << 2)
+#define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2)
 /* Second Level Page Translation Pointer*/
 #define VTD_CONTEXT_ENTRY_SLPTPTR   (~0xfffULL)
 #define VTD_CONTEXT_ENTRY_RSVD_LO   (0xff0ULL | ~VTD_HAW_MASK)