diff mbox

[PULL,08/41] intel_iommu: support device iotlb descriptor

Message ID 1484026704-28027-9-git-send-email-mst@redhat.com
State New
Headers show

Commit Message

Michael S. Tsirkin Jan. 10, 2017, 5:39 a.m. UTC
From: Jason Wang <jasowang@redhat.com>

This patch enables device IOTLB support for intel iommu. The major
work is to implement QI device IOTLB descriptor processing and notify
the device through iommu notifier.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
---
 hw/i386/intel_iommu_internal.h | 13 ++++++-
 include/hw/i386/x86-iommu.h    |  1 +
 hw/i386/intel_iommu.c          | 83 +++++++++++++++++++++++++++++++++++++++---
 hw/i386/x86-iommu.c            | 17 +++++++++
 4 files changed, 107 insertions(+), 7 deletions(-)

Comments

Paolo Bonzini Jan. 18, 2017, 12:19 p.m. UTC | #1
On 10/01/2017 06:39, Michael S. Tsirkin wrote:
> From: Jason Wang <jasowang@redhat.com>
> 
> This patch enables device IOTLB support for intel iommu. The major
> work is to implement QI device IOTLB descriptor processing and notify
> the device through iommu notifier.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Richard Henderson <rth@twiddle.net>
> Cc: Eduardo Habkost <ehabkost@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> Reviewed-by: Peter Xu <peterx@redhat.com>
> ---
>  hw/i386/intel_iommu_internal.h | 13 ++++++-
>  include/hw/i386/x86-iommu.h    |  1 +
>  hw/i386/intel_iommu.c          | 83 +++++++++++++++++++++++++++++++++++++++---
>  hw/i386/x86-iommu.c            | 17 +++++++++
>  4 files changed, 107 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 11abfa2..356f188 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -183,6 +183,7 @@
>  /* (offset >> 4) << 8 */
>  #define VTD_ECAP_IRO                (DMAR_IOTLB_REG_OFFSET << 4)
>  #define VTD_ECAP_QI                 (1ULL << 1)
> +#define VTD_ECAP_DT                 (1ULL << 2)
>  /* Interrupt Remapping support */
>  #define VTD_ECAP_IR                 (1ULL << 3)
>  #define VTD_ECAP_EIM                (1ULL << 4)
> @@ -326,6 +327,7 @@ typedef union VTDInvDesc VTDInvDesc;
>  #define VTD_INV_DESC_TYPE               0xf
>  #define VTD_INV_DESC_CC                 0x1 /* Context-cache Invalidate Desc */
>  #define VTD_INV_DESC_IOTLB              0x2
> +#define VTD_INV_DESC_DEVICE             0x3
>  #define VTD_INV_DESC_IEC                0x4 /* Interrupt Entry Cache
>                                                 Invalidate Descriptor */
>  #define VTD_INV_DESC_WAIT               0x5 /* Invalidation Wait Descriptor */
> @@ -361,6 +363,13 @@ typedef union VTDInvDesc VTDInvDesc;
>  #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000ff00ULL
>  #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
>  
> +/* Mask for Device IOTLB Invalidate Descriptor */
> +#define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
> +#define VTD_INV_DESC_DEVICE_IOTLB_SIZE(val) ((val) & 0x1)
> +#define VTD_INV_DESC_DEVICE_IOTLB_SID(val) (((val) >> 32) & 0xFFFFULL)
> +#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
> +#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
> +
>  /* Information about page-selective IOTLB invalidate */
>  struct VTDIOTLBPageInvInfo {
>      uint16_t domain_id;
> @@ -399,8 +408,8 @@ typedef struct VTDRootEntry VTDRootEntry;
>  #define VTD_CONTEXT_ENTRY_FPD       (1ULL << 1) /* Fault Processing Disable */
>  #define VTD_CONTEXT_ENTRY_TT        (3ULL << 2) /* Translation Type */
>  #define VTD_CONTEXT_TT_MULTI_LEVEL  0
> -#define VTD_CONTEXT_TT_DEV_IOTLB    1
> -#define VTD_CONTEXT_TT_PASS_THROUGH 2
> +#define VTD_CONTEXT_TT_DEV_IOTLB    (1ULL << 2)
> +#define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2)
>  /* Second Level Page Translation Pointer*/
>  #define VTD_CONTEXT_ENTRY_SLPTPTR   (~0xfffULL)
>  #define VTD_CONTEXT_ENTRY_RSVD_LO   (0xff0ULL | ~VTD_HAW_MASK)
> diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
> index 0c89d98..361c07c 100644
> --- a/include/hw/i386/x86-iommu.h
> +++ b/include/hw/i386/x86-iommu.h
> @@ -73,6 +73,7 @@ typedef struct IEC_Notifier IEC_Notifier;
>  struct X86IOMMUState {
>      SysBusDevice busdev;
>      bool intr_supported;        /* Whether vIOMMU supports IR */
> +    bool dt_supported;          /* Whether vIOMMU supports DT */
>      IommuType type;             /* IOMMU type - AMD/Intel     */
>      QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */
>  };
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index e39b764..ec62239 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -738,11 +738,18 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
>                      "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
>                      ce->hi, ce->lo);
>          return -VTD_FR_CONTEXT_ENTRY_INV;
> -    } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) {
> -        VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
> -                    "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
> -                    ce->hi, ce->lo);
> -        return -VTD_FR_CONTEXT_ENTRY_INV;
> +    } else {
> +        switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
> +        case VTD_CONTEXT_TT_MULTI_LEVEL:
> +            /* fall through */
> +        case VTD_CONTEXT_TT_DEV_IOTLB:
> +            break;
> +        default:
> +            VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
> +                        "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
> +                        ce->hi, ce->lo);
> +            return -VTD_FR_CONTEXT_ENTRY_INV;
> +        }
>      }
>      return 0;
>  }
> @@ -1438,7 +1445,61 @@ static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
>      vtd_iec_notify_all(s, !inv_desc->iec.granularity,
>                         inv_desc->iec.index,
>                         inv_desc->iec.index_mask);
> +    return true;
> +}
> +
> +static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
> +                                          VTDInvDesc *inv_desc)
> +{
> +    VTDAddressSpace *vtd_dev_as;
> +    IOMMUTLBEntry entry;
> +    struct VTDBus *vtd_bus;
> +    hwaddr addr;
> +    uint64_t sz;
> +    uint16_t sid;
> +    uint8_t devfn;
> +    bool size;
> +    uint8_t bus_num;
> +
> +    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
> +    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
> +    devfn = sid & 0xff;
> +    bus_num = sid >> 8;
> +    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
> +
> +    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
> +        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
> +        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
> +                    "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
> +                    inv_desc->hi, inv_desc->lo);
> +        return false;
> +    }
> +
> +    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
> +    if (!vtd_bus) {
> +        goto done;
> +    }
> +
> +    vtd_dev_as = vtd_bus->dev_as[devfn];
> +    if (!vtd_dev_as) {
> +        goto done;
> +    }
> +
> +    if (size) {
> +        sz = 1 << (ctz64(~(addr | (VTD_PAGE_MASK_4K - 1))) + 1);

This should be 1ULL.  It could also be converted to cto64:

(VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT)

Here, I'm shifting addr right to avoid the case of an addr that is all ones.

It probably could use a comment too. :)  The examples in table 2-4 of
the PCIe ATS specification are useful:

	S = 0, bits 15:12 = xxxx     range size: 4K
	S = 1, bits 15:12 = xxx0     range size: 8K
	S = 1, bits 15:12 = xx01     range size: 16K
	S = 1, bits 15:12 = x011     range size: 32K
	S = 1, bits 15:12 = 0111     range size: 64K

        and so on

> +        addr &= ~(sz - 1);
> +    } else {
> +        sz = VTD_PAGE_SIZE;
> +    }
>  
> +    entry.target_as = &vtd_dev_as->as;
> +    entry.addr_mask = sz - 1;
> +    entry.iova = addr;

If S=1, entry.iova must mask away the 1 bits that specified the size.
For example,

     addr = 0xabcd1000

has cto64(0xabcd1) == 1, so it indicates a 16K invalidation from
0xabcd0000 to 0xabcd3fff.  The "1" must be masked away with "addr & -sz"
or "addr & ~entry.addr_mask".

Thanks,

Paolo

> +    entry.perm = IOMMU_NONE;
> +    entry.translated_addr = 0;
> +    memory_region_notify_iommu(entry.target_as->root, entry);
> +
> +done:
>      return true;
>  }
>  
> @@ -1490,6 +1551,14 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
>          }
>          break;
>  
> +    case VTD_INV_DESC_DEVICE:
> +        VTD_DPRINTF(INV, "Device IOTLB Invalidation Descriptor hi 0x%"PRIx64
> +                    " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
> +        if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
> +            return false;
> +        }
> +        break;
> +
>      default:
>          VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type "
>                      "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8,
> @@ -2415,6 +2484,10 @@ static void vtd_init(IntelIOMMUState *s)
>          assert(s->intr_eim != ON_OFF_AUTO_AUTO);
>      }
>  
> +    if (x86_iommu->dt_supported) {
> +        s->ecap |= VTD_ECAP_DT;
> +    }
> +
>      vtd_reset_context_cache(s);
>      vtd_reset_iotlb(s);
>  
> diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
> index 2278af7..23dcd3f 100644
> --- a/hw/i386/x86-iommu.c
> +++ b/hw/i386/x86-iommu.c
> @@ -106,6 +106,18 @@ static void x86_iommu_intremap_prop_set(Object *o, bool value, Error **errp)
>      s->intr_supported = value;
>  }
>  
> +static bool x86_iommu_device_iotlb_prop_get(Object *o, Error **errp)
> +{
> +    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
> +    return s->dt_supported;
> +}
> +
> +static void x86_iommu_device_iotlb_prop_set(Object *o, bool value, Error **errp)
> +{
> +    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
> +    s->dt_supported = value;
> +}
> +
>  static void x86_iommu_instance_init(Object *o)
>  {
>      X86IOMMUState *s = X86_IOMMU_DEVICE(o);
> @@ -114,6 +126,11 @@ static void x86_iommu_instance_init(Object *o)
>      s->intr_supported = false;
>      object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get,
>                               x86_iommu_intremap_prop_set, NULL);
> +    s->dt_supported = false;
> +    object_property_add_bool(o, "device-iotlb",
> +                             x86_iommu_device_iotlb_prop_get,
> +                             x86_iommu_device_iotlb_prop_set,
> +                             NULL);
>  }
>  
>  static const TypeInfo x86_iommu_info = {
>
Jason Wang Jan. 19, 2017, 2:50 a.m. UTC | #2
On 2017年01月18日 20:19, Paolo Bonzini wrote:
>
> On 10/01/2017 06:39, Michael S. Tsirkin wrote:
>> From: Jason Wang <jasowang@redhat.com>
>>
>> This patch enables device IOTLB support for intel iommu. The major
>> work is to implement QI device IOTLB descriptor processing and notify
>> the device through iommu notifier.
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Richard Henderson <rth@twiddle.net>
>> Cc: Eduardo Habkost <ehabkost@redhat.com>
>> Cc: Michael S. Tsirkin <mst@redhat.com>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> Reviewed-by: Peter Xu <peterx@redhat.com>
>> ---
>>   hw/i386/intel_iommu_internal.h | 13 ++++++-
>>   include/hw/i386/x86-iommu.h    |  1 +
>>   hw/i386/intel_iommu.c          | 83 +++++++++++++++++++++++++++++++++++++++---
>>   hw/i386/x86-iommu.c            | 17 +++++++++
>>   4 files changed, 107 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>> index 11abfa2..356f188 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -183,6 +183,7 @@
>>   /* (offset >> 4) << 8 */
>>   #define VTD_ECAP_IRO                (DMAR_IOTLB_REG_OFFSET << 4)
>>   #define VTD_ECAP_QI                 (1ULL << 1)
>> +#define VTD_ECAP_DT                 (1ULL << 2)
>>   /* Interrupt Remapping support */
>>   #define VTD_ECAP_IR                 (1ULL << 3)
>>   #define VTD_ECAP_EIM                (1ULL << 4)
>> @@ -326,6 +327,7 @@ typedef union VTDInvDesc VTDInvDesc;
>>   #define VTD_INV_DESC_TYPE               0xf
>>   #define VTD_INV_DESC_CC                 0x1 /* Context-cache Invalidate Desc */
>>   #define VTD_INV_DESC_IOTLB              0x2
>> +#define VTD_INV_DESC_DEVICE             0x3
>>   #define VTD_INV_DESC_IEC                0x4 /* Interrupt Entry Cache
>>                                                  Invalidate Descriptor */
>>   #define VTD_INV_DESC_WAIT               0x5 /* Invalidation Wait Descriptor */
>> @@ -361,6 +363,13 @@ typedef union VTDInvDesc VTDInvDesc;
>>   #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000ff00ULL
>>   #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
>>   
>> +/* Mask for Device IOTLB Invalidate Descriptor */
>> +#define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
>> +#define VTD_INV_DESC_DEVICE_IOTLB_SIZE(val) ((val) & 0x1)
>> +#define VTD_INV_DESC_DEVICE_IOTLB_SID(val) (((val) >> 32) & 0xFFFFULL)
>> +#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
>> +#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
>> +
>>   /* Information about page-selective IOTLB invalidate */
>>   struct VTDIOTLBPageInvInfo {
>>       uint16_t domain_id;
>> @@ -399,8 +408,8 @@ typedef struct VTDRootEntry VTDRootEntry;
>>   #define VTD_CONTEXT_ENTRY_FPD       (1ULL << 1) /* Fault Processing Disable */
>>   #define VTD_CONTEXT_ENTRY_TT        (3ULL << 2) /* Translation Type */
>>   #define VTD_CONTEXT_TT_MULTI_LEVEL  0
>> -#define VTD_CONTEXT_TT_DEV_IOTLB    1
>> -#define VTD_CONTEXT_TT_PASS_THROUGH 2
>> +#define VTD_CONTEXT_TT_DEV_IOTLB    (1ULL << 2)
>> +#define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2)
>>   /* Second Level Page Translation Pointer*/
>>   #define VTD_CONTEXT_ENTRY_SLPTPTR   (~0xfffULL)
>>   #define VTD_CONTEXT_ENTRY_RSVD_LO   (0xff0ULL | ~VTD_HAW_MASK)
>> diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
>> index 0c89d98..361c07c 100644
>> --- a/include/hw/i386/x86-iommu.h
>> +++ b/include/hw/i386/x86-iommu.h
>> @@ -73,6 +73,7 @@ typedef struct IEC_Notifier IEC_Notifier;
>>   struct X86IOMMUState {
>>       SysBusDevice busdev;
>>       bool intr_supported;        /* Whether vIOMMU supports IR */
>> +    bool dt_supported;          /* Whether vIOMMU supports DT */
>>       IommuType type;             /* IOMMU type - AMD/Intel     */
>>       QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */
>>   };
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index e39b764..ec62239 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -738,11 +738,18 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
>>                       "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
>>                       ce->hi, ce->lo);
>>           return -VTD_FR_CONTEXT_ENTRY_INV;
>> -    } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) {
>> -        VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
>> -                    "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
>> -                    ce->hi, ce->lo);
>> -        return -VTD_FR_CONTEXT_ENTRY_INV;
>> +    } else {
>> +        switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
>> +        case VTD_CONTEXT_TT_MULTI_LEVEL:
>> +            /* fall through */
>> +        case VTD_CONTEXT_TT_DEV_IOTLB:
>> +            break;
>> +        default:
>> +            VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
>> +                        "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
>> +                        ce->hi, ce->lo);
>> +            return -VTD_FR_CONTEXT_ENTRY_INV;
>> +        }
>>       }
>>       return 0;
>>   }
>> @@ -1438,7 +1445,61 @@ static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
>>       vtd_iec_notify_all(s, !inv_desc->iec.granularity,
>>                          inv_desc->iec.index,
>>                          inv_desc->iec.index_mask);
>> +    return true;
>> +}
>> +
>> +static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
>> +                                          VTDInvDesc *inv_desc)
>> +{
>> +    VTDAddressSpace *vtd_dev_as;
>> +    IOMMUTLBEntry entry;
>> +    struct VTDBus *vtd_bus;
>> +    hwaddr addr;
>> +    uint64_t sz;
>> +    uint16_t sid;
>> +    uint8_t devfn;
>> +    bool size;
>> +    uint8_t bus_num;
>> +
>> +    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
>> +    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
>> +    devfn = sid & 0xff;
>> +    bus_num = sid >> 8;
>> +    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
>> +
>> +    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
>> +        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
>> +        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
>> +                    "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
>> +                    inv_desc->hi, inv_desc->lo);
>> +        return false;
>> +    }
>> +
>> +    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
>> +    if (!vtd_bus) {
>> +        goto done;
>> +    }
>> +
>> +    vtd_dev_as = vtd_bus->dev_as[devfn];
>> +    if (!vtd_dev_as) {
>> +        goto done;
>> +    }
>> +
>> +    if (size) {
>> +        sz = 1 << (ctz64(~(addr | (VTD_PAGE_MASK_4K - 1))) + 1);
> This should be 1ULL.

Yes.

>    It could also be converted to cto64:
>
> (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT)
>
> Here, I'm shifting addr right to avoid the case of an addr that is all ones.
>
> It probably could use a comment too. :)  The examples in table 2-4 of
> the PCIe ATS specification are useful:
>
> 	S = 0, bits 15:12 = xxxx     range size: 4K
> 	S = 1, bits 15:12 = xxx0     range size: 8K
> 	S = 1, bits 15:12 = xx01     range size: 16K
> 	S = 1, bits 15:12 = x011     range size: 32K
> 	S = 1, bits 15:12 = 0111     range size: 64K
>
>          and so on

This seems simpler let me add them comment and convert it to cto64.

>
>> +        addr &= ~(sz - 1);
>> +    } else {
>> +        sz = VTD_PAGE_SIZE;
>> +    }
>>   
>> +    entry.target_as = &vtd_dev_as->as;
>> +    entry.addr_mask = sz - 1;
>> +    entry.iova = addr;
> If S=1, entry.iova must mask away the 1 bits that specified the size.
> For example,
>
>       addr = 0xabcd1000
>
> has cto64(0xabcd1) == 1, so it indicates a 16K invalidation from
> 0xabcd0000 to 0xabcd3fff.  The "1" must be masked away with "addr & -sz"
> or "addr & ~entry.addr_mask".
>
> Thanks,
>
> Paolo

Good catch.

Let me fix it.

Thanks
Peter Xu Jan. 19, 2017, 3:28 a.m. UTC | #3
On Thu, Jan 19, 2017 at 10:50:59AM +0800, Jason Wang wrote:

[...]

> >>+static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
> >>+                                          VTDInvDesc *inv_desc)
> >>+{
> >>+    VTDAddressSpace *vtd_dev_as;
> >>+    IOMMUTLBEntry entry;
> >>+    struct VTDBus *vtd_bus;
> >>+    hwaddr addr;
> >>+    uint64_t sz;
> >>+    uint16_t sid;
> >>+    uint8_t devfn;
> >>+    bool size;
> >>+    uint8_t bus_num;
> >>+
> >>+    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
> >>+    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
> >>+    devfn = sid & 0xff;
> >>+    bus_num = sid >> 8;
> >>+    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
> >>+
> >>+    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
> >>+        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
> >>+        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
> >>+                    "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
> >>+                    inv_desc->hi, inv_desc->lo);
> >>+        return false;
> >>+    }
> >>+
> >>+    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
> >>+    if (!vtd_bus) {
> >>+        goto done;
> >>+    }
> >>+
> >>+    vtd_dev_as = vtd_bus->dev_as[devfn];
> >>+    if (!vtd_dev_as) {
> >>+        goto done;
> >>+    }
> >>+
> >>+    if (size) {
> >>+        sz = 1 << (ctz64(~(addr | (VTD_PAGE_MASK_4K - 1))) + 1);
> >This should be 1ULL.
> 
> Yes.
> 
> >   It could also be converted to cto64:
> >
> >(VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT)
> >
> >Here, I'm shifting addr right to avoid the case of an addr that is all ones.
> >
> >It probably could use a comment too. :)  The examples in table 2-4 of
> >the PCIe ATS specification are useful:
> >
> >	S = 0, bits 15:12 = xxxx     range size: 4K
> >	S = 1, bits 15:12 = xxx0     range size: 8K
> >	S = 1, bits 15:12 = xx01     range size: 16K
> >	S = 1, bits 15:12 = x011     range size: 32K
> >	S = 1, bits 15:12 = 0111     range size: 64K
> >
> >         and so on
> 
> This seems simpler let me add them comment and convert it to cto64.
> 
> >
> >>+        addr &= ~(sz - 1);

[1]

> >>+    } else {
> >>+        sz = VTD_PAGE_SIZE;
> >>+    }
> >>+    entry.target_as = &vtd_dev_as->as;
> >>+    entry.addr_mask = sz - 1;
> >>+    entry.iova = addr;
> >If S=1, entry.iova must mask away the 1 bits that specified the size.
> >For example,
> >
> >      addr = 0xabcd1000
> >
> >has cto64(0xabcd1) == 1, so it indicates a 16K invalidation from
> >0xabcd0000 to 0xabcd3fff.  The "1" must be masked away with "addr & -sz"
> >or "addr & ~entry.addr_mask".
> >
> >Thanks,
> >
> >Paolo
> 
> Good catch.
> 
> Let me fix it.

Is above [1] doing that?

Thanks,

-- peterx
Jason Wang Jan. 19, 2017, 3:32 a.m. UTC | #4
On 2017年01月19日 10:50, Jason Wang wrote:
>
>
> On 2017年01月18日 20:19, Paolo Bonzini wrote:
>>
>> On 10/01/2017 06:39, Michael S. Tsirkin wrote:
>>> From: Jason Wang <jasowang@redhat.com>
>>>
>>> This patch enables device IOTLB support for intel iommu. The major
>>> work is to implement QI device IOTLB descriptor processing and notify
>>> the device through iommu notifier.
>>>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Cc: Richard Henderson <rth@twiddle.net>
>>> Cc: Eduardo Habkost <ehabkost@redhat.com>
>>> Cc: Michael S. Tsirkin <mst@redhat.com>
>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>> Reviewed-by: Peter Xu <peterx@redhat.com>
>>> ---
>>>   hw/i386/intel_iommu_internal.h | 13 ++++++-
>>>   include/hw/i386/x86-iommu.h    |  1 +
>>>   hw/i386/intel_iommu.c          | 83 
>>> +++++++++++++++++++++++++++++++++++++++---
>>>   hw/i386/x86-iommu.c            | 17 +++++++++
>>>   4 files changed, 107 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/hw/i386/intel_iommu_internal.h 
>>> b/hw/i386/intel_iommu_internal.h
>>> index 11abfa2..356f188 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -183,6 +183,7 @@
>>>   /* (offset >> 4) << 8 */
>>>   #define VTD_ECAP_IRO                (DMAR_IOTLB_REG_OFFSET << 4)
>>>   #define VTD_ECAP_QI                 (1ULL << 1)
>>> +#define VTD_ECAP_DT                 (1ULL << 2)
>>>   /* Interrupt Remapping support */
>>>   #define VTD_ECAP_IR                 (1ULL << 3)
>>>   #define VTD_ECAP_EIM                (1ULL << 4)
>>> @@ -326,6 +327,7 @@ typedef union VTDInvDesc VTDInvDesc;
>>>   #define VTD_INV_DESC_TYPE               0xf
>>>   #define VTD_INV_DESC_CC                 0x1 /* Context-cache 
>>> Invalidate Desc */
>>>   #define VTD_INV_DESC_IOTLB              0x2
>>> +#define VTD_INV_DESC_DEVICE             0x3
>>>   #define VTD_INV_DESC_IEC                0x4 /* Interrupt Entry Cache
>>>                                                  Invalidate 
>>> Descriptor */
>>>   #define VTD_INV_DESC_WAIT               0x5 /* Invalidation Wait 
>>> Descriptor */
>>> @@ -361,6 +363,13 @@ typedef union VTDInvDesc VTDInvDesc;
>>>   #define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL
>>>   #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
>>>   +/* Mask for Device IOTLB Invalidate Descriptor */
>>> +#define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 
>>> 0xfffffffffffff000ULL)
>>> +#define VTD_INV_DESC_DEVICE_IOTLB_SIZE(val) ((val) & 0x1)
>>> +#define VTD_INV_DESC_DEVICE_IOTLB_SID(val) (((val) >> 32) & 0xFFFFULL)
>>> +#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
>>> +#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
>>> +
>>>   /* Information about page-selective IOTLB invalidate */
>>>   struct VTDIOTLBPageInvInfo {
>>>       uint16_t domain_id;
>>> @@ -399,8 +408,8 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>   #define VTD_CONTEXT_ENTRY_FPD       (1ULL << 1) /* Fault 
>>> Processing Disable */
>>>   #define VTD_CONTEXT_ENTRY_TT        (3ULL << 2) /* Translation 
>>> Type */
>>>   #define VTD_CONTEXT_TT_MULTI_LEVEL  0
>>> -#define VTD_CONTEXT_TT_DEV_IOTLB    1
>>> -#define VTD_CONTEXT_TT_PASS_THROUGH 2
>>> +#define VTD_CONTEXT_TT_DEV_IOTLB    (1ULL << 2)
>>> +#define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2)
>>>   /* Second Level Page Translation Pointer*/
>>>   #define VTD_CONTEXT_ENTRY_SLPTPTR   (~0xfffULL)
>>>   #define VTD_CONTEXT_ENTRY_RSVD_LO   (0xff0ULL | ~VTD_HAW_MASK)
>>> diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
>>> index 0c89d98..361c07c 100644
>>> --- a/include/hw/i386/x86-iommu.h
>>> +++ b/include/hw/i386/x86-iommu.h
>>> @@ -73,6 +73,7 @@ typedef struct IEC_Notifier IEC_Notifier;
>>>   struct X86IOMMUState {
>>>       SysBusDevice busdev;
>>>       bool intr_supported;        /* Whether vIOMMU supports IR */
>>> +    bool dt_supported;          /* Whether vIOMMU supports DT */
>>>       IommuType type;             /* IOMMU type - AMD/Intel     */
>>>       QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */
>>>   };
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index e39b764..ec62239 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -738,11 +738,18 @@ static int 
>>> vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
>>>                       "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
>>>                       ce->hi, ce->lo);
>>>           return -VTD_FR_CONTEXT_ENTRY_INV;
>>> -    } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) {
>>> -        VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
>>> -                    "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
>>> -                    ce->hi, ce->lo);
>>> -        return -VTD_FR_CONTEXT_ENTRY_INV;
>>> +    } else {
>>> +        switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
>>> +        case VTD_CONTEXT_TT_MULTI_LEVEL:
>>> +            /* fall through */
>>> +        case VTD_CONTEXT_TT_DEV_IOTLB:
>>> +            break;
>>> +        default:
>>> +            VTD_DPRINTF(GENERAL, "error: unsupported Translation 
>>> Type in "
>>> +                        "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
>>> +                        ce->hi, ce->lo);
>>> +            return -VTD_FR_CONTEXT_ENTRY_INV;
>>> +        }
>>>       }
>>>       return 0;
>>>   }
>>> @@ -1438,7 +1445,61 @@ static bool 
>>> vtd_process_inv_iec_desc(IntelIOMMUState *s,
>>>       vtd_iec_notify_all(s, !inv_desc->iec.granularity,
>>>                          inv_desc->iec.index,
>>>                          inv_desc->iec.index_mask);
>>> +    return true;
>>> +}
>>> +
>>> +static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
>>> +                                          VTDInvDesc *inv_desc)
>>> +{
>>> +    VTDAddressSpace *vtd_dev_as;
>>> +    IOMMUTLBEntry entry;
>>> +    struct VTDBus *vtd_bus;
>>> +    hwaddr addr;
>>> +    uint64_t sz;
>>> +    uint16_t sid;
>>> +    uint8_t devfn;
>>> +    bool size;
>>> +    uint8_t bus_num;
>>> +
>>> +    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
>>> +    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
>>> +    devfn = sid & 0xff;
>>> +    bus_num = sid >> 8;
>>> +    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
>>> +
>>> +    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
>>> +        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
>>> +        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in 
>>> Device "
>>> +                    "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 
>>> 0x%"PRIx64,
>>> +                    inv_desc->hi, inv_desc->lo);
>>> +        return false;
>>> +    }
>>> +
>>> +    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
>>> +    if (!vtd_bus) {
>>> +        goto done;
>>> +    }
>>> +
>>> +    vtd_dev_as = vtd_bus->dev_as[devfn];
>>> +    if (!vtd_dev_as) {
>>> +        goto done;
>>> +    }
>>> +
>>> +    if (size) {
>>> +        sz = 1 << (ctz64(~(addr | (VTD_PAGE_MASK_4K - 1))) + 1);
>> This should be 1ULL.
>
> Yes.
>
>>    It could also be converted to cto64:
>>
>> (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT)
>>
>> Here, I'm shifting addr right to avoid the case of an addr that is 
>> all ones.
>>
>> It probably could use a comment too. :)  The examples in table 2-4 of
>> the PCIe ATS specification are useful:
>>
>>     S = 0, bits 15:12 = xxxx     range size: 4K
>>     S = 1, bits 15:12 = xxx0     range size: 8K
>>     S = 1, bits 15:12 = xx01     range size: 16K
>>     S = 1, bits 15:12 = x011     range size: 32K
>>     S = 1, bits 15:12 = 0111     range size: 64K
>>
>>          and so on
>
> This seems simpler let me add them comment and convert it to cto64.
>
>>
>>> +        addr &= ~(sz - 1);
>>> +    } else {
>>> +        sz = VTD_PAGE_SIZE;
>>> +    }
>>>   +    entry.target_as = &vtd_dev_as->as;
>>> +    entry.addr_mask = sz - 1;
>>> +    entry.iova = addr;
>> If S=1, entry.iova must mask away the 1 bits that specified the size.
>> For example,
>>
>>       addr = 0xabcd1000
>>
>> has cto64(0xabcd1) == 1, so it indicates a 16K invalidation from
>> 0xabcd0000 to 0xabcd3fff.  The "1" must be masked away with "addr & -sz"
>> or "addr & ~entry.addr_mask".
>>
>> Thanks,
>>
>> Paolo
>
> Good catch.

It should be addr & ~(sz - 1) I think? And it has been done above :)

Thanks

>
> Let me fix it.
>
> Thanks
>
Jason Wang Jan. 19, 2017, 3:35 a.m. UTC | #5
On 2017年01月19日 11:28, Peter Xu wrote:
> On Thu, Jan 19, 2017 at 10:50:59AM +0800, Jason Wang wrote:
>
> [...]
>
>>>> +static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
>>>> +                                          VTDInvDesc *inv_desc)
>>>> +{
>>>> +    VTDAddressSpace *vtd_dev_as;
>>>> +    IOMMUTLBEntry entry;
>>>> +    struct VTDBus *vtd_bus;
>>>> +    hwaddr addr;
>>>> +    uint64_t sz;
>>>> +    uint16_t sid;
>>>> +    uint8_t devfn;
>>>> +    bool size;
>>>> +    uint8_t bus_num;
>>>> +
>>>> +    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
>>>> +    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
>>>> +    devfn = sid & 0xff;
>>>> +    bus_num = sid >> 8;
>>>> +    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
>>>> +
>>>> +    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
>>>> +        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
>>>> +        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
>>>> +                    "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
>>>> +                    inv_desc->hi, inv_desc->lo);
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
>>>> +    if (!vtd_bus) {
>>>> +        goto done;
>>>> +    }
>>>> +
>>>> +    vtd_dev_as = vtd_bus->dev_as[devfn];
>>>> +    if (!vtd_dev_as) {
>>>> +        goto done;
>>>> +    }
>>>> +
>>>> +    if (size) {
>>>> +        sz = 1 << (ctz64(~(addr | (VTD_PAGE_MASK_4K - 1))) + 1);
>>> This should be 1ULL.
>> Yes.
>>
>>>    It could also be converted to cto64:
>>>
>>> (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT)
>>>
>>> Here, I'm shifting addr right to avoid the case of an addr that is all ones.
>>>
>>> It probably could use a comment too. :)  The examples in table 2-4 of
>>> the PCIe ATS specification are useful:
>>>
>>> 	S = 0, bits 15:12 = xxxx     range size: 4K
>>> 	S = 1, bits 15:12 = xxx0     range size: 8K
>>> 	S = 1, bits 15:12 = xx01     range size: 16K
>>> 	S = 1, bits 15:12 = x011     range size: 32K
>>> 	S = 1, bits 15:12 = 0111     range size: 64K
>>>
>>>          and so on
>> This seems simpler let me add them comment and convert it to cto64.
>>
>>>> +        addr &= ~(sz - 1);
> [1]
>
>>>> +    } else {
>>>> +        sz = VTD_PAGE_SIZE;
>>>> +    }
>>>> +    entry.target_as = &vtd_dev_as->as;
>>>> +    entry.addr_mask = sz - 1;
>>>> +    entry.iova = addr;
>>> If S=1, entry.iova must mask away the 1 bits that specified the size.
>>> For example,
>>>
>>>       addr = 0xabcd1000
>>>
>>> has cto64(0xabcd1) == 1, so it indicates a 16K invalidation from
>>> 0xabcd0000 to 0xabcd3fff.  The "1" must be masked away with "addr & -sz"
>>> or "addr & ~entry.addr_mask".
>>>
>>> Thanks,
>>>
>>> Paolo
>> Good catch.
>>
>> Let me fix it.
> Is above [1] doing that?
>
> Thanks,
>
> -- peterx

Yes, speak too fast :(

Thanks
Paolo Bonzini Jan. 19, 2017, 9:07 a.m. UTC | #6
On 19/01/2017 04:32, Jason Wang wrote:
>>
>>>
>>>> +        addr &= ~(sz - 1);
>>>> +    } else {
>>>> +        sz = VTD_PAGE_SIZE;
>>>> +    }
>>>>   +    entry.target_as = &vtd_dev_as->as;
>>>> +    entry.addr_mask = sz - 1;
>>>> +    entry.iova = addr;
>>> If S=1, entry.iova must mask away the 1 bits that specified the size.
>>> For example,
>>>
>>>       addr = 0xabcd1000
>>>
>>> has cto64(0xabcd1) == 1, so it indicates a 16K invalidation from
>>> 0xabcd0000 to 0xabcd3fff.  The "1" must be masked away with "addr & -sz"
>>> or "addr & ~entry.addr_mask".
>>>
>>> Thanks,
>>>
>>> Paolo
>>
>> Good catch.
> 
> It should be addr & ~(sz - 1) I think? And it has been done above :)

Oh, of course!

Paolo
Yi Liu Feb. 16, 2017, 5:36 a.m. UTC | #7
> -----Original Message-----
> From: Qemu-devel [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]
> On Behalf Of Michael S. Tsirkin
> Sent: Tuesday, January 10, 2017 1:40 PM
> To: qemu-devel@nongnu.org
> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
> <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter Xu
> <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard
> Henderson <rth@twiddle.net>
> Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb
> descriptor
> 
> From: Jason Wang <jasowang@redhat.com>
> 
> This patch enables device IOTLB support for intel iommu. The major work is to
> implement QI device IOTLB descriptor processing and notify the device through
> iommu notifier.
>
Hi Jason/Michael,
	
Recently Peter Xu's patch also touched intel-iommu emulation. His patch shadows
second-level page table by capturing iotlb flush from guest. It would result in page
table updating in host. Does this patch also use the same map/umap API provided
by VFIO? If it is, then I think it would also update page table in host. It looks to be
a duplicate update. Pls refer to the following snapshot captured from section 6.5.2.5
of vtd spec. 

"Since translation requests from a device may be serviced by hardware from the IOTLB, software must
always request IOTLB invalidation (iotlb_inv_dsc) before requesting corresponding Device-TLB
(dev_tlb_inv_dsc) invalidation."

Maybe for device-iotlb, we need a separate API which just pass down the invalidate
info without updating page table. Any thoughts?

Thanks,
Yi L
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Richard Henderson <rth@twiddle.net>
> Cc: Eduardo Habkost <ehabkost@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> Reviewed-by: Peter Xu <peterx@redhat.com>
> ---
>  hw/i386/intel_iommu_internal.h | 13 ++++++-
>  include/hw/i386/x86-iommu.h    |  1 +
>  hw/i386/intel_iommu.c          | 83
> +++++++++++++++++++++++++++++++++++++++---
>  hw/i386/x86-iommu.c            | 17 +++++++++
>  4 files changed, 107 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu_internal.h
> b/hw/i386/intel_iommu_internal.h index 11abfa2..356f188 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -183,6 +183,7 @@
>  /* (offset >> 4) << 8 */
>  #define VTD_ECAP_IRO                (DMAR_IOTLB_REG_OFFSET << 4)
>  #define VTD_ECAP_QI                 (1ULL << 1)
> +#define VTD_ECAP_DT                 (1ULL << 2)
>  /* Interrupt Remapping support */
>  #define VTD_ECAP_IR                 (1ULL << 3)
>  #define VTD_ECAP_EIM                (1ULL << 4)
> @@ -326,6 +327,7 @@ typedef union VTDInvDesc VTDInvDesc;
>  #define VTD_INV_DESC_TYPE               0xf
>  #define VTD_INV_DESC_CC                 0x1 /* Context-cache Invalidate Desc */
>  #define VTD_INV_DESC_IOTLB              0x2
> +#define VTD_INV_DESC_DEVICE             0x3
>  #define VTD_INV_DESC_IEC                0x4 /* Interrupt Entry Cache
>                                                 Invalidate Descriptor */
>  #define VTD_INV_DESC_WAIT               0x5 /* Invalidation Wait Descriptor */
> @@ -361,6 +363,13 @@ typedef union VTDInvDesc VTDInvDesc;
>  #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000ff00ULL
>  #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
> 
> +/* Mask for Device IOTLB Invalidate Descriptor */ #define
> +VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
> +#define VTD_INV_DESC_DEVICE_IOTLB_SIZE(val) ((val) & 0x1) #define
> +VTD_INV_DESC_DEVICE_IOTLB_SID(val) (((val) >> 32) & 0xFFFFULL) #define
> +VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL #define
> +VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
> +
>  /* Information about page-selective IOTLB invalidate */  struct
> VTDIOTLBPageInvInfo {
>      uint16_t domain_id;
> @@ -399,8 +408,8 @@ typedef struct VTDRootEntry VTDRootEntry;
>  #define VTD_CONTEXT_ENTRY_FPD       (1ULL << 1) /* Fault Processing Disable
> */
>  #define VTD_CONTEXT_ENTRY_TT        (3ULL << 2) /* Translation Type */
>  #define VTD_CONTEXT_TT_MULTI_LEVEL  0
> -#define VTD_CONTEXT_TT_DEV_IOTLB    1
> -#define VTD_CONTEXT_TT_PASS_THROUGH 2
> +#define VTD_CONTEXT_TT_DEV_IOTLB    (1ULL << 2)
> +#define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2)
>  /* Second Level Page Translation Pointer*/
>  #define VTD_CONTEXT_ENTRY_SLPTPTR   (~0xfffULL)
>  #define VTD_CONTEXT_ENTRY_RSVD_LO   (0xff0ULL | ~VTD_HAW_MASK)
> diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
> index 0c89d98..361c07c 100644
> --- a/include/hw/i386/x86-iommu.h
> +++ b/include/hw/i386/x86-iommu.h
> @@ -73,6 +73,7 @@ typedef struct IEC_Notifier IEC_Notifier;  struct
> X86IOMMUState {
>      SysBusDevice busdev;
>      bool intr_supported;        /* Whether vIOMMU supports IR */
> +    bool dt_supported;          /* Whether vIOMMU supports DT */
>      IommuType type;             /* IOMMU type - AMD/Intel     */
>      QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */  }; diff --git
> a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index e39b764..ec62239
> 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -738,11 +738,18 @@ static int
> vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
>                      "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
>                      ce->hi, ce->lo);
>          return -VTD_FR_CONTEXT_ENTRY_INV;
> -    } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) {
> -        VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
> -                    "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
> -                    ce->hi, ce->lo);
> -        return -VTD_FR_CONTEXT_ENTRY_INV;
> +    } else {
> +        switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
> +        case VTD_CONTEXT_TT_MULTI_LEVEL:
> +            /* fall through */
> +        case VTD_CONTEXT_TT_DEV_IOTLB:
> +            break;
> +        default:
> +            VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
> +                        "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
> +                        ce->hi, ce->lo);
> +            return -VTD_FR_CONTEXT_ENTRY_INV;
> +        }
>      }
>      return 0;
>  }
> @@ -1438,7 +1445,61 @@ static bool
> vtd_process_inv_iec_desc(IntelIOMMUState *s,
>      vtd_iec_notify_all(s, !inv_desc->iec.granularity,
>                         inv_desc->iec.index,
>                         inv_desc->iec.index_mask);
> +    return true;
> +}
> +
> +static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
> +                                          VTDInvDesc *inv_desc) {
> +    VTDAddressSpace *vtd_dev_as;
> +    IOMMUTLBEntry entry;
> +    struct VTDBus *vtd_bus;
> +    hwaddr addr;
> +    uint64_t sz;
> +    uint16_t sid;
> +    uint8_t devfn;
> +    bool size;
> +    uint8_t bus_num;
> +
> +    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
> +    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
> +    devfn = sid & 0xff;
> +    bus_num = sid >> 8;
> +    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
> +
> +    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
> +        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
> +        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
> +                    "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
> +                    inv_desc->hi, inv_desc->lo);
> +        return false;
> +    }
> +
> +    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
> +    if (!vtd_bus) {
> +        goto done;
> +    }
> +
> +    vtd_dev_as = vtd_bus->dev_as[devfn];
> +    if (!vtd_dev_as) {
> +        goto done;
> +    }
> +
> +    if (size) {
> +        sz = 1 << (ctz64(~(addr | (VTD_PAGE_MASK_4K - 1))) + 1);
> +        addr &= ~(sz - 1);
> +    } else {
> +        sz = VTD_PAGE_SIZE;
> +    }
> 
> +    entry.target_as = &vtd_dev_as->as;
> +    entry.addr_mask = sz - 1;
> +    entry.iova = addr;
> +    entry.perm = IOMMU_NONE;
> +    entry.translated_addr = 0;
> +    memory_region_notify_iommu(entry.target_as->root, entry);
> +
> +done:
>      return true;
>  }
> 
> @@ -1490,6 +1551,14 @@ static bool vtd_process_inv_desc(IntelIOMMUState
> *s)
>          }
>          break;
> 
> +    case VTD_INV_DESC_DEVICE:
> +        VTD_DPRINTF(INV, "Device IOTLB Invalidation Descriptor hi 0x%"PRIx64
> +                    " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
> +        if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
> +            return false;
> +        }
> +        break;
> +
>      default:
>          VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type "
>                      "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8, @@ -2415,6
> +2484,10 @@ static void vtd_init(IntelIOMMUState *s)
>          assert(s->intr_eim != ON_OFF_AUTO_AUTO);
>      }
> 
> +    if (x86_iommu->dt_supported) {
> +        s->ecap |= VTD_ECAP_DT;
> +    }
> +
>      vtd_reset_context_cache(s);
>      vtd_reset_iotlb(s);
> 
> diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index
> 2278af7..23dcd3f 100644
> --- a/hw/i386/x86-iommu.c
> +++ b/hw/i386/x86-iommu.c
> @@ -106,6 +106,18 @@ static void x86_iommu_intremap_prop_set(Object *o,
> bool value, Error **errp)
>      s->intr_supported = value;
>  }
> 
> +static bool x86_iommu_device_iotlb_prop_get(Object *o, Error **errp) {
> +    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
> +    return s->dt_supported;
> +}
> +
> +static void x86_iommu_device_iotlb_prop_set(Object *o, bool value,
> +Error **errp) {
> +    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
> +    s->dt_supported = value;
> +}
> +
>  static void x86_iommu_instance_init(Object *o)  {
>      X86IOMMUState *s = X86_IOMMU_DEVICE(o); @@ -114,6 +126,11 @@
> static void x86_iommu_instance_init(Object *o)
>      s->intr_supported = false;
>      object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get,
>                               x86_iommu_intremap_prop_set, NULL);
> +    s->dt_supported = false;
> +    object_property_add_bool(o, "device-iotlb",
> +                             x86_iommu_device_iotlb_prop_get,
> +                             x86_iommu_device_iotlb_prop_set,
> +                             NULL);
>  }
> 
>  static const TypeInfo x86_iommu_info = {
> --
> MST
>
Jason Wang Feb. 16, 2017, 5:43 a.m. UTC | #8
On 2017年02月16日 13:36, Liu, Yi L wrote:
>> -----Original Message-----
>> From: Qemu-devel [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]
>> On Behalf Of Michael S. Tsirkin
>> Sent: Tuesday, January 10, 2017 1:40 PM
>> To: qemu-devel@nongnu.org
>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
>> <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter Xu
>> <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard
>> Henderson <rth@twiddle.net>
>> Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb
>> descriptor
>>
>> From: Jason Wang <jasowang@redhat.com>
>>
>> This patch enables device IOTLB support for intel iommu. The major work is to
>> implement QI device IOTLB descriptor processing and notify the device through
>> iommu notifier.
>>
> Hi Jason/Michael,
> 	
> Recently Peter Xu's patch also touched intel-iommu emulation. His patch shadows
> second-level page table by capturing iotlb flush from guest. It would result in page
> table updating in host. Does this patch also use the same map/umap API provided
> by VFIO?

Yes, it depends on the iommu notifier too.

> If it is, then I think it would also update page table in host. It looks to be
> a duplicate update. Pls refer to the following snapshot captured from section 6.5.2.5
> of vtd spec.
>
> "Since translation requests from a device may be serviced by hardware from the IOTLB, software must
> always request IOTLB invalidation (iotlb_inv_dsc) before requesting corresponding Device-TLB
> (dev_tlb_inv_dsc) invalidation."
>
> Maybe for device-iotlb, we need a separate API which just pass down the invalidate
> info without updating page table. Any thoughts?

cc Alex.

If we want ATS to be visible for guest (but I'm not sure if VFIO support 
this), we probably need another notifier or a new flag.

Thanks

>
> Thanks,
> Yi L
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Richard Henderson <rth@twiddle.net>
>> Cc: Eduardo Habkost <ehabkost@redhat.com>
>> Cc: Michael S. Tsirkin <mst@redhat.com>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> Reviewed-by: Peter Xu <peterx@redhat.com>
>> ---
>>   hw/i386/intel_iommu_internal.h | 13 ++++++-
>>   include/hw/i386/x86-iommu.h    |  1 +
>>   hw/i386/intel_iommu.c          | 83
>> +++++++++++++++++++++++++++++++++++++++---
>>   hw/i386/x86-iommu.c            | 17 +++++++++
>>   4 files changed, 107 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h
>> b/hw/i386/intel_iommu_internal.h index 11abfa2..356f188 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -183,6 +183,7 @@
>>   /* (offset >> 4) << 8 */
>>   #define VTD_ECAP_IRO                (DMAR_IOTLB_REG_OFFSET << 4)
>>   #define VTD_ECAP_QI                 (1ULL << 1)
>> +#define VTD_ECAP_DT                 (1ULL << 2)
>>   /* Interrupt Remapping support */
>>   #define VTD_ECAP_IR                 (1ULL << 3)
>>   #define VTD_ECAP_EIM                (1ULL << 4)
>> @@ -326,6 +327,7 @@ typedef union VTDInvDesc VTDInvDesc;
>>   #define VTD_INV_DESC_TYPE               0xf
>>   #define VTD_INV_DESC_CC                 0x1 /* Context-cache Invalidate Desc */
>>   #define VTD_INV_DESC_IOTLB              0x2
>> +#define VTD_INV_DESC_DEVICE             0x3
>>   #define VTD_INV_DESC_IEC                0x4 /* Interrupt Entry Cache
>>                                                  Invalidate Descriptor */
>>   #define VTD_INV_DESC_WAIT               0x5 /* Invalidation Wait Descriptor */
>> @@ -361,6 +363,13 @@ typedef union VTDInvDesc VTDInvDesc;
>>   #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000ff00ULL
>>   #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
>>
>> +/* Mask for Device IOTLB Invalidate Descriptor */ #define
>> +VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
>> +#define VTD_INV_DESC_DEVICE_IOTLB_SIZE(val) ((val) & 0x1) #define
>> +VTD_INV_DESC_DEVICE_IOTLB_SID(val) (((val) >> 32) & 0xFFFFULL) #define
>> +VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL #define
>> +VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
>> +
>>   /* Information about page-selective IOTLB invalidate */  struct
>> VTDIOTLBPageInvInfo {
>>       uint16_t domain_id;
>> @@ -399,8 +408,8 @@ typedef struct VTDRootEntry VTDRootEntry;
>>   #define VTD_CONTEXT_ENTRY_FPD       (1ULL << 1) /* Fault Processing Disable
>> */
>>   #define VTD_CONTEXT_ENTRY_TT        (3ULL << 2) /* Translation Type */
>>   #define VTD_CONTEXT_TT_MULTI_LEVEL  0
>> -#define VTD_CONTEXT_TT_DEV_IOTLB    1
>> -#define VTD_CONTEXT_TT_PASS_THROUGH 2
>> +#define VTD_CONTEXT_TT_DEV_IOTLB    (1ULL << 2)
>> +#define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2)
>>   /* Second Level Page Translation Pointer*/
>>   #define VTD_CONTEXT_ENTRY_SLPTPTR   (~0xfffULL)
>>   #define VTD_CONTEXT_ENTRY_RSVD_LO   (0xff0ULL | ~VTD_HAW_MASK)
>> diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
>> index 0c89d98..361c07c 100644
>> --- a/include/hw/i386/x86-iommu.h
>> +++ b/include/hw/i386/x86-iommu.h
>> @@ -73,6 +73,7 @@ typedef struct IEC_Notifier IEC_Notifier;  struct
>> X86IOMMUState {
>>       SysBusDevice busdev;
>>       bool intr_supported;        /* Whether vIOMMU supports IR */
>> +    bool dt_supported;          /* Whether vIOMMU supports DT */
>>       IommuType type;             /* IOMMU type - AMD/Intel     */
>>       QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */  }; diff --git
>> a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index e39b764..ec62239
>> 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -738,11 +738,18 @@ static int
>> vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
>>                       "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
>>                       ce->hi, ce->lo);
>>           return -VTD_FR_CONTEXT_ENTRY_INV;
>> -    } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) {
>> -        VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
>> -                    "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
>> -                    ce->hi, ce->lo);
>> -        return -VTD_FR_CONTEXT_ENTRY_INV;
>> +    } else {
>> +        switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
>> +        case VTD_CONTEXT_TT_MULTI_LEVEL:
>> +            /* fall through */
>> +        case VTD_CONTEXT_TT_DEV_IOTLB:
>> +            break;
>> +        default:
>> +            VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
>> +                        "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
>> +                        ce->hi, ce->lo);
>> +            return -VTD_FR_CONTEXT_ENTRY_INV;
>> +        }
>>       }
>>       return 0;
>>   }
>> @@ -1438,7 +1445,61 @@ static bool
>> vtd_process_inv_iec_desc(IntelIOMMUState *s,
>>       vtd_iec_notify_all(s, !inv_desc->iec.granularity,
>>                          inv_desc->iec.index,
>>                          inv_desc->iec.index_mask);
>> +    return true;
>> +}
>> +
>> +static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
>> +                                          VTDInvDesc *inv_desc) {
>> +    VTDAddressSpace *vtd_dev_as;
>> +    IOMMUTLBEntry entry;
>> +    struct VTDBus *vtd_bus;
>> +    hwaddr addr;
>> +    uint64_t sz;
>> +    uint16_t sid;
>> +    uint8_t devfn;
>> +    bool size;
>> +    uint8_t bus_num;
>> +
>> +    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
>> +    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
>> +    devfn = sid & 0xff;
>> +    bus_num = sid >> 8;
>> +    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
>> +
>> +    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
>> +        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
>> +        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
>> +                    "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
>> +                    inv_desc->hi, inv_desc->lo);
>> +        return false;
>> +    }
>> +
>> +    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
>> +    if (!vtd_bus) {
>> +        goto done;
>> +    }
>> +
>> +    vtd_dev_as = vtd_bus->dev_as[devfn];
>> +    if (!vtd_dev_as) {
>> +        goto done;
>> +    }
>> +
>> +    if (size) {
>> +        sz = 1 << (ctz64(~(addr | (VTD_PAGE_MASK_4K - 1))) + 1);
>> +        addr &= ~(sz - 1);
>> +    } else {
>> +        sz = VTD_PAGE_SIZE;
>> +    }
>>
>> +    entry.target_as = &vtd_dev_as->as;
>> +    entry.addr_mask = sz - 1;
>> +    entry.iova = addr;
>> +    entry.perm = IOMMU_NONE;
>> +    entry.translated_addr = 0;
>> +    memory_region_notify_iommu(entry.target_as->root, entry);
>> +
>> +done:
>>       return true;
>>   }
>>
>> @@ -1490,6 +1551,14 @@ static bool vtd_process_inv_desc(IntelIOMMUState
>> *s)
>>           }
>>           break;
>>
>> +    case VTD_INV_DESC_DEVICE:
>> +        VTD_DPRINTF(INV, "Device IOTLB Invalidation Descriptor hi 0x%"PRIx64
>> +                    " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
>> +        if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
>> +            return false;
>> +        }
>> +        break;
>> +
>>       default:
>>           VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type "
>>                       "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8, @@ -2415,6
>> +2484,10 @@ static void vtd_init(IntelIOMMUState *s)
>>           assert(s->intr_eim != ON_OFF_AUTO_AUTO);
>>       }
>>
>> +    if (x86_iommu->dt_supported) {
>> +        s->ecap |= VTD_ECAP_DT;
>> +    }
>> +
>>       vtd_reset_context_cache(s);
>>       vtd_reset_iotlb(s);
>>
>> diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index
>> 2278af7..23dcd3f 100644
>> --- a/hw/i386/x86-iommu.c
>> +++ b/hw/i386/x86-iommu.c
>> @@ -106,6 +106,18 @@ static void x86_iommu_intremap_prop_set(Object *o,
>> bool value, Error **errp)
>>       s->intr_supported = value;
>>   }
>>
>> +static bool x86_iommu_device_iotlb_prop_get(Object *o, Error **errp) {
>> +    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
>> +    return s->dt_supported;
>> +}
>> +
>> +static void x86_iommu_device_iotlb_prop_set(Object *o, bool value,
>> +Error **errp) {
>> +    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
>> +    s->dt_supported = value;
>> +}
>> +
>>   static void x86_iommu_instance_init(Object *o)  {
>>       X86IOMMUState *s = X86_IOMMU_DEVICE(o); @@ -114,6 +126,11 @@
>> static void x86_iommu_instance_init(Object *o)
>>       s->intr_supported = false;
>>       object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get,
>>                                x86_iommu_intremap_prop_set, NULL);
>> +    s->dt_supported = false;
>> +    object_property_add_bool(o, "device-iotlb",
>> +                             x86_iommu_device_iotlb_prop_get,
>> +                             x86_iommu_device_iotlb_prop_set,
>> +                             NULL);
>>   }
>>
>>   static const TypeInfo x86_iommu_info = {
>> --
>> MST
>>
Jason Wang Feb. 16, 2017, 5:59 a.m. UTC | #9
On 2017年02月16日 13:43, Jason Wang wrote:
>
>
> On 2017年02月16日 13:36, Liu, Yi L wrote:
>>> -----Original Message-----
>>> From: Qemu-devel 
>>> [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]
>>> On Behalf Of Michael S. Tsirkin
>>> Sent: Tuesday, January 10, 2017 1:40 PM
>>> To: qemu-devel@nongnu.org
>>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
>>> <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter Xu
>>> <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard
>>> Henderson <rth@twiddle.net>
>>> Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb
>>> descriptor
>>>
>>> From: Jason Wang <jasowang@redhat.com>
>>>
>>> This patch enables device IOTLB support for intel iommu. The major 
>>> work is to
>>> implement QI device IOTLB descriptor processing and notify the 
>>> device through
>>> iommu notifier.
>>>
>> Hi Jason/Michael,
>>
>> Recently Peter Xu's patch also touched intel-iommu emulation. His 
>> patch shadows
>> second-level page table by capturing iotlb flush from guest. It would 
>> result in page
>> table updating in host. Does this patch also use the same map/umap 
>> API provided
>> by VFIO?
>
> Yes, it depends on the iommu notifier too.
>
>> If it is, then I think it would also update page table in host. It 
>> looks to be
>> a duplicate update. Pls refer to the following snapshot captured from 
>> section 6.5.2.5
>> of vtd spec.
>>
>> "Since translation requests from a device may be serviced by hardware 
>> from the IOTLB, software must
>> always request IOTLB invalidation (iotlb_inv_dsc) before requesting 
>> corresponding Device-TLB
>> (dev_tlb_inv_dsc) invalidation."
>>
>> Maybe for device-iotlb, we need a separate API which just pass down 
>> the invalidate
>> info without updating page table. Any thoughts?
>
> cc Alex.
>
> If we want ATS to be visible for guest (but I'm not sure if VFIO 
> support this), we probably need another notifier or a new flag.
>
> Thanks 

Or need a dedicated address_space if ATS were enabled for the device.
Peter Xu Feb. 17, 2017, 3:26 a.m. UTC | #10
On Thu, Feb 16, 2017 at 05:36:06AM +0000, Liu, Yi L wrote:
> > -----Original Message-----
> > From: Qemu-devel [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]
> > On Behalf Of Michael S. Tsirkin
> > Sent: Tuesday, January 10, 2017 1:40 PM
> > To: qemu-devel@nongnu.org
> > Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
> > <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter Xu
> > <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard
> > Henderson <rth@twiddle.net>
> > Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb
> > descriptor
> > 
> > From: Jason Wang <jasowang@redhat.com>
> > 
> > This patch enables device IOTLB support for intel iommu. The major work is to
> > implement QI device IOTLB descriptor processing and notify the device through
> > iommu notifier.
> >
> Hi Jason/Michael,
> 	
> Recently Peter Xu's patch also touched intel-iommu emulation. His patch shadows
> second-level page table by capturing iotlb flush from guest. It would result in page
> table updating in host. Does this patch also use the same map/umap API provided
> by VFIO? If it is, then I think it would also update page table in host.

I haven't considered complex scenarios, but if simply we have a VM
with one vhost device and one vfio-pci device, imho that should not be
a problem - device iotlb is targeting SID rather than DOMAIN. So
device iotlb invalidations for vhost will be sent to vhost device
only.

However, vhost may receive two invalidations here, but it won't matter
much since vhost is just flushing caches twice.

> It looks to be
> a duplicate update. Pls refer to the following snapshot captured from section 6.5.2.5
> of vtd spec. 
> 
> "Since translation requests from a device may be serviced by hardware from the IOTLB, software must
> always request IOTLB invalidation (iotlb_inv_dsc) before requesting corresponding Device-TLB
> (dev_tlb_inv_dsc) invalidation."
> 
> Maybe for device-iotlb, we need a separate API which just pass down the invalidate
> info without updating page table. Any thoughts?

Now imho I slightly prefer just use the current UNMAP notifier type
even for device iotlb device. But, we may need to do one more check
that we skip sending general iotlb invalidations to ATS enabled
devices like vhost, to avoid duplicated cache flushing. From virt-svm
side, do we have specific requirement to introduce a new flag for it?

Thanks,

-- peterx
Yi Liu Feb. 17, 2017, 6:18 a.m. UTC | #11
> -----Original Message-----

> From: Jason Wang [mailto:jasowang@redhat.com]

> Sent: Thursday, February 16, 2017 1:44 PM

> To: Liu, Yi L <yi.l.liu@intel.com>; Michael S. Tsirkin <mst@redhat.com>; qemu-

> devel@nongnu.org

> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> <ehabkost@redhat.com>; Peter Xu <peterx@redhat.com>; Paolo Bonzini

> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian, Kevin

> <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>; Alex Williamson

> <alex.williamson@redhat.com>

> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb

> descriptor

> 

> 

> 

> On 2017年02月16日 13:36, Liu, Yi L wrote:

> >> -----Original Message-----

> >> From: Qemu-devel

> >> [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]

> >> On Behalf Of Michael S. Tsirkin

> >> Sent: Tuesday, January 10, 2017 1:40 PM

> >> To: qemu-devel@nongnu.org

> >> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> >> <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter Xu

> >> <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard

> >> Henderson <rth@twiddle.net>

> >> Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb

> >> descriptor

> >>

> >> From: Jason Wang <jasowang@redhat.com>

> >>

> >> This patch enables device IOTLB support for intel iommu. The major

> >> work is to implement QI device IOTLB descriptor processing and notify

> >> the device through iommu notifier.

> >>

> > Hi Jason/Michael,

> >

> > Recently Peter Xu's patch also touched intel-iommu emulation. His

> > patch shadows second-level page table by capturing iotlb flush from

> > guest. It would result in page table updating in host. Does this patch

> > also use the same map/umap API provided by VFIO?

> 

> Yes, it depends on the iommu notifier too.

> 

> > If it is, then I think it would also update page table in host. It

> > looks to be a duplicate update. Pls refer to the following snapshot

> > captured from section 6.5.2.5 of vtd spec.

> >

> > "Since translation requests from a device may be serviced by hardware

> > from the IOTLB, software must always request IOTLB invalidation

> > (iotlb_inv_dsc) before requesting corresponding Device-TLB

> > (dev_tlb_inv_dsc) invalidation."

> >

> > Maybe for device-iotlb, we need a separate API which just pass down

> > the invalidate info without updating page table. Any thoughts?

> 

> cc Alex.

> 

> If we want ATS to be visible for guest (but I'm not sure if VFIO support this), we

> probably need another notifier or a new flag.


Jason, for assigned device, I think guest could see ATS if the assigned device
supports ATS. I can see it when passthru iGPU.

Regards,
Yi L
Yi Liu Feb. 17, 2017, 6:36 a.m. UTC | #12
> -----Original Message-----

> From: Peter Xu [mailto:peterx@redhat.com]

> Sent: Friday, February 17, 2017 11:26 AM

> To: Liu, Yi L <yi.l.liu@intel.com>

> Cc: Michael S. Tsirkin <mst@redhat.com>; qemu-devel@nongnu.org; Peter

> Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Paolo Bonzini

> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian, Kevin

> <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>

> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb

> descriptor

> 

> On Thu, Feb 16, 2017 at 05:36:06AM +0000, Liu, Yi L wrote:

> > > -----Original Message-----

> > > From: Qemu-devel

> > > [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]

> > > On Behalf Of Michael S. Tsirkin

> > > Sent: Tuesday, January 10, 2017 1:40 PM

> > > To: qemu-devel@nongnu.org

> > > Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> > > <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter Xu

> > > <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard

> > > Henderson <rth@twiddle.net>

> > > Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb

> > > descriptor

> > >

> > > From: Jason Wang <jasowang@redhat.com>

> > >

> > > This patch enables device IOTLB support for intel iommu. The major

> > > work is to implement QI device IOTLB descriptor processing and

> > > notify the device through iommu notifier.

> > >

> > Hi Jason/Michael,

> >

> > Recently Peter Xu's patch also touched intel-iommu emulation. His

> > patch shadows second-level page table by capturing iotlb flush from

> > guest. It would result in page table updating in host. Does this patch

> > also use the same map/umap API provided by VFIO? If it is, then I think it

> would also update page table in host.

> 

> I haven't considered complex scenarios, but if simply we have a VM with one

> vhost device and one vfio-pci device, imho that should not be a problem -

> device iotlb is targeting SID rather than DOMAIN. So device iotlb invalidations

> for vhost will be sent to vhost device only.


Peter, I think for assigned device, all guest iotlb flush should be translated to
be targeting device in the scope of host. Besides the scenario which has vhost
and vfio-pci device at the same time, how about only having vfio-pci device
and this device has ATS support. Then there should be device-iotlb flushing.
With this patch and your patch, it would also introduce two flushing.

> However, vhost may receive two invalidations here, but it won't matter much

> since vhost is just flushing caches twice.


yeah, so far I didn’t see functional issue, may just reduce performance^_^

> > It looks to be

> > a duplicate update. Pls refer to the following snapshot captured from

> > section 6.5.2.5 of vtd spec.

> >

> > "Since translation requests from a device may be serviced by hardware

> > from the IOTLB, software must always request IOTLB invalidation

> > (iotlb_inv_dsc) before requesting corresponding Device-TLB

> > (dev_tlb_inv_dsc) invalidation."

> >

> > Maybe for device-iotlb, we need a separate API which just pass down

> > the invalidate info without updating page table. Any thoughts?

> 

> Now imho I slightly prefer just use the current UNMAP notifier type even for

> device iotlb device. But, we may need to do one more check that we skip

> sending general iotlb invalidations to ATS enabled devices like vhost, to avoid

> duplicated cache flushing. From virt-svm side, do we have specific requirement

> to introduce a new flag for it?

I think it is a practical solution to do such a check to avoid duplicate flushing.

For virt-svm, requirement is a little bit different since it's not shadowing any guest
page table. It needs to shadow invalidate operations. So virt-svm will not use
MAP/UNMAP notifier. Instead, it may require notifier which passdown invalidate
info and then submit invalidation directly.(no page table updating in host). 

Regards,
Yi L
Jason Wang Feb. 17, 2017, 6:43 a.m. UTC | #13
On 2017年02月17日 14:18, Liu, Yi L wrote:
>> -----Original Message-----
>> From: Jason Wang [mailto:jasowang@redhat.com]
>> Sent: Thursday, February 16, 2017 1:44 PM
>> To: Liu, Yi L <yi.l.liu@intel.com>; Michael S. Tsirkin <mst@redhat.com>; qemu-
>> devel@nongnu.org
>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
>> <ehabkost@redhat.com>; Peter Xu <peterx@redhat.com>; Paolo Bonzini
>> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian, Kevin
>> <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>; Alex Williamson
>> <alex.williamson@redhat.com>
>> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb
>> descriptor
>>
>>
>>
>> On 2017年02月16日 13:36, Liu, Yi L wrote:
>>>> -----Original Message-----
>>>> From: Qemu-devel
>>>> [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]
>>>> On Behalf Of Michael S. Tsirkin
>>>> Sent: Tuesday, January 10, 2017 1:40 PM
>>>> To: qemu-devel@nongnu.org
>>>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
>>>> <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter Xu
>>>> <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard
>>>> Henderson <rth@twiddle.net>
>>>> Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb
>>>> descriptor
>>>>
>>>> From: Jason Wang <jasowang@redhat.com>
>>>>
>>>> This patch enables device IOTLB support for intel iommu. The major
>>>> work is to implement QI device IOTLB descriptor processing and notify
>>>> the device through iommu notifier.
>>>>
>>> Hi Jason/Michael,
>>>
>>> Recently Peter Xu's patch also touched intel-iommu emulation. His
>>> patch shadows second-level page table by capturing iotlb flush from
>>> guest. It would result in page table updating in host. Does this patch
>>> also use the same map/umap API provided by VFIO?
>> Yes, it depends on the iommu notifier too.
>>
>>> If it is, then I think it would also update page table in host. It
>>> looks to be a duplicate update. Pls refer to the following snapshot
>>> captured from section 6.5.2.5 of vtd spec.
>>>
>>> "Since translation requests from a device may be serviced by hardware
>>> from the IOTLB, software must always request IOTLB invalidation
>>> (iotlb_inv_dsc) before requesting corresponding Device-TLB
>>> (dev_tlb_inv_dsc) invalidation."
>>>
>>> Maybe for device-iotlb, we need a separate API which just pass down
>>> the invalidate info without updating page table. Any thoughts?
>> cc Alex.
>>
>> If we want ATS to be visible for guest (but I'm not sure if VFIO support this), we
>> probably need another notifier or a new flag.
> Jason, for assigned device, I think guest could see ATS if the assigned device
> supports ATS. I can see it when passthru iGPU.
>
> Regards,
> Yi L

Good to know this.

If I understand your suggestion correctly, you want a dedicated API to 
flush a hardware device IOTLB. I'm not sure this is really needed.

There's some discussion of similar issue in the past (when ATS is used 
for virtio-net/vhost), looks like we could solve this by not trigger the 
UNMAP notifier unless it was device IOTLB inv desc if ATS is enabled for 
the device? With this remote IOMMU/IOTLB can only get invalidation 
request once. For VFIO, the under layer IOMMU can deal with hardware 
device IOTLB without any extra efforts.

Does this make sense?

Thanks
Peter Xu Feb. 17, 2017, 7 a.m. UTC | #14
On Fri, Feb 17, 2017 at 06:36:41AM +0000, Liu, Yi L wrote:
> > -----Original Message-----
> > From: Peter Xu [mailto:peterx@redhat.com]
> > Sent: Friday, February 17, 2017 11:26 AM
> > To: Liu, Yi L <yi.l.liu@intel.com>
> > Cc: Michael S. Tsirkin <mst@redhat.com>; qemu-devel@nongnu.org; Peter
> > Maydell <peter.maydell@linaro.org>; Eduardo Habkost
> > <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Paolo Bonzini
> > <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian, Kevin
> > <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>
> > Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb
> > descriptor
> > 
> > On Thu, Feb 16, 2017 at 05:36:06AM +0000, Liu, Yi L wrote:
> > > > -----Original Message-----
> > > > From: Qemu-devel
> > > > [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]
> > > > On Behalf Of Michael S. Tsirkin
> > > > Sent: Tuesday, January 10, 2017 1:40 PM
> > > > To: qemu-devel@nongnu.org
> > > > Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
> > > > <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter Xu
> > > > <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard
> > > > Henderson <rth@twiddle.net>
> > > > Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb
> > > > descriptor
> > > >
> > > > From: Jason Wang <jasowang@redhat.com>
> > > >
> > > > This patch enables device IOTLB support for intel iommu. The major
> > > > work is to implement QI device IOTLB descriptor processing and
> > > > notify the device through iommu notifier.
> > > >
> > > Hi Jason/Michael,
> > >
> > > Recently Peter Xu's patch also touched intel-iommu emulation. His
> > > patch shadows second-level page table by capturing iotlb flush from
> > > guest. It would result in page table updating in host. Does this patch
> > > also use the same map/umap API provided by VFIO? If it is, then I think it
> > would also update page table in host.
> > 
> > I haven't considered complex scenarios, but if simply we have a VM with one
> > vhost device and one vfio-pci device, imho that should not be a problem -
> > device iotlb is targeting SID rather than DOMAIN. So device iotlb invalidations
> > for vhost will be sent to vhost device only.
> 
> Peter, I think for assigned device, all guest iotlb flush should be translated to
> be targeting device in the scope of host. Besides the scenario which has vhost
> and vfio-pci device at the same time, how about only having vfio-pci device
> and this device has ATS support. Then there should be device-iotlb flushing.
> With this patch and your patch, it would also introduce two flushing.

Hmm possibly. I'm still not quite familiar with ATS, but here what we
need to do may be that we forward these device-iotlb invalidations to
the hardware below, instead of sending UNMAP notifies, right?

> 
> > However, vhost may receive two invalidations here, but it won't matter much
> > since vhost is just flushing caches twice.
> 
> yeah, so far I didn’t see functional issue, may just reduce performance^_^
> 
> > > It looks to be
> > > a duplicate update. Pls refer to the following snapshot captured from
> > > section 6.5.2.5 of vtd spec.
> > >
> > > "Since translation requests from a device may be serviced by hardware
> > > from the IOTLB, software must always request IOTLB invalidation
> > > (iotlb_inv_dsc) before requesting corresponding Device-TLB
> > > (dev_tlb_inv_dsc) invalidation."
> > >
> > > Maybe for device-iotlb, we need a separate API which just pass down
> > > the invalidate info without updating page table. Any thoughts?
> > 
> > Now imho I slightly prefer just use the current UNMAP notifier type even for
> > device iotlb device. But, we may need to do one more check that we skip
> > sending general iotlb invalidations to ATS enabled devices like vhost, to avoid
> > duplicated cache flushing. From virt-svm side, do we have specific requirement
> > to introduce a new flag for it?
> I think it is a practical solution to do such a check to avoid duplicate flushing.
> 
> For virt-svm, requirement is a little bit different since it's not shadowing any guest
> page table. It needs to shadow invalidate operations. So virt-svm will not use
> MAP/UNMAP notifier. Instead, it may require notifier which passdown invalidate
> info and then submit invalidation directly.(no page table updating in host). 

Again, just want to know whether my above understanding is correct. If
so, instead of introducing a new flag, maybe we just need to enhance
current vtd_process_device_iotlb_desc() to behave differently
depending on which device the SID belongs to. E.g.:

(1) for emulated devices (virtio-pci/vhost is one of them), we pass
    down using a UNMAP, or say, do a cache flush

(2) for assigned devices, we pass it down to hardware by some way

I don't know whether there'll be a (3) though. Also, I don't know the
best way to achieve (2) (new vfio driver interface?).

Thanks,

-- peterx
Yi Liu Feb. 20, 2017, 8:27 a.m. UTC | #15
> -----Original Message-----

> From: Jason Wang [mailto:jasowang@redhat.com]

> Sent: Friday, February 17, 2017 2:43 PM

> To: Liu, Yi L <yi.l.liu@intel.com>; Michael S. Tsirkin <mst@redhat.com>; qemu-

> devel@nongnu.org

> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> <ehabkost@redhat.com>; Peter Xu <peterx@redhat.com>; Paolo Bonzini

> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian, Kevin

> <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>; Alex Williamson

> <alex.williamson@redhat.com>

> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb

> descriptor

> 

> 

> 

> On 2017年02月17日 14:18, Liu, Yi L wrote:

> >> -----Original Message-----

> >> From: Jason Wang [mailto:jasowang@redhat.com]

> >> Sent: Thursday, February 16, 2017 1:44 PM

> >> To: Liu, Yi L <yi.l.liu@intel.com>; Michael S. Tsirkin

> >> <mst@redhat.com>; qemu- devel@nongnu.org

> >> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> >> <ehabkost@redhat.com>; Peter Xu <peterx@redhat.com>; Paolo Bonzini

> >> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian,

> >> Kevin <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>;

> >> Alex Williamson <alex.williamson@redhat.com>

> >> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device

> >> iotlb descriptor

> >>

> >>

> >>

> >> On 2017年02月16日 13:36, Liu, Yi L wrote:

> >>>> -----Original Message-----

> >>>> From: Qemu-devel

> >>>> [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]

> >>>> On Behalf Of Michael S. Tsirkin

> >>>> Sent: Tuesday, January 10, 2017 1:40 PM

> >>>> To: qemu-devel@nongnu.org

> >>>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> >>>> <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter

> Xu

> >>>> <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard

> >>>> Henderson <rth@twiddle.net>

> >>>> Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device

> >>>> iotlb descriptor

> >>>>

> >>>> From: Jason Wang <jasowang@redhat.com>

> >>>>

> >>>> This patch enables device IOTLB support for intel iommu. The major

> >>>> work is to implement QI device IOTLB descriptor processing and

> >>>> notify the device through iommu notifier.

> >>>>

> >>> Hi Jason/Michael,

> >>>

> >>> Recently Peter Xu's patch also touched intel-iommu emulation. His

> >>> patch shadows second-level page table by capturing iotlb flush from

> >>> guest. It would result in page table updating in host. Does this

> >>> patch also use the same map/umap API provided by VFIO?

> >> Yes, it depends on the iommu notifier too.

> >>

> >>> If it is, then I think it would also update page table in host. It

> >>> looks to be a duplicate update. Pls refer to the following snapshot

> >>> captured from section 6.5.2.5 of vtd spec.

> >>>

> >>> "Since translation requests from a device may be serviced by

> >>> hardware from the IOTLB, software must always request IOTLB

> >>> invalidation

> >>> (iotlb_inv_dsc) before requesting corresponding Device-TLB

> >>> (dev_tlb_inv_dsc) invalidation."

> >>>

> >>> Maybe for device-iotlb, we need a separate API which just pass down

> >>> the invalidate info without updating page table. Any thoughts?

> >> cc Alex.

> >>

> >> If we want ATS to be visible for guest (but I'm not sure if VFIO

> >> support this), we probably need another notifier or a new flag.

> > Jason, for assigned device, I think guest could see ATS if the

> > assigned device supports ATS. I can see it when passthru iGPU.

> >

> > Regards,

> > Yi L

> 

> Good to know this.

> 

> If I understand your suggestion correctly, you want a dedicated API to flush a

> hardware device IOTLB. I'm not sure this is really needed.

yes, I'd like to have an extra API besides the current MAP/UNMAP.

> There's some discussion of similar issue in the past (when ATS is used for virtio-

> net/vhost), looks like we could solve this by not trigger the UNMAP notifier

> unless it was device IOTLB inv desc if ATS is enabled for the device? With this

> remote IOMMU/IOTLB can only get invalidation request once. For VFIO, the

> under layer IOMMU can deal with hardware device IOTLB without any extra

> efforts.

If I catch the background, I think it should be "not trigger the UNMAP notifier
when unless it was device IOTLB inv desc if ATS is enabled for the device"

Feel free to correct me.

Thanks,
Yi L
> Does this make sense?

> 

> Thanks
Yi Liu Feb. 20, 2017, 8:47 a.m. UTC | #16
> -----Original Message-----

> From: Peter Xu [mailto:peterx@redhat.com]

> Sent: Friday, February 17, 2017 3:00 PM

> To: Liu, Yi L <yi.l.liu@intel.com>

> Cc: Michael S. Tsirkin <mst@redhat.com>; qemu-devel@nongnu.org; Peter

> Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Paolo Bonzini

> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian, Kevin

> <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>

> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb

> descriptor

> 

> On Fri, Feb 17, 2017 at 06:36:41AM +0000, Liu, Yi L wrote:

> > > -----Original Message-----

> > > From: Peter Xu [mailto:peterx@redhat.com]

> > > Sent: Friday, February 17, 2017 11:26 AM

> > > To: Liu, Yi L <yi.l.liu@intel.com>

> > > Cc: Michael S. Tsirkin <mst@redhat.com>; qemu-devel@nongnu.org;

> > > Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> > > <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Paolo

> > > Bonzini <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>;

> > > Tian, Kevin <kevin.tian@intel.com>; Lan, Tianyu

> > > <tianyu.lan@intel.com>

> > > Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device

> > > iotlb descriptor

> > >

> > > On Thu, Feb 16, 2017 at 05:36:06AM +0000, Liu, Yi L wrote:

> > > > > -----Original Message-----

> > > > > From: Qemu-devel

> > > > > [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]

> > > > > On Behalf Of Michael S. Tsirkin

> > > > > Sent: Tuesday, January 10, 2017 1:40 PM

> > > > > To: qemu-devel@nongnu.org

> > > > > Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> > > > > <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter

> > > > > Xu <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>;

> > > > > Richard Henderson <rth@twiddle.net>

> > > > > Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device

> > > > > iotlb descriptor

> > > > >

> > > > > From: Jason Wang <jasowang@redhat.com>

> > > > >

> > > > > This patch enables device IOTLB support for intel iommu. The

> > > > > major work is to implement QI device IOTLB descriptor processing

> > > > > and notify the device through iommu notifier.

> > > > >

> > > > Hi Jason/Michael,

> > > >

> > > > Recently Peter Xu's patch also touched intel-iommu emulation. His

> > > > patch shadows second-level page table by capturing iotlb flush

> > > > from guest. It would result in page table updating in host. Does

> > > > this patch also use the same map/umap API provided by VFIO? If it

> > > > is, then I think it

> > > would also update page table in host.

> > >

> > > I haven't considered complex scenarios, but if simply we have a VM

> > > with one vhost device and one vfio-pci device, imho that should not

> > > be a problem - device iotlb is targeting SID rather than DOMAIN. So

> > > device iotlb invalidations for vhost will be sent to vhost device only.

> >

> > Peter, I think for assigned device, all guest iotlb flush should be

> > translated to be targeting device in the scope of host. Besides the

> > scenario which has vhost and vfio-pci device at the same time, how

> > about only having vfio-pci device and this device has ATS support. Then there

> should be device-iotlb flushing.

> > With this patch and your patch, it would also introduce two flushing.

> 

> Hmm possibly. I'm still not quite familiar with ATS, but here what we need to

> do may be that we forward these device-iotlb invalidations to the hardware

> below, instead of sending UNMAP notifies, right?

yes, that wouldn’t result in duplicate page table updating in host.

> 

> >

> > > However, vhost may receive two invalidations here, but it won't

> > > matter much since vhost is just flushing caches twice.

> >

> > yeah, so far I didn’t see functional issue, may just reduce

> > performance^_^

> >

> > > > It looks to be

> > > > a duplicate update. Pls refer to the following snapshot captured

> > > > from section 6.5.2.5 of vtd spec.

> > > >

> > > > "Since translation requests from a device may be serviced by

> > > > hardware from the IOTLB, software must always request IOTLB

> > > > invalidation

> > > > (iotlb_inv_dsc) before requesting corresponding Device-TLB

> > > > (dev_tlb_inv_dsc) invalidation."

> > > >

> > > > Maybe for device-iotlb, we need a separate API which just pass

> > > > down the invalidate info without updating page table. Any thoughts?

> > >

> > > Now imho I slightly prefer just use the current UNMAP notifier type

> > > even for device iotlb device. But, we may need to do one more check

> > > that we skip sending general iotlb invalidations to ATS enabled

> > > devices like vhost, to avoid duplicated cache flushing. From

> > > virt-svm side, do we have specific requirement to introduce a new flag for it?

> > I think it is a practical solution to do such a check to avoid duplicate flushing.

> >

> > For virt-svm, requirement is a little bit different since it's not

> > shadowing any guest page table. It needs to shadow invalidate

> > operations. So virt-svm will not use MAP/UNMAP notifier. Instead, it

> > may require notifier which passdown invalidate info and then submit

> invalidation directly.(no page table updating in host).

> 

> Again, just want to know whether my above understanding is correct. If so,

> instead of introducing a new flag, maybe we just need to enhance current

> vtd_process_device_iotlb_desc() to behave differently depending on which

> device the SID belongs to. E.g.:

> 

> (1) for emulated devices (virtio-pci/vhost is one of them), we pass

>     down using a UNMAP, or say, do a cache flush

> 

> (2) for assigned devices, we pass it down to hardware by some way

> 

> I don't know whether there'll be a (3) though. Also, I don't know the best way to

> achieve (2) (new vfio driver interface?).

yes. we can think about it. Tianyu and me are preparing RFC patch for virt-svm,
we would have more discussion on this part.

Regards,
Yi L
Jason Wang Feb. 20, 2017, 9:03 a.m. UTC | #17
On 2017年02月20日 16:27, Liu, Yi L wrote:
>> -----Original Message-----
>> From: Jason Wang [mailto:jasowang@redhat.com]
>> Sent: Friday, February 17, 2017 2:43 PM
>> To: Liu, Yi L <yi.l.liu@intel.com>; Michael S. Tsirkin <mst@redhat.com>; qemu-
>> devel@nongnu.org
>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
>> <ehabkost@redhat.com>; Peter Xu <peterx@redhat.com>; Paolo Bonzini
>> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian, Kevin
>> <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>; Alex Williamson
>> <alex.williamson@redhat.com>
>> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb
>> descriptor
>>
>>
>>
>> On 2017年02月17日 14:18, Liu, Yi L wrote:
>>>> -----Original Message-----
>>>> From: Jason Wang [mailto:jasowang@redhat.com]
>>>> Sent: Thursday, February 16, 2017 1:44 PM
>>>> To: Liu, Yi L <yi.l.liu@intel.com>; Michael S. Tsirkin
>>>> <mst@redhat.com>; qemu- devel@nongnu.org
>>>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
>>>> <ehabkost@redhat.com>; Peter Xu <peterx@redhat.com>; Paolo Bonzini
>>>> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian,
>>>> Kevin <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>;
>>>> Alex Williamson <alex.williamson@redhat.com>
>>>> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device
>>>> iotlb descriptor
>>>>
>>>>
>>>>
>>>> On 2017年02月16日 13:36, Liu, Yi L wrote:
>>>>>> -----Original Message-----
>>>>>> From: Qemu-devel
>>>>>> [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]
>>>>>> On Behalf Of Michael S. Tsirkin
>>>>>> Sent: Tuesday, January 10, 2017 1:40 PM
>>>>>> To: qemu-devel@nongnu.org
>>>>>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
>>>>>> <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter
>> Xu
>>>>>> <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard
>>>>>> Henderson <rth@twiddle.net>
>>>>>> Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device
>>>>>> iotlb descriptor
>>>>>>
>>>>>> From: Jason Wang <jasowang@redhat.com>
>>>>>>
>>>>>> This patch enables device IOTLB support for intel iommu. The major
>>>>>> work is to implement QI device IOTLB descriptor processing and
>>>>>> notify the device through iommu notifier.
>>>>>>
>>>>> Hi Jason/Michael,
>>>>>
>>>>> Recently Peter Xu's patch also touched intel-iommu emulation. His
>>>>> patch shadows second-level page table by capturing iotlb flush from
>>>>> guest. It would result in page table updating in host. Does this
>>>>> patch also use the same map/umap API provided by VFIO?
>>>> Yes, it depends on the iommu notifier too.
>>>>
>>>>> If it is, then I think it would also update page table in host. It
>>>>> looks to be a duplicate update. Pls refer to the following snapshot
>>>>> captured from section 6.5.2.5 of vtd spec.
>>>>>
>>>>> "Since translation requests from a device may be serviced by
>>>>> hardware from the IOTLB, software must always request IOTLB
>>>>> invalidation
>>>>> (iotlb_inv_dsc) before requesting corresponding Device-TLB
>>>>> (dev_tlb_inv_dsc) invalidation."
>>>>>
>>>>> Maybe for device-iotlb, we need a separate API which just pass down
>>>>> the invalidate info without updating page table. Any thoughts?
>>>> cc Alex.
>>>>
>>>> If we want ATS to be visible for guest (but I'm not sure if VFIO
>>>> support this), we probably need another notifier or a new flag.
>>> Jason, for assigned device, I think guest could see ATS if the
>>> assigned device supports ATS. I can see it when passthru iGPU.
>>>
>>> Regards,
>>> Yi L
>> Good to know this.
>>
>> If I understand your suggestion correctly, you want a dedicated API to flush a
>> hardware device IOTLB. I'm not sure this is really needed.
> yes, I'd like to have an extra API besides the current MAP/UNMAP.

I'm think whether or not we can do this without extra API or even don't 
need to care about this.

>
>> There's some discussion of similar issue in the past (when ATS is used for virtio-
>> net/vhost), looks like we could solve this by not trigger the UNMAP notifier
>> unless it was device IOTLB inv desc if ATS is enabled for the device? With this
>> remote IOMMU/IOTLB can only get invalidation request once. For VFIO, the
>> under layer IOMMU can deal with hardware device IOTLB without any extra
>> efforts.
> If I catch the background, I think it should be "not trigger the UNMAP notifier
> when unless it was device IOTLB inv desc if ATS is enabled for the device"

Seems not :)

I mean, if ATS is enabled for the device, only trigger UNMAP notifier 
when processing device IOTLB. Then we can only have flush once. And host 
IOMMU driver will take care of device IOTLB flush too.

Thanks

>
> Feel free to correct me.
>
> Thanks,
> Yi L
>> Does this make sense?
>>
>> Thanks
Yi Liu Feb. 20, 2017, 9:13 a.m. UTC | #18
> -----Original Message-----

> From: Jason Wang [mailto:jasowang@redhat.com]

> Sent: Monday, February 20, 2017 5:04 PM

> To: Liu, Yi L <yi.l.liu@intel.com>; Michael S. Tsirkin <mst@redhat.com>; qemu-

> devel@nongnu.org

> Cc: Lan, Tianyu <tianyu.lan@intel.com>; Peter Maydell

> <peter.maydell@linaro.org>; Tian, Kevin <kevin.tian@intel.com>; Eduardo

> Habkost <ehabkost@redhat.com>; Peter Xu <peterx@redhat.com>; Alex

> Williamson <alex.williamson@redhat.com>; Paolo Bonzini

> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>

> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb

> descriptor

> 

> 

> 

> On 2017年02月20日 16:27, Liu, Yi L wrote:

> >> -----Original Message-----

> >> From: Jason Wang [mailto:jasowang@redhat.com]

> >> Sent: Friday, February 17, 2017 2:43 PM

> >> To: Liu, Yi L <yi.l.liu@intel.com>; Michael S. Tsirkin

> >> <mst@redhat.com>; qemu- devel@nongnu.org

> >> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> >> <ehabkost@redhat.com>; Peter Xu <peterx@redhat.com>; Paolo Bonzini

> >> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian,

> >> Kevin <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>;

> >> Alex Williamson <alex.williamson@redhat.com>

> >> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device

> >> iotlb descriptor

> >>

> >>

> >>

> >> On 2017年02月17日 14:18, Liu, Yi L wrote:

> >>>> -----Original Message-----

> >>>> From: Jason Wang [mailto:jasowang@redhat.com]

> >>>> Sent: Thursday, February 16, 2017 1:44 PM

> >>>> To: Liu, Yi L <yi.l.liu@intel.com>; Michael S. Tsirkin

> >>>> <mst@redhat.com>; qemu- devel@nongnu.org

> >>>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> >>>> <ehabkost@redhat.com>; Peter Xu <peterx@redhat.com>; Paolo Bonzini

> >>>> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian,

> >>>> Kevin <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>;

> >>>> Alex Williamson <alex.williamson@redhat.com>

> >>>> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device

> >>>> iotlb descriptor

> >>>>

> >>>>

> >>>>

> >>>> On 2017年02月16日 13:36, Liu, Yi L wrote:

> >>>>>> -----Original Message-----

> >>>>>> From: Qemu-devel

> >>>>>> [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]

> >>>>>> On Behalf Of Michael S. Tsirkin

> >>>>>> Sent: Tuesday, January 10, 2017 1:40 PM

> >>>>>> To: qemu-devel@nongnu.org

> >>>>>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost

> >>>>>> <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter

> >> Xu

> >>>>>> <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard

> >>>>>> Henderson <rth@twiddle.net>

> >>>>>> Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device

> >>>>>> iotlb descriptor

> >>>>>>

> >>>>>> From: Jason Wang <jasowang@redhat.com>

> >>>>>>

> >>>>>> This patch enables device IOTLB support for intel iommu. The

> >>>>>> major work is to implement QI device IOTLB descriptor processing

> >>>>>> and notify the device through iommu notifier.

> >>>>>>

> >>>>> Hi Jason/Michael,

> >>>>>

> >>>>> Recently Peter Xu's patch also touched intel-iommu emulation. His

> >>>>> patch shadows second-level page table by capturing iotlb flush

> >>>>> from guest. It would result in page table updating in host. Does

> >>>>> this patch also use the same map/umap API provided by VFIO?

> >>>> Yes, it depends on the iommu notifier too.

> >>>>

> >>>>> If it is, then I think it would also update page table in host. It

> >>>>> looks to be a duplicate update. Pls refer to the following

> >>>>> snapshot captured from section 6.5.2.5 of vtd spec.

> >>>>>

> >>>>> "Since translation requests from a device may be serviced by

> >>>>> hardware from the IOTLB, software must always request IOTLB

> >>>>> invalidation

> >>>>> (iotlb_inv_dsc) before requesting corresponding Device-TLB

> >>>>> (dev_tlb_inv_dsc) invalidation."

> >>>>>

> >>>>> Maybe for device-iotlb, we need a separate API which just pass

> >>>>> down the invalidate info without updating page table. Any thoughts?

> >>>> cc Alex.

> >>>>

> >>>> If we want ATS to be visible for guest (but I'm not sure if VFIO

> >>>> support this), we probably need another notifier or a new flag.

> >>> Jason, for assigned device, I think guest could see ATS if the

> >>> assigned device supports ATS. I can see it when passthru iGPU.

> >>>

> >>> Regards,

> >>> Yi L

> >> Good to know this.

> >>

> >> If I understand your suggestion correctly, you want a dedicated API

> >> to flush a hardware device IOTLB. I'm not sure this is really needed.

> > yes, I'd like to have an extra API besides the current MAP/UNMAP.

> 

> I'm think whether or not we can do this without extra API or even don't need to

> care about this.

> 

> >

> >> There's some discussion of similar issue in the past (when ATS is

> >> used for virtio- net/vhost), looks like we could solve this by not

> >> trigger the UNMAP notifier unless it was device IOTLB inv desc if ATS

> >> is enabled for the device? With this remote IOMMU/IOTLB can only get

> >> invalidation request once. For VFIO, the under layer IOMMU can deal

> >> with hardware device IOTLB without any extra efforts.

> > If I catch the background, I think it should be "not trigger the UNMAP

> > notifier when unless it was device IOTLB inv desc if ATS is enabled for the

> device"

> 

> Seems not :)

> 

> I mean, if ATS is enabled for the device, only trigger UNMAP notifier when

> processing device IOTLB. Then we can only have flush once. And host IOMMU

> driver will take care of device IOTLB flush too.

hmmm, how about the iotlb inv desc which is prior to device-iotlb? I'm not sure
if it is practical to ignore the iotlb inv des since there is no SID info in it.

Regards,
Yi L
Jason Wang Feb. 20, 2017, 9:18 a.m. UTC | #19
On 2017年02月20日 17:13, Liu, Yi L wrote:
>> -----Original Message-----
>> From: Jason Wang [mailto:jasowang@redhat.com]
>> Sent: Monday, February 20, 2017 5:04 PM
>> To: Liu, Yi L <yi.l.liu@intel.com>; Michael S. Tsirkin <mst@redhat.com>; qemu-
>> devel@nongnu.org
>> Cc: Lan, Tianyu <tianyu.lan@intel.com>; Peter Maydell
>> <peter.maydell@linaro.org>; Tian, Kevin <kevin.tian@intel.com>; Eduardo
>> Habkost <ehabkost@redhat.com>; Peter Xu <peterx@redhat.com>; Alex
>> Williamson <alex.williamson@redhat.com>; Paolo Bonzini
>> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>
>> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device iotlb
>> descriptor
>>
>>
>>
>> On 2017年02月20日 16:27, Liu, Yi L wrote:
>>>> -----Original Message-----
>>>> From: Jason Wang [mailto:jasowang@redhat.com]
>>>> Sent: Friday, February 17, 2017 2:43 PM
>>>> To: Liu, Yi L <yi.l.liu@intel.com>; Michael S. Tsirkin
>>>> <mst@redhat.com>; qemu- devel@nongnu.org
>>>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
>>>> <ehabkost@redhat.com>; Peter Xu <peterx@redhat.com>; Paolo Bonzini
>>>> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian,
>>>> Kevin <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>;
>>>> Alex Williamson <alex.williamson@redhat.com>
>>>> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device
>>>> iotlb descriptor
>>>>
>>>>
>>>>
>>>> On 2017年02月17日 14:18, Liu, Yi L wrote:
>>>>>> -----Original Message-----
>>>>>> From: Jason Wang [mailto:jasowang@redhat.com]
>>>>>> Sent: Thursday, February 16, 2017 1:44 PM
>>>>>> To: Liu, Yi L <yi.l.liu@intel.com>; Michael S. Tsirkin
>>>>>> <mst@redhat.com>; qemu- devel@nongnu.org
>>>>>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
>>>>>> <ehabkost@redhat.com>; Peter Xu <peterx@redhat.com>; Paolo Bonzini
>>>>>> <pbonzini@redhat.com>; Richard Henderson <rth@twiddle.net>; Tian,
>>>>>> Kevin <kevin.tian@intel.com>; Lan, Tianyu <tianyu.lan@intel.com>;
>>>>>> Alex Williamson <alex.williamson@redhat.com>
>>>>>> Subject: Re: [Qemu-devel] [PULL 08/41] intel_iommu: support device
>>>>>> iotlb descriptor
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 2017年02月16日 13:36, Liu, Yi L wrote:
>>>>>>>> -----Original Message-----
>>>>>>>> From: Qemu-devel
>>>>>>>> [mailto:qemu-devel-bounces+yi.l.liu=intel.com@nongnu.org]
>>>>>>>> On Behalf Of Michael S. Tsirkin
>>>>>>>> Sent: Tuesday, January 10, 2017 1:40 PM
>>>>>>>> To: qemu-devel@nongnu.org
>>>>>>>> Cc: Peter Maydell <peter.maydell@linaro.org>; Eduardo Habkost
>>>>>>>> <ehabkost@redhat.com>; Jason Wang <jasowang@redhat.com>; Peter
>>>> Xu
>>>>>>>> <peterx@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard
>>>>>>>> Henderson <rth@twiddle.net>
>>>>>>>> Subject: [Qemu-devel] [PULL 08/41] intel_iommu: support device
>>>>>>>> iotlb descriptor
>>>>>>>>
>>>>>>>> From: Jason Wang <jasowang@redhat.com>
>>>>>>>>
>>>>>>>> This patch enables device IOTLB support for intel iommu. The
>>>>>>>> major work is to implement QI device IOTLB descriptor processing
>>>>>>>> and notify the device through iommu notifier.
>>>>>>>>
>>>>>>> Hi Jason/Michael,
>>>>>>>
>>>>>>> Recently Peter Xu's patch also touched intel-iommu emulation. His
>>>>>>> patch shadows second-level page table by capturing iotlb flush
>>>>>>> from guest. It would result in page table updating in host. Does
>>>>>>> this patch also use the same map/umap API provided by VFIO?
>>>>>> Yes, it depends on the iommu notifier too.
>>>>>>
>>>>>>> If it is, then I think it would also update page table in host. It
>>>>>>> looks to be a duplicate update. Pls refer to the following
>>>>>>> snapshot captured from section 6.5.2.5 of vtd spec.
>>>>>>>
>>>>>>> "Since translation requests from a device may be serviced by
>>>>>>> hardware from the IOTLB, software must always request IOTLB
>>>>>>> invalidation
>>>>>>> (iotlb_inv_dsc) before requesting corresponding Device-TLB
>>>>>>> (dev_tlb_inv_dsc) invalidation."
>>>>>>>
>>>>>>> Maybe for device-iotlb, we need a separate API which just pass
>>>>>>> down the invalidate info without updating page table. Any thoughts?
>>>>>> cc Alex.
>>>>>>
>>>>>> If we want ATS to be visible for guest (but I'm not sure if VFIO
>>>>>> support this), we probably need another notifier or a new flag.
>>>>> Jason, for assigned device, I think guest could see ATS if the
>>>>> assigned device supports ATS. I can see it when passthru iGPU.
>>>>>
>>>>> Regards,
>>>>> Yi L
>>>> Good to know this.
>>>>
>>>> If I understand your suggestion correctly, you want a dedicated API
>>>> to flush a hardware device IOTLB. I'm not sure this is really needed.
>>> yes, I'd like to have an extra API besides the current MAP/UNMAP.
>> I'm think whether or not we can do this without extra API or even don't need to
>> care about this.
>>
>>>> There's some discussion of similar issue in the past (when ATS is
>>>> used for virtio- net/vhost), looks like we could solve this by not
>>>> trigger the UNMAP notifier unless it was device IOTLB inv desc if ATS
>>>> is enabled for the device? With this remote IOMMU/IOTLB can only get
>>>> invalidation request once. For VFIO, the under layer IOMMU can deal
>>>> with hardware device IOTLB without any extra efforts.
>>> If I catch the background, I think it should be "not trigger the UNMAP
>>> notifier when unless it was device IOTLB inv desc if ATS is enabled for the
>> device"
>>
>> Seems not :)
>>
>> I mean, if ATS is enabled for the device, only trigger UNMAP notifier when
>> processing device IOTLB. Then we can only have flush once. And host IOMMU
>> driver will take care of device IOTLB flush too.
> hmmm, how about the iotlb inv desc which is prior to device-iotlb?

Any issue in this case?

> I'm not sure
> if it is practical to ignore the iotlb inv des since there is no SID info in it.

Yes, this needs some changes maybe.

Thanks

>
> Regards,
> Yi L
diff mbox

Patch

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 11abfa2..356f188 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -183,6 +183,7 @@ 
 /* (offset >> 4) << 8 */
 #define VTD_ECAP_IRO                (DMAR_IOTLB_REG_OFFSET << 4)
 #define VTD_ECAP_QI                 (1ULL << 1)
+#define VTD_ECAP_DT                 (1ULL << 2)
 /* Interrupt Remapping support */
 #define VTD_ECAP_IR                 (1ULL << 3)
 #define VTD_ECAP_EIM                (1ULL << 4)
@@ -326,6 +327,7 @@  typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_TYPE               0xf
 #define VTD_INV_DESC_CC                 0x1 /* Context-cache Invalidate Desc */
 #define VTD_INV_DESC_IOTLB              0x2
+#define VTD_INV_DESC_DEVICE             0x3
 #define VTD_INV_DESC_IEC                0x4 /* Interrupt Entry Cache
                                                Invalidate Descriptor */
 #define VTD_INV_DESC_WAIT               0x5 /* Invalidation Wait Descriptor */
@@ -361,6 +363,13 @@  typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000ff00ULL
 #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
 
+/* Mask for Device IOTLB Invalidate Descriptor */
+#define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
+#define VTD_INV_DESC_DEVICE_IOTLB_SIZE(val) ((val) & 0x1)
+#define VTD_INV_DESC_DEVICE_IOTLB_SID(val) (((val) >> 32) & 0xFFFFULL)
+#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
+#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
+
 /* Information about page-selective IOTLB invalidate */
 struct VTDIOTLBPageInvInfo {
     uint16_t domain_id;
@@ -399,8 +408,8 @@  typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_CONTEXT_ENTRY_FPD       (1ULL << 1) /* Fault Processing Disable */
 #define VTD_CONTEXT_ENTRY_TT        (3ULL << 2) /* Translation Type */
 #define VTD_CONTEXT_TT_MULTI_LEVEL  0
-#define VTD_CONTEXT_TT_DEV_IOTLB    1
-#define VTD_CONTEXT_TT_PASS_THROUGH 2
+#define VTD_CONTEXT_TT_DEV_IOTLB    (1ULL << 2)
+#define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2)
 /* Second Level Page Translation Pointer*/
 #define VTD_CONTEXT_ENTRY_SLPTPTR   (~0xfffULL)
 #define VTD_CONTEXT_ENTRY_RSVD_LO   (0xff0ULL | ~VTD_HAW_MASK)
diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
index 0c89d98..361c07c 100644
--- a/include/hw/i386/x86-iommu.h
+++ b/include/hw/i386/x86-iommu.h
@@ -73,6 +73,7 @@  typedef struct IEC_Notifier IEC_Notifier;
 struct X86IOMMUState {
     SysBusDevice busdev;
     bool intr_supported;        /* Whether vIOMMU supports IR */
+    bool dt_supported;          /* Whether vIOMMU supports DT */
     IommuType type;             /* IOMMU type - AMD/Intel     */
     QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */
 };
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e39b764..ec62239 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -738,11 +738,18 @@  static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
                     "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
                     ce->hi, ce->lo);
         return -VTD_FR_CONTEXT_ENTRY_INV;
-    } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) {
-        VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
-                    "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
-                    ce->hi, ce->lo);
-        return -VTD_FR_CONTEXT_ENTRY_INV;
+    } else {
+        switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
+        case VTD_CONTEXT_TT_MULTI_LEVEL:
+            /* fall through */
+        case VTD_CONTEXT_TT_DEV_IOTLB:
+            break;
+        default:
+            VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
+                        "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
+                        ce->hi, ce->lo);
+            return -VTD_FR_CONTEXT_ENTRY_INV;
+        }
     }
     return 0;
 }
@@ -1438,7 +1445,61 @@  static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
     vtd_iec_notify_all(s, !inv_desc->iec.granularity,
                        inv_desc->iec.index,
                        inv_desc->iec.index_mask);
+    return true;
+}
+
+static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
+                                          VTDInvDesc *inv_desc)
+{
+    VTDAddressSpace *vtd_dev_as;
+    IOMMUTLBEntry entry;
+    struct VTDBus *vtd_bus;
+    hwaddr addr;
+    uint64_t sz;
+    uint16_t sid;
+    uint8_t devfn;
+    bool size;
+    uint8_t bus_num;
+
+    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
+    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
+    devfn = sid & 0xff;
+    bus_num = sid >> 8;
+    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
+
+    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
+        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
+        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
+                    "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
+                    inv_desc->hi, inv_desc->lo);
+        return false;
+    }
+
+    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
+    if (!vtd_bus) {
+        goto done;
+    }
+
+    vtd_dev_as = vtd_bus->dev_as[devfn];
+    if (!vtd_dev_as) {
+        goto done;
+    }
+
+    if (size) {
+        sz = 1 << (ctz64(~(addr | (VTD_PAGE_MASK_4K - 1))) + 1);
+        addr &= ~(sz - 1);
+    } else {
+        sz = VTD_PAGE_SIZE;
+    }
 
+    entry.target_as = &vtd_dev_as->as;
+    entry.addr_mask = sz - 1;
+    entry.iova = addr;
+    entry.perm = IOMMU_NONE;
+    entry.translated_addr = 0;
+    memory_region_notify_iommu(entry.target_as->root, entry);
+
+done:
     return true;
 }
 
@@ -1490,6 +1551,14 @@  static bool vtd_process_inv_desc(IntelIOMMUState *s)
         }
         break;
 
+    case VTD_INV_DESC_DEVICE:
+        VTD_DPRINTF(INV, "Device IOTLB Invalidation Descriptor hi 0x%"PRIx64
+                    " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+        if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
+            return false;
+        }
+        break;
+
     default:
         VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type "
                     "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8,
@@ -2415,6 +2484,10 @@  static void vtd_init(IntelIOMMUState *s)
         assert(s->intr_eim != ON_OFF_AUTO_AUTO);
     }
 
+    if (x86_iommu->dt_supported) {
+        s->ecap |= VTD_ECAP_DT;
+    }
+
     vtd_reset_context_cache(s);
     vtd_reset_iotlb(s);
 
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index 2278af7..23dcd3f 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -106,6 +106,18 @@  static void x86_iommu_intremap_prop_set(Object *o, bool value, Error **errp)
     s->intr_supported = value;
 }
 
+static bool x86_iommu_device_iotlb_prop_get(Object *o, Error **errp)
+{
+    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
+    return s->dt_supported;
+}
+
+static void x86_iommu_device_iotlb_prop_set(Object *o, bool value, Error **errp)
+{
+    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
+    s->dt_supported = value;
+}
+
 static void x86_iommu_instance_init(Object *o)
 {
     X86IOMMUState *s = X86_IOMMU_DEVICE(o);
@@ -114,6 +126,11 @@  static void x86_iommu_instance_init(Object *o)
     s->intr_supported = false;
     object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get,
                              x86_iommu_intremap_prop_set, NULL);
+    s->dt_supported = false;
+    object_property_add_bool(o, "device-iotlb",
+                             x86_iommu_device_iotlb_prop_get,
+                             x86_iommu_device_iotlb_prop_set,
+                             NULL);
 }
 
 static const TypeInfo x86_iommu_info = {