diff mbox

[v3,4/4] intel_iommu: implement mru list for iotlb

Message ID 1499847223-8078-5-git-send-email-peterx@redhat.com
State New
Headers show

Commit Message

Peter Xu July 12, 2017, 8:13 a.m. UTC
It is not wise to discard all the IOTLB cache when cache size reaches
max, but that's what we do now. A slightly better (but still simple) way
to do this is, we just throw away the least recently used cache entry.

This patch implements an MRU list algorithm for VT-d IOTLB. The main logic
is to maintain a Most Recently Used list for the IOTLB entries. The hash
table is still used for lookup, though a new list field is added to each
IOTLB entry for an IOTLB MRU list. For each active IOTLB entry, it's both
in the hash table in s->iotlb, and also linked into the MRU list of
s->iotlb_head. The hash helps in fast lookup, and the MRU list helps in
knowing whether the cache is still hot.

After we have such an MRU list, we replace all the iterations over IOTLB
entries with list iterations rather than hash table iterations.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/i386/intel_iommu.c          | 75 +++++++++++++++++++++++++-----------------
 hw/i386/intel_iommu_internal.h |  8 -----
 hw/i386/trace-events           |  1 -
 include/hw/i386/intel_iommu.h  |  6 +++-
 4 files changed, 50 insertions(+), 40 deletions(-)

Comments

Jason Wang July 13, 2017, 8:48 a.m. UTC | #1
On 2017年07月12日 16:13, Peter Xu wrote:
> It is not wise to disgard all the IOTLB cache when cache size reaches
> max, but that's what we do now. A slightly better (but still simple) way
> to do this is, we just throw away the least recent used cache entry.
>
> This patch implemented MRU list algorithm for VT-d IOTLB. The main logic
> is to maintain a Most Recently Used list for the IOTLB entries. The hash
> table is still used for lookup, though a new list field is added to each
> IOTLB entry for a iotlb MRU list. For each active IOTLB entry, it's both
> in the hash table in s->iotlb, and also linked into the MRU list of
> s->iotlb_head. The hash helps in fast lookup, and the MRU list helps in
> knowing whether the cache is still hot.
>
> After we have such a MRU list, replacing all the iterators of IOTLB
> entries by using list iterations rather than hash table iterations.

Any reason of doing this, I thought hashtable was even a little bit faster?

Thanks

>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>   hw/i386/intel_iommu.c          | 75 +++++++++++++++++++++++++-----------------
>   hw/i386/intel_iommu_internal.h |  8 -----
>   hw/i386/trace-events           |  1 -
>   include/hw/i386/intel_iommu.h  |  6 +++-
>   4 files changed, 50 insertions(+), 40 deletions(-)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index e17340c..6320dea 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -37,6 +37,9 @@
>   #include "kvm_i386.h"
>   #include "trace.h"
>   
> +#define FOREACH_IOTLB_SAFE(entry, s, entry_n) \
> +    QTAILQ_FOREACH_SAFE(entry, &(s)->iotlb_head, link, entry_n)
> +
>   static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
>                               uint64_t wmask, uint64_t w1cmask)
>   {
> @@ -139,14 +142,6 @@ static guint vtd_uint64_hash(gconstpointer v)
>       return (guint)*(const uint64_t *)v;
>   }
>   
> -static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
> -                                          gpointer user_data)
> -{
> -    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
> -    uint16_t domain_id = *(uint16_t *)user_data;
> -    return entry->domain_id == domain_id;
> -}
> -
>   /* The shift of an addr for a certain level of paging structure */
>   static inline uint32_t vtd_slpt_level_shift(uint32_t level)
>   {
> @@ -159,18 +154,6 @@ static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
>       return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
>   }
>   
> -static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
> -                                        gpointer user_data)
> -{
> -    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
> -    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
> -    uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
> -    uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
> -    return (entry->domain_id == info->domain_id) &&
> -            (((entry->gfn & info->mask) == gfn) ||
> -             (entry->gfn == gfn_tlb));
> -}
> -
>   /* Reset all the gen of VTDAddressSpace to zero and set the gen of
>    * IntelIOMMUState to 1.
>    */
> @@ -201,6 +184,7 @@ static void vtd_reset_iotlb(IntelIOMMUState *s)
>   {
>       assert(s->iotlb);
>       g_hash_table_remove_all(s->iotlb);
> +    QTAILQ_INIT(&s->iotlb_head);
>   }
>   
>   static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
> @@ -231,6 +215,9 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
>                                   source_id, level);
>           entry = g_hash_table_lookup(s->iotlb, &key);
>           if (entry) {
> +            /* Move the entry to the head of MRU list */
> +            QTAILQ_REMOVE(&s->iotlb_head, entry, link);
> +            QTAILQ_INSERT_HEAD(&s->iotlb_head, entry, link);
>               goto out;
>           }
>       }
> @@ -239,11 +226,23 @@ out:
>       return entry;
>   }
>   
> +static void vtd_iotlb_remove_entry(IntelIOMMUState *s, VTDIOTLBEntry *entry)
> +{
> +    uint64_t key = entry->key;
> +
> +    /*
> +     * To remove an entry, we need to both remove it from the MRU
> +     * list, and also from the hash table.
> +     */
> +    QTAILQ_REMOVE(&s->iotlb_head, entry, link);
> +    g_hash_table_remove(s->iotlb, &key);
> +}
> +
>   static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
>                                uint16_t domain_id, hwaddr addr, uint64_t slpte,
>                                uint8_t access_flags, uint32_t level)
>   {
> -    VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
> +    VTDIOTLBEntry *entry = g_new0(VTDIOTLBEntry, 1), *last;
>       uint64_t *key = g_malloc(sizeof(*key));
>       uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
>   
> @@ -253,8 +252,9 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
>   
>       trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
>       if (g_hash_table_size(s->iotlb) >= s->iotlb_size) {
> -        trace_vtd_iotlb_reset("iotlb exceeds size limit");
> -        vtd_reset_iotlb(s);
> +        /* Remove the Least Recently Used cache */
> +        last = QTAILQ_LAST(&s->iotlb_head, VTDIOTLBEntryHead);
> +        vtd_iotlb_remove_entry(s, last);
>       }
>   
>       entry->gfn = gfn;
> @@ -263,7 +263,11 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
>       entry->access_flags = access_flags;
>       entry->mask = vtd_slpt_level_page_mask(level);
>       *key = vtd_get_iotlb_key(gfn, source_id, level);
> +    entry->key = *key;
>       g_hash_table_replace(s->iotlb, key, entry);
> +
> +    /* Update MRU list */
> +    QTAILQ_INSERT_HEAD(&s->iotlb_head, entry, link);
>   }
>   
>   /* Given the reg addr of both the message data and address, generate an
> @@ -1354,11 +1358,15 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
>       IntelIOMMUNotifierNode *node;
>       VTDContextEntry ce;
>       VTDAddressSpace *vtd_as;
> +    VTDIOTLBEntry *entry, *entry_n;
>   
>       trace_vtd_inv_desc_iotlb_domain(domain_id);
>   
> -    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
> -                                &domain_id);
> +    FOREACH_IOTLB_SAFE(entry, s, entry_n) {
> +        if (entry->domain_id == domain_id) {
> +            vtd_iotlb_remove_entry(s, entry);
> +        }
> +    }
>   
>       QLIST_FOREACH(node, &s->notifiers_list, next) {
>           vtd_as = node->vtd_as;
> @@ -1400,15 +1408,22 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
>   static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
>                                         hwaddr addr, uint8_t am)
>   {
> -    VTDIOTLBPageInvInfo info;
> +    VTDIOTLBEntry *entry, *entry_n;
> +    uint64_t gfn, mask;
>   
>       trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
>   
>       assert(am <= VTD_MAMV);
> -    info.domain_id = domain_id;
> -    info.addr = addr;
> -    info.mask = ~((1 << am) - 1);
> -    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
> +
> +    mask = ~((1 << am) - 1);
> +    gfn = (addr >> VTD_PAGE_SHIFT) & mask;
> +
> +    FOREACH_IOTLB_SAFE(entry, s, entry_n) {
> +        if (entry->domain_id == domain_id && (entry->gfn & mask) == gfn) {
> +            vtd_iotlb_remove_entry(s, entry);
> +        }
> +    }
> +
>       vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
>   }
>   
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 2d77249..76efc66 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -372,14 +372,6 @@ typedef union VTDInvDesc VTDInvDesc;
>   #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
>   #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
>   
> -/* Information about page-selective IOTLB invalidate */
> -struct VTDIOTLBPageInvInfo {
> -    uint16_t domain_id;
> -    uint64_t addr;
> -    uint8_t mask;
> -};
> -typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
> -
>   /* Pagesize of VTD paging structures, including root and context tables */
>   #define VTD_PAGE_SHIFT              12
>   #define VTD_PAGE_SIZE               (1ULL << VTD_PAGE_SHIFT)
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index 42d8a7e..b552815 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -34,7 +34,6 @@ vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain)
>   vtd_iotlb_page_update(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page update sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
>   vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen) "IOTLB context hit bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32
>   vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32
> -vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)"
>   vtd_fault_disabled(void) "Fault processing disabled for context entry"
>   vtd_replay_ce_valid(uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint64_t hi, uint64_t lo) "replay valid context device %02"PRIx8":%02"PRIx8".%02"PRIx8" domain 0x%"PRIx16" hi 0x%"PRIx64" lo 0x%"PRIx64
>   vtd_replay_ce_invalid(uint8_t bus, uint8_t dev, uint8_t fn) "replay invalid context device %02"PRIx8":%02"PRIx8".%02"PRIx8
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 1b51c9f..d2caab3 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -98,9 +98,11 @@ struct VTDBus {
>   
>   struct VTDIOTLBEntry {
>       uint64_t gfn;
> -    uint16_t domain_id;
>       uint64_t slpte;
>       uint64_t mask;
> +    uint64_t key;
> +    QTAILQ_ENTRY(VTDIOTLBEntry) link;
> +    uint16_t domain_id;
>       uint8_t access_flags;
>   };
>   
> @@ -288,6 +290,8 @@ struct IntelIOMMUState {
>       uint32_t context_cache_gen;     /* Should be in [1,MAX] */
>       GHashTable *iotlb;              /* IOTLB */
>       uint16_t iotlb_size;            /* IOTLB max cache entries */
> +    /* Head of IOTLB MRU list */
> +    QTAILQ_HEAD(VTDIOTLBEntryHead, VTDIOTLBEntry) iotlb_head;
>   
>       MemoryRegionIOMMUOps iommu_ops;
>       GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */
Michael S. Tsirkin July 13, 2017, 5:49 p.m. UTC | #2
On Wed, Jul 12, 2017 at 04:13:43PM +0800, Peter Xu wrote:
> It is not wise to disgard all the IOTLB cache when cache size reaches
> max, but that's what we do now. A slightly better (but still simple) way
> to do this is, we just throw away the least recent used cache entry.

Not wise how? Slower? It seems simpler.

> This patch implemented MRU list algorithm for VT-d IOTLB. The main logic
> is to maintain a Most Recently Used list for the IOTLB entries. The hash
> table is still used for lookup, though a new list field is added to each
> IOTLB entry for a iotlb MRU list. For each active IOTLB entry, it's both
> in the hash table in s->iotlb, and also linked into the MRU list of
> s->iotlb_head. The hash helps in fast lookup, and the MRU list helps in
> knowing whether the cache is still hot.

Helps how? Faster?  What's missing here is a report of the performance
impact.

> After we have such a MRU list, replacing all the iterators of IOTLB
> entries by using list iterations rather than hash table iterations.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  hw/i386/intel_iommu.c          | 75 +++++++++++++++++++++++++-----------------
>  hw/i386/intel_iommu_internal.h |  8 -----
>  hw/i386/trace-events           |  1 -
>  include/hw/i386/intel_iommu.h  |  6 +++-
>  4 files changed, 50 insertions(+), 40 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index e17340c..6320dea 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -37,6 +37,9 @@
>  #include "kvm_i386.h"
>  #include "trace.h"
>  
> +#define FOREACH_IOTLB_SAFE(entry, s, entry_n) \
> +    QTAILQ_FOREACH_SAFE(entry, &(s)->iotlb_head, link, entry_n)
> +
>  static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
>                              uint64_t wmask, uint64_t w1cmask)
>  {
> @@ -139,14 +142,6 @@ static guint vtd_uint64_hash(gconstpointer v)
>      return (guint)*(const uint64_t *)v;
>  }
>  
> -static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
> -                                          gpointer user_data)
> -{
> -    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
> -    uint16_t domain_id = *(uint16_t *)user_data;
> -    return entry->domain_id == domain_id;
> -}
> -
>  /* The shift of an addr for a certain level of paging structure */
>  static inline uint32_t vtd_slpt_level_shift(uint32_t level)
>  {
> @@ -159,18 +154,6 @@ static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
>      return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
>  }
>  
> -static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
> -                                        gpointer user_data)
> -{
> -    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
> -    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
> -    uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
> -    uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
> -    return (entry->domain_id == info->domain_id) &&
> -            (((entry->gfn & info->mask) == gfn) ||
> -             (entry->gfn == gfn_tlb));
> -}
> -
>  /* Reset all the gen of VTDAddressSpace to zero and set the gen of
>   * IntelIOMMUState to 1.
>   */
> @@ -201,6 +184,7 @@ static void vtd_reset_iotlb(IntelIOMMUState *s)
>  {
>      assert(s->iotlb);
>      g_hash_table_remove_all(s->iotlb);
> +    QTAILQ_INIT(&s->iotlb_head);
>  }
>  
>  static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
> @@ -231,6 +215,9 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
>                                  source_id, level);
>          entry = g_hash_table_lookup(s->iotlb, &key);
>          if (entry) {
> +            /* Move the entry to the head of MRU list */
> +            QTAILQ_REMOVE(&s->iotlb_head, entry, link);
> +            QTAILQ_INSERT_HEAD(&s->iotlb_head, entry, link);
>              goto out;
>          }
>      }
> @@ -239,11 +226,23 @@ out:
>      return entry;
>  }
>  
> +static void vtd_iotlb_remove_entry(IntelIOMMUState *s, VTDIOTLBEntry *entry)
> +{
> +    uint64_t key = entry->key;
> +
> +    /*
> +     * To remove an entry, we need to both remove it from the MRU
> +     * list, and also from the hash table.
> +     */
> +    QTAILQ_REMOVE(&s->iotlb_head, entry, link);
> +    g_hash_table_remove(s->iotlb, &key);
> +}
> +
>  static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
>                               uint16_t domain_id, hwaddr addr, uint64_t slpte,
>                               uint8_t access_flags, uint32_t level)
>  {
> -    VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
> +    VTDIOTLBEntry *entry = g_new0(VTDIOTLBEntry, 1), *last;
>      uint64_t *key = g_malloc(sizeof(*key));
>      uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
>  
> @@ -253,8 +252,9 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
>  
>      trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
>      if (g_hash_table_size(s->iotlb) >= s->iotlb_size) {
> -        trace_vtd_iotlb_reset("iotlb exceeds size limit");
> -        vtd_reset_iotlb(s);
> +        /* Remove the Least Recently Used cache */
> +        last = QTAILQ_LAST(&s->iotlb_head, VTDIOTLBEntryHead);
> +        vtd_iotlb_remove_entry(s, last);
>      }
>  
>      entry->gfn = gfn;
> @@ -263,7 +263,11 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
>      entry->access_flags = access_flags;
>      entry->mask = vtd_slpt_level_page_mask(level);
>      *key = vtd_get_iotlb_key(gfn, source_id, level);
> +    entry->key = *key;
>      g_hash_table_replace(s->iotlb, key, entry);
> +
> +    /* Update MRU list */
> +    QTAILQ_INSERT_HEAD(&s->iotlb_head, entry, link);
>  }
>  
>  /* Given the reg addr of both the message data and address, generate an
> @@ -1354,11 +1358,15 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
>      IntelIOMMUNotifierNode *node;
>      VTDContextEntry ce;
>      VTDAddressSpace *vtd_as;
> +    VTDIOTLBEntry *entry, *entry_n;
>  
>      trace_vtd_inv_desc_iotlb_domain(domain_id);
>  
> -    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
> -                                &domain_id);
> +    FOREACH_IOTLB_SAFE(entry, s, entry_n) {
> +        if (entry->domain_id == domain_id) {
> +            vtd_iotlb_remove_entry(s, entry);
> +        }
> +    }
>  
>      QLIST_FOREACH(node, &s->notifiers_list, next) {
>          vtd_as = node->vtd_as;
> @@ -1400,15 +1408,22 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
>  static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
>                                        hwaddr addr, uint8_t am)
>  {
> -    VTDIOTLBPageInvInfo info;
> +    VTDIOTLBEntry *entry, *entry_n;
> +    uint64_t gfn, mask;
>  
>      trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
>  
>      assert(am <= VTD_MAMV);
> -    info.domain_id = domain_id;
> -    info.addr = addr;
> -    info.mask = ~((1 << am) - 1);
> -    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
> +
> +    mask = ~((1 << am) - 1);
> +    gfn = (addr >> VTD_PAGE_SHIFT) & mask;
> +
> +    FOREACH_IOTLB_SAFE(entry, s, entry_n) {
> +        if (entry->domain_id == domain_id && (entry->gfn & mask) == gfn) {
> +            vtd_iotlb_remove_entry(s, entry);
> +        }
> +    }
> +
>      vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
>  }
>  
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 2d77249..76efc66 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -372,14 +372,6 @@ typedef union VTDInvDesc VTDInvDesc;
>  #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
>  #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
>  
> -/* Information about page-selective IOTLB invalidate */
> -struct VTDIOTLBPageInvInfo {
> -    uint16_t domain_id;
> -    uint64_t addr;
> -    uint8_t mask;
> -};
> -typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
> -
>  /* Pagesize of VTD paging structures, including root and context tables */
>  #define VTD_PAGE_SHIFT              12
>  #define VTD_PAGE_SIZE               (1ULL << VTD_PAGE_SHIFT)
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index 42d8a7e..b552815 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -34,7 +34,6 @@ vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain)
>  vtd_iotlb_page_update(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page update sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
>  vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen) "IOTLB context hit bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32
>  vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32
> -vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)"
>  vtd_fault_disabled(void) "Fault processing disabled for context entry"
>  vtd_replay_ce_valid(uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint64_t hi, uint64_t lo) "replay valid context device %02"PRIx8":%02"PRIx8".%02"PRIx8" domain 0x%"PRIx16" hi 0x%"PRIx64" lo 0x%"PRIx64
>  vtd_replay_ce_invalid(uint8_t bus, uint8_t dev, uint8_t fn) "replay invalid context device %02"PRIx8":%02"PRIx8".%02"PRIx8
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 1b51c9f..d2caab3 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -98,9 +98,11 @@ struct VTDBus {
>  
>  struct VTDIOTLBEntry {
>      uint64_t gfn;
> -    uint16_t domain_id;
>      uint64_t slpte;
>      uint64_t mask;
> +    uint64_t key;
> +    QTAILQ_ENTRY(VTDIOTLBEntry) link;
> +    uint16_t domain_id;
>      uint8_t access_flags;
>  };
>  
> @@ -288,6 +290,8 @@ struct IntelIOMMUState {
>      uint32_t context_cache_gen;     /* Should be in [1,MAX] */
>      GHashTable *iotlb;              /* IOTLB */
>      uint16_t iotlb_size;            /* IOTLB max cache entries */
> +    /* Head of IOTLB MRU list */
> +    QTAILQ_HEAD(VTDIOTLBEntryHead, VTDIOTLBEntry) iotlb_head;
>  
>      MemoryRegionIOMMUOps iommu_ops;
>      GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */
> -- 
> 2.7.4
Peter Xu July 14, 2017, 4:32 a.m. UTC | #3
On Thu, Jul 13, 2017 at 04:48:42PM +0800, Jason Wang wrote:
> 
> 
> On 2017年07月12日 16:13, Peter Xu wrote:
> >It is not wise to disgard all the IOTLB cache when cache size reaches
> >max, but that's what we do now. A slightly better (but still simple) way
> >to do this is, we just throw away the least recent used cache entry.
> >
> >This patch implemented MRU list algorithm for VT-d IOTLB. The main logic
> >is to maintain a Most Recently Used list for the IOTLB entries. The hash
> >table is still used for lookup, though a new list field is added to each
> >IOTLB entry for a iotlb MRU list. For each active IOTLB entry, it's both
> >in the hash table in s->iotlb, and also linked into the MRU list of
> >s->iotlb_head. The hash helps in fast lookup, and the MRU list helps in
> >knowing whether the cache is still hot.
> >
> >After we have such a MRU list, replacing all the iterators of IOTLB
> >entries by using list iterations rather than hash table iterations.
> 
> Any reason of doing this, I thought hashtable was even a little bit faster?

Could I ask why?

I thought they are merely the same (when iterating all the items)?
Peter Xu July 14, 2017, 4:43 a.m. UTC | #4
On Thu, Jul 13, 2017 at 08:49:44PM +0300, Michael S. Tsirkin wrote:
> On Wed, Jul 12, 2017 at 04:13:43PM +0800, Peter Xu wrote:
> > It is not wise to disgard all the IOTLB cache when cache size reaches
> > max, but that's what we do now. A slightly better (but still simple) way
> > to do this is, we just throw away the least recent used cache entry.
> 
> Not wise how? Slower? It seems simpler.

When the cache is full, the cached items are still valid, but we just
throw them away. I agree it is definitely simpler, but is that really
wise?

> 
> > This patch implemented MRU list algorithm for VT-d IOTLB. The main logic
> > is to maintain a Most Recently Used list for the IOTLB entries. The hash
> > table is still used for lookup, though a new list field is added to each
> > IOTLB entry for a iotlb MRU list. For each active IOTLB entry, it's both
> > in the hash table in s->iotlb, and also linked into the MRU list of
> > s->iotlb_head. The hash helps in fast lookup, and the MRU list helps in
> > knowing whether the cache is still hot.
> 
> Helps how? Faster?  What's missing here is a report of the performance
> impact.

Not faster, the hash is already there. This patch only tries to fix
the thing I mentioned above (not throwing valid entries when cache
full). As far as I tested (all simple ones), it does not have
performance boost since it's not easy to trigger cache full (our cache
size is 1024, not to mention that current intel iommu driver will do
very rapid global flush). I just think it makes sense, and maybe it
can help in the future when there are no that frequent iotlb flushes.

If we still require performance numbers to merge this patch, I think
we can put this patch aside, until one day we might think it useful.

Thanks,
Jason Wang July 14, 2017, 7:28 a.m. UTC | #5
On 2017年07月14日 12:32, Peter Xu wrote:
> On Thu, Jul 13, 2017 at 04:48:42PM +0800, Jason Wang wrote:
>>
>> On 2017年07月12日 16:13, Peter Xu wrote:
>>> It is not wise to disgard all the IOTLB cache when cache size reaches
>>> max, but that's what we do now. A slightly better (but still simple) way
>>> to do this is, we just throw away the least recent used cache entry.
>>>
>>> This patch implemented MRU list algorithm for VT-d IOTLB. The main logic
>>> is to maintain a Most Recently Used list for the IOTLB entries. The hash
>>> table is still used for lookup, though a new list field is added to each
>>> IOTLB entry for a iotlb MRU list. For each active IOTLB entry, it's both
>>> in the hash table in s->iotlb, and also linked into the MRU list of
>>> s->iotlb_head. The hash helps in fast lookup, and the MRU list helps in
>>> knowing whether the cache is still hot.
>>>
>>> After we have such a MRU list, replacing all the iterators of IOTLB
>>> entries by using list iterations rather than hash table iterations.
>> Any reason of doing this, I thought hashtable was even a little bit faster?
> Could I ask why?
>
> I thought they are merely the same (when iterating all the items)?
>

Ok, looks like I was wrong, but it they are merely the same, why bother?

Thanks
Peter Xu July 17, 2017, 1:53 a.m. UTC | #6
On Fri, Jul 14, 2017 at 03:28:09PM +0800, Jason Wang wrote:
> 
> 
> On 2017年07月14日 12:32, Peter Xu wrote:
> >On Thu, Jul 13, 2017 at 04:48:42PM +0800, Jason Wang wrote:
> >>
> >>On 2017年07月12日 16:13, Peter Xu wrote:
> >>>It is not wise to disgard all the IOTLB cache when cache size reaches
> >>>max, but that's what we do now. A slightly better (but still simple) way
> >>>to do this is, we just throw away the least recent used cache entry.
> >>>
> >>>This patch implemented MRU list algorithm for VT-d IOTLB. The main logic
> >>>is to maintain a Most Recently Used list for the IOTLB entries. The hash
> >>>table is still used for lookup, though a new list field is added to each
> >>>IOTLB entry for a iotlb MRU list. For each active IOTLB entry, it's both
> >>>in the hash table in s->iotlb, and also linked into the MRU list of
> >>>s->iotlb_head. The hash helps in fast lookup, and the MRU list helps in
> >>>knowing whether the cache is still hot.
> >>>
> >>>After we have such a MRU list, replacing all the iterators of IOTLB
> >>>entries by using list iterations rather than hash table iterations.
> >>Any reason of doing this, I thought hashtable was even a little bit faster?
> >Could I ask why?
> >
> >I thought they are merely the same (when iterating all the items)?
> >
> 
> Ok, looks like I was wrong, but it they are merely the same, why bother?

Because imho looping over list needs fewer LOCs and is also more
direct. E.g., for domain flush, hash needs this:

static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
                                          gpointer user_data)
{
    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
    uint16_t domain_id = *(uint16_t *)user_data;
    return entry->domain_id == domain_id;
}

Then:

    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
                                &domain_id);

For list it is only:

    FOREACH_IOTLB_SAFE(entry, s, entry_n) {
        if (entry->domain_id == domain_id) {
            vtd_iotlb_remove_entry(s, entry);
        }
    }

Thanks,
Michael S. Tsirkin July 26, 2017, 8:37 p.m. UTC | #7
On Mon, Jul 17, 2017 at 09:53:27AM +0800, Peter Xu wrote:
> On Fri, Jul 14, 2017 at 03:28:09PM +0800, Jason Wang wrote:
> > 
> > 
> > On 2017年07月14日 12:32, Peter Xu wrote:
> > >On Thu, Jul 13, 2017 at 04:48:42PM +0800, Jason Wang wrote:
> > >>
> > >>On 2017年07月12日 16:13, Peter Xu wrote:
> > >>>It is not wise to disgard all the IOTLB cache when cache size reaches
> > >>>max, but that's what we do now. A slightly better (but still simple) way
> > >>>to do this is, we just throw away the least recent used cache entry.
> > >>>
> > >>>This patch implemented MRU list algorithm for VT-d IOTLB. The main logic
> > >>>is to maintain a Most Recently Used list for the IOTLB entries. The hash
> > >>>table is still used for lookup, though a new list field is added to each
> > >>>IOTLB entry for a iotlb MRU list. For each active IOTLB entry, it's both
> > >>>in the hash table in s->iotlb, and also linked into the MRU list of
> > >>>s->iotlb_head. The hash helps in fast lookup, and the MRU list helps in
> > >>>knowing whether the cache is still hot.
> > >>>
> > >>>After we have such a MRU list, replacing all the iterators of IOTLB
> > >>>entries by using list iterations rather than hash table iterations.
> > >>Any reason of doing this, I thought hashtable was even a little bit faster?
> > >Could I ask why?
> > >
> > >I thought they are merely the same (when iterating all the items)?
> > >
> > 
> > Ok, looks like I was wrong, but it they are merely the same, why bother?
> 
> Because imho looping over list needs fewer LOCs and is also more
> direct. E.g., for domain flush, hash needs this:
> 
> static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
>                                           gpointer user_data)
> {
>     VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
>     uint16_t domain_id = *(uint16_t *)user_data;
>     return entry->domain_id == domain_id;
> }
> 
> Then:
> 
>     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
>                                 &domain_id);
> 
> For list it is only:
> 
>     FOREACH_IOTLB_SAFE(entry, s, entry_n) {
>         if (entry->domain_id == domain_id) {
>             vtd_iotlb_remove_entry(s, entry);
>         }
>     }
> 
> Thanks,

Well the LOC seems to have gone up with this patch.
If we are trying to simplify code, please state this
in commit log.


> -- 
> Peter Xu
Peter Xu July 27, 2017, 2:19 a.m. UTC | #8
On Wed, Jul 26, 2017 at 11:37:13PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jul 17, 2017 at 09:53:27AM +0800, Peter Xu wrote:
> > On Fri, Jul 14, 2017 at 03:28:09PM +0800, Jason Wang wrote:
> > > 
> > > 
> > > On 2017年07月14日 12:32, Peter Xu wrote:
> > > >On Thu, Jul 13, 2017 at 04:48:42PM +0800, Jason Wang wrote:
> > > >>
> > > >>On 2017年07月12日 16:13, Peter Xu wrote:
> > > >>>It is not wise to disgard all the IOTLB cache when cache size reaches
> > > >>>max, but that's what we do now. A slightly better (but still simple) way
> > > >>>to do this is, we just throw away the least recent used cache entry.
> > > >>>
> > > >>>This patch implemented MRU list algorithm for VT-d IOTLB. The main logic
> > > >>>is to maintain a Most Recently Used list for the IOTLB entries. The hash
> > > >>>table is still used for lookup, though a new list field is added to each
> > > >>>IOTLB entry for a iotlb MRU list. For each active IOTLB entry, it's both
> > > >>>in the hash table in s->iotlb, and also linked into the MRU list of
> > > >>>s->iotlb_head. The hash helps in fast lookup, and the MRU list helps in
> > > >>>knowing whether the cache is still hot.
> > > >>>
> > > >>>After we have such a MRU list, replacing all the iterators of IOTLB
> > > >>>entries by using list iterations rather than hash table iterations.
> > > >>Any reason of doing this, I thought hashtable was even a little bit faster?
> > > >Could I ask why?
> > > >
> > > >I thought they are merely the same (when iterating all the items)?
> > > >
> > > 
> > > Ok, looks like I was wrong, but it they are merely the same, why bother?
> > 
> > Because imho looping over list needs fewer LOCs and is also more
> > direct. E.g., for domain flush, hash needs this:
> > 
> > static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
> >                                           gpointer user_data)
> > {
> >     VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
> >     uint16_t domain_id = *(uint16_t *)user_data;
> >     return entry->domain_id == domain_id;
> > }
> > 
> > Then:
> > 
> >     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
> >                                 &domain_id);
> > 
> > For list it is only:
> > 
> >     FOREACH_IOTLB_SAFE(entry, s, entry_n) {
> >         if (entry->domain_id == domain_id) {
> >             vtd_iotlb_remove_entry(s, entry);
> >         }
> >     }
> > 
> > Thanks,
> 
> Well the LOC seems to have gone up with this patch.
> If we are trying to simplify code, please state this
> in commit log.

Ok. I can split the patch into two (one to introduce MRU list, one to
replace the iterations of hash tables) if it will be needed in the
future. Currently since we don't have plan to merge it, I'll put it
aside for now. Thanks.
diff mbox

Patch

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e17340c..6320dea 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -37,6 +37,9 @@ 
 #include "kvm_i386.h"
 #include "trace.h"
 
+#define FOREACH_IOTLB_SAFE(entry, s, entry_n) \
+    QTAILQ_FOREACH_SAFE(entry, &(s)->iotlb_head, link, entry_n)
+
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                             uint64_t wmask, uint64_t w1cmask)
 {
@@ -139,14 +142,6 @@  static guint vtd_uint64_hash(gconstpointer v)
     return (guint)*(const uint64_t *)v;
 }
 
-static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
-                                          gpointer user_data)
-{
-    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
-    uint16_t domain_id = *(uint16_t *)user_data;
-    return entry->domain_id == domain_id;
-}
-
 /* The shift of an addr for a certain level of paging structure */
 static inline uint32_t vtd_slpt_level_shift(uint32_t level)
 {
@@ -159,18 +154,6 @@  static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
     return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
 }
 
-static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
-                                        gpointer user_data)
-{
-    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
-    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
-    uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
-    uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
-    return (entry->domain_id == info->domain_id) &&
-            (((entry->gfn & info->mask) == gfn) ||
-             (entry->gfn == gfn_tlb));
-}
-
 /* Reset all the gen of VTDAddressSpace to zero and set the gen of
  * IntelIOMMUState to 1.
  */
@@ -201,6 +184,7 @@  static void vtd_reset_iotlb(IntelIOMMUState *s)
 {
     assert(s->iotlb);
     g_hash_table_remove_all(s->iotlb);
+    QTAILQ_INIT(&s->iotlb_head);
 }
 
 static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
@@ -231,6 +215,9 @@  static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
                                 source_id, level);
         entry = g_hash_table_lookup(s->iotlb, &key);
         if (entry) {
+            /* Move the entry to the head of MRU list */
+            QTAILQ_REMOVE(&s->iotlb_head, entry, link);
+            QTAILQ_INSERT_HEAD(&s->iotlb_head, entry, link);
             goto out;
         }
     }
@@ -239,11 +226,23 @@  out:
     return entry;
 }
 
+static void vtd_iotlb_remove_entry(IntelIOMMUState *s, VTDIOTLBEntry *entry)
+{
+    uint64_t key = entry->key;
+
+    /*
+     * To remove an entry, we need to both remove it from the MRU
+     * list, and also from the hash table.
+     */
+    QTAILQ_REMOVE(&s->iotlb_head, entry, link);
+    g_hash_table_remove(s->iotlb, &key);
+}
+
 static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
                              uint16_t domain_id, hwaddr addr, uint64_t slpte,
                              uint8_t access_flags, uint32_t level)
 {
-    VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
+    VTDIOTLBEntry *entry = g_new0(VTDIOTLBEntry, 1), *last;
     uint64_t *key = g_malloc(sizeof(*key));
     uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
 
@@ -253,8 +252,9 @@  static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
 
     trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
     if (g_hash_table_size(s->iotlb) >= s->iotlb_size) {
-        trace_vtd_iotlb_reset("iotlb exceeds size limit");
-        vtd_reset_iotlb(s);
+        /* Remove the Least Recently Used cache */
+        last = QTAILQ_LAST(&s->iotlb_head, VTDIOTLBEntryHead);
+        vtd_iotlb_remove_entry(s, last);
     }
 
     entry->gfn = gfn;
@@ -263,7 +263,11 @@  static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
     entry->access_flags = access_flags;
     entry->mask = vtd_slpt_level_page_mask(level);
     *key = vtd_get_iotlb_key(gfn, source_id, level);
+    entry->key = *key;
     g_hash_table_replace(s->iotlb, key, entry);
+
+    /* Update MRU list */
+    QTAILQ_INSERT_HEAD(&s->iotlb_head, entry, link);
 }
 
 /* Given the reg addr of both the message data and address, generate an
@@ -1354,11 +1358,15 @@  static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
     IntelIOMMUNotifierNode *node;
     VTDContextEntry ce;
     VTDAddressSpace *vtd_as;
+    VTDIOTLBEntry *entry, *entry_n;
 
     trace_vtd_inv_desc_iotlb_domain(domain_id);
 
-    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
-                                &domain_id);
+    FOREACH_IOTLB_SAFE(entry, s, entry_n) {
+        if (entry->domain_id == domain_id) {
+            vtd_iotlb_remove_entry(s, entry);
+        }
+    }
 
     QLIST_FOREACH(node, &s->notifiers_list, next) {
         vtd_as = node->vtd_as;
@@ -1400,15 +1408,22 @@  static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
 static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
                                       hwaddr addr, uint8_t am)
 {
-    VTDIOTLBPageInvInfo info;
+    VTDIOTLBEntry *entry, *entry_n;
+    uint64_t gfn, mask;
 
     trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
 
     assert(am <= VTD_MAMV);
-    info.domain_id = domain_id;
-    info.addr = addr;
-    info.mask = ~((1 << am) - 1);
-    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
+
+    mask = ~((1 << am) - 1);
+    gfn = (addr >> VTD_PAGE_SHIFT) & mask;
+
+    FOREACH_IOTLB_SAFE(entry, s, entry_n) {
+        if (entry->domain_id == domain_id && (entry->gfn & mask) == gfn) {
+            vtd_iotlb_remove_entry(s, entry);
+        }
+    }
+
     vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
 }
 
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 2d77249..76efc66 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -372,14 +372,6 @@  typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
 #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
 
-/* Information about page-selective IOTLB invalidate */
-struct VTDIOTLBPageInvInfo {
-    uint16_t domain_id;
-    uint64_t addr;
-    uint8_t mask;
-};
-typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
-
 /* Pagesize of VTD paging structures, including root and context tables */
 #define VTD_PAGE_SHIFT              12
 #define VTD_PAGE_SIZE               (1ULL << VTD_PAGE_SHIFT)
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 42d8a7e..b552815 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -34,7 +34,6 @@  vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain)
 vtd_iotlb_page_update(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page update sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
 vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen) "IOTLB context hit bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32
 vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32
-vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)"
 vtd_fault_disabled(void) "Fault processing disabled for context entry"
 vtd_replay_ce_valid(uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint64_t hi, uint64_t lo) "replay valid context device %02"PRIx8":%02"PRIx8".%02"PRIx8" domain 0x%"PRIx16" hi 0x%"PRIx64" lo 0x%"PRIx64
 vtd_replay_ce_invalid(uint8_t bus, uint8_t dev, uint8_t fn) "replay invalid context device %02"PRIx8":%02"PRIx8".%02"PRIx8
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 1b51c9f..d2caab3 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -98,9 +98,11 @@  struct VTDBus {
 
 struct VTDIOTLBEntry {
     uint64_t gfn;
-    uint16_t domain_id;
     uint64_t slpte;
     uint64_t mask;
+    uint64_t key;
+    QTAILQ_ENTRY(VTDIOTLBEntry) link;
+    uint16_t domain_id;
     uint8_t access_flags;
 };
 
@@ -288,6 +290,8 @@  struct IntelIOMMUState {
     uint32_t context_cache_gen;     /* Should be in [1,MAX] */
     GHashTable *iotlb;              /* IOTLB */
     uint16_t iotlb_size;            /* IOTLB max cache entries */
+    /* Head of IOTLB MRU list */
+    QTAILQ_HEAD(VTDIOTLBEntryHead, VTDIOTLBEntry) iotlb_head;
 
     MemoryRegionIOMMUOps iommu_ops;
     GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */