diff mbox series

[v1,15/22] intel_iommu: replay guest pasid bindings to host

Message ID 1584880579-12178-16-git-send-email-yi.l.liu@intel.com
State New
Headers show
Series intel_iommu: expose Shared Virtual Addressing to VMs | expand

Commit Message

Yi Liu March 22, 2020, 12:36 p.m. UTC
This patch adds guest pasid bindings replay for domain
selective pasid cache invalidation(dsi) and global pasid
cache invalidation by walking guest pasid table.

Reason:
Guest OS may flush the pasid cache with a larger granularity.
e.g. guest does a svm_bind() but flush the pasid cache with
global or domain selective pasid cache invalidation instead
of pasid selective(psi) pasid cache invalidation. Regards to
such case, it works in host. Per spec, a global or domain
selective pasid cache invalidation should be able to cover
what a pasid selective invalidation does. The only concern
is performance deduction since dsi and global cache invalidation
will flush more than psi. To align with native, vIOMMU needs
emulator needs to do replay for the two invalidation granularity
to reflect the latest pasid bindings in guest pasid table.

Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Yi Sun <yi.y.sun@linux.intel.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
---
 hw/i386/intel_iommu.c          | 128 ++++++++++++++++++++++++++++++++++++++++-
 hw/i386/intel_iommu_internal.h |   1 +
 2 files changed, 127 insertions(+), 2 deletions(-)

Comments

Peter Xu March 24, 2020, 6 p.m. UTC | #1
On Sun, Mar 22, 2020 at 05:36:12AM -0700, Liu Yi L wrote:
> This patch adds guest pasid bindings replay for domain
> selective pasid cache invalidation(dsi) and global pasid
> cache invalidation by walking guest pasid table.
> 
> Reason:
> Guest OS may flush the pasid cache with a larger granularity.
> e.g. guest does a svm_bind() but flush the pasid cache with
> global or domain selective pasid cache invalidation instead
> of pasid selective(psi) pasid cache invalidation. Regards to
> such case, it works in host. Per spec, a global or domain
> selective pasid cache invalidation should be able to cover
> what a pasid selective invalidation does. The only concern
> is performance deduction since dsi and global cache invalidation
> will flush more than psi. To align with native, vIOMMU needs
> emulator needs to do replay for the two invalidation granularity
> to reflect the latest pasid bindings in guest pasid table.

This is actually related to my question in the other patch on whether
the replay can and should also work for the PSI case too.  I'm still
confused on why the guest cannot use a PSI for a newly created PASID
entry for one device?

> 
> Cc: Kevin Tian <kevin.tian@intel.com>
> Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Cc: Peter Xu <peterx@redhat.com>
> Cc: Yi Sun <yi.y.sun@linux.intel.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Richard Henderson <rth@twiddle.net>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> ---
>  hw/i386/intel_iommu.c          | 128 ++++++++++++++++++++++++++++++++++++++++-
>  hw/i386/intel_iommu_internal.h |   1 +
>  2 files changed, 127 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 0423c83..8ec638f 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -2717,6 +2717,130 @@ static VTDPASIDAddressSpace *vtd_add_find_pasid_as(IntelIOMMUState *s,
>      return vtd_pasid_as;
>  }
>  
> +/**
> + * Constant information used during pasid table walk
> +   @vtd_bus, @devfn: device info
> + * @flags: indicates if it is domain selective walk
> + * @did: domain ID of the pasid table walk
> + */
> +typedef struct {
> +    VTDBus *vtd_bus;
> +    uint16_t devfn;
> +#define VTD_PASID_TABLE_DID_SEL_WALK   (1ULL << 0);
> +    uint32_t flags;
> +    uint16_t did;
> +} vtd_pasid_table_walk_info;

So this is going to be similar to VTDPASIDCacheInfo as I mentioned.
Maybe you can use a shared object for both?

> +
> +/**
> + * Caller of this function should hold iommu_lock.
> + */
> +static bool vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
> +                                        dma_addr_t pt_base,
> +                                        int start,
> +                                        int end,
> +                                        vtd_pasid_table_walk_info *info)
> +{
> +    VTDPASIDEntry pe;
> +    int pasid = start;
> +    int pasid_next;
> +    VTDPASIDAddressSpace *vtd_pasid_as;
> +    VTDPASIDCacheEntry *pc_entry;
> +
> +    while (pasid < end) {
> +        pasid_next = pasid + 1;
> +
> +        if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
> +            && vtd_pe_present(&pe)) {
> +            vtd_pasid_as = vtd_add_find_pasid_as(s,
> +                                       info->vtd_bus, info->devfn, pasid);

For this chunk:

> +            pc_entry = &vtd_pasid_as->pasid_cache_entry;
> +            if (s->pasid_cache_gen == pc_entry->pasid_cache_gen) {
> +                vtd_update_pe_in_cache(s, vtd_pasid_as, &pe);
> +            } else {
> +                vtd_fill_in_pe_in_cache(s, vtd_pasid_as, &pe);
> +            }

We already got &pe, then would it be easier to simply call:

               vtd_update_pe_in_cache(s, vtd_pasid_as, &pe);

?

Since IIUC the cache_gen is only helpful to avoid looking up the &pe.
And the vtd_pasid_entry_compare() check should be even more solid than
the cache_gen.

> +        }
> +        pasid = pasid_next;
> +    }
> +    return true;
> +}
> +
> +/*
> + * Currently, VT-d scalable mode pasid table is a two level table,
> + * this function aims to loop a range of PASIDs in a given pasid
> + * table to identify the pasid config in guest.
> + * Caller of this function should hold iommu_lock.
> + */
> +static void vtd_sm_pasid_table_walk(IntelIOMMUState *s,
> +                                    dma_addr_t pdt_base,
> +                                    int start,
> +                                    int end,
> +                                    vtd_pasid_table_walk_info *info)
> +{
> +    VTDPASIDDirEntry pdire;
> +    int pasid = start;
> +    int pasid_next;
> +    dma_addr_t pt_base;
> +
> +    while (pasid < end) {
> +        pasid_next = pasid + VTD_PASID_TBL_ENTRY_NUM;
> +        if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
> +            && vtd_pdire_present(&pdire)) {
> +            pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
> +            if (!vtd_sm_pasid_table_walk_one(s,
> +                              pt_base, pasid, pasid_next, info)) {

vtd_sm_pasid_table_walk_one() never returns false.  Remove this check?
Maybe also let vtd_sm_pasid_table_walk_one() to return nothing.

> +                break;
> +            }
> +        }
> +        pasid = pasid_next;
> +    }
> +}
> +
> +/**
> + * This function replay the guest pasid bindings to hots by
> + * walking the guest PASID table. This ensures host will have
> + * latest guest pasid bindings.
> + */
> +static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
> +                                            uint16_t *did,
> +                                            bool is_dsi)
> +{
> +    VTDContextEntry ce;
> +    VTDHostIOMMUContext *vtd_dev_icx;
> +    int bus_n, devfn;
> +    vtd_pasid_table_walk_info info;
> +
> +    if (is_dsi) {
> +        info.flags = VTD_PASID_TABLE_DID_SEL_WALK;
> +        info.did = *did;
> +    }
> +
> +    /*
> +     * In this replay, only needs to care about the devices which
> +     * are backed by host IOMMU. For such devices, their vtd_dev_icx
> +     * instances are in the s->vtd_dev_icx_list. For devices which
> +     * are not backed byhost IOMMU, it is not necessary to replay
> +     * the bindings since their cache could be re-created in the future
> +     * DMA address transaltion.
> +     */
> +    vtd_iommu_lock(s);
> +    QLIST_FOREACH(vtd_dev_icx, &s->vtd_dev_icx_list, next) {
> +        bus_n = pci_bus_num(vtd_dev_icx->vtd_bus->bus);
> +        devfn = vtd_dev_icx->devfn;
> +
> +        if (!vtd_dev_to_context_entry(s, bus_n, devfn, &ce)) {
> +            info.vtd_bus = vtd_dev_icx->vtd_bus;
> +            info.devfn = devfn;
> +            vtd_sm_pasid_table_walk(s,
> +                                    VTD_CE_GET_PASID_DIR_TABLE(&ce),
> +                                    0,
> +                                    VTD_MAX_HPASID,
> +                                    &info);
> +        }
> +    }
> +    vtd_iommu_unlock(s);
> +}
> +
>  static int vtd_pasid_cache_dsi(IntelIOMMUState *s, uint16_t domain_id)
>  {
>      VTDPASIDCacheInfo pc_info;
> @@ -2735,7 +2859,6 @@ static int vtd_pasid_cache_dsi(IntelIOMMUState *s, uint16_t domain_id)
>      vtd_iommu_unlock(s);
>  
>      /*
> -     * TODO:
>       * Domain selective PASID cache invalidation flushes
>       * all the pasid caches within a domain. To be safe,
>       * after invalidating the pasid caches, emulator needs
> @@ -2743,6 +2866,7 @@ static int vtd_pasid_cache_dsi(IntelIOMMUState *s, uint16_t domain_id)
>       * dir and pasid table. e.g. When the guest setup a new
>       * PASID entry then send a PASID DSI.
>       */
> +    vtd_replay_guest_pasid_bindings(s, &domain_id, true);
>      return 0;
>  }
>  
> @@ -2881,13 +3005,13 @@ static int vtd_pasid_cache_gsi(IntelIOMMUState *s)
>      vtd_iommu_unlock(s);
>  
>      /*
> -     * TODO:
>       * Global PASID cache invalidation flushes all
>       * the pasid caches. To be safe, after invalidating
>       * the pasid caches, emulator needs to replay the
>       * pasid bindings by walking guest pasid dir and
>       * pasid table.
>       */
> +    vtd_replay_guest_pasid_bindings(s, NULL, false);
>      return 0;
>  }
>  
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 4451acf..b0a324c 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -554,6 +554,7 @@ typedef struct VTDPASIDCacheInfo VTDPASIDCacheInfo;
>  #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
>  #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) & VTD_PASID_TABLE_BITS_MASK)
>  #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing Disable */
> +#define VTD_PASID_TBL_ENTRY_NUM       (1ULL << 6)
>  
>  /* PASID Granular Translation Type Mask */
>  #define VTD_PASID_ENTRY_P              1ULL
> -- 
> 2.7.4
>
Yi Liu March 25, 2020, 1:14 p.m. UTC | #2
> From: Peter Xu <peterx@redhat.com>
> Sent: Wednesday, March 25, 2020 2:00 AM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Subject: Re: [PATCH v1 15/22] intel_iommu: replay guest pasid bindings to host
> 
> On Sun, Mar 22, 2020 at 05:36:12AM -0700, Liu Yi L wrote:
> > This patch adds guest pasid bindings replay for domain
> > selective pasid cache invalidation(dsi) and global pasid
> > cache invalidation by walking guest pasid table.
> >
> > Reason:
> > Guest OS may flush the pasid cache with a larger granularity.
> > e.g. guest does a svm_bind() but flush the pasid cache with
> > global or domain selective pasid cache invalidation instead
> > of pasid selective(psi) pasid cache invalidation. Regards to
> > such case, it works in host. Per spec, a global or domain
> > selective pasid cache invalidation should be able to cover
> > what a pasid selective invalidation does. The only concern
> > is performance deduction since dsi and global cache invalidation
> > will flush more than psi. To align with native, vIOMMU needs
> > emulator needs to do replay for the two invalidation granularity
> > to reflect the latest pasid bindings in guest pasid table.
> 
> This is actually related to my question in the other patch on whether
> the replay can and should also work for the PSI case too.  I'm still
> confused on why the guest cannot use a PSI for a newly created PASID
> entry for one device?

Use a PSI for a newly created PASID entry for one device is the correct
way. But spec doesn't include the device info in the invalidation descriptor.
Reason is there is DID info which is enough.

I think the replay code and the PSI code should be designed with
the same idea. With a Step 1 loop all existing pasid_as, and Step 2
to loop all assigned devices. I'll try to make them share the low
level code. e.g. the most code in PSI handling.

> 
> >
> > Cc: Kevin Tian <kevin.tian@intel.com>
> > Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > Cc: Peter Xu <peterx@redhat.com>
> > Cc: Yi Sun <yi.y.sun@linux.intel.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Richard Henderson <rth@twiddle.net>
> > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > ---
> >  hw/i386/intel_iommu.c          | 128
> ++++++++++++++++++++++++++++++++++++++++-
> >  hw/i386/intel_iommu_internal.h |   1 +
> >  2 files changed, 127 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index 0423c83..8ec638f 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -2717,6 +2717,130 @@ static VTDPASIDAddressSpace
> *vtd_add_find_pasid_as(IntelIOMMUState *s,
> >      return vtd_pasid_as;
> >  }
> >
> > +/**
> > + * Constant information used during pasid table walk
> > +   @vtd_bus, @devfn: device info
> > + * @flags: indicates if it is domain selective walk
> > + * @did: domain ID of the pasid table walk
> > + */
> > +typedef struct {
> > +    VTDBus *vtd_bus;
> > +    uint16_t devfn;
> > +#define VTD_PASID_TABLE_DID_SEL_WALK   (1ULL << 0);
> > +    uint32_t flags;
> > +    uint16_t did;
> > +} vtd_pasid_table_walk_info;
> 
> So this is going to be similar to VTDPASIDCacheInfo as I mentioned.
> Maybe you can use a shared object for both?

Aha, similar. high chance to reuse for both. :-)

> > +
> > +/**
> > + * Caller of this function should hold iommu_lock.
> > + */
> > +static bool vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
> > +                                        dma_addr_t pt_base,
> > +                                        int start,
> > +                                        int end,
> > +                                        vtd_pasid_table_walk_info *info)
> > +{
> > +    VTDPASIDEntry pe;
> > +    int pasid = start;
> > +    int pasid_next;
> > +    VTDPASIDAddressSpace *vtd_pasid_as;
> > +    VTDPASIDCacheEntry *pc_entry;
> > +
> > +    while (pasid < end) {
> > +        pasid_next = pasid + 1;
> > +
> > +        if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
> > +            && vtd_pe_present(&pe)) {
> > +            vtd_pasid_as = vtd_add_find_pasid_as(s,
> > +                                       info->vtd_bus, info->devfn, pasid);
> 
> For this chunk:
> 
> > +            pc_entry = &vtd_pasid_as->pasid_cache_entry;
> > +            if (s->pasid_cache_gen == pc_entry->pasid_cache_gen) {
> > +                vtd_update_pe_in_cache(s, vtd_pasid_as, &pe);
> > +            } else {
> > +                vtd_fill_in_pe_in_cache(s, vtd_pasid_as, &pe);
> > +            }
> 
> We already got &pe, then would it be easier to simply call:
> 
>                vtd_update_pe_in_cache(s, vtd_pasid_as, &pe);
> 
> ?

If the pasid_cache_gen is equal to iommu_state's, then it means there is
a chance that the cached pasid entry is equal to pe here. While for the
else case, it is surely there is no valid pasid entry in the pasid_as. And
the difference between vtd_update_pe_in_cache() and
vtd_fill_in_pe_in_cache() is whether do a compare between the new pasid entry
and cached pasid entry.

> Since IIUC the cache_gen is only helpful to avoid looking up the &pe.
> And the vtd_pasid_entry_compare() check should be even more solid than
> the cache_gen.

But if the cache_gen is not equal the one in iommu_state, then the cached
pasid entry is not valid at all. The compare is only needed after the cache_gen
is checked.

> > +        }
> > +        pasid = pasid_next;
> > +    }
> > +    return true;
> > +}
> > +
> > +/*
> > + * Currently, VT-d scalable mode pasid table is a two level table,
> > + * this function aims to loop a range of PASIDs in a given pasid
> > + * table to identify the pasid config in guest.
> > + * Caller of this function should hold iommu_lock.
> > + */
> > +static void vtd_sm_pasid_table_walk(IntelIOMMUState *s,
> > +                                    dma_addr_t pdt_base,
> > +                                    int start,
> > +                                    int end,
> > +                                    vtd_pasid_table_walk_info *info)
> > +{
> > +    VTDPASIDDirEntry pdire;
> > +    int pasid = start;
> > +    int pasid_next;
> > +    dma_addr_t pt_base;
> > +
> > +    while (pasid < end) {
> > +        pasid_next = pasid + VTD_PASID_TBL_ENTRY_NUM;
> > +        if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
> > +            && vtd_pdire_present(&pdire)) {
> > +            pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
> > +            if (!vtd_sm_pasid_table_walk_one(s,
> > +                              pt_base, pasid, pasid_next, info)) {
> 
> vtd_sm_pasid_table_walk_one() never returns false.  Remove this check?
> Maybe also let vtd_sm_pasid_table_walk_one() to return nothing.

Oops. Could make it as void.

Regards,
Yi Liu
Peter Xu March 25, 2020, 3:06 p.m. UTC | #3
On Wed, Mar 25, 2020 at 01:14:26PM +0000, Liu, Yi L wrote:

[...]

> > > +/**
> > > + * Caller of this function should hold iommu_lock.
> > > + */
> > > +static bool vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
> > > +                                        dma_addr_t pt_base,
> > > +                                        int start,
> > > +                                        int end,
> > > +                                        vtd_pasid_table_walk_info *info)
> > > +{
> > > +    VTDPASIDEntry pe;
> > > +    int pasid = start;
> > > +    int pasid_next;
> > > +    VTDPASIDAddressSpace *vtd_pasid_as;
> > > +    VTDPASIDCacheEntry *pc_entry;
> > > +
> > > +    while (pasid < end) {
> > > +        pasid_next = pasid + 1;
> > > +
> > > +        if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
> > > +            && vtd_pe_present(&pe)) {
> > > +            vtd_pasid_as = vtd_add_find_pasid_as(s,
> > > +                                       info->vtd_bus, info->devfn, pasid);
> > 
> > For this chunk:
> > 
> > > +            pc_entry = &vtd_pasid_as->pasid_cache_entry;
> > > +            if (s->pasid_cache_gen == pc_entry->pasid_cache_gen) {
> > > +                vtd_update_pe_in_cache(s, vtd_pasid_as, &pe);
> > > +            } else {
> > > +                vtd_fill_in_pe_in_cache(s, vtd_pasid_as, &pe);
> > > +            }
> > 
> > We already got &pe, then would it be easier to simply call:
> > 
> >                vtd_update_pe_in_cache(s, vtd_pasid_as, &pe);
> > 
> > ?
> 
> If the pasid_cache_gen is equal to iommu_state's, then it means there is
> a chance that the cached pasid entry is equal to pe here. While for the
> else case, it is surely there is no valid pasid entry in the pasid_as. And
> the difference between vtd_update_pe_in_cache() and
> vtd_fill_in_pe_in_cache() is whether do a compare between the new pasid entry
> and cached pasid entry.
> 
> > Since IIUC the cache_gen is only helpful to avoid looking up the &pe.
> > And the vtd_pasid_entry_compare() check should be even more solid than
> > the cache_gen.
> 
> But if the cache_gen is not equal the one in iommu_state, then the cached
> pasid entry is not valid at all. The compare is only needed after the cache_gen
> is checked.

Wait... If "the pasid entry context" is already exactly the same as
the "cached pasid entry context", why we still care the generation
number?  I'd just update the generation to latest and cache it again.
Maybe there's a tricky point when &pe==cache but generation number is
old, then IIUC what we can do better is simply update the generation
number to latest.

But OK - let's keep that.  I don't see anything incorrect with current
code either.
Yi Liu March 26, 2020, 3:17 a.m. UTC | #4
> From: Peter Xu <peterx@redhat.com>
> Sent: Wednesday, March 25, 2020 11:07 PM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Cc: qemu-devel@nongnu.org; alex.williamson@redhat.com;
> eric.auger@redhat.com; pbonzini@redhat.com; mst@redhat.com;
> david@gibson.dropbear.id.au; Tian, Kevin <kevin.tian@intel.com>; Tian, Jun J
> <jun.j.tian@intel.com>; Sun, Yi Y <yi.y.sun@intel.com>; kvm@vger.kernel.org; Wu,
> Hao <hao.wu@intel.com>; jean-philippe@linaro.org; Jacob Pan
> <jacob.jun.pan@linux.intel.com>; Yi Sun <yi.y.sun@linux.intel.com>; Richard
> Henderson <rth@twiddle.net>
> Subject: Re: [PATCH v1 15/22] intel_iommu: replay guest pasid bindings to host
> 
> On Wed, Mar 25, 2020 at 01:14:26PM +0000, Liu, Yi L wrote:
> 
> [...]
> 
> > > > +/**
> > > > + * Caller of this function should hold iommu_lock.
> > > > + */
> > > > +static bool vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
> > > > +                                        dma_addr_t pt_base,
> > > > +                                        int start,
> > > > +                                        int end,
> > > > +                                        vtd_pasid_table_walk_info *info)
> > > > +{
> > > > +    VTDPASIDEntry pe;
> > > > +    int pasid = start;
> > > > +    int pasid_next;
> > > > +    VTDPASIDAddressSpace *vtd_pasid_as;
> > > > +    VTDPASIDCacheEntry *pc_entry;
> > > > +
> > > > +    while (pasid < end) {
> > > > +        pasid_next = pasid + 1;
> > > > +
> > > > +        if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
> > > > +            && vtd_pe_present(&pe)) {
> > > > +            vtd_pasid_as = vtd_add_find_pasid_as(s,
> > > > +                                       info->vtd_bus, info->devfn, pasid);
> > >
> > > For this chunk:
> > >
> > > > +            pc_entry = &vtd_pasid_as->pasid_cache_entry;
> > > > +            if (s->pasid_cache_gen == pc_entry->pasid_cache_gen) {
> > > > +                vtd_update_pe_in_cache(s, vtd_pasid_as, &pe);
> > > > +            } else {
> > > > +                vtd_fill_in_pe_in_cache(s, vtd_pasid_as, &pe);
> > > > +            }
> > >
> > > We already got &pe, then would it be easier to simply call:
> > >
> > >                vtd_update_pe_in_cache(s, vtd_pasid_as, &pe);
> > >
> > > ?
> >
> > If the pasid_cache_gen is equal to iommu_state's, then it means there is
> > a chance that the cached pasid entry is equal to pe here. While for the
> > else case, it is surely there is no valid pasid entry in the pasid_as. And
> > the difference between vtd_update_pe_in_cache() and
> > vtd_fill_in_pe_in_cache() is whether do a compare between the new pasid entry
> > and cached pasid entry.
> >
> > > Since IIUC the cache_gen is only helpful to avoid looking up the &pe.
> > > And the vtd_pasid_entry_compare() check should be even more solid than
> > > the cache_gen.
> >
> > But if the cache_gen is not equal the one in iommu_state, then the cached
> > pasid entry is not valid at all. The compare is only needed after the cache_gen
> > is checked.
> 
> Wait... If "the pasid entry context" is already exactly the same as
> the "cached pasid entry context", why we still care the generation
> number?  I'd just update the generation to latest and cache it again.
> Maybe there's a tricky point when &pe==cache but generation number is
> old, then IIUC what we can do better is simply update the generation
> number to latest.
> 
> But OK - let's keep that.  I don't see anything incorrect with current
> code either.

I see. Actually, I think it's also fine to follow your suggestion to all
vtd_update_pe_in_cache(s, vtd_pasid_as, &pe); for the else switch.
If switch to use replay for PSI, then I may drop vtd_fill_in_pe_in_cache()
as it is introduced mainly for PSI.

Regards,
Yi Liu
diff mbox series

Patch

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 0423c83..8ec638f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2717,6 +2717,130 @@  static VTDPASIDAddressSpace *vtd_add_find_pasid_as(IntelIOMMUState *s,
     return vtd_pasid_as;
 }
 
+/**
+ * Constant information used during pasid table walk
+   @vtd_bus, @devfn: device info
+ * @flags: indicates if it is domain selective walk
+ * @did: domain ID of the pasid table walk
+ */
+typedef struct {
+    VTDBus *vtd_bus;
+    uint16_t devfn;
+#define VTD_PASID_TABLE_DID_SEL_WALK   (1ULL << 0);
+    uint32_t flags;
+    uint16_t did;
+} vtd_pasid_table_walk_info;
+
+/**
+ * Caller of this function should hold iommu_lock.
+ */
+static bool vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
+                                        dma_addr_t pt_base,
+                                        int start,
+                                        int end,
+                                        vtd_pasid_table_walk_info *info)
+{
+    VTDPASIDEntry pe;
+    int pasid = start;
+    int pasid_next;
+    VTDPASIDAddressSpace *vtd_pasid_as;
+    VTDPASIDCacheEntry *pc_entry;
+
+    while (pasid < end) {
+        pasid_next = pasid + 1;
+
+        if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
+            && vtd_pe_present(&pe)) {
+            vtd_pasid_as = vtd_add_find_pasid_as(s,
+                                       info->vtd_bus, info->devfn, pasid);
+            pc_entry = &vtd_pasid_as->pasid_cache_entry;
+            if (s->pasid_cache_gen == pc_entry->pasid_cache_gen) {
+                vtd_update_pe_in_cache(s, vtd_pasid_as, &pe);
+            } else {
+                vtd_fill_in_pe_in_cache(s, vtd_pasid_as, &pe);
+            }
+        }
+        pasid = pasid_next;
+    }
+    return true;
+}
+
+/*
+ * Currently, VT-d scalable mode pasid table is a two level table,
+ * this function aims to loop a range of PASIDs in a given pasid
+ * table to identify the pasid config in guest.
+ * Caller of this function should hold iommu_lock.
+ */
+static void vtd_sm_pasid_table_walk(IntelIOMMUState *s,
+                                    dma_addr_t pdt_base,
+                                    int start,
+                                    int end,
+                                    vtd_pasid_table_walk_info *info)
+{
+    VTDPASIDDirEntry pdire;
+    int pasid = start;
+    int pasid_next;
+    dma_addr_t pt_base;
+
+    while (pasid < end) {
+        pasid_next = pasid + VTD_PASID_TBL_ENTRY_NUM;
+        if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
+            && vtd_pdire_present(&pdire)) {
+            pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
+            if (!vtd_sm_pasid_table_walk_one(s,
+                              pt_base, pasid, pasid_next, info)) {
+                break;
+            }
+        }
+        pasid = pasid_next;
+    }
+}
+
+/**
+ * This function replay the guest pasid bindings to hots by
+ * walking the guest PASID table. This ensures host will have
+ * latest guest pasid bindings.
+ */
+static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
+                                            uint16_t *did,
+                                            bool is_dsi)
+{
+    VTDContextEntry ce;
+    VTDHostIOMMUContext *vtd_dev_icx;
+    int bus_n, devfn;
+    vtd_pasid_table_walk_info info;
+
+    if (is_dsi) {
+        info.flags = VTD_PASID_TABLE_DID_SEL_WALK;
+        info.did = *did;
+    }
+
+    /*
+     * In this replay, only needs to care about the devices which
+     * are backed by host IOMMU. For such devices, their vtd_dev_icx
+     * instances are in the s->vtd_dev_icx_list. For devices which
+     * are not backed byhost IOMMU, it is not necessary to replay
+     * the bindings since their cache could be re-created in the future
+     * DMA address transaltion.
+     */
+    vtd_iommu_lock(s);
+    QLIST_FOREACH(vtd_dev_icx, &s->vtd_dev_icx_list, next) {
+        bus_n = pci_bus_num(vtd_dev_icx->vtd_bus->bus);
+        devfn = vtd_dev_icx->devfn;
+
+        if (!vtd_dev_to_context_entry(s, bus_n, devfn, &ce)) {
+            info.vtd_bus = vtd_dev_icx->vtd_bus;
+            info.devfn = devfn;
+            vtd_sm_pasid_table_walk(s,
+                                    VTD_CE_GET_PASID_DIR_TABLE(&ce),
+                                    0,
+                                    VTD_MAX_HPASID,
+                                    &info);
+        }
+    }
+    vtd_iommu_unlock(s);
+}
+
 static int vtd_pasid_cache_dsi(IntelIOMMUState *s, uint16_t domain_id)
 {
     VTDPASIDCacheInfo pc_info;
@@ -2735,7 +2859,6 @@  static int vtd_pasid_cache_dsi(IntelIOMMUState *s, uint16_t domain_id)
     vtd_iommu_unlock(s);
 
     /*
-     * TODO:
      * Domain selective PASID cache invalidation flushes
      * all the pasid caches within a domain. To be safe,
      * after invalidating the pasid caches, emulator needs
@@ -2743,6 +2866,7 @@  static int vtd_pasid_cache_dsi(IntelIOMMUState *s, uint16_t domain_id)
      * dir and pasid table. e.g. When the guest setup a new
      * PASID entry then send a PASID DSI.
      */
+    vtd_replay_guest_pasid_bindings(s, &domain_id, true);
     return 0;
 }
 
@@ -2881,13 +3005,13 @@  static int vtd_pasid_cache_gsi(IntelIOMMUState *s)
     vtd_iommu_unlock(s);
 
     /*
-     * TODO:
      * Global PASID cache invalidation flushes all
      * the pasid caches. To be safe, after invalidating
      * the pasid caches, emulator needs to replay the
      * pasid bindings by walking guest pasid dir and
      * pasid table.
      */
+    vtd_replay_guest_pasid_bindings(s, NULL, false);
     return 0;
 }
 
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 4451acf..b0a324c 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -554,6 +554,7 @@  typedef struct VTDPASIDCacheInfo VTDPASIDCacheInfo;
 #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
 #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) & VTD_PASID_TABLE_BITS_MASK)
 #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing Disable */
+#define VTD_PASID_TBL_ENTRY_NUM       (1ULL << 6)
 
 /* PASID Granular Translation Type Mask */
 #define VTD_PASID_ENTRY_P              1ULL