
[2/2] intel_iommu: Fix unexpected unmaps during global unmap

Message ID 20190624063733.22079-3-peterx@redhat.com
State New
Series intel_iommu: Fix unexpected unmaps during global unmap

Commit Message

Peter Xu June 24, 2019, 6:37 a.m. UTC
From: Paolo Bonzini <pbonzini@redhat.com>

This is a replacement for Yan Zhao's patch:

https://www.mail-archive.com/qemu-devel@nongnu.org/msg625340.html

vtd_address_space_unmap() will do proper page mask alignment to make
sure each IOTLB notification message has a correct mask (2^N-1), but
sometimes the range can be expanded to even supersede the registered
range.  That could lead to unexpected UNMAPs of already mapped
regions in some other notifiers.

Instead of doing mindless expansion of the start address and address
mask, we split the range into smaller ones and guarantee that each
small range has a correct mask (2^N-1), while at the same time trying
our best to generate as few IOTLB messages as possible.
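
As an illustration (editorial example; the addresses are made up):
for a notifier registered over [0x3000, 0x8fff], the old code
computed size = end - start = 0x5fff, rounded it up to 0x8000, and
notified iova = 0x3000 & ~0x7fff = 0x0 with mask 0x7fff -- an UNMAP
of [0x0, 0x7fff] that touches pages below the registered range (and
misses [0x8000, 0x8fff]).  The new loop instead emits three properly
aligned messages:

    [0x3000, 0x3fff]   4K chunk, limited by the alignment of start
    [0x4000, 0x7fff]   16K chunk
    [0x8000, 0x8fff]   4K chunk, limited by the remaining size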

Reported-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[peterx: fixup mask generation algos and other touchups, introduce
 vtd_get_next_mask(), write commit message]
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/i386/intel_iommu.c | 70 +++++++++++++++++++++++++++----------------
 1 file changed, 44 insertions(+), 26 deletions(-)

Comments

Yan Zhao June 24, 2019, 6:41 a.m. UTC | #1
On Mon, Jun 24, 2019 at 02:37:33PM +0800, Peter Xu wrote:
> From: Paolo Bonzini <pbonzini@redhat.com>
> 
> This is a replacement for Yan Zhao's patch:
> 
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg625340.html
> 
> vtd_address_space_unmap() will do proper page mask alignment to make
> sure each IOTLB notification message has a correct mask (2^N-1), but
> sometimes the range can be expanded to even supersede the registered
> range.  That could lead to unexpected UNMAPs of already mapped
> regions in some other notifiers.
> 
> Instead of doing mindless expansion of the start address and address
> mask, we split the range into smaller ones and guarantee that each
> small range has a correct mask (2^N-1), while at the same time trying
> our best to generate as few IOTLB messages as possible.
> 
> Reported-by: Yan Zhao <yan.y.zhao@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> [peterx: fixup mask generation algos and other touchups, introduce
>  vtd_get_next_mask(), write commit message]
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  hw/i386/intel_iommu.c | 70 +++++++++++++++++++++++++++----------------
>  1 file changed, 44 insertions(+), 26 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 719ce19ab3..39cedf73b8 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3363,11 +3363,31 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
>      return vtd_dev_as;
>  }
>  
> +static uint64_t vtd_get_next_mask(uint64_t start, uint64_t size, int gaw)
> +{
> +    /* Tries to find smallest mask from start first */
> +    uint64_t rmask = start & -start, max_mask = 1ULL << gaw;
> +
> +    assert(size && gaw > 0 && gaw < 64);
> +
> +    /* Zero start, or too big */
> +    if (!rmask || rmask > max_mask) {
> +        rmask = max_mask;
> +    }
> +
> +    /* If the start mask worked, then use it */
> +    if (rmask <= size) {
> +        return rmask;
> +    }
> +
> +    /* Find the largest page mask from size */
> +    return 1ULL << (63 - clz64(size));
> +}
> +
>  /* Unmap the whole range in the notifier's scope. */
>  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
>  {
> -    IOMMUTLBEntry entry;
> -    hwaddr size;
> +    hwaddr size, remain;
>      hwaddr start = n->start;
>      hwaddr end = n->end;
>      IntelIOMMUState *s = as->iommu_state;
> @@ -3388,39 +3408,37 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
>      }
>  
>      assert(start <= end);
> -    size = end - start;
> +    size = remain = end - start + 1;
>  
> -    if (ctpop64(size) != 1) {
> -        /*
> -         * This size cannot format a correct mask. Let's enlarge it to
> -         * suite the minimum available mask.
> -         */
> -        int n = 64 - clz64(size);
> -        if (n > s->aw_bits) {
> -            /* should not happen, but in case it happens, limit it */
> -            n = s->aw_bits;
> -        }
> -        size = 1ULL << n;
> +    while (remain > 0) {
Hi,
I think the condition here should still be "remain >= VTD_PAGE_SIZE",
because we cannot unmap an entry smaller than PAGE_SIZE.

> +        IOMMUTLBEntry entry;
> +        uint64_t mask = vtd_get_next_mask(start, remain, s->aw_bits);
> +
> +        assert(mask);
> +

> +        entry.iova = start;
> +        entry.addr_mask = mask - 1;
> +        entry.target_as = &address_space_memory;
> +        entry.perm = IOMMU_NONE;
> +        /* This field is meaningless for unmap */
> +        entry.translated_addr = 0;
> +
> +        memory_region_notify_one(n, &entry);
> +
> +        start += mask;
> +        remain -= mask;
>      }
Add assert(remain) here?

>  
> -    entry.target_as = &address_space_memory;
> -    /* Adjust iova for the size */
> -    entry.iova = n->start & ~(size - 1);
> -    /* This field is meaningless for unmap */
> -    entry.translated_addr = 0;
> -    entry.perm = IOMMU_NONE;
> -    entry.addr_mask = size - 1;
> +    assert(!remain);
>  
>      trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
>                               VTD_PCI_SLOT(as->devfn),
>                               VTD_PCI_FUNC(as->devfn),
> -                             entry.iova, size);
> +                             n->start, size);
>  
> -    map.iova = entry.iova;
> -    map.size = entry.addr_mask;
> +    map.iova = n->start;
> +    map.size = size;
>      iova_tree_remove(as->iova_tree, &map);
> -
> -    memory_region_notify_one(n, &entry);
>  }
>  
>  static void vtd_address_space_unmap_all(IntelIOMMUState *s)
> -- 
> 2.21.0
>
Peter Xu June 24, 2019, 6:57 a.m. UTC | #2
On Mon, Jun 24, 2019 at 02:41:22AM -0400, Yan Zhao wrote:
> On Mon, Jun 24, 2019 at 02:37:33PM +0800, Peter Xu wrote:
> > From: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > This is a replacement for Yan Zhao's patch:
> > 
> > https://www.mail-archive.com/qemu-devel@nongnu.org/msg625340.html
> > 
> > vtd_address_space_unmap() will do proper page mask alignment to make
> > sure each IOTLB notification message has a correct mask (2^N-1), but
> > sometimes the range can be expanded to even supersede the registered
> > range.  That could lead to unexpected UNMAPs of already mapped
> > regions in some other notifiers.
> > 
> > Instead of doing mindless expansion of the start address and address
> > mask, we split the range into smaller ones and guarantee that each
> > small range has a correct mask (2^N-1), while at the same time trying
> > our best to generate as few IOTLB messages as possible.
> > 
> > Reported-by: Yan Zhao <yan.y.zhao@intel.com>
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > [peterx: fixup mask generation algos and other touchups, introduce
> >  vtd_get_next_mask(), write commit message]
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> > ---
> >  hw/i386/intel_iommu.c | 70 +++++++++++++++++++++++++++----------------
> >  1 file changed, 44 insertions(+), 26 deletions(-)
> > 
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index 719ce19ab3..39cedf73b8 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -3363,11 +3363,31 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
> >      return vtd_dev_as;
> >  }
> >  
> > +static uint64_t vtd_get_next_mask(uint64_t start, uint64_t size, int gaw)
> > +{
> > +    /* Tries to find smallest mask from start first */
> > +    uint64_t rmask = start & -start, max_mask = 1ULL << gaw;
> > +
> > +    assert(size && gaw > 0 && gaw < 64);
> > +
> > +    /* Zero start, or too big */
> > +    if (!rmask || rmask > max_mask) {
> > +        rmask = max_mask;
> > +    }
> > +
> > +    /* If the start mask worked, then use it */
> > +    if (rmask <= size) {
> > +        return rmask;
> > +    }
> > +
> > +    /* Find the largest page mask from size */
> > +    return 1ULL << (63 - clz64(size));
> > +}
> > +
> >  /* Unmap the whole range in the notifier's scope. */
> >  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
> >  {
> > -    IOMMUTLBEntry entry;
> > -    hwaddr size;
> > +    hwaddr size, remain;
> >      hwaddr start = n->start;
> >      hwaddr end = n->end;
> >      IntelIOMMUState *s = as->iommu_state;
> > @@ -3388,39 +3408,37 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
> >      }
> >  
> >      assert(start <= end);
> > -    size = end - start;
> > +    size = remain = end - start + 1;
> >  
> > -    if (ctpop64(size) != 1) {
> > -        /*
> > -         * This size cannot format a correct mask. Let's enlarge it to
> > -         * suite the minimum available mask.
> > -         */
> > -        int n = 64 - clz64(size);
> > -        if (n > s->aw_bits) {
> > -            /* should not happen, but in case it happens, limit it */
> > -            n = s->aw_bits;
> > -        }
> > -        size = 1ULL << n;
> > +    while (remain > 0) {
> Hi,
> I think the condition here should still be "remain >= VTD_PAGE_SIZE",
> because we cannot unmap an entry smaller than PAGE_SIZE.

Yes we can.

I'd say this is purely for protection purposes no matter what.  If we
wrote the code correctly when registering the IOMMU notifier then
we'll always have an aligned "remain" here and these checks will be
meaningless...  So we'll definitely fail in the case you mentioned;
imho the only difference is when it happens.

If we want to fail at the earliest point, we can probably check during
registering of the notifiers for page alignment.
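
For instance, something along these lines (editorial sketch, not
actual QEMU code; it only relies on the IOMMUNotifier start/end
fields, and a VTD_PAGE_SIZE constant is assumed):

    /* Hypothetical early check at notifier registration time */
    static void vtd_check_notifier_range(IOMMUNotifier *n)
    {
        /* [start, end] is an inclusive range covering whole pages */
        assert((n->start & (VTD_PAGE_SIZE - 1)) == 0);
        assert(((n->end + 1) & (VTD_PAGE_SIZE - 1)) == 0);
    }

so a misaligned range would fail at registration time rather than
deep inside the unmap path.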

> 
> > +        IOMMUTLBEntry entry;
> > +        uint64_t mask = vtd_get_next_mask(start, remain, s->aw_bits);
> > +
> > +        assert(mask);
> > +
> 
> > +        entry.iova = start;
> > +        entry.addr_mask = mask - 1;
> > +        entry.target_as = &address_space_memory;
> > +        entry.perm = IOMMU_NONE;
> > +        /* This field is meaningless for unmap */
> > +        entry.translated_addr = 0;
> > +
> > +        memory_region_notify_one(n, &entry);
> > +
> > +        start += mask;
> > +        remain -= mask;
> >      }
> Add assert(remain) here?

Do you mean assert(!remain)?  If so, it's below [1].

> 
> >  
> > -    entry.target_as = &address_space_memory;
> > -    /* Adjust iova for the size */
> > -    entry.iova = n->start & ~(size - 1);
> > -    /* This field is meaningless for unmap */
> > -    entry.translated_addr = 0;
> > -    entry.perm = IOMMU_NONE;
> > -    entry.addr_mask = size - 1;
> > +    assert(!remain);

[1]

> >  
> >      trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
> >                               VTD_PCI_SLOT(as->devfn),
> >                               VTD_PCI_FUNC(as->devfn),
> > -                             entry.iova, size);
> > +                             n->start, size);
> >  
> > -    map.iova = entry.iova;
> > -    map.size = entry.addr_mask;
> > +    map.iova = n->start;
> > +    map.size = size;
> >      iova_tree_remove(as->iova_tree, &map);
> > -
> > -    memory_region_notify_one(n, &entry);
> >  }
> >  
> >  static void vtd_address_space_unmap_all(IntelIOMMUState *s)
> > -- 
> > 2.21.0
> > 

Regards,
Yan Zhao June 24, 2019, 7:04 a.m. UTC | #3
On Mon, Jun 24, 2019 at 02:57:50PM +0800, Peter Xu wrote:
> On Mon, Jun 24, 2019 at 02:41:22AM -0400, Yan Zhao wrote:
> > On Mon, Jun 24, 2019 at 02:37:33PM +0800, Peter Xu wrote:
> > > From: Paolo Bonzini <pbonzini@redhat.com>
> > > 
> > > This is a replacement for Yan Zhao's patch:
> > > 
> > > https://www.mail-archive.com/qemu-devel@nongnu.org/msg625340.html
> > > 
> > > vtd_address_space_unmap() will do proper page mask alignment to make
> > > sure each IOTLB notification message has a correct mask (2^N-1), but
> > > sometimes the range can be expanded to even supersede the registered
> > > range.  That could lead to unexpected UNMAPs of already mapped
> > > regions in some other notifiers.
> > > 
> > > Instead of doing mindless expansion of the start address and address
> > > mask, we split the range into smaller ones and guarantee that each
> > > small range has a correct mask (2^N-1), while at the same time trying
> > > our best to generate as few IOTLB messages as possible.
> > > 
> > > Reported-by: Yan Zhao <yan.y.zhao@intel.com>
> > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > > [peterx: fixup mask generation algos and other touchups, introduce
> > >  vtd_get_next_mask(), write commit message]
> > > Signed-off-by: Peter Xu <peterx@redhat.com>
> > > ---
> > >  hw/i386/intel_iommu.c | 70 +++++++++++++++++++++++++++----------------
> > >  1 file changed, 44 insertions(+), 26 deletions(-)
> > > 
> > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > > index 719ce19ab3..39cedf73b8 100644
> > > --- a/hw/i386/intel_iommu.c
> > > +++ b/hw/i386/intel_iommu.c
> > > @@ -3363,11 +3363,31 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
> > >      return vtd_dev_as;
> > >  }
> > >  
> > > +static uint64_t vtd_get_next_mask(uint64_t start, uint64_t size, int gaw)
> > > +{
> > > +    /* Tries to find smallest mask from start first */
> > > +    uint64_t rmask = start & -start, max_mask = 1ULL << gaw;
> > > +
> > > +    assert(size && gaw > 0 && gaw < 64);
> > > +
> > > +    /* Zero start, or too big */
> > > +    if (!rmask || rmask > max_mask) {
> > > +        rmask = max_mask;
> > > +    }
> > > +
> > > +    /* If the start mask worked, then use it */
> > > +    if (rmask <= size) {
> > > +        return rmask;
> > > +    }
> > > +
> > > +    /* Find the largest page mask from size */
> > > +    return 1ULL << (63 - clz64(size));
> > > +}
> > > +
> > >  /* Unmap the whole range in the notifier's scope. */
> > >  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
> > >  {
> > > -    IOMMUTLBEntry entry;
> > > -    hwaddr size;
> > > +    hwaddr size, remain;
> > >      hwaddr start = n->start;
> > >      hwaddr end = n->end;
> > >      IntelIOMMUState *s = as->iommu_state;
> > > @@ -3388,39 +3408,37 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
> > >      }
> > >  
> > >      assert(start <= end);
> > > -    size = end - start;
> > > +    size = remain = end - start + 1;
> > >  
> > > -    if (ctpop64(size) != 1) {
> > > -        /*
> > > -         * This size cannot format a correct mask. Let's enlarge it to
> > > -         * suite the minimum available mask.
> > > -         */
> > > -        int n = 64 - clz64(size);
> > > -        if (n > s->aw_bits) {
> > > -            /* should not happen, but in case it happens, limit it */
> > > -            n = s->aw_bits;
> > > -        }
> > > -        size = 1ULL << n;
> > > +    while (remain > 0) {
> > Hi,
> > I think the condition here should still be "remain >= VTD_PAGE_SIZE",
> > because we cannot unmap an entry smaller than PAGE_SIZE.
> 
> Yes we can.
> 
> I'd say this is purely for protection purposes no matter what.  If we
> wrote the code correctly when registering the IOMMU notifier then
> we'll always have an aligned "remain" here and these checks will be
> meaningless...  So we'll definitely fail in the case you mentioned;
> imho the only difference is when it happens.
> 
> If we want to fail at the earliest point, we can probably check during
> registering of the notifiers for page alignment.
>
I think it might be helpful if there's anything wrong in the code.
For example, previously, with size = end - start, it could happen
that size eventually became less than the page size.
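
(Concretely -- editorial example: for a single-page inclusive range
[0x1000, 0x1fff], end - start gives 0xfff, which is below
VTD_PAGE_SIZE, while the intended end - start + 1 gives 0x1000.)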

> > 
> > > +        IOMMUTLBEntry entry;
> > > +        uint64_t mask = vtd_get_next_mask(start, remain, s->aw_bits);
> > > +
> > > +        assert(mask);
> > > +
> > 
> > > +        entry.iova = start;
> > > +        entry.addr_mask = mask - 1;
> > > +        entry.target_as = &address_space_memory;
> > > +        entry.perm = IOMMU_NONE;
> > > +        /* This field is meaningless for unmap */
> > > +        entry.translated_addr = 0;
> > > +
> > > +        memory_region_notify_one(n, &entry);
> > > +
> > > +        start += mask;
> > > +        remain -= mask;
> > >      }
> > Add assert(remain) here?
> 
> Do you mean assert(!remain)?  If so, it's below [1].
> 
yes, sorry, assert(!remain) :)
> > 
> > >  
> > > -    entry.target_as = &address_space_memory;
> > > -    /* Adjust iova for the size */
> > > -    entry.iova = n->start & ~(size - 1);
> > > -    /* This field is meaningless for unmap */
> > > -    entry.translated_addr = 0;
> > > -    entry.perm = IOMMU_NONE;
> > > -    entry.addr_mask = size - 1;
> > > +    assert(!remain);
> 
> [1]
> 
> > >  
> > >      trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
> > >                               VTD_PCI_SLOT(as->devfn),
> > >                               VTD_PCI_FUNC(as->devfn),
> > > -                             entry.iova, size);
> > > +                             n->start, size);
> > >  
> > > -    map.iova = entry.iova;
> > > -    map.size = entry.addr_mask;
> > > +    map.iova = n->start;
> > > +    map.size = size;
> > >      iova_tree_remove(as->iova_tree, &map);
> > > -
> > > -    memory_region_notify_one(n, &entry);
> > >  }
> > >  
> > >  static void vtd_address_space_unmap_all(IntelIOMMUState *s)
> > > -- 
> > > 2.21.0
> > > 
> 
> Regards,
> 
> -- 
> Peter Xu
Peter Xu June 24, 2019, 8:06 a.m. UTC | #4
On Mon, Jun 24, 2019 at 03:04:50AM -0400, Yan Zhao wrote:

[...]

> I think it might be helpful if there's anything wrong in the code.
> For example, previously, with size = end - start, it could happen
> that size eventually became less than the page size.

Well, if there is such an error we'd better fix it right away in this
patch... :)

Let me wait for some more comments, I'll touch that up too if I need a
repost.

Regards,
Yan Zhao June 24, 2019, 8:22 a.m. UTC | #5
Tested-by: Yan Zhao <yan.y.zhao@intel.com>

Paolo Bonzini June 24, 2019, 8:43 a.m. UTC | #6
On 24/06/19 10:06, Peter Xu wrote:
> Well, if there is such an error we'd better fix it right away in this
> patch... :)
> 
> Let me wait for some more comments, I'll touch that up too if I need a
> repost.

Looks good to me, except for one minor issue in this patch.  But do not
attribute this one to me, it's basically all code from you.

> +static uint64_t vtd_get_next_mask(uint64_t start, uint64_t size, int gaw)
> +{
> +    /* Tries to find smallest mask from start first */
> +    uint64_t rmask = start & -start, max_mask = 1ULL << gaw;
> +
> +    assert(size && gaw > 0 && gaw < 64);
> +
> +    /* Zero start, or too big */
> +    if (!rmask || rmask > max_mask) {
> +        rmask = max_mask;
> +    }

Perhaps simpler:

    uint64_t max_mask = 1ULL << gaw;
    uint64_t alignment = start ? start & -start : max_mask;

    size = MIN(size, max_mask);
    if (alignment <= size) {
        /* Increase the alignment of start */
        return alignment;
    } else {
        /* Find the largest page mask from size */
        return 1ULL << (63 - clz64(size));
    }

Also please rename it to get_naturally_aligned_size.

Paolo
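
(Editorial aside: the suggested helper can be sanity-checked with a
small brute-force harness -- standalone C, with QEMU's clz64() and
MIN approximated locally:

    #include <assert.h>
    #include <stdint.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    static int clz64(uint64_t v) { return v ? __builtin_clzll(v) : 64; }

    /* The suggested form, reproduced from above */
    static uint64_t get_naturally_aligned_size(uint64_t start,
                                               uint64_t size, int gaw)
    {
        uint64_t max_mask = 1ULL << gaw;
        uint64_t alignment = start ? start & -start : max_mask;

        size = MIN(size, max_mask);
        if (alignment <= size) {
            /* Increase the alignment of start */
            return alignment;
        } else {
            /* Find the largest page mask from size */
            return 1ULL << (63 - clz64(size));
        }
    }

    int main(void)
    {
        /* gaw = 6 is a toy value to keep the search space small */
        for (uint64_t start = 0; start < 256; start++) {
            for (uint64_t size = 1; size < 256; size++) {
                uint64_t m = get_naturally_aligned_size(start, size, 6);

                assert(m && !(m & (m - 1)));    /* power of two */
                assert(m <= size);              /* never past the range */
                assert(m <= (1ULL << 6));       /* respects gaw */
                assert((start & (m - 1)) == 0); /* start stays aligned */
            }
        }
        return 0;
    }

asserting that every returned chunk is a naturally aligned power of
two that never exceeds the remaining size or 2^gaw.)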
Peter Xu June 24, 2019, 9:09 a.m. UTC | #7
On Mon, Jun 24, 2019 at 10:43:21AM +0200, Paolo Bonzini wrote:
> On 24/06/19 10:06, Peter Xu wrote:
> > Well, if there is such an error we'd better fix it right away in this
> > patch... :)
> > 
> > Let me wait for some more comments, I'll touch that up too if I need a
> > repost.
> 
> Looks good to me, except for one minor issue in this patch.  But do not
> attribute this one to me, it's basically all code from you.

OK.

> 
> > +static uint64_t vtd_get_next_mask(uint64_t start, uint64_t size, int gaw)
> > +{
> > +    /* Tries to find smallest mask from start first */
> > +    uint64_t rmask = start & -start, max_mask = 1ULL << gaw;
> > +
> > +    assert(size && gaw > 0 && gaw < 64);
> > +
> > +    /* Zero start, or too big */
> > +    if (!rmask || rmask > max_mask) {
> > +        rmask = max_mask;
> > +    }
> 
> Perhaps simpler:
> 
>     uint64_t max_mask = 1ULL << gaw;
>     uint64_t alignment = start ? start & -start : max_mask;
> 

(I'll add another "alignment = MIN(alignment, max_mask)" here if no
 one disagrees...)

>     size = MIN(size, max_mask);
>     if (alignment <= size) {
>         /* Increase the alignment of start */
>         return alignment;
>     } else {
>         /* Find the largest page mask from size */
>         return 1ULL << (63 - clz64(size));
>     }
> 
> Also please rename it to get_naturally_aligned_size.

Will do.  Thanks,
Paolo Bonzini June 24, 2019, 10:11 a.m. UTC | #8
On 24/06/19 11:09, Peter Xu wrote:
> On Mon, Jun 24, 2019 at 10:43:21AM +0200, Paolo Bonzini wrote:
>> On 24/06/19 10:06, Peter Xu wrote:
>>> Well, if there is such an error we'd better fix it right away in this
>>> patch... :)
>>>
>>> Let me wait for some more comments, I'll touch that up too if I need a
>>> repost.
>>
>> Looks good to me, except for one minor issue in this patch.  But do not
>> attribute this one to me, it's basically all code from you.
> 
> OK.
> 
>>
>>> +static uint64_t vtd_get_next_mask(uint64_t start, uint64_t size, int gaw)
>>> +{
>>> +    /* Tries to find smallest mask from start first */
>>> +    uint64_t rmask = start & -start, max_mask = 1ULL << gaw;
>>> +
>>> +    assert(size && gaw > 0 && gaw < 64);
>>> +
>>> +    /* Zero start, or too big */
>>> +    if (!rmask || rmask > max_mask) {
>>> +        rmask = max_mask;
>>> +    }
>>
>> Perhaps simpler:
>>
>>     uint64_t max_mask = 1ULL << gaw;
>>     uint64_t alignment = start ? start & -start : max_mask;
>>
> 
> (I'll add another "alignment = MIN(alignment, max_mask)" here if no
>  one disagree...)

I do! :)  If alignment > max_mask, then it will also be > size below so
clamping is unnecessary.

There is another way which is to compute on the mask, so that start == 0
underflows to all-ones:

    uint64_t max_mask = (1ULL << gaw) - 1;
    uint64_t start_mask = (start & -start) - 1;
    uint64_t size_mask = pow2floor(size) - 1;
    return MIN(MIN(size_mask, start_mask), max_mask) + 1;

Paolo

>>     size = MIN(size, max_mask);
>>     if (alignment <= size) {
>>         /* Increase the alignment of start */
>>         return alignment;
>>     } else {
>>         /* Find the largest page mask from size */
>>         return 1ULL << (63 - clz64(size));
>>     }
>>
>> Also please rename it to get_naturally_aligned_size.
> 
> Will do.  Thanks,
>
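
(Editorial note on the trick above: for start == 0, start & -start is
0, so (start & -start) - 1 wraps around to all-ones on uint64_t,
which acts as "no alignment constraint", and the MIN() chain then
picks the smaller of size_mask and max_mask.  pow2floor(size), i.e.
1ULL << (63 - clz64(size)), is the same "largest power of two <=
size" computation used elsewhere in the thread.)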
Peter Xu June 24, 2019, 10:28 a.m. UTC | #9
On Mon, Jun 24, 2019 at 12:11:54PM +0200, Paolo Bonzini wrote:
> On 24/06/19 11:09, Peter Xu wrote:
> > On Mon, Jun 24, 2019 at 10:43:21AM +0200, Paolo Bonzini wrote:
> >> On 24/06/19 10:06, Peter Xu wrote:
> >>> Well, if there is such an error we'd better fix it right away in this
> >>> patch... :)
> >>>
> >>> Let me wait for some more comments, I'll touch that up too if I need a
> >>> repost.
> >>
> >> Looks good to me, except for one minor issue in this patch.  But do not
> >> attribute this one to me, it's basically all code from you.
> > 
> > OK.
> > 
> >>
> >>> +static uint64_t vtd_get_next_mask(uint64_t start, uint64_t size, int gaw)
> >>> +{
> >>> +    /* Tries to find smallest mask from start first */
> >>> +    uint64_t rmask = start & -start, max_mask = 1ULL << gaw;
> >>> +
> >>> +    assert(size && gaw > 0 && gaw < 64);
> >>> +
> >>> +    /* Zero start, or too big */
> >>> +    if (!rmask || rmask > max_mask) {
> >>> +        rmask = max_mask;
> >>> +    }
> >>
> >> Perhaps simpler:
> >>
> >>     uint64_t max_mask = 1ULL << gaw;
> >>     uint64_t alignment = start ? start & -start : max_mask;
> >>
> > 
> > (I'll add another "alignment = MIN(alignment, max_mask)" here if no
> >  one disagrees...)
> 
> I do! :)  If alignment > max_mask, then it will also be > size below so
> clamping is unnecessary.

You are right. ;)

> 
> There is another way which is to compute on the mask, so that start == 0
> underflows to all-ones:
> 
>     uint64_t max_mask = (1ULL << gaw) - 1;
>     uint64_t start_mask = (start & -start) - 1;
>     uint64_t size_mask = pow2floor(size) - 1;
>     return MIN(MIN(size_mask, start_mask), max_mask) + 1;

The last line still seems problematic, but I just want to say the
calculation of size_mask is indeed a smart move! (I did think the zero
check was a bit ugly)

Thanks,

Patch

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 719ce19ab3..39cedf73b8 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3363,11 +3363,31 @@  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
     return vtd_dev_as;
 }
 
+static uint64_t vtd_get_next_mask(uint64_t start, uint64_t size, int gaw)
+{
+    /* Tries to find smallest mask from start first */
+    uint64_t rmask = start & -start, max_mask = 1ULL << gaw;
+
+    assert(size && gaw > 0 && gaw < 64);
+
+    /* Zero start, or too big */
+    if (!rmask || rmask > max_mask) {
+        rmask = max_mask;
+    }
+
+    /* If the start mask worked, then use it */
+    if (rmask <= size) {
+        return rmask;
+    }
+
+    /* Find the largest page mask from size */
+    return 1ULL << (63 - clz64(size));
+}
+
 /* Unmap the whole range in the notifier's scope. */
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
 {
-    IOMMUTLBEntry entry;
-    hwaddr size;
+    hwaddr size, remain;
     hwaddr start = n->start;
     hwaddr end = n->end;
     IntelIOMMUState *s = as->iommu_state;
@@ -3388,39 +3408,37 @@  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
     }
 
     assert(start <= end);
-    size = end - start;
+    size = remain = end - start + 1;
 
-    if (ctpop64(size) != 1) {
-        /*
-         * This size cannot format a correct mask. Let's enlarge it to
-         * suite the minimum available mask.
-         */
-        int n = 64 - clz64(size);
-        if (n > s->aw_bits) {
-            /* should not happen, but in case it happens, limit it */
-            n = s->aw_bits;
-        }
-        size = 1ULL << n;
+    while (remain > 0) {
+        IOMMUTLBEntry entry;
+        uint64_t mask = vtd_get_next_mask(start, remain, s->aw_bits);
+
+        assert(mask);
+
+        entry.iova = start;
+        entry.addr_mask = mask - 1;
+        entry.target_as = &address_space_memory;
+        entry.perm = IOMMU_NONE;
+        /* This field is meaningless for unmap */
+        entry.translated_addr = 0;
+
+        memory_region_notify_one(n, &entry);
+
+        start += mask;
+        remain -= mask;
     }
 
-    entry.target_as = &address_space_memory;
-    /* Adjust iova for the size */
-    entry.iova = n->start & ~(size - 1);
-    /* This field is meaningless for unmap */
-    entry.translated_addr = 0;
-    entry.perm = IOMMU_NONE;
-    entry.addr_mask = size - 1;
+    assert(!remain);
 
     trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
                              VTD_PCI_SLOT(as->devfn),
                              VTD_PCI_FUNC(as->devfn),
-                             entry.iova, size);
+                             n->start, size);
 
-    map.iova = entry.iova;
-    map.size = entry.addr_mask;
+    map.iova = n->start;
+    map.size = size;
     iova_tree_remove(as->iova_tree, &map);
-
-    memory_region_notify_one(n, &entry);
 }
 
 static void vtd_address_space_unmap_all(IntelIOMMUState *s)
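
(Editorial footnote: the split loop can be experimented with outside
QEMU; below is a self-contained sketch in which hwaddr and clz64()
are local stand-ins for the QEMU types/helpers and gaw is fixed at an
arbitrary 39 bits:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t hwaddr;

    static int clz64(uint64_t v) { return v ? __builtin_clzll(v) : 64; }

    /* Same logic as the patch's vtd_get_next_mask() */
    static uint64_t vtd_get_next_mask(uint64_t start, uint64_t size, int gaw)
    {
        uint64_t rmask = start & -start, max_mask = 1ULL << gaw;

        if (!rmask || rmask > max_mask) {
            rmask = max_mask;
        }
        if (rmask <= size) {
            return rmask;
        }
        return 1ULL << (63 - clz64(size));
    }

    int main(void)
    {
        hwaddr start = 0x3000, end = 0x8fff;   /* sample notifier scope */
        hwaddr remain = end - start + 1;

        while (remain > 0) {
            uint64_t mask = vtd_get_next_mask(start, remain, 39);

            printf("UNMAP [0x%" PRIx64 ", 0x%" PRIx64 "]\n",
                   start, start + mask - 1);
            start += mask;
            remain -= mask;
        }
        return 0;
    }

which prints the three aligned messages used as the example in the
commit log.)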