[RFC,qemu,v3,2/4] vfio: Use different page size for different IOMMU types

Message ID 1436876514-2946-3-git-send-email-aik@ozlabs.ru
State New

Commit Message

Alexey Kardashevskiy July 14, 2015, 12:21 p.m. UTC
The existing memory listener is called on either a RAM or a PCI address
space, which implies potentially different page sizes. Instead of guessing
which page size should be used, this replaces the single IOMMU memory
listener with two, one per supported IOMMU type; the listener callbacks
call the existing helpers with a known page size.

For now, Type1 uses qemu_real_host_page_mask and sPAPR uses the page size
reported by the IOMMU.

As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions,
this adds a memory_region_is_iommu() check to the sPAPR IOMMU listener to
skip non-IOMMU regions (i.e. the MSI-X window), which duplicates
vfio_listener_skipped_section() a bit.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 76 insertions(+), 19 deletions(-)
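
Purely for reference (not part of the patch): page_mask is the complement
of page_size - 1, so ~page_mask + 1 recovers the page size and
ROUND_UP(offset, ~page_mask + 1) behaves like TARGET_PAGE_ALIGN() did,
just with a per-IOMMU granularity. A minimal standalone sketch with
made-up values:

/* Illustration only -- values are invented, not from the patch. */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uint64_t page_size = 0x10000;            /* e.g. a 64K IOMMU page */
    uint64_t page_mask = ~(page_size - 1);   /* as the sPAPR listener computes it */
    uint64_t offset = 0x12345;

    /* ~page_mask + 1 == page_size, so rounding up gives 0x20000 */
    uint64_t aligned = (offset + ~page_mask) & page_mask;

    printf("0x%" PRIx64 "\n", aligned);      /* prints 0x20000 */
    return 0;
}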

Comments

Alex Williamson July 15, 2015, 6:26 p.m. UTC | #1
On Tue, 2015-07-14 at 22:21 +1000, Alexey Kardashevskiy wrote:
> The existing memory listener is called on RAM or PCI address space
> which implies potentially different page size. Instead of guessing
> what page size should be used, this replaces a single IOMMU memory
> listener by two, one per supported IOMMU type; listener callbacks
> call the existing helpers with a known page size.
> 
> For now, Type1 uses qemu_real_host_page_mask, sPAPR uses the page size
> from IOMMU.
> 
> As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions,
> this adds memory_region_is_iommu() to the SPAPR IOMMU listener to skip
> non IOMMU regions (which is an MSIX window) which duplicates
> vfio_listener_skipped_section() a bit.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 76 insertions(+), 19 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 85ee9b0..aad41e1 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -312,11 +312,11 @@ out:
>      rcu_read_unlock();
>  }
>  
> -static void vfio_listener_region_add(MemoryListener *listener,
> +static void vfio_listener_region_add(VFIOContainer *container,
> +                                     hwaddr page_mask,

Should memory_region_iommu_get_page_sizes() return a hwaddr?

> +                                     MemoryListener *listener,
>                                       MemoryRegionSection *section)
>  {
> -    VFIOContainer *container = container_of(listener, VFIOContainer,
> -                                            iommu_data.type1.listener);
>      hwaddr iova, end;
>      Int128 llend;
>      void *vaddr;
> @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener *listener,
>          return;
>      }
>  
> -    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
> -                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> +    if (unlikely((section->offset_within_address_space & ~page_mask) !=
> +                 (section->offset_within_region & ~page_mask))) {
>          error_report("%s received unaligned region", __func__);
>          return;
>      }
>  
> -    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
>      llend = int128_make64(section->offset_within_address_space);
>      llend = int128_add(llend, section->size);
> -    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> +    llend = int128_and(llend, int128_exts64(page_mask));
>  
>      if (int128_ge(int128_make64(iova), llend)) {
>          return;
> @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener *listener,
>      }
>  }
>  
> -static void vfio_listener_region_del(MemoryListener *listener,
> +static void vfio_listener_region_del(VFIOContainer *container,
> +                                     hwaddr page_mask,
> +                                     MemoryListener *listener,
>                                       MemoryRegionSection *section)
>  {
> -    VFIOContainer *container = container_of(listener, VFIOContainer,
> -                                            iommu_data.type1.listener);
>      hwaddr iova, end;
>      int ret;
>  
> @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
>          return;
>      }
>  
> -    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
> -                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> +    if (unlikely((section->offset_within_address_space & ~page_mask) !=
> +                 (section->offset_within_region & ~page_mask))) {
>          error_report("%s received unaligned region", __func__);
>          return;
>      }
> @@ -459,9 +459,9 @@ static void vfio_listener_region_del(MemoryListener *listener,
>           */
>      }
>  
> -    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
>      end = (section->offset_within_address_space + int128_get64(section->size)) &
> -          TARGET_PAGE_MASK;
> +          page_mask;
>  
>      if (iova >= end) {
>          return;
> @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener *listener,
>      }
>  }
>  
> -static const MemoryListener vfio_memory_listener = {
> -    .region_add = vfio_listener_region_add,
> -    .region_del = vfio_listener_region_del,
> +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener,
> +                                                 MemoryRegionSection *section)
> +{
> +    VFIOContainer *container = container_of(listener, VFIOContainer,
> +                                            iommu_data.type1.listener);
> +
> +    vfio_listener_region_add(container, qemu_real_host_page_mask, listener,
> +                             section);
> +}
> +
> +
> +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener,
> +                                                 MemoryRegionSection *section)
> +{
> +    VFIOContainer *container = container_of(listener, VFIOContainer,
> +                                            iommu_data.type1.listener);
> +
> +    vfio_listener_region_del(container, qemu_real_host_page_mask, listener,
> +                             section);
> +}
> +
> +static const MemoryListener vfio_type1_iommu_listener = {
> +    .region_add = vfio_type1_iommu_listener_region_add,
> +    .region_del = vfio_type1_iommu_listener_region_del,
> +};
> +
> +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener,
> +                                     MemoryRegionSection *section)
> +{
> +    VFIOContainer *container;
> +    hwaddr page_mask;
> +
> +    if (!memory_region_is_iommu(section->mr)) {
> +        return;
> +    }
> +    container = container_of(listener, VFIOContainer,
> +                             iommu_data.type1.listener);
> +    page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
> +    vfio_listener_region_add(container, page_mask, listener, section);
> +}
> +
> +
> +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener,
> +                                     MemoryRegionSection *section)
> +{
> +    VFIOContainer *container;
> +    hwaddr page_mask;
> +
> +    if (!memory_region_is_iommu(section->mr)) {
> +        return;
> +    }
> +    container = container_of(listener, VFIOContainer,
> +                             iommu_data.type1.listener);
> +    page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
> +    vfio_listener_region_del(container, page_mask, listener, section);
> +}
> +
> +static const MemoryListener vfio_spapr_iommu_listener = {
> +    .region_add = vfio_spapr_iommu_listener_region_add,
> +    .region_del = vfio_spapr_iommu_listener_region_del,
>  };


Rather than creating all these wrappers, why don't we create a structure
that gives us callbacks we can use for a shared function?  If we had:

struct VFIOMemoryListener {
	struct MemoryListener listener;
	bool (*filter)(MemoryRegionSection *section);
	hwaddr (*page_size)(MemoryRegionSection *section);
	VFIOContainer *container;
};

Then there would be no reason for you to have separate wrappers for
spapr.  VFIOType1 would have a single VFIOMemoryListener; spapr could
have an array of two.
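
Purely as an illustration of that idea (vfio_listener_init,
vfio_host_page_mask and the embedding of the VFIOMemoryListener are
assumptions sketched here, not code from this series):

static hwaddr vfio_host_page_mask(MemoryRegionSection *section)
{
    /* despite the field name, this returns the mask the helpers consume */
    return qemu_real_host_page_mask;
}

static void vfio_listener_init(VFIOContainer *container,
                               VFIOMemoryListener *vlistener,
                               bool (*filter)(MemoryRegionSection *),
                               hwaddr (*page_size)(MemoryRegionSection *),
                               AddressSpace *as)
{
    vlistener->container = container;
    vlistener->filter = filter;            /* NULL for Type1 */
    vlistener->page_size = page_size;
    /* the shared helpers keep the MemoryListener callback signature and
     * recover the VFIOMemoryListener via container_of() */
    vlistener->listener = (MemoryListener) {
        .region_add = vfio_listener_region_add,
        .region_del = vfio_listener_region_del,
    };
    memory_listener_register(&vlistener->listener, as);
}

The Type1 container would register one of these with vfio_host_page_mask
and no filter; the sPAPR container could register an array of two with
the IOMMU-aware callbacks.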
>  
>  static void vfio_listener_release(VFIOContainer *container)
> @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>              goto free_container_exit;
>          }
>  
> -        container->iommu_data.type1.listener = vfio_memory_listener;
> +        container->iommu_data.type1.listener = vfio_type1_iommu_listener;
>          container->iommu_data.release = vfio_listener_release;
>  
>          memory_listener_register(&container->iommu_data.type1.listener,
> @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>              goto free_container_exit;
>          }
>  
> -        container->iommu_data.type1.listener = vfio_memory_listener;
> +        container->iommu_data.type1.listener = vfio_spapr_iommu_listener;
>          container->iommu_data.release = vfio_listener_release;
>  
>          memory_listener_register(&container->iommu_data.type1.listener,
Alexey Kardashevskiy July 16, 2015, 1:26 a.m. UTC | #2
On 07/16/2015 04:26 AM, Alex Williamson wrote:
> On Tue, 2015-07-14 at 22:21 +1000, Alexey Kardashevskiy wrote:
>> The existing memory listener is called on RAM or PCI address space
>> which implies potentially different page size. Instead of guessing
>> what page size should be used, this replaces a single IOMMU memory
>> listener by two, one per supported IOMMU type; listener callbacks
>> call the existing helpers with a known page size.
>>
>> For now, Type1 uses qemu_real_host_page_mask, sPAPR uses the page size
>> from IOMMU.
>>
>> As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions,
>> this adds memory_region_is_iommu() to the SPAPR IOMMU listener to skip
>> non IOMMU regions (which is an MSIX window) which duplicates
>> vfio_listener_skipped_section() a bit.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>   hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------
>>   1 file changed, 76 insertions(+), 19 deletions(-)
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 85ee9b0..aad41e1 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -312,11 +312,11 @@ out:
>>       rcu_read_unlock();
>>   }
>>
>> -static void vfio_listener_region_add(MemoryListener *listener,
>> +static void vfio_listener_region_add(VFIOContainer *container,
>> +                                     hwaddr page_mask,
>
> Should memory_region_iommu_get_page_sizes() return a hwaddr?


I do not think so, memory.c uses uint64_t for masks.
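
(For context, the assumed shape of that helper, which comes from the
earlier "memory: Add reporting of supported page sizes" patch of this
series and is not shown here:)

/* assumed prototype; the real declaration lives in the memory patch */
uint64_t memory_region_iommu_get_page_sizes(MemoryRegion *mr);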



>> +                                     MemoryListener *listener,
>>                                        MemoryRegionSection *section)
>>   {
>> -    VFIOContainer *container = container_of(listener, VFIOContainer,
>> -                                            iommu_data.type1.listener);
>>       hwaddr iova, end;
>>       Int128 llend;
>>       void *vaddr;
>> @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>           return;
>>       }
>>
>> -    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
>> -                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
>> +    if (unlikely((section->offset_within_address_space & ~page_mask) !=
>> +                 (section->offset_within_region & ~page_mask))) {
>>           error_report("%s received unaligned region", __func__);
>>           return;
>>       }
>>
>> -    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>> +    iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
>>       llend = int128_make64(section->offset_within_address_space);
>>       llend = int128_add(llend, section->size);
>> -    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
>> +    llend = int128_and(llend, int128_exts64(page_mask));
>>
>>       if (int128_ge(int128_make64(iova), llend)) {
>>           return;
>> @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>       }
>>   }
>>
>> -static void vfio_listener_region_del(MemoryListener *listener,
>> +static void vfio_listener_region_del(VFIOContainer *container,
>> +                                     hwaddr page_mask,
>> +                                     MemoryListener *listener,
>>                                        MemoryRegionSection *section)
>>   {
>> -    VFIOContainer *container = container_of(listener, VFIOContainer,
>> -                                            iommu_data.type1.listener);
>>       hwaddr iova, end;
>>       int ret;
>>
>> @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>           return;
>>       }
>>
>> -    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
>> -                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
>> +    if (unlikely((section->offset_within_address_space & ~page_mask) !=
>> +                 (section->offset_within_region & ~page_mask))) {
>>           error_report("%s received unaligned region", __func__);
>>           return;
>>       }
>> @@ -459,9 +459,9 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>            */
>>       }
>>
>> -    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>> +    iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
>>       end = (section->offset_within_address_space + int128_get64(section->size)) &
>> -          TARGET_PAGE_MASK;
>> +          page_mask;
>>
>>       if (iova >= end) {
>>           return;
>> @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>       }
>>   }
>>
>> -static const MemoryListener vfio_memory_listener = {
>> -    .region_add = vfio_listener_region_add,
>> -    .region_del = vfio_listener_region_del,
>> +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener,
>> +                                                 MemoryRegionSection *section)
>> +{
>> +    VFIOContainer *container = container_of(listener, VFIOContainer,
>> +                                            iommu_data.type1.listener);
>> +
>> +    vfio_listener_region_add(container, qemu_real_host_page_mask, listener,
>> +                             section);
>> +}
>> +
>> +
>> +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener,
>> +                                                 MemoryRegionSection *section)
>> +{
>> +    VFIOContainer *container = container_of(listener, VFIOContainer,
>> +                                            iommu_data.type1.listener);
>> +
>> +    vfio_listener_region_del(container, qemu_real_host_page_mask, listener,
>> +                             section);
>> +}
>> +
>> +static const MemoryListener vfio_type1_iommu_listener = {
>> +    .region_add = vfio_type1_iommu_listener_region_add,
>> +    .region_del = vfio_type1_iommu_listener_region_del,
>> +};
>> +
>> +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener,
>> +                                     MemoryRegionSection *section)
>> +{
>> +    VFIOContainer *container;
>> +    hwaddr page_mask;
>> +
>> +    if (!memory_region_is_iommu(section->mr)) {
>> +        return;
>> +    }
>> +    container = container_of(listener, VFIOContainer,
>> +                             iommu_data.type1.listener);
>> +    page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
>> +    vfio_listener_region_add(container, page_mask, listener, section);
>> +}
>> +
>> +
>> +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener,
>> +                                     MemoryRegionSection *section)
>> +{
>> +    VFIOContainer *container;
>> +    hwaddr page_mask;
>> +
>> +    if (!memory_region_is_iommu(section->mr)) {
>> +        return;
>> +    }
>> +    container = container_of(listener, VFIOContainer,
>> +                             iommu_data.type1.listener);
>> +    page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
>> +    vfio_listener_region_del(container, page_mask, listener, section);
>> +}
>> +
>> +static const MemoryListener vfio_spapr_iommu_listener = {
>> +    .region_add = vfio_spapr_iommu_listener_region_add,
>> +    .region_del = vfio_spapr_iommu_listener_region_del,
>>   };
>
>
> Rather than creating all these wrappers, why don't we create a structure
> that gives us callbacks we can use for a shared function?  If we had:
>
> struct VFIOMemoryListener {
> 	struct MemoryListener listener;
> 	bool (*filter)(MemoryRegionSection *section);
> 	hwaddr (page_size)(MemoryRegionSection *section);
> 	VFIOContainer *container;
> }
>
> Then there would be no reason for you to have separate wrappers for
> spapr.  VFIOType1 would have a single VFIOMemoryListener, you could have
> an array of two.

Sorry, I am missing the point here...

I cannot just have an array of these - I would have to store an address
space per listener in the container to implement filter(), so I'd rather
store just the address space and not add filter() at all. And I will still
need "memory: Add reporting of supported page sizes" - otherwise there is
no way to implement it at all. So I still end up with 6 callbacks in
hw/vfio/common.c. What does this win for us?


>>
>>   static void vfio_listener_release(VFIOContainer *container)
>> @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>>               goto free_container_exit;
>>           }
>>
>> -        container->iommu_data.type1.listener = vfio_memory_listener;
>> +        container->iommu_data.type1.listener = vfio_type1_iommu_listener;
>>           container->iommu_data.release = vfio_listener_release;
>>
>>           memory_listener_register(&container->iommu_data.type1.listener,
>> @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>>               goto free_container_exit;
>>           }
>>
>> -        container->iommu_data.type1.listener = vfio_memory_listener;
>> +        container->iommu_data.type1.listener = vfio_spapr_iommu_listener;
>>           container->iommu_data.release = vfio_listener_release;
>>
>>           memory_listener_register(&container->iommu_data.type1.listener,
>
>
>
Alex Williamson July 16, 2015, 2:51 a.m. UTC | #3
On Thu, 2015-07-16 at 11:26 +1000, Alexey Kardashevskiy wrote:
> On 07/16/2015 04:26 AM, Alex Williamson wrote:
> > On Tue, 2015-07-14 at 22:21 +1000, Alexey Kardashevskiy wrote:
> >> The existing memory listener is called on RAM or PCI address space
> >> which implies potentially different page size. Instead of guessing
> >> what page size should be used, this replaces a single IOMMU memory
> >> listener by two, one per supported IOMMU type; listener callbacks
> >> call the existing helpers with a known page size.
> >>
> >> For now, Type1 uses qemu_real_host_page_mask, sPAPR uses the page size
> >> from IOMMU.
> >>
> >> As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions,
> >> this adds memory_region_is_iommu() to the SPAPR IOMMU listener to skip
> >> non IOMMU regions (which is an MSIX window) which duplicates
> >> vfio_listener_skipped_section() a bit.
> >>
> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >> ---
> >>   hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------
> >>   1 file changed, 76 insertions(+), 19 deletions(-)
> >>
> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >> index 85ee9b0..aad41e1 100644
> >> --- a/hw/vfio/common.c
> >> +++ b/hw/vfio/common.c
> >> @@ -312,11 +312,11 @@ out:
> >>       rcu_read_unlock();
> >>   }
> >>
> >> -static void vfio_listener_region_add(MemoryListener *listener,
> >> +static void vfio_listener_region_add(VFIOContainer *container,
> >> +                                     hwaddr page_mask,
> >
> > Should memory_region_iommu_get_page_sizes() return a hwaddr?
> 
> 
> I do not think so, memory.c uses uint64_t for masks.
> 
> 
> 
> >> +                                     MemoryListener *listener,
> >>                                        MemoryRegionSection *section)
> >>   {
> >> -    VFIOContainer *container = container_of(listener, VFIOContainer,
> >> -                                            iommu_data.type1.listener);
> >>       hwaddr iova, end;
> >>       Int128 llend;
> >>       void *vaddr;
> >> @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener *listener,
> >>           return;
> >>       }
> >>
> >> -    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
> >> -                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> >> +    if (unlikely((section->offset_within_address_space & ~page_mask) !=
> >> +                 (section->offset_within_region & ~page_mask))) {
> >>           error_report("%s received unaligned region", __func__);
> >>           return;
> >>       }
> >>
> >> -    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> >> +    iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
> >>       llend = int128_make64(section->offset_within_address_space);
> >>       llend = int128_add(llend, section->size);
> >> -    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> >> +    llend = int128_and(llend, int128_exts64(page_mask));
> >>
> >>       if (int128_ge(int128_make64(iova), llend)) {
> >>           return;
> >> @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener *listener,
> >>       }
> >>   }
> >>
> >> -static void vfio_listener_region_del(MemoryListener *listener,
> >> +static void vfio_listener_region_del(VFIOContainer *container,
> >> +                                     hwaddr page_mask,
> >> +                                     MemoryListener *listener,
> >>                                        MemoryRegionSection *section)
> >>   {
> >> -    VFIOContainer *container = container_of(listener, VFIOContainer,
> >> -                                            iommu_data.type1.listener);
> >>       hwaddr iova, end;
> >>       int ret;
> >>
> >> @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
> >>           return;
> >>       }
> >>
> >> -    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
> >> -                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> >> +    if (unlikely((section->offset_within_address_space & ~page_mask) !=
> >> +                 (section->offset_within_region & ~page_mask))) {
> >>           error_report("%s received unaligned region", __func__);
> >>           return;
> >>       }
> >> @@ -459,9 +459,9 @@ static void vfio_listener_region_del(MemoryListener *listener,
> >>            */
> >>       }
> >>
> >> -    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> >> +    iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
> >>       end = (section->offset_within_address_space + int128_get64(section->size)) &
> >> -          TARGET_PAGE_MASK;
> >> +          page_mask;
> >>
> >>       if (iova >= end) {
> >>           return;
> >> @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener *listener,
> >>       }
> >>   }
> >>
> >> -static const MemoryListener vfio_memory_listener = {
> >> -    .region_add = vfio_listener_region_add,
> >> -    .region_del = vfio_listener_region_del,
> >> +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener,
> >> +                                                 MemoryRegionSection *section)
> >> +{
> >> +    VFIOContainer *container = container_of(listener, VFIOContainer,
> >> +                                            iommu_data.type1.listener);
> >> +
> >> +    vfio_listener_region_add(container, qemu_real_host_page_mask, listener,
> >> +                             section);
> >> +}
> >> +
> >> +
> >> +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener,
> >> +                                                 MemoryRegionSection *section)
> >> +{
> >> +    VFIOContainer *container = container_of(listener, VFIOContainer,
> >> +                                            iommu_data.type1.listener);
> >> +
> >> +    vfio_listener_region_del(container, qemu_real_host_page_mask, listener,
> >> +                             section);
> >> +}
> >> +
> >> +static const MemoryListener vfio_type1_iommu_listener = {
> >> +    .region_add = vfio_type1_iommu_listener_region_add,
> >> +    .region_del = vfio_type1_iommu_listener_region_del,
> >> +};
> >> +
> >> +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener,
> >> +                                     MemoryRegionSection *section)
> >> +{
> >> +    VFIOContainer *container;
> >> +    hwaddr page_mask;
> >> +
> >> +    if (!memory_region_is_iommu(section->mr)) {
> >> +        return;
> >> +    }
> >> +    container = container_of(listener, VFIOContainer,
> >> +                             iommu_data.type1.listener);
> >> +    page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
> >> +    vfio_listener_region_add(container, page_mask, listener, section);
> >> +}
> >> +
> >> +
> >> +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener,
> >> +                                     MemoryRegionSection *section)
> >> +{
> >> +    VFIOContainer *container;
> >> +    hwaddr page_mask;
> >> +
> >> +    if (!memory_region_is_iommu(section->mr)) {
> >> +        return;
> >> +    }
> >> +    container = container_of(listener, VFIOContainer,
> >> +                             iommu_data.type1.listener);
> >> +    page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
> >> +    vfio_listener_region_del(container, page_mask, listener, section);
> >> +}
> >> +
> >> +static const MemoryListener vfio_spapr_iommu_listener = {
> >> +    .region_add = vfio_spapr_iommu_listener_region_add,
> >> +    .region_del = vfio_spapr_iommu_listener_region_del,
> >>   };
> >
> >
> > Rather than creating all these wrappers, why don't we create a structure
> > that gives us callbacks we can use for a shared function?  If we had:
> >
> > struct VFIOMemoryListener {
> > 	struct MemoryListener listener;
> > 	bool (*filter)(MemoryRegionSection *section);
> > 	hwaddr (page_size)(MemoryRegionSection *section);
> > 	VFIOContainer *container;
> > }
> >
> > Then there would be no reason for you to have separate wrappers for
> > spapr.  VFIOType1 would have a single VFIOMemoryListener, you could have
> > an array of two.
> 
> Sorry, I am missing the point here...
> 
> I cannot just have an array of these - I will have to store an address 
> spaces per a listener in a container to implement filter() so I'd rather 
> store just an address space and not add filter() at all. And I will still 

You're not storing an address space now.

> need "memory: Add reporting of supported page sizes" - or there is no way 

Just like you do now.

> to implement it all. So I still end up having 6 callbacks in 
> hw/vfio/common.c. What does this win for us?

Nothing you're suggesting adds any more callbacks.

The current filter is vfio_listener_skipped_section().  The iommu filter
is simply:

static bool vfio_iommu_listener_skipped_section(MemoryRegionSection *section)
{
    return vfio_listener_skipped_section(section) ||
           !memory_region_is_iommu(section->mr);
}

Notice how I didn't have to call it spapr because it's probably what any
guest iommu is going to do...

The v2 spapr filter is:

static bool vfio_nodump_listener_skipped_section(MemoryRegionSection *section)
{
    return vfio_listener_skipped_section(section) ||
           memory_region_is_skip_dump(section->mr);
}

Notice again that we can use generic functions and not make everything
spapr specific.

The page_size callbacks are similarly trivial: the iommu one obviously
calls memory_region_iommu_get_page_sizes(), the other one just uses
~qemu_real_host_page_mask.  Again, nothing spapr specific.
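
Sketched out (the names are placeholders, and both return the mask,
matching how ->page_size() is consumed just below):

static hwaddr vfio_iommu_page_mask(MemoryRegionSection *section)
{
    /* page granularity reported by the guest IOMMU */
    return ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
}

static hwaddr vfio_host_page_mask(MemoryRegionSection *section)
{
    return qemu_real_host_page_mask;
}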

The container field of course points back to the container, and now
vfio_listener_region_add() simply does:

VFIOMemoryListener *vlistener = container_of(listener, VFIOMemoryListener, listener);
VFIOContainer *container = vlistener->container;
hwaddr page_mask = vlistener->page_size(section);

if (vlistener->filter && vlistener->filter(section)) {
    return;
}
...

And suddenly we get to piece together a handful of _generic_ helper
functions into a VFIOMemoryListener and we're not adding a new pair of
wrappers for everything and duplicating trivial setup.  Thanks,

Alex

> >>
> >>   static void vfio_listener_release(VFIOContainer *container)
> >> @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
> >>               goto free_container_exit;
> >>           }
> >>
> >> -        container->iommu_data.type1.listener = vfio_memory_listener;
> >> +        container->iommu_data.type1.listener = vfio_type1_iommu_listener;
> >>           container->iommu_data.release = vfio_listener_release;
> >>
> >>           memory_listener_register(&container->iommu_data.type1.listener,
> >> @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
> >>               goto free_container_exit;
> >>           }
> >>
> >> -        container->iommu_data.type1.listener = vfio_memory_listener;
> >> +        container->iommu_data.type1.listener = vfio_spapr_iommu_listener;
> >>           container->iommu_data.release = vfio_listener_release;
> >>
> >>           memory_listener_register(&container->iommu_data.type1.listener,
> >
> >
> >
> 
>
Alexey Kardashevskiy July 16, 2015, 3:31 a.m. UTC | #4
On 07/16/2015 12:51 PM, Alex Williamson wrote:
> On Thu, 2015-07-16 at 11:26 +1000, Alexey Kardashevskiy wrote:
>> On 07/16/2015 04:26 AM, Alex Williamson wrote:
>>> On Tue, 2015-07-14 at 22:21 +1000, Alexey Kardashevskiy wrote:
>>>> The existing memory listener is called on RAM or PCI address space
>>>> which implies potentially different page size. Instead of guessing
>>>> what page size should be used, this replaces a single IOMMU memory
>>>> listener by two, one per supported IOMMU type; listener callbacks
>>>> call the existing helpers with a known page size.
>>>>
>>>> For now, Type1 uses qemu_real_host_page_mask, sPAPR uses the page size
>>>> from IOMMU.
>>>>
>>>> As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions,
>>>> this adds memory_region_is_iommu() to the SPAPR IOMMU listener to skip
>>>> non IOMMU regions (which is an MSIX window) which duplicates
>>>> vfio_listener_skipped_section() a bit.
>>>>
>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>> ---
>>>>    hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------
>>>>    1 file changed, 76 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>> index 85ee9b0..aad41e1 100644
>>>> --- a/hw/vfio/common.c
>>>> +++ b/hw/vfio/common.c
>>>> @@ -312,11 +312,11 @@ out:
>>>>        rcu_read_unlock();
>>>>    }
>>>>
>>>> -static void vfio_listener_region_add(MemoryListener *listener,
>>>> +static void vfio_listener_region_add(VFIOContainer *container,
>>>> +                                     hwaddr page_mask,
>>>
>>> Should memory_region_iommu_get_page_sizes() return a hwaddr?
>>
>>
>> I do not think so, memory.c uses uint64_t for masks.
>>
>>
>>
>>>> +                                     MemoryListener *listener,
>>>>                                         MemoryRegionSection *section)
>>>>    {
>>>> -    VFIOContainer *container = container_of(listener, VFIOContainer,
>>>> -                                            iommu_data.type1.listener);
>>>>        hwaddr iova, end;
>>>>        Int128 llend;
>>>>        void *vaddr;
>>>> @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>>>            return;
>>>>        }
>>>>
>>>> -    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
>>>> -                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
>>>> +    if (unlikely((section->offset_within_address_space & ~page_mask) !=
>>>> +                 (section->offset_within_region & ~page_mask))) {
>>>>            error_report("%s received unaligned region", __func__);
>>>>            return;
>>>>        }
>>>>
>>>> -    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>>>> +    iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
>>>>        llend = int128_make64(section->offset_within_address_space);
>>>>        llend = int128_add(llend, section->size);
>>>> -    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
>>>> +    llend = int128_and(llend, int128_exts64(page_mask));
>>>>
>>>>        if (int128_ge(int128_make64(iova), llend)) {
>>>>            return;
>>>> @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>>>        }
>>>>    }
>>>>
>>>> -static void vfio_listener_region_del(MemoryListener *listener,
>>>> +static void vfio_listener_region_del(VFIOContainer *container,
>>>> +                                     hwaddr page_mask,
>>>> +                                     MemoryListener *listener,
>>>>                                         MemoryRegionSection *section)
>>>>    {
>>>> -    VFIOContainer *container = container_of(listener, VFIOContainer,
>>>> -                                            iommu_data.type1.listener);
>>>>        hwaddr iova, end;
>>>>        int ret;
>>>>
>>>> @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>>>            return;
>>>>        }
>>>>
>>>> -    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
>>>> -                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
>>>> +    if (unlikely((section->offset_within_address_space & ~page_mask) !=
>>>> +                 (section->offset_within_region & ~page_mask))) {
>>>>            error_report("%s received unaligned region", __func__);
>>>>            return;
>>>>        }
>>>> @@ -459,9 +459,9 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>>>             */
>>>>        }
>>>>
>>>> -    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>>>> +    iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
>>>>        end = (section->offset_within_address_space + int128_get64(section->size)) &
>>>> -          TARGET_PAGE_MASK;
>>>> +          page_mask;
>>>>
>>>>        if (iova >= end) {
>>>>            return;
>>>> @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>>>        }
>>>>    }
>>>>
>>>> -static const MemoryListener vfio_memory_listener = {
>>>> -    .region_add = vfio_listener_region_add,
>>>> -    .region_del = vfio_listener_region_del,
>>>> +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener,
>>>> +                                                 MemoryRegionSection *section)
>>>> +{
>>>> +    VFIOContainer *container = container_of(listener, VFIOContainer,
>>>> +                                            iommu_data.type1.listener);
>>>> +
>>>> +    vfio_listener_region_add(container, qemu_real_host_page_mask, listener,
>>>> +                             section);
>>>> +}
>>>> +
>>>> +
>>>> +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener,
>>>> +                                                 MemoryRegionSection *section)
>>>> +{
>>>> +    VFIOContainer *container = container_of(listener, VFIOContainer,
>>>> +                                            iommu_data.type1.listener);
>>>> +
>>>> +    vfio_listener_region_del(container, qemu_real_host_page_mask, listener,
>>>> +                             section);
>>>> +}
>>>> +
>>>> +static const MemoryListener vfio_type1_iommu_listener = {
>>>> +    .region_add = vfio_type1_iommu_listener_region_add,
>>>> +    .region_del = vfio_type1_iommu_listener_region_del,
>>>> +};
>>>> +
>>>> +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener,
>>>> +                                     MemoryRegionSection *section)
>>>> +{
>>>> +    VFIOContainer *container;
>>>> +    hwaddr page_mask;
>>>> +
>>>> +    if (!memory_region_is_iommu(section->mr)) {
>>>> +        return;
>>>> +    }
>>>> +    container = container_of(listener, VFIOContainer,
>>>> +                             iommu_data.type1.listener);
>>>> +    page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
>>>> +    vfio_listener_region_add(container, page_mask, listener, section);
>>>> +}
>>>> +
>>>> +
>>>> +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener,
>>>> +                                     MemoryRegionSection *section)
>>>> +{
>>>> +    VFIOContainer *container;
>>>> +    hwaddr page_mask;
>>>> +
>>>> +    if (!memory_region_is_iommu(section->mr)) {
>>>> +        return;
>>>> +    }
>>>> +    container = container_of(listener, VFIOContainer,
>>>> +                             iommu_data.type1.listener);
>>>> +    page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
>>>> +    vfio_listener_region_del(container, page_mask, listener, section);
>>>> +}
>>>> +
>>>> +static const MemoryListener vfio_spapr_iommu_listener = {
>>>> +    .region_add = vfio_spapr_iommu_listener_region_add,
>>>> +    .region_del = vfio_spapr_iommu_listener_region_del,
>>>>    };
>>>
>>>
>>> Rather than creating all these wrappers, why don't we create a structure
>>> that gives us callbacks we can use for a shared function?  If we had:
>>>
>>> struct VFIOMemoryListener {
>>> 	struct MemoryListener listener;
>>> 	bool (*filter)(MemoryRegionSection *section);
>>> 	hwaddr (page_size)(MemoryRegionSection *section);
>>> 	VFIOContainer *container;
>>> }
>>>
>>> Then there would be no reason for you to have separate wrappers for
>>> spapr.  VFIOType1 would have a single VFIOMemoryListener, you could have
>>> an array of two.
>>
>> Sorry, I am missing the point here...
>>
>> I cannot just have an array of these - I will have to store an address
>> spaces per a listener in a container to implement filter() so I'd rather
>> store just an address space and not add filter() at all. And I will still
>
> You're not storing an address space now.


I am, indirectly, when registering the listener with a filter which is never
NULL and which is passed to the listener. If not for that, the whole thing
would be simpler. The rest is quite clear now, you are right.



>> need "memory: Add reporting of supported page sizes" - or there is no way
>
> Just like you do now.
>
>> to implement it all. So I still end up having 6 callbacks in
>> hw/vfio/common.c. What does this win for us?
>
> Nothing you're suggesting adds any more callbacks.
>
> The current filter is vfio_listener_skipped_section().  The iommu filter
> is simply:
>
> bool vfio_iommu_listener_skipped_section(mr)
> {
>    return vfio_listener_skipped_section(mr) || !memory_region_is_iommu(mr);
> }
>
> Notice how I didn't have to call it spapr because it's probably what any
> guest iommu is going to do...
>
> The v2 spapr filter is:
>
> bool vfio_nodump_listener_skipped_section(mr)
> {
>    return vfio_listener_skipped_section(mr) || memory_region_is_skip_dump(mr);
> }
>
> Notice again that we can use generic functions and not make everything
> spapr specific.


Ok, we add 2 (not 3) as vfio_listener_skipped_section() is still there.
You got +1.


> The page_size callbacks are similarly trivial, the iommu one obviously
> calls memory_region_iommu_get_page_sizes, the other one uses
> ~qemu_real_host_page_mask.  Again, nothing spapr specific.


These are trivial but I need all 3 - for the type1 iommu (real page size),
the spapr iommu (TCE page size) and spapr memory preregistration (real page
size). Ok, I can reuse the 1st for the 3rd and get one less. +2.


> The container of course points back to the container and now
> vfio_listener_region_add() simply does:
>
> VFIOMemoryListener vlistener = container_of(listener, VFIOMemoryListener, listener);
> VFIOContainer = vlistener->container;
> hwaddr page_mask = vlistener->page_size(mr);
>
> if (vlistener->filter && vlistener->filter(mr)) {
>      return
> }
> ...
>
> And suddenly we get to piece together a handful of _generic_ helper
> functions into a VFIOMemoryListener and we're not adding a new pair of
> wrappers for everything and duplicating trivial setup.  Thanks,

Ok, we are saving 2 callbacks and making the others smaller. Makes sense.
Thanks for chewing (right word here?) this stuff for me :)


>
> Alex
>
>>>>
>>>>    static void vfio_listener_release(VFIOContainer *container)
>>>> @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>>>>                goto free_container_exit;
>>>>            }
>>>>
>>>> -        container->iommu_data.type1.listener = vfio_memory_listener;
>>>> +        container->iommu_data.type1.listener = vfio_type1_iommu_listener;
>>>>            container->iommu_data.release = vfio_listener_release;
>>>>
>>>>            memory_listener_register(&container->iommu_data.type1.listener,
>>>> @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>>>>                goto free_container_exit;
>>>>            }
>>>>
>>>> -        container->iommu_data.type1.listener = vfio_memory_listener;
>>>> +        container->iommu_data.type1.listener = vfio_spapr_iommu_listener;
>>>>            container->iommu_data.release = vfio_listener_release;
>>>>
>>>>            memory_listener_register(&container->iommu_data.type1.listener,
>>>
>>>
>>>
>>
>>
>
>
>

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 85ee9b0..aad41e1 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -312,11 +312,11 @@  out:
     rcu_read_unlock();
 }
 
-static void vfio_listener_region_add(MemoryListener *listener,
+static void vfio_listener_region_add(VFIOContainer *container,
+                                     hwaddr page_mask,
+                                     MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
-    VFIOContainer *container = container_of(listener, VFIOContainer,
-                                            iommu_data.type1.listener);
     hwaddr iova, end;
     Int128 llend;
     void *vaddr;
@@ -330,16 +330,16 @@  static void vfio_listener_region_add(MemoryListener *listener,
         return;
     }
 
-    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
-                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+    if (unlikely((section->offset_within_address_space & ~page_mask) !=
+                 (section->offset_within_region & ~page_mask))) {
         error_report("%s received unaligned region", __func__);
         return;
     }
 
-    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
     llend = int128_make64(section->offset_within_address_space);
     llend = int128_add(llend, section->size);
-    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
+    llend = int128_and(llend, int128_exts64(page_mask));
 
     if (int128_ge(int128_make64(iova), llend)) {
         return;
@@ -416,11 +416,11 @@  static void vfio_listener_region_add(MemoryListener *listener,
     }
 }
 
-static void vfio_listener_region_del(MemoryListener *listener,
+static void vfio_listener_region_del(VFIOContainer *container,
+                                     hwaddr page_mask,
+                                     MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
-    VFIOContainer *container = container_of(listener, VFIOContainer,
-                                            iommu_data.type1.listener);
     hwaddr iova, end;
     int ret;
 
@@ -432,8 +432,8 @@  static void vfio_listener_region_del(MemoryListener *listener,
         return;
     }
 
-    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
-                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+    if (unlikely((section->offset_within_address_space & ~page_mask) !=
+                 (section->offset_within_region & ~page_mask))) {
         error_report("%s received unaligned region", __func__);
         return;
     }
@@ -459,9 +459,9 @@  static void vfio_listener_region_del(MemoryListener *listener,
          */
     }
 
-    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
     end = (section->offset_within_address_space + int128_get64(section->size)) &
-          TARGET_PAGE_MASK;
+          page_mask;
 
     if (iova >= end) {
         return;
@@ -478,9 +478,66 @@  static void vfio_listener_region_del(MemoryListener *listener,
     }
 }
 
-static const MemoryListener vfio_memory_listener = {
-    .region_add = vfio_listener_region_add,
-    .region_del = vfio_listener_region_del,
+static void vfio_type1_iommu_listener_region_add(MemoryListener *listener,
+                                                 MemoryRegionSection *section)
+{
+    VFIOContainer *container = container_of(listener, VFIOContainer,
+                                            iommu_data.type1.listener);
+
+    vfio_listener_region_add(container, qemu_real_host_page_mask, listener,
+                             section);
+}
+
+
+static void vfio_type1_iommu_listener_region_del(MemoryListener *listener,
+                                                 MemoryRegionSection *section)
+{
+    VFIOContainer *container = container_of(listener, VFIOContainer,
+                                            iommu_data.type1.listener);
+
+    vfio_listener_region_del(container, qemu_real_host_page_mask, listener,
+                             section);
+}
+
+static const MemoryListener vfio_type1_iommu_listener = {
+    .region_add = vfio_type1_iommu_listener_region_add,
+    .region_del = vfio_type1_iommu_listener_region_del,
+};
+
+static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener,
+                                     MemoryRegionSection *section)
+{
+    VFIOContainer *container;
+    hwaddr page_mask;
+
+    if (!memory_region_is_iommu(section->mr)) {
+        return;
+    }
+    container = container_of(listener, VFIOContainer,
+                             iommu_data.type1.listener);
+    page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
+    vfio_listener_region_add(container, page_mask, listener, section);
+}
+
+
+static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener,
+                                     MemoryRegionSection *section)
+{
+    VFIOContainer *container;
+    hwaddr page_mask;
+
+    if (!memory_region_is_iommu(section->mr)) {
+        return;
+    }
+    container = container_of(listener, VFIOContainer,
+                             iommu_data.type1.listener);
+    page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
+    vfio_listener_region_del(container, page_mask, listener, section);
+}
+
+static const MemoryListener vfio_spapr_iommu_listener = {
+    .region_add = vfio_spapr_iommu_listener_region_add,
+    .region_del = vfio_spapr_iommu_listener_region_del,
 };
 
 static void vfio_listener_release(VFIOContainer *container)
@@ -684,7 +741,7 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
             goto free_container_exit;
         }
 
-        container->iommu_data.type1.listener = vfio_memory_listener;
+        container->iommu_data.type1.listener = vfio_type1_iommu_listener;
         container->iommu_data.release = vfio_listener_release;
 
         memory_listener_register(&container->iommu_data.type1.listener,
@@ -724,7 +781,7 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
             goto free_container_exit;
         }
 
-        container->iommu_data.type1.listener = vfio_memory_listener;
+        container->iommu_data.type1.listener = vfio_spapr_iommu_listener;
         container->iommu_data.release = vfio_listener_release;
 
         memory_listener_register(&container->iommu_data.type1.listener,