| Message ID | 1436876514-2946-3-git-send-email-aik@ozlabs.ru |
|---|---|
| State | New |
| Headers | show |
On Tue, 2015-07-14 at 22:21 +1000, Alexey Kardashevskiy wrote: > The existing memory listener is called on RAM or PCI address space > which implies potentially different page size. Instead of guessing > what page size should be used, this replaces a single IOMMU memory > listener by two, one per supported IOMMU type; listener callbacks > call the existing helpers with a known page size. > > For now, Type1 uses qemu_real_host_page_mask, sPAPR uses the page size > from IOMMU. > > As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions, > this adds memory_region_is_iommu() to the SPAPR IOMMU listener to skip > non IOMMU regions (which is an MSIX window) which duplicates > vfio_listener_skipped_section() a bit. > > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > --- > hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------ > 1 file changed, 76 insertions(+), 19 deletions(-) > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index 85ee9b0..aad41e1 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -312,11 +312,11 @@ out: > rcu_read_unlock(); > } > > -static void vfio_listener_region_add(MemoryListener *listener, > +static void vfio_listener_region_add(VFIOContainer *container, > + hwaddr page_mask, Should memory_region_iommu_get_page_sizes() return a hwaddr? 
> + MemoryListener *listener, > MemoryRegionSection *section) > { > - VFIOContainer *container = container_of(listener, VFIOContainer, > - iommu_data.type1.listener); > hwaddr iova, end; > Int128 llend; > void *vaddr; > @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener *listener, > return; > } > > - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != > - (section->offset_within_region & ~TARGET_PAGE_MASK))) { > + if (unlikely((section->offset_within_address_space & ~page_mask) != > + (section->offset_within_region & ~page_mask))) { > error_report("%s received unaligned region", __func__); > return; > } > > - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); > llend = int128_make64(section->offset_within_address_space); > llend = int128_add(llend, section->size); > - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); > + llend = int128_and(llend, int128_exts64(page_mask)); > > if (int128_ge(int128_make64(iova), llend)) { > return; > @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener *listener, > } > } > > -static void vfio_listener_region_del(MemoryListener *listener, > +static void vfio_listener_region_del(VFIOContainer *container, > + hwaddr page_mask, > + MemoryListener *listener, > MemoryRegionSection *section) > { > - VFIOContainer *container = container_of(listener, VFIOContainer, > - iommu_data.type1.listener); > hwaddr iova, end; > int ret; > > @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener *listener, > return; > } > > - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != > - (section->offset_within_region & ~TARGET_PAGE_MASK))) { > + if (unlikely((section->offset_within_address_space & ~page_mask) != > + (section->offset_within_region & ~page_mask))) { > error_report("%s received unaligned region", __func__); > return; > } > @@ -459,9 +459,9 @@ 
static void vfio_listener_region_del(MemoryListener *listener, > */ > } > > - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); > end = (section->offset_within_address_space + int128_get64(section->size)) & > - TARGET_PAGE_MASK; > + page_mask; > > if (iova >= end) { > return; > @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener *listener, > } > } > > -static const MemoryListener vfio_memory_listener = { > - .region_add = vfio_listener_region_add, > - .region_del = vfio_listener_region_del, > +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + VFIOContainer *container = container_of(listener, VFIOContainer, > + iommu_data.type1.listener); > + > + vfio_listener_region_add(container, qemu_real_host_page_mask, listener, > + section); > +} > + > + > +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + VFIOContainer *container = container_of(listener, VFIOContainer, > + iommu_data.type1.listener); > + > + vfio_listener_region_del(container, qemu_real_host_page_mask, listener, > + section); > +} > + > +static const MemoryListener vfio_type1_iommu_listener = { > + .region_add = vfio_type1_iommu_listener_region_add, > + .region_del = vfio_type1_iommu_listener_region_del, > +}; > + > +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + VFIOContainer *container; > + hwaddr page_mask; > + > + if (!memory_region_is_iommu(section->mr)) { > + return; > + } > + container = container_of(listener, VFIOContainer, > + iommu_data.type1.listener); > + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); > + vfio_listener_region_add(container, page_mask, listener, section); > +} > + > + > +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener, > + 
MemoryRegionSection *section) > +{ > + VFIOContainer *container; > + hwaddr page_mask; > + > + if (!memory_region_is_iommu(section->mr)) { > + return; > + } > + container = container_of(listener, VFIOContainer, > + iommu_data.type1.listener); > + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); > + vfio_listener_region_del(container, page_mask, listener, section); > +} > + > +static const MemoryListener vfio_spapr_iommu_listener = { > + .region_add = vfio_spapr_iommu_listener_region_add, > + .region_del = vfio_spapr_iommu_listener_region_del, > }; Rather than creating all these wrappers, why don't we create a structure that gives us callbacks we can use for a shared function? If we had: struct VFIOMemoryListener { struct MemoryListener listener; bool (*filter)(MemoryRegionSection *section); hwaddr (page_size)(MemoryRegionSection *section); VFIOContainer *container; } Then there would be no reason for you to have separate wrappers for spapr. VFIOType1 would have a single VFIOMemoryListener, you could have an array of two. > > static void vfio_listener_release(VFIOContainer *container) > @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) > goto free_container_exit; > } > > - container->iommu_data.type1.listener = vfio_memory_listener; > + container->iommu_data.type1.listener = vfio_type1_iommu_listener; > container->iommu_data.release = vfio_listener_release; > > memory_listener_register(&container->iommu_data.type1.listener, > @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) > goto free_container_exit; > } > > - container->iommu_data.type1.listener = vfio_memory_listener; > + container->iommu_data.type1.listener = vfio_spapr_iommu_listener; > container->iommu_data.release = vfio_listener_release; > > memory_listener_register(&container->iommu_data.type1.listener,
On 07/16/2015 04:26 AM, Alex Williamson wrote: > On Tue, 2015-07-14 at 22:21 +1000, Alexey Kardashevskiy wrote: >> The existing memory listener is called on RAM or PCI address space >> which implies potentially different page size. Instead of guessing >> what page size should be used, this replaces a single IOMMU memory >> listener by two, one per supported IOMMU type; listener callbacks >> call the existing helpers with a known page size. >> >> For now, Type1 uses qemu_real_host_page_mask, sPAPR uses the page size >> from IOMMU. >> >> As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions, >> this adds memory_region_is_iommu() to the SPAPR IOMMU listener to skip >> non IOMMU regions (which is an MSIX window) which duplicates >> vfio_listener_skipped_section() a bit. >> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> >> --- >> hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------ >> 1 file changed, 76 insertions(+), 19 deletions(-) >> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index 85ee9b0..aad41e1 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -312,11 +312,11 @@ out: >> rcu_read_unlock(); >> } >> >> -static void vfio_listener_region_add(MemoryListener *listener, >> +static void vfio_listener_region_add(VFIOContainer *container, >> + hwaddr page_mask, > > Should memory_region_iommu_get_page_sizes() return a hwaddr? I do not think so, memory.c uses uint64_t for masks. 
>> + MemoryListener *listener, >> MemoryRegionSection *section) >> { >> - VFIOContainer *container = container_of(listener, VFIOContainer, >> - iommu_data.type1.listener); >> hwaddr iova, end; >> Int128 llend; >> void *vaddr; >> @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener *listener, >> return; >> } >> >> - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != >> - (section->offset_within_region & ~TARGET_PAGE_MASK))) { >> + if (unlikely((section->offset_within_address_space & ~page_mask) != >> + (section->offset_within_region & ~page_mask))) { >> error_report("%s received unaligned region", __func__); >> return; >> } >> >> - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); >> + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); >> llend = int128_make64(section->offset_within_address_space); >> llend = int128_add(llend, section->size); >> - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); >> + llend = int128_and(llend, int128_exts64(page_mask)); >> >> if (int128_ge(int128_make64(iova), llend)) { >> return; >> @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener *listener, >> } >> } >> >> -static void vfio_listener_region_del(MemoryListener *listener, >> +static void vfio_listener_region_del(VFIOContainer *container, >> + hwaddr page_mask, >> + MemoryListener *listener, >> MemoryRegionSection *section) >> { >> - VFIOContainer *container = container_of(listener, VFIOContainer, >> - iommu_data.type1.listener); >> hwaddr iova, end; >> int ret; >> >> @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener *listener, >> return; >> } >> >> - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != >> - (section->offset_within_region & ~TARGET_PAGE_MASK))) { >> + if (unlikely((section->offset_within_address_space & ~page_mask) != >> + (section->offset_within_region & ~page_mask))) { >> error_report("%s received unaligned 
region", __func__); >> return; >> } >> @@ -459,9 +459,9 @@ static void vfio_listener_region_del(MemoryListener *listener, >> */ >> } >> >> - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); >> + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); >> end = (section->offset_within_address_space + int128_get64(section->size)) & >> - TARGET_PAGE_MASK; >> + page_mask; >> >> if (iova >= end) { >> return; >> @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener *listener, >> } >> } >> >> -static const MemoryListener vfio_memory_listener = { >> - .region_add = vfio_listener_region_add, >> - .region_del = vfio_listener_region_del, >> +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener, >> + MemoryRegionSection *section) >> +{ >> + VFIOContainer *container = container_of(listener, VFIOContainer, >> + iommu_data.type1.listener); >> + >> + vfio_listener_region_add(container, qemu_real_host_page_mask, listener, >> + section); >> +} >> + >> + >> +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener, >> + MemoryRegionSection *section) >> +{ >> + VFIOContainer *container = container_of(listener, VFIOContainer, >> + iommu_data.type1.listener); >> + >> + vfio_listener_region_del(container, qemu_real_host_page_mask, listener, >> + section); >> +} >> + >> +static const MemoryListener vfio_type1_iommu_listener = { >> + .region_add = vfio_type1_iommu_listener_region_add, >> + .region_del = vfio_type1_iommu_listener_region_del, >> +}; >> + >> +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener, >> + MemoryRegionSection *section) >> +{ >> + VFIOContainer *container; >> + hwaddr page_mask; >> + >> + if (!memory_region_is_iommu(section->mr)) { >> + return; >> + } >> + container = container_of(listener, VFIOContainer, >> + iommu_data.type1.listener); >> + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); >> + vfio_listener_region_add(container, page_mask, 
listener, section); >> +} >> + >> + >> +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener, >> + MemoryRegionSection *section) >> +{ >> + VFIOContainer *container; >> + hwaddr page_mask; >> + >> + if (!memory_region_is_iommu(section->mr)) { >> + return; >> + } >> + container = container_of(listener, VFIOContainer, >> + iommu_data.type1.listener); >> + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); >> + vfio_listener_region_del(container, page_mask, listener, section); >> +} >> + >> +static const MemoryListener vfio_spapr_iommu_listener = { >> + .region_add = vfio_spapr_iommu_listener_region_add, >> + .region_del = vfio_spapr_iommu_listener_region_del, >> }; > > > Rather than creating all these wrappers, why don't we create a structure > that gives us callbacks we can use for a shared function? If we had: > > struct VFIOMemoryListener { > struct MemoryListener listener; > bool (*filter)(MemoryRegionSection *section); > hwaddr (page_size)(MemoryRegionSection *section); > VFIOContainer *container; > } > > Then there would be no reason for you to have separate wrappers for > spapr. VFIOType1 would have a single VFIOMemoryListener, you could have > an array of two. Sorry, I am missing the point here... I cannot just have an array of these - I will have to store an address spaces per a listener in a container to implement filter() so I'd rather store just an address space and not add filter() at all. And I will still need "memory: Add reporting of supported page sizes" - or there is no way to implement it all. So I still end up having 6 callbacks in hw/vfio/common.c. What does this win for us? 
>> >> static void vfio_listener_release(VFIOContainer *container) >> @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) >> goto free_container_exit; >> } >> >> - container->iommu_data.type1.listener = vfio_memory_listener; >> + container->iommu_data.type1.listener = vfio_type1_iommu_listener; >> container->iommu_data.release = vfio_listener_release; >> >> memory_listener_register(&container->iommu_data.type1.listener, >> @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) >> goto free_container_exit; >> } >> >> - container->iommu_data.type1.listener = vfio_memory_listener; >> + container->iommu_data.type1.listener = vfio_spapr_iommu_listener; >> container->iommu_data.release = vfio_listener_release; >> >> memory_listener_register(&container->iommu_data.type1.listener, > > >
On Thu, 2015-07-16 at 11:26 +1000, Alexey Kardashevskiy wrote: > On 07/16/2015 04:26 AM, Alex Williamson wrote: > > On Tue, 2015-07-14 at 22:21 +1000, Alexey Kardashevskiy wrote: > >> The existing memory listener is called on RAM or PCI address space > >> which implies potentially different page size. Instead of guessing > >> what page size should be used, this replaces a single IOMMU memory > >> listener by two, one per supported IOMMU type; listener callbacks > >> call the existing helpers with a known page size. > >> > >> For now, Type1 uses qemu_real_host_page_mask, sPAPR uses the page size > >> from IOMMU. > >> > >> As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions, > >> this adds memory_region_is_iommu() to the SPAPR IOMMU listener to skip > >> non IOMMU regions (which is an MSIX window) which duplicates > >> vfio_listener_skipped_section() a bit. > >> > >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > >> --- > >> hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------ > >> 1 file changed, 76 insertions(+), 19 deletions(-) > >> > >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c > >> index 85ee9b0..aad41e1 100644 > >> --- a/hw/vfio/common.c > >> +++ b/hw/vfio/common.c > >> @@ -312,11 +312,11 @@ out: > >> rcu_read_unlock(); > >> } > >> > >> -static void vfio_listener_region_add(MemoryListener *listener, > >> +static void vfio_listener_region_add(VFIOContainer *container, > >> + hwaddr page_mask, > > > > Should memory_region_iommu_get_page_sizes() return a hwaddr? > > > I do not think so, memory.c uses uint64_t for masks. 
> > > > >> + MemoryListener *listener, > >> MemoryRegionSection *section) > >> { > >> - VFIOContainer *container = container_of(listener, VFIOContainer, > >> - iommu_data.type1.listener); > >> hwaddr iova, end; > >> Int128 llend; > >> void *vaddr; > >> @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener *listener, > >> return; > >> } > >> > >> - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != > >> - (section->offset_within_region & ~TARGET_PAGE_MASK))) { > >> + if (unlikely((section->offset_within_address_space & ~page_mask) != > >> + (section->offset_within_region & ~page_mask))) { > >> error_report("%s received unaligned region", __func__); > >> return; > >> } > >> > >> - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > >> + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); > >> llend = int128_make64(section->offset_within_address_space); > >> llend = int128_add(llend, section->size); > >> - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); > >> + llend = int128_and(llend, int128_exts64(page_mask)); > >> > >> if (int128_ge(int128_make64(iova), llend)) { > >> return; > >> @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener *listener, > >> } > >> } > >> > >> -static void vfio_listener_region_del(MemoryListener *listener, > >> +static void vfio_listener_region_del(VFIOContainer *container, > >> + hwaddr page_mask, > >> + MemoryListener *listener, > >> MemoryRegionSection *section) > >> { > >> - VFIOContainer *container = container_of(listener, VFIOContainer, > >> - iommu_data.type1.listener); > >> hwaddr iova, end; > >> int ret; > >> > >> @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener *listener, > >> return; > >> } > >> > >> - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != > >> - (section->offset_within_region & ~TARGET_PAGE_MASK))) { > >> + if (unlikely((section->offset_within_address_space & 
~page_mask) != > >> + (section->offset_within_region & ~page_mask))) { > >> error_report("%s received unaligned region", __func__); > >> return; > >> } > >> @@ -459,9 +459,9 @@ static void vfio_listener_region_del(MemoryListener *listener, > >> */ > >> } > >> > >> - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > >> + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); > >> end = (section->offset_within_address_space + int128_get64(section->size)) & > >> - TARGET_PAGE_MASK; > >> + page_mask; > >> > >> if (iova >= end) { > >> return; > >> @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener *listener, > >> } > >> } > >> > >> -static const MemoryListener vfio_memory_listener = { > >> - .region_add = vfio_listener_region_add, > >> - .region_del = vfio_listener_region_del, > >> +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener, > >> + MemoryRegionSection *section) > >> +{ > >> + VFIOContainer *container = container_of(listener, VFIOContainer, > >> + iommu_data.type1.listener); > >> + > >> + vfio_listener_region_add(container, qemu_real_host_page_mask, listener, > >> + section); > >> +} > >> + > >> + > >> +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener, > >> + MemoryRegionSection *section) > >> +{ > >> + VFIOContainer *container = container_of(listener, VFIOContainer, > >> + iommu_data.type1.listener); > >> + > >> + vfio_listener_region_del(container, qemu_real_host_page_mask, listener, > >> + section); > >> +} > >> + > >> +static const MemoryListener vfio_type1_iommu_listener = { > >> + .region_add = vfio_type1_iommu_listener_region_add, > >> + .region_del = vfio_type1_iommu_listener_region_del, > >> +}; > >> + > >> +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener, > >> + MemoryRegionSection *section) > >> +{ > >> + VFIOContainer *container; > >> + hwaddr page_mask; > >> + > >> + if (!memory_region_is_iommu(section->mr)) { > >> + 
return; > >> + } > >> + container = container_of(listener, VFIOContainer, > >> + iommu_data.type1.listener); > >> + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); > >> + vfio_listener_region_add(container, page_mask, listener, section); > >> +} > >> + > >> + > >> +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener, > >> + MemoryRegionSection *section) > >> +{ > >> + VFIOContainer *container; > >> + hwaddr page_mask; > >> + > >> + if (!memory_region_is_iommu(section->mr)) { > >> + return; > >> + } > >> + container = container_of(listener, VFIOContainer, > >> + iommu_data.type1.listener); > >> + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); > >> + vfio_listener_region_del(container, page_mask, listener, section); > >> +} > >> + > >> +static const MemoryListener vfio_spapr_iommu_listener = { > >> + .region_add = vfio_spapr_iommu_listener_region_add, > >> + .region_del = vfio_spapr_iommu_listener_region_del, > >> }; > > > > > > Rather than creating all these wrappers, why don't we create a structure > > that gives us callbacks we can use for a shared function? If we had: > > > > struct VFIOMemoryListener { > > struct MemoryListener listener; > > bool (*filter)(MemoryRegionSection *section); > > hwaddr (page_size)(MemoryRegionSection *section); > > VFIOContainer *container; > > } > > > > Then there would be no reason for you to have separate wrappers for > > spapr. VFIOType1 would have a single VFIOMemoryListener, you could have > > an array of two. > > Sorry, I am missing the point here... > > I cannot just have an array of these - I will have to store an address > spaces per a listener in a container to implement filter() so I'd rather > store just an address space and not add filter() at all. And I will still You're not storing an address space now. > need "memory: Add reporting of supported page sizes" - or there is no way Just like you do now. > to implement it all. 
So I still end up having 6 callbacks in > hw/vfio/common.c. What does this win for us? Nothing you're suggesting adds any more callbacks. The current filter is vfio_listener_skipped_section(). The iommu filter is simply: bool vfio_iommu_listener_skipped_section(mr) { return vfio_listener_skipped_section(mr) || !memory_region_is_iommu(mr); } Notice how I didn't have to call it spapr because it's probably what any guest iommu is going to do... The v2 spapr filter is: bool vfio_nodump_listener_skipped_section(mr) { return vfio_listener_skipped_section(mr) || memory_region_is_skip_dump(mr); } Notice again that we can use generic functions and not make everything spapr specific. The page_size callbacks are similarly trivial, the iommu one obviously calls memory_region_iommu_get_page_sizes, the other one uses ~qemu_real_host_page_mask. Again, nothing spapr specific. The container of course points back to the container and now vfio_listener_region_add() simply does: VFIOMemoryListener *vlistener = container_of(listener, VFIOMemoryListener, listener); VFIOContainer *container = vlistener->container; hwaddr page_mask = vlistener->page_size(mr); if (vlistener->filter && vlistener->filter(mr)) { return; } ... And suddenly we get to piece together a handful of _generic_ helper functions into a VFIOMemoryListener and we're not adding a new pair of wrappers for everything and duplicating trivial setup. 
Thanks, Alex > >> > >> static void vfio_listener_release(VFIOContainer *container) > >> @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) > >> goto free_container_exit; > >> } > >> > >> - container->iommu_data.type1.listener = vfio_memory_listener; > >> + container->iommu_data.type1.listener = vfio_type1_iommu_listener; > >> container->iommu_data.release = vfio_listener_release; > >> > >> memory_listener_register(&container->iommu_data.type1.listener, > >> @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) > >> goto free_container_exit; > >> } > >> > >> - container->iommu_data.type1.listener = vfio_memory_listener; > >> + container->iommu_data.type1.listener = vfio_spapr_iommu_listener; > >> container->iommu_data.release = vfio_listener_release; > >> > >> memory_listener_register(&container->iommu_data.type1.listener, > > > > > > > >
On 07/16/2015 12:51 PM, Alex Williamson wrote: > On Thu, 2015-07-16 at 11:26 +1000, Alexey Kardashevskiy wrote: >> On 07/16/2015 04:26 AM, Alex Williamson wrote: >>> On Tue, 2015-07-14 at 22:21 +1000, Alexey Kardashevskiy wrote: >>>> The existing memory listener is called on RAM or PCI address space >>>> which implies potentially different page size. Instead of guessing >>>> what page size should be used, this replaces a single IOMMU memory >>>> listener by two, one per supported IOMMU type; listener callbacks >>>> call the existing helpers with a known page size. >>>> >>>> For now, Type1 uses qemu_real_host_page_mask, sPAPR uses the page size >>>> from IOMMU. >>>> >>>> As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions, >>>> this adds memory_region_is_iommu() to the SPAPR IOMMU listener to skip >>>> non IOMMU regions (which is an MSIX window) which duplicates >>>> vfio_listener_skipped_section() a bit. >>>> >>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> >>>> --- >>>> hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------ >>>> 1 file changed, 76 insertions(+), 19 deletions(-) >>>> >>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >>>> index 85ee9b0..aad41e1 100644 >>>> --- a/hw/vfio/common.c >>>> +++ b/hw/vfio/common.c >>>> @@ -312,11 +312,11 @@ out: >>>> rcu_read_unlock(); >>>> } >>>> >>>> -static void vfio_listener_region_add(MemoryListener *listener, >>>> +static void vfio_listener_region_add(VFIOContainer *container, >>>> + hwaddr page_mask, >>> >>> Should memory_region_iommu_get_page_sizes() return a hwaddr? >> >> >> I do not think so, memory.c uses uint64_t for masks. 
>> >> >> >>>> + MemoryListener *listener, >>>> MemoryRegionSection *section) >>>> { >>>> - VFIOContainer *container = container_of(listener, VFIOContainer, >>>> - iommu_data.type1.listener); >>>> hwaddr iova, end; >>>> Int128 llend; >>>> void *vaddr; >>>> @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener *listener, >>>> return; >>>> } >>>> >>>> - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != >>>> - (section->offset_within_region & ~TARGET_PAGE_MASK))) { >>>> + if (unlikely((section->offset_within_address_space & ~page_mask) != >>>> + (section->offset_within_region & ~page_mask))) { >>>> error_report("%s received unaligned region", __func__); >>>> return; >>>> } >>>> >>>> - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); >>>> + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); >>>> llend = int128_make64(section->offset_within_address_space); >>>> llend = int128_add(llend, section->size); >>>> - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); >>>> + llend = int128_and(llend, int128_exts64(page_mask)); >>>> >>>> if (int128_ge(int128_make64(iova), llend)) { >>>> return; >>>> @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener *listener, >>>> } >>>> } >>>> >>>> -static void vfio_listener_region_del(MemoryListener *listener, >>>> +static void vfio_listener_region_del(VFIOContainer *container, >>>> + hwaddr page_mask, >>>> + MemoryListener *listener, >>>> MemoryRegionSection *section) >>>> { >>>> - VFIOContainer *container = container_of(listener, VFIOContainer, >>>> - iommu_data.type1.listener); >>>> hwaddr iova, end; >>>> int ret; >>>> >>>> @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener *listener, >>>> return; >>>> } >>>> >>>> - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != >>>> - (section->offset_within_region & ~TARGET_PAGE_MASK))) { >>>> + if (unlikely((section->offset_within_address_space & 
~page_mask) != >>>> + (section->offset_within_region & ~page_mask))) { >>>> error_report("%s received unaligned region", __func__); >>>> return; >>>> } >>>> @@ -459,9 +459,9 @@ static void vfio_listener_region_del(MemoryListener *listener, >>>> */ >>>> } >>>> >>>> - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); >>>> + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); >>>> end = (section->offset_within_address_space + int128_get64(section->size)) & >>>> - TARGET_PAGE_MASK; >>>> + page_mask; >>>> >>>> if (iova >= end) { >>>> return; >>>> @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener *listener, >>>> } >>>> } >>>> >>>> -static const MemoryListener vfio_memory_listener = { >>>> - .region_add = vfio_listener_region_add, >>>> - .region_del = vfio_listener_region_del, >>>> +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener, >>>> + MemoryRegionSection *section) >>>> +{ >>>> + VFIOContainer *container = container_of(listener, VFIOContainer, >>>> + iommu_data.type1.listener); >>>> + >>>> + vfio_listener_region_add(container, qemu_real_host_page_mask, listener, >>>> + section); >>>> +} >>>> + >>>> + >>>> +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener, >>>> + MemoryRegionSection *section) >>>> +{ >>>> + VFIOContainer *container = container_of(listener, VFIOContainer, >>>> + iommu_data.type1.listener); >>>> + >>>> + vfio_listener_region_del(container, qemu_real_host_page_mask, listener, >>>> + section); >>>> +} >>>> + >>>> +static const MemoryListener vfio_type1_iommu_listener = { >>>> + .region_add = vfio_type1_iommu_listener_region_add, >>>> + .region_del = vfio_type1_iommu_listener_region_del, >>>> +}; >>>> + >>>> +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener, >>>> + MemoryRegionSection *section) >>>> +{ >>>> + VFIOContainer *container; >>>> + hwaddr page_mask; >>>> + >>>> + if (!memory_region_is_iommu(section->mr)) { >>>> + 
return; >>>> + } >>>> + container = container_of(listener, VFIOContainer, >>>> + iommu_data.type1.listener); >>>> + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); >>>> + vfio_listener_region_add(container, page_mask, listener, section); >>>> +} >>>> + >>>> + >>>> +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener, >>>> + MemoryRegionSection *section) >>>> +{ >>>> + VFIOContainer *container; >>>> + hwaddr page_mask; >>>> + >>>> + if (!memory_region_is_iommu(section->mr)) { >>>> + return; >>>> + } >>>> + container = container_of(listener, VFIOContainer, >>>> + iommu_data.type1.listener); >>>> + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); >>>> + vfio_listener_region_del(container, page_mask, listener, section); >>>> +} >>>> + >>>> +static const MemoryListener vfio_spapr_iommu_listener = { >>>> + .region_add = vfio_spapr_iommu_listener_region_add, >>>> + .region_del = vfio_spapr_iommu_listener_region_del, >>>> }; >>> >>> >>> Rather than creating all these wrappers, why don't we create a structure >>> that gives us callbacks we can use for a shared function? If we had: >>> >>> struct VFIOMemoryListener { >>> struct MemoryListener listener; >>> bool (*filter)(MemoryRegionSection *section); >>> hwaddr (page_size)(MemoryRegionSection *section); >>> VFIOContainer *container; >>> } >>> >>> Then there would be no reason for you to have separate wrappers for >>> spapr. VFIOType1 would have a single VFIOMemoryListener, you could have >>> an array of two. >> >> Sorry, I am missing the point here... >> >> I cannot just have an array of these - I will have to store an address >> spaces per a listener in a container to implement filter() so I'd rather >> store just an address space and not add filter() at all. And I will still > > You're not storing an address space now. I am, indirectly, when register the listener with a filter which is never NULL and which is passed to the listener. 
If not that, the whole thing would be simpler. The rest is quite clear now, you are right. >> need "memory: Add reporting of supported page sizes" - or there is no way > > Just like you do now. > >> to implement it all. So I still end up having 6 callbacks in >> hw/vfio/common.c. What does this win for us? > > Nothing you're suggesting adds any more callbacks. > > The current filter is vfio_listener_skipped_section(). The iommu filter > is simply: > > bool vfio_iommu_listener_skipped_section(mr) > { > return vfio_listener_skipped_section(mr) || !memory_region_is_iommu(mr); > } > > Notice how I didn't have to call it spapr because it's probably what any > guest iommu is going to do... > > The v2 spapr filter is: > > bool vfio_nodump_listener_skipped_section(mr) > { > return vfio_listener_skipped_section(mr) || memory_region_is_skip_dump(mr); > } > > Notice again that we can use generic functions and not make everything > spapr specific. Ok, we add 2 (not 3) as vfio_listener_skipped_section() is still there. You got +1. > The page_size callbacks are similarly trivial, the iommu one obviously > calls memory_region_iommu_get_page_sizes, the other one uses > ~qemu_real_host_page_mask. Again, nothing spapr specific. These are trivial but I need all 3 - for type1 iommu (real page size), spapr iommu (TCE page size) and spapr memoryprereg (real page size). Ok, I can reuse 1st for 3rd and get one less. +2. > The container of course points back to the container and now > vfio_listener_region_add() simply does: > > VFIOMemoryListener vlistener = container_of(listener, VFIOMemoryListener, listener); > VFIOContainer = vlistener->container; > hwaddr page_mask = vlistener->page_size(mr); > > if (vlistener->filter && vlistener->filter(mr)) { > return > } > ... > > And suddenly we get to piece together a handful of _generic_ helper > functions into a VFIOMemoryListener and we're not adding a new pair of > wrappers for everything and duplicating trivial setup. 
Thanks, Ok, we are saving 2 callbacks and make others smaller. Makes sense. Thanks for chewing (right word here?) this stuff for me :) > > Alex > >>>> >>>> static void vfio_listener_release(VFIOContainer *container) >>>> @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) >>>> goto free_container_exit; >>>> } >>>> >>>> - container->iommu_data.type1.listener = vfio_memory_listener; >>>> + container->iommu_data.type1.listener = vfio_type1_iommu_listener; >>>> container->iommu_data.release = vfio_listener_release; >>>> >>>> memory_listener_register(&container->iommu_data.type1.listener, >>>> @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) >>>> goto free_container_exit; >>>> } >>>> >>>> - container->iommu_data.type1.listener = vfio_memory_listener; >>>> + container->iommu_data.type1.listener = vfio_spapr_iommu_listener; >>>> container->iommu_data.release = vfio_listener_release; >>>> >>>> memory_listener_register(&container->iommu_data.type1.listener, >>> >>> >>> >> >> > > >
diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 85ee9b0..aad41e1 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -312,11 +312,11 @@ out: rcu_read_unlock(); } -static void vfio_listener_region_add(MemoryListener *listener, +static void vfio_listener_region_add(VFIOContainer *container, + hwaddr page_mask, + MemoryListener *listener, MemoryRegionSection *section) { - VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.type1.listener); hwaddr iova, end; Int128 llend; void *vaddr; @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener *listener, return; } - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { + if (unlikely((section->offset_within_address_space & ~page_mask) != + (section->offset_within_region & ~page_mask))) { error_report("%s received unaligned region", __func__); return; } - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); llend = int128_make64(section->offset_within_address_space); llend = int128_add(llend, section->size); - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); + llend = int128_and(llend, int128_exts64(page_mask)); if (int128_ge(int128_make64(iova), llend)) { return; @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener *listener, } } -static void vfio_listener_region_del(MemoryListener *listener, +static void vfio_listener_region_del(VFIOContainer *container, + hwaddr page_mask, + MemoryListener *listener, MemoryRegionSection *section) { - VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.type1.listener); hwaddr iova, end; int ret; @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener *listener, return; } - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) 
{ + if (unlikely((section->offset_within_address_space & ~page_mask) != + (section->offset_within_region & ~page_mask))) { error_report("%s received unaligned region", __func__); return; } @@ -459,9 +459,9 @@ static void vfio_listener_region_del(MemoryListener *listener, */ } - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1); end = (section->offset_within_address_space + int128_get64(section->size)) & - TARGET_PAGE_MASK; + page_mask; if (iova >= end) { return; @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener *listener, } } -static const MemoryListener vfio_memory_listener = { - .region_add = vfio_listener_region_add, - .region_del = vfio_listener_region_del, +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, + iommu_data.type1.listener); + + vfio_listener_region_add(container, qemu_real_host_page_mask, listener, + section); +} + + +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, + iommu_data.type1.listener); + + vfio_listener_region_del(container, qemu_real_host_page_mask, listener, + section); +} + +static const MemoryListener vfio_type1_iommu_listener = { + .region_add = vfio_type1_iommu_listener_region_add, + .region_del = vfio_type1_iommu_listener_region_del, +}; + +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container; + hwaddr page_mask; + + if (!memory_region_is_iommu(section->mr)) { + return; + } + container = container_of(listener, VFIOContainer, + iommu_data.type1.listener); + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); + vfio_listener_region_add(container, page_mask, listener, section); 
+} + + +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container; + hwaddr page_mask; + + if (!memory_region_is_iommu(section->mr)) { + return; + } + container = container_of(listener, VFIOContainer, + iommu_data.type1.listener); + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1); + vfio_listener_region_del(container, page_mask, listener, section); +} + +static const MemoryListener vfio_spapr_iommu_listener = { + .region_add = vfio_spapr_iommu_listener_region_add, + .region_del = vfio_spapr_iommu_listener_region_del, }; static void vfio_listener_release(VFIOContainer *container) @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) goto free_container_exit; } - container->iommu_data.type1.listener = vfio_memory_listener; + container->iommu_data.type1.listener = vfio_type1_iommu_listener; container->iommu_data.release = vfio_listener_release; memory_listener_register(&container->iommu_data.type1.listener, @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) goto free_container_exit; } - container->iommu_data.type1.listener = vfio_memory_listener; + container->iommu_data.type1.listener = vfio_spapr_iommu_listener; container->iommu_data.release = vfio_listener_release; memory_listener_register(&container->iommu_data.type1.listener,
The existing memory listener is called on RAM or PCI address space which implies potentially different page sizes. Instead of guessing what page size should be used, this replaces a single IOMMU memory listener by two, one per supported IOMMU type; listener callbacks call the existing helpers with a known page size. For now, Type1 uses qemu_real_host_page_mask, while sPAPR uses the page size from the IOMMU. As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions, this adds memory_region_is_iommu() to the SPAPR IOMMU listener to skip non-IOMMU regions (which is an MSIX window), which duplicates vfio_listener_skipped_section() a bit. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> --- hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 19 deletions(-)