diff mbox

[v2] vfio: Add support for mmapping sub-page MMIO BARs

Message ID 1470913557-4355-1-git-send-email-xyjxie@linux.vnet.ibm.com
State New
Headers show

Commit Message

Yongji Xie Aug. 11, 2016, 11:05 a.m. UTC
Now the kernel commit 05f0c03fbac1 ("vfio-pci: Allow to mmap
sub-page MMIO BARs if the mmio page is exclusive") allows VFIO
to mmap sub-page BARs. This is the corresponding QEMU patch.
With those patches applied, we could passthrough sub-page BARs
to guest, which can help to improve IO performance for some devices.

In this patch, we expand MemoryRegions of these sub-page
MMIO BARs to PAGE_SIZE in vfio_pci_write_config(), so that
the BARs could be passed to KVM ioctl KVM_SET_USER_MEMORY_REGION
with a valid size. The expanding size will be recovered when
the base address of sub-page BAR is changed and not page aligned
any more in guest. And we also set the priority of these BARs'
memory regions to zero in case of overlap with BARs which share
the same page with sub-page BARs in guest.

Signed-off-by: Yongji Xie <xyjxie@linux.vnet.ibm.com>
---
 hw/vfio/common.c |    3 +--
 hw/vfio/pci.c    |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 2 deletions(-)

Comments

no-reply@patchew.org Aug. 11, 2016, 11:09 a.m. UTC | #1
Hi,

Your series seems to have some coding style problems. See output below for
more information:

Message-id: 1470913557-4355-1-git-send-email-xyjxie@linux.vnet.ibm.com
Subject: [Qemu-devel] [PATCH v2] vfio: Add support for mmapping sub-page MMIO BARs
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
    echo "Checking PATCH $n/$total: $(git show --no-patch --format=%s $c)..."
    if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
        failed=1
        echo
    fi
    n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag]         patchew/1470913557-4355-1-git-send-email-xyjxie@linux.vnet.ibm.com -> patchew/1470913557-4355-1-git-send-email-xyjxie@linux.vnet.ibm.com
Switched to a new branch 'test'
36f327d vfio: Add support for mmapping sub-page MMIO BARs

=== OUTPUT BEGIN ===
Checking PATCH 1/1: vfio: Add support for mmapping sub-page MMIO BARs...
ERROR: code indent should never use tabs
#87: FILE: hw/vfio/pci.c:1101:
+^I}$

total: 1 errors, 0 warnings, 97 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-devel@freelists.org
Yongji Xie Sept. 5, 2016, 10:25 a.m. UTC | #2
Ping?

On 2016/8/11 19:05, Yongji Xie wrote:
> Now the kernel commit 05f0c03fbac1 ("vfio-pci: Allow to mmap
> sub-page MMIO BARs if the mmio page is exclusive") allows VFIO
> to mmap sub-page BARs. This is the corresponding QEMU patch.
> With those patches applied, we could passthrough sub-page BARs
> to guest, which can help to improve IO performance for some devices.
>
> In this patch, we expand MemoryRegions of these sub-page
> MMIO BARs to PAGE_SIZE in vfio_pci_write_config(), so that
> the BARs could be passed to KVM ioctl KVM_SET_USER_MEMORY_REGION
> with a valid size. The expanding size will be recovered when
> the base address of sub-page BAR is changed and not page aligned
> any more in guest. And we also set the priority of these BARs'
> memory regions to zero in case of overlap with BARs which share
> the same page with sub-page BARs in guest.
>
> Signed-off-by: Yongji Xie <xyjxie@linux.vnet.ibm.com>
> ---
>   hw/vfio/common.c |    3 +--
>   hw/vfio/pci.c    |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 77 insertions(+), 2 deletions(-)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index b313e7c..1a70307 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -661,8 +661,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
>                                 region, name, region->size);
>
>           if (!vbasedev->no_mmap &&
> -            region->flags & VFIO_REGION_INFO_FLAG_MMAP &&
> -            !(region->size & ~qemu_real_host_page_mask)) {
> +            region->flags & VFIO_REGION_INFO_FLAG_MMAP) {
>
>               vfio_setup_region_sparse_mmaps(region, info);
>
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 7bfa17c..7035617 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1057,6 +1057,65 @@ static const MemoryRegionOps vfio_vga_ops = {
>   };
>
>   /*
> + * Expand memory region of sub-page(size < PAGE_SIZE) MMIO BAR to page
> + * size if the BAR is in an exclusive page in host so that we could map
> + * this BAR to guest. But this sub-page BAR may not occupy an exclusive
> + * page in guest. So we should set the priority of the expanded memory
> + * region to zero in case of overlap with BARs which share the same page
> + * with the sub-page BAR in guest. Besides, we should also recover the
> + * size of this sub-page BAR when its base address is changed in guest
> + * and not page aligned any more.
> + */
> +static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
> +{
> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +    VFIORegion *region = &vdev->bars[bar].region;
> +    MemoryRegion *mmap_mr, *mr;
> +    PCIIORegion *r;
> +    pcibus_t bar_addr;
> +
> +    /* Make sure that the whole region is allowed to be mmapped */
> +    if (!(region->nr_mmaps == 1 &&
> +        region->mmaps[0].size == region->size)) {
> +        return;
> +    }
> +
> +    r = &pdev->io_regions[bar];
> +    bar_addr = r->addr;
> +    if (bar_addr == PCI_BAR_UNMAPPED) {
> +        return;
> +    }
> +
> +    mr = region->mem;
> +    mmap_mr = &region->mmaps[0].mem;
> +    memory_region_transaction_begin();
> +    if (memory_region_size(mr) < qemu_real_host_page_size) {
> +        if (!(bar_addr & ~qemu_real_host_page_mask) &&
> +            memory_region_is_mapped(mr) && region->mmaps[0].mmap) {
> +            /* Expand memory region to page size and set priority */
> +            memory_region_del_subregion(r->address_space, mr);
> +            memory_region_set_size(mr, qemu_real_host_page_size);
> +            memory_region_set_size(mmap_mr, qemu_real_host_page_size);
> +            memory_region_add_subregion_overlap(r->address_space,
> +                                                bar_addr, mr, 0);
> +	}
> +    } else {
> +        /* This case would happen when guest rescan one PCI device */
> +        if (bar_addr & ~qemu_real_host_page_mask) {
> +            /* Recover the size of memory region */
> +            memory_region_set_size(mr, r->size);
> +            memory_region_set_size(mmap_mr, r->size);
> +        } else if (memory_region_is_mapped(mr)) {
> +            /* Set the priority of memory region to zero */
> +            memory_region_del_subregion(r->address_space, mr);
> +            memory_region_add_subregion_overlap(r->address_space,
> +                                                bar_addr, mr, 0);
> +        }
> +    }
> +    memory_region_transaction_commit();
> +}
> +
> +/*
>    * PCI config space
>    */
>   uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
> @@ -1139,6 +1198,23 @@ void vfio_pci_write_config(PCIDevice *pdev,
>           } else if (was_enabled && !is_enabled) {
>               vfio_msix_disable(vdev);
>           }
> +    } else if (ranges_overlap(addr, len, PCI_BASE_ADDRESS_0, 24) ||
> +        range_covers_byte(addr, len, PCI_COMMAND)) {
> +        pcibus_t old_addr[PCI_NUM_REGIONS - 1];
> +        int bar;
> +
> +        for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
> +            old_addr[bar] = pdev->io_regions[bar].addr;
> +        }
> +
> +        pci_default_write_config(pdev, addr, val, len);
> +
> +        for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
> +            if (old_addr[bar] != pdev->io_regions[bar].addr &&
> +                pdev->io_regions[bar].size > 0 &&
> +                pdev->io_regions[bar].size < qemu_real_host_page_size)
> +                vfio_sub_page_bar_update_mapping(pdev, bar);
> +        }
>       } else {
>           /* Write everything to QEMU to keep emulated bits correct */
>           pci_default_write_config(pdev, addr, val, len);
Alex Williamson Sept. 13, 2016, 10:55 p.m. UTC | #3
[cc +Paolo]

On Thu, 11 Aug 2016 19:05:57 +0800
Yongji Xie <xyjxie@linux.vnet.ibm.com> wrote:

> Now the kernel commit 05f0c03fbac1 ("vfio-pci: Allow to mmap
> sub-page MMIO BARs if the mmio page is exclusive") allows VFIO
> to mmap sub-page BARs. This is the corresponding QEMU patch.
> With those patches applied, we could passthrough sub-page BARs
> to guest, which can help to improve IO performance for some devices.
> 
> In this patch, we expand MemoryRegions of these sub-page
> MMIO BARs to PAGE_SIZE in vfio_pci_write_config(), so that
> the BARs could be passed to KVM ioctl KVM_SET_USER_MEMORY_REGION
> with a valid size. The expanding size will be recovered when
> the base address of sub-page BAR is changed and not page aligned
> any more in guest. And we also set the priority of these BARs'
> memory regions to zero in case of overlap with BARs which share
> the same page with sub-page BARs in guest.
> 
> Signed-off-by: Yongji Xie <xyjxie@linux.vnet.ibm.com>
> ---
>  hw/vfio/common.c |    3 +--
>  hw/vfio/pci.c    |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 77 insertions(+), 2 deletions(-)

Hi Yongji,

There was an automated patch checker reply to this patch already, see:

https://patchwork.kernel.org/patch/9275077/

Looks like there's a trivial whitespace issue with using a tab.  Also
another QEMU style issue noted below.

> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index b313e7c..1a70307 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -661,8 +661,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
>                                region, name, region->size);
>  
>          if (!vbasedev->no_mmap &&
> -            region->flags & VFIO_REGION_INFO_FLAG_MMAP &&
> -            !(region->size & ~qemu_real_host_page_mask)) {
> +            region->flags & VFIO_REGION_INFO_FLAG_MMAP) {
>  
>              vfio_setup_region_sparse_mmaps(region, info);
>  
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 7bfa17c..7035617 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1057,6 +1057,65 @@ static const MemoryRegionOps vfio_vga_ops = {
>  };
>  
>  /*
> + * Expand memory region of sub-page(size < PAGE_SIZE) MMIO BAR to page
> + * size if the BAR is in an exclusive page in host so that we could map
> + * this BAR to guest. But this sub-page BAR may not occupy an exclusive
> + * page in guest. So we should set the priority of the expanded memory
> + * region to zero in case of overlap with BARs which share the same page
> + * with the sub-page BAR in guest. Besides, we should also recover the
> + * size of this sub-page BAR when its base address is changed in guest
> + * and not page aligned any more.
> + */
> +static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
> +{
> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +    VFIORegion *region = &vdev->bars[bar].region;
> +    MemoryRegion *mmap_mr, *mr;
> +    PCIIORegion *r;
> +    pcibus_t bar_addr;
> +
> +    /* Make sure that the whole region is allowed to be mmapped */
> +    if (!(region->nr_mmaps == 1 &&
> +        region->mmaps[0].size == region->size)) {
> +        return;
> +    }
> +
> +    r = &pdev->io_regions[bar];
> +    bar_addr = r->addr;
> +    if (bar_addr == PCI_BAR_UNMAPPED) {
> +        return;
> +    }
> +
> +    mr = region->mem;
> +    mmap_mr = &region->mmaps[0].mem;
> +    memory_region_transaction_begin();
> +    if (memory_region_size(mr) < qemu_real_host_page_size) {
> +        if (!(bar_addr & ~qemu_real_host_page_mask) &&
> +            memory_region_is_mapped(mr) && region->mmaps[0].mmap) {
> +            /* Expand memory region to page size and set priority */
> +            memory_region_del_subregion(r->address_space, mr);
> +            memory_region_set_size(mr, qemu_real_host_page_size);
> +            memory_region_set_size(mmap_mr, qemu_real_host_page_size);
> +            memory_region_add_subregion_overlap(r->address_space,
> +                                                bar_addr, mr, 0);
> +	}
> +    } else {
> +        /* This case would happen when guest rescan one PCI device */
> +        if (bar_addr & ~qemu_real_host_page_mask) {
> +            /* Recover the size of memory region */
> +            memory_region_set_size(mr, r->size);
> +            memory_region_set_size(mmap_mr, r->size);
> +        } else if (memory_region_is_mapped(mr)) {
> +            /* Set the priority of memory region to zero */
> +            memory_region_del_subregion(r->address_space, mr);
> +            memory_region_add_subregion_overlap(r->address_space,
> +                                                bar_addr, mr, 0);
> +        }
> +    }
> +    memory_region_transaction_commit();

Paolo, as the reigning memory API expert, do you see any issues with
this?  Expanding the size of a container MemoryRegion and the contained
mmap'd subregion and manipulating priorities so that we don't interfere
with other MemoryRegions.

> +}
> +
> +/*
>   * PCI config space
>   */
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
> @@ -1139,6 +1198,23 @@ void vfio_pci_write_config(PCIDevice *pdev,
>          } else if (was_enabled && !is_enabled) {
>              vfio_msix_disable(vdev);
>          }
> +    } else if (ranges_overlap(addr, len, PCI_BASE_ADDRESS_0, 24) ||
> +        range_covers_byte(addr, len, PCI_COMMAND)) {
> +        pcibus_t old_addr[PCI_NUM_REGIONS - 1];
> +        int bar;
> +
> +        for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
> +            old_addr[bar] = pdev->io_regions[bar].addr;
> +        }
> +
> +        pci_default_write_config(pdev, addr, val, len);
> +
> +        for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
> +            if (old_addr[bar] != pdev->io_regions[bar].addr &&
> +                pdev->io_regions[bar].size > 0 &&
> +                pdev->io_regions[bar].size < qemu_real_host_page_size)

This requires {} per QEMU coding standards.

> +                vfio_sub_page_bar_update_mapping(pdev, bar);
> +        }
>      } else {
>          /* Write everything to QEMU to keep emulated bits correct */
>          pci_default_write_config(pdev, addr, val, len);

Thanks,
Alex
Yongji Xie Sept. 14, 2016, 5:04 a.m. UTC | #4
On 2016/9/14 6:55, Alex Williamson wrote:

> [cc +Paolo]
>
> On Thu, 11 Aug 2016 19:05:57 +0800
> Yongji Xie <xyjxie@linux.vnet.ibm.com> wrote:
>
>> Now the kernel commit 05f0c03fbac1 ("vfio-pci: Allow to mmap
>> sub-page MMIO BARs if the mmio page is exclusive") allows VFIO
>> to mmap sub-page BARs. This is the corresponding QEMU patch.
>> With those patches applied, we could passthrough sub-page BARs
>> to guest, which can help to improve IO performance for some devices.
>>
>> In this patch, we expand MemoryRegions of these sub-page
>> MMIO BARs to PAGE_SIZE in vfio_pci_write_config(), so that
>> the BARs could be passed to KVM ioctl KVM_SET_USER_MEMORY_REGION
>> with a valid size. The expanding size will be recovered when
>> the base address of sub-page BAR is changed and not page aligned
>> any more in guest. And we also set the priority of these BARs'
>> memory regions to zero in case of overlap with BARs which share
>> the same page with sub-page BARs in guest.
>>
>> Signed-off-by: Yongji Xie <xyjxie@linux.vnet.ibm.com>
>> ---
>>   hw/vfio/common.c |    3 +--
>>   hw/vfio/pci.c    |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 77 insertions(+), 2 deletions(-)
> Hi Yongji,
>
> There was an automated patch checker reply to this patch already, see:
>
> https://patchwork.kernel.org/patch/9275077/
>
> Looks like there's a trivial whitespace issue with using a tab.  Also
> another QEMU style issue noted below.

Yes,  I saw it. I'll fix it in next version. Thanks for your remind.

>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index b313e7c..1a70307 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -661,8 +661,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
>>                                 region, name, region->size);
>>   
>>           if (!vbasedev->no_mmap &&
>> -            region->flags & VFIO_REGION_INFO_FLAG_MMAP &&
>> -            !(region->size & ~qemu_real_host_page_mask)) {
>> +            region->flags & VFIO_REGION_INFO_FLAG_MMAP) {
>>   
>>               vfio_setup_region_sparse_mmaps(region, info);
>>   
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 7bfa17c..7035617 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1057,6 +1057,65 @@ static const MemoryRegionOps vfio_vga_ops = {
>>   };
>>   
>>   /*
>> + * Expand memory region of sub-page(size < PAGE_SIZE) MMIO BAR to page
>> + * size if the BAR is in an exclusive page in host so that we could map
>> + * this BAR to guest. But this sub-page BAR may not occupy an exclusive
>> + * page in guest. So we should set the priority of the expanded memory
>> + * region to zero in case of overlap with BARs which share the same page
>> + * with the sub-page BAR in guest. Besides, we should also recover the
>> + * size of this sub-page BAR when its base address is changed in guest
>> + * and not page aligned any more.
>> + */
>> +static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
>> +{
>> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> +    VFIORegion *region = &vdev->bars[bar].region;
>> +    MemoryRegion *mmap_mr, *mr;
>> +    PCIIORegion *r;
>> +    pcibus_t bar_addr;
>> +
>> +    /* Make sure that the whole region is allowed to be mmapped */
>> +    if (!(region->nr_mmaps == 1 &&
>> +        region->mmaps[0].size == region->size)) {
>> +        return;
>> +    }
>> +
>> +    r = &pdev->io_regions[bar];
>> +    bar_addr = r->addr;
>> +    if (bar_addr == PCI_BAR_UNMAPPED) {
>> +        return;
>> +    }
>> +
>> +    mr = region->mem;
>> +    mmap_mr = &region->mmaps[0].mem;
>> +    memory_region_transaction_begin();
>> +    if (memory_region_size(mr) < qemu_real_host_page_size) {
>> +        if (!(bar_addr & ~qemu_real_host_page_mask) &&
>> +            memory_region_is_mapped(mr) && region->mmaps[0].mmap) {
>> +            /* Expand memory region to page size and set priority */
>> +            memory_region_del_subregion(r->address_space, mr);
>> +            memory_region_set_size(mr, qemu_real_host_page_size);
>> +            memory_region_set_size(mmap_mr, qemu_real_host_page_size);
>> +            memory_region_add_subregion_overlap(r->address_space,
>> +                                                bar_addr, mr, 0);
>> +	}
>> +    } else {
>> +        /* This case would happen when guest rescan one PCI device */
>> +        if (bar_addr & ~qemu_real_host_page_mask) {
>> +            /* Recover the size of memory region */
>> +            memory_region_set_size(mr, r->size);
>> +            memory_region_set_size(mmap_mr, r->size);
>> +        } else if (memory_region_is_mapped(mr)) {
>> +            /* Set the priority of memory region to zero */
>> +            memory_region_del_subregion(r->address_space, mr);
>> +            memory_region_add_subregion_overlap(r->address_space,
>> +                                                bar_addr, mr, 0);
>> +        }
>> +    }
>> +    memory_region_transaction_commit();
> Paolo, as the reigning memory API expert, do you see any issues with
> this?  Expanding the size of a container MemoryRegion and the contained
> mmap'd subregion and manipulating priorities so that we don't interfere
> with other MemoryRegions.
>
>> +}
>> +
>> +/*
>>    * PCI config space
>>    */
>>   uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
>> @@ -1139,6 +1198,23 @@ void vfio_pci_write_config(PCIDevice *pdev,
>>           } else if (was_enabled && !is_enabled) {
>>               vfio_msix_disable(vdev);
>>           }
>> +    } else if (ranges_overlap(addr, len, PCI_BASE_ADDRESS_0, 24) ||
>> +        range_covers_byte(addr, len, PCI_COMMAND)) {
>> +        pcibus_t old_addr[PCI_NUM_REGIONS - 1];
>> +        int bar;
>> +
>> +        for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
>> +            old_addr[bar] = pdev->io_regions[bar].addr;
>> +        }
>> +
>> +        pci_default_write_config(pdev, addr, val, len);
>> +
>> +        for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
>> +            if (old_addr[bar] != pdev->io_regions[bar].addr &&
>> +                pdev->io_regions[bar].size > 0 &&
>> +                pdev->io_regions[bar].size < qemu_real_host_page_size)
> This requires {} per QEMU coding standards.

Will do.

Thanks,
Yongji
Paolo Bonzini Sept. 30, 2016, 4:05 p.m. UTC | #5
On 14/09/2016 00:55, Alex Williamson wrote:
> Paolo, as the reigning memory API expert, do you see any issues with
> this?  Expanding the size of a container MemoryRegion and the contained
> mmap'd subregion and manipulating priorities so that we don't interfere
> with other MemoryRegions.

I guess it's fine if you are okay with maintaining it.
memory_region_set_size exists, might as well use it. :)

What I'm worried about, is what happens if two such regions end up in
the same guest page.  Then the two priorities conflict.

Paolo
Yongji Xie Oct. 4, 2016, 6:57 a.m. UTC | #6
On 2016/10/1 0:05, Paolo Bonzini wrote:

> On 14/09/2016 00:55, Alex Williamson wrote:
>> Paolo, as the reigning memory API expert, do you see any issues with
>> this?  Expanding the size of a container MemoryRegion and the contained
>> mmap'd subregion and manipulating priorities so that we don't interfere
>> with other MemoryRegions.
> I guess it's fine if you are okay with maintaining it.
> memory_region_set_size exists, might as well use it. :)
>
> What I'm worried about, is what happens if two such regions end up in
> the same guest page.  Then the two priorities conflict.
>

Hi Paolo,

I think I can answer this question. We would expand only one MemoryRegion
which is page aligned and set its priority to zero if we have two region 
in the same
guest page. Then the Memory Region with higher priority will overlap the 
expanded
part of page aligned one as if we didn't do the expanding.

Thanks,
Yongji
diff mbox

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b313e7c..1a70307 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -661,8 +661,7 @@  int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
                               region, name, region->size);
 
         if (!vbasedev->no_mmap &&
-            region->flags & VFIO_REGION_INFO_FLAG_MMAP &&
-            !(region->size & ~qemu_real_host_page_mask)) {
+            region->flags & VFIO_REGION_INFO_FLAG_MMAP) {
 
             vfio_setup_region_sparse_mmaps(region, info);
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 7bfa17c..7035617 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1057,6 +1057,65 @@  static const MemoryRegionOps vfio_vga_ops = {
 };
 
 /*
+ * Expand memory region of sub-page(size < PAGE_SIZE) MMIO BAR to page
+ * size if the BAR is in an exclusive page in host so that we could map
+ * this BAR to guest. But this sub-page BAR may not occupy an exclusive
+ * page in guest. So we should set the priority of the expanded memory
+ * region to zero in case of overlap with BARs which share the same page
+ * with the sub-page BAR in guest. Besides, we should also recover the
+ * size of this sub-page BAR when its base address is changed in guest
+ * and not page aligned any more.
+ */
+static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
+{
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    VFIORegion *region = &vdev->bars[bar].region;
+    MemoryRegion *mmap_mr, *mr;
+    PCIIORegion *r;
+    pcibus_t bar_addr;
+
+    /* Make sure that the whole region is allowed to be mmapped */
+    if (!(region->nr_mmaps == 1 &&
+        region->mmaps[0].size == region->size)) {
+        return;
+    }
+
+    r = &pdev->io_regions[bar];
+    bar_addr = r->addr;
+    if (bar_addr == PCI_BAR_UNMAPPED) {
+        return;
+    }
+
+    mr = region->mem;
+    mmap_mr = &region->mmaps[0].mem;
+    memory_region_transaction_begin();
+    if (memory_region_size(mr) < qemu_real_host_page_size) {
+        if (!(bar_addr & ~qemu_real_host_page_mask) &&
+            memory_region_is_mapped(mr) && region->mmaps[0].mmap) {
+            /* Expand memory region to page size and set priority */
+            memory_region_del_subregion(r->address_space, mr);
+            memory_region_set_size(mr, qemu_real_host_page_size);
+            memory_region_set_size(mmap_mr, qemu_real_host_page_size);
+            memory_region_add_subregion_overlap(r->address_space,
+                                                bar_addr, mr, 0);
+	}
+    } else {
+        /* This case would happen when guest rescan one PCI device */
+        if (bar_addr & ~qemu_real_host_page_mask) {
+            /* Recover the size of memory region */
+            memory_region_set_size(mr, r->size);
+            memory_region_set_size(mmap_mr, r->size);
+        } else if (memory_region_is_mapped(mr)) {
+            /* Set the priority of memory region to zero */
+            memory_region_del_subregion(r->address_space, mr);
+            memory_region_add_subregion_overlap(r->address_space,
+                                                bar_addr, mr, 0);
+        }
+    }
+    memory_region_transaction_commit();
+}
+
+/*
  * PCI config space
  */
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
@@ -1139,6 +1198,23 @@  void vfio_pci_write_config(PCIDevice *pdev,
         } else if (was_enabled && !is_enabled) {
             vfio_msix_disable(vdev);
         }
+    } else if (ranges_overlap(addr, len, PCI_BASE_ADDRESS_0, 24) ||
+        range_covers_byte(addr, len, PCI_COMMAND)) {
+        pcibus_t old_addr[PCI_NUM_REGIONS - 1];
+        int bar;
+
+        for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
+            old_addr[bar] = pdev->io_regions[bar].addr;
+        }
+
+        pci_default_write_config(pdev, addr, val, len);
+
+        for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
+            if (old_addr[bar] != pdev->io_regions[bar].addr &&
+                pdev->io_regions[bar].size > 0 &&
+                pdev->io_regions[bar].size < qemu_real_host_page_size)
+                vfio_sub_page_bar_update_mapping(pdev, bar);
+        }
     } else {
         /* Write everything to QEMU to keep emulated bits correct */
         pci_default_write_config(pdev, addr, val, len);