diff mbox series

[v3,10/13] vfio/common: Add device dirty page bitmap sync

Message ID 20230304014343.33646-11-joao.m.martins@oracle.com
State New
Headers show
Series vfio/migration: Device dirty page tracking | expand

Commit Message

Joao Martins March 4, 2023, 1:43 a.m. UTC
Add device dirty page bitmap sync functionality. This uses the device
DMA logging uAPI to sync dirty page bitmap from the device.

Device dirty page bitmap sync is used only if all devices within a
container support device dirty page tracking.

Signed-off-by: Avihai Horon <avihaih@nvidia.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 hw/vfio/common.c | 88 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 79 insertions(+), 9 deletions(-)

Comments

Alex Williamson March 6, 2023, 7:22 p.m. UTC | #1
On Sat,  4 Mar 2023 01:43:40 +0000
Joao Martins <joao.m.martins@oracle.com> wrote:

> Add device dirty page bitmap sync functionality. This uses the device
> DMA logging uAPI to sync dirty page bitmap from the device.
> 
> Device dirty page bitmap sync is used only if all devices within a
> container support device dirty page tracking.
> 
> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>  hw/vfio/common.c | 88 +++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 79 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index b0c7d03279ab..5b8456975e97 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -342,6 +342,9 @@ static int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size)
>      return 0;
>  }
>  
> +static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
> +                                 uint64_t size, ram_addr_t ram_addr);
> +
>  bool vfio_mig_active(void)
>  {
>      VFIOGroup *group;
> @@ -565,10 +568,16 @@ static int vfio_dma_unmap(VFIOContainer *container,
>          .iova = iova,
>          .size = size,
>      };
> +    bool need_dirty_sync = false;
> +    int ret;
> +
> +    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
> +        if (!vfio_devices_all_device_dirty_tracking(container) &&
> +            container->dirty_pages_supported) {
> +            return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
> +        }
>  
> -    if (iotlb && container->dirty_pages_supported &&
> -        vfio_devices_all_running_and_mig_active(container)) {
> -        return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
> +        need_dirty_sync = true;
>      }
>  
>      while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
> @@ -594,10 +603,12 @@ static int vfio_dma_unmap(VFIOContainer *container,
>          return -errno;
>      }
>  
> -    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
> -        cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
> -                                            tcg_enabled() ? DIRTY_CLIENTS_ALL :
> -                                            DIRTY_CLIENTS_NOCODE);
> +    if (need_dirty_sync) {
> +        ret = vfio_get_dirty_bitmap(container, iova, size,
> +                                    iotlb->translated_addr);
> +        if (ret) {
> +            return ret;
> +        }
>      }
>  
>      return 0;
> @@ -1579,6 +1590,58 @@ static void vfio_listener_log_global_stop(MemoryListener *listener)
>      }
>  }
>  
> +static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
> +                                          hwaddr size, void *bitmap)
> +{
> +    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
> +                        sizeof(struct vfio_device_feature_dma_logging_report),
> +                        sizeof(__aligned_u64))] = {};
> +    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
> +    struct vfio_device_feature_dma_logging_report *report =
> +        (struct vfio_device_feature_dma_logging_report *)feature->data;
> +
> +    report->iova = iova;
> +    report->length = size;
> +    report->page_size = qemu_real_host_page_size();
> +    report->bitmap = (__aligned_u64)bitmap;
> +
> +    feature->argsz = sizeof(buf);
> +    feature->flags =
> +        VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;

Nit, the series is inconsistent between initializing flags as above and
as:

    feature->flags = VFIO_DEVICE_FEATURE_GET;
    feature->flags |= VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;

My personal preference would be more like:

    feature->flags = VFIO_DEVICE_FEATURE_GET |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;

Thanks,
Alex

> +
> +    if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
> +        return -errno;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
> +                                           VFIOBitmap *vbmap, hwaddr iova,
> +                                           hwaddr size)
> +{
> +    VFIODevice *vbasedev;
> +    VFIOGroup *group;
> +    int ret;
> +
> +    QLIST_FOREACH(group, &container->group_list, container_next) {
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            ret = vfio_device_dma_logging_report(vbasedev, iova, size,
> +                                                 vbmap->bitmap);
> +            if (ret) {
> +                error_report("%s: Failed to get DMA logging report, iova: "
> +                             "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx
> +                             ", err: %d (%s)",
> +                             vbasedev->name, iova, size, ret, strerror(-ret));
> +
> +                return ret;
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
>                                     hwaddr iova, hwaddr size)
>  {
> @@ -1619,10 +1682,12 @@ static int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
>  static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
>                                   uint64_t size, ram_addr_t ram_addr)
>  {
> +    bool all_device_dirty_tracking =
> +        vfio_devices_all_device_dirty_tracking(container);
>      VFIOBitmap vbmap;
>      int ret;
>  
> -    if (!container->dirty_pages_supported) {
> +    if (!container->dirty_pages_supported && !all_device_dirty_tracking) {
>          cpu_physical_memory_set_dirty_range(ram_addr, size,
>                                              tcg_enabled() ? DIRTY_CLIENTS_ALL :
>                                              DIRTY_CLIENTS_NOCODE);
> @@ -1634,7 +1699,12 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
>          return -errno;
>      }
>  
> -    ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
> +    if (all_device_dirty_tracking) {
> +        ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size);
> +    } else {
> +        ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
> +    }
> +
>      if (ret) {
>          goto out;
>      }
Joao Martins March 6, 2023, 7:42 p.m. UTC | #2
On 06/03/2023 19:22, Alex Williamson wrote:
> On Sat,  4 Mar 2023 01:43:40 +0000
> Joao Martins <joao.m.martins@oracle.com> wrote:
> 
>> Add device dirty page bitmap sync functionality. This uses the device
>> DMA logging uAPI to sync dirty page bitmap from the device.
>>
>> Device dirty page bitmap sync is used only if all devices within a
>> container support device dirty page tracking.
>>
>> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>>  hw/vfio/common.c | 88 +++++++++++++++++++++++++++++++++++++++++++-----
>>  1 file changed, 79 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index b0c7d03279ab..5b8456975e97 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -342,6 +342,9 @@ static int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size)
>>      return 0;
>>  }
>>  
>> +static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
>> +                                 uint64_t size, ram_addr_t ram_addr);
>> +
>>  bool vfio_mig_active(void)
>>  {
>>      VFIOGroup *group;
>> @@ -565,10 +568,16 @@ static int vfio_dma_unmap(VFIOContainer *container,
>>          .iova = iova,
>>          .size = size,
>>      };
>> +    bool need_dirty_sync = false;
>> +    int ret;
>> +
>> +    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
>> +        if (!vfio_devices_all_device_dirty_tracking(container) &&
>> +            container->dirty_pages_supported) {
>> +            return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>> +        }
>>  
>> -    if (iotlb && container->dirty_pages_supported &&
>> -        vfio_devices_all_running_and_mig_active(container)) {
>> -        return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>> +        need_dirty_sync = true;
>>      }
>>  
>>      while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
>> @@ -594,10 +603,12 @@ static int vfio_dma_unmap(VFIOContainer *container,
>>          return -errno;
>>      }
>>  
>> -    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
>> -        cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
>> -                                            tcg_enabled() ? DIRTY_CLIENTS_ALL :
>> -                                            DIRTY_CLIENTS_NOCODE);
>> +    if (need_dirty_sync) {
>> +        ret = vfio_get_dirty_bitmap(container, iova, size,
>> +                                    iotlb->translated_addr);
>> +        if (ret) {
>> +            return ret;
>> +        }
>>      }
>>  
>>      return 0;
>> @@ -1579,6 +1590,58 @@ static void vfio_listener_log_global_stop(MemoryListener *listener)
>>      }
>>  }
>>  
>> +static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
>> +                                          hwaddr size, void *bitmap)
>> +{
>> +    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
>> +                        sizeof(struct vfio_device_feature_dma_logging_report),
>> +                        sizeof(__aligned_u64))] = {};
>> +    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
>> +    struct vfio_device_feature_dma_logging_report *report =
>> +        (struct vfio_device_feature_dma_logging_report *)feature->data;
>> +
>> +    report->iova = iova;
>> +    report->length = size;
>> +    report->page_size = qemu_real_host_page_size();
>> +    report->bitmap = (__aligned_u64)bitmap;
>> +
>> +    feature->argsz = sizeof(buf);
>> +    feature->flags =
>> +        VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
> 
> Nit, the series is inconsistent between initializing flags as above and
> as:
> 
>     feature->flags = VFIO_DEVICE_FEATURE_GET;
>     feature->flags |= VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
> 
> My personal preference would be more like:
> 
>     feature->flags = VFIO_DEVICE_FEATURE_GET |
>                      VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
>

I'll switch the overall style to the latter.

> Thanks,
> Alex
> 
>> +
>> +    if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
>> +        return -errno;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
>> +                                           VFIOBitmap *vbmap, hwaddr iova,
>> +                                           hwaddr size)
>> +{
>> +    VFIODevice *vbasedev;
>> +    VFIOGroup *group;
>> +    int ret;
>> +
>> +    QLIST_FOREACH(group, &container->group_list, container_next) {
>> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
>> +            ret = vfio_device_dma_logging_report(vbasedev, iova, size,
>> +                                                 vbmap->bitmap);
>> +            if (ret) {
>> +                error_report("%s: Failed to get DMA logging report, iova: "
>> +                             "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx
>> +                             ", err: %d (%s)",
>> +                             vbasedev->name, iova, size, ret, strerror(-ret));
>> +
>> +                return ret;
>> +            }
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>  static int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
>>                                     hwaddr iova, hwaddr size)
>>  {
>> @@ -1619,10 +1682,12 @@ static int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
>>  static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
>>                                   uint64_t size, ram_addr_t ram_addr)
>>  {
>> +    bool all_device_dirty_tracking =
>> +        vfio_devices_all_device_dirty_tracking(container);
>>      VFIOBitmap vbmap;
>>      int ret;
>>  
>> -    if (!container->dirty_pages_supported) {
>> +    if (!container->dirty_pages_supported && !all_device_dirty_tracking) {
>>          cpu_physical_memory_set_dirty_range(ram_addr, size,
>>                                              tcg_enabled() ? DIRTY_CLIENTS_ALL :
>>                                              DIRTY_CLIENTS_NOCODE);
>> @@ -1634,7 +1699,12 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
>>          return -errno;
>>      }
>>  
>> -    ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
>> +    if (all_device_dirty_tracking) {
>> +        ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size);
>> +    } else {
>> +        ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
>> +    }
>> +
>>      if (ret) {
>>          goto out;
>>      }
>
diff mbox series

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b0c7d03279ab..5b8456975e97 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -342,6 +342,9 @@  static int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size)
     return 0;
 }
 
+static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
+                                 uint64_t size, ram_addr_t ram_addr);
+
 bool vfio_mig_active(void)
 {
     VFIOGroup *group;
@@ -565,10 +568,16 @@  static int vfio_dma_unmap(VFIOContainer *container,
         .iova = iova,
         .size = size,
     };
+    bool need_dirty_sync = false;
+    int ret;
+
+    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
+        if (!vfio_devices_all_device_dirty_tracking(container) &&
+            container->dirty_pages_supported) {
+            return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
+        }
 
-    if (iotlb && container->dirty_pages_supported &&
-        vfio_devices_all_running_and_mig_active(container)) {
-        return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
+        need_dirty_sync = true;
     }
 
     while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
@@ -594,10 +603,12 @@  static int vfio_dma_unmap(VFIOContainer *container,
         return -errno;
     }
 
-    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
-        cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
-                                            tcg_enabled() ? DIRTY_CLIENTS_ALL :
-                                            DIRTY_CLIENTS_NOCODE);
+    if (need_dirty_sync) {
+        ret = vfio_get_dirty_bitmap(container, iova, size,
+                                    iotlb->translated_addr);
+        if (ret) {
+            return ret;
+        }
     }
 
     return 0;
@@ -1579,6 +1590,58 @@  static void vfio_listener_log_global_stop(MemoryListener *listener)
     }
 }
 
+static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
+                                          hwaddr size, void *bitmap)
+{
+    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
+                        sizeof(struct vfio_device_feature_dma_logging_report),
+                        sizeof(__aligned_u64))] = {};
+    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+    struct vfio_device_feature_dma_logging_report *report =
+        (struct vfio_device_feature_dma_logging_report *)feature->data;
+
+    report->iova = iova;
+    report->length = size;
+    report->page_size = qemu_real_host_page_size();
+    report->bitmap = (__aligned_u64)bitmap;
+
+    feature->argsz = sizeof(buf);
+    feature->flags =
+        VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
+
+    if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+        return -errno;
+    }
+
+    return 0;
+}
+
+static int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
+                                           VFIOBitmap *vbmap, hwaddr iova,
+                                           hwaddr size)
+{
+    VFIODevice *vbasedev;
+    VFIOGroup *group;
+    int ret;
+
+    QLIST_FOREACH(group, &container->group_list, container_next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            ret = vfio_device_dma_logging_report(vbasedev, iova, size,
+                                                 vbmap->bitmap);
+            if (ret) {
+                error_report("%s: Failed to get DMA logging report, iova: "
+                             "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx
+                             ", err: %d (%s)",
+                             vbasedev->name, iova, size, ret, strerror(-ret));
+
+                return ret;
+            }
+        }
+    }
+
+    return 0;
+}
+
 static int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
                                    hwaddr iova, hwaddr size)
 {
@@ -1619,10 +1682,12 @@  static int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
 static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
                                  uint64_t size, ram_addr_t ram_addr)
 {
+    bool all_device_dirty_tracking =
+        vfio_devices_all_device_dirty_tracking(container);
     VFIOBitmap vbmap;
     int ret;
 
-    if (!container->dirty_pages_supported) {
+    if (!container->dirty_pages_supported && !all_device_dirty_tracking) {
         cpu_physical_memory_set_dirty_range(ram_addr, size,
                                             tcg_enabled() ? DIRTY_CLIENTS_ALL :
                                             DIRTY_CLIENTS_NOCODE);
@@ -1634,7 +1699,12 @@  static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
         return -errno;
     }
 
-    ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
+    if (all_device_dirty_tracking) {
+        ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size);
+    } else {
+        ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
+    }
+
     if (ret) {
         goto out;
     }