diff mbox series

[v11,7/7] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

Message ID 1544516693-5395-8-git-send-email-wei.w.wang@intel.com
State New
Headers show
Series virtio-balloon: free page hint support | expand

Commit Message

Wang, Wei W Dec. 11, 2018, 8:24 a.m. UTC
The new feature enables the virtio-balloon device to receive hints of
guest free pages from the free page vq.

A notifier is registered to the migration precopy notifier chain. The
notifier calls free_page_start after the migration thread syncs the dirty
bitmap, so that the free page optimization starts to clear bits of free
pages from the bitmap. It calls the free_page_stop before the migration
thread syncs the bitmap, which is the end of the current round of ram
save. The free_page_stop is also called to stop the optimization when an
error occurs in the process of ram saving.

Note: balloon will report pages which were free at the time of this call.
As the reporting happens asynchronously, dirty bit logging must be
enabled before this free_page_start call is made. Guest reporting must be
disabled before the migration dirty bitmap is synchronized.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Peter Xu <peterx@redhat.com>
---
 hw/virtio/virtio-balloon.c                      | 263 ++++++++++++++++++++++++
 include/hw/virtio/virtio-balloon.h              |  28 ++-
 include/standard-headers/linux/virtio_balloon.h |   5 +
 3 files changed, 295 insertions(+), 1 deletion(-)

Comments

Dr. David Alan Gilbert Dec. 13, 2018, 3:45 p.m. UTC | #1
* Wei Wang (wei.w.wang@intel.com) wrote:
> The new feature enables the virtio-balloon device to receive hints of
> guest free pages from the free page vq.
> 
> A notifier is registered to the migration precopy notifier chain. The
> notifier calls free_page_start after the migration thread syncs the dirty
> bitmap, so that the free page optimization starts to clear bits of free
> pages from the bitmap. It calls the free_page_stop before the migration
> thread syncs the bitmap, which is the end of the current round of ram
> save. The free_page_stop is also called to stop the optimization in the
> case when there is an error occurred in the process of ram saving.
> 
> Note: balloon will report pages which were free at the time of this call.
> As the reporting happens asynchronously, dirty bit logging must be
> enabled before this free_page_start call is made. Guest reporting must be
> disabled before the migration dirty bitmap is synchronized.
> 
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> CC: Michael S. Tsirkin <mst@redhat.com>
> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> CC: Juan Quintela <quintela@redhat.com>
> CC: Peter Xu <peterx@redhat.com>

I think I'm OK for this from the migration side, I'd appreciate
someone checking the virtio and aio bits.

I'm not too sure how it gets switched on and off - i.e. if we get a nice
new qemu on a new kernel, what happens when I try and migrate to the
same qemu on an older kernel without these hints?

Dave

> ---
>  hw/virtio/virtio-balloon.c                      | 263 ++++++++++++++++++++++++
>  include/hw/virtio/virtio-balloon.h              |  28 ++-
>  include/standard-headers/linux/virtio_balloon.h |   5 +
>  3 files changed, 295 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index 1728e4f..543bbd4 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -27,6 +27,7 @@
>  #include "qapi/visitor.h"
>  #include "trace.h"
>  #include "qemu/error-report.h"
> +#include "migration/misc.h"
>  
>  #include "hw/virtio/virtio-bus.h"
>  #include "hw/virtio/virtio-access.h"
> @@ -308,6 +309,184 @@ out:
>      }
>  }
>  
> +static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev,
> +                                               VirtQueue *vq)
> +{
> +    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
> +    qemu_bh_schedule(s->free_page_bh);
> +}
> +
> +static bool get_free_page_hints(VirtIOBalloon *dev)
> +{
> +    VirtQueueElement *elem;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VirtQueue *vq = dev->free_page_vq;
> +
> +    while (dev->block_iothread) {
> +        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
> +    }
> +
> +    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
> +    if (!elem) {
> +        return false;
> +    }
> +
> +    if (elem->out_num) {
> +        uint32_t id;
> +        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
> +                                 &id, sizeof(id));
> +        virtqueue_push(vq, elem, size);
> +        g_free(elem);
> +
> +        virtio_tswap32s(vdev, &id);
> +        if (unlikely(size != sizeof(id))) {
> +            virtio_error(vdev, "received an incorrect cmd id");
> +            return false;
> +        }
> +        if (id == dev->free_page_report_cmd_id) {
> +            dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
> +        } else {
> +            /*
> +             * Stop the optimization only when it has started. This
> +             * avoids a stale stop sign for the previous command.
> +             */
> +            if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
> +                dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
> +            }
> +        }
> +    }
> +
> +    if (elem->in_num) {
> +        if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
> +            qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
> +                                      elem->in_sg[0].iov_len);
> +        }
> +        virtqueue_push(vq, elem, 1);
> +        g_free(elem);
> +    }
> +
> +    return true;
> +}
> +
> +static void virtio_ballloon_get_free_page_hints(void *opaque)
> +{
> +    VirtIOBalloon *dev = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VirtQueue *vq = dev->free_page_vq;
> +    bool continue_to_get_hints;
> +
> +    do {
> +        qemu_mutex_lock(&dev->free_page_lock);
> +        virtio_queue_set_notification(vq, 0);
> +        continue_to_get_hints = get_free_page_hints(dev);
> +        qemu_mutex_unlock(&dev->free_page_lock);
> +        virtio_notify(vdev, vq);
> +      /*
> +       * Start to poll the vq once the reporting started. Otherwise, continue
> +       * only when there are entries on the vq, which need to be given back.
> +       */
> +    } while (continue_to_get_hints ||
> +             dev->free_page_report_status == FREE_PAGE_REPORT_S_START);
> +    virtio_queue_set_notification(vq, 1);
> +}
> +
> +static bool virtio_balloon_free_page_support(void *opaque)
> +{
> +    VirtIOBalloon *s = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(s);
> +
> +    return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
> +}
> +
> +static void virtio_balloon_free_page_start(VirtIOBalloon *s)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(s);
> +
> +    /* For the stop and copy phase, we don't need to start the optimization */
> +    if (!vdev->vm_running) {
> +        return;
> +    }
> +
> +    if (s->free_page_report_cmd_id == UINT_MAX) {
> +        s->free_page_report_cmd_id =
> +                       VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
> +    } else {
> +        s->free_page_report_cmd_id++;
> +    }
> +
> +    s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED;
> +    virtio_notify_config(vdev);
> +}
> +
> +static void virtio_balloon_free_page_stop(VirtIOBalloon *s)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(s);
> +
> +    if (s->free_page_report_status != FREE_PAGE_REPORT_S_STOP) {
> +        /*
> +         * The lock also guarantees us that the
> +         * virtio_ballloon_get_free_page_hints exits after the
> +         * free_page_report_status is set to S_STOP.
> +         */
> +        qemu_mutex_lock(&s->free_page_lock);
> +        /*
> +         * The guest hasn't done the reporting, so host sends a notification
> +         * to the guest to actively stop the reporting.
> +         */
> +        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
> +        qemu_mutex_unlock(&s->free_page_lock);
> +        virtio_notify_config(vdev);
> +    }
> +}
> +
> +static void virtio_balloon_free_page_done(VirtIOBalloon *s)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(s);
> +
> +    s->free_page_report_status = FREE_PAGE_REPORT_S_DONE;
> +    virtio_notify_config(vdev);
> +}
> +
> +static int
> +virtio_balloon_free_page_report_notify(NotifierWithReturn *n, void *data)
> +{
> +    VirtIOBalloon *dev = container_of(n, VirtIOBalloon,
> +                                      free_page_report_notify);
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    PrecopyNotifyData *pnd = data;
> +
> +    if (!virtio_balloon_free_page_support(dev)) {
> +        /*
> +         * This is an optimization provided to migration, so just return 0 to
> +         * have the normal migration process not affected when this feature is
> +         * not supported.
> +         */
> +        return 0;
> +    }
> +
> +    switch (pnd->reason) {
> +    case PRECOPY_NOTIFY_SETUP:
> +        precopy_enable_free_page_optimization();
> +        break;
> +    case PRECOPY_NOTIFY_COMPLETE:
> +    case PRECOPY_NOTIFY_CLEANUP:
> +    case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
> +        virtio_balloon_free_page_stop(dev);
> +        break;
> +    case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
> +        if (vdev->vm_running) {
> +            virtio_balloon_free_page_start(dev);
> +        } else {
> +            virtio_balloon_free_page_done(dev);
> +        }
> +        break;
> +    default:
> +        virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason);
> +    }
> +
> +    return 0;
> +}
> +
>  static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
>  {
>      VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
> @@ -316,6 +495,17 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
>      config.num_pages = cpu_to_le32(dev->num_pages);
>      config.actual = cpu_to_le32(dev->actual);
>  
> +    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
> +        config.free_page_report_cmd_id =
> +                       cpu_to_le32(dev->free_page_report_cmd_id);
> +    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
> +        config.free_page_report_cmd_id =
> +                       cpu_to_le32(VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID);
> +    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
> +        config.free_page_report_cmd_id =
> +                       cpu_to_le32(VIRTIO_BALLOON_FREE_PAGE_REPORT_DONE_ID);
> +    }
> +
>      trace_virtio_balloon_get_config(config.num_pages, config.actual);
>      memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
>  }
> @@ -376,6 +566,7 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
>      VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
>      f |= dev->host_features;
>      virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
> +
>      return f;
>  }
>  
> @@ -412,6 +603,18 @@ static int virtio_balloon_post_load_device(void *opaque, int version_id)
>      return 0;
>  }
>  
> +static const VMStateDescription vmstate_virtio_balloon_free_page_report = {
> +    .name = "virtio-balloon-device/free-page-report",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = virtio_balloon_free_page_support,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon),
> +        VMSTATE_UINT32(free_page_report_status, VirtIOBalloon),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static const VMStateDescription vmstate_virtio_balloon_device = {
>      .name = "virtio-balloon-device",
>      .version_id = 1,
> @@ -422,6 +625,10 @@ static const VMStateDescription vmstate_virtio_balloon_device = {
>          VMSTATE_UINT32(actual, VirtIOBalloon),
>          VMSTATE_END_OF_LIST()
>      },
> +    .subsections = (const VMStateDescription * []) {
> +        &vmstate_virtio_balloon_free_page_report,
> +        NULL
> +    }
>  };
>  
>  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
> @@ -446,6 +653,29 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>      s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
>      s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
>  
> +    if (virtio_has_feature(s->host_features,
> +                           VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> +        s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
> +                                           virtio_balloon_handle_free_page_vq);
> +        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
> +        s->free_page_report_cmd_id =
> +                           VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
> +        s->free_page_report_notify.notify =
> +                                       virtio_balloon_free_page_report_notify;
> +        precopy_add_notifier(&s->free_page_report_notify);
> +        if (s->iothread) {
> +            object_ref(OBJECT(s->iothread));
> +            s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
> +                                       virtio_ballloon_get_free_page_hints, s);
> +            qemu_mutex_init(&s->free_page_lock);
> +            qemu_cond_init(&s->free_page_cond);
> +            s->block_iothread = false;
> +        } else {
> +            /* Simply disable this feature if the iothread wasn't created. */
> +            s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT);
> +            virtio_error(vdev, "iothread is missing");
> +        }
> +    }
>      reset_stats(s);
>  }
>  
> @@ -454,6 +684,11 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VirtIOBalloon *s = VIRTIO_BALLOON(dev);
>  
> +    if (virtio_balloon_free_page_support(s)) {
> +        qemu_bh_delete(s->free_page_bh);
> +        virtio_balloon_free_page_stop(s);
> +        precopy_remove_notifier(&s->free_page_report_notify);
> +    }
>      balloon_stats_destroy_timer(s);
>      qemu_remove_balloon_handler(s);
>      virtio_cleanup(vdev);
> @@ -463,6 +698,10 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev)
>  {
>      VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
>  
> +    if (virtio_balloon_free_page_support(s)) {
> +        virtio_balloon_free_page_stop(s);
> +    }
> +
>      if (s->stats_vq_elem != NULL) {
>          virtqueue_unpop(s->svq, s->stats_vq_elem, 0);
>          g_free(s->stats_vq_elem);
> @@ -480,6 +719,26 @@ static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
>           * was stopped */
>          virtio_balloon_receive_stats(vdev, s->svq);
>      }
> +
> +    if (virtio_balloon_free_page_support(s)) {
> +        /*
> +         * The VM is woken up and the iothread was blocked, so signal it to
> +         * continue.
> +         */
> +        if (vdev->vm_running && s->block_iothread) {
> +            qemu_mutex_lock(&s->free_page_lock);
> +            s->block_iothread = false;
> +            qemu_cond_signal(&s->free_page_cond);
> +            qemu_mutex_unlock(&s->free_page_lock);
> +        }
> +
> +        /* The VM is stopped, block the iothread. */
> +        if (!vdev->vm_running) {
> +            qemu_mutex_lock(&s->free_page_lock);
> +            s->block_iothread = true;
> +            qemu_mutex_unlock(&s->free_page_lock);
> +        }
> +    }
>  }
>  
>  static void virtio_balloon_instance_init(Object *obj)
> @@ -508,6 +767,10 @@ static const VMStateDescription vmstate_virtio_balloon = {
>  static Property virtio_balloon_properties[] = {
>      DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
>                      VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
> +    DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
> +                    VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
> +    DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD,
> +                     IOThread *),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
> index e0df352..503349a 100644
> --- a/include/hw/virtio/virtio-balloon.h
> +++ b/include/hw/virtio/virtio-balloon.h
> @@ -17,11 +17,14 @@
>  
>  #include "standard-headers/linux/virtio_balloon.h"
>  #include "hw/virtio/virtio.h"
> +#include "sysemu/iothread.h"
>  
>  #define TYPE_VIRTIO_BALLOON "virtio-balloon-device"
>  #define VIRTIO_BALLOON(obj) \
>          OBJECT_CHECK(VirtIOBalloon, (obj), TYPE_VIRTIO_BALLOON)
>  
> +#define VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN 0x80000000
> +
>  typedef struct virtio_balloon_stat VirtIOBalloonStat;
>  
>  typedef struct virtio_balloon_stat_modern {
> @@ -30,15 +33,38 @@ typedef struct virtio_balloon_stat_modern {
>         uint64_t val;
>  } VirtIOBalloonStatModern;
>  
> +enum virtio_balloon_free_page_report_status {
> +    FREE_PAGE_REPORT_S_STOP = 0,
> +    FREE_PAGE_REPORT_S_REQUESTED = 1,
> +    FREE_PAGE_REPORT_S_START = 2,
> +    FREE_PAGE_REPORT_S_DONE = 3,
> +};
> +
>  typedef struct VirtIOBalloon {
>      VirtIODevice parent_obj;
> -    VirtQueue *ivq, *dvq, *svq;
> +    VirtQueue *ivq, *dvq, *svq, *free_page_vq;
> +    uint32_t free_page_report_status;
>      uint32_t num_pages;
>      uint32_t actual;
> +    uint32_t free_page_report_cmd_id;
>      uint64_t stats[VIRTIO_BALLOON_S_NR];
>      VirtQueueElement *stats_vq_elem;
>      size_t stats_vq_offset;
>      QEMUTimer *stats_timer;
> +    IOThread *iothread;
> +    QEMUBH *free_page_bh;
> +    /*
> +     * Lock to synchronize threads to access the free page reporting related
> +     * fields (e.g. free_page_report_status).
> +     */
> +    QemuMutex free_page_lock;
> +    QemuCond  free_page_cond;
> +    /*
> +     * Set to block iothread to continue reading free page hints as the VM is
> +     * stopped.
> +     */
> +    bool block_iothread;
> +    NotifierWithReturn free_page_report_notify;
>      int64_t stats_last_update;
>      int64_t stats_poll_interval;
>      uint32_t host_features;
> diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h
> index 4dbb7dc..9eee1c6 100644
> --- a/include/standard-headers/linux/virtio_balloon.h
> +++ b/include/standard-headers/linux/virtio_balloon.h
> @@ -34,15 +34,20 @@
>  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
>  #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
> +#define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */
>  
>  /* Size of a PFN in the balloon interface. */
>  #define VIRTIO_BALLOON_PFN_SHIFT 12
>  
> +#define VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID 0
> +#define VIRTIO_BALLOON_FREE_PAGE_REPORT_DONE_ID 1
>  struct virtio_balloon_config {
>  	/* Number of pages host wants Guest to give up. */
>  	uint32_t num_pages;
>  	/* Number of pages we've actually got in balloon. */
>  	uint32_t actual;
> +	/* Free page report command id, readonly by guest */
> +	uint32_t free_page_report_cmd_id;
>  };
>  
>  #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
> -- 
> 1.8.3.1
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Wang, Wei W Dec. 14, 2018, 6:39 a.m. UTC | #2
On 12/13/2018 11:45 PM, Dr. David Alan Gilbert wrote:
> * Wei Wang (wei.w.wang@intel.com) wrote:
>> The new feature enables the virtio-balloon device to receive hints of
>> guest free pages from the free page vq.
>>
>> A notifier is registered to the migration precopy notifier chain. The
>> notifier calls free_page_start after the migration thread syncs the dirty
>> bitmap, so that the free page optimization starts to clear bits of free
>> pages from the bitmap. It calls the free_page_stop before the migration
>> thread syncs the bitmap, which is the end of the current round of ram
>> save. The free_page_stop is also called to stop the optimization in the
>> case when there is an error occurred in the process of ram saving.
>>
>> Note: balloon will report pages which were free at the time of this call.
>> As the reporting happens asynchronously, dirty bit logging must be
>> enabled before this free_page_start call is made. Guest reporting must be
>> disabled before the migration dirty bitmap is synchronized.
>>
>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
>> CC: Michael S. Tsirkin <mst@redhat.com>
>> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> CC: Juan Quintela <quintela@redhat.com>
>> CC: Peter Xu <peterx@redhat.com>
> I think I'm OK for this from the migration side, I'd appreciate
> someone checking the virtio and aio bits.
>
> I'm not too sure how it gets switched on and off - i.e. if we get a nice
> new qemu on a new kernel, what happens when I try and migrate to the
> same qemu on an older kernel without these hints?
>

This feature doesn't rely on the host kernel. Those hints are reported 
from the guest kernel.
So migration across different hosts wouldn't affect the use of this feature.
Please correct me if I didn't get your point.

Best,
Wei
Dr. David Alan Gilbert Dec. 14, 2018, 9:56 a.m. UTC | #3
* Wei Wang (wei.w.wang@intel.com) wrote:
> On 12/13/2018 11:45 PM, Dr. David Alan Gilbert wrote:
> > * Wei Wang (wei.w.wang@intel.com) wrote:
> > > The new feature enables the virtio-balloon device to receive hints of
> > > guest free pages from the free page vq.
> > > 
> > > A notifier is registered to the migration precopy notifier chain. The
> > > notifier calls free_page_start after the migration thread syncs the dirty
> > > bitmap, so that the free page optimization starts to clear bits of free
> > > pages from the bitmap. It calls the free_page_stop before the migration
> > > thread syncs the bitmap, which is the end of the current round of ram
> > > save. The free_page_stop is also called to stop the optimization in the
> > > case when there is an error occurred in the process of ram saving.
> > > 
> > > Note: balloon will report pages which were free at the time of this call.
> > > As the reporting happens asynchronously, dirty bit logging must be
> > > enabled before this free_page_start call is made. Guest reporting must be
> > > disabled before the migration dirty bitmap is synchronized.
> > > 
> > > Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> > > CC: Michael S. Tsirkin <mst@redhat.com>
> > > CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > CC: Juan Quintela <quintela@redhat.com>
> > > CC: Peter Xu <peterx@redhat.com>
> > I think I'm OK for this from the migration side, I'd appreciate
> > someone checking the virtio and aio bits.
> > 
> > I'm not too sure how it gets switched on and off - i.e. if we get a nice
> > new qemu on a new kernel, what happens when I try and migrate to the
> > same qemu on an older kernel without these hints?
> > 
> 
> This feature doesn't rely on the host kernel. Those hints are reported from
> the guest kernel.
> So migration across different hosts wouldn't affect the use of this feature.
> Please correct me if I didn't get your point.

Ah OK, yes;  now what about migrating from new->old qemu with a new
guest but old machine type?

Dave

> Best,
> Wei
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Wang, Wei W Dec. 14, 2018, 10:30 a.m. UTC | #4
On 12/14/2018 05:56 PM, Dr. David Alan Gilbert wrote:
> * Wei Wang (wei.w.wang@intel.com) wrote:
>> On 12/13/2018 11:45 PM, Dr. David Alan Gilbert wrote:
>>> * Wei Wang (wei.w.wang@intel.com) wrote:
>>>> The new feature enables the virtio-balloon device to receive hints of
>>>> guest free pages from the free page vq.
>>>>
>>>> A notifier is registered to the migration precopy notifier chain. The
>>>> notifier calls free_page_start after the migration thread syncs the dirty
>>>> bitmap, so that the free page optimization starts to clear bits of free
>>>> pages from the bitmap. It calls the free_page_stop before the migration
>>>> thread syncs the bitmap, which is the end of the current round of ram
>>>> save. The free_page_stop is also called to stop the optimization in the
>>>> case when there is an error occurred in the process of ram saving.
>>>>
>>>> Note: balloon will report pages which were free at the time of this call.
>>>> As the reporting happens asynchronously, dirty bit logging must be
>>>> enabled before this free_page_start call is made. Guest reporting must be
>>>> disabled before the migration dirty bitmap is synchronized.
>>>>
>>>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>>> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>> CC: Juan Quintela <quintela@redhat.com>
>>>> CC: Peter Xu <peterx@redhat.com>
>>> I think I'm OK for this from the migration side, I'd appreciate
>>> someone checking the virtio and aio bits.
>>>
>>> I'm not too sure how it gets switched on and off - i.e. if we get a nice
>>> new qemu on a new kernel, what happens when I try and migrate to the
>>> same qemu on an older kernel without these hints?
>>>
>> This feature doesn't rely on the host kernel. Those hints are reported from
>> the guest kernel.
>> So migration across different hosts wouldn't affect the use of this feature.
>> Please correct me if I didn't get your point.
> Ah OK, yes;  now what about migrating from new->old qemu with a new
> guest but old machine type?
>

I think normally, the source QEMU and destination QEMU should have the same
QEMU boot parameters. If the destination QEMU doesn't support
"--device virtio-balloon,free-page-hint=true", which the source QEMU has, the
destination side QEMU will fail to boot, and migration will not happen then.

But I think there is still an option to make the migration possible. The
"free-page-hint" can be set to false via e.g. QMP on the source side; then
the destination side QEMU can boot without "free-page-hint".

Best,
Wei
Dr. David Alan Gilbert Dec. 14, 2018, 11:17 a.m. UTC | #5
* Wei Wang (wei.w.wang@intel.com) wrote:
> On 12/14/2018 05:56 PM, Dr. David Alan Gilbert wrote:
> > * Wei Wang (wei.w.wang@intel.com) wrote:
> > > On 12/13/2018 11:45 PM, Dr. David Alan Gilbert wrote:
> > > > * Wei Wang (wei.w.wang@intel.com) wrote:
> > > > > The new feature enables the virtio-balloon device to receive hints of
> > > > > guest free pages from the free page vq.
> > > > > 
> > > > > A notifier is registered to the migration precopy notifier chain. The
> > > > > notifier calls free_page_start after the migration thread syncs the dirty
> > > > > bitmap, so that the free page optimization starts to clear bits of free
> > > > > pages from the bitmap. It calls the free_page_stop before the migration
> > > > > thread syncs the bitmap, which is the end of the current round of ram
> > > > > save. The free_page_stop is also called to stop the optimization in the
> > > > > case when there is an error occurred in the process of ram saving.
> > > > > 
> > > > > Note: balloon will report pages which were free at the time of this call.
> > > > > As the reporting happens asynchronously, dirty bit logging must be
> > > > > enabled before this free_page_start call is made. Guest reporting must be
> > > > > disabled before the migration dirty bitmap is synchronized.
> > > > > 
> > > > > Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> > > > > CC: Michael S. Tsirkin <mst@redhat.com>
> > > > > CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > > > CC: Juan Quintela <quintela@redhat.com>
> > > > > CC: Peter Xu <peterx@redhat.com>
> > > > I think I'm OK for this from the migration side, I'd appreciate
> > > > someone checking the virtio and aio bits.
> > > > 
> > > > I'm not too sure how it gets switched on and off - i.e. if we get a nice
> > > > new qemu on a new kernel, what happens when I try and migrate to the
> > > > same qemu on an older kernel without these hints?
> > > > 
> > > This feature doesn't rely on the host kernel. Those hints are reported from
> > > the guest kernel.
> > > So migration across different hosts wouldn't affect the use of this feature.
> > > Please correct me if I didn't get your point.
> > Ah OK, yes;  now what about migrating from new->old qemu with a new
> > guest but old machine type?
> > 
> 
> I think normally, the source QEMU and destination QEMU should have the same
> QEMU booting parameter. If the destination QEMU doesn't support
> "--device virtio-balloon,free-page-hint=true", which the source QEMU has,
> the
> destination side QEMU will fail to boot, and migration will not happen then.

Ah that's OK; as long as free-page-hint is false by default that will
work fine.

Dave

> But I think there is still an option to make the migration possible. The
> "free-page-hint"
> can be set to false via e.g. QMP on the source side, then the destination
> side QEMU
> can boot without "free-page-hint".
> 
> Best,
> Wei
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Wang, Wei W Feb. 19, 2019, 9:18 a.m. UTC | #6
On Friday, December 14, 2018 7:17 PM, Dr. David Alan Gilbert wrote:
> > On 12/14/2018 05:56 PM, Dr. David Alan Gilbert wrote:
> > > * Wei Wang (wei.w.wang@intel.com) wrote:
> > > > On 12/13/2018 11:45 PM, Dr. David Alan Gilbert wrote:
> > > > > * Wei Wang (wei.w.wang@intel.com) wrote:
> > > > > > The new feature enables the virtio-balloon device to receive
> > > > > > hints of guest free pages from the free page vq.
> > > > > >
> > > > > > A notifier is registered to the migration precopy notifier
> > > > > > chain. The notifier calls free_page_start after the migration
> > > > > > thread syncs the dirty bitmap, so that the free page
> > > > > > optimization starts to clear bits of free pages from the
> > > > > > bitmap. It calls the free_page_stop before the migration
> > > > > > thread syncs the bitmap, which is the end of the current round
> > > > > > of ram save. The free_page_stop is also called to stop the
> optimization in the case when there is an error occurred in the process of
> ram saving.
> > > > > >
> > > > > > Note: balloon will report pages which were free at the time of this
> call.
> > > > > > As the reporting happens asynchronously, dirty bit logging
> > > > > > must be enabled before this free_page_start call is made.
> > > > > > Guest reporting must be disabled before the migration dirty bitmap
> is synchronized.
> > > > > >
> > > > > > Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> > > > > > CC: Michael S. Tsirkin <mst@redhat.com>
> > > > > > CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > > > > CC: Juan Quintela <quintela@redhat.com>
> > > > > > CC: Peter Xu <peterx@redhat.com>
> > > > > I think I'm OK for this from the migration side, I'd appreciate
> > > > > someone checking the virtio and aio bits.
> > > > >
> > > > > I'm not too sure how it gets switched on and off - i.e. if we
> > > > > get a nice new qemu on a new kernel, what happens when I try and
> > > > > migrate to the same qemu on an older kernel without these hints?
> > > > >
> > > > This feature doesn't rely on the host kernel. Those hints are
> > > > reported from the guest kernel.
> > > > So migration across different hosts wouldn't affect the use of this
> feature.
> > > > Please correct me if I didn't get your point.
> > > Ah OK, yes;  now what about migrating from new->old qemu with a new
> > > guest but old machine type?
> > >
> >
> > I think normally, the source QEMU and destination QEMU should have the
> > same QEMU booting parameter. If the destination QEMU doesn't support
> > "--device virtio-balloon,free-page-hint=true", which the source QEMU
> > has, the destination side QEMU will fail to boot, and migration will
> > not happen then.
> 
> Ah that's OK; as long as free-page-hint is false by default that will work fine.
> 
> Dave
> 

Hi Dave,

Could we have this feature in QEMU 4.0 (freeze on Mar 12)?

Best,
Wei
Dr. David Alan Gilbert Feb. 20, 2019, 1:12 p.m. UTC | #7
* Wang, Wei W (wei.w.wang@intel.com) wrote:
> On Friday, December 14, 2018 7:17 PM, Dr. David Alan Gilbert wrote:
> > > On 12/14/2018 05:56 PM, Dr. David Alan Gilbert wrote:
> > > > * Wei Wang (wei.w.wang@intel.com) wrote:
> > > > > On 12/13/2018 11:45 PM, Dr. David Alan Gilbert wrote:
> > > > > > * Wei Wang (wei.w.wang@intel.com) wrote:
> > > > > > > The new feature enables the virtio-balloon device to receive
> > > > > > > hints of guest free pages from the free page vq.
> > > > > > >
> > > > > > > A notifier is registered to the migration precopy notifier
> > > > > > > chain. The notifier calls free_page_start after the migration
> > > > > > > thread syncs the dirty bitmap, so that the free page
> > > > > > > optimization starts to clear bits of free pages from the
> > > > > > > bitmap. It calls the free_page_stop before the migration
> > > > > > > thread syncs the bitmap, which is the end of the current round
> > > > > > > of ram save. The free_page_stop is also called to stop the
> > optimization in the case when there is an error occurred in the process of
> > ram saving.
> > > > > > >
> > > > > > > Note: balloon will report pages which were free at the time of this
> > call.
> > > > > > > As the reporting happens asynchronously, dirty bit logging
> > > > > > > must be enabled before this free_page_start call is made.
> > > > > > > Guest reporting must be disabled before the migration dirty bitmap
> > is synchronized.
> > > > > > >
> > > > > > > Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> > > > > > > CC: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > > > > > CC: Juan Quintela <quintela@redhat.com>
> > > > > > > CC: Peter Xu <peterx@redhat.com>
> > > > > > I think I'm OK for this from the migration side, I'd appreciate
> > > > > > someone checking the virtio and aio bits.
> > > > > >
> > > > > > I'm not too sure how it gets switched on and off - i.e. if we
> > > > > > get a nice new qemu on a new kernel, what happens when I try and
> > > > > > migrate to the same qemu on an older kernel without these hints?
> > > > > >
> > > > > This feature doesn't rely on the host kernel. Those hints are
> > > > > reported from the guest kernel.
> > > > > So migration across different hosts wouldn't affect the use of this
> > feature.
> > > > > Please correct me if I didn't get your point.
> > > > Ah OK, yes;  now what about migrating from new->old qemu with a new
> > > > guest but old machine type?
> > > >
> > >
> > > I think normally, the source QEMU and destination QEMU should have the
> > > same QEMU booting parameter. If the destination QEMU doesn't support
> > > "--device virtio-balloon,free-page-hint=true", which the source QEMU
> > > has, the destination side QEMU will fail to boot, and migration will
> > > not happen then.
> > 
> > Ah that's OK; as long as free-page-hint is false by default that will work fine.
> > 
> > Dave
> > 
> 
> Hi Dave,
> 
> Could we have this feature in QEMU 4.0 (freeze on Mar 12)?

I think so; can you remind me where we're up to:
  a) It looks like you've already got the kernel changes merged -
correct?
  b) What about the virtio spec changes - where are they up to?
  c) Where are the other reviews up to - I think most are reviewed - is
it just 7/7 that is missing the reviewed-by?

Dave

> Best,
> Wei
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Wang, Wei W Feb. 21, 2019, 1:49 a.m. UTC | #8
On 02/20/2019 09:12 PM, Dr. David Alan Gilbert wrote:
> * Wang, Wei W (wei.w.wang@intel.com) wrote:
>> On Friday, December 14, 2018 7:17 PM, Dr. David Alan Gilbert wrote:
>>>> On 12/14/2018 05:56 PM, Dr. David Alan Gilbert wrote:
>>>>> * Wei Wang (wei.w.wang@intel.com) wrote:
>>>>>> On 12/13/2018 11:45 PM, Dr. David Alan Gilbert wrote:
>>>>>>> * Wei Wang (wei.w.wang@intel.com) wrote:
>>>>>>>> The new feature enables the virtio-balloon device to receive
>>>>>>>> hints of guest free pages from the free page vq.
>>>>>>>>
>>>>>>>> A notifier is registered to the migration precopy notifier
>>>>>>>> chain. The notifier calls free_page_start after the migration
>>>>>>>> thread syncs the dirty bitmap, so that the free page
>>>>>>>> optimization starts to clear bits of free pages from the
>>>>>>>> bitmap. It calls the free_page_stop before the migration
>>>>>>>> thread syncs the bitmap, which is the end of the current round
>>>>>>>> of ram save. The free_page_stop is also called to stop the
>>> optimization in the case when there is an error occurred in the process of
>>> ram saving.
>>>>>>>> Note: balloon will report pages which were free at the time of this
>>> call.
>>>>>>>> As the reporting happens asynchronously, dirty bit logging
>>>>>>>> must be enabled before this free_page_start call is made.
>>>>>>>> Guest reporting must be disabled before the migration dirty bitmap
>>> is synchronized.
>>>>>>>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
>>>>>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>>>>>>> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>>>>>> CC: Juan Quintela <quintela@redhat.com>
>>>>>>>> CC: Peter Xu <peterx@redhat.com>
>>>>>>> I think I'm OK for this from the migration side, I'd appreciate
>>>>>>> someone checking the virtio and aio bits.
>>>>>>>
>>>>>>> I'm not too sure how it gets switched on and off - i.e. if we
>>>>>>> get a nice new qemu on a new kernel, what happens when I try and
>>>>>>> migrate to the same qemu on an older kernel without these hints?
>>>>>>>
>>>>>> This feature doesn't rely on the host kernel. Those hints are
>>>>>> reported from the guest kernel.
>>>>>> So migration across different hosts wouldn't affect the use of this
>>> feature.
>>>>>> Please correct me if I didn't get your point.
>>>>> Ah OK, yes;  now what about migrating from new->old qemu with a new
>>>>> guest but old machine type?
>>>>>
>>>> I think normally, the source QEMU and destination QEMU should have the
>>>> same QEMU booting parameter. If the destination QEMU doesn't support
>>>> "--device virtio-balloon,free-page-hint=true", which the source QEMU
>>>> has, the destination side QEMU will fail to boot, and migration will
>>>> not happen then.
>>> Ah that's OK; as long as free-page-hint is false by default that will work fine.
>>>
>>> Dave
>>>
>> Hi Dave,
>>
>> Could we have this feature in QEMU 4.0 (freeze on Mar 12)?
> I think so; can you remind me where we're up to:
>    a) It looks like you've already got the kernel changes merged -
> correct?

Yes, they were already merged half a year ago.

>    b) What about the virtio spec changes - where are they upto?

The spec changes are in progress. v1 has been posted, and a v2 is in
preparation.

>    c) Where are the other reviews upto - I think most are reviewed - is
> it just 7/7 that is missing the review-by?
7/7 is about the virtio changes, and Michael has given the reviewed-by:
http://lists.nongnu.org/archive/html/qemu-devel/2018-12/msg03732.html


Best,
Wei
Dr. David Alan Gilbert Feb. 21, 2019, 10:18 a.m. UTC | #9
* Wei Wang (wei.w.wang@intel.com) wrote:
> On 02/20/2019 09:12 PM, Dr. David Alan Gilbert wrote:
> > * Wang, Wei W (wei.w.wang@intel.com) wrote:
> > > On Friday, December 14, 2018 7:17 PM, Dr. David Alan Gilbert wrote:
> > > > > On 12/14/2018 05:56 PM, Dr. David Alan Gilbert wrote:
> > > > > > * Wei Wang (wei.w.wang@intel.com) wrote:
> > > > > > > On 12/13/2018 11:45 PM, Dr. David Alan Gilbert wrote:
> > > > > > > > * Wei Wang (wei.w.wang@intel.com) wrote:
> > > > > > > > > The new feature enables the virtio-balloon device to receive
> > > > > > > > > hints of guest free pages from the free page vq.
> > > > > > > > > 
> > > > > > > > > A notifier is registered to the migration precopy notifier
> > > > > > > > > chain. The notifier calls free_page_start after the migration
> > > > > > > > > thread syncs the dirty bitmap, so that the free page
> > > > > > > > > optimization starts to clear bits of free pages from the
> > > > > > > > > bitmap. It calls the free_page_stop before the migration
> > > > > > > > > thread syncs the bitmap, which is the end of the current round
> > > > > > > > > of ram save. The free_page_stop is also called to stop the
> > > > optimization in the case when there is an error occurred in the process of
> > > > ram saving.
> > > > > > > > > Note: balloon will report pages which were free at the time of this
> > > > call.
> > > > > > > > > As the reporting happens asynchronously, dirty bit logging
> > > > > > > > > must be enabled before this free_page_start call is made.
> > > > > > > > > Guest reporting must be disabled before the migration dirty bitmap
> > > > is synchronized.
> > > > > > > > > Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> > > > > > > > > CC: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > > > > > > > CC: Juan Quintela <quintela@redhat.com>
> > > > > > > > > CC: Peter Xu <peterx@redhat.com>
> > > > > > > > I think I'm OK for this from the migration side, I'd appreciate
> > > > > > > > someone checking the virtio and aio bits.
> > > > > > > > 
> > > > > > > > I'm not too sure how it gets switched on and off - i.e. if we
> > > > > > > > get a nice new qemu on a new kernel, what happens when I try and
> > > > > > > > migrate to the same qemu on an older kernel without these hints?
> > > > > > > > 
> > > > > > > This feature doesn't rely on the host kernel. Those hints are
> > > > > > > reported from the guest kernel.
> > > > > > > So migration across different hosts wouldn't affect the use of this
> > > > feature.
> > > > > > > Please correct me if I didn't get your point.
> > > > > > Ah OK, yes;  now what about migrating from new->old qemu with a new
> > > > > > guest but old machine type?
> > > > > > 
> > > > > I think normally, the source QEMU and destination QEMU should have the
> > > > > same QEMU booting parameter. If the destination QEMU doesn't support
> > > > > "--device virtio-balloon,free-page-hint=true", which the source QEMU
> > > > > has, the destination side QEMU will fail to boot, and migration will
> > > > > not happen then.
> > > > Ah that's OK; as long as free-page-hint is false by default that will work fine.
> > > > 
> > > > Dave
> > > > 
> > > Hi Dave,
> > > 
> > > Could we have this feature in QEMU 4.0 (freeze on Mar 12)?
> > I think so; can you remind me where we're up to:
> >    a) It looks like you've already got the kernel changes merged -
> > correct?
> 
> Yes, they were already merged half year ago.
> 
> >    b) What about the virtio spec changes - where are they upto?
> 
> The spec changes are in progress. v1 were posted out, a v2 is in
> preparation.
> 
> >    c) Where are the other reviews upto - I think most are reviewed - is
> > it just 7/7 that is missing the review-by?
> 7/7 is about the virtio changes, and Michael has given the reviewed-by:
> http://lists.nongnu.org/archive/html/qemu-devel/2018-12/msg03732.html

OK, I was going to check with mst for (b) because I prefer it after the
spec changes have been merged, but since mst is OK with it, we can
merge, especially with (a) already merged.

Dave

> 
> Best,
> Wei
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Wang, Wei W Feb. 22, 2019, 1:11 a.m. UTC | #10
On 02/21/2019 06:18 PM, Dr. David Alan Gilbert wrote:
> * Wei Wang (wei.w.wang@intel.com) wrote:
>> On 02/20/2019 09:12 PM, Dr. David Alan Gilbert wrote:
>>> * Wang, Wei W (wei.w.wang@intel.com) wrote:
>>>> On Friday, December 14, 2018 7:17 PM, Dr. David Alan Gilbert wrote:
>>>>>> On 12/14/2018 05:56 PM, Dr. David Alan Gilbert wrote:
>>>>>>> * Wei Wang (wei.w.wang@intel.com) wrote:
>>>>>>>> On 12/13/2018 11:45 PM, Dr. David Alan Gilbert wrote:
>>>>>>>>> * Wei Wang (wei.w.wang@intel.com) wrote:
>>>>>>>>>> The new feature enables the virtio-balloon device to receive
>>>>>>>>>> hints of guest free pages from the free page vq.
>>>>>>>>>>
>>>>>>>>>> A notifier is registered to the migration precopy notifier
>>>>>>>>>> chain. The notifier calls free_page_start after the migration
>>>>>>>>>> thread syncs the dirty bitmap, so that the free page
>>>>>>>>>> optimization starts to clear bits of free pages from the
>>>>>>>>>> bitmap. It calls the free_page_stop before the migration
>>>>>>>>>> thread syncs the bitmap, which is the end of the current round
>>>>>>>>>> of ram save. The free_page_stop is also called to stop the
>>>>> optimization in the case when there is an error occurred in the process of
>>>>> ram saving.
>>>>>>>>>> Note: balloon will report pages which were free at the time of this
>>>>> call.
>>>>>>>>>> As the reporting happens asynchronously, dirty bit logging
>>>>>>>>>> must be enabled before this free_page_start call is made.
>>>>>>>>>> Guest reporting must be disabled before the migration dirty bitmap
>>>>> is synchronized.
>>>>>>>>>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
>>>>>>>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>>>>>>>>> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>>>>>>>> CC: Juan Quintela <quintela@redhat.com>
>>>>>>>>>> CC: Peter Xu <peterx@redhat.com>
>>>>>>>>> I think I'm OK for this from the migration side, I'd appreciate
>>>>>>>>> someone checking the virtio and aio bits.
>>>>>>>>>
>>>>>>>>> I'm not too sure how it gets switched on and off - i.e. if we
>>>>>>>>> get a nice new qemu on a new kernel, what happens when I try and
>>>>>>>>> migrate to the same qemu on an older kernel without these hints?
>>>>>>>>>
>>>>>>>> This feature doesn't rely on the host kernel. Those hints are
>>>>>>>> reported from the guest kernel.
>>>>>>>> So migration across different hosts wouldn't affect the use of this
>>>>> feature.
>>>>>>>> Please correct me if I didn't get your point.
>>>>>>> Ah OK, yes;  now what about migrating from new->old qemu with a new
>>>>>>> guest but old machine type?
>>>>>>>
>>>>>> I think normally, the source QEMU and destination QEMU should have the
>>>>>> same QEMU booting parameter. If the destination QEMU doesn't support
>>>>>> "--device virtio-balloon,free-page-hint=true", which the source QEMU
>>>>>> has, the destination side QEMU will fail to boot, and migration will
>>>>>> not happen then.
>>>>> Ah that's OK; as long as free-page-hint is false by default that will work fine.
>>>>>
>>>>> Dave
>>>>>
>>>> Hi Dave,
>>>>
>>>> Could we have this feature in QEMU 4.0 (freeze on Mar 12)?
>>> I think so; can you remind me where we're up to:
>>>     a) It looks like you've already got the kernel changes merged -
>>> correct?
>> Yes, they were already merged half year ago.
>>
>>>     b) What about the virtio spec changes - where are they upto?
>> The spec changes are in progress. v1 were posted out, a v2 is in
>> preparation.
>>
>>>     c) Where are the other reviews upto - I think most are reviewed - is
>>> it just 7/7 that is missing the review-by?
>> 7/7 is about the virtio changes, and Michael has given the reviewed-by:
>> http://lists.nongnu.org/archive/html/qemu-devel/2018-12/msg03732.html
> OK, I was going to check with mst for (b) because I prefer it after the
> spec changes have been merged, but since mst is OK with it, then we can
> merge especially with (a) already merged.

OK, thanks!

Best,
Wei
Dr. David Alan Gilbert March 5, 2019, 2:50 p.m. UTC | #11
* Wei Wang (wei.w.wang@intel.com) wrote:
> The new feature enables the virtio-balloon device to receive hints of
> guest free pages from the free page vq.
> 
> A notifier is registered to the migration precopy notifier chain. The
> notifier calls free_page_start after the migration thread syncs the dirty
> bitmap, so that the free page optimization starts to clear bits of free
> pages from the bitmap. It calls the free_page_stop before the migration
> thread syncs the bitmap, which is the end of the current round of ram
> save. The free_page_stop is also called to stop the optimization in the
> case when there is an error occurred in the process of ram saving.
> 
> Note: balloon will report pages which were free at the time of this call.
> As the reporting happens asynchronously, dirty bit logging must be
> enabled before this free_page_start call is made. Guest reporting must be
> disabled before the migration dirty bitmap is synchronized.
> 
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> CC: Michael S. Tsirkin <mst@redhat.com>
> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> CC: Juan Quintela <quintela@redhat.com>
> CC: Peter Xu <peterx@redhat.com>
> ---
>  hw/virtio/virtio-balloon.c                      | 263 ++++++++++++++++++++++++
>  include/hw/virtio/virtio-balloon.h              |  28 ++-
>  include/standard-headers/linux/virtio_balloon.h |   5 +
>  3 files changed, 295 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c

> +    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
> +        config.free_page_report_cmd_id =
> +                       cpu_to_le32(dev->free_page_report_cmd_id);
> +    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
> +        config.free_page_report_cmd_id =
> +                       cpu_to_le32(VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID);
> +    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
> +        config.free_page_report_cmd_id =
> +                       cpu_to_le32(VIRTIO_BALLOON_FREE_PAGE_REPORT_DONE_ID);
> +    }

It looks like somewhere in the last 3 months the name in the kernel
changed; so I think I've fixed this correctly but please shout if it's
wrong:

    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
        config.free_page_report_cmd_id =
                       cpu_to_le32(dev->free_page_report_cmd_id);
    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
        config.free_page_report_cmd_id =
                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
        config.free_page_report_cmd_id =
                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
    }

and I've dropped the kernel header update since it's already there.

Dave

> +
>      trace_virtio_balloon_get_config(config.num_pages, config.actual);
>      memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
>  }
> @@ -376,6 +566,7 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
>      VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
>      f |= dev->host_features;
>      virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
> +
>      return f;
>  }
>  
> @@ -412,6 +603,18 @@ static int virtio_balloon_post_load_device(void *opaque, int version_id)
>      return 0;
>  }
>  
> +static const VMStateDescription vmstate_virtio_balloon_free_page_report = {
> +    .name = "virtio-balloon-device/free-page-report",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = virtio_balloon_free_page_support,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon),
> +        VMSTATE_UINT32(free_page_report_status, VirtIOBalloon),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static const VMStateDescription vmstate_virtio_balloon_device = {
>      .name = "virtio-balloon-device",
>      .version_id = 1,
> @@ -422,6 +625,10 @@ static const VMStateDescription vmstate_virtio_balloon_device = {
>          VMSTATE_UINT32(actual, VirtIOBalloon),
>          VMSTATE_END_OF_LIST()
>      },
> +    .subsections = (const VMStateDescription * []) {
> +        &vmstate_virtio_balloon_free_page_report,
> +        NULL
> +    }
>  };
>  
>  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
> @@ -446,6 +653,29 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>      s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
>      s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
>  
> +    if (virtio_has_feature(s->host_features,
> +                           VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> +        s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
> +                                           virtio_balloon_handle_free_page_vq);
> +        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
> +        s->free_page_report_cmd_id =
> +                           VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
> +        s->free_page_report_notify.notify =
> +                                       virtio_balloon_free_page_report_notify;
> +        precopy_add_notifier(&s->free_page_report_notify);
> +        if (s->iothread) {
> +            object_ref(OBJECT(s->iothread));
> +            s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
> +                                       virtio_ballloon_get_free_page_hints, s);
> +            qemu_mutex_init(&s->free_page_lock);
> +            qemu_cond_init(&s->free_page_cond);
> +            s->block_iothread = false;
> +        } else {
> +            /* Simply disable this feature if the iothread wasn't created. */
> +            s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT);
> +            virtio_error(vdev, "iothread is missing");
> +        }
> +    }
>      reset_stats(s);
>  }
>  
> @@ -454,6 +684,11 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VirtIOBalloon *s = VIRTIO_BALLOON(dev);
>  
> +    if (virtio_balloon_free_page_support(s)) {
> +        qemu_bh_delete(s->free_page_bh);
> +        virtio_balloon_free_page_stop(s);
> +        precopy_remove_notifier(&s->free_page_report_notify);
> +    }
>      balloon_stats_destroy_timer(s);
>      qemu_remove_balloon_handler(s);
>      virtio_cleanup(vdev);
> @@ -463,6 +698,10 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev)
>  {
>      VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
>  
> +    if (virtio_balloon_free_page_support(s)) {
> +        virtio_balloon_free_page_stop(s);
> +    }
> +
>      if (s->stats_vq_elem != NULL) {
>          virtqueue_unpop(s->svq, s->stats_vq_elem, 0);
>          g_free(s->stats_vq_elem);
> @@ -480,6 +719,26 @@ static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
>           * was stopped */
>          virtio_balloon_receive_stats(vdev, s->svq);
>      }
> +
> +    if (virtio_balloon_free_page_support(s)) {
> +        /*
> +         * The VM is woken up and the iothread was blocked, so signal it to
> +         * continue.
> +         */
> +        if (vdev->vm_running && s->block_iothread) {
> +            qemu_mutex_lock(&s->free_page_lock);
> +            s->block_iothread = false;
> +            qemu_cond_signal(&s->free_page_cond);
> +            qemu_mutex_unlock(&s->free_page_lock);
> +        }
> +
> +        /* The VM is stopped, block the iothread. */
> +        if (!vdev->vm_running) {
> +            qemu_mutex_lock(&s->free_page_lock);
> +            s->block_iothread = true;
> +            qemu_mutex_unlock(&s->free_page_lock);
> +        }
> +    }
>  }
>  
>  static void virtio_balloon_instance_init(Object *obj)
> @@ -508,6 +767,10 @@ static const VMStateDescription vmstate_virtio_balloon = {
>  static Property virtio_balloon_properties[] = {
>      DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
>                      VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
> +    DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
> +                    VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
> +    DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD,
> +                     IOThread *),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
> index e0df352..503349a 100644
> --- a/include/hw/virtio/virtio-balloon.h
> +++ b/include/hw/virtio/virtio-balloon.h
> @@ -17,11 +17,14 @@
>  
>  #include "standard-headers/linux/virtio_balloon.h"
>  #include "hw/virtio/virtio.h"
> +#include "sysemu/iothread.h"
>  
>  #define TYPE_VIRTIO_BALLOON "virtio-balloon-device"
>  #define VIRTIO_BALLOON(obj) \
>          OBJECT_CHECK(VirtIOBalloon, (obj), TYPE_VIRTIO_BALLOON)
>  
> +#define VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN 0x80000000
> +
>  typedef struct virtio_balloon_stat VirtIOBalloonStat;
>  
>  typedef struct virtio_balloon_stat_modern {
> @@ -30,15 +33,38 @@ typedef struct virtio_balloon_stat_modern {
>         uint64_t val;
>  } VirtIOBalloonStatModern;
>  
> +enum virtio_balloon_free_page_report_status {
> +    FREE_PAGE_REPORT_S_STOP = 0,
> +    FREE_PAGE_REPORT_S_REQUESTED = 1,
> +    FREE_PAGE_REPORT_S_START = 2,
> +    FREE_PAGE_REPORT_S_DONE = 3,
> +};
> +
>  typedef struct VirtIOBalloon {
>      VirtIODevice parent_obj;
> -    VirtQueue *ivq, *dvq, *svq;
> +    VirtQueue *ivq, *dvq, *svq, *free_page_vq;
> +    uint32_t free_page_report_status;
>      uint32_t num_pages;
>      uint32_t actual;
> +    uint32_t free_page_report_cmd_id;
>      uint64_t stats[VIRTIO_BALLOON_S_NR];
>      VirtQueueElement *stats_vq_elem;
>      size_t stats_vq_offset;
>      QEMUTimer *stats_timer;
> +    IOThread *iothread;
> +    QEMUBH *free_page_bh;
> +    /*
> +     * Lock to synchronize threads to access the free page reporting related
> +     * fields (e.g. free_page_report_status).
> +     */
> +    QemuMutex free_page_lock;
> +    QemuCond  free_page_cond;
> +    /*
> +     * Set to block iothread to continue reading free page hints as the VM is
> +     * stopped.
> +     */
> +    bool block_iothread;
> +    NotifierWithReturn free_page_report_notify;
>      int64_t stats_last_update;
>      int64_t stats_poll_interval;
>      uint32_t host_features;
> diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h
> index 4dbb7dc..9eee1c6 100644
> --- a/include/standard-headers/linux/virtio_balloon.h
> +++ b/include/standard-headers/linux/virtio_balloon.h
> @@ -34,15 +34,20 @@
>  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
>  #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
> +#define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */
>  
>  /* Size of a PFN in the balloon interface. */
>  #define VIRTIO_BALLOON_PFN_SHIFT 12
>  
> +#define VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID 0
> +#define VIRTIO_BALLOON_FREE_PAGE_REPORT_DONE_ID 1
>  struct virtio_balloon_config {
>  	/* Number of pages host wants Guest to give up. */
>  	uint32_t num_pages;
>  	/* Number of pages we've actually got in balloon. */
>  	uint32_t actual;
> +	/* Free page report command id, readonly by guest */
> +	uint32_t free_page_report_cmd_id;
>  };
>  
>  #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
> -- 
> 1.8.3.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Wang, Wei W March 6, 2019, 2:04 a.m. UTC | #12
On 03/05/2019 10:50 PM, Dr. David Alan Gilbert wrote:
> * Wei Wang (wei.w.wang@intel.com) wrote:
>> The new feature enables the virtio-balloon device to receive hints of
>> guest free pages from the free page vq.
>>
>> A notifier is registered to the migration precopy notifier chain. The
>> notifier calls free_page_start after the migration thread syncs the dirty
>> bitmap, so that the free page optimization starts to clear bits of free
>> pages from the bitmap. It calls the free_page_stop before the migration
>> thread syncs the bitmap, which is the end of the current round of ram
>> save. The free_page_stop is also called to stop the optimization in the
>> case when there is an error occurred in the process of ram saving.
>>
>> Note: balloon will report pages which were free at the time of this call.
>> As the reporting happens asynchronously, dirty bit logging must be
>> enabled before this free_page_start call is made. Guest reporting must be
>> disabled before the migration dirty bitmap is synchronized.
>>
>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
>> CC: Michael S. Tsirkin <mst@redhat.com>
>> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> CC: Juan Quintela <quintela@redhat.com>
>> CC: Peter Xu <peterx@redhat.com>
>> ---
>>   hw/virtio/virtio-balloon.c                      | 263 ++++++++++++++++++++++++
>>   include/hw/virtio/virtio-balloon.h              |  28 ++-
>>   include/standard-headers/linux/virtio_balloon.h |   5 +
>>   3 files changed, 295 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
>> +    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
>> +        config.free_page_report_cmd_id =
>> +                       cpu_to_le32(dev->free_page_report_cmd_id);
>> +    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
>> +        config.free_page_report_cmd_id =
>> +                       cpu_to_le32(VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID);
>> +    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
>> +        config.free_page_report_cmd_id =
>> +                       cpu_to_le32(VIRTIO_BALLOON_FREE_PAGE_REPORT_DONE_ID);
>> +    }
> It looks like somewhere in the last 3 months the name in the kernel
> changed; so I think I've fixed this correctly but please shout if it's
> wrong:
>
>      if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
>          config.free_page_report_cmd_id =
>                         cpu_to_le32(dev->free_page_report_cmd_id);
>      } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
>          config.free_page_report_cmd_id =
>                         cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
>      } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
>          config.free_page_report_cmd_id =
>                         cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
>      }
>
> and I've dropped the kernel header update since it's already there.
>

Looks good. Thanks for the update.

Best,
Wei
diff mbox series

Patch

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 1728e4f..543bbd4 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -27,6 +27,7 @@ 
 #include "qapi/visitor.h"
 #include "trace.h"
 #include "qemu/error-report.h"
+#include "migration/misc.h"
 
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
@@ -308,6 +309,184 @@  out:
     }
 }
 
+/* Handler registered for the free page vq: schedules free_page_bh. */
+static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev,
+                                               VirtQueue *vq)
+{
+    qemu_bh_schedule(VIRTIO_BALLOON(vdev)->free_page_bh);
+}
+
+/*
+ * Pop and process one free page vq element (free_page_lock must be held).
+ * Returns false when the vq is empty or the cmd id is malformed.
+ */
+static bool get_free_page_hints(VirtIOBalloon *dev)
+{
+    VirtQueueElement *elem;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtQueue *vq = dev->free_page_vq;
+    bool ret = true;
+
+    while (dev->block_iothread) {
+        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
+    }
+
+    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+    if (!elem) {
+        return false;
+    }
+
+    if (elem->out_num) {
+        uint32_t id;
+        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
+                                 &id, sizeof(id));
+        if (unlikely(size != sizeof(id))) {
+            virtio_error(vdev, "received an incorrect cmd id");
+            ret = false;
+            goto out;
+        }
+        virtio_tswap32s(vdev, &id);
+        if (id == dev->free_page_report_cmd_id) {
+            dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
+        } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
+            /* Stop only once started: ignores a stale stop for an old cmd. */
+            dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
+        }
+    }
+
+    if (elem->in_num) {
+        if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
+            qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
+                                      elem->in_sg[0].iov_len);
+        }
+    }
+
+out:
+    /* Return and free elem exactly once, after its last use above. */
+    virtqueue_push(vq, elem, 0);
+    g_free(elem);
+    return ret;
+}
+
+/* Bottom half draining the free page vq; opaque is the VirtIOBalloon. */
+static void virtio_ballloon_get_free_page_hints(void *opaque)
+{
+    VirtIOBalloon *dev = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtQueue *vq = dev->free_page_vq;
+    bool more;
+
+    for (;;) {
+        qemu_mutex_lock(&dev->free_page_lock);
+        virtio_queue_set_notification(vq, 0);
+        more = get_free_page_hints(dev);
+        qemu_mutex_unlock(&dev->free_page_lock);
+        virtio_notify(vdev, vq);
+        /* Keep polling while reporting is started or an entry was handled. */
+        if (!more && dev->free_page_report_status != FREE_PAGE_REPORT_S_START) {
+            break;
+        }
+    }
+    virtio_queue_set_notification(vq, 1);
+}
+
+/* Returns virtio_vdev_has_feature() for VIRTIO_BALLOON_F_FREE_PAGE_HINT. */
+static bool virtio_balloon_free_page_support(void *opaque)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+
+    return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
+}
+
+/* Advance the cmd id, mark reporting REQUESTED and send a config notify. */
+static void virtio_balloon_free_page_start(VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    /* For the stop and copy phase, we don't need to start the optimization */
+    if (!vdev->vm_running) {
+        return;
+    }
+
+    /* Wrap back to the minimum id instead of incrementing past UINT_MAX. */
+    s->free_page_report_cmd_id =
+        s->free_page_report_cmd_id == UINT_MAX ?
+        VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN :
+        s->free_page_report_cmd_id + 1;
+
+    s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED;
+    virtio_notify_config(vdev);
+}
+
+/* Force the status to S_STOP under free_page_lock, then notify config. */
+static void virtio_balloon_free_page_stop(VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    if (s->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
+        return;
+    }
+
+    /*
+     * The guest hasn't done the reporting, so the host actively stops it.
+     * Taking the lock also guarantees us that the
+     * virtio_ballloon_get_free_page_hints exits after the
+     * free_page_report_status is set to S_STOP.
+     */
+    qemu_mutex_lock(&s->free_page_lock);
+    s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
+    qemu_mutex_unlock(&s->free_page_lock);
+    virtio_notify_config(vdev);
+}
+
+/* Set the report status to S_DONE and raise a config notification. */
+static void virtio_balloon_free_page_done(VirtIOBalloon *s)
+{
+    VirtIODevice *balloon_vdev = VIRTIO_DEVICE(s);
+    s->free_page_report_status = FREE_PAGE_REPORT_S_DONE;
+    virtio_notify_config(balloon_vdev);
+}
+
+/* Precopy notifier: maps the notification reason to start/stop/done. */
+static int
+virtio_balloon_free_page_report_notify(NotifierWithReturn *n, void *data)
+{
+    VirtIOBalloon *dev = container_of(n, VirtIOBalloon,
+                                      free_page_report_notify);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    PrecopyNotifyData *pnd = data;
+
+    /*
+     * This is only an optimization for migration, so leave the normal
+     * migration process unaffected when the feature is not supported.
+     */
+    if (!virtio_balloon_free_page_support(dev)) {
+        return 0;
+    }
+
+    switch (pnd->reason) {
+    case PRECOPY_NOTIFY_SETUP:
+        precopy_enable_free_page_optimization();
+        break;
+    case PRECOPY_NOTIFY_COMPLETE:
+    case PRECOPY_NOTIFY_CLEANUP:
+    case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
+        virtio_balloon_free_page_stop(dev);
+        break;
+    case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
+        if (!vdev->vm_running) {
+            virtio_balloon_free_page_done(dev);
+        } else {
+            virtio_balloon_free_page_start(dev);
+        }
+        break;
+    default:
+        virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason);
+    }
+
+    return 0;
+}
+
 static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
 {
     VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
@@ -316,6 +495,17 @@  static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
     config.num_pages = cpu_to_le32(dev->num_pages);
     config.actual = cpu_to_le32(dev->actual);
 
+    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
+        config.free_page_report_cmd_id =
+                       cpu_to_le32(dev->free_page_report_cmd_id);
+    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
+        config.free_page_report_cmd_id =
+                       cpu_to_le32(VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID);
+    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
+        config.free_page_report_cmd_id =
+                       cpu_to_le32(VIRTIO_BALLOON_FREE_PAGE_REPORT_DONE_ID);
+    }
+
     trace_virtio_balloon_get_config(config.num_pages, config.actual);
     memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
 }
@@ -376,6 +566,7 @@  static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
     VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
     f |= dev->host_features;
     virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
+
     return f;
 }
 
@@ -412,6 +603,18 @@  static int virtio_balloon_post_load_device(void *opaque, int version_id)
     return 0;
 }
 
+static const VMStateDescription vmstate_virtio_balloon_free_page_report = {
+    .name = "virtio-balloon-device/free-page-report",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = virtio_balloon_free_page_support, /* checks F_FREE_PAGE_HINT */
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon),
+        VMSTATE_UINT32(free_page_report_status, VirtIOBalloon),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_virtio_balloon_device = {
     .name = "virtio-balloon-device",
     .version_id = 1,
@@ -422,6 +625,10 @@  static const VMStateDescription vmstate_virtio_balloon_device = {
         VMSTATE_UINT32(actual, VirtIOBalloon),
         VMSTATE_END_OF_LIST()
     },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_virtio_balloon_free_page_report,
+        NULL
+    }
 };
 
 static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
@@ -446,6 +653,29 @@  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
     s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
     s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
 
+    if (virtio_has_feature(s->host_features,
+                           VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+        s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
+                                           virtio_balloon_handle_free_page_vq);
+        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
+        s->free_page_report_cmd_id =
+                           VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
+        s->free_page_report_notify.notify =
+                                       virtio_balloon_free_page_report_notify;
+        precopy_add_notifier(&s->free_page_report_notify);
+        if (s->iothread) {
+            object_ref(OBJECT(s->iothread));
+            s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
+                                       virtio_ballloon_get_free_page_hints, s);
+            qemu_mutex_init(&s->free_page_lock);
+            qemu_cond_init(&s->free_page_cond);
+            s->block_iothread = false;
+        } else {
+            /* Simply disable this feature if the iothread wasn't created. */
+            s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT);
+            virtio_error(vdev, "iothread is missing");
+        }
+    }
     reset_stats(s);
 }
 
@@ -454,6 +684,11 @@  static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOBalloon *s = VIRTIO_BALLOON(dev);
 
+    if (virtio_balloon_free_page_support(s)) {
+        qemu_bh_delete(s->free_page_bh);
+        virtio_balloon_free_page_stop(s);
+        precopy_remove_notifier(&s->free_page_report_notify);
+    }
     balloon_stats_destroy_timer(s);
     qemu_remove_balloon_handler(s);
     virtio_cleanup(vdev);
@@ -463,6 +698,10 @@  static void virtio_balloon_device_reset(VirtIODevice *vdev)
 {
     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
 
+    if (virtio_balloon_free_page_support(s)) {
+        virtio_balloon_free_page_stop(s);
+    }
+
     if (s->stats_vq_elem != NULL) {
         virtqueue_unpop(s->svq, s->stats_vq_elem, 0);
         g_free(s->stats_vq_elem);
@@ -480,6 +719,26 @@  static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
          * was stopped */
         virtio_balloon_receive_stats(vdev, s->svq);
     }
+
+    if (virtio_balloon_free_page_support(s)) {
+        /*
+         * The VM is woken up and the iothread was blocked, so signal it to
+         * continue.
+         */
+        if (vdev->vm_running && s->block_iothread) {
+            qemu_mutex_lock(&s->free_page_lock);
+            s->block_iothread = false;
+            qemu_cond_signal(&s->free_page_cond);
+            qemu_mutex_unlock(&s->free_page_lock);
+        }
+
+        /* The VM is stopped, block the iothread. */
+        if (!vdev->vm_running) {
+            qemu_mutex_lock(&s->free_page_lock);
+            s->block_iothread = true;
+            qemu_mutex_unlock(&s->free_page_lock);
+        }
+    }
 }
 
 static void virtio_balloon_instance_init(Object *obj)
@@ -508,6 +767,10 @@  static const VMStateDescription vmstate_virtio_balloon = {
 static Property virtio_balloon_properties[] = {
     DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
                     VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
+    DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
+                    VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
+    DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD,
+                     IOThread *),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index e0df352..503349a 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -17,11 +17,14 @@ 
 
 #include "standard-headers/linux/virtio_balloon.h"
 #include "hw/virtio/virtio.h"
+#include "sysemu/iothread.h"
 
 #define TYPE_VIRTIO_BALLOON "virtio-balloon-device"
 #define VIRTIO_BALLOON(obj) \
         OBJECT_CHECK(VirtIOBalloon, (obj), TYPE_VIRTIO_BALLOON)
 
+#define VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN 0x80000000
+
 typedef struct virtio_balloon_stat VirtIOBalloonStat;
 
 typedef struct virtio_balloon_stat_modern {
@@ -30,15 +33,38 @@  typedef struct virtio_balloon_stat_modern {
        uint64_t val;
 } VirtIOBalloonStatModern;
 
+enum virtio_balloon_free_page_report_status {
+    FREE_PAGE_REPORT_S_STOP = 0,      /* set at realize and by free_page_stop */
+    FREE_PAGE_REPORT_S_REQUESTED = 1, /* new cmd id published via config */
+    FREE_PAGE_REPORT_S_START = 2,     /* guest sent back the current cmd id */
+    FREE_PAGE_REPORT_S_DONE = 3,      /* bitmap synced while VM not running */
+};
+
 typedef struct VirtIOBalloon {
     VirtIODevice parent_obj;
-    VirtQueue *ivq, *dvq, *svq;
+    VirtQueue *ivq, *dvq, *svq, *free_page_vq;
+    uint32_t free_page_report_status;
     uint32_t num_pages;
     uint32_t actual;
+    uint32_t free_page_report_cmd_id;
     uint64_t stats[VIRTIO_BALLOON_S_NR];
     VirtQueueElement *stats_vq_elem;
     size_t stats_vq_offset;
     QEMUTimer *stats_timer;
+    IOThread *iothread;
+    QEMUBH *free_page_bh;
+    /*
+     * Lock to synchronize threads to access the free page reporting related
+     * fields (e.g. free_page_report_status).
+     */
+    QemuMutex free_page_lock;
+    QemuCond  free_page_cond;
+    /*
+     * Set to block iothread to continue reading free page hints as the VM is
+     * stopped.
+     */
+    bool block_iothread;
+    NotifierWithReturn free_page_report_notify;
     int64_t stats_last_update;
     int64_t stats_poll_interval;
     uint32_t host_features;
diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h
index 4dbb7dc..9eee1c6 100644
--- a/include/standard-headers/linux/virtio_balloon.h
+++ b/include/standard-headers/linux/virtio_balloon.h
@@ -34,15 +34,20 @@ 
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
 #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
+#define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
 
+#define VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID 0
+#define VIRTIO_BALLOON_FREE_PAGE_REPORT_DONE_ID 1
 struct virtio_balloon_config {
 	/* Number of pages host wants Guest to give up. */
 	uint32_t num_pages;
 	/* Number of pages we've actually got in balloon. */
 	uint32_t actual;
+	/* Free page report command id, read-only by guest */
+	uint32_t free_page_report_cmd_id;
 };
 
 #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */