Patchwork [RFC,v2,18/23] qcow2: Delay the COW

login
register
mail settings
Submitter Kevin Wolf
Date Feb. 13, 2013, 1:22 p.m.
Message ID <1360761733-25347-19-git-send-email-kwolf@redhat.com>
Download mbox | patch
Permalink /patch/220160/
State New
Headers show

Comments

Kevin Wolf - Feb. 13, 2013, 1:22 p.m.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cluster.c |   47 +++++++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.c         |   28 +++++++++++++++++++++++++---
 block/qcow2.h         |   14 ++++++++++++++
 3 files changed, 86 insertions(+), 3 deletions(-)
Stefan Hajnoczi - Feb. 15, 2013, 1:36 p.m.
On Wed, Feb 13, 2013 at 02:22:08PM +0100, Kevin Wolf wrote:
>      /**
> +     * true if the request is sleeping in the COW delay and the coroutine may
> +     * be reentered in order to cancel the timer.
> +     */
> +    bool sleeping;

Does reentering actually cancel the timer...or does it lead to a
spurious entry when the timer fires in the future?

Do we need anything to really delete the timer in case we re-enter and
terminate the coroutine before the timer fires?

Stefan
Kevin Wolf - Feb. 15, 2013, 2:16 p.m.
On Fri, Feb 15, 2013 at 02:36:37PM +0100, Stefan Hajnoczi wrote:
> On Wed, Feb 13, 2013 at 02:22:08PM +0100, Kevin Wolf wrote:
> >      /**
> > +     * true if the request is sleeping in the COW delay and the coroutine may
> > +     * be reentered in order to cancel the timer.
> > +     */
> > +    bool sleeping;
> 
> Does reentering actually cancel the timer...or does it lead to a
> spurious entry when the timer fires in the future?
> 
> Do we need anything to really delete the timer in case we re-enter and
> terminate the coroutine before the timer fires?

co_sleep_ns() supports this since commit 3ed99025, it cancels and deletes the
timer. Block jobs use the same thing when you cancel them.

Kevin
Stefan Hajnoczi - Feb. 18, 2013, 3:33 p.m.
On Wed, Feb 13, 2013 at 02:22:08PM +0100, Kevin Wolf wrote:
> @@ -800,6 +820,9 @@ static inline coroutine_fn void stop_l2meta(BlockDriverState *bs)
>  {
>      BDRVQcowState *s = bs->opaque;
>  
> +    /* Kick the requests once if they are sleepeing and then just wait until

s/sleepeing/sleeping/

> @@ -265,6 +275,8 @@ typedef struct QCowL2Meta
>      /** Pointer to next L2Meta of the same write request */
>      struct QCowL2Meta *next;
>  
> +    void *kick_l2meta;

Please forward declare KickL2Meta instead of using void*.

Patch

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index be75c17..33e595a 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -791,6 +791,52 @@  out:
     return i;
 }
 
+struct KickL2Meta {
+    QEMUBH *bh;
+    QCowL2Meta *m;
+};
+
+static void kick_l2meta_bh(void *opaque)
+{
+    struct KickL2Meta *k = opaque;
+    QCowL2Meta *m = k->m;
+
+    m->kick_l2meta = NULL;
+    qemu_bh_delete(k->bh);
+    g_free(k);
+
+    if (m->sleeping) {
+        qemu_coroutine_enter(m->co, NULL);
+    }
+}
+
+static void kick_l2meta(QCowL2Meta *m)
+{
+    struct KickL2Meta *k;
+
+    if (m->kick_l2meta) {
+        return;
+    }
+
+    k = g_malloc(sizeof(*k));
+    k->bh = qemu_bh_new(kick_l2meta_bh, k);
+    k->m = m;
+
+    m->kick_l2meta = k;
+
+    qemu_bh_schedule(k->bh);
+}
+
+void qcow2_delete_kick_l2meta_bh(void *opaque)
+{
+    if (opaque) {
+        struct KickL2Meta *k = opaque;
+
+        qemu_bh_delete(k->bh);
+        g_free(k);
+    }
+}
+
 /*
  * Check if there already is an AIO write request in flight which allocates
  * the same cluster. In this case we need to wait until the previous
@@ -833,6 +879,7 @@  static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
                 /* Wait for the dependency to complete. We need to recheck
                  * the free/allocated clusters when we continue. */
                 qemu_co_mutex_unlock(&s->lock);
+                kick_l2meta(old_alloc);
                 qemu_co_queue_wait(&old_alloc->dependent_requests);
                 qemu_co_mutex_lock(&s->lock);
                 return -EAGAIN;
diff --git a/block/qcow2.c b/block/qcow2.c
index 3f169b8..57552aa 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -767,6 +767,13 @@  static void coroutine_fn process_l2meta(void *opaque)
     int ret;
 
     assert(s->l2meta_flush.reader > 0);
+
+    if (!s->in_l2meta_flush) {
+        m->sleeping = true;
+        co_sleep_ns(rt_clock, 1000000);
+        m->sleeping = false;
+    }
+
     qemu_co_mutex_lock(&s->lock);
 
     ret = qcow2_alloc_cluster_link_l2(bs, m);
@@ -784,6 +791,7 @@  static void coroutine_fn process_l2meta(void *opaque)
     /* Meanwhile some new dependencies could have accumulated */
     qemu_co_queue_restart_all(&m->dependent_requests);
 
+    qcow2_delete_kick_l2meta_bh(m->kick_l2meta);
     g_free(m);
 
     qemu_co_rwlock_unlock(&s->l2meta_flush);
@@ -792,6 +800,18 @@  static void coroutine_fn process_l2meta(void *opaque)
 static bool qcow2_drain(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
+    QCowL2Meta *m;
+
+    s->in_l2meta_flush = true;
+again:
+    QLIST_FOREACH(m, &s->cluster_allocs, next_in_flight) {
+        if (m->sleeping) {
+            qemu_coroutine_enter(m->co, NULL);
+            /* next_in_flight link could have become invalid */
+            goto again;
+        }
+    }
+    s->in_l2meta_flush = false;
 
     return !QLIST_EMPTY(&s->cluster_allocs);
 }
@@ -800,6 +820,9 @@  static inline coroutine_fn void stop_l2meta(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
 
+    /* Kick the requests once if they are sleeping and then just wait until
+     * they complete and we get the lock */
+    qcow2_drain(bs);
     qemu_co_rwlock_wrlock(&s->l2meta_flush);
 }
 
@@ -893,7 +916,6 @@  static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             qemu_co_mutex_unlock(&s->lock);
 
             while (l2meta != NULL) {
-                Coroutine *co;
                 QCowL2Meta *next;
 
                 ProcessL2Meta p = {
@@ -913,8 +935,8 @@  static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                 /* l2meta might already be freed after the coroutine has run */
                 next = l2meta->next;
 
-                co = qemu_coroutine_create(process_l2meta);
-                qemu_coroutine_enter(co, &p);
+                l2meta->co = qemu_coroutine_create(process_l2meta);
+                qemu_coroutine_enter(l2meta->co, &p);
 
                 l2meta = next;
             }
diff --git a/block/qcow2.h b/block/qcow2.h
index 46ed112..1d7cdab 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -169,6 +169,7 @@  typedef struct BDRVQcowState {
      * Writers: Anyone who requires l2meta to be flushed
      */
     CoRwlock l2meta_flush;
+    bool in_l2meta_flush;
 
     uint32_t crypt_method; /* current crypt method, 0 if no key yet */
     uint32_t crypt_method_header;
@@ -245,6 +246,15 @@  typedef struct QCowL2Meta
     bool is_written;
 
     /**
+     * true if the request is sleeping in the COW delay and the coroutine may
+     * be reentered in order to cancel the timer.
+     */
+    bool sleeping;
+
+    /** Coroutine that handles delayed COW and updates L2 entry */
+    Coroutine *co;
+
+    /**
      * Requests that overlap with this allocation and wait to be restarted
      * when the allocating request has completed.
      */
@@ -265,6 +275,8 @@  typedef struct QCowL2Meta
     /** Pointer to next L2Meta of the same write request */
     struct QCowL2Meta *next;
 
+    void *kick_l2meta;
+
     QLIST_ENTRY(QCowL2Meta) next_in_flight;
 } QCowL2Meta;
 
@@ -404,6 +416,8 @@  int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
     int nb_sectors);
 int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
 
+void qcow2_delete_kick_l2meta_bh(void *opaque);
+
 /* qcow2-snapshot.c functions */
 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);