diff mbox

[v2,07/11] blockjob: add .clean property

Message ID 1475272849-19990-8-git-send-email-jsnow@redhat.com
State New
Headers show

Commit Message

John Snow Sept. 30, 2016, 10 p.m. UTC
Cleaning up after we have deferred to the main thread but before the
transaction has converged can be dangerous and result in deadlocks
if the job cleanup invokes any BH polling loops.

A job may attempt to begin cleaning up, but may induce another job to
enter its cleanup routine. The second job, part of our same transaction,
will block waiting for the first job to finish, so neither job may now
make progress.

To rectify this, allow jobs to register a cleanup operation that will
always run regardless of if the job was in a transaction or not, and
if the transaction job group completed successfully or not.

Move sensitive cleanup to this callback instead which is guaranteed to
be run only after the transaction has converged, which removes sensitive
timing constraints from said cleanup.

Furthermore, in future patches these cleanup operations will be performed
regardless of whether or not we actually started the job. Therefore,
cleanup callbacks should essentially confine themselves to undoing create
operations, e.g. setup actions taken in what is now backup_run.

Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: John Snow <jsnow@redhat.com>
---
 block/backup.c               | 11 ++++++++---
 blockjob.c                   |  5 +++--
 include/block/blockjob_int.h |  8 ++++++++
 3 files changed, 19 insertions(+), 5 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy Oct. 12, 2016, 11:11 a.m. UTC | #1
On 01.10.2016 01:00, John Snow wrote:
> Cleaning up after we have deferred to the main thread but before the
> transaction has converged can be dangerous and result in deadlocks
> if the job cleanup invokes any BH polling loops.
>
> A job may attempt to begin cleaning up, but may induce another job to
> enter its cleanup routine. The second job, part of our same transaction,
> will block waiting for the first job to finish, so neither job may now
> make progress.
>
> To rectify this, allow jobs to register a cleanup operation that will
> always run regardless of if the job was in a transaction or not, and
> if the transaction job group completed successfully or not.
>
> Move sensitive cleanup to this callback instead which is guaranteed to
> be run only after the transaction has converged, which removes sensitive
> timing constraints from said cleanup.
>
> Furthermore, in future patches these cleanup operations will be performed
> regardless of whether or not we actually started the job. Therefore,
> cleanup callbacks should essentially confine themselves to undoing create
> operations, e.g. setup actions taken in what is now backup_run.
>
> Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> Signed-off-by: John Snow <jsnow@redhat.com>
> ---
>   block/backup.c               | 11 ++++++++---
>   blockjob.c                   |  5 +++--
>   include/block/blockjob_int.h |  8 ++++++++
>   3 files changed, 19 insertions(+), 5 deletions(-)
>
> diff --git a/block/backup.c b/block/backup.c
> index d667482..42ff4c0 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -242,6 +242,13 @@ static void backup_abort(BlockJob *job)
>       }
>   }
>   
> +static void backup_clean(BlockJob *job)
> +{
> +    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
> +    assert(s->target);
> +    blk_unref(s->target);
> +}
> +
>   static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context)
>   {
>       BackupBlockJob *s = container_of(job, BackupBlockJob, common);
> @@ -306,6 +313,7 @@ static const BlockJobDriver backup_job_driver = {
>       .set_speed              = backup_set_speed,
>       .commit                 = backup_commit,
>       .abort                  = backup_abort,
> +    .clean                  = backup_clean,
>       .attached_aio_context   = backup_attached_aio_context,
>   };
>   
> @@ -327,11 +335,8 @@ typedef struct {
>   
>   static void backup_complete(BlockJob *job, void *opaque)
>   {
> -    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>       BackupCompleteData *data = opaque;
>   
> -    blk_unref(s->target);
> -
>       block_job_completed(job, data->ret);
>       g_free(data);
>   }
> diff --git a/blockjob.c b/blockjob.c
> index 09fb602..44cbf6c 100644
> --- a/blockjob.c
> +++ b/blockjob.c
> @@ -217,7 +217,9 @@ static void block_job_completed_single(BlockJob *job)
>               job->driver->abort(job);
>           }
>       }
> -

I think empty line is removed by mistake, it returns back in 09

> +    if (job->driver->clean) {
> +        job->driver->clean(job);
> +    }
>       if (job->cb) {
>           job->cb(job->opaque, job->ret);
>       }
> @@ -230,7 +232,6 @@ static void block_job_completed_single(BlockJob *job)
>           }
>           block_job_event_completed(job, msg);
>       }
> -

and here

>       if (job->txn) {
>           QLIST_REMOVE(job, txn_list);
>           block_job_txn_unref(job->txn);
> diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
> index c6da7e4..b7aeaef 100644
> --- a/include/block/blockjob_int.h
> +++ b/include/block/blockjob_int.h
> @@ -74,6 +74,14 @@ struct BlockJobDriver {
>       void (*abort)(BlockJob *job);
>   
>       /**
> +     * If the callback is not NULL, it will be invoked after a call to either
> +     * .commit() or .abort(). Regardless of which callback is invoked after
> +     * completion, .clean() will always be called, even if the job does not
> +     * belong to a transaction group.
> +     */
> +    void (*clean)(BlockJob *job);
> +
> +    /**
>        * If the callback is not NULL, it will be invoked when the job transitions
>        * into the paused state.  Paused jobs must not perform any asynchronous
>        * I/O or event loop activity.  This callback is used to quiesce jobs.
diff mbox

Patch

diff --git a/block/backup.c b/block/backup.c
index d667482..42ff4c0 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -242,6 +242,13 @@  static void backup_abort(BlockJob *job)
     }
 }
 
+static void backup_clean(BlockJob *job)
+{
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+    assert(s->target);
+    blk_unref(s->target);
+}
+
 static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context)
 {
     BackupBlockJob *s = container_of(job, BackupBlockJob, common);
@@ -306,6 +313,7 @@  static const BlockJobDriver backup_job_driver = {
     .set_speed              = backup_set_speed,
     .commit                 = backup_commit,
     .abort                  = backup_abort,
+    .clean                  = backup_clean,
     .attached_aio_context   = backup_attached_aio_context,
 };
 
@@ -327,11 +335,8 @@  typedef struct {
 
 static void backup_complete(BlockJob *job, void *opaque)
 {
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
     BackupCompleteData *data = opaque;
 
-    blk_unref(s->target);
-
     block_job_completed(job, data->ret);
     g_free(data);
 }
diff --git a/blockjob.c b/blockjob.c
index 09fb602..44cbf6c 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -217,7 +217,9 @@  static void block_job_completed_single(BlockJob *job)
             job->driver->abort(job);
         }
     }
-
+    if (job->driver->clean) {
+        job->driver->clean(job);
+    }
     if (job->cb) {
         job->cb(job->opaque, job->ret);
     }
@@ -230,7 +232,6 @@  static void block_job_completed_single(BlockJob *job)
         }
         block_job_event_completed(job, msg);
     }
-
     if (job->txn) {
         QLIST_REMOVE(job, txn_list);
         block_job_txn_unref(job->txn);
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index c6da7e4..b7aeaef 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -74,6 +74,14 @@  struct BlockJobDriver {
     void (*abort)(BlockJob *job);
 
     /**
+     * If the callback is not NULL, it will be invoked after a call to either
+     * .commit() or .abort(). Regardless of which callback is invoked after
+     * completion, .clean() will always be called, even if the job does not
+     * belong to a transaction group.
+     */
+    void (*clean)(BlockJob *job);
+
+    /**
      * If the callback is not NULL, it will be invoked when the job transitions
      * into the paused state.  Paused jobs must not perform any asynchronous
      * I/O or event loop activity.  This callback is used to quiesce jobs.