diff mbox series

[v2,4/7] block-copy: add a CoMutex to the BlockCopyTask list

Message ID 20210518100757.31243-5-eesposit@redhat.com
State New
Headers show
Series block-copy: protect block-copy internal structures | expand

Commit Message

Emanuele Giuseppe Esposito May 18, 2021, 10:07 a.m. UTC
Because the list of tasks is only modified by coroutine
functions, add a CoMutex in order to protect them.

Use the same mutex to protect also BlockCopyState in_flight_bytes
field to avoid adding additional synchronization primitives.

Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
 block/block-copy.c | 55 +++++++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 20 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy May 20, 2021, 3:19 p.m. UTC | #1
18.05.2021 13:07, Emanuele Giuseppe Esposito wrote:
> Because the list of tasks is only modified by coroutine
> functions, add a CoMutex in order to protect them.
> 
> Use the same mutex to protect also BlockCopyState in_flight_bytes
> field to avoid adding additional syncronization primitives.
> 
> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> ---
>   block/block-copy.c | 55 +++++++++++++++++++++++++++++-----------------
>   1 file changed, 35 insertions(+), 20 deletions(-)
> 
> diff --git a/block/block-copy.c b/block/block-copy.c
> index 2e610b4142..3a949fab64 100644
> --- a/block/block-copy.c
> +++ b/block/block-copy.c
> @@ -83,7 +83,7 @@ typedef struct BlockCopyTask {
>        */
>       bool zeroes;
>   
> -    /* State */
> +    /* State. Protected by tasks_lock */
>       CoQueue wait_queue; /* coroutines blocked on this task */
>   
>       /* To reference all call states from BlockCopyState */
> @@ -106,8 +106,9 @@ typedef struct BlockCopyState {
>       BdrvChild *target;
>   
>       /* State */
> -    int64_t in_flight_bytes;
> +    int64_t in_flight_bytes; /* protected by tasks_lock */

only this field is protected? or the whole State section?

>       BlockCopyMethod method;
> +    CoMutex tasks_lock;
>       QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
>       QLIST_HEAD(, BlockCopyCallState) calls;
>       /* State fields that use a thread-safe API */
> @@ -142,8 +143,10 @@ typedef struct BlockCopyState {
>       bool skip_unallocated;
>   } BlockCopyState;
>   
> -static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
> -                                            int64_t offset, int64_t bytes)
> +/* Called with lock held */
> +static BlockCopyTask *find_conflicting_task_locked(BlockCopyState *s,
> +                                                   int64_t offset,
> +                                                   int64_t bytes)
>   {
>       BlockCopyTask *t;
>   
> @@ -163,13 +166,16 @@ static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
>   static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
>                                                int64_t bytes)
>   {
> -    BlockCopyTask *task = find_conflicting_task(s, offset, bytes);
> +    BlockCopyTask *task;
> +
> +    QEMU_LOCK_GUARD(&s->tasks_lock);
> +    task = find_conflicting_task_locked(s, offset, bytes);
>   
>       if (!task) {
>           return false;
>       }
>   
> -    qemu_co_queue_wait(&task->wait_queue, NULL);
> +    qemu_co_queue_wait(&task->wait_queue, &s->tasks_lock);
>   
>       return true;
>   }
> @@ -213,11 +219,7 @@ static coroutine_fn BlockCopyTask *block_copy_task_create(BlockCopyState *s,
>       assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
>       bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);
>   

Looking at block_copy_task_create():

First, !bdrv_dirty_bitmap_next_dirty_area() doesn't take bitmaps lock, so it's not protected at all.

Next, even if we take bitmaps lock in bdrv_dirty_bitmap_next_dirty_area() or around it, it doesn't bring thread-safety to block_copy_task_create():

Imagine block_copy_task_create() is called from two threads simultaneously. Both calls will get same dirty area and create equal tasks. Then, "assert(!find_conflicting_task_locked(s, offset, bytes));" will crash.


> -    /* region is dirty, so no existent tasks possible in it */
> -    assert(!find_conflicting_task(s, offset, bytes));
> -
>       bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
> -    s->in_flight_bytes += bytes;
>   
>       task = g_new(BlockCopyTask, 1);
>       *task = (BlockCopyTask) {
> @@ -228,7 +230,13 @@ static coroutine_fn BlockCopyTask *block_copy_task_create(BlockCopyState *s,
>           .bytes = bytes,
>       };
>       qemu_co_queue_init(&task->wait_queue);
> -    QLIST_INSERT_HEAD(&s->tasks, task, list);
> +
> +    WITH_QEMU_LOCK_GUARD(&s->tasks_lock) {
> +        s->in_flight_bytes += bytes;
> +        /* region is dirty, so no existent tasks possible in it */
> +        assert(!find_conflicting_task_locked(s, offset, bytes));
> +        QLIST_INSERT_HEAD(&s->tasks, task, list);
> +    }
>   
>       return task;
>   }
> @@ -249,25 +257,29 @@ static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task,
>   
>       assert(new_bytes > 0 && new_bytes < task->bytes);
>   
> -    task->s->in_flight_bytes -= task->bytes - new_bytes;
>       bdrv_set_dirty_bitmap(task->s->copy_bitmap,
>                             task->offset + new_bytes, task->bytes - new_bytes);

Then look here. Imagine, immediately after bdrv_set_dirty_bitmap() in parallel thread the new task is created, which consumes these new dirty bits. Again, it will find conflicting task (this one, as task->bytes is not modified yet) and crash.

>   
> -    task->bytes = new_bytes;
> -    qemu_co_queue_restart_all(&task->wait_queue);
> +    WITH_QEMU_LOCK_GUARD(&task->s->tasks_lock) {
> +        task->s->in_flight_bytes -= task->bytes - new_bytes;
> +        task->bytes = new_bytes;
> +        qemu_co_queue_restart_all(&task->wait_queue);
> +    }
>   }
>   
>   static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
>   {
> -    task->s->in_flight_bytes -= task->bytes;
>       if (ret < 0) {
>           bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes);
>       }
> -    QLIST_REMOVE(task, list);
> -    progress_set_remaining(task->s->progress,
> -                           bdrv_get_dirty_count(task->s->copy_bitmap) +
> -                           task->s->in_flight_bytes);
> -    qemu_co_queue_restart_all(&task->wait_queue);
> +    WITH_QEMU_LOCK_GUARD(&task->s->tasks_lock) {
> +        task->s->in_flight_bytes -= task->bytes;
> +        QLIST_REMOVE(task, list);
> +        progress_set_remaining(task->s->progress,
> +                               bdrv_get_dirty_count(task->s->copy_bitmap) +
> +                               task->s->in_flight_bytes);
> +        qemu_co_queue_restart_all(&task->wait_queue);
> +    }
>   }
>   
>   void block_copy_state_free(BlockCopyState *s)
> @@ -336,6 +348,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
>       }
>   
>       ratelimit_init(&s->rate_limit);
> +    qemu_co_mutex_init(&s->tasks_lock);
>       QLIST_INIT(&s->tasks);
>       QLIST_INIT(&s->calls);
>   
> @@ -586,9 +599,11 @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
>   
>       if (!ret) {
>           bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
> +        qemu_co_mutex_lock(&s->tasks_lock);
>           progress_set_remaining(s->progress,
>                                  bdrv_get_dirty_count(s->copy_bitmap) +
>                                  s->in_flight_bytes);
> +        qemu_co_mutex_unlock(&s->tasks_lock);
>       }
>   
>       *count = bytes;
>
Emanuele Giuseppe Esposito May 25, 2021, 10:07 a.m. UTC | #2
On 20/05/2021 17:19, Vladimir Sementsov-Ogievskiy wrote:
> 18.05.2021 13:07, Emanuele Giuseppe Esposito wrote:
>> Because the list of tasks is only modified by coroutine
>> functions, add a CoMutex in order to protect them.
>>
>> Use the same mutex to protect also BlockCopyState in_flight_bytes
>> field to avoid adding additional syncronization primitives.
>>
>> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
>> ---
>>   block/block-copy.c | 55 +++++++++++++++++++++++++++++-----------------
>>   1 file changed, 35 insertions(+), 20 deletions(-)
>>
>> diff --git a/block/block-copy.c b/block/block-copy.c
>> index 2e610b4142..3a949fab64 100644
>> --- a/block/block-copy.c
>> +++ b/block/block-copy.c
>> @@ -83,7 +83,7 @@ typedef struct BlockCopyTask {
>>        */
>>       bool zeroes;
>> -    /* State */
>> +    /* State. Protected by tasks_lock */
>>       CoQueue wait_queue; /* coroutines blocked on this task */
>>       /* To reference all call states from BlockCopyState */
>> @@ -106,8 +106,9 @@ typedef struct BlockCopyState {
>>       BdrvChild *target;
>>       /* State */
>> -    int64_t in_flight_bytes;
>> +    int64_t in_flight_bytes; /* protected by tasks_lock */
> 
> only this field is protected? or the whole State section?

I guess you figured it already, here there is only in_flight_bytes 
because in next patches I take care of the others.

[...]

>>   }
>> @@ -213,11 +219,7 @@ static coroutine_fn BlockCopyTask 
>> *block_copy_task_create(BlockCopyState *s,
>>       assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
>>       bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);
> 
> Looking at block_copy_task_create():
> 
> First, !bdrv_dirty_bitmap_next_dirty_area() doesn't take bitmaps lock, 
> so it's not protected at all.
> 
> Next, even if we take bitmaps lock in 
> bdrv_dirty_bitmap_next_dirty_area() or around it, it doesn't bring 
> thread-safety to block_copy_task_create():

The simplest solution here seems to protect 
bdrv_dirty_bitmap_next_dirty_area and also bdrv_reset_dirty_bitmap with 
the tasks lock, so that once it is released the bitmap is updated 
accordingly and from my understanding no other task can get that region.

Btw, out of curiosity, why is bdrv_dirty_bitmap_next_dirty_area not 
protected by a lock? Can't we have a case where two threads (like you 
just mention below) check the bitmap? Or am I missing something?

> 
> Imagine block_copy_task_create() is called from two threads 
> simultaneously. Both calls will get same dirty area and create equal 
> tasks. Then, "assert(!find_conflicting_task_locked(s, offset, bytes));" 
> will crash.
> 


> 
>> -    /* region is dirty, so no existent tasks possible in it */
>> -    assert(!find_conflicting_task(s, offset, bytes));
>> -
>>       bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
>> -    s->in_flight_bytes += bytes;
>>       task = g_new(BlockCopyTask, 1);
>>       *task = (BlockCopyTask) {
>> @@ -228,7 +230,13 @@ static coroutine_fn BlockCopyTask 
>> *block_copy_task_create(BlockCopyState *s,
>>           .bytes = bytes,
>>       };
>>       qemu_co_queue_init(&task->wait_queue);
>> -    QLIST_INSERT_HEAD(&s->tasks, task, list);
>> +
>> +    WITH_QEMU_LOCK_GUARD(&s->tasks_lock) {
>> +        s->in_flight_bytes += bytes;
>> +        /* region is dirty, so no existent tasks possible in it */
>> +        assert(!find_conflicting_task_locked(s, offset, bytes));
>> +        QLIST_INSERT_HEAD(&s->tasks, task, list);
>> +    }
>>       return task;
>>   }
>> @@ -249,25 +257,29 @@ static void coroutine_fn 
>> block_copy_task_shrink(BlockCopyTask *task,
>>       assert(new_bytes > 0 && new_bytes < task->bytes);
>> -    task->s->in_flight_bytes -= task->bytes - new_bytes;
>>       bdrv_set_dirty_bitmap(task->s->copy_bitmap,
>>                             task->offset + new_bytes, task->bytes - 
>> new_bytes);
> 
> Then look here. Imagine, immediately after bdrv_set_dirty_bitmap() in 
> parallel thread the new task is created, which consumes these new dirty 
> bits. Again, it will find conflicting task (this one, as task->bytes is 
> not modified yet) and crash.

Also here, the lock guard should be enlarged to cover also the dirty 
bitmap. Thank you for pointing these cases!

Emanuele

> 
>> -    task->bytes = new_bytes;
>> -    qemu_co_queue_restart_all(&task->wait_queue);
>> +    WITH_QEMU_LOCK_GUARD(&task->s->tasks_lock) {
>> +        task->s->in_flight_bytes -= task->bytes - new_bytes;
>> +        task->bytes = new_bytes;
>> +        qemu_co_queue_restart_all(&task->wait_queue);
>> +    }
>>   }
Vladimir Sementsov-Ogievskiy May 25, 2021, 10:25 a.m. UTC | #3
25.05.2021 13:07, Emanuele Giuseppe Esposito wrote:
> 
> 
> On 20/05/2021 17:19, Vladimir Sementsov-Ogievskiy wrote:
>> 18.05.2021 13:07, Emanuele Giuseppe Esposito wrote:
>>> Because the list of tasks is only modified by coroutine
>>> functions, add a CoMutex in order to protect them.
>>>
>>> Use the same mutex to protect also BlockCopyState in_flight_bytes
>>> field to avoid adding additional syncronization primitives.
>>>
>>> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
>>> ---
>>>   block/block-copy.c | 55 +++++++++++++++++++++++++++++-----------------
>>>   1 file changed, 35 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/block/block-copy.c b/block/block-copy.c
>>> index 2e610b4142..3a949fab64 100644
>>> --- a/block/block-copy.c
>>> +++ b/block/block-copy.c
>>> @@ -83,7 +83,7 @@ typedef struct BlockCopyTask {
>>>        */
>>>       bool zeroes;
>>> -    /* State */
>>> +    /* State. Protected by tasks_lock */
>>>       CoQueue wait_queue; /* coroutines blocked on this task */
>>>       /* To reference all call states from BlockCopyState */
>>> @@ -106,8 +106,9 @@ typedef struct BlockCopyState {
>>>       BdrvChild *target;
>>>       /* State */
>>> -    int64_t in_flight_bytes;
>>> +    int64_t in_flight_bytes; /* protected by tasks_lock */
>>
>> only this field is protected? or the whole State section?
> 
> I guess you figured it already, here there is only in_flight_bytes because in next patches I take care of the others.

Honestly, I don't like this way of introducing thread-safety. As I show below, we should protect not only structures, but also the logic working with these structures. And simple protecting access to some variable doesn't make much sense. Critical sections should be wide enough to protect the logic. So I'd prefer introducing all the critical sections together with the mutex in one patch, to have the whole picture.

> 
> [...]
> 
>>>   }
>>> @@ -213,11 +219,7 @@ static coroutine_fn BlockCopyTask *block_copy_task_create(BlockCopyState *s,
>>>       assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
>>>       bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);
>>
>> Looking at block_copy_task_create():
>>
>> First, !bdrv_dirty_bitmap_next_dirty_area() doesn't take bitmaps lock, so it's not protected at all.
>>
>> Next, even if we take bitmaps lock in bdrv_dirty_bitmap_next_dirty_area() or around it, it doesn't bring thread-safety to block_copy_task_create():
> 
> The simplest solution here seems to protect bdrv_dirty_bitmap_next_dirty_area and also bdrv_reset_dirty_bitmap with the tasks lock, so that once it is released the bitmap is updated accordingly and from my understanding no other task can get that region.
> 

Yes, we just need to protect larger areas by outer lock, to protect the logic, not only structures itself.

> Btw, out of curiosity, why is bdrv_dirty_bitmap_next_dirty_area not protected by a lock? Can't we have a case where two threads (like you just mention below) check the bitmap? Or am I missing something?

Hmm, don't know) Probably it's a bug, may be not. We need to check its callers.

> 
>>
>> Imagine block_copy_task_create() is called from two threads simultaneously. Both calls will get same dirty area and create equal tasks. Then, "assert(!find_conflicting_task_locked(s, offset, bytes));" will crash.
>>
> 
> 
>>
>>> -    /* region is dirty, so no existent tasks possible in it */
>>> -    assert(!find_conflicting_task(s, offset, bytes));
>>> -
>>>       bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
>>> -    s->in_flight_bytes += bytes;
>>>       task = g_new(BlockCopyTask, 1);
>>>       *task = (BlockCopyTask) {
>>> @@ -228,7 +230,13 @@ static coroutine_fn BlockCopyTask *block_copy_task_create(BlockCopyState *s,
>>>           .bytes = bytes,
>>>       };
>>>       qemu_co_queue_init(&task->wait_queue);
>>> -    QLIST_INSERT_HEAD(&s->tasks, task, list);
>>> +
>>> +    WITH_QEMU_LOCK_GUARD(&s->tasks_lock) {
>>> +        s->in_flight_bytes += bytes;
>>> +        /* region is dirty, so no existent tasks possible in it */
>>> +        assert(!find_conflicting_task_locked(s, offset, bytes));
>>> +        QLIST_INSERT_HEAD(&s->tasks, task, list);
>>> +    }
>>>       return task;
>>>   }
>>> @@ -249,25 +257,29 @@ static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task,
>>>       assert(new_bytes > 0 && new_bytes < task->bytes);
>>> -    task->s->in_flight_bytes -= task->bytes - new_bytes;
>>>       bdrv_set_dirty_bitmap(task->s->copy_bitmap,
>>>                             task->offset + new_bytes, task->bytes - new_bytes);
>>
>> Then look here. Imagine, immediately after bdrv_set_dirty_bitmap() in parallel thread the new task is created, which consumes these new dirty bits. Again, it will find conflicting task (this one, as task->bytes is not modified yet) and crash.
> 
> Also here, the lock guard should be enlarged to cover also the dirty bitmap. Thank you for pointing these cases!
> 

Yes. And then we'll come to most of the logic covered by mutex, and we'll not need atomic operations. And nothing bad in doing simple operations with variables under mutex, it's a lot simpler than bothering with atomic operations.

> 
>>
>>> -    task->bytes = new_bytes;
>>> -    qemu_co_queue_restart_all(&task->wait_queue);
>>> +    WITH_QEMU_LOCK_GUARD(&task->s->tasks_lock) {
>>> +        task->s->in_flight_bytes -= task->bytes - new_bytes;
>>> +        task->bytes = new_bytes;
>>> +        qemu_co_queue_restart_all(&task->wait_queue);
>>> +    }
>>>   }
>
Paolo Bonzini May 26, 2021, 2:58 p.m. UTC | #4
On 25/05/21 12:25, Vladimir Sementsov-Ogievskiy wrote:
>>> Next, even if we take bitmaps lock in 
>>> bdrv_dirty_bitmap_next_dirty_area() or around it, it doesn't bring 
>>> thread-safety to block_copy_task_create():
>>
>> The simplest solution here seems to protect 
>> bdrv_dirty_bitmap_next_dirty_area and also bdrv_reset_dirty_bitmap 
>> with the tasks lock, so that once it is released the bitmap is updated 
>> accordingly and from my understanding no other task can get that region.
> 
> Yes, we just need to protect larger areas by outer lock, to protect the 
> logic, not only structures itself.

Locks protects data, not code; code must ensure invariants are respected 
at the end of critical sections.  Here we have both problems:

- it's protecting too little data (the bitmap is not protected).  This 
is a block/dirty-bitmap.c bug.

- it's not respecting the invariant that tasks always reflected a 
superset of what is set in the dirty bitmap.  This is solved by making 
the critical section larger.

Paolo
Vladimir Sementsov-Ogievskiy May 26, 2021, 4:13 p.m. UTC | #5
26.05.2021 17:58, Paolo Bonzini wrote:
> On 25/05/21 12:25, Vladimir Sementsov-Ogievskiy wrote:
>>>> Next, even if we take bitmaps lock in bdrv_dirty_bitmap_next_dirty_area() or around it, it doesn't bring thread-safety to block_copy_task_create():
>>>
>>> The simplest solution here seems to protect bdrv_dirty_bitmap_next_dirty_area and also bdrv_reset_dirty_bitmap with the tasks lock, so that once it is released the bitmap is updated accordingly and from my understanding no other task can get that region.
>>
>> Yes, we just need to protect larger areas by outer lock, to protect the logic, not only structures itself.
> 
> Locks protects data, not code; code must ensure invariants are respected at the end of critical sections.  Here we have both problems:

Hmm. Anyway, if code doesn't work with data that is potentially shared with other threads, it doesn't need any protection. So, I agree.. I just wanted to say that, if we have two data structures A and B, each protected by own lock, it doesn't mean that our code is thread-safe. In your terminology we can say that the whole data (which is a combination of A and B) is not protected by partial locks, we need another lock to protect the combination.

> 
> - it's protecting too little data (the bitmap is not protected).  This is a block/dirty-bitmap.c bug.
> 
> - it's not respecting the invariant that tasks always reflected a superset of what is set in the dirty bitmap.  This is solved by making the critical section larger.
>
Stefan Hajnoczi May 27, 2021, 9:07 a.m. UTC | #6
On Tue, May 18, 2021 at 12:07:54PM +0200, Emanuele Giuseppe Esposito wrote:
> -static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
> -                                            int64_t offset, int64_t bytes)
> +/* Called with lock held */

s/lock/tasks_lock/
diff mbox series

Patch

diff --git a/block/block-copy.c b/block/block-copy.c
index 2e610b4142..3a949fab64 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -83,7 +83,7 @@  typedef struct BlockCopyTask {
      */
     bool zeroes;
 
-    /* State */
+    /* State. Protected by tasks_lock */
     CoQueue wait_queue; /* coroutines blocked on this task */
 
     /* To reference all call states from BlockCopyState */
@@ -106,8 +106,9 @@  typedef struct BlockCopyState {
     BdrvChild *target;
 
     /* State */
-    int64_t in_flight_bytes;
+    int64_t in_flight_bytes; /* protected by tasks_lock */
     BlockCopyMethod method;
+    CoMutex tasks_lock;
     QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
     QLIST_HEAD(, BlockCopyCallState) calls;
     /* State fields that use a thread-safe API */
@@ -142,8 +143,10 @@  typedef struct BlockCopyState {
     bool skip_unallocated;
 } BlockCopyState;
 
-static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
-                                            int64_t offset, int64_t bytes)
+/* Called with lock held */
+static BlockCopyTask *find_conflicting_task_locked(BlockCopyState *s,
+                                                   int64_t offset,
+                                                   int64_t bytes)
 {
     BlockCopyTask *t;
 
@@ -163,13 +166,16 @@  static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
 static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
                                              int64_t bytes)
 {
-    BlockCopyTask *task = find_conflicting_task(s, offset, bytes);
+    BlockCopyTask *task;
+
+    QEMU_LOCK_GUARD(&s->tasks_lock);
+    task = find_conflicting_task_locked(s, offset, bytes);
 
     if (!task) {
         return false;
     }
 
-    qemu_co_queue_wait(&task->wait_queue, NULL);
+    qemu_co_queue_wait(&task->wait_queue, &s->tasks_lock);
 
     return true;
 }
@@ -213,11 +219,7 @@  static coroutine_fn BlockCopyTask *block_copy_task_create(BlockCopyState *s,
     assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
     bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);
 
-    /* region is dirty, so no existent tasks possible in it */
-    assert(!find_conflicting_task(s, offset, bytes));
-
     bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
-    s->in_flight_bytes += bytes;
 
     task = g_new(BlockCopyTask, 1);
     *task = (BlockCopyTask) {
@@ -228,7 +230,13 @@  static coroutine_fn BlockCopyTask *block_copy_task_create(BlockCopyState *s,
         .bytes = bytes,
     };
     qemu_co_queue_init(&task->wait_queue);
-    QLIST_INSERT_HEAD(&s->tasks, task, list);
+
+    WITH_QEMU_LOCK_GUARD(&s->tasks_lock) {
+        s->in_flight_bytes += bytes;
+        /* region is dirty, so no existent tasks possible in it */
+        assert(!find_conflicting_task_locked(s, offset, bytes));
+        QLIST_INSERT_HEAD(&s->tasks, task, list);
+    }
 
     return task;
 }
@@ -249,25 +257,29 @@  static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task,
 
     assert(new_bytes > 0 && new_bytes < task->bytes);
 
-    task->s->in_flight_bytes -= task->bytes - new_bytes;
     bdrv_set_dirty_bitmap(task->s->copy_bitmap,
                           task->offset + new_bytes, task->bytes - new_bytes);
 
-    task->bytes = new_bytes;
-    qemu_co_queue_restart_all(&task->wait_queue);
+    WITH_QEMU_LOCK_GUARD(&task->s->tasks_lock) {
+        task->s->in_flight_bytes -= task->bytes - new_bytes;
+        task->bytes = new_bytes;
+        qemu_co_queue_restart_all(&task->wait_queue);
+    }
 }
 
 static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
 {
-    task->s->in_flight_bytes -= task->bytes;
     if (ret < 0) {
         bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes);
     }
-    QLIST_REMOVE(task, list);
-    progress_set_remaining(task->s->progress,
-                           bdrv_get_dirty_count(task->s->copy_bitmap) +
-                           task->s->in_flight_bytes);
-    qemu_co_queue_restart_all(&task->wait_queue);
+    WITH_QEMU_LOCK_GUARD(&task->s->tasks_lock) {
+        task->s->in_flight_bytes -= task->bytes;
+        QLIST_REMOVE(task, list);
+        progress_set_remaining(task->s->progress,
+                               bdrv_get_dirty_count(task->s->copy_bitmap) +
+                               task->s->in_flight_bytes);
+        qemu_co_queue_restart_all(&task->wait_queue);
+    }
 }
 
 void block_copy_state_free(BlockCopyState *s)
@@ -336,6 +348,7 @@  BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
     }
 
     ratelimit_init(&s->rate_limit);
+    qemu_co_mutex_init(&s->tasks_lock);
     QLIST_INIT(&s->tasks);
     QLIST_INIT(&s->calls);
 
@@ -586,9 +599,11 @@  int64_t block_copy_reset_unallocated(BlockCopyState *s,
 
     if (!ret) {
         bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
+        qemu_co_mutex_lock(&s->tasks_lock);
         progress_set_remaining(s->progress,
                                bdrv_get_dirty_count(s->copy_bitmap) +
                                s->in_flight_bytes);
+        qemu_co_mutex_unlock(&s->tasks_lock);
     }
 
     *count = bytes;