diff mbox series

[v4,3/6] block-copy: improve comments of BlockCopyTask and BlockCopyState types and functions

Message ID 20210614073350.17048-4-eesposit@redhat.com
State New
Headers show
Series block-copy: protect block-copy internal structures | expand

Commit Message

Emanuele Giuseppe Esposito June 14, 2021, 7:33 a.m. UTC
As done in BlockCopyCallState, categorize BlockCopyTask
and BlockCopyState in IN, State and OUT fields.
This is just to understand which field has to be protected with a lock.

.sleep_state is handled in the series "coroutine: new sleep/wake API"
and thus here left as TODO.

Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
 block/block-copy.c | 49 +++++++++++++++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 18 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy June 19, 2021, 3:23 p.m. UTC | #1
14.06.2021 10:33, Emanuele Giuseppe Esposito wrote:
> As done in BlockCopyCallState, categorize BlockCopyTask
> and BlockCopyState in IN, State and OUT fields.
> This is just to understand which field has to be protected with a lock.
> 
> .sleep_state is handled in the series "coroutine: new sleep/wake API"
> and thus here left as TODO.
> 
> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> ---
>   block/block-copy.c | 49 +++++++++++++++++++++++++++++-----------------
>   1 file changed, 31 insertions(+), 18 deletions(-)
> 
> diff --git a/block/block-copy.c b/block/block-copy.c
> index 3f26be8ddc..5ff7764e87 100644
> --- a/block/block-copy.c
> +++ b/block/block-copy.c
> @@ -52,29 +52,35 @@ typedef struct BlockCopyCallState {
>       /* Coroutine where async block-copy is running */
>       Coroutine *co;
>   
> -    /* To reference all call states from BlockCopyState */
> -    QLIST_ENTRY(BlockCopyCallState) list;
> -
>       /* State */
> -    int ret;
>       bool finished;
> -    QemuCoSleep sleep;
> -    bool cancelled;
> +    QemuCoSleep sleep; /* TODO: protect API with a lock */
> +
> +    /* To reference all call states from BlockCopyState */
> +    QLIST_ENTRY(BlockCopyCallState) list;
>   
>       /* OUT parameters */
> +    bool cancelled;
>       bool error_is_read;
> +    int ret;
>   } BlockCopyCallState;
>   
>   typedef struct BlockCopyTask {
>       AioTask task;
>   
> +    /*
> +     * IN parameters. Initialized in block_copy_task_create()
> +     * and never changed.
> +     */

That's just not true for method field :(

>       BlockCopyState *s;
>       BlockCopyCallState *call_state;
>       int64_t offset;
> -    int64_t bytes;
>       BlockCopyMethod method;
> -    QLIST_ENTRY(BlockCopyTask) list;
> +
> +    /* State */
>       CoQueue wait_queue; /* coroutines blocked on this task */
> +    int64_t bytes;
> +    QLIST_ENTRY(BlockCopyTask) list;
>   } BlockCopyTask;
>   
>   static int64_t task_end(BlockCopyTask *task)
> @@ -90,15 +96,25 @@ typedef struct BlockCopyState {
>        */
>       BdrvChild *source;
>       BdrvChild *target;
> -    BdrvDirtyBitmap *copy_bitmap;
> +
> +    /* State */
>       int64_t in_flight_bytes;
> -    int64_t cluster_size;
>       BlockCopyMethod method;
> -    int64_t max_transfer;
> -    uint64_t len;
>       QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
>       QLIST_HEAD(, BlockCopyCallState) calls;
> +    /* State fields that use a thread-safe API */
> +    BdrvDirtyBitmap *copy_bitmap;
> +    ProgressMeter *progress;
> +    SharedResource *mem;
> +    RateLimit rate_limit;
>   
> +    /*
> +     * IN parameters. Initialized in block_copy_state_new()
> +     * and never changed.
> +     */
> +    int64_t cluster_size;
> +    int64_t max_transfer;
> +    uint64_t len;
>       BdrvRequestFlags write_flags;
>   
>       /*
> @@ -114,14 +130,11 @@ typedef struct BlockCopyState {
>        * In this case, block_copy() will query the source’s allocation status,
>        * skip unallocated regions, clear them in the copy_bitmap, and invoke
>        * block_copy_reset_unallocated() every time it does.
> +     *
> +     * This field is set in backup_run() before coroutines are run,
> +     * therefore is an IN.

That's not true: it is modified from backup_run, when block-copy already initialized, and block_copy() calls may be done from backup-top filter.

>        */
>       bool skip_unallocated;
> -
> -    ProgressMeter *progress;
> -
> -    SharedResource *mem;
> -
> -    RateLimit rate_limit;
>   } BlockCopyState;
>   
>   static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
>
Vladimir Sementsov-Ogievskiy June 19, 2021, 5:27 p.m. UTC | #2
14.06.2021 10:33, Emanuele Giuseppe Esposito wrote:
> As done in BlockCopyCallState, categorize BlockCopyTask
> and BlockCopyState in IN, State and OUT fields.
> This is just to understand which field has to be protected with a lock.
> 
> .sleep_state is handled in the series "coroutine: new sleep/wake API"
> and thus here left as TODO.
> 
> Signed-off-by: Emanuele Giuseppe Esposito<eesposit@redhat.com>
> ---
>   block/block-copy.c | 49 +++++++++++++++++++++++++++++-----------------
>   1 file changed, 31 insertions(+), 18 deletions(-)
> 
> diff --git a/block/block-copy.c b/block/block-copy.c
> index 3f26be8ddc..5ff7764e87 100644
> --- a/block/block-copy.c
> +++ b/block/block-copy.c
> @@ -52,29 +52,35 @@ typedef struct BlockCopyCallState {
>       /* Coroutine where async block-copy is running */
>       Coroutine *co;
>   
> -    /* To reference all call states from BlockCopyState */
> -    QLIST_ENTRY(BlockCopyCallState) list;
> -
>       /* State */
> -    int ret;
>       bool finished;
> -    QemuCoSleep sleep;
> -    bool cancelled;
> +    QemuCoSleep sleep; /* TODO: protect API with a lock */
> +
> +    /* To reference all call states from BlockCopyState */
> +    QLIST_ENTRY(BlockCopyCallState) list;
>   
>       /* OUT parameters */
> +    bool cancelled;

actually, cancelled is not OUT field. It's set by user to cancel the operation. And block-copy tracks the field to understand should it cancel at the moment or not. So, it's part of state.

Also, I just now understand, that "out parameter" sounds strange here. As "parameter" is an input by general meaning.. We may have "out parameters" for functions, as all parameters of a function are generally called "parameters" anyway. I think "OUT fields" is more correct here. I don't insist, as I'm not an expert in English (for sure, you'll find mistakes even in this paragraph :\
Vladimir Sementsov-Ogievskiy June 19, 2021, 6:31 p.m. UTC | #3
19.06.2021 18:23, Vladimir Sementsov-Ogievskiy wrote:
>>   typedef struct BlockCopyTask {
>>       AioTask task;
>> +    /*
>> +     * IN parameters. Initialized in block_copy_task_create()
>> +     * and never changed.
>> +     */
> 
> That's just not true for method field :(

I think, we just need to document that @method is never accessed concurrently
Vladimir Sementsov-Ogievskiy June 19, 2021, 6:53 p.m. UTC | #4
14.06.2021 10:33, Emanuele Giuseppe Esposito wrote:
> --- a/block/block-copy.c
> +++ b/block/block-copy.c
> @@ -52,29 +52,35 @@ typedef struct BlockCopyCallState {
>       /* Coroutine where async block-copy is running */
>       Coroutine *co;
>   
> -    /* To reference all call states from BlockCopyState */
> -    QLIST_ENTRY(BlockCopyCallState) list;
> -
>       /* State */
> -    int ret;
>       bool finished;
> -    QemuCoSleep sleep;
> -    bool cancelled;
> +    QemuCoSleep sleep; /* TODO: protect API with a lock */
> +
> +    /* To reference all call states from BlockCopyState */
> +    QLIST_ENTRY(BlockCopyCallState) list;
>   
>       /* OUT parameters */
> +    bool cancelled;
>       bool error_is_read;
> +    int ret;

Hmm, about that. Is @ret an "OUT parameter"? Yes it is.

But someone may think, that out parameters doesn't need locking like "State" parameters (otherwise, what is the difference for the person who read these comments?). But that is wrong. And ret is modified under mutex for reason.

Actually, the full description of "ret" field usage may look as follows:

Set concurrently by tasks under mutex. Only set once by first failed task (and untouched if no task failed).
After finish (if call_state->finished is true) not modified anymore and may be read safely without mutex.

So, before finished, ret is a kind of "State" too: it is both read and written by tasks.

This shows to me that dividing fields into "IN", "State" and "OUT", doesn't really help here. In this series we use different policies of concurrent access to fields: some are accessed only under mutex, other has more complex usage scenario (like this @ret), some needs atomic access.
Emanuele Giuseppe Esposito June 21, 2021, 7:59 a.m. UTC | #5
On 19/06/2021 17:23, Vladimir Sementsov-Ogievskiy wrote:
> 14.06.2021 10:33, Emanuele Giuseppe Esposito wrote:
>> As done in BlockCopyCallState, categorize BlockCopyTask
>> and BlockCopyState in IN, State and OUT fields.
>> This is just to understand which field has to be protected with a lock.
>>
>> .sleep_state is handled in the series "coroutine: new sleep/wake API"
>> and thus here left as TODO.
>>
>> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
>> ---
>>   block/block-copy.c | 49 +++++++++++++++++++++++++++++-----------------
>>   1 file changed, 31 insertions(+), 18 deletions(-)
>>
>> diff --git a/block/block-copy.c b/block/block-copy.c
>> index 3f26be8ddc..5ff7764e87 100644
>> --- a/block/block-copy.c
>> +++ b/block/block-copy.c
>> @@ -52,29 +52,35 @@ typedef struct BlockCopyCallState {
>>       /* Coroutine where async block-copy is running */
>>       Coroutine *co;
>> -    /* To reference all call states from BlockCopyState */
>> -    QLIST_ENTRY(BlockCopyCallState) list;
>> -
>>       /* State */
>> -    int ret;
>>       bool finished;
>> -    QemuCoSleep sleep;
>> -    bool cancelled;
>> +    QemuCoSleep sleep; /* TODO: protect API with a lock */
>> +
>> +    /* To reference all call states from BlockCopyState */
>> +    QLIST_ENTRY(BlockCopyCallState) list;
>>       /* OUT parameters */
>> +    bool cancelled;
>>       bool error_is_read;
>> +    int ret;
>>   } BlockCopyCallState;
>>   typedef struct BlockCopyTask {
>>       AioTask task;
>> +    /*
>> +     * IN parameters. Initialized in block_copy_task_create()
>> +     * and never changed.
>> +     */
> 
> That's just not true for method field :(

You're right, because it is modified later in the while loop of 
block_copy_dirty_clusters, but the task is already in the list.
Will move it to state field.

> 
>>       BlockCopyState *s;
>>       BlockCopyCallState *call_state;
>>       int64_t offset;
>> -    int64_t bytes;
>>       BlockCopyMethod method;
>> -    QLIST_ENTRY(BlockCopyTask) list;
>> +
>> +    /* State */
>>       CoQueue wait_queue; /* coroutines blocked on this task */
>> +    int64_t bytes;
>> +    QLIST_ENTRY(BlockCopyTask) list;
>>   } BlockCopyTask;
>>   static int64_t task_end(BlockCopyTask *task)
>> @@ -90,15 +96,25 @@ typedef struct BlockCopyState {
>>        */
>>       BdrvChild *source;
>>       BdrvChild *target;
>> -    BdrvDirtyBitmap *copy_bitmap;
>> +
>> +    /* State */
>>       int64_t in_flight_bytes;
>> -    int64_t cluster_size;
>>       BlockCopyMethod method;
>> -    int64_t max_transfer;
>> -    uint64_t len;
>>       QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all 
>> block-copy calls */
>>       QLIST_HEAD(, BlockCopyCallState) calls;
>> +    /* State fields that use a thread-safe API */
>> +    BdrvDirtyBitmap *copy_bitmap;
>> +    ProgressMeter *progress;
>> +    SharedResource *mem;
>> +    RateLimit rate_limit;
>> +    /*
>> +     * IN parameters. Initialized in block_copy_state_new()
>> +     * and never changed.
>> +     */
>> +    int64_t cluster_size;
>> +    int64_t max_transfer;
>> +    uint64_t len;
>>       BdrvRequestFlags write_flags;
>>       /*
>> @@ -114,14 +130,11 @@ typedef struct BlockCopyState {
>>        * In this case, block_copy() will query the source’s allocation 
>> status,
>>        * skip unallocated regions, clear them in the copy_bitmap, and 
>> invoke
>>        * block_copy_reset_unallocated() every time it does.
>> +     *
>> +     * This field is set in backup_run() before coroutines are run,
>> +     * therefore is an IN.
> 
> That's not true: it is modified from backup_run, when block-copy already 
> initialized, and block_copy() calls may be done from backup-top filter.
> 

Ok, I am not very familiar with the backup code, so I did not see it.
This means we need to protect this field too.

At this point, I think we can increase the granularity of the lock in 
block_copy_dirty_clusters:
instead of having in each while loop

block_copy_task_create(); // locks and releases internally
block_copy_block_status(); // no lock used, but uses skip_unallocated so 
it will need one
if()
	block_copy_task_shrink(); // locks and releases internally
if(s->skip_unallocated) // will need lock
	block_copy_task_end(); // locks and releases internally
	[..]
if()
	task->method = COPY_WRITE_ZEROS; // needs lock
[..]

we can just lock the while section and eventually create _locked() 
version of task_end and similar functions that are also used in 
non-locked code blocks.


Emanuele

>>        */
>>       bool skip_unallocated;
>> -
>> -    ProgressMeter *progress;
>> -
>> -    SharedResource *mem;
>> -
>> -    RateLimit rate_limit;
>>   } BlockCopyState;
>>   static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
>>
> 
>
Emanuele Giuseppe Esposito June 21, 2021, 8:13 a.m. UTC | #6
On 19/06/2021 20:31, Vladimir Sementsov-Ogievskiy wrote:
> 19.06.2021 18:23, Vladimir Sementsov-Ogievskiy wrote:
>>>   typedef struct BlockCopyTask {
>>>       AioTask task;
>>> +    /*
>>> +     * IN parameters. Initialized in block_copy_task_create()
>>> +     * and never changed.
>>> +     */
>>
>> That's just not true for method field :(
> 
> I think, we just need to document that @method is never accessed 
> concurrently
> 
Ok I got confused in the last patch. Method is read by 
block_copy_task_entry only after it is re-set in 
block_copy_dirty_clusters loop. Sorry for that.

Will leave it as IN and document it better.

Still, moving the lock granularity inside the while loop might not be 
too bad. Not sure though. At this point skip_unallocated can also be an 
atomic, even though I sense that you won't like that :)

Emanuele
Emanuele Giuseppe Esposito June 21, 2021, 8:21 a.m. UTC | #7
On 19/06/2021 19:27, Vladimir Sementsov-Ogievskiy wrote:
> 14.06.2021 10:33, Emanuele Giuseppe Esposito wrote:
>> As done in BlockCopyCallState, categorize BlockCopyTask
>> and BlockCopyState in IN, State and OUT fields.
>> This is just to understand which field has to be protected with a lock.
>>
>> .sleep_state is handled in the series "coroutine: new sleep/wake API"
>> and thus here left as TODO.
>>
>> Signed-off-by: Emanuele Giuseppe Esposito<eesposit@redhat.com>
>> ---
>>   block/block-copy.c | 49 +++++++++++++++++++++++++++++-----------------
>>   1 file changed, 31 insertions(+), 18 deletions(-)
>>
>> diff --git a/block/block-copy.c b/block/block-copy.c
>> index 3f26be8ddc..5ff7764e87 100644
>> --- a/block/block-copy.c
>> +++ b/block/block-copy.c
>> @@ -52,29 +52,35 @@ typedef struct BlockCopyCallState {
>>       /* Coroutine where async block-copy is running */
>>       Coroutine *co;
>> -    /* To reference all call states from BlockCopyState */
>> -    QLIST_ENTRY(BlockCopyCallState) list;
>> -
>>       /* State */
>> -    int ret;
>>       bool finished;
>> -    QemuCoSleep sleep;
>> -    bool cancelled;
>> +    QemuCoSleep sleep; /* TODO: protect API with a lock */
>> +
>> +    /* To reference all call states from BlockCopyState */
>> +    QLIST_ENTRY(BlockCopyCallState) list;
>>       /* OUT parameters */
>> +    bool cancelled;
> 
> actually, cancelled is not OUT field. It's set by user to cancel the 
> operation. And block-copy tracks the field to understand should it 
> cancel at the moment or not. So, it's part of state.

Makes sense.

> 
> Also, I just now understand, that "out parameter" sounds strange here. 
> As "parameter" is an input by general meaning.. We may have "out 
> parameters" for functions, as all parameters of a function are generally 
> called "parameters" anyway. I think "OUT fields" is more correct here. I 
> don't insist, as I'm not an expert in English (for sure, you'll find 
> mistakes even in this paragraph :\
> 
Actually I am using the terminology that was already there :)
Anyways, I am not expert here either but fields do sounds better.
I will change parameter -> field replacement to this patch.

Emanuele
Emanuele Giuseppe Esposito June 21, 2021, 8:28 a.m. UTC | #8
On 19/06/2021 20:53, Vladimir Sementsov-Ogievskiy wrote:
> 14.06.2021 10:33, Emanuele Giuseppe Esposito wrote:
>> --- a/block/block-copy.c
>> +++ b/block/block-copy.c
>> @@ -52,29 +52,35 @@ typedef struct BlockCopyCallState {
>>       /* Coroutine where async block-copy is running */
>>       Coroutine *co;
>> -    /* To reference all call states from BlockCopyState */
>> -    QLIST_ENTRY(BlockCopyCallState) list;
>> -
>>       /* State */
>> -    int ret;
>>       bool finished;
>> -    QemuCoSleep sleep;
>> -    bool cancelled;
>> +    QemuCoSleep sleep; /* TODO: protect API with a lock */
>> +
>> +    /* To reference all call states from BlockCopyState */
>> +    QLIST_ENTRY(BlockCopyCallState) list;
>>       /* OUT parameters */
>> +    bool cancelled;
>>       bool error_is_read;
>> +    int ret;
> 
> Hmm, about that. Is @ret an "OUT parameter"? Yes it is.
> 
> But someone may think, that out parameters doesn't need locking like 
> "State" parameters (otherwise, what is the difference for the person who 
> read these comments?). But that is wrong. And ret is modified under 
> mutex for reason.

In patch 5 I added a comment above @ret and @error_is_read:
/* Fields protected by lock in BlockCopyState */

I can add your explanation too.

> 
> Actually, the full description of "ret" field usage may look as follows:
> 
> Set concurrently by tasks under mutex. Only set once by first failed 
> task (and untouched if no task failed).
> After finish (if call_state->finished is true) not modified anymore and 
> may be read safely without mutex.
> 
> So, before finished, ret is a kind of "State" too: it is both read and 
> written by tasks.
> 
> This shows to me that dividing fields into "IN", "State" and "OUT", 
> doesn't really help here. In this series we use different policies of 
> concurrent access to fields: some are accessed only under mutex, other 
> has more complex usage scenario (like this @ret), some needs atomic access.
> 
Yes but I think especially the IN vs State division helps a lot to 
understand what needs a lock and what doesn't.

Emanuele
Vladimir Sementsov-Ogievskiy June 22, 2021, 9:16 a.m. UTC | #9
21.06.2021 10:59, Emanuele Giuseppe Esposito wrote:
> 
> 
> On 19/06/2021 17:23, Vladimir Sementsov-Ogievskiy wrote:
>> 14.06.2021 10:33, Emanuele Giuseppe Esposito wrote:
>>> As done in BlockCopyCallState, categorize BlockCopyTask
>>> and BlockCopyState in IN, State and OUT fields.
>>> This is just to understand which field has to be protected with a lock.
>>>
>>> .sleep_state is handled in the series "coroutine: new sleep/wake API"
>>> and thus here left as TODO.
>>>
>>> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
>>> ---
>>>   block/block-copy.c | 49 +++++++++++++++++++++++++++++-----------------
>>>   1 file changed, 31 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/block/block-copy.c b/block/block-copy.c
>>> index 3f26be8ddc..5ff7764e87 100644
>>> --- a/block/block-copy.c
>>> +++ b/block/block-copy.c
>>> @@ -52,29 +52,35 @@ typedef struct BlockCopyCallState {
>>>       /* Coroutine where async block-copy is running */
>>>       Coroutine *co;
>>> -    /* To reference all call states from BlockCopyState */
>>> -    QLIST_ENTRY(BlockCopyCallState) list;
>>> -
>>>       /* State */
>>> -    int ret;
>>>       bool finished;
>>> -    QemuCoSleep sleep;
>>> -    bool cancelled;
>>> +    QemuCoSleep sleep; /* TODO: protect API with a lock */
>>> +
>>> +    /* To reference all call states from BlockCopyState */
>>> +    QLIST_ENTRY(BlockCopyCallState) list;
>>>       /* OUT parameters */
>>> +    bool cancelled;
>>>       bool error_is_read;
>>> +    int ret;
>>>   } BlockCopyCallState;
>>>   typedef struct BlockCopyTask {
>>>       AioTask task;
>>> +    /*
>>> +     * IN parameters. Initialized in block_copy_task_create()
>>> +     * and never changed.
>>> +     */
>>
>> That's just not true for method field :(
> 
> You're right, because it is modified later in the while loop of block_copy_dirty_clusters, but the task is already in the list.
> Will move it to state field.
> 
>>
>>>       BlockCopyState *s;
>>>       BlockCopyCallState *call_state;
>>>       int64_t offset;
>>> -    int64_t bytes;
>>>       BlockCopyMethod method;
>>> -    QLIST_ENTRY(BlockCopyTask) list;
>>> +
>>> +    /* State */
>>>       CoQueue wait_queue; /* coroutines blocked on this task */
>>> +    int64_t bytes;
>>> +    QLIST_ENTRY(BlockCopyTask) list;
>>>   } BlockCopyTask;
>>>   static int64_t task_end(BlockCopyTask *task)
>>> @@ -90,15 +96,25 @@ typedef struct BlockCopyState {
>>>        */
>>>       BdrvChild *source;
>>>       BdrvChild *target;
>>> -    BdrvDirtyBitmap *copy_bitmap;
>>> +
>>> +    /* State */
>>>       int64_t in_flight_bytes;
>>> -    int64_t cluster_size;
>>>       BlockCopyMethod method;
>>> -    int64_t max_transfer;
>>> -    uint64_t len;
>>>       QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
>>>       QLIST_HEAD(, BlockCopyCallState) calls;
>>> +    /* State fields that use a thread-safe API */
>>> +    BdrvDirtyBitmap *copy_bitmap;
>>> +    ProgressMeter *progress;
>>> +    SharedResource *mem;
>>> +    RateLimit rate_limit;
>>> +    /*
>>> +     * IN parameters. Initialized in block_copy_state_new()
>>> +     * and never changed.
>>> +     */
>>> +    int64_t cluster_size;
>>> +    int64_t max_transfer;
>>> +    uint64_t len;
>>>       BdrvRequestFlags write_flags;
>>>       /*
>>> @@ -114,14 +130,11 @@ typedef struct BlockCopyState {
>>>        * In this case, block_copy() will query the source’s allocation status,
>>>        * skip unallocated regions, clear them in the copy_bitmap, and invoke
>>>        * block_copy_reset_unallocated() every time it does.
>>> +     *
>>> +     * This field is set in backup_run() before coroutines are run,
>>> +     * therefore is an IN.
>>
>> That's not true: it is modified from backup_run, when block-copy already initialized, and block_copy() calls may be done from backup-top filter.
>>
> 
> Ok, I am not very familiar with the backup code, so I did not see it.
> This means we need to protect this field too.
> 
> At this point, I think we can increase the granularity of the lock in block_copy_dirty_clusters:
> instead of having in each while loop
> 
> block_copy_task_create(); // locks and releases internally
> block_copy_block_status(); // no lock used, but uses skip_unallocated so it will need one
> if()
>      block_copy_task_shrink(); // locks and releases internally
> if(s->skip_unallocated) // will need lock
>      block_copy_task_end(); // locks and releases internally
>      [..]
> if()
>      task->method = COPY_WRITE_ZEROS; // needs lock
> [..]
> 
> we can just lock the while section and eventually create _locked() version of task_end and similar functions that are also used in non-locked code blocks.

No, holding lock during block_copy_block_status is bad idea, as the function may yield (it call block_status).

I tend to agree that making atomic access to skip_unallocated is porbably the simplest way.

> 
>>>        */
>>>       bool skip_unallocated;
>>> -
>>> -    ProgressMeter *progress;
>>> -
>>> -    SharedResource *mem;
>>> -
>>> -    RateLimit rate_limit;
>>>   } BlockCopyState;
>>>   static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
>>>
>>
>>
>
Vladimir Sementsov-Ogievskiy June 22, 2021, 9:20 a.m. UTC | #10
21.06.2021 11:13, Emanuele Giuseppe Esposito wrote:
> 
> 
> On 19/06/2021 20:31, Vladimir Sementsov-Ogievskiy wrote:
>> 19.06.2021 18:23, Vladimir Sementsov-Ogievskiy wrote:
>>>>   typedef struct BlockCopyTask {
>>>>       AioTask task;
>>>> +    /*
>>>> +     * IN parameters. Initialized in block_copy_task_create()
>>>> +     * and never changed.
>>>> +     */
>>>
>>> That's just not true for method field :(
>>
>> I think, we just need to document that @method is never accessed concurrently
>>
> Ok I got confused in the last patch. Method is read by block_copy_task_entry only after it is re-set in block_copy_dirty_clusters loop. Sorry for that.
> 
> Will leave it as IN and document it better.

IN/State/OUT doesn't really help IMHO. As most of fields of interest doesn't cleanly belong to any of the categories. Better documentation is good. If same variables share the same access documentation - make them a group.

> 
> Still, moving the lock granularity inside the while loop might not be too bad. Not sure though. At this point skip_unallocated can also be an atomic, even though I sense that you won't like that :)
> 

As I said in parallel email, We can't keep lock locked during block_status. So, make it atomic if it is convenient.
diff mbox series

Patch

diff --git a/block/block-copy.c b/block/block-copy.c
index 3f26be8ddc..5ff7764e87 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -52,29 +52,35 @@  typedef struct BlockCopyCallState {
     /* Coroutine where async block-copy is running */
     Coroutine *co;
 
-    /* To reference all call states from BlockCopyState */
-    QLIST_ENTRY(BlockCopyCallState) list;
-
     /* State */
-    int ret;
     bool finished;
-    QemuCoSleep sleep;
-    bool cancelled;
+    QemuCoSleep sleep; /* TODO: protect API with a lock */
+
+    /* To reference all call states from BlockCopyState */
+    QLIST_ENTRY(BlockCopyCallState) list;
 
     /* OUT parameters */
+    bool cancelled;
     bool error_is_read;
+    int ret;
 } BlockCopyCallState;
 
 typedef struct BlockCopyTask {
     AioTask task;
 
+    /*
+     * IN parameters. Initialized in block_copy_task_create()
+     * and never changed.
+     */
     BlockCopyState *s;
     BlockCopyCallState *call_state;
     int64_t offset;
-    int64_t bytes;
     BlockCopyMethod method;
-    QLIST_ENTRY(BlockCopyTask) list;
+
+    /* State */
     CoQueue wait_queue; /* coroutines blocked on this task */
+    int64_t bytes;
+    QLIST_ENTRY(BlockCopyTask) list;
 } BlockCopyTask;
 
 static int64_t task_end(BlockCopyTask *task)
@@ -90,15 +96,25 @@  typedef struct BlockCopyState {
      */
     BdrvChild *source;
     BdrvChild *target;
-    BdrvDirtyBitmap *copy_bitmap;
+
+    /* State */
     int64_t in_flight_bytes;
-    int64_t cluster_size;
     BlockCopyMethod method;
-    int64_t max_transfer;
-    uint64_t len;
     QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
     QLIST_HEAD(, BlockCopyCallState) calls;
+    /* State fields that use a thread-safe API */
+    BdrvDirtyBitmap *copy_bitmap;
+    ProgressMeter *progress;
+    SharedResource *mem;
+    RateLimit rate_limit;
 
+    /*
+     * IN parameters. Initialized in block_copy_state_new()
+     * and never changed.
+     */
+    int64_t cluster_size;
+    int64_t max_transfer;
+    uint64_t len;
     BdrvRequestFlags write_flags;
 
     /*
@@ -114,14 +130,11 @@  typedef struct BlockCopyState {
      * In this case, block_copy() will query the source’s allocation status,
      * skip unallocated regions, clear them in the copy_bitmap, and invoke
      * block_copy_reset_unallocated() every time it does.
+     *
+     * This field is set in backup_run() before coroutines are run,
+     * therefore is an IN.
      */
     bool skip_unallocated;
-
-    ProgressMeter *progress;
-
-    SharedResource *mem;
-
-    RateLimit rate_limit;
 } BlockCopyState;
 
 static BlockCopyTask *find_conflicting_task(BlockCopyState *s,