
[RFC] migration/block:limit the time used for block migration

Message ID 1490693009-10037-1-git-send-email-lidongchen@tencent.com
State New

Commit Message

858585 jemmy March 28, 2017, 9:23 a.m. UTC
When migrating at high speed, mig_save_device_bulk invokes
bdrv_is_allocated too frequently, which causes VNC to respond slowly.
This patch limits the time spent in bdrv_is_allocated.

Signed-off-by: Lidong Chen <lidongchen@tencent.com>
---
 migration/block.c | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

Comments

858585 jemmy March 28, 2017, 9:32 a.m. UTC | #1
When migrating the VM at high speed, I find that VNC responds slowly.

The bug can be reproduced with these commands:
virsh migrate-setspeed 165cf436-312f-47e7-90f2-f8aa63f34893 900
virsh migrate --live 165cf436-312f-47e7-90f2-f8aa63f34893
--copy-storage-inc qemu+ssh://10.59.163.38/system

With --copy-storage-all there is no problem:
virsh migrate --live 165cf436-312f-47e7-90f2-f8aa63f34893
--copy-storage-all qemu+ssh://10.59.163.38/system

mig_save_device_bulk invokes bdrv_is_allocated, but bdrv_is_allocated
may wait for a long time, which in turn makes the main thread wait for
a long time.

This patch limits the time spent waiting for bdrv_is_allocated.

I have not found a better way to solve this bug. Any suggestions?

Thanks.

On Tue, Mar 28, 2017 at 5:23 PM, Lidong Chen <jemmy858585@gmail.com> wrote:

> When migrating at high speed, mig_save_device_bulk invokes
> bdrv_is_allocated too frequently, which causes VNC to respond slowly.
> This patch limits the time spent in bdrv_is_allocated.
>
> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
> ---
>  migration/block.c | 39 +++++++++++++++++++++++++++++++--------
>  1 file changed, 31 insertions(+), 8 deletions(-)
>
> diff --git a/migration/block.c b/migration/block.c
> index 7734ff7..d3e81ca 100644
> --- a/migration/block.c
> +++ b/migration/block.c
> @@ -110,6 +110,7 @@ typedef struct BlkMigState {
>      int transferred;
>      int prev_progress;
>      int bulk_completed;
> +    int time_ns_used;
>
>      /* Lock must be taken _inside_ the iothread lock and any AioContexts.  */
>      QemuMutex lock;
> @@ -263,6 +264,7 @@ static void blk_mig_read_cb(void *opaque, int ret)
>      blk_mig_unlock();
>  }
>
> +#define BILLION 1000000000L
>  /* Called with no lock taken.  */
>
>  static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
> @@ -272,16 +274,33 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>      BlockBackend *bb = bmds->blk;
>      BlkMigBlock *blk;
>      int nr_sectors;
> +    struct timespec ts1, ts2;
> +    int ret = 0;
> +    int timeout_flag = 0;
>
>      if (bmds->shared_base) {
>          qemu_mutex_lock_iothread();
>          aio_context_acquire(blk_get_aio_context(bb));
>          /* Skip unallocated sectors; intentionally treats failure as
>           * an allocated sector */
> -        while (cur_sector < total_sectors &&
> -               !bdrv_is_allocated(blk_bs(bb), cur_sector,
> -                                  MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) {
> -            cur_sector += nr_sectors;
> +        while (cur_sector < total_sectors) {
> +            clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
> +            ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
> +                                    MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
> +            clock_gettime(CLOCK_MONOTONIC_RAW, &ts2);
> +
> +            block_mig_state.time_ns_used += (ts2.tv_sec - ts1.tv_sec) * BILLION
> +                          + (ts2.tv_nsec - ts1.tv_nsec);
> +
> +            if (!ret) {
> +                cur_sector += nr_sectors;
> +                if (block_mig_state.time_ns_used > 100000) {
> +                    timeout_flag = 1;
> +                    break;
> +                }
> +            } else {
> +                break;
> +            }
>          }
>          aio_context_release(blk_get_aio_context(bb));
>          qemu_mutex_unlock_iothread();
> @@ -292,6 +311,11 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>          return 1;
>      }
>
> +    if (timeout_flag == 1) {
> +        bmds->cur_sector = bmds->completed_sectors = cur_sector;
> +        return 0;
> +    }
> +
>      bmds->completed_sectors = cur_sector;
>
>      cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
> @@ -576,9 +600,6 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
>              }
>
>              bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
> -            sector += nr_sectors;
> -            bmds->cur_dirty = sector;
> -
>              break;
>          }
>          sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
> @@ -756,6 +777,7 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
>      }
>
>      blk_mig_reset_dirty_cursor();
> +    block_mig_state.time_ns_used = 0;
>
>      /* control the rate of transfer */
>      blk_mig_lock();
> @@ -764,7 +786,8 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
>             qemu_file_get_rate_limit(f) &&
>             (block_mig_state.submitted +
>              block_mig_state.read_done) <
> -           MAX_INFLIGHT_IO) {
> +           MAX_INFLIGHT_IO &&
> +           block_mig_state.time_ns_used <= 100000) {
>          blk_mig_unlock();
>          if (block_mig_state.bulk_completed == 0) {
>              /* first finish the bulk phase */
> --
> 1.8.3.1
>
>
Juan Quintela March 28, 2017, 9:47 a.m. UTC | #2
Lidong Chen <jemmy858585@gmail.com> wrote:
> When migrating at high speed, mig_save_device_bulk invokes
> bdrv_is_allocated too frequently, which causes VNC to respond slowly.
> This patch limits the time spent in bdrv_is_allocated.
>
> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
> ---
>  migration/block.c | 39 +++++++++++++++++++++++++++++++--------
>  1 file changed, 31 insertions(+), 8 deletions(-)
>
> diff --git a/migration/block.c b/migration/block.c
> index 7734ff7..d3e81ca 100644
> --- a/migration/block.c
> +++ b/migration/block.c
> @@ -110,6 +110,7 @@ typedef struct BlkMigState {
>      int transferred;
>      int prev_progress;
>      int bulk_completed;
> +    int time_ns_used;

An int that can only take values 0/1 is called a bool O:-)
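i.e. something like this sketch (QEMU's osdep.h already pulls in
<stdbool.h>):

    bool timeout_flag = false;   /* instead of: int timeout_flag = 0; */
    ...
    timeout_flag = true;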


>      if (bmds->shared_base) {
>          qemu_mutex_lock_iothread();
>          aio_context_acquire(blk_get_aio_context(bb));
>          /* Skip unallocated sectors; intentionally treats failure as
>           * an allocated sector */
> -        while (cur_sector < total_sectors &&
> -               !bdrv_is_allocated(blk_bs(bb), cur_sector,
> -                                  MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) {
> -            cur_sector += nr_sectors;
> +        while (cur_sector < total_sectors) {
> +            clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
> +            ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
> +                                    MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
> +            clock_gettime(CLOCK_MONOTONIC_RAW, &ts2);

Do we really want to call clock_gettime each time that
bdrv_is_allocated() is called?  My understanding is that clock_gettime
is expensive, but I don't know how expensive bdrv_is_allocated() is.

And while we are at it... shouldn't we also check before entering the while loop?
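
Something like this untested sketch (reusing the patch's own
accounting) would also honour a budget that is already used up before
the first call:

    while (cur_sector < total_sectors) {
        if (block_mig_state.time_ns_used > 100000) {
            timeout_flag = 1;
            break;
        }
        clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
        ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
                                MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
        clock_gettime(CLOCK_MONOTONIC_RAW, &ts2);
        block_mig_state.time_ns_used += (ts2.tv_sec - ts1.tv_sec) * BILLION
                                        + (ts2.tv_nsec - ts1.tv_nsec);
        if (ret) {
            break;    /* allocated (or error treated as allocated) */
        }
        cur_sector += nr_sectors;
    }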


> +
> +            block_mig_state.time_ns_used += (ts2.tv_sec - ts1.tv_sec) * BILLION
> +                          + (ts2.tv_nsec - ts1.tv_nsec);
> +
> +            if (!ret) {
> +                cur_sector += nr_sectors;
> +                if (block_mig_state.time_ns_used > 100000) {
> +                    timeout_flag = 1;
> +                    break;
> +                }
> +            } else {
> +                break;
> +            }
>          }
>          aio_context_release(blk_get_aio_context(bb));
>          qemu_mutex_unlock_iothread();
> @@ -292,6 +311,11 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>          return 1;
>      }
>  
> +    if (timeout_flag == 1) {
> +        bmds->cur_sector = bmds->completed_sectors = cur_sector;
> +        return 0;
> +    }
> +
>      bmds->completed_sectors = cur_sector;
>  
>      cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
> @@ -576,9 +600,6 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
>              }
>  
>              bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
> -            sector += nr_sectors;
> -            bmds->cur_dirty = sector;
> -
>              break;
>          }
>          sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
> @@ -756,6 +777,7 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
>      }
>  
>      blk_mig_reset_dirty_cursor();
> +    block_mig_state.time_ns_used = 0;
>  
>      /* control the rate of transfer */
>      blk_mig_lock();
> @@ -764,7 +786,8 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
>             qemu_file_get_rate_limit(f) &&
>             (block_mig_state.submitted +
>              block_mig_state.read_done) <
> -           MAX_INFLIGHT_IO) {
> +           MAX_INFLIGHT_IO &&
> +           block_mig_state.time_ns_used <= 100000) {

Change this magic number (and the one used previously) to one constant
named BIG_DELAY, 100MS, or whatever you fancy.
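
An untested sketch (the name and value are placeholders):

    #define BIG_DELAY 100000    /* ns budget per migration iteration */

    if (block_mig_state.time_ns_used > BIG_DELAY) {
        timeout_flag = 1;
        break;
    }

and in block_save_iterate():

           block_mig_state.time_ns_used <= BIG_DELAY) {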

Thanks, Juan.
858585 jemmy March 29, 2017, 1:21 p.m. UTC | #3
On Tue, Mar 28, 2017 at 5:47 PM, Juan Quintela <quintela@redhat.com> wrote:
> Lidong Chen <jemmy858585@gmail.com> wrote:
>> When migrating at high speed, mig_save_device_bulk invokes
>> bdrv_is_allocated too frequently, which causes VNC to respond slowly.
>> This patch limits the time spent in bdrv_is_allocated.
>>
>> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
>> ---
>>  migration/block.c | 39 +++++++++++++++++++++++++++++++--------
>>  1 file changed, 31 insertions(+), 8 deletions(-)
>>
>> diff --git a/migration/block.c b/migration/block.c
>> index 7734ff7..d3e81ca 100644
>> --- a/migration/block.c
>> +++ b/migration/block.c
>> @@ -110,6 +110,7 @@ typedef struct BlkMigState {
>>      int transferred;
>>      int prev_progress;
>>      int bulk_completed;
>> +    int time_ns_used;
>
> An int that can only take values 0/1 is called a bool O:-)
time_ns_used stores how many ns were spent in bdrv_is_allocated.

>
>
>>      if (bmds->shared_base) {
>>          qemu_mutex_lock_iothread();
>>          aio_context_acquire(blk_get_aio_context(bb));
>>          /* Skip unallocated sectors; intentionally treats failure as
>>           * an allocated sector */
>> -        while (cur_sector < total_sectors &&
>> -               !bdrv_is_allocated(blk_bs(bb), cur_sector,
>> -                                  MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) {
>> -            cur_sector += nr_sectors;
>> +        while (cur_sector < total_sectors) {
>> +            clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
>> +            ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
>> +                                    MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
>> +            clock_gettime(CLOCK_MONOTONIC_RAW, &ts2);
>
> Do we really want to call clock_gettime each time that
> bdrv_is_allocated() is called?  My understanding is that clock_gettime
> is expensive, but I don't know how expensive bdrv_is_allocated() is.

I wrote this code to measure the time used by bdrv_is_allocated():

    static int max_time = 0;
    int tmp;
    ...
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
    ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
                            MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts2);

    tmp = (ts2.tv_sec - ts1.tv_sec) * 1000000000L
          + (ts2.tv_nsec - ts1.tv_nsec);
    if (tmp > max_time) {
        max_time = tmp;
        fprintf(stderr, "max_time is %d\n", max_time);
    }

The test results are below:

 max_time is 37014
 max_time is 1075534
 max_time is 17180913
 max_time is 28586762
 max_time is 49563584
 max_time is 103085447
 max_time is 110836833
 max_time is 120331438

So I think it's necessary to call clock_gettime each time.
But clock_gettime is only available on Linux; maybe clock() is better.
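
For reference, an untested sketch with standard C clock() (caveat:
clock() measures CPU time rather than wall time, so time spent blocked
inside bdrv_is_allocated would not be counted, which may make it
unsuitable here):

    #include <time.h>

    clock_t c1 = clock();
    ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
                            MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
    clock_t c2 = clock();

    /* convert clock ticks to ns via CLOCKS_PER_SEC */
    block_mig_state.time_ns_used += (c2 - c1) * (1000000000L / CLOCKS_PER_SEC);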

>
> And while we are at it... shouldn't we also check before entering the while loop?
I also check it in block_save_iterate:
+           MAX_INFLIGHT_IO &&
+           block_mig_state.time_ns_used <= 100000) {

>
>
>> +
>> +            block_mig_state.time_ns_used += (ts2.tv_sec - ts1.tv_sec) * BILLION
>> +                          + (ts2.tv_nsec - ts1.tv_nsec);
>> +
>> +            if (!ret) {
>> +                cur_sector += nr_sectors;
>> +                if (block_mig_state.time_ns_used > 100000) {
>> +                    timeout_flag = 1;
>> +                    break;
>> +                }
>> +            } else {
>> +                break;
>> +            }
>>          }
>>          aio_context_release(blk_get_aio_context(bb));
>>          qemu_mutex_unlock_iothread();
>> @@ -292,6 +311,11 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>>          return 1;
>>      }
>>
>> +    if (timeout_flag == 1) {
>> +        bmds->cur_sector = bmds->completed_sectors = cur_sector;
>> +        return 0;
>> +    }
>> +
>>      bmds->completed_sectors = cur_sector;
>>
>>      cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
>> @@ -576,9 +600,6 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
>>              }
>>
>>              bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
>> -            sector += nr_sectors;
>> -            bmds->cur_dirty = sector;
>> -
>>              break;
>>          }
>>          sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
>> @@ -756,6 +777,7 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
>>      }
>>
>>      blk_mig_reset_dirty_cursor();
>> +    block_mig_state.time_ns_used = 0;
>>
>>      /* control the rate of transfer */
>>      blk_mig_lock();
>> @@ -764,7 +786,8 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
>>             qemu_file_get_rate_limit(f) &&
>>             (block_mig_state.submitted +
>>              block_mig_state.read_done) <
>> -           MAX_INFLIGHT_IO) {
>> +           MAX_INFLIGHT_IO &&
>> +           block_mig_state.time_ns_used <= 100000) {
>
> Change this magic number (and the one used previously) to one constant
> named BIG_DELAY, 100MS, or whatever you fancy.
OK, I will change it to BIG_DELAY.

>
> Thanks, Juan.
Juan Quintela March 29, 2017, 3:57 p.m. UTC | #4
858585 jemmy <jemmy858585@gmail.com> wrote:
> On Tue, Mar 28, 2017 at 5:47 PM, Juan Quintela <quintela@redhat.com> wrote:
>> Lidong Chen <jemmy858585@gmail.com> wrote:
>>> When migrating at high speed, mig_save_device_bulk invokes
>>> bdrv_is_allocated too frequently, which causes VNC to respond slowly.
>>> This patch limits the time spent in bdrv_is_allocated.
>>>
>>> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
>>> ---
>>>  migration/block.c | 39 +++++++++++++++++++++++++++++++--------
>>>  1 file changed, 31 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/migration/block.c b/migration/block.c
>>> index 7734ff7..d3e81ca 100644
>>> --- a/migration/block.c
>>> +++ b/migration/block.c
>>> @@ -110,6 +110,7 @@ typedef struct BlkMigState {
>>>      int transferred;
>>>      int prev_progress;
>>>      int bulk_completed;
>>> +    int time_ns_used;
>>
>> An int that can only take values 0/1 is called a bool O:-)
> time_ns_used stores how many ns were spent in bdrv_is_allocated.

Oops, I really meant timeout_flag, sorry :-(

>> Do we really want to call clock_gettime each time that
>> bdrv_is_allocated() is called?  My understanding is that clock_gettime
>> is expensive, but I don't know how expensive bdrv_is_allocated() is.
>
> I wrote this code to measure the time used by bdrv_is_allocated():
>
> static int max_time = 0;
> int tmp;
> ...
> clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
> ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
>                         MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
> clock_gettime(CLOCK_MONOTONIC_RAW, &ts2);
>
> tmp = (ts2.tv_sec - ts1.tv_sec) * 1000000000L
>       + (ts2.tv_nsec - ts1.tv_nsec);
> if (tmp > max_time) {
>     max_time = tmp;
>     fprintf(stderr, "max_time is %d\n", max_time);
> }
>
> The test results are below:
>
>  max_time is 37014
>  max_time is 1075534
>  max_time is 17180913
>  max_time is 28586762
>  max_time is 49563584
>  max_time is 103085447
>  max_time is 110836833
>  max_time is 120331438

This is around 120ms, no?  It is quite a lot, really :-(

> So I think it's necessary to call clock_gettime each time.
> But clock_gettime is only available on Linux; maybe clock() is better.

Thanks, Juan.
858585 jemmy April 5, 2017, 3:55 a.m. UTC | #5
On Wed, Mar 29, 2017 at 11:57 PM, Juan Quintela <quintela@redhat.com> wrote:
>
> 858585 jemmy <jemmy858585@gmail.com> wrote:
> > On Tue, Mar 28, 2017 at 5:47 PM, Juan Quintela <quintela@redhat.com> wrote:
> >> Lidong Chen <jemmy858585@gmail.com> wrote:
> >>> When migrating at high speed, mig_save_device_bulk invokes
> >>> bdrv_is_allocated too frequently, which causes VNC to respond slowly.
> >>> This patch limits the time spent in bdrv_is_allocated.
> >>>
> >>> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
> >>> ---
> >>>  migration/block.c | 39 +++++++++++++++++++++++++++++++--------
> >>>  1 file changed, 31 insertions(+), 8 deletions(-)
> >>>
> >>> diff --git a/migration/block.c b/migration/block.c
> >>> index 7734ff7..d3e81ca 100644
> >>> --- a/migration/block.c
> >>> +++ b/migration/block.c
> >>> @@ -110,6 +110,7 @@ typedef struct BlkMigState {
> >>>      int transferred;
> >>>      int prev_progress;
> >>>      int bulk_completed;
> >>> +    int time_ns_used;
> >>
> >> An int that can only take values 0/1 is called a bool O:-)
> > time_ns_used stores how many ns were spent in bdrv_is_allocated.
>
> Oops, I really meant timeout_flag, sorry :-(
>
> >> Do we really want to call clock_gettime each time that
> >> bdrv_is_allocated() is called?  My understanding is that clock_gettime
> >> is expensive, but I don't know how expensive bdrv_is_allocated() is.
> >
> > I wrote this code to measure the time used by bdrv_is_allocated():
> >
> > static int max_time = 0;
> > int tmp;
> > ...
> > clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
> > ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
> >                         MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
> > clock_gettime(CLOCK_MONOTONIC_RAW, &ts2);
> >
> > tmp = (ts2.tv_sec - ts1.tv_sec) * 1000000000L
> >       + (ts2.tv_nsec - ts1.tv_nsec);
> > if (tmp > max_time) {
> >     max_time = tmp;
> >     fprintf(stderr, "max_time is %d\n", max_time);
> > }
> >
> > The test results are below:
> >
> >  max_time is 37014
> >  max_time is 1075534
> >  max_time is 17180913
> >  max_time is 28586762
> >  max_time is 49563584
> >  max_time is 103085447
> >  max_time is 110836833
> >  max_time is 120331438
>
> This is around 120ms, no?  It is quite a lot, really :-(

I found the delay is mainly caused by the qemu_co_mutex_lock invoked in
qcow2_co_get_block_status:

    qemu_co_mutex_lock(&s->lock);
    ret = qcow2_get_cluster_offset(bs, sector_num << 9, &bytes,
                                   &cluster_offset);
    qemu_co_mutex_unlock(&s->lock);

Presumably s->lock is held by another request across its cluster I/O,
so bdrv_is_allocated can block for the duration of that I/O.

>
>
> > So I think it's necessary to call clock_gettime each time.
> > But clock_gettime is only available on Linux; maybe clock() is better.
>
qemu_clock_get_ms(QEMU_CLOCK_REALTIME) is a better option.
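
An untested sketch of the loop with that API (time_ms_used and
BIG_DELAY_MS are hypothetical ms counterparts of the patch's fields):

    #include "qemu/timer.h"

    int64_t t1 = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
                            MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
    block_mig_state.time_ms_used += qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - t1;

    if (!ret) {
        cur_sector += nr_sectors;
        if (block_mig_state.time_ms_used > BIG_DELAY_MS) {
            timeout_flag = 1;
            break;
        }
    } else {
        break;
    }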

> Thanks, Juan.
858585 jemmy April 5, 2017, 7:38 a.m. UTC | #6
On Wed, Mar 29, 2017 at 9:21 PM, 858585 jemmy <jemmy858585@gmail.com> wrote:
> On Tue, Mar 28, 2017 at 5:47 PM, Juan Quintela <quintela@redhat.com> wrote:
>> Lidong Chen <jemmy858585@gmail.com> wrote:
>>> When migrating at high speed, mig_save_device_bulk invokes
>>> bdrv_is_allocated too frequently, which causes VNC to respond slowly.
>>> This patch limits the time spent in bdrv_is_allocated.
>>>
>>> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
>>> ---
>>>  migration/block.c | 39 +++++++++++++++++++++++++++++++--------
>>>  1 file changed, 31 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/migration/block.c b/migration/block.c
>>> index 7734ff7..d3e81ca 100644
>>> --- a/migration/block.c
>>> +++ b/migration/block.c
>>> @@ -110,6 +110,7 @@ typedef struct BlkMigState {
>>>      int transferred;
>>>      int prev_progress;
>>>      int bulk_completed;
>>> +    int time_ns_used;
>>
>> An int that can only take values 0/1 is called a bool O:-)
> time_ns_used stores how many ns were spent in bdrv_is_allocated.
>
>>
>>
>>>      if (bmds->shared_base) {
>>>          qemu_mutex_lock_iothread();
>>>          aio_context_acquire(blk_get_aio_context(bb));
>>>          /* Skip unallocated sectors; intentionally treats failure as
>>>           * an allocated sector */
>>> -        while (cur_sector < total_sectors &&
>>> -               !bdrv_is_allocated(blk_bs(bb), cur_sector,
>>> -                                  MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) {
>>> -            cur_sector += nr_sectors;
>>> +        while (cur_sector < total_sectors) {
>>> +            clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
>>> +            ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
>>> +                                    MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
>>> +            clock_gettime(CLOCK_MONOTONIC_RAW, &ts2);
>>
>> Do we really want to call clock_gettime each time that
>> bdrv_is_allocated() is called?  My understanding is that clock_gettime
>> is expensive, but I don't know how expensive bdrv_is_allocated() is.
>
> I wrote this code to measure the time used by bdrv_is_allocated():
>
> static int max_time = 0;
> int tmp;
> ...
> clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
> ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
>                         MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
> clock_gettime(CLOCK_MONOTONIC_RAW, &ts2);
>
> tmp = (ts2.tv_sec - ts1.tv_sec) * 1000000000L
>       + (ts2.tv_nsec - ts1.tv_nsec);
> if (tmp > max_time) {
>     max_time = tmp;
>     fprintf(stderr, "max_time is %d\n", max_time);
> }
>
> The test results are below:
>
>  max_time is 37014
>  max_time is 1075534
>  max_time is 17180913
>  max_time is 28586762
>  max_time is 49563584
>  max_time is 103085447
>  max_time is 110836833
>  max_time is 120331438
>
> So I think it's necessary to call clock_gettime each time.
> But clock_gettime is only available on Linux; maybe clock() is better.
>
>>
>> And while we are at it... shouldn't we also check before entering the while loop?
> I also check it in block_save_iterate:
> +           MAX_INFLIGHT_IO &&
> +           block_mig_state.time_ns_used <= 100000) {
>
>>
>>
>>> +
>>> +            block_mig_state.time_ns_used += (ts2.tv_sec - ts1.tv_sec) * BILLION
>>> +                          + (ts2.tv_nsec - ts1.tv_nsec);
>>> +
>>> +            if (!ret) {
>>> +                cur_sector += nr_sectors;
>>> +                if (block_mig_state.time_ns_used > 100000) {
>>> +                    timeout_flag = 1;
>>> +                    break;
>>> +                }
>>> +            } else {
>>> +                break;
>>> +            }
>>>          }
>>>          aio_context_release(blk_get_aio_context(bb));
>>>          qemu_mutex_unlock_iothread();
>>> @@ -292,6 +311,11 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>>>          return 1;
>>>      }
>>>
>>> +    if (timeout_flag == 1) {
>>> +        bmds->cur_sector = bmds->completed_sectors = cur_sector;
>>> +        return 0;
>>> +    }
>>> +
>>>      bmds->completed_sectors = cur_sector;
>>>
>>>      cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
>>> @@ -576,9 +600,6 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
>>>              }
>>>
>>>              bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
>>> -            sector += nr_sectors;
>>> -            bmds->cur_dirty = sector;
>>> -
>>>              break;
>>>          }
>>>          sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
>>> @@ -756,6 +777,7 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
>>>      }
>>>
>>>      blk_mig_reset_dirty_cursor();
>>> +    block_mig_state.time_ns_used = 0;
>>>
>>>      /* control the rate of transfer */
>>>      blk_mig_lock();
>>> @@ -764,7 +786,8 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
>>>             qemu_file_get_rate_limit(f) &&
>>>             (block_mig_state.submitted +
>>>              block_mig_state.read_done) <
>>> -           MAX_INFLIGHT_IO) {
>>> +           MAX_INFLIGHT_IO &&
>>> +           block_mig_state.time_ns_used <= 100000) {
>>
>> Change this magic number (and the one used previously) to one constant
>> named BIG_DELAY, 100MS, or whatever you fancy.
> OK, I will change it to BIG_DELAY.

I find 10000 ns is too small and will reduce the migration speed.
I will change BIG_DELAY to 500000, and then the migration speed will not be affected.

I will resubmit this patch.

>
>>
>> Thanks, Juan.

Patch

diff --git a/migration/block.c b/migration/block.c
index 7734ff7..d3e81ca 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -110,6 +110,7 @@  typedef struct BlkMigState {
     int transferred;
     int prev_progress;
     int bulk_completed;
+    int time_ns_used;
 
     /* Lock must be taken _inside_ the iothread lock and any AioContexts.  */
     QemuMutex lock;
@@ -263,6 +264,7 @@  static void blk_mig_read_cb(void *opaque, int ret)
     blk_mig_unlock();
 }
 
+#define BILLION 1000000000L
 /* Called with no lock taken.  */
 
 static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
@@ -272,16 +274,33 @@  static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
     BlockBackend *bb = bmds->blk;
     BlkMigBlock *blk;
     int nr_sectors;
+    struct timespec ts1, ts2;
+    int ret = 0;
+    int timeout_flag = 0;
 
     if (bmds->shared_base) {
         qemu_mutex_lock_iothread();
         aio_context_acquire(blk_get_aio_context(bb));
         /* Skip unallocated sectors; intentionally treats failure as
          * an allocated sector */
-        while (cur_sector < total_sectors &&
-               !bdrv_is_allocated(blk_bs(bb), cur_sector,
-                                  MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) {
-            cur_sector += nr_sectors;
+        while (cur_sector < total_sectors) {
+            clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
+            ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
+                                    MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
+            clock_gettime(CLOCK_MONOTONIC_RAW, &ts2);
+
+            block_mig_state.time_ns_used += (ts2.tv_sec - ts1.tv_sec) * BILLION
+                          + (ts2.tv_nsec - ts1.tv_nsec);
+
+            if (!ret) {
+                cur_sector += nr_sectors;
+                if (block_mig_state.time_ns_used > 100000) {
+                    timeout_flag = 1;
+                    break;
+                }
+            } else {
+                break;
+            }
         }
         aio_context_release(blk_get_aio_context(bb));
         qemu_mutex_unlock_iothread();
@@ -292,6 +311,11 @@  static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
         return 1;
     }
 
+    if (timeout_flag == 1) {
+        bmds->cur_sector = bmds->completed_sectors = cur_sector;
+        return 0;
+    }
+
     bmds->completed_sectors = cur_sector;
 
     cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
@@ -576,9 +600,6 @@  static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
             }
 
             bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
-            sector += nr_sectors;
-            bmds->cur_dirty = sector;
-
             break;
         }
         sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
@@ -756,6 +777,7 @@  static int block_save_iterate(QEMUFile *f, void *opaque)
     }
 
     blk_mig_reset_dirty_cursor();
+    block_mig_state.time_ns_used = 0;
 
     /* control the rate of transfer */
     blk_mig_lock();
@@ -764,7 +786,8 @@  static int block_save_iterate(QEMUFile *f, void *opaque)
            qemu_file_get_rate_limit(f) &&
            (block_mig_state.submitted +
             block_mig_state.read_done) <
-           MAX_INFLIGHT_IO) {
+           MAX_INFLIGHT_IO &&
+           block_mig_state.time_ns_used <= 100000) {
         blk_mig_unlock();
         if (block_mig_state.bulk_completed == 0) {
             /* first finish the bulk phase */