diff mbox

[v2] mirror: double performance of the bulk stage if the disc is full

Message ID 1468516741-82174-1-git-send-email-vsementsov@virtuozzo.com
State New
Headers show

Commit Message

Vladimir Sementsov-Ogievskiy July 14, 2016, 5:19 p.m. UTC
Mirror can do up to 16 in-flight requests, but actually on full copy
(the whole source disk is non-zero) in-flight is always 1. This happens
as the request is not limited in size: the data occupies maximum available
capacity of s->buf.

The patch limits the size of the request to some artificial constant
(1 Mb here), which is not that big or small. This effectively enables
back parallelism in mirror code as it was designed.

The result is important: the time to migrate 10 Gb disk is reduced from
~350 sec to 170 sec.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Eric Blake <eblake@redhat.com>
---

v2: in case of s->buf_size larger than default use it to limit io_sectors

 block/mirror.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

Comments

Denis V. Lunev July 18, 2016, 3:36 p.m. UTC | #1
On 07/14/2016 08:19 PM, Vladimir Sementsov-Ogievskiy wrote:
> Mirror can do up to 16 in-flight requests, but actually on full copy
> (the whole source disk is non-zero) in-flight is always 1. This happens
> as the request is not limited in size: the data occupies maximum available
> capacity of s->buf.
>
> The patch limits the size of the request to some artificial constant
> (1 Mb here), which is not that big or small. This effectively enables
> back parallelism in mirror code as it was designed.
>
> The result is important: the time to migrate 10 Gb disk is reduced from
> ~350 sec to 170 sec.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Stefan Hajnoczi <stefanha@redhat.com>
> CC: Fam Zheng <famz@redhat.com>
> CC: Kevin Wolf <kwolf@redhat.com>
> CC: Max Reitz <mreitz@redhat.com>
> CC: Jeff Cody <jcody@redhat.com>
> CC: Eric Blake <eblake@redhat.com>
> ---
>
> v2: in case of s->buf_size larger than default use it to limit io_sectors
>
>   block/mirror.c | 10 ++++++++--
>   1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/block/mirror.c b/block/mirror.c
> index b1e633e..3ac3b4d 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -23,7 +23,9 @@
>   
>   #define SLICE_TIME    100000000ULL /* ns */
>   #define MAX_IN_FLIGHT 16
> -#define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
> +#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
> +#define DEFAULT_MIRROR_BUF_SIZE \
> +    (MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
>   
>   /* The mirroring buffer is a list of granularity-sized chunks.
>    * Free chunks are organized in a list.
> @@ -322,6 +324,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
>       int nb_chunks = 1;
>       int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
>       int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
> +    int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
> +                             MAX_IO_SECTORS);
>   
>       sector_num = hbitmap_iter_next(&s->hbi);
>       if (sector_num < 0) {
> @@ -385,7 +389,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
>                                             nb_chunks * sectors_per_chunk,
>                                             &io_sectors, &file);
>           if (ret < 0) {
> -            io_sectors = nb_chunks * sectors_per_chunk;
> +            io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
> +        } else if (ret & BDRV_BLOCK_DATA) {
> +            io_sectors = MIN(io_sectors, max_io_sectors);
>           }
>   
>           io_sectors -= io_sectors % sectors_per_chunk;
ping
Denis V. Lunev July 20, 2016, 5:36 p.m. UTC | #2
On 07/14/2016 08:19 PM, Vladimir Sementsov-Ogievskiy wrote:
> Mirror can do up to 16 in-flight requests, but actually on full copy
> (the whole source disk is non-zero) in-flight is always 1. This happens
> as the request is not limited in size: the data occupies maximum available
> capacity of s->buf.
>
> The patch limits the size of the request to some artificial constant
> (1 Mb here), which is not that big or small. This effectively enables
> back parallelism in mirror code as it was designed.
>
> The result is important: the time to migrate 10 Gb disk is reduced from
> ~350 sec to 170 sec.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Stefan Hajnoczi <stefanha@redhat.com>
> CC: Fam Zheng <famz@redhat.com>
> CC: Kevin Wolf <kwolf@redhat.com>
> CC: Max Reitz <mreitz@redhat.com>
> CC: Jeff Cody <jcody@redhat.com>
> CC: Eric Blake <eblake@redhat.com>
> ---
>
> v2: in case of s->buf_size larger than default use it to limit io_sectors
>
>   block/mirror.c | 10 ++++++++--
>   1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/block/mirror.c b/block/mirror.c
> index b1e633e..3ac3b4d 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -23,7 +23,9 @@
>   
>   #define SLICE_TIME    100000000ULL /* ns */
>   #define MAX_IN_FLIGHT 16
> -#define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
> +#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
> +#define DEFAULT_MIRROR_BUF_SIZE \
> +    (MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
>   
>   /* The mirroring buffer is a list of granularity-sized chunks.
>    * Free chunks are organized in a list.
> @@ -322,6 +324,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
>       int nb_chunks = 1;
>       int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
>       int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
> +    int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
> +                             MAX_IO_SECTORS);
>   
>       sector_num = hbitmap_iter_next(&s->hbi);
>       if (sector_num < 0) {
> @@ -385,7 +389,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
>                                             nb_chunks * sectors_per_chunk,
>                                             &io_sectors, &file);
>           if (ret < 0) {
> -            io_sectors = nb_chunks * sectors_per_chunk;
> +            io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
> +        } else if (ret & BDRV_BLOCK_DATA) {
> +            io_sectors = MIN(io_sectors, max_io_sectors);
>           }
>   
>           io_sectors -= io_sectors % sectors_per_chunk;
guys, what about this patch?
Paolo Bonzini July 20, 2016, 7:08 p.m. UTC | #3
----- Original Message -----
> From: "Denis V. Lunev" <den@openvz.org>
> To: "Vladimir Sementsov-Ogievskiy" <vsementsov@virtuozzo.com>, qemu-block@nongnu.org, qemu-devel@nongnu.org
> Cc: stefanha@redhat.com, famz@redhat.com, mreitz@redhat.com, jcody@redhat.com, eblake@redhat.com,
> pbonzini@redhat.com, "Kevin Wolf" <kwolf@redhat.com>
> Sent: Wednesday, July 20, 2016 7:36:00 PM
> Subject: Re: [PATCH v2] mirror: double performance of the bulk stage if the disc is full
> 
> On 07/14/2016 08:19 PM, Vladimir Sementsov-Ogievskiy wrote:
> > Mirror can do up to 16 in-flight requests, but actually on full copy
> > (the whole source disk is non-zero) in-flight is always 1. This happens
> > as the request is not limited in size: the data occupies maximum available
> > capacity of s->buf.
> >
> > The patch limits the size of the request to some artificial constant
> > (1 Mb here), which is not that big or small. This effectively enables
> > back parallelism in mirror code as it was designed.
> >
> > The result is important: the time to migrate 10 Gb disk is reduced from
> > ~350 sec to 170 sec.
> >
> > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> > Signed-off-by: Denis V. Lunev <den@openvz.org>
> > CC: Stefan Hajnoczi <stefanha@redhat.com>
> > CC: Fam Zheng <famz@redhat.com>
> > CC: Kevin Wolf <kwolf@redhat.com>
> > CC: Max Reitz <mreitz@redhat.com>
> > CC: Jeff Cody <jcody@redhat.com>
> > CC: Eric Blake <eblake@redhat.com>
> > ---
> >
> > v2: in case of s->buf_size larger than default use it to limit io_sectors
> >
> >   block/mirror.c | 10 ++++++++--
> >   1 file changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/block/mirror.c b/block/mirror.c
> > index b1e633e..3ac3b4d 100644
> > --- a/block/mirror.c
> > +++ b/block/mirror.c
> > @@ -23,7 +23,9 @@
> >   
> >   #define SLICE_TIME    100000000ULL /* ns */
> >   #define MAX_IN_FLIGHT 16
> > -#define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
> > +#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
> > +#define DEFAULT_MIRROR_BUF_SIZE \
> > +    (MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
> >   
> >   /* The mirroring buffer is a list of granularity-sized chunks.
> >    * Free chunks are organized in a list.
> > @@ -322,6 +324,8 @@ static uint64_t coroutine_fn
> > mirror_iteration(MirrorBlockJob *s)
> >       int nb_chunks = 1;
> >       int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
> >       int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
> > +    int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) /
> > MAX_IN_FLIGHT,
> > +                             MAX_IO_SECTORS);
> >   
> >       sector_num = hbitmap_iter_next(&s->hbi);
> >       if (sector_num < 0) {
> > @@ -385,7 +389,9 @@ static uint64_t coroutine_fn
> > mirror_iteration(MirrorBlockJob *s)
> >                                             nb_chunks * sectors_per_chunk,
> >                                             &io_sectors, &file);
> >           if (ret < 0) {
> > -            io_sectors = nb_chunks * sectors_per_chunk;
> > +            io_sectors = MIN(nb_chunks * sectors_per_chunk,
> > max_io_sectors);
> > +        } else if (ret & BDRV_BLOCK_DATA) {
> > +            io_sectors = MIN(io_sectors, max_io_sectors);
> >           }
> >   
> >           io_sectors -= io_sectors % sectors_per_chunk;
> guys, what about this patch?

I think that at this point it has missed hard freeze.  It's not up to me
whether to consider it a bugfix.

Paolo
Denis V. Lunev July 20, 2016, 8:30 p.m. UTC | #4
On 07/20/2016 10:08 PM, Paolo Bonzini wrote:
>
> ----- Original Message -----
>> From: "Denis V. Lunev" <den@openvz.org>
>> To: "Vladimir Sementsov-Ogievskiy" <vsementsov@virtuozzo.com>, qemu-block@nongnu.org, qemu-devel@nongnu.org
>> Cc: stefanha@redhat.com, famz@redhat.com, mreitz@redhat.com, jcody@redhat.com, eblake@redhat.com,
>> pbonzini@redhat.com, "Kevin Wolf" <kwolf@redhat.com>
>> Sent: Wednesday, July 20, 2016 7:36:00 PM
>> Subject: Re: [PATCH v2] mirror: double performance of the bulk stage if the disc is full
>>
>> On 07/14/2016 08:19 PM, Vladimir Sementsov-Ogievskiy wrote:
>>> Mirror can do up to 16 in-flight requests, but actually on full copy
>>> (the whole source disk is non-zero) in-flight is always 1. This happens
>>> as the request is not limited in size: the data occupies maximum available
>>> capacity of s->buf.
>>>
>>> The patch limits the size of the request to some artificial constant
>>> (1 Mb here), which is not that big or small. This effectively enables
>>> back parallelism in mirror code as it was designed.
>>>
>>> The result is important: the time to migrate 10 Gb disk is reduced from
>>> ~350 sec to 170 sec.
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>> Signed-off-by: Denis V. Lunev <den@openvz.org>
>>> CC: Stefan Hajnoczi <stefanha@redhat.com>
>>> CC: Fam Zheng <famz@redhat.com>
>>> CC: Kevin Wolf <kwolf@redhat.com>
>>> CC: Max Reitz <mreitz@redhat.com>
>>> CC: Jeff Cody <jcody@redhat.com>
>>> CC: Eric Blake <eblake@redhat.com>
>>> ---
>>>
>>> v2: in case of s->buf_size larger than default use it to limit io_sectors
>>>
>>>    block/mirror.c | 10 ++++++++--
>>>    1 file changed, 8 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/block/mirror.c b/block/mirror.c
>>> index b1e633e..3ac3b4d 100644
>>> --- a/block/mirror.c
>>> +++ b/block/mirror.c
>>> @@ -23,7 +23,9 @@
>>>    
>>>    #define SLICE_TIME    100000000ULL /* ns */
>>>    #define MAX_IN_FLIGHT 16
>>> -#define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
>>> +#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
>>> +#define DEFAULT_MIRROR_BUF_SIZE \
>>> +    (MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
>>>    
>>>    /* The mirroring buffer is a list of granularity-sized chunks.
>>>     * Free chunks are organized in a list.
>>> @@ -322,6 +324,8 @@ static uint64_t coroutine_fn
>>> mirror_iteration(MirrorBlockJob *s)
>>>        int nb_chunks = 1;
>>>        int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
>>>        int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
>>> +    int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) /
>>> MAX_IN_FLIGHT,
>>> +                             MAX_IO_SECTORS);
>>>    
>>>        sector_num = hbitmap_iter_next(&s->hbi);
>>>        if (sector_num < 0) {
>>> @@ -385,7 +389,9 @@ static uint64_t coroutine_fn
>>> mirror_iteration(MirrorBlockJob *s)
>>>                                              nb_chunks * sectors_per_chunk,
>>>                                              &io_sectors, &file);
>>>            if (ret < 0) {
>>> -            io_sectors = nb_chunks * sectors_per_chunk;
>>> +            io_sectors = MIN(nb_chunks * sectors_per_chunk,
>>> max_io_sectors);
>>> +        } else if (ret & BDRV_BLOCK_DATA) {
>>> +            io_sectors = MIN(io_sectors, max_io_sectors);
>>>            }
>>>    
>>>            io_sectors -= io_sectors % sectors_per_chunk;
>> guys, what about this patch?
> I think that at this point it has missed hard freeze.  It's not up to me
> whether to consider it a bugfix.
>
> Paolo
I see. It's pity this patch has missed deadline...
The difference is really big.

OK ;) I think it will not be missed in the next
release.

Den
Max Reitz July 22, 2016, 4:41 p.m. UTC | #5
On 14.07.2016 19:19, Vladimir Sementsov-Ogievskiy wrote:
> Mirror can do up to 16 in-flight requests, but actually on full copy
> (the whole source disk is non-zero) in-flight is always 1. This happens
> as the request is not limited in size: the data occupies maximum available
> capacity of s->buf.
> 
> The patch limits the size of the request to some artificial constant
> (1 Mb here), which is not that big or small. This effectively enables
> back parallelism in mirror code as it was designed.
> 
> The result is important: the time to migrate 10 Gb disk is reduced from
> ~350 sec to 170 sec.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Stefan Hajnoczi <stefanha@redhat.com>
> CC: Fam Zheng <famz@redhat.com>
> CC: Kevin Wolf <kwolf@redhat.com>
> CC: Max Reitz <mreitz@redhat.com>
> CC: Jeff Cody <jcody@redhat.com>
> CC: Eric Blake <eblake@redhat.com>
> ---
> 
> v2: in case of s->buf_size larger than default use it to limit io_sectors
> 
>  block/mirror.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)

Reviewed-by: Max Reitz <mreitz@redhat.com>

I think that this can be considered a bugfix, but strictly speaking it's
a patch for Jeff's tree, so I'm hesitant to take it.

Max
Jeff Cody July 26, 2016, 2:47 a.m. UTC | #6
On Thu, Jul 14, 2016 at 08:19:01PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> Mirror can do up to 16 in-flight requests, but actually on full copy
> (the whole source disk is non-zero) in-flight is always 1. This happens
> as the request is not limited in size: the data occupies maximum available
> capacity of s->buf.
> 
> The patch limits the size of the request to some artificial constant
> (1 Mb here), which is not that big or small. This effectively enables
> back parallelism in mirror code as it was designed.
> 
> The result is important: the time to migrate 10 Gb disk is reduced from
> ~350 sec to 170 sec.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Stefan Hajnoczi <stefanha@redhat.com>
> CC: Fam Zheng <famz@redhat.com>
> CC: Kevin Wolf <kwolf@redhat.com>
> CC: Max Reitz <mreitz@redhat.com>
> CC: Jeff Cody <jcody@redhat.com>
> CC: Eric Blake <eblake@redhat.com>
> ---
> 
> v2: in case of s->buf_size larger than default use it to limit io_sectors
> 
>  block/mirror.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/block/mirror.c b/block/mirror.c
> index b1e633e..3ac3b4d 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -23,7 +23,9 @@
>  
>  #define SLICE_TIME    100000000ULL /* ns */
>  #define MAX_IN_FLIGHT 16
> -#define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
> +#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
> +#define DEFAULT_MIRROR_BUF_SIZE \
> +    (MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
>  
>  /* The mirroring buffer is a list of granularity-sized chunks.
>   * Free chunks are organized in a list.
> @@ -322,6 +324,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
>      int nb_chunks = 1;
>      int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
>      int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
> +    int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
> +                             MAX_IO_SECTORS);
>  
>      sector_num = hbitmap_iter_next(&s->hbi);
>      if (sector_num < 0) {
> @@ -385,7 +389,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
>                                            nb_chunks * sectors_per_chunk,
>                                            &io_sectors, &file);
>          if (ret < 0) {
> -            io_sectors = nb_chunks * sectors_per_chunk;
> +            io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
> +        } else if (ret & BDRV_BLOCK_DATA) {
> +            io_sectors = MIN(io_sectors, max_io_sectors);
>          }
>  
>          io_sectors -= io_sectors % sectors_per_chunk;
> -- 
> 1.8.3.1
>

Reviewed-by: Jeff Cody <jcody@redhat.com>

I agree with Max, I think this qualifies as a bug fix, as the current
behavior is not what I'd expect as the intended behavior.  I'll pull this in
through my tree, for my -rc1 pull req.

Jeff
Jeff Cody July 26, 2016, 2:48 a.m. UTC | #7
On Thu, Jul 14, 2016 at 08:19:01PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> Mirror can do up to 16 in-flight requests, but actually on full copy
> (the whole source disk is non-zero) in-flight is always 1. This happens
> as the request is not limited in size: the data occupies maximum available
> capacity of s->buf.
> 
> The patch limits the size of the request to some artificial constant
> (1 Mb here), which is not that big or small. This effectively enables
> back parallelism in mirror code as it was designed.
> 
> The result is important: the time to migrate 10 Gb disk is reduced from
> ~350 sec to 170 sec.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Stefan Hajnoczi <stefanha@redhat.com>
> CC: Fam Zheng <famz@redhat.com>
> CC: Kevin Wolf <kwolf@redhat.com>
> CC: Max Reitz <mreitz@redhat.com>
> CC: Jeff Cody <jcody@redhat.com>
> CC: Eric Blake <eblake@redhat.com>
> ---
> 
> v2: in case of s->buf_size larger than default use it to limit io_sectors
> 
>  block/mirror.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/block/mirror.c b/block/mirror.c
> index b1e633e..3ac3b4d 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -23,7 +23,9 @@
>  
>  #define SLICE_TIME    100000000ULL /* ns */
>  #define MAX_IN_FLIGHT 16
> -#define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
> +#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
> +#define DEFAULT_MIRROR_BUF_SIZE \
> +    (MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
>  
>  /* The mirroring buffer is a list of granularity-sized chunks.
>   * Free chunks are organized in a list.
> @@ -322,6 +324,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
>      int nb_chunks = 1;
>      int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
>      int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
> +    int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
> +                             MAX_IO_SECTORS);
>  
>      sector_num = hbitmap_iter_next(&s->hbi);
>      if (sector_num < 0) {
> @@ -385,7 +389,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
>                                            nb_chunks * sectors_per_chunk,
>                                            &io_sectors, &file);
>          if (ret < 0) {
> -            io_sectors = nb_chunks * sectors_per_chunk;
> +            io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
> +        } else if (ret & BDRV_BLOCK_DATA) {
> +            io_sectors = MIN(io_sectors, max_io_sectors);
>          }
>  
>          io_sectors -= io_sectors % sectors_per_chunk;
> -- 
> 1.8.3.1
> 

Thanks,

Applied to my block branch:

git://github.com/codyprime/qemu-kvm-jtc.git block

-Jeff
Kevin Wolf Aug. 3, 2016, 10:52 a.m. UTC | #8
Am 14.07.2016 um 19:19 hat Vladimir Sementsov-Ogievskiy geschrieben:
> Mirror can do up to 16 in-flight requests, but actually on full copy
> (the whole source disk is non-zero) in-flight is always 1. This happens
> as the request is not limited in size: the data occupies maximum available
> capacity of s->buf.
> 
> The patch limits the size of the request to some artificial constant
> (1 Mb here), which is not that big or small. This effectively enables
> back parallelism in mirror code as it was designed.
> 
> The result is important: the time to migrate 10 Gb disk is reduced from
> ~350 sec to 170 sec.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> Signed-off-by: Denis V. Lunev <den@openvz.org>

This broke qemu-iotests 109 for raw. Can you please check whether the
output changes are expected, and send a fix either for the code or for
the test case?

Kevin
Vladimir Sementsov-Ogievskiy Aug. 3, 2016, 12:20 p.m. UTC | #9
On 03.08.2016 13:52, Kevin Wolf wrote:
> Am 14.07.2016 um 19:19 hat Vladimir Sementsov-Ogievskiy geschrieben:
>> Mirror can do up to 16 in-flight requests, but actually on full copy
>> (the whole source disk is non-zero) in-flight is always 1. This happens
>> as the request is not limited in size: the data occupies maximum available
>> capacity of s->buf.
>>
>> The patch limits the size of the request to some artificial constant
>> (1 Mb here), which is not that big or small. This effectively enables
>> back parallelism in mirror code as it was designed.
>>
>> The result is important: the time to migrate 10 Gb disk is reduced from
>> ~350 sec to 170 sec.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> Signed-off-by: Denis V. Lunev <den@openvz.org>
> This broke qemu-iotests 109 for raw. Can you please check whether the
> output changes are expected, and send a fix either for the code or for
> the test case?
>
> Kevin

As mirror becomes "more asynchronous" after this patch, in this test we 
have the following behaviour change:

old behaviour:
we create one sync request, which occupies all memory for in-flight 
requests, so it is only one in-flight request. It fails and we see 
BLOCK_JOB_COMPLETED (with zero progress) immediately after BLOCK_JOB_ERROR

new behaviour
several requests are created in parallel, so some of them are finished 
with success and we see BLOCK_JOB_COMPLETED with some progress..

I'm afaraid that this "some progress" may depend on disk speed or other 
environment factors, so simply update 109.out to new value is not good 
solution.

Anyway, I think
+         if (s->ret < 0) {
+             return 0;
+         }

should be added before io request creation in mirror_iteration - to not 
create new async reqs after error. This reduce "some progress", but is 
is still not zero, as some of async req's was started before an error 
has occured. Moreover, some req's was finished with success before this 
error for first chank has occured.

So, the progress of the job in case of error becomes unpredicatable and 
should be filtered in io tests.
diff mbox

Patch

diff --git a/block/mirror.c b/block/mirror.c
index b1e633e..3ac3b4d 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -23,7 +23,9 @@ 
 
 #define SLICE_TIME    100000000ULL /* ns */
 #define MAX_IN_FLIGHT 16
-#define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
+#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
+#define DEFAULT_MIRROR_BUF_SIZE \
+    (MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
 
 /* The mirroring buffer is a list of granularity-sized chunks.
  * Free chunks are organized in a list.
@@ -322,6 +324,8 @@  static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     int nb_chunks = 1;
     int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
     int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
+                             MAX_IO_SECTORS);
 
     sector_num = hbitmap_iter_next(&s->hbi);
     if (sector_num < 0) {
@@ -385,7 +389,9 @@  static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
                                           nb_chunks * sectors_per_chunk,
                                           &io_sectors, &file);
         if (ret < 0) {
-            io_sectors = nb_chunks * sectors_per_chunk;
+            io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
+        } else if (ret & BDRV_BLOCK_DATA) {
+            io_sectors = MIN(io_sectors, max_io_sectors);
         }
 
         io_sectors -= io_sectors % sectors_per_chunk;