diff mbox

[RFC,4/6] block: request overlap detection

Message ID 1318866452-30026-5-git-send-email-stefanha@linux.vnet.ibm.com
State New
Headers show

Commit Message

Stefan Hajnoczi Oct. 17, 2011, 3:47 p.m. UTC
Detect overlapping requests and remember to align to cluster boundaries
if the image format uses them.  This assumes that allocating I/O is
performed in cluster granularity - which is true for qcow2, qed, etc.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 block.c |   39 +++++++++++++++++++++++++++++++++++++--
 1 files changed, 37 insertions(+), 2 deletions(-)

Comments

Zhiyong Wu Nov. 7, 2011, 11:49 a.m. UTC | #1
On Mon, Oct 17, 2011 at 11:47 PM, Stefan Hajnoczi
<stefanha@linux.vnet.ibm.com> wrote:
> Detect overlapping requests and remember to align to cluster boundaries
> if the image format uses them.  This assumes that allocating I/O is
> performed in cluster granularity - which is true for qcow2, qed, etc.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> ---
>  block.c |   39 +++++++++++++++++++++++++++++++++++++--
>  1 files changed, 37 insertions(+), 2 deletions(-)
>
> diff --git a/block.c b/block.c
> index cc3202c..0c22741 100644
> --- a/block.c
> +++ b/block.c
> @@ -1052,21 +1052,56 @@ static BdrvTrackedRequest *tracked_request_add(BlockDriverState *bs,
>     return req;
>  }
>
> +/**
> + * Round a region to cluster boundaries
> + */
> +static void round_to_clusters(BlockDriverState *bs,
> +                              int64_t sector_num, int nb_sectors,
> +                              int64_t *cluster_sector_num,
> +                              int *cluster_nb_sectors)
> +{
> +    BlockDriverInfo bdi;
> +
> +    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
> +        *cluster_sector_num = sector_num;
> +        *cluster_nb_sectors = nb_sectors;
> +    } else {
> +        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
> +        *cluster_sector_num = (sector_num / c) * c;
             I can understand the above formula, but the one below is
very magic. :) and can not be understood by me.
> +        *cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;

> +    }
> +}
> +
>  static bool tracked_request_overlaps(BdrvTrackedRequest *req,
>                                      int64_t sector_num, int nb_sectors) {
> -    return false; /* not yet implemented */
> +    /*        aaaa   bbbb */
> +    if (sector_num >= req->sector_num + req->nb_sectors) {
> +        return false;
> +    }
> +    /* bbbb   aaaa        */
> +    if (req->sector_num >= sector_num + nb_sectors) {
> +        return false;
> +    }
> +    return true;
>  }
>
>  static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
>         int64_t sector_num, int nb_sectors)
>  {
>     BdrvTrackedRequest *req;
> +    int64_t cluster_sector_num;
> +    int cluster_nb_sectors;
>     bool retry;
>
> +    /* If we touch the same cluster it counts as an overlap */
> +    round_to_clusters(bs, sector_num, nb_sectors,
> +                      &cluster_sector_num, &cluster_nb_sectors);
> +
>     do {
>         retry = false;
>         QLIST_FOREACH(req, &bs->tracked_requests, list) {
> -            if (tracked_request_overlaps(req, sector_num, nb_sectors)) {
> +            if (tracked_request_overlaps(req, cluster_sector_num,
> +                                         cluster_nb_sectors)) {
>                 qemu_co_queue_wait(&req->wait_queue);
>                 retry = true;
>                 break;
> --
> 1.7.6.3
>
>
>
Stefan Hajnoczi Nov. 7, 2011, 2:37 p.m. UTC | #2
On Mon, Nov 7, 2011 at 11:49 AM, Zhi Yong Wu <zwu.kernel@gmail.com> wrote:
> On Mon, Oct 17, 2011 at 11:47 PM, Stefan Hajnoczi
> <stefanha@linux.vnet.ibm.com> wrote:
>> Detect overlapping requests and remember to align to cluster boundaries
>> if the image format uses them.  This assumes that allocating I/O is
>> performed in cluster granularity - which is true for qcow2, qed, etc.
>>
>> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
>> ---
>>  block.c |   39 +++++++++++++++++++++++++++++++++++++--
>>  1 files changed, 37 insertions(+), 2 deletions(-)
>>
>> diff --git a/block.c b/block.c
>> index cc3202c..0c22741 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -1052,21 +1052,56 @@ static BdrvTrackedRequest *tracked_request_add(BlockDriverState *bs,
>>     return req;
>>  }
>>
>> +/**
>> + * Round a region to cluster boundaries
>> + */
>> +static void round_to_clusters(BlockDriverState *bs,
>> +                              int64_t sector_num, int nb_sectors,
>> +                              int64_t *cluster_sector_num,
>> +                              int *cluster_nb_sectors)
>> +{
>> +    BlockDriverInfo bdi;
>> +
>> +    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
>> +        *cluster_sector_num = sector_num;
>> +        *cluster_nb_sectors = nb_sectors;
>> +    } else {
>> +        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
>> +        *cluster_sector_num = (sector_num / c) * c;
>             I can understand the above formula, but the one below is
> very magic. :) and can not be understood by me.
>> +        *cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;

I agree this is ugly.  Here is what is going on:

c = number of sectors per cluster
cluster_sector_num = sector number rounded *down* to cluster boundary
cluster_nb_sectors = number of sectors from cluster_sector_num to
rounded up sector_num+nb_sectors

So the magic expression is takes the original sector_num to
sector_num+nb_sectors region:

|---XXX|XXX---|

Where |-----| is a cluster and XXXX is the region from sector_num to
sector_num+nb_sectors, then the output should be:

|RRRRRR|RRRRRR|

Everything has been rounded to clusters.  So here is the expression broken down:

*cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;
                        AAAAAAAAAAAAAA    XXXXXXXXXX   BBBBBBBBBBBBBB

|AAAXXX|XXXBBB|

A is actually equivalent to sector_num - cluster_sector_num.

X is the original unrounded region.

B is the rounding up to the next cluster bounary.

Another way of writing this:

*cluster_nb_sectors = ROUND_UP((sector_num - cluster_sector_num) +
nb_sectors, c);

I'll try to improve the code in the next revision.

Stefan
Zhiyong Wu Nov. 8, 2011, 6:34 a.m. UTC | #3
On Mon, Nov 7, 2011 at 10:37 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Mon, Nov 7, 2011 at 11:49 AM, Zhi Yong Wu <zwu.kernel@gmail.com> wrote:
>> On Mon, Oct 17, 2011 at 11:47 PM, Stefan Hajnoczi
>> <stefanha@linux.vnet.ibm.com> wrote:
>>> Detect overlapping requests and remember to align to cluster boundaries
>>> if the image format uses them.  This assumes that allocating I/O is
>>> performed in cluster granularity - which is true for qcow2, qed, etc.
>>>
>>> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
>>> ---
>>>  block.c |   39 +++++++++++++++++++++++++++++++++++++--
>>>  1 files changed, 37 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/block.c b/block.c
>>> index cc3202c..0c22741 100644
>>> --- a/block.c
>>> +++ b/block.c
>>> @@ -1052,21 +1052,56 @@ static BdrvTrackedRequest *tracked_request_add(BlockDriverState *bs,
>>>     return req;
>>>  }
>>>
>>> +/**
>>> + * Round a region to cluster boundaries
>>> + */
>>> +static void round_to_clusters(BlockDriverState *bs,
>>> +                              int64_t sector_num, int nb_sectors,
>>> +                              int64_t *cluster_sector_num,
>>> +                              int *cluster_nb_sectors)
>>> +{
>>> +    BlockDriverInfo bdi;
>>> +
>>> +    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
>>> +        *cluster_sector_num = sector_num;
>>> +        *cluster_nb_sectors = nb_sectors;
>>> +    } else {
>>> +        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
>>> +        *cluster_sector_num = (sector_num / c) * c;
>>             I can understand the above formula, but the one below is
>> very magic. :) and can not be understood by me.
>>> +        *cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;
>
> I agree this is ugly.  Here is what is going on:
>
> c = number of sectors per cluster
> cluster_sector_num = sector number rounded *down* to cluster boundary
> cluster_nb_sectors = number of sectors from cluster_sector_num to
> rounded up sector_num+nb_sectors
>
> So the magic expression is takes the original sector_num to
> sector_num+nb_sectors region:
>
> |---XXX|XXX---|
>
> Where |-----| is a cluster and XXXX is the region from sector_num to
> sector_num+nb_sectors, then the output should be:
>
> |RRRRRR|RRRRRR|
>
> Everything has been rounded to clusters.  So here is the expression broken down:
>
> *cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;
>                        AAAAAAAAAAAAAA    XXXXXXXXXX   BBBBBBBBBBBBBB
>
> |AAAXXX|XXXBBB|
>
> A is actually equivalent to sector_num - cluster_sector_num.
>
> X is the original unrounded region.
>
> B is the rounding up to the next cluster bounary.
>
> Another way of writing this:
>
> *cluster_nb_sectors = ROUND_UP((sector_num - cluster_sector_num) +
> nb_sectors, c);
Above expression seems to not be correct;
It should be
*cluster_nb_sectors = ROUND_UP((sector_num - cluster_sector_num) +
nb_sectors, c) * c;

*cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;

#define ROUND_UP(x,y) (((x)+(y)-1)/(y))

I seems to understand your magic expression. thanks.

>
> I'll try to improve the code in the next revision.
>
> Stefan
>
Stefan Hajnoczi Nov. 8, 2011, 8:16 a.m. UTC | #4
On Tue, Nov 08, 2011 at 02:34:22PM +0800, Zhi Yong Wu wrote:
> On Mon, Nov 7, 2011 at 10:37 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> > On Mon, Nov 7, 2011 at 11:49 AM, Zhi Yong Wu <zwu.kernel@gmail.com> wrote:
> >> On Mon, Oct 17, 2011 at 11:47 PM, Stefan Hajnoczi
> >> <stefanha@linux.vnet.ibm.com> wrote:
> >>> Detect overlapping requests and remember to align to cluster boundaries
> >>> if the image format uses them.  This assumes that allocating I/O is
> >>> performed in cluster granularity - which is true for qcow2, qed, etc.
> >>>
> >>> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> >>> ---
> >>>  block.c |   39 +++++++++++++++++++++++++++++++++++++--
> >>>  1 files changed, 37 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/block.c b/block.c
> >>> index cc3202c..0c22741 100644
> >>> --- a/block.c
> >>> +++ b/block.c
> >>> @@ -1052,21 +1052,56 @@ static BdrvTrackedRequest *tracked_request_add(BlockDriverState *bs,
> >>>     return req;
> >>>  }
> >>>
> >>> +/**
> >>> + * Round a region to cluster boundaries
> >>> + */
> >>> +static void round_to_clusters(BlockDriverState *bs,
> >>> +                              int64_t sector_num, int nb_sectors,
> >>> +                              int64_t *cluster_sector_num,
> >>> +                              int *cluster_nb_sectors)
> >>> +{
> >>> +    BlockDriverInfo bdi;
> >>> +
> >>> +    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
> >>> +        *cluster_sector_num = sector_num;
> >>> +        *cluster_nb_sectors = nb_sectors;
> >>> +    } else {
> >>> +        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
> >>> +        *cluster_sector_num = (sector_num / c) * c;
> >>             I can understand the above formula, but the one below is
> >> very magic. :) and can not be understood by me.
> >>> +        *cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;
> >
> > I agree this is ugly.  Here is what is going on:
> >
> > c = number of sectors per cluster
> > cluster_sector_num = sector number rounded *down* to cluster boundary
> > cluster_nb_sectors = number of sectors from cluster_sector_num to
> > rounded up sector_num+nb_sectors
> >
> > So the magic expression is takes the original sector_num to
> > sector_num+nb_sectors region:
> >
> > |---XXX|XXX---|
> >
> > Where |-----| is a cluster and XXXX is the region from sector_num to
> > sector_num+nb_sectors, then the output should be:
> >
> > |RRRRRR|RRRRRR|
> >
> > Everything has been rounded to clusters.  So here is the expression broken down:
> >
> > *cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;
> >                        AAAAAAAAAAAAAA    XXXXXXXXXX   BBBBBBBBBBBBBB
> >
> > |AAAXXX|XXXBBB|
> >
> > A is actually equivalent to sector_num - cluster_sector_num.
> >
> > X is the original unrounded region.
> >
> > B is the rounding up to the next cluster bounary.
> >
> > Another way of writing this:
> >
> > *cluster_nb_sectors = ROUND_UP((sector_num - cluster_sector_num) +
> > nb_sectors, c);
> Above expression seems to not be correct;
> It should be
> *cluster_nb_sectors = ROUND_UP((sector_num - cluster_sector_num) +
> nb_sectors, c) * c;
> 
> *cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;
> 
> #define ROUND_UP(x,y) (((x)+(y)-1)/(y))

ALIGN_UP() may be a better macro name, for example:

ALIGN_UP(1024, 4096) = 4096

I see how you're defining ROUND_UP() and it is different.

Stefan
Zhiyong Wu Nov. 8, 2011, 9:49 a.m. UTC | #5
On Tue, Nov 8, 2011 at 4:16 PM, Stefan Hajnoczi
<stefanha@linux.vnet.ibm.com> wrote:
> On Tue, Nov 08, 2011 at 02:34:22PM +0800, Zhi Yong Wu wrote:
>> On Mon, Nov 7, 2011 at 10:37 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> > On Mon, Nov 7, 2011 at 11:49 AM, Zhi Yong Wu <zwu.kernel@gmail.com> wrote:
>> >> On Mon, Oct 17, 2011 at 11:47 PM, Stefan Hajnoczi
>> >> <stefanha@linux.vnet.ibm.com> wrote:
>> >>> Detect overlapping requests and remember to align to cluster boundaries
>> >>> if the image format uses them.  This assumes that allocating I/O is
>> >>> performed in cluster granularity - which is true for qcow2, qed, etc.
>> >>>
>> >>> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
>> >>> ---
>> >>>  block.c |   39 +++++++++++++++++++++++++++++++++++++--
>> >>>  1 files changed, 37 insertions(+), 2 deletions(-)
>> >>>
>> >>> diff --git a/block.c b/block.c
>> >>> index cc3202c..0c22741 100644
>> >>> --- a/block.c
>> >>> +++ b/block.c
>> >>> @@ -1052,21 +1052,56 @@ static BdrvTrackedRequest *tracked_request_add(BlockDriverState *bs,
>> >>>     return req;
>> >>>  }
>> >>>
>> >>> +/**
>> >>> + * Round a region to cluster boundaries
>> >>> + */
>> >>> +static void round_to_clusters(BlockDriverState *bs,
>> >>> +                              int64_t sector_num, int nb_sectors,
>> >>> +                              int64_t *cluster_sector_num,
>> >>> +                              int *cluster_nb_sectors)
>> >>> +{
>> >>> +    BlockDriverInfo bdi;
>> >>> +
>> >>> +    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
>> >>> +        *cluster_sector_num = sector_num;
>> >>> +        *cluster_nb_sectors = nb_sectors;
>> >>> +    } else {
>> >>> +        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
>> >>> +        *cluster_sector_num = (sector_num / c) * c;
>> >>             I can understand the above formula, but the one below is
>> >> very magic. :) and can not be understood by me.
>> >>> +        *cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;
>> >
>> > I agree this is ugly.  Here is what is going on:
>> >
>> > c = number of sectors per cluster
>> > cluster_sector_num = sector number rounded *down* to cluster boundary
>> > cluster_nb_sectors = number of sectors from cluster_sector_num to
>> > rounded up sector_num+nb_sectors
>> >
>> > So the magic expression is takes the original sector_num to
>> > sector_num+nb_sectors region:
>> >
>> > |---XXX|XXX---|
>> >
>> > Where |-----| is a cluster and XXXX is the region from sector_num to
>> > sector_num+nb_sectors, then the output should be:
>> >
>> > |RRRRRR|RRRRRR|
>> >
>> > Everything has been rounded to clusters.  So here is the expression broken down:
>> >
>> > *cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;
>> >                        AAAAAAAAAAAAAA    XXXXXXXXXX   BBBBBBBBBBBBBB
>> >
>> > |AAAXXX|XXXBBB|
>> >
>> > A is actually equivalent to sector_num - cluster_sector_num.
>> >
>> > X is the original unrounded region.
>> >
>> > B is the rounding up to the next cluster bounary.
>> >
>> > Another way of writing this:
>> >
>> > *cluster_nb_sectors = ROUND_UP((sector_num - cluster_sector_num) +
>> > nb_sectors, c);
>> Above expression seems to not be correct;
>> It should be
>> *cluster_nb_sectors = ROUND_UP((sector_num - cluster_sector_num) +
>> nb_sectors, c) * c;
>>
>> *cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;
>>
>> #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
>
> ALIGN_UP() may be a better macro name, for example:
>
> ALIGN_UP(1024, 4096) = 4096
OK. Hope to see this in your next revision.
>
> I see how you're defining ROUND_UP() and it is different.
>
> Stefan
>
diff mbox

Patch

diff --git a/block.c b/block.c
index cc3202c..0c22741 100644
--- a/block.c
+++ b/block.c
@@ -1052,21 +1052,56 @@  static BdrvTrackedRequest *tracked_request_add(BlockDriverState *bs,
     return req;
 }
 
+/**
+ * Round a region to cluster boundaries
+ */
+static void round_to_clusters(BlockDriverState *bs,
+                              int64_t sector_num, int nb_sectors,
+                              int64_t *cluster_sector_num,
+                              int *cluster_nb_sectors)
+{
+    BlockDriverInfo bdi;
+
+    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+        *cluster_sector_num = sector_num;
+        *cluster_nb_sectors = nb_sectors;
+    } else {
+        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
+        *cluster_sector_num = (sector_num / c) * c;
+        *cluster_nb_sectors = ((sector_num % c) + nb_sectors + c - 1) / c * c;
+    }
+}
+
 static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                      int64_t sector_num, int nb_sectors) {
-    return false; /* not yet implemented */
+    /*        aaaa   bbbb */
+    if (sector_num >= req->sector_num + req->nb_sectors) {
+        return false;
+    }
+    /* bbbb   aaaa        */
+    if (req->sector_num >= sector_num + nb_sectors) {
+        return false;
+    }
+    return true;
 }
 
 static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors)
 {
     BdrvTrackedRequest *req;
+    int64_t cluster_sector_num;
+    int cluster_nb_sectors;
     bool retry;
 
+    /* If we touch the same cluster it counts as an overlap */
+    round_to_clusters(bs, sector_num, nb_sectors,
+                      &cluster_sector_num, &cluster_nb_sectors);
+
     do {
         retry = false;
         QLIST_FOREACH(req, &bs->tracked_requests, list) {
-            if (tracked_request_overlaps(req, sector_num, nb_sectors)) {
+            if (tracked_request_overlaps(req, cluster_sector_num,
+                                         cluster_nb_sectors)) {
                 qemu_co_queue_wait(&req->wait_queue);
                 retry = true;
                 break;