Patchwork [v2] Consolidate reads and writes in nbd block device into one common routine

login
register
mail settings
Submitter Michael Tokarev
Date Feb. 28, 2012, 10:24 a.m.
Message ID <20120228111424.2DC63195@gandalf.tls.msk.ru>
Download mbox | patch
Permalink /patch/143408/
State New
Headers show

Comments

Michael Tokarev - Feb. 28, 2012, 10:24 a.m.
This removes quite some duplicated code.

v2 fixes a bug (uninitialized reply.error) and makes the loop more natural.

Signed-off-By: Michael Tokarev <mjt@tls.msk.ru>
---
 block/nbd.c |   95 +++++++++++++++++++---------------------------------------
 1 files changed, 31 insertions(+), 64 deletions(-)
Paolo Bonzini - Feb. 28, 2012, 11:35 a.m.
Il 28/02/2012 11:24, Michael Tokarev ha scritto:
> This removes quite some duplicated code.
> 
> v2 fixes a bug (uninitialized reply.error) and makes the loop more natural.
> 
> Signed-off-By: Michael Tokarev <mjt@tls.msk.ru>
> ---
>  block/nbd.c |   95 +++++++++++++++++++---------------------------------------
>  1 files changed, 31 insertions(+), 64 deletions(-)
> 
> diff --git a/block/nbd.c b/block/nbd.c
> index 161b299..a78e212 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -320,91 +320,58 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
>      return result;
>  }
>  
> -static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
> -                          int nb_sectors, QEMUIOVector *qiov,
> -                          int offset)
> -{
> -    BDRVNBDState *s = bs->opaque;
> -    struct nbd_request request;
> -    struct nbd_reply reply;
> -
> -    request.type = NBD_CMD_READ;
> -    request.from = sector_num * 512;
> -    request.len = nb_sectors * 512;
> -
> -    nbd_coroutine_start(s, &request);
> -    if (nbd_co_send_request(s, &request, NULL, 0) == -1) {
> -        reply.error = errno;
> -    } else {
> -        nbd_co_receive_reply(s, &request, &reply, qiov->iov, offset);
> -    }
> -    nbd_coroutine_end(s, &request);
> -    return -reply.error;
> -
> -}
> +/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
> + * remain aligned to 4K. */
> +#define NBD_MAX_SECTORS 2040
>  
> -static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
> -                           int nb_sectors, QEMUIOVector *qiov,
> -                           int offset)
> +static int nbd_co_rwv(BlockDriverState *bs, int64_t sector_num,
> +                      int nb_sectors, QEMUIOVector *qiov, int iswrite)

Call this nbd_co_rw, and please pass the whole request.type down.

>  {
>      BDRVNBDState *s = bs->opaque;
>      struct nbd_request request;
>      struct nbd_reply reply;
> +    int offset = 0;
>  
> -    request.type = NBD_CMD_WRITE;
> -    if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
> +    request.type = iswrite ? NBD_CMD_WRITE : NBD_CMD_READ;
> +    if (iswrite && !bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
>          request.type |= NBD_CMD_FLAG_FUA;
>      }
> +    reply.error = 0;
> +
> +    /* we split the request into pieces of at most NBD_MAX_SECTORS size
> +     * and process them in a loop... */
> +    do {
> +        request.from = sector_num * 512;
> +        request.len = MIN(nb_sectors, NBD_MAX_SECTORS) * 512;
> +
> +        nbd_coroutine_start(s, &request);
> +        if (nbd_co_send_request(s, &request, iswrite ? qiov->iov : NULL, 0) == -1) {

The last 0 needs to be offset.

> +            reply.error = errno;
> +        } else {
> +            nbd_co_receive_reply(s, &request, &reply, iswrite ? NULL : qiov->iov, offset);
> +        }
> +        nbd_coroutine_end(s, &request);
>  
> -    request.from = sector_num * 512;
> -    request.len = nb_sectors * 512;
> +        offset += NBD_MAX_SECTORS * 512;
> +        sector_num += NBD_MAX_SECTORS;
> +        nb_sectors -= NBD_MAX_SECTORS;
> +
> +    /* ..till we hit an error or there's nothing more to process */
> +    } while (reply.error == 0 && nb_sectors > 0);
>  
> -    nbd_coroutine_start(s, &request);
> -    if (nbd_co_send_request(s, &request, qiov->iov, offset) == -1) {
> -        reply.error = errno;
> -    } else {
> -        nbd_co_receive_reply(s, &request, &reply, NULL, 0);
> -    }
> -    nbd_coroutine_end(s, &request);
>      return -reply.error;
>  }
>  
> -/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
> - * remain aligned to 4K. */
> -#define NBD_MAX_SECTORS 2040
> -
>  static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
>                          int nb_sectors, QEMUIOVector *qiov)
>  {
> -    int offset = 0;
> -    int ret;
> -    while (nb_sectors > NBD_MAX_SECTORS) {
> -        ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
> -        if (ret < 0) {
> -            return ret;
> -        }
> -        offset += NBD_MAX_SECTORS * 512;
> -        sector_num += NBD_MAX_SECTORS;
> -        nb_sectors -= NBD_MAX_SECTORS;
> -    }
> -    return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
> +    return nbd_co_rwv(bs, sector_num, nb_sectors, qiov, 0);
>  }
>  
>  static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
>                           int nb_sectors, QEMUIOVector *qiov)
>  {
> -    int offset = 0;
> -    int ret;
> -    while (nb_sectors > NBD_MAX_SECTORS) {
> -        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
> -        if (ret < 0) {
> -            return ret;
> -        }
> -        offset += NBD_MAX_SECTORS * 512;
> -        sector_num += NBD_MAX_SECTORS;
> -        nb_sectors -= NBD_MAX_SECTORS;
> -    }
> -    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
> +    return nbd_co_rwv(bs, sector_num, nb_sectors, qiov, 1);
>  }

... but thinking more about it, why don't you leave
nbd_co_readv_1/nbd_co_writev_1 alone, and create a nbd_split_rw function
that takes a function pointer?

Paolo
Michael Tokarev - Feb. 28, 2012, 12:35 p.m.
On 28.02.2012 15:35, Paolo Bonzini wrote:
> Il 28/02/2012 11:24, Michael Tokarev ha scritto:
>> This removes quite some duplicated code.
[]
>> +static int nbd_co_rwv(BlockDriverState *bs, int64_t sector_num,
>> +                      int nb_sectors, QEMUIOVector *qiov, int iswrite)
> 
> Call this nbd_co_rw, and please pass the whole request.type down.

Originally it is readV and writeV, so why it should not be rwV ?

By passing whole request.type (NBD_CMD_WRITE or NBD_CMD_WRITE|NBD_CMD_FLAG_FUA
or NBD_CMD_READ) the condition (iswrite currently) will be larger (request.type
!= NBD_CMD_READ).  Also, if someday we'll have additional flag for READ as we
already do for write, whole thing will be even more difficult to read.

> 
>>  {
>>      BDRVNBDState *s = bs->opaque;
>>      struct nbd_request request;
>>      struct nbd_reply reply;
>> +    int offset = 0;
>>  
>> -    request.type = NBD_CMD_WRITE;
>> -    if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
>> +    request.type = iswrite ? NBD_CMD_WRITE : NBD_CMD_READ;
>> +    if (iswrite && !bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
>>          request.type |= NBD_CMD_FLAG_FUA;
>>      }
>> +    reply.error = 0;
>> +
>> +    /* we split the request into pieces of at most NBD_MAX_SECTORS size
>> +     * and process them in a loop... */
>> +    do {
>> +        request.from = sector_num * 512;
>> +        request.len = MIN(nb_sectors, NBD_MAX_SECTORS) * 512;
>> +
>> +        nbd_coroutine_start(s, &request);
>> +        if (nbd_co_send_request(s, &request, iswrite ? qiov->iov : NULL, 0) == -1) {
> 
> The last 0 needs to be offset.

Indeed, this is a bug.  I think I tested it with large requests
but it looks like only for reads.

[]
> ... but thinking more about it, why don't you leave
> nbd_co_readv_1/nbd_co_writev_1 alone, and create a nbd_split_rw function
> that takes a function pointer?

Because each of these nbd_co_*_1 does the same thing, the diff. is
only qiov->iov vs NULL.  While reading the original code it was the
first thing I did - consolidated nbd_co_*_1 into nbd_co_* ;)

Actually it might be a good idea to have single bdrv->bdrv_co_readwritev
method instead of two -- the path of each read and write jumps between
specific read-or-write routine and common readwrite routine several
times.

I see only one correction which needs (really!) to be done - that's
fixing the bug with offset.  Do you still not agree?

Thanks,

/mjt
Paolo Bonzini - Feb. 28, 2012, 1:03 p.m.
Il 28/02/2012 13:35, Michael Tokarev ha scritto:
> On 28.02.2012 15:35, Paolo Bonzini wrote:
>> Il 28/02/2012 11:24, Michael Tokarev ha scritto:
>>> This removes quite some duplicated code.
> []
>>> +static int nbd_co_rwv(BlockDriverState *bs, int64_t sector_num,
>>> +                      int nb_sectors, QEMUIOVector *qiov, int iswrite)
>>
>> Call this nbd_co_rw, and please pass the whole request.type down.
> 
> Originally it is readV and writeV, so why it should not be rwV ?

It's more consistent with block.c.

> By passing whole request.type (NBD_CMD_WRITE or NBD_CMD_WRITE|NBD_CMD_FLAG_FUA
> or NBD_CMD_READ) the condition (iswrite currently) will be larger (request.type
> != NBD_CMD_READ).  Also, if someday we'll have additional flag for READ as we
> already do for write, whole thing will be even more difficult to read.

Sure, but why should a generic function deal with NBD_CMD_FLAG_FUA?

>>
>>>  {
>>>      BDRVNBDState *s = bs->opaque;
>>>      struct nbd_request request;
>>>      struct nbd_reply reply;
>>> +    int offset = 0;
>>>  
>>> -    request.type = NBD_CMD_WRITE;
>>> -    if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
>>> +    request.type = iswrite ? NBD_CMD_WRITE : NBD_CMD_READ;
>>> +    if (iswrite && !bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
>>>          request.type |= NBD_CMD_FLAG_FUA;
>>>      }
>>> +    reply.error = 0;
>>> +
>>> +    /* we split the request into pieces of at most NBD_MAX_SECTORS size
>>> +     * and process them in a loop... */
>>> +    do {
>>> +        request.from = sector_num * 512;
>>> +        request.len = MIN(nb_sectors, NBD_MAX_SECTORS) * 512;
>>> +
>>> +        nbd_coroutine_start(s, &request);
>>> +        if (nbd_co_send_request(s, &request, iswrite ? qiov->iov : NULL, 0) == -1) {
>>
>> The last 0 needs to be offset.
> 
> Indeed, this is a bug.  I think I tested it with large requests
> but it looks like only for reads.
> 
>> ... but thinking more about it, why don't you leave
>> nbd_co_readv_1/nbd_co_writev_1 alone, and create a nbd_split_rw function
>> that takes a function pointer?
> 
> Because each of these nbd_co_*_1 does the same thing, the diff. is
> only qiov->iov vs NULL.  While reading the original code it was the
> first thing I did - consolidated nbd_co_*_1 into nbd_co_* ;)

And offset.  I needed to check that non-0 offsets are fine when the iov
is NULL; it's not obvious.

> Actually it might be a good idea to have single bdrv->bdrv_co_readwritev
> method instead of two -- the path of each read and write jumps between
> specific read-or-write routine and common readwrite routine several
> times.

Small amounts of duplicate code can be better than functions with many
ifs or complicated conditions.

> I see only one correction which needs (really!) to be done - that's
> fixing the bug with offset.  Do you still not agree?

I still disagree. :)  I will accept the patch with the function pointer
though.

Paolo
Michael Tokarev - Feb. 28, 2012, 1:16 p.m.
On 28.02.2012 17:03, Paolo Bonzini wrote:
> Il 28/02/2012 13:35, Michael Tokarev ha scritto:
>> On 28.02.2012 15:35, Paolo Bonzini wrote:
>>> Il 28/02/2012 11:24, Michael Tokarev ha scritto:
>>>> This removes quite some duplicated code.
>> []
>>>> +static int nbd_co_rwv(BlockDriverState *bs, int64_t sector_num,
>>>> +                      int nb_sectors, QEMUIOVector *qiov, int iswrite)
>>>
>>> Call this nbd_co_rw, and please pass the whole request.type down.
>>
>> Originally it is readV and writeV, so why it should not be rwV ?
> 
> It's more consistent with block.c.
> 
>> By passing whole request.type (NBD_CMD_WRITE or NBD_CMD_WRITE|NBD_CMD_FLAG_FUA
>> or NBD_CMD_READ) the condition (iswrite currently) will be larger (request.type
>> != NBD_CMD_READ).  Also, if someday we'll have additional flag for READ as we
>> already do for write, whole thing will be even more difficult to read.
> 
> Sure, but why should a generic function deal with NBD_CMD_FLAG_FUA?

I can pass both "iswrite" and request.type here - to avoid possible
complications in the area of adding more nbd-specific meanings/flags
to request.type.  But that becomes more ugly.

[]
>>> ... but thinking more about it, why don't you leave
>>> nbd_co_readv_1/nbd_co_writev_1 alone, and create a nbd_split_rw function
>>> that takes a function pointer?
>>
>> Because each of these nbd_co_*_1 does the same thing, the diff. is
>> only qiov->iov vs NULL.  While reading the original code it was the
>> first thing I did - consolidated nbd_co_*_1 into nbd_co_* ;)
> 
> And offset.  I needed to check that non-0 offsets are fine when the iov
> is NULL; it's not obvious.

It isn't indeed.  Both send_request and recv_reply only check iov
and ignore offset if iov is NULL.

>> Actually it might be a good idea to have single bdrv->bdrv_co_readwritev
>> method instead of two -- the path of each read and write jumps between
>> specific read-or-write routine and common readwrite routine several
>> times.
> 
> Small amounts of duplicate code can be better than functions with many
> ifs or complicated conditions.

Sure thing.  But when the code path is so twisted - common->specific->
common-> specific - it makes very difficult to get the bigger picture.

>> I see only one correction which needs (really!) to be done - that's
>> fixing the bug with offset.  Do you still not agree?
> 
> I still disagree. :)  I will accept the patch with the function pointer
> though.

With separate nbd_co_*_1 it isn't worth the effort.  When it all is in a
_small_ single routine (the resulting function is actually very small),
whole logic is immediately visible.  In particular, the FUA bit which
is set for every _part_ of write request - it wasn't visible till I
integrated nbd_co_writev_1 into nbd_co_writev.

Thanks,

/mjt

Patch

diff --git a/block/nbd.c b/block/nbd.c
index 161b299..a78e212 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -320,91 +320,58 @@  static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
     return result;
 }
 
-static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors, QEMUIOVector *qiov,
-                          int offset)
-{
-    BDRVNBDState *s = bs->opaque;
-    struct nbd_request request;
-    struct nbd_reply reply;
-
-    request.type = NBD_CMD_READ;
-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
-
-    nbd_coroutine_start(s, &request);
-    if (nbd_co_send_request(s, &request, NULL, 0) == -1) {
-        reply.error = errno;
-    } else {
-        nbd_co_receive_reply(s, &request, &reply, qiov->iov, offset);
-    }
-    nbd_coroutine_end(s, &request);
-    return -reply.error;
-
-}
+/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
+ * remain aligned to 4K. */
+#define NBD_MAX_SECTORS 2040
 
-static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
-                           int nb_sectors, QEMUIOVector *qiov,
-                           int offset)
+static int nbd_co_rwv(BlockDriverState *bs, int64_t sector_num,
+                      int nb_sectors, QEMUIOVector *qiov, int iswrite)
 {
     BDRVNBDState *s = bs->opaque;
     struct nbd_request request;
     struct nbd_reply reply;
+    int offset = 0;
 
-    request.type = NBD_CMD_WRITE;
-    if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
+    request.type = iswrite ? NBD_CMD_WRITE : NBD_CMD_READ;
+    if (iswrite && !bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
         request.type |= NBD_CMD_FLAG_FUA;
     }
+    reply.error = 0;
+
+    /* we split the request into pieces of at most NBD_MAX_SECTORS size
+     * and process them in a loop... */
+    do {
+        request.from = sector_num * 512;
+        request.len = MIN(nb_sectors, NBD_MAX_SECTORS) * 512;
+
+        nbd_coroutine_start(s, &request);
+        if (nbd_co_send_request(s, &request, iswrite ? qiov->iov : NULL, 0) == -1) {
+            reply.error = errno;
+        } else {
+            nbd_co_receive_reply(s, &request, &reply, iswrite ? NULL : qiov->iov, offset);
+        }
+        nbd_coroutine_end(s, &request);
 
-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
+        offset += NBD_MAX_SECTORS * 512;
+        sector_num += NBD_MAX_SECTORS;
+        nb_sectors -= NBD_MAX_SECTORS;
+
+    /* ..till we hit an error or there's nothing more to process */
+    } while (reply.error == 0 && nb_sectors > 0);
 
-    nbd_coroutine_start(s, &request);
-    if (nbd_co_send_request(s, &request, qiov->iov, offset) == -1) {
-        reply.error = errno;
-    } else {
-        nbd_co_receive_reply(s, &request, &reply, NULL, 0);
-    }
-    nbd_coroutine_end(s, &request);
     return -reply.error;
 }
 
-/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
- * remain aligned to 4K. */
-#define NBD_MAX_SECTORS 2040
-
 static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
                         int nb_sectors, QEMUIOVector *qiov)
 {
-    int offset = 0;
-    int ret;
-    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
-        if (ret < 0) {
-            return ret;
-        }
-        offset += NBD_MAX_SECTORS * 512;
-        sector_num += NBD_MAX_SECTORS;
-        nb_sectors -= NBD_MAX_SECTORS;
-    }
-    return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
+    return nbd_co_rwv(bs, sector_num, nb_sectors, qiov, 0);
 }
 
 static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
                          int nb_sectors, QEMUIOVector *qiov)
 {
-    int offset = 0;
-    int ret;
-    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
-        if (ret < 0) {
-            return ret;
-        }
-        offset += NBD_MAX_SECTORS * 512;
-        sector_num += NBD_MAX_SECTORS;
-        nb_sectors -= NBD_MAX_SECTORS;
-    }
-    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
+    return nbd_co_rwv(bs, sector_num, nb_sectors, qiov, 1);
 }
 
 static int nbd_co_flush(BlockDriverState *bs)