diff mbox

[v2] snapshot: use local variable to bdrv_pwrite_sync L1 table

Message ID 201410211934299401424@sangfor.com
State New
Headers show

Commit Message

Zhang Haoyu Oct. 21, 2014, 11:34 a.m. UTC
Use local variable to bdrv_pwrite_sync L1 table,
needless to make conversion of cached L1 table between
big-endian and host style.

Signed-off-by: Zhang Haoyu <zhanghy@sangfor.com>
---
v1 -> v2:
 - remove the superflous assignment, l1_table = NULL;
 - replace 512 with BDRV_SECTOR_SIZE, and align_offset with ROUND_UP
 - remove needless check of if (l1_table) before g_free(l1_table)

 block/qcow2-refcount.c | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

Comments

Max Reitz Oct. 21, 2014, 12:01 p.m. UTC | #1
On 2014-10-21 at 13:34, Zhang Haoyu wrote:
> Use local variable to bdrv_pwrite_sync L1 table,
> needless to make conversion of cached L1 table between
> big-endian and host style.
>
> Signed-off-by: Zhang Haoyu <zhanghy@sangfor.com>
> ---
> v1 -> v2:
>   - remove the superflous assignment, l1_table = NULL;
>   - replace 512 with BDRV_SECTOR_SIZE, and align_offset with ROUND_UP
>   - remove needless check of if (l1_table) before g_free(l1_table)
>
>   block/qcow2-refcount.c | 24 +++++++-----------------
>   1 file changed, 7 insertions(+), 17 deletions(-)
>
> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
> index 2bcaaf9..29a916a 100644
> --- a/block/qcow2-refcount.c
> +++ b/block/qcow2-refcount.c
> @@ -881,14 +881,17 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>   {
>       BDRVQcowState *s = bs->opaque;
>       uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
> -    bool l1_allocated = false;
>       int64_t old_offset, old_l2_offset;
>       int i, j, l1_modified = 0, nb_csectors, refcount;
>       int ret;
>   
>       l2_table = NULL;
> -    l1_table = NULL;
>       l1_size2 = l1_size * sizeof(uint64_t);
> +    l1_table = g_try_malloc0(ROUND_UP(l1_size2, BDRV_SECTOR_SIZE));

I'm sorry, but I just now realized that we don't even need the 0 variant 
here. We are never accessing any element beyond l1_table[l1_size - 1], 
and all elements from 0 until l1_size - 1 are overwritten by 
bdrv_pread() or memcpy(). Therefore we can simply use 
qemu_try_blockalign(bs->file, l1_size2) here.

(and we should be using qemu_{try_,}blockalign() instead of 
g_{try_,}malloc{0,}() whenever possible for performance reasons)

> +    if (l1_size2 && l1_table == NULL) {
> +        ret = -ENOMEM;
> +        goto fail;
> +    }
>   
>       s->cache_discards = true;
>   
> @@ -896,13 +899,6 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>        * l1_table_offset when it is the current s->l1_table_offset! Be careful
>        * when changing this! */
>       if (l1_table_offset != s->l1_table_offset) {
> -        l1_table = g_try_malloc0(align_offset(l1_size2, 512));
> -        if (l1_size2 && l1_table == NULL) {
> -            ret = -ENOMEM;
> -            goto fail;
> -        }
> -        l1_allocated = true;
> -
>           ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
>           if (ret < 0) {
>               goto fail;
> @@ -912,8 +908,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>               be64_to_cpus(&l1_table[i]);
>       } else {
>           assert(l1_size == s->l1_size);
> -        l1_table = s->l1_table;
> -        l1_allocated = false;
> +        memcpy(l1_table, s->l1_table, l1_size2);
>       }
>   
>       for(i = 0; i < l1_size; i++) {
> @@ -1055,13 +1050,8 @@ fail:
>           }
>   
>           ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
> -
> -        for (i = 0; i < l1_size; i++) {
> -            be64_to_cpus(&l1_table[i]);
> -        }

But I realized something even more important as well: If you are 
modifying the current L1 table, you need to copy the result back to 
s->l1_table.

Sorry for having missed these in my previous review.

Max

>       }
> -    if (l1_allocated)
> -        g_free(l1_table);
> +    g_free(l1_table);
>       return ret;
>   }
>
Zhang Haoyu Oct. 21, 2014, 1:08 p.m. UTC | #2
>> Use local variable to bdrv_pwrite_sync L1 table,
>> needless to make conversion of cached L1 table between
>> big-endian and host style.
>>
>> Signed-off-by: Zhang Haoyu <zhanghy@sangfor.com>
>> ---
>> v1 -> v2:
>>   - remove the superflous assignment, l1_table = NULL;
>>   - replace 512 with BDRV_SECTOR_SIZE, and align_offset with ROUND_UP
>>   - remove needless check of if (l1_table) before g_free(l1_table)
>>
>>   block/qcow2-refcount.c | 24 +++++++-----------------
>>   1 file changed, 7 insertions(+), 17 deletions(-)
>>
>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>> index 2bcaaf9..29a916a 100644
>> --- a/block/qcow2-refcount.c
>> +++ b/block/qcow2-refcount.c
>> @@ -881,14 +881,17 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>   {
>>       BDRVQcowState *s = bs->opaque;
>>       uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
>> -    bool l1_allocated = false;
>>       int64_t old_offset, old_l2_offset;
>>       int i, j, l1_modified = 0, nb_csectors, refcount;
>>       int ret;
>>   
>>       l2_table = NULL;
>> -    l1_table = NULL;
>>       l1_size2 = l1_size * sizeof(uint64_t);
>> +    l1_table = g_try_malloc0(ROUND_UP(l1_size2, BDRV_SECTOR_SIZE));
>
>I'm sorry, but I just now realized that we don't even need the 0 variant 
>here. We are never accessing any element beyond l1_table[l1_size - 1], 
>and all elements from 0 until l1_size - 1 are overwritten by 
>bdrv_pread() or memcpy(). Therefore we can simply use 
>qemu_try_blockalign(bs->file, l1_size2) here.
>
OK, I will replace g_try_malloc0(ROUND_UP(l1_size2, BDRV_SECTOR_SIZE)) with
qemu_try_blockalign(bs, ROUND_UP(l1_size2, BDRV_SECTOR_SIZE)).

>(and we should be using qemu_{try_,}blockalign() instead of 
>g_{try_,}malloc{0,}() whenever possible for performance reasons)
>
>> +    if (l1_size2 && l1_table == NULL) {
>> +        ret = -ENOMEM;
>> +        goto fail;
>> +    }
>>   
>>       s->cache_discards = true;
>>   
>> @@ -896,13 +899,6 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>        * l1_table_offset when it is the current s->l1_table_offset! Be careful
>>        * when changing this! */
>>       if (l1_table_offset != s->l1_table_offset) {
>> -        l1_table = g_try_malloc0(align_offset(l1_size2, 512));
>> -        if (l1_size2 && l1_table == NULL) {
>> -            ret = -ENOMEM;
>> -            goto fail;
>> -        }
>> -        l1_allocated = true;
>> -
>>           ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
>>           if (ret < 0) {
>>               goto fail;
>> @@ -912,8 +908,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>               be64_to_cpus(&l1_table[i]);
>>       } else {
>>           assert(l1_size == s->l1_size);
>> -        l1_table = s->l1_table;
>> -        l1_allocated = false;
>> +        memcpy(l1_table, s->l1_table, l1_size2);
>>       }
>>   
>>       for(i = 0; i < l1_size; i++) {
>> @@ -1055,13 +1050,8 @@ fail:
>>           }
>>   
>>           ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
>> -
>> -        for (i = 0; i < l1_size; i++) {
>> -            be64_to_cpus(&l1_table[i]);
>> -        }
>
>But I realized something even more important as well: If you are 
>modifying the current L1 table, you need to copy the result back to 
>s->l1_table.
>
My fault.
>Sorry for having missed these in my previous review.
>
>Max
>
>>       }
>> -    if (l1_allocated)
>> -        g_free(l1_table);
>> +    g_free(l1_table);
>>       return ret;
>>   }
>>
Max Reitz Oct. 21, 2014, 1:10 p.m. UTC | #3
On 2014-10-21 at 15:08, Zhang Haoyu wrote:
>>> Use local variable to bdrv_pwrite_sync L1 table,
>>> needless to make conversion of cached L1 table between
>>> big-endian and host style.
>>>
>>> Signed-off-by: Zhang Haoyu <zhanghy@sangfor.com>
>>> ---
>>> v1 -> v2:
>>>    - remove the superflous assignment, l1_table = NULL;
>>>    - replace 512 with BDRV_SECTOR_SIZE, and align_offset with ROUND_UP
>>>    - remove needless check of if (l1_table) before g_free(l1_table)
>>>
>>>    block/qcow2-refcount.c | 24 +++++++-----------------
>>>    1 file changed, 7 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>>> index 2bcaaf9..29a916a 100644
>>> --- a/block/qcow2-refcount.c
>>> +++ b/block/qcow2-refcount.c
>>> @@ -881,14 +881,17 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>>    {
>>>        BDRVQcowState *s = bs->opaque;
>>>        uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
>>> -    bool l1_allocated = false;
>>>        int64_t old_offset, old_l2_offset;
>>>        int i, j, l1_modified = 0, nb_csectors, refcount;
>>>        int ret;
>>>    
>>>        l2_table = NULL;
>>> -    l1_table = NULL;
>>>        l1_size2 = l1_size * sizeof(uint64_t);
>>> +    l1_table = g_try_malloc0(ROUND_UP(l1_size2, BDRV_SECTOR_SIZE));
>> I'm sorry, but I just now realized that we don't even need the 0 variant
>> here. We are never accessing any element beyond l1_table[l1_size - 1],
>> and all elements from 0 until l1_size - 1 are overwritten by
>> bdrv_pread() or memcpy(). Therefore we can simply use
>> qemu_try_blockalign(bs->file, l1_size2) here.
>>
> OK, I will replace g_try_malloc0(ROUND_UP(l1_size2, BDRV_SECTOR_SIZE)) with
> qemu_try_blockalign(bs, ROUND_UP(l1_size2, BDRV_SECTOR_SIZE)).

You can actually omit the ROUND_UP(). There are no accesses beyond 
l1_table[l1_size - 1], if I'm not mistaken.

Max

>> (and we should be using qemu_{try_,}blockalign() instead of
>> g_{try_,}malloc{0,}() whenever possible for performance reasons)
>>
>>> +    if (l1_size2 && l1_table == NULL) {
>>> +        ret = -ENOMEM;
>>> +        goto fail;
>>> +    }
>>>    
>>>        s->cache_discards = true;
>>>    
>>> @@ -896,13 +899,6 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>>         * l1_table_offset when it is the current s->l1_table_offset! Be careful
>>>         * when changing this! */
>>>        if (l1_table_offset != s->l1_table_offset) {
>>> -        l1_table = g_try_malloc0(align_offset(l1_size2, 512));
>>> -        if (l1_size2 && l1_table == NULL) {
>>> -            ret = -ENOMEM;
>>> -            goto fail;
>>> -        }
>>> -        l1_allocated = true;
>>> -
>>>            ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
>>>            if (ret < 0) {
>>>                goto fail;
>>> @@ -912,8 +908,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>>                be64_to_cpus(&l1_table[i]);
>>>        } else {
>>>            assert(l1_size == s->l1_size);
>>> -        l1_table = s->l1_table;
>>> -        l1_allocated = false;
>>> +        memcpy(l1_table, s->l1_table, l1_size2);
>>>        }
>>>    
>>>        for(i = 0; i < l1_size; i++) {
>>> @@ -1055,13 +1050,8 @@ fail:
>>>            }
>>>    
>>>            ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
>>> -
>>> -        for (i = 0; i < l1_size; i++) {
>>> -            be64_to_cpus(&l1_table[i]);
>>> -        }
>> But I realized something even more important as well: If you are
>> modifying the current L1 table, you need to copy the result back to
>> s->l1_table.
>>
> My fault.
>> Sorry for having missed these in my previous review.
>>
>> Max
>>
>>>        }
>>> -    if (l1_allocated)
>>> -        g_free(l1_table);
>>> +    g_free(l1_table);
>>>        return ret;
>>>    }
>>>
Max Reitz Oct. 21, 2014, 1:10 p.m. UTC | #4
On 2014-10-21 at 15:10, Max Reitz wrote:
> On 2014-10-21 at 15:08, Zhang Haoyu wrote:
>>>> Use local variable to bdrv_pwrite_sync L1 table,
>>>> needless to make conversion of cached L1 table between
>>>> big-endian and host style.
>>>>
>>>> Signed-off-by: Zhang Haoyu <zhanghy@sangfor.com>
>>>> ---
>>>> v1 -> v2:
>>>>    - remove the superflous assignment, l1_table = NULL;
>>>>    - replace 512 with BDRV_SECTOR_SIZE, and align_offset with ROUND_UP
>>>>    - remove needless check of if (l1_table) before g_free(l1_table)
>>>>
>>>>    block/qcow2-refcount.c | 24 +++++++-----------------
>>>>    1 file changed, 7 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>>>> index 2bcaaf9..29a916a 100644
>>>> --- a/block/qcow2-refcount.c
>>>> +++ b/block/qcow2-refcount.c
>>>> @@ -881,14 +881,17 @@ int 
>>>> qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>>>    {
>>>>        BDRVQcowState *s = bs->opaque;
>>>>        uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
>>>> -    bool l1_allocated = false;
>>>>        int64_t old_offset, old_l2_offset;
>>>>        int i, j, l1_modified = 0, nb_csectors, refcount;
>>>>        int ret;
>>>>           l2_table = NULL;
>>>> -    l1_table = NULL;
>>>>        l1_size2 = l1_size * sizeof(uint64_t);
>>>> +    l1_table = g_try_malloc0(ROUND_UP(l1_size2, BDRV_SECTOR_SIZE));
>>> I'm sorry, but I just now realized that we don't even need the 0 
>>> variant
>>> here. We are never accessing any element beyond l1_table[l1_size - 1],
>>> and all elements from 0 until l1_size - 1 are overwritten by
>>> bdrv_pread() or memcpy(). Therefore we can simply use
>>> qemu_try_blockalign(bs->file, l1_size2) here.
>>>
>> OK, I will replace g_try_malloc0(ROUND_UP(l1_size2, 
>> BDRV_SECTOR_SIZE)) with
>> qemu_try_blockalign(bs, ROUND_UP(l1_size2, BDRV_SECTOR_SIZE)).
>
> You can actually omit the ROUND_UP(). There are no accesses beyond 
> l1_table[l1_size - 1], if I'm not mistaken.

Oh, and it should be qemu_try_blockalign(bs->file, l1_size2), not 
qemu_try_blockalign(bs, l1_size2).

> Max
>
>>> (and we should be using qemu_{try_,}blockalign() instead of
>>> g_{try_,}malloc{0,}() whenever possible for performance reasons)
>>>
>>>> +    if (l1_size2 && l1_table == NULL) {
>>>> +        ret = -ENOMEM;
>>>> +        goto fail;
>>>> +    }
>>>>           s->cache_discards = true;
>>>>    @@ -896,13 +899,6 @@ int 
>>>> qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>>>         * l1_table_offset when it is the current 
>>>> s->l1_table_offset! Be careful
>>>>         * when changing this! */
>>>>        if (l1_table_offset != s->l1_table_offset) {
>>>> -        l1_table = g_try_malloc0(align_offset(l1_size2, 512));
>>>> -        if (l1_size2 && l1_table == NULL) {
>>>> -            ret = -ENOMEM;
>>>> -            goto fail;
>>>> -        }
>>>> -        l1_allocated = true;
>>>> -
>>>>            ret = bdrv_pread(bs->file, l1_table_offset, l1_table, 
>>>> l1_size2);
>>>>            if (ret < 0) {
>>>>                goto fail;
>>>> @@ -912,8 +908,7 @@ int 
>>>> qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>>>                be64_to_cpus(&l1_table[i]);
>>>>        } else {
>>>>            assert(l1_size == s->l1_size);
>>>> -        l1_table = s->l1_table;
>>>> -        l1_allocated = false;
>>>> +        memcpy(l1_table, s->l1_table, l1_size2);
>>>>        }
>>>>           for(i = 0; i < l1_size; i++) {
>>>> @@ -1055,13 +1050,8 @@ fail:
>>>>            }
>>>>               ret = bdrv_pwrite_sync(bs->file, l1_table_offset, 
>>>> l1_table, l1_size2);
>>>> -
>>>> -        for (i = 0; i < l1_size; i++) {
>>>> -            be64_to_cpus(&l1_table[i]);
>>>> -        }
>>> But I realized something even more important as well: If you are
>>> modifying the current L1 table, you need to copy the result back to
>>> s->l1_table.
>>>
>> My fault.
>>> Sorry for having missed these in my previous review.
>>>
>>> Max
>>>
>>>>        }
>>>> -    if (l1_allocated)
>>>> -        g_free(l1_table);
>>>> +    g_free(l1_table);
>>>>        return ret;
>>>>    }
>
diff mbox

Patch

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 2bcaaf9..29a916a 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -881,14 +881,17 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 {
     BDRVQcowState *s = bs->opaque;
     uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
-    bool l1_allocated = false;
     int64_t old_offset, old_l2_offset;
     int i, j, l1_modified = 0, nb_csectors, refcount;
     int ret;
 
     l2_table = NULL;
-    l1_table = NULL;
     l1_size2 = l1_size * sizeof(uint64_t);
+    l1_table = g_try_malloc0(ROUND_UP(l1_size2, BDRV_SECTOR_SIZE));
+    if (l1_size2 && l1_table == NULL) {
+        ret = -ENOMEM;
+        goto fail;
+    }
 
     s->cache_discards = true;
 
@@ -896,13 +899,6 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
      * l1_table_offset when it is the current s->l1_table_offset! Be careful
      * when changing this! */
     if (l1_table_offset != s->l1_table_offset) {
-        l1_table = g_try_malloc0(align_offset(l1_size2, 512));
-        if (l1_size2 && l1_table == NULL) {
-            ret = -ENOMEM;
-            goto fail;
-        }
-        l1_allocated = true;
-
         ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
         if (ret < 0) {
             goto fail;
@@ -912,8 +908,7 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
             be64_to_cpus(&l1_table[i]);
     } else {
         assert(l1_size == s->l1_size);
-        l1_table = s->l1_table;
-        l1_allocated = false;
+        memcpy(l1_table, s->l1_table, l1_size2);
     }
 
     for(i = 0; i < l1_size; i++) {
@@ -1055,13 +1050,8 @@  fail:
         }
 
         ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
-
-        for (i = 0; i < l1_size; i++) {
-            be64_to_cpus(&l1_table[i]);
-        }
     }
-    if (l1_allocated)
-        g_free(l1_table);
+    g_free(l1_table);
     return ret;
 }