diff mbox

block: add a knob to disable multiwrite_merge

Message ID 1413785643-20336-1-git-send-email-pl@kamp.de
State New
Headers show

Commit Message

Peter Lieven Oct. 20, 2014, 6:14 a.m. UTC
the block layer silently merges write requests since
commit 40b4f539. This patch adds a knob to disable
this feature as there has been some discussion lately
if multiwrite is a good idea at all and as it falsifies
benchmarks.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 block.c                   |    4 ++++
 block/qapi.c              |    1 +
 blockdev.c                |    7 +++++++
 hmp.c                     |    4 ++++
 include/block/block_int.h |    1 +
 qapi/block-core.json      |   10 +++++++++-
 qemu-options.hx           |    1 +
 qmp-commands.hx           |    2 ++
 8 files changed, 29 insertions(+), 1 deletion(-)

Comments

Max Reitz Oct. 20, 2014, 8:59 a.m. UTC | #1
On 2014-10-20 at 08:14, Peter Lieven wrote:
> the block layer silently merges write requests since

s/^t/T/

> commit 40b4f539. This patch adds a knob to disable
> this feature as there has been some discussion lately
> if multiwrite is a good idea at all and as it falsifies
> benchmarks.
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>   block.c                   |    4 ++++
>   block/qapi.c              |    1 +
>   blockdev.c                |    7 +++++++
>   hmp.c                     |    4 ++++
>   include/block/block_int.h |    1 +
>   qapi/block-core.json      |   10 +++++++++-
>   qemu-options.hx           |    1 +
>   qmp-commands.hx           |    2 ++
>   8 files changed, 29 insertions(+), 1 deletion(-)
>
> diff --git a/block.c b/block.c
> index 27533f3..1658a72 100644
> --- a/block.c
> +++ b/block.c
> @@ -4531,6 +4531,10 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
>   {
>       int i, outidx;
>   
> +    if (!bs->write_merging) {
> +        return num_reqs;
> +    }
> +
>       // Sort requests by start sector
>       qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
>   
> diff --git a/block/qapi.c b/block/qapi.c
> index 9733ebd..02251dd 100644
> --- a/block/qapi.c
> +++ b/block/qapi.c
> @@ -58,6 +58,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
>   
>       info->backing_file_depth = bdrv_get_backing_file_depth(bs);
>       info->detect_zeroes = bs->detect_zeroes;
> +    info->write_merging = bs->write_merging;
>   
>       if (bs->io_limits_enabled) {
>           ThrottleConfig cfg;
> diff --git a/blockdev.c b/blockdev.c
> index e595910..13e47b8 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -378,6 +378,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>       const char *id;
>       bool has_driver_specific_opts;
>       BlockdevDetectZeroesOptions detect_zeroes;
> +    bool write_merging;
>       BlockDriver *drv = NULL;
>   
>       /* Check common options by copying from bs_opts to opts, all other options
> @@ -405,6 +406,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>       snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
>       ro = qemu_opt_get_bool(opts, "read-only", 0);
>       copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
> +    write_merging = qemu_opt_get_bool(opts, "write-merging", true);

Using this option in blockdev_init() means that you can only enable or 
disable merging for the top layer (the root BDS). Furthermore, since you 
don't set bs->write_merging in bdrv_new() (or at least bdrv_open()), it 
actually defaults to false and only for the top layer it defaults to true.

Therefore, if after this patch a format block driver issues a multiwrite 
to its file, the write will not be merged and the user can do nothing 
about it. I don't suppose this is intentional...?

I propose evaluating the option in bdrv_open() and setting 
bs->write_merging there.

>       if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
>           if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) {
> @@ -530,6 +532,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>       bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
>       bs->read_only = ro;
>       bs->detect_zeroes = detect_zeroes;
> +    bs->write_merging = write_merging;
>   
>       bdrv_set_on_error(bs, on_read_error, on_write_error);
>   
> @@ -2746,6 +2749,10 @@ QemuOptsList qemu_common_drive_opts = {
>               .name = "detect-zeroes",
>               .type = QEMU_OPT_STRING,
>               .help = "try to optimize zero writes (off, on, unmap)",
> +        },{
> +            .name = "write-merging",
> +            .type = QEMU_OPT_BOOL,
> +            .help = "enable write merging (default: true)",
>           },
>           { /* end of list */ }
>       },
> diff --git a/hmp.c b/hmp.c
> index 63d7686..8d6ad0b 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -348,6 +348,10 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
>                              BlockdevDetectZeroesOptions_lookup[info->value->inserted->detect_zeroes]);
>           }
>   
> +        if (!info->value->inserted->write_merging) {
> +            monitor_printf(mon, "    Write Merging:    off\n");
> +        }
> +
>           if (info->value->inserted->bps
>               || info->value->inserted->bps_rd
>               || info->value->inserted->bps_wr
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 8d86a6c..39bbde2 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -407,6 +407,7 @@ struct BlockDriverState {
>   
>       QDict *options;
>       BlockdevDetectZeroesOptions detect_zeroes;
> +    bool write_merging;
>   
>       /* The error object in use for blocking operations on backing_hd */
>       Error *backing_blocker;
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 8f7089e..4931bd9 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -214,6 +214,8 @@
>   #
>   # @detect_zeroes: detect and optimize zero writes (Since 2.1)
>   #
> +# @write_merging: true if write merging is enabled (Since 2.2)
> +#
>   # @bps: total throughput limit in bytes per second is specified
>   #
>   # @bps_rd: read throughput limit in bytes per second is specified
> @@ -250,6 +252,7 @@
>               '*backing_file': 'str', 'backing_file_depth': 'int',
>               'encrypted': 'bool', 'encryption_key_missing': 'bool',
>               'detect_zeroes': 'BlockdevDetectZeroesOptions',
> +            'write_merging': 'bool',
>               'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
>               'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
>               'image': 'ImageInfo',
> @@ -1180,6 +1183,10 @@
>   #                 (default: false)
>   # @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
>   #                 (default: off)
> +# @write-merging: #optional enable the merging of write requests
> +#                 also known as multiwrite_merge (Since 2.2)
> +#                 (default: true, but this might change in the future
> +#                  depending on format/protocol/features used)
>   #
>   # Since: 1.7
>   ##
> @@ -1193,7 +1200,8 @@
>               '*rerror': 'BlockdevOnError',
>               '*werror': 'BlockdevOnError',
>               '*read-only': 'bool',
> -            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
> +            '*detect-zeroes': 'BlockdevDetectZeroesOptions',
> +            '*write-merging': 'bool' } }
>   
>   ##
>   # @BlockdevOptionsFile
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 22cf3b9..d2f756f 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -432,6 +432,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
>       "       [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
>       "       [,readonly=on|off][,copy-on-read=on|off]\n"
>       "       [,discard=ignore|unmap][,detect-zeroes=on|off|unmap]\n"
> +    "       [,write-merging=on|off]\n"
>       "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
>       "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
>       "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index 1abd619..529a563 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -2104,6 +2104,7 @@ Each json-object contain the following:
>            - "iops_size": I/O size when limiting by iops (json-int)
>            - "detect_zeroes": detect and optimize zero writing (json-string)
>                - Possible values: "off", "on", "unmap"
> +         - "write_merging": enable multiwrite_merge feature (json-bool)
>            - "image": the detail of the image, it is a json-object containing
>               the following:
>                - "filename": image file name (json-string)
> @@ -2181,6 +2182,7 @@ Example:
>                  "iops_wr_max": 0,
>                  "iops_size": 0,
>                  "detect_zeroes": "on",
> +               "write_merging": "true",
>                  "image":{
>                     "filename":"disks/test.qcow2",
>                     "format":"qcow2",

Hm, thanks for reminding me of that file. There are some things which I 
forgot to update (ImageInfoSpecific at least)...

Max
Peter Lieven Oct. 20, 2014, 9:14 a.m. UTC | #2
On 20.10.2014 10:59, Max Reitz wrote:
> On 2014-10-20 at 08:14, Peter Lieven wrote:
>> the block layer silently merges write requests since
>
> s/^t/T/
>
>> commit 40b4f539. This patch adds a knob to disable
>> this feature as there has been some discussion lately
>> if multiwrite is a good idea at all and as it falsifies
>> benchmarks.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>   block.c                   |    4 ++++
>>   block/qapi.c              |    1 +
>>   blockdev.c                |    7 +++++++
>>   hmp.c                     |    4 ++++
>>   include/block/block_int.h |    1 +
>>   qapi/block-core.json      |   10 +++++++++-
>>   qemu-options.hx           |    1 +
>>   qmp-commands.hx           |    2 ++
>>   8 files changed, 29 insertions(+), 1 deletion(-)
>>
>> diff --git a/block.c b/block.c
>> index 27533f3..1658a72 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -4531,6 +4531,10 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
>>   {
>>       int i, outidx;
>>   +    if (!bs->write_merging) {
>> +        return num_reqs;
>> +    }
>> +
>>       // Sort requests by start sector
>>       qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
>>   diff --git a/block/qapi.c b/block/qapi.c
>> index 9733ebd..02251dd 100644
>> --- a/block/qapi.c
>> +++ b/block/qapi.c
>> @@ -58,6 +58,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
>>         info->backing_file_depth = bdrv_get_backing_file_depth(bs);
>>       info->detect_zeroes = bs->detect_zeroes;
>> +    info->write_merging = bs->write_merging;
>>         if (bs->io_limits_enabled) {
>>           ThrottleConfig cfg;
>> diff --git a/blockdev.c b/blockdev.c
>> index e595910..13e47b8 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -378,6 +378,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>       const char *id;
>>       bool has_driver_specific_opts;
>>       BlockdevDetectZeroesOptions detect_zeroes;
>> +    bool write_merging;
>>       BlockDriver *drv = NULL;
>>         /* Check common options by copying from bs_opts to opts, all other options
>> @@ -405,6 +406,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>       snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
>>       ro = qemu_opt_get_bool(opts, "read-only", 0);
>>       copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
>> +    write_merging = qemu_opt_get_bool(opts, "write-merging", true);
>
> Using this option in blockdev_init() means that you can only enable or disable merging for the top layer (the root BDS). Furthermore, since you don't set bs->write_merging in bdrv_new() (or at least bdrv_open()), it actually defaults to false and only 
> for the top layer it defaults to true.
>
> Therefore, if after this patch a format block driver issues a multiwrite to its file, the write will not be merged and the user can do nothing about it. I don't suppose this is intentional...?

I am not sure if a block driver actually can do this at all? The only way to enter multiwrite is from virtio_blk_handle_request in virtio-blk.c.

>
> I propose evaluating the option in bdrv_open() and setting bs->write_merging there.

I wasn't aware actually. I remember that someone asked me to implement discard_zeroes in blockdev_init. I think it was something related to QMP. So we still might
need to check parameters at 2 positions? It is quite confusing which paramter has to be parsed where.

Peter

>
>>       if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
>>           if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) {
>> @@ -530,6 +532,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>       bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
>>       bs->read_only = ro;
>>       bs->detect_zeroes = detect_zeroes;
>> +    bs->write_merging = write_merging;
>>         bdrv_set_on_error(bs, on_read_error, on_write_error);
>>   @@ -2746,6 +2749,10 @@ QemuOptsList qemu_common_drive_opts = {
>>               .name = "detect-zeroes",
>>               .type = QEMU_OPT_STRING,
>>               .help = "try to optimize zero writes (off, on, unmap)",
>> +        },{
>> +            .name = "write-merging",
>> +            .type = QEMU_OPT_BOOL,
>> +            .help = "enable write merging (default: true)",
>>           },
>>           { /* end of list */ }
>>       },
>> diff --git a/hmp.c b/hmp.c
>> index 63d7686..8d6ad0b 100644
>> --- a/hmp.c
>> +++ b/hmp.c
>> @@ -348,6 +348,10 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
>> BlockdevDetectZeroesOptions_lookup[info->value->inserted->detect_zeroes]);
>>           }
>>   +        if (!info->value->inserted->write_merging) {
>> +            monitor_printf(mon, "    Write Merging:    off\n");
>> +        }
>> +
>>           if (info->value->inserted->bps
>>               || info->value->inserted->bps_rd
>>               || info->value->inserted->bps_wr
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index 8d86a6c..39bbde2 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -407,6 +407,7 @@ struct BlockDriverState {
>>         QDict *options;
>>       BlockdevDetectZeroesOptions detect_zeroes;
>> +    bool write_merging;
>>         /* The error object in use for blocking operations on backing_hd */
>>       Error *backing_blocker;
>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>> index 8f7089e..4931bd9 100644
>> --- a/qapi/block-core.json
>> +++ b/qapi/block-core.json
>> @@ -214,6 +214,8 @@
>>   #
>>   # @detect_zeroes: detect and optimize zero writes (Since 2.1)
>>   #
>> +# @write_merging: true if write merging is enabled (Since 2.2)
>> +#
>>   # @bps: total throughput limit in bytes per second is specified
>>   #
>>   # @bps_rd: read throughput limit in bytes per second is specified
>> @@ -250,6 +252,7 @@
>>               '*backing_file': 'str', 'backing_file_depth': 'int',
>>               'encrypted': 'bool', 'encryption_key_missing': 'bool',
>>               'detect_zeroes': 'BlockdevDetectZeroesOptions',
>> +            'write_merging': 'bool',
>>               'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
>>               'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
>>               'image': 'ImageInfo',
>> @@ -1180,6 +1183,10 @@
>>   #                 (default: false)
>>   # @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
>>   #                 (default: off)
>> +# @write-merging: #optional enable the merging of write requests
>> +#                 also known as multiwrite_merge (Since 2.2)
>> +#                 (default: true, but this might change in the future
>> +#                  depending on format/protocol/features used)
>>   #
>>   # Since: 1.7
>>   ##
>> @@ -1193,7 +1200,8 @@
>>               '*rerror': 'BlockdevOnError',
>>               '*werror': 'BlockdevOnError',
>>               '*read-only': 'bool',
>> -            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
>> +            '*detect-zeroes': 'BlockdevDetectZeroesOptions',
>> +            '*write-merging': 'bool' } }
>>     ##
>>   # @BlockdevOptionsFile
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index 22cf3b9..d2f756f 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -432,6 +432,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
>>       " [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
>>       "       [,readonly=on|off][,copy-on-read=on|off]\n"
>>       " [,discard=ignore|unmap][,detect-zeroes=on|off|unmap]\n"
>> +    "       [,write-merging=on|off]\n"
>>       "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
>>       "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
>>       " [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
>> diff --git a/qmp-commands.hx b/qmp-commands.hx
>> index 1abd619..529a563 100644
>> --- a/qmp-commands.hx
>> +++ b/qmp-commands.hx
>> @@ -2104,6 +2104,7 @@ Each json-object contain the following:
>>            - "iops_size": I/O size when limiting by iops (json-int)
>>            - "detect_zeroes": detect and optimize zero writing (json-string)
>>                - Possible values: "off", "on", "unmap"
>> +         - "write_merging": enable multiwrite_merge feature (json-bool)
>>            - "image": the detail of the image, it is a json-object containing
>>               the following:
>>                - "filename": image file name (json-string)
>> @@ -2181,6 +2182,7 @@ Example:
>>                  "iops_wr_max": 0,
>>                  "iops_size": 0,
>>                  "detect_zeroes": "on",
>> +               "write_merging": "true",
>>                  "image":{
>>                     "filename":"disks/test.qcow2",
>>                     "format":"qcow2",
>
> Hm, thanks for reminding me of that file. There are some things which I forgot to update (ImageInfoSpecific at least)...
>
> Max
Max Reitz Oct. 20, 2014, 9:27 a.m. UTC | #3
On 2014-10-20 at 11:14, Peter Lieven wrote:
> On 20.10.2014 10:59, Max Reitz wrote:
>> On 2014-10-20 at 08:14, Peter Lieven wrote:
>>> the block layer silently merges write requests since
>>
>> s/^t/T/
>>
>>> commit 40b4f539. This patch adds a knob to disable
>>> this feature as there has been some discussion lately
>>> if multiwrite is a good idea at all and as it falsifies
>>> benchmarks.
>>>
>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>> ---
>>>   block.c                   |    4 ++++
>>>   block/qapi.c              |    1 +
>>>   blockdev.c                |    7 +++++++
>>>   hmp.c                     |    4 ++++
>>>   include/block/block_int.h |    1 +
>>>   qapi/block-core.json      |   10 +++++++++-
>>>   qemu-options.hx           |    1 +
>>>   qmp-commands.hx           |    2 ++
>>>   8 files changed, 29 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/block.c b/block.c
>>> index 27533f3..1658a72 100644
>>> --- a/block.c
>>> +++ b/block.c
>>> @@ -4531,6 +4531,10 @@ static int multiwrite_merge(BlockDriverState 
>>> *bs, BlockRequest *reqs,
>>>   {
>>>       int i, outidx;
>>>   +    if (!bs->write_merging) {
>>> +        return num_reqs;
>>> +    }
>>> +
>>>       // Sort requests by start sector
>>>       qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
>>>   diff --git a/block/qapi.c b/block/qapi.c
>>> index 9733ebd..02251dd 100644
>>> --- a/block/qapi.c
>>> +++ b/block/qapi.c
>>> @@ -58,6 +58,7 @@ BlockDeviceInfo 
>>> *bdrv_block_device_info(BlockDriverState *bs)
>>>         info->backing_file_depth = bdrv_get_backing_file_depth(bs);
>>>       info->detect_zeroes = bs->detect_zeroes;
>>> +    info->write_merging = bs->write_merging;
>>>         if (bs->io_limits_enabled) {
>>>           ThrottleConfig cfg;
>>> diff --git a/blockdev.c b/blockdev.c
>>> index e595910..13e47b8 100644
>>> --- a/blockdev.c
>>> +++ b/blockdev.c
>>> @@ -378,6 +378,7 @@ static DriveInfo *blockdev_init(const char 
>>> *file, QDict *bs_opts,
>>>       const char *id;
>>>       bool has_driver_specific_opts;
>>>       BlockdevDetectZeroesOptions detect_zeroes;
>>> +    bool write_merging;
>>>       BlockDriver *drv = NULL;
>>>         /* Check common options by copying from bs_opts to opts, all 
>>> other options
>>> @@ -405,6 +406,7 @@ static DriveInfo *blockdev_init(const char 
>>> *file, QDict *bs_opts,
>>>       snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
>>>       ro = qemu_opt_get_bool(opts, "read-only", 0);
>>>       copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
>>> +    write_merging = qemu_opt_get_bool(opts, "write-merging", true);
>>
>> Using this option in blockdev_init() means that you can only enable 
>> or disable merging for the top layer (the root BDS). Furthermore, 
>> since you don't set bs->write_merging in bdrv_new() (or at least 
>> bdrv_open()), it actually defaults to false and only for the top 
>> layer it defaults to true.
>>
>> Therefore, if after this patch a format block driver issues a 
>> multiwrite to its file, the write will not be merged and the user can 
>> do nothing about it. I don't suppose this is intentional...?
>
> I am not sure if a block driver actually can do this at all? The only 
> way to enter multiwrite is from virtio_blk_handle_request in 
> virtio-blk.c.

Well, there's also qemu-io -c multiwrite (which only accesses the root 
BDS as well). But other than that, yes, you're right. So, in practice it 
shouldn't matter.

>
>>
>> I propose evaluating the option in bdrv_open() and setting 
>> bs->write_merging there.
>
> I wasn't aware actually. I remember that someone asked me to implement 
> discard_zeroes in blockdev_init. I think it was something related to 
> QMP. So we still might
> need to check parameters at 2 positions? It is quite confusing which 
> paramter has to be parsed where.

As for me, I don't know why some options are parsed in blockdev_init() 
at all. I guess all the options currently parsed in blockdev_init() 
should later be moved to the BlockBackend, at least that would be the 
idea. In practice, we cannot do that: Things like caching will stay in 
the BlockDriverState.

I think it's just broken. IMHO, everything related to the BB should be 
in blockdev_init() and everything related to the BDS should be in 
bdrv_open(). So the question is now whether you want write_merging to be 
in the BDS or in the BB. Considering BB is in Kevin's block branch as of 
last Friday, you might actually want to work on that branch and move the 
field into the BB if you decide that that's the place it should be in.

Max

> Peter
>
>>
>>>       if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
>>>           if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) {
>>> @@ -530,6 +532,7 @@ static DriveInfo *blockdev_init(const char 
>>> *file, QDict *bs_opts,
>>>       bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
>>>       bs->read_only = ro;
>>>       bs->detect_zeroes = detect_zeroes;
>>> +    bs->write_merging = write_merging;
>>>         bdrv_set_on_error(bs, on_read_error, on_write_error);
>>>   @@ -2746,6 +2749,10 @@ QemuOptsList qemu_common_drive_opts = {
>>>               .name = "detect-zeroes",
>>>               .type = QEMU_OPT_STRING,
>>>               .help = "try to optimize zero writes (off, on, unmap)",
>>> +        },{
>>> +            .name = "write-merging",
>>> +            .type = QEMU_OPT_BOOL,
>>> +            .help = "enable write merging (default: true)",
>>>           },
>>>           { /* end of list */ }
>>>       },
>>> diff --git a/hmp.c b/hmp.c
>>> index 63d7686..8d6ad0b 100644
>>> --- a/hmp.c
>>> +++ b/hmp.c
>>> @@ -348,6 +348,10 @@ void hmp_info_block(Monitor *mon, const QDict 
>>> *qdict)
>>> BlockdevDetectZeroesOptions_lookup[info->value->inserted->detect_zeroes]); 
>>>
>>>           }
>>>   +        if (!info->value->inserted->write_merging) {
>>> +            monitor_printf(mon, "    Write Merging: off\n");
>>> +        }
>>> +
>>>           if (info->value->inserted->bps
>>>               || info->value->inserted->bps_rd
>>>               || info->value->inserted->bps_wr
>>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>>> index 8d86a6c..39bbde2 100644
>>> --- a/include/block/block_int.h
>>> +++ b/include/block/block_int.h
>>> @@ -407,6 +407,7 @@ struct BlockDriverState {
>>>         QDict *options;
>>>       BlockdevDetectZeroesOptions detect_zeroes;
>>> +    bool write_merging;
>>>         /* The error object in use for blocking operations on 
>>> backing_hd */
>>>       Error *backing_blocker;
>>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>>> index 8f7089e..4931bd9 100644
>>> --- a/qapi/block-core.json
>>> +++ b/qapi/block-core.json
>>> @@ -214,6 +214,8 @@
>>>   #
>>>   # @detect_zeroes: detect and optimize zero writes (Since 2.1)
>>>   #
>>> +# @write_merging: true if write merging is enabled (Since 2.2)
>>> +#
>>>   # @bps: total throughput limit in bytes per second is specified
>>>   #
>>>   # @bps_rd: read throughput limit in bytes per second is specified
>>> @@ -250,6 +252,7 @@
>>>               '*backing_file': 'str', 'backing_file_depth': 'int',
>>>               'encrypted': 'bool', 'encryption_key_missing': 'bool',
>>>               'detect_zeroes': 'BlockdevDetectZeroesOptions',
>>> +            'write_merging': 'bool',
>>>               'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
>>>               'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
>>>               'image': 'ImageInfo',
>>> @@ -1180,6 +1183,10 @@
>>>   #                 (default: false)
>>>   # @detect-zeroes: #optional detect and optimize zero writes (Since 
>>> 2.1)
>>>   #                 (default: off)
>>> +# @write-merging: #optional enable the merging of write requests
>>> +#                 also known as multiwrite_merge (Since 2.2)
>>> +#                 (default: true, but this might change in the future
>>> +#                  depending on format/protocol/features used)
>>>   #
>>>   # Since: 1.7
>>>   ##
>>> @@ -1193,7 +1200,8 @@
>>>               '*rerror': 'BlockdevOnError',
>>>               '*werror': 'BlockdevOnError',
>>>               '*read-only': 'bool',
>>> -            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
>>> +            '*detect-zeroes': 'BlockdevDetectZeroesOptions',
>>> +            '*write-merging': 'bool' } }
>>>     ##
>>>   # @BlockdevOptionsFile
>>> diff --git a/qemu-options.hx b/qemu-options.hx
>>> index 22cf3b9..d2f756f 100644
>>> --- a/qemu-options.hx
>>> +++ b/qemu-options.hx
>>> @@ -432,6 +432,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
>>>       " 
>>> [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
>>>       "       [,readonly=on|off][,copy-on-read=on|off]\n"
>>>       " [,discard=ignore|unmap][,detect-zeroes=on|off|unmap]\n"
>>> +    "       [,write-merging=on|off]\n"
>>>       "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
>>>       "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
>>>       " [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
>>> diff --git a/qmp-commands.hx b/qmp-commands.hx
>>> index 1abd619..529a563 100644
>>> --- a/qmp-commands.hx
>>> +++ b/qmp-commands.hx
>>> @@ -2104,6 +2104,7 @@ Each json-object contain the following:
>>>            - "iops_size": I/O size when limiting by iops (json-int)
>>>            - "detect_zeroes": detect and optimize zero writing 
>>> (json-string)
>>>                - Possible values: "off", "on", "unmap"
>>> +         - "write_merging": enable multiwrite_merge feature 
>>> (json-bool)
>>>            - "image": the detail of the image, it is a json-object 
>>> containing
>>>               the following:
>>>                - "filename": image file name (json-string)
>>> @@ -2181,6 +2182,7 @@ Example:
>>>                  "iops_wr_max": 0,
>>>                  "iops_size": 0,
>>>                  "detect_zeroes": "on",
>>> +               "write_merging": "true",
>>>                  "image":{
>>>                     "filename":"disks/test.qcow2",
>>>                     "format":"qcow2",
>>
>> Hm, thanks for reminding me of that file. There are some things which 
>> I forgot to update (ImageInfoSpecific at least)...
>>
>> Max
>
>
Peter Lieven Oct. 20, 2014, 10:03 a.m. UTC | #4
On 20.10.2014 11:27, Max Reitz wrote:
> On 2014-10-20 at 11:14, Peter Lieven wrote:
>> On 20.10.2014 10:59, Max Reitz wrote:
>>> On 2014-10-20 at 08:14, Peter Lieven wrote:
>>>> the block layer silently merges write requests since
>>>
>>> s/^t/T/
>>>
>>>> commit 40b4f539. This patch adds a knob to disable
>>>> this feature as there has been some discussion lately
>>>> if multiwrite is a good idea at all and as it falsifies
>>>> benchmarks.
>>>>
>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>> ---
>>>>   block.c                   |    4 ++++
>>>>   block/qapi.c              |    1 +
>>>>   blockdev.c                |    7 +++++++
>>>>   hmp.c                     |    4 ++++
>>>>   include/block/block_int.h |    1 +
>>>>   qapi/block-core.json      |   10 +++++++++-
>>>>   qemu-options.hx           |    1 +
>>>>   qmp-commands.hx           |    2 ++
>>>>   8 files changed, 29 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/block.c b/block.c
>>>> index 27533f3..1658a72 100644
>>>> --- a/block.c
>>>> +++ b/block.c
>>>> @@ -4531,6 +4531,10 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
>>>>   {
>>>>       int i, outidx;
>>>>   +    if (!bs->write_merging) {
>>>> +        return num_reqs;
>>>> +    }
>>>> +
>>>>       // Sort requests by start sector
>>>>       qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
>>>>   diff --git a/block/qapi.c b/block/qapi.c
>>>> index 9733ebd..02251dd 100644
>>>> --- a/block/qapi.c
>>>> +++ b/block/qapi.c
>>>> @@ -58,6 +58,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
>>>>         info->backing_file_depth = bdrv_get_backing_file_depth(bs);
>>>>       info->detect_zeroes = bs->detect_zeroes;
>>>> +    info->write_merging = bs->write_merging;
>>>>         if (bs->io_limits_enabled) {
>>>>           ThrottleConfig cfg;
>>>> diff --git a/blockdev.c b/blockdev.c
>>>> index e595910..13e47b8 100644
>>>> --- a/blockdev.c
>>>> +++ b/blockdev.c
>>>> @@ -378,6 +378,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>>>       const char *id;
>>>>       bool has_driver_specific_opts;
>>>>       BlockdevDetectZeroesOptions detect_zeroes;
>>>> +    bool write_merging;
>>>>       BlockDriver *drv = NULL;
>>>>         /* Check common options by copying from bs_opts to opts, all other options
>>>> @@ -405,6 +406,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>>>       snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
>>>>       ro = qemu_opt_get_bool(opts, "read-only", 0);
>>>>       copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
>>>> +    write_merging = qemu_opt_get_bool(opts, "write-merging", true);
>>>
>>> Using this option in blockdev_init() means that you can only enable or disable merging for the top layer (the root BDS). Furthermore, since you don't set bs->write_merging in bdrv_new() (or at least bdrv_open()), it actually defaults to false and only 
>>> for the top layer it defaults to true.
>>>
>>> Therefore, if after this patch a format block driver issues a multiwrite to its file, the write will not be merged and the user can do nothing about it. I don't suppose this is intentional...?
>>
>> I am not sure if a block driver actually can do this at all? The only way to enter multiwrite is from virtio_blk_handle_request in virtio-blk.c.
>
> Well, there's also qemu-io -c multiwrite (which only accesses the root BDS as well). But other than that, yes, you're right. So, in practice it shouldn't matter.
>
>>
>>>
>>> I propose evaluating the option in bdrv_open() and setting bs->write_merging there.
>>
>> I wasn't aware actually. I remember that someone asked me to implement discard_zeroes in blockdev_init. I think it was something related to QMP. So we still might
>> need to check parameters at 2 positions? It is quite confusing which paramter has to be parsed where.
>
> As for me, I don't know why some options are parsed in blockdev_init() at all. I guess all the options currently parsed in blockdev_init() should later be moved to the BlockBackend, at least that would be the idea. In practice, we cannot do that: Things 
> like caching will stay in the BlockDriverState.
>
> I think it's just broken. IMHO, everything related to the BB should be in blockdev_init() and everything related to the BDS should be in bdrv_open(). So the question is now whether you want write_merging to be in the BDS or in the BB. Considering BB is 
> in Kevin's block branch as of last Friday, you might actually want to work on that branch and move the field into the BB if you decide that that's the place it should be in.

Actually I there a pros and cons for both BDS and BB. As of now my intention was to be able to turn it off. As there are People who would like to see it completely disappear I would not spent too much effort in that switch today.
Looking at BB it is a BDS thing and thus belongs to bdrv_open. But this is true for discard_zeroes (and others) as well. Kevin, Stefan, ultimatively where should it be parsed?

I have on my todo list the following to give you a figure what might happen. All this is for 2.3+ except for the accounting maybe:

  - add accounting for merged requests (to have a metric for modifications)
  - evaluate if sorting the requests really helps
  - simplify the merge conditions and do not keep a fixed list of requests just merge if the Nth and N-1 requests are mergable (without sorting).
  - think about handling the complete merging in virtio-blk with the above simplifications. currently it is a virtio-blk only feature anyway.
  - add read merging

Peter
Max Reitz Oct. 20, 2014, 11:51 a.m. UTC | #5
On 2014-10-20 at 12:03, Peter Lieven wrote:
> On 20.10.2014 11:27, Max Reitz wrote:
>> On 2014-10-20 at 11:14, Peter Lieven wrote:
>>> On 20.10.2014 10:59, Max Reitz wrote:
>>>> On 2014-10-20 at 08:14, Peter Lieven wrote:
>>>>> the block layer silently merges write requests since
>>>>
>>>> s/^t/T/
>>>>
>>>>> commit 40b4f539. This patch adds a knob to disable
>>>>> this feature as there has been some discussion lately
>>>>> if multiwrite is a good idea at all and as it falsifies
>>>>> benchmarks.
>>>>>
>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>> ---
>>>>>   block.c                   |    4 ++++
>>>>>   block/qapi.c              |    1 +
>>>>>   blockdev.c                |    7 +++++++
>>>>>   hmp.c                     |    4 ++++
>>>>>   include/block/block_int.h |    1 +
>>>>>   qapi/block-core.json      |   10 +++++++++-
>>>>>   qemu-options.hx           |    1 +
>>>>>   qmp-commands.hx           |    2 ++
>>>>>   8 files changed, 29 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/block.c b/block.c
>>>>> index 27533f3..1658a72 100644
>>>>> --- a/block.c
>>>>> +++ b/block.c
>>>>> @@ -4531,6 +4531,10 @@ static int 
>>>>> multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
>>>>>   {
>>>>>       int i, outidx;
>>>>>   +    if (!bs->write_merging) {
>>>>> +        return num_reqs;
>>>>> +    }
>>>>> +
>>>>>       // Sort requests by start sector
>>>>>       qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
>>>>>   diff --git a/block/qapi.c b/block/qapi.c
>>>>> index 9733ebd..02251dd 100644
>>>>> --- a/block/qapi.c
>>>>> +++ b/block/qapi.c
>>>>> @@ -58,6 +58,7 @@ BlockDeviceInfo 
>>>>> *bdrv_block_device_info(BlockDriverState *bs)
>>>>>         info->backing_file_depth = bdrv_get_backing_file_depth(bs);
>>>>>       info->detect_zeroes = bs->detect_zeroes;
>>>>> +    info->write_merging = bs->write_merging;
>>>>>         if (bs->io_limits_enabled) {
>>>>>           ThrottleConfig cfg;
>>>>> diff --git a/blockdev.c b/blockdev.c
>>>>> index e595910..13e47b8 100644
>>>>> --- a/blockdev.c
>>>>> +++ b/blockdev.c
>>>>> @@ -378,6 +378,7 @@ static DriveInfo *blockdev_init(const char 
>>>>> *file, QDict *bs_opts,
>>>>>       const char *id;
>>>>>       bool has_driver_specific_opts;
>>>>>       BlockdevDetectZeroesOptions detect_zeroes;
>>>>> +    bool write_merging;
>>>>>       BlockDriver *drv = NULL;
>>>>>         /* Check common options by copying from bs_opts to opts, 
>>>>> all other options
>>>>> @@ -405,6 +406,7 @@ static DriveInfo *blockdev_init(const char 
>>>>> *file, QDict *bs_opts,
>>>>>       snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
>>>>>       ro = qemu_opt_get_bool(opts, "read-only", 0);
>>>>>       copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
>>>>> +    write_merging = qemu_opt_get_bool(opts, "write-merging", true);
>>>>
>>>> Using this option in blockdev_init() means that you can only enable 
>>>> or disable merging for the top layer (the root BDS). Furthermore, 
>>>> since you don't set bs->write_merging in bdrv_new() (or at least 
>>>> bdrv_open()), it actually defaults to false and only for the top 
>>>> layer it defaults to true.
>>>>
>>>> Therefore, if after this patch a format block driver issues a 
>>>> multiwrite to its file, the write will not be merged and the user 
>>>> can do nothing about it. I don't suppose this is intentional...?
>>>
>>> I am not sure if a block driver actually can do this at all? The 
>>> only way to enter multiwrite is from virtio_blk_handle_request in 
>>> virtio-blk.c.
>>
>> Well, there's also qemu-io -c multiwrite (which only accesses the 
>> root BDS as well). But other than that, yes, you're right. So, in 
>> practice it shouldn't matter.
>>
>>>
>>>>
>>>> I propose evaluating the option in bdrv_open() and setting 
>>>> bs->write_merging there.
>>>
>>> I wasn't aware actually. I remember that someone asked me to 
>>> implement discard_zeroes in blockdev_init. I think it was something 
>>> related to QMP. So we still might
>>> need to check parameters at 2 positions? It is quite confusing which 
>>> paramter has to be parsed where.
>>
>> As for me, I don't know why some options are parsed in 
>> blockdev_init() at all. I guess all the options currently parsed in 
>> blockdev_init() should later be moved to the BlockBackend, at least 
>> that would be the idea. In practice, we cannot do that: Things like 
>> caching will stay in the BlockDriverState.
>>
>> I think it's just broken. IMHO, everything related to the BB should 
>> be in blockdev_init() and everything related to the BDS should be in 
>> bdrv_open(). So the question is now whether you want write_merging to 
>> be in the BDS or in the BB. Considering BB is in Kevin's block branch 
>> as of last Friday, you might actually want to work on that branch and 
>> move the field into the BB if you decide that that's the place it 
>> should be in.
>
> Actually I there a pros and cons for both BDS and BB. As of now my 
> intention was to be able to turn it off. As there are People who would 
> like to see it completely disappear I would not spent too much effort 
> in that switch today.
> Looking at BB it is a BDS thing and thus belongs to bdrv_open. But 
> this is true for discard_zeroes (and others) as well. Kevin, Stefan, 
> ultimatively where should it be parsed?

Yes, and for cache, too. That's what I meant with "it's just broken".

Max

> I have on my todo list the following to give you a figure what might 
> happen. All this is for 2.3+ except for the accounting maybe:
>
>  - add accounting for merged requests (to have a metric for 
> modifications)
>  - evaluate if sorting the requests really helps
>  - simplify the merge conditions and do not keep a fixed list of 
> requests just merge if the Nth and N-1 requests are mergable (without 
> sorting).
>  - think about handling the complete merging in virtio-blk with the 
> above simplifications. currently it is a virtio-blk only feature anyway.
>  - add read merging
>
> Peter
Peter Lieven Oct. 20, 2014, 11:53 a.m. UTC | #6
On 20.10.2014 13:51, Max Reitz wrote:
> On 2014-10-20 at 12:03, Peter Lieven wrote:
>> On 20.10.2014 11:27, Max Reitz wrote:
>>> On 2014-10-20 at 11:14, Peter Lieven wrote:
>>>> On 20.10.2014 10:59, Max Reitz wrote:
>>>>> On 2014-10-20 at 08:14, Peter Lieven wrote:
>>>>>> the block layer silently merges write requests since
>>>>>
>>>>> s/^t/T/
>>>>>
>>>>>> commit 40b4f539. This patch adds a knob to disable
>>>>>> this feature as there has been some discussion lately
>>>>>> if multiwrite is a good idea at all and as it falsifies
>>>>>> benchmarks.
>>>>>>
>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>> ---
>>>>>>   block.c                   |    4 ++++
>>>>>>   block/qapi.c              |    1 +
>>>>>>   blockdev.c                |    7 +++++++
>>>>>>   hmp.c                     |    4 ++++
>>>>>>   include/block/block_int.h |    1 +
>>>>>>   qapi/block-core.json      |   10 +++++++++-
>>>>>>   qemu-options.hx           |    1 +
>>>>>>   qmp-commands.hx           |    2 ++
>>>>>>   8 files changed, 29 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/block.c b/block.c
>>>>>> index 27533f3..1658a72 100644
>>>>>> --- a/block.c
>>>>>> +++ b/block.c
>>>>>> @@ -4531,6 +4531,10 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
>>>>>>   {
>>>>>>       int i, outidx;
>>>>>>   +    if (!bs->write_merging) {
>>>>>> +        return num_reqs;
>>>>>> +    }
>>>>>> +
>>>>>>       // Sort requests by start sector
>>>>>>       qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
>>>>>>   diff --git a/block/qapi.c b/block/qapi.c
>>>>>> index 9733ebd..02251dd 100644
>>>>>> --- a/block/qapi.c
>>>>>> +++ b/block/qapi.c
>>>>>> @@ -58,6 +58,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
>>>>>>         info->backing_file_depth = bdrv_get_backing_file_depth(bs);
>>>>>>       info->detect_zeroes = bs->detect_zeroes;
>>>>>> +    info->write_merging = bs->write_merging;
>>>>>>         if (bs->io_limits_enabled) {
>>>>>>           ThrottleConfig cfg;
>>>>>> diff --git a/blockdev.c b/blockdev.c
>>>>>> index e595910..13e47b8 100644
>>>>>> --- a/blockdev.c
>>>>>> +++ b/blockdev.c
>>>>>> @@ -378,6 +378,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>>>>>       const char *id;
>>>>>>       bool has_driver_specific_opts;
>>>>>>       BlockdevDetectZeroesOptions detect_zeroes;
>>>>>> +    bool write_merging;
>>>>>>       BlockDriver *drv = NULL;
>>>>>>         /* Check common options by copying from bs_opts to opts, all other options
>>>>>> @@ -405,6 +406,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>>>>>       snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
>>>>>>       ro = qemu_opt_get_bool(opts, "read-only", 0);
>>>>>>       copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
>>>>>> +    write_merging = qemu_opt_get_bool(opts, "write-merging", true);
>>>>>
>>>>> Using this option in blockdev_init() means that you can only enable or disable merging for the top layer (the root BDS). Furthermore, since you don't set bs->write_merging in bdrv_new() (or at least bdrv_open()), it actually defaults to false and 
>>>>> only for the top layer it defaults to true.
>>>>>
>>>>> Therefore, if after this patch a format block driver issues a multiwrite to its file, the write will not be merged and the user can do nothing about it. I don't suppose this is intentional...?
>>>>
>>>> I am not sure if a block driver actually can do this at all? The only way to enter multiwrite is from virtio_blk_handle_request in virtio-blk.c.
>>>
>>> Well, there's also qemu-io -c multiwrite (which only accesses the root BDS as well). But other than that, yes, you're right. So, in practice it shouldn't matter.
>>>
>>>>
>>>>>
>>>>> I propose evaluating the option in bdrv_open() and setting bs->write_merging there.
>>>>
>>>> I wasn't aware actually. I remember that someone asked me to implement discard_zeroes in blockdev_init. I think it was something related to QMP. So we still might
>>>> need to check parameters at 2 positions? It is quite confusing which paramter has to be parsed where.
>>>
>>> As for me, I don't know why some options are parsed in blockdev_init() at all. I guess all the options currently parsed in blockdev_init() should later be moved to the BlockBackend, at least that would be the idea. In practice, we cannot do that: 
>>> Things like caching will stay in the BlockDriverState.
>>>
>>> I think it's just broken. IMHO, everything related to the BB should be in blockdev_init() and everything related to the BDS should be in bdrv_open(). So the question is now whether you want write_merging to be in the BDS or in the BB. Considering BB 
>>> is in Kevin's block branch as of last Friday, you might actually want to work on that branch and move the field into the BB if you decide that that's the place it should be in.
>>
>> Actually I there a pros and cons for both BDS and BB. As of now my intention was to be able to turn it off. As there are People who would like to see it completely disappear I would not spent too much effort in that switch today.
>> Looking at BB it is a BDS thing and thus belongs to bdrv_open. But this is true for discard_zeroes (and others) as well. Kevin, Stefan, ultimatively where should it be parsed?
>
> Yes, and for cache, too. That's what I meant with "it's just broken".

Looking at the old discussion about discard zeroes it was recommended to put it into bdrv_open_common. If thats still the recommendation I will put it in bdrv_open_common and send
a fix for discard_zeroes. I can't remember why I finally put it in blockdev_init...

Peter
Peter Lieven Oct. 20, 2014, 12:16 p.m. UTC | #7
On 20.10.2014 13:51, Max Reitz wrote:
> On 2014-10-20 at 12:03, Peter Lieven wrote:
>> On 20.10.2014 11:27, Max Reitz wrote:
>>> On 2014-10-20 at 11:14, Peter Lieven wrote:
>>>> On 20.10.2014 10:59, Max Reitz wrote:
>>>>> On 2014-10-20 at 08:14, Peter Lieven wrote:
>>>>>> the block layer silently merges write requests since
>>>>>
>>>>> s/^t/T/
>>>>>
>>>>>> commit 40b4f539. This patch adds a knob to disable
>>>>>> this feature as there has been some discussion lately
>>>>>> if multiwrite is a good idea at all and as it falsifies
>>>>>> benchmarks.
>>>>>>
>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>> ---
>>>>>>   block.c                   |    4 ++++
>>>>>>   block/qapi.c              |    1 +
>>>>>>   blockdev.c                |    7 +++++++
>>>>>>   hmp.c                     |    4 ++++
>>>>>>   include/block/block_int.h |    1 +
>>>>>>   qapi/block-core.json      |   10 +++++++++-
>>>>>>   qemu-options.hx           |    1 +
>>>>>>   qmp-commands.hx           |    2 ++
>>>>>>   8 files changed, 29 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/block.c b/block.c
>>>>>> index 27533f3..1658a72 100644
>>>>>> --- a/block.c
>>>>>> +++ b/block.c
>>>>>> @@ -4531,6 +4531,10 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
>>>>>>   {
>>>>>>       int i, outidx;
>>>>>>   +    if (!bs->write_merging) {
>>>>>> +        return num_reqs;
>>>>>> +    }
>>>>>> +
>>>>>>       // Sort requests by start sector
>>>>>>       qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
>>>>>>   diff --git a/block/qapi.c b/block/qapi.c
>>>>>> index 9733ebd..02251dd 100644
>>>>>> --- a/block/qapi.c
>>>>>> +++ b/block/qapi.c
>>>>>> @@ -58,6 +58,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
>>>>>>         info->backing_file_depth = bdrv_get_backing_file_depth(bs);
>>>>>>       info->detect_zeroes = bs->detect_zeroes;
>>>>>> +    info->write_merging = bs->write_merging;
>>>>>>         if (bs->io_limits_enabled) {
>>>>>>           ThrottleConfig cfg;
>>>>>> diff --git a/blockdev.c b/blockdev.c
>>>>>> index e595910..13e47b8 100644
>>>>>> --- a/blockdev.c
>>>>>> +++ b/blockdev.c
>>>>>> @@ -378,6 +378,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>>>>>       const char *id;
>>>>>>       bool has_driver_specific_opts;
>>>>>>       BlockdevDetectZeroesOptions detect_zeroes;
>>>>>> +    bool write_merging;
>>>>>>       BlockDriver *drv = NULL;
>>>>>>         /* Check common options by copying from bs_opts to opts, all other options
>>>>>> @@ -405,6 +406,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>>>>>       snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
>>>>>>       ro = qemu_opt_get_bool(opts, "read-only", 0);
>>>>>>       copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
>>>>>> +    write_merging = qemu_opt_get_bool(opts, "write-merging", true);
>>>>>
>>>>> Using this option in blockdev_init() means that you can only enable or disable merging for the top layer (the root BDS). Furthermore, since you don't set bs->write_merging in bdrv_new() (or at least bdrv_open()), it actually defaults to false and 
>>>>> only for the top layer it defaults to true.
>>>>>
>>>>> Therefore, if after this patch a format block driver issues a multiwrite to its file, the write will not be merged and the user can do nothing about it. I don't suppose this is intentional...?
>>>>
>>>> I am not sure if a block driver actually can do this at all? The only way to enter multiwrite is from virtio_blk_handle_request in virtio-blk.c.
>>>
>>> Well, there's also qemu-io -c multiwrite (which only accesses the root BDS as well). But other than that, yes, you're right. So, in practice it shouldn't matter.
>>>
>>>>
>>>>>
>>>>> I propose evaluating the option in bdrv_open() and setting bs->write_merging there.
>>>>
>>>> I wasn't aware actually. I remember that someone asked me to implement discard_zeroes in blockdev_init. I think it was something related to QMP. So we still might
>>>> need to check parameters at 2 positions? It is quite confusing which paramter has to be parsed where.
>>>
>>> As for me, I don't know why some options are parsed in blockdev_init() at all. I guess all the options currently parsed in blockdev_init() should later be moved to the BlockBackend, at least that would be the idea. In practice, we cannot do that: 
>>> Things like caching will stay in the BlockDriverState.
>>>
>>> I think it's just broken. IMHO, everything related to the BB should be in blockdev_init() and everything related to the BDS should be in bdrv_open(). So the question is now whether you want write_merging to be in the BDS or in the BB. Considering BB 
>>> is in Kevin's block branch as of last Friday, you might actually want to work on that branch and move the field into the BB if you decide that that's the place it should be in.
>>
>> Actually I there a pros and cons for both BDS and BB. As of now my intention was to be able to turn it off. As there are People who would like to see it completely disappear I would not spent too much effort in that switch today.
>> Looking at BB it is a BDS thing and thus belongs to bdrv_open. But this is true for discard_zeroes (and others) as well. Kevin, Stefan, ultimatively where should it be parsed?
>
> Yes, and for cache, too. That's what I meant with "it's just broken".

Can you further help here. I think my problem was that I don't have access to the commandline options in bdrv_open?!

Thank you,
Peter
Max Reitz Oct. 20, 2014, 12:19 p.m. UTC | #8
On 2014-10-20 at 14:16, Peter Lieven wrote:
> On 20.10.2014 13:51, Max Reitz wrote:
>> On 2014-10-20 at 12:03, Peter Lieven wrote:
>>> On 20.10.2014 11:27, Max Reitz wrote:
>>>> On 2014-10-20 at 11:14, Peter Lieven wrote:
>>>>> On 20.10.2014 10:59, Max Reitz wrote:
>>>>>> On 2014-10-20 at 08:14, Peter Lieven wrote:
>>>>>>> the block layer silently merges write requests since
>>>>>>
>>>>>> s/^t/T/
>>>>>>
>>>>>>> commit 40b4f539. This patch adds a knob to disable
>>>>>>> this feature as there has been some discussion lately
>>>>>>> if multiwrite is a good idea at all and as it falsifies
>>>>>>> benchmarks.
>>>>>>>
>>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>>> ---
>>>>>>>   block.c                   |    4 ++++
>>>>>>>   block/qapi.c              |    1 +
>>>>>>>   blockdev.c                |    7 +++++++
>>>>>>>   hmp.c                     |    4 ++++
>>>>>>>   include/block/block_int.h |    1 +
>>>>>>>   qapi/block-core.json      |   10 +++++++++-
>>>>>>>   qemu-options.hx           |    1 +
>>>>>>>   qmp-commands.hx           |    2 ++
>>>>>>>   8 files changed, 29 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/block.c b/block.c
>>>>>>> index 27533f3..1658a72 100644
>>>>>>> --- a/block.c
>>>>>>> +++ b/block.c
>>>>>>> @@ -4531,6 +4531,10 @@ static int 
>>>>>>> multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
>>>>>>>   {
>>>>>>>       int i, outidx;
>>>>>>>   +    if (!bs->write_merging) {
>>>>>>> +        return num_reqs;
>>>>>>> +    }
>>>>>>> +
>>>>>>>       // Sort requests by start sector
>>>>>>>       qsort(reqs, num_reqs, sizeof(*reqs), 
>>>>>>> &multiwrite_req_compare);
>>>>>>>   diff --git a/block/qapi.c b/block/qapi.c
>>>>>>> index 9733ebd..02251dd 100644
>>>>>>> --- a/block/qapi.c
>>>>>>> +++ b/block/qapi.c
>>>>>>> @@ -58,6 +58,7 @@ BlockDeviceInfo 
>>>>>>> *bdrv_block_device_info(BlockDriverState *bs)
>>>>>>>         info->backing_file_depth = bdrv_get_backing_file_depth(bs);
>>>>>>>       info->detect_zeroes = bs->detect_zeroes;
>>>>>>> +    info->write_merging = bs->write_merging;
>>>>>>>         if (bs->io_limits_enabled) {
>>>>>>>           ThrottleConfig cfg;
>>>>>>> diff --git a/blockdev.c b/blockdev.c
>>>>>>> index e595910..13e47b8 100644
>>>>>>> --- a/blockdev.c
>>>>>>> +++ b/blockdev.c
>>>>>>> @@ -378,6 +378,7 @@ static DriveInfo *blockdev_init(const char 
>>>>>>> *file, QDict *bs_opts,
>>>>>>>       const char *id;
>>>>>>>       bool has_driver_specific_opts;
>>>>>>>       BlockdevDetectZeroesOptions detect_zeroes;
>>>>>>> +    bool write_merging;
>>>>>>>       BlockDriver *drv = NULL;
>>>>>>>         /* Check common options by copying from bs_opts to opts, 
>>>>>>> all other options
>>>>>>> @@ -405,6 +406,7 @@ static DriveInfo *blockdev_init(const char 
>>>>>>> *file, QDict *bs_opts,
>>>>>>>       snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
>>>>>>>       ro = qemu_opt_get_bool(opts, "read-only", 0);
>>>>>>>       copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", 
>>>>>>> false);
>>>>>>> +    write_merging = qemu_opt_get_bool(opts, "write-merging", 
>>>>>>> true);
>>>>>>
>>>>>> Using this option in blockdev_init() means that you can only 
>>>>>> enable or disable merging for the top layer (the root BDS). 
>>>>>> Furthermore, since you don't set bs->write_merging in bdrv_new() 
>>>>>> (or at least bdrv_open()), it actually defaults to false and only 
>>>>>> for the top layer it defaults to true.
>>>>>>
>>>>>> Therefore, if after this patch a format block driver issues a 
>>>>>> multiwrite to its file, the write will not be merged and the user 
>>>>>> can do nothing about it. I don't suppose this is intentional...?
>>>>>
>>>>> I am not sure if a block driver actually can do this at all? The 
>>>>> only way to enter multiwrite is from virtio_blk_handle_request in 
>>>>> virtio-blk.c.
>>>>
>>>> Well, there's also qemu-io -c multiwrite (which only accesses the 
>>>> root BDS as well). But other than that, yes, you're right. So, in 
>>>> practice it shouldn't matter.
>>>>
>>>>>
>>>>>>
>>>>>> I propose evaluating the option in bdrv_open() and setting 
>>>>>> bs->write_merging there.
>>>>>
>>>>> I wasn't aware actually. I remember that someone asked me to 
>>>>> implement discard_zeroes in blockdev_init. I think it was 
>>>>> something related to QMP. So we still might
>>>>> need to check parameters at 2 positions? It is quite confusing 
>>>>> which paramter has to be parsed where.
>>>>
>>>> As for me, I don't know why some options are parsed in 
>>>> blockdev_init() at all. I guess all the options currently parsed in 
>>>> blockdev_init() should later be moved to the BlockBackend, at least 
>>>> that would be the idea. In practice, we cannot do that: Things like 
>>>> caching will stay in the BlockDriverState.
>>>>
>>>> I think it's just broken. IMHO, everything related to the BB should 
>>>> be in blockdev_init() and everything related to the BDS should be 
>>>> in bdrv_open(). So the question is now whether you want 
>>>> write_merging to be in the BDS or in the BB. Considering BB is in 
>>>> Kevin's block branch as of last Friday, you might actually want to 
>>>> work on that branch and move the field into the BB if you decide 
>>>> that that's the place it should be in.
>>>
>>> Actually I there a pros and cons for both BDS and BB. As of now my 
>>> intention was to be able to turn it off. As there are People who 
>>> would like to see it completely disappear I would not spent too much 
>>> effort in that switch today.
>>> Looking at BB it is a BDS thing and thus belongs to bdrv_open. But 
>>> this is true for discard_zeroes (and others) as well. Kevin, Stefan, 
>>> ultimatively where should it be parsed?
>>
>> Yes, and for cache, too. That's what I meant with "it's just broken".
>
> Can you further help here. I think my problem was that I don't have 
> access to the commandline options in bdrv_open?!

You do. It's the "options" QDict. :-)

Max
Peter Lieven Oct. 20, 2014, 12:48 p.m. UTC | #9
On 20.10.2014 14:19, Max Reitz wrote:
> On 2014-10-20 at 14:16, Peter Lieven wrote:
>> On 20.10.2014 13:51, Max Reitz wrote:
>>> On 2014-10-20 at 12:03, Peter Lieven wrote:
>>>> On 20.10.2014 11:27, Max Reitz wrote:
>>>>> On 2014-10-20 at 11:14, Peter Lieven wrote:
>>>>>> On 20.10.2014 10:59, Max Reitz wrote:
>>>>>>> On 2014-10-20 at 08:14, Peter Lieven wrote:
>>>>>>>> the block layer silently merges write requests since
>>>>>>>
>>>>>>> s/^t/T/
>>>>>>>
>>>>>>>> commit 40b4f539. This patch adds a knob to disable
>>>>>>>> this feature as there has been some discussion lately
>>>>>>>> if multiwrite is a good idea at all and as it falsifies
>>>>>>>> benchmarks.
>>>>>>>>
>>>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>>>> ---
>>>>>>>>   block.c                   |    4 ++++
>>>>>>>>   block/qapi.c              |    1 +
>>>>>>>>   blockdev.c                |    7 +++++++
>>>>>>>>   hmp.c                     |    4 ++++
>>>>>>>>   include/block/block_int.h |    1 +
>>>>>>>>   qapi/block-core.json      |   10 +++++++++-
>>>>>>>>   qemu-options.hx           |    1 +
>>>>>>>>   qmp-commands.hx           |    2 ++
>>>>>>>>   8 files changed, 29 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/block.c b/block.c
>>>>>>>> index 27533f3..1658a72 100644
>>>>>>>> --- a/block.c
>>>>>>>> +++ b/block.c
>>>>>>>> @@ -4531,6 +4531,10 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
>>>>>>>>   {
>>>>>>>>       int i, outidx;
>>>>>>>>   +    if (!bs->write_merging) {
>>>>>>>> +        return num_reqs;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>>       // Sort requests by start sector
>>>>>>>>       qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
>>>>>>>>   diff --git a/block/qapi.c b/block/qapi.c
>>>>>>>> index 9733ebd..02251dd 100644
>>>>>>>> --- a/block/qapi.c
>>>>>>>> +++ b/block/qapi.c
>>>>>>>> @@ -58,6 +58,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
>>>>>>>>         info->backing_file_depth = bdrv_get_backing_file_depth(bs);
>>>>>>>>       info->detect_zeroes = bs->detect_zeroes;
>>>>>>>> +    info->write_merging = bs->write_merging;
>>>>>>>>         if (bs->io_limits_enabled) {
>>>>>>>>           ThrottleConfig cfg;
>>>>>>>> diff --git a/blockdev.c b/blockdev.c
>>>>>>>> index e595910..13e47b8 100644
>>>>>>>> --- a/blockdev.c
>>>>>>>> +++ b/blockdev.c
>>>>>>>> @@ -378,6 +378,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>>>>>>>       const char *id;
>>>>>>>>       bool has_driver_specific_opts;
>>>>>>>>       BlockdevDetectZeroesOptions detect_zeroes;
>>>>>>>> +    bool write_merging;
>>>>>>>>       BlockDriver *drv = NULL;
>>>>>>>>         /* Check common options by copying from bs_opts to opts, all other options
>>>>>>>> @@ -405,6 +406,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>>>>>>>       snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
>>>>>>>>       ro = qemu_opt_get_bool(opts, "read-only", 0);
>>>>>>>>       copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
>>>>>>>> +    write_merging = qemu_opt_get_bool(opts, "write-merging", true);
>>>>>>>
>>>>>>> Using this option in blockdev_init() means that you can only enable or disable merging for the top layer (the root BDS). Furthermore, since you don't set bs->write_merging in bdrv_new() (or at least bdrv_open()), it actually defaults to false and 
>>>>>>> only for the top layer it defaults to true.
>>>>>>>
>>>>>>> Therefore, if after this patch a format block driver issues a multiwrite to its file, the write will not be merged and the user can do nothing about it. I don't suppose this is intentional...?
>>>>>>
>>>>>> I am not sure if a block driver actually can do this at all? The only way to enter multiwrite is from virtio_blk_handle_request in virtio-blk.c.
>>>>>
>>>>> Well, there's also qemu-io -c multiwrite (which only accesses the root BDS as well). But other than that, yes, you're right. So, in practice it shouldn't matter.
>>>>>
>>>>>>
>>>>>>>
>>>>>>> I propose evaluating the option in bdrv_open() and setting bs->write_merging there.
>>>>>>
>>>>>> I wasn't aware actually. I remember that someone asked me to implement discard_zeroes in blockdev_init. I think it was something related to QMP. So we still might
>>>>>> need to check parameters at 2 positions? It is quite confusing which paramter has to be parsed where.
>>>>>
>>>>> As for me, I don't know why some options are parsed in blockdev_init() at all. I guess all the options currently parsed in blockdev_init() should later be moved to the BlockBackend, at least that would be the idea. In practice, we cannot do that: 
>>>>> Things like caching will stay in the BlockDriverState.
>>>>>
>>>>> I think it's just broken. IMHO, everything related to the BB should be in blockdev_init() and everything related to the BDS should be in bdrv_open(). So the question is now whether you want write_merging to be in the BDS or in the BB. Considering BB 
>>>>> is in Kevin's block branch as of last Friday, you might actually want to work on that branch and move the field into the BB if you decide that that's the place it should be in.
>>>>
>>>> Actually I there a pros and cons for both BDS and BB. As of now my intention was to be able to turn it off. As there are People who would like to see it completely disappear I would not spent too much effort in that switch today.
>>>> Looking at BB it is a BDS thing and thus belongs to bdrv_open. But this is true for discard_zeroes (and others) as well. Kevin, Stefan, ultimatively where should it be parsed?
>>>
>>> Yes, and for cache, too. That's what I meant with "it's just broken".
>>
>> Can you further help here. I think my problem was that I don't have access to the commandline options in bdrv_open?!
>
> You do. It's the "options" QDict. :-)

Maybe I just don't get it.

If I specify

qemu -drive if=virtio,file=image.qcow2,write-merging=off

and check with

qdict_get_try_bool(options, "write-merging", true);

in bdrv_open() directly before bdrv_swap I always get true.

Peter
Kevin Wolf Oct. 20, 2014, 12:56 p.m. UTC | #10
Am 20.10.2014 um 13:53 hat Peter Lieven geschrieben:
> On 20.10.2014 13:51, Max Reitz wrote:
> >On 2014-10-20 at 12:03, Peter Lieven wrote:
> >>On 20.10.2014 11:27, Max Reitz wrote:
> >>>On 2014-10-20 at 11:14, Peter Lieven wrote:
> >>>>On 20.10.2014 10:59, Max Reitz wrote:
> >>>>>On 2014-10-20 at 08:14, Peter Lieven wrote:
> >>>>>>the block layer silently merges write requests since
> >>>>>
> >>>>>s/^t/T/
> >>>>>
> >>>>>>commit 40b4f539. This patch adds a knob to disable
> >>>>>>this feature as there has been some discussion lately
> >>>>>>if multiwrite is a good idea at all and as it falsifies
> >>>>>>benchmarks.
> >>>>>>
> >>>>>>Signed-off-by: Peter Lieven <pl@kamp.de>
> >>>>>>---
> >>>>>>  block.c                   |    4 ++++
> >>>>>>  block/qapi.c              |    1 +
> >>>>>>  blockdev.c                |    7 +++++++
> >>>>>>  hmp.c                     |    4 ++++
> >>>>>>  include/block/block_int.h |    1 +
> >>>>>>  qapi/block-core.json      |   10 +++++++++-
> >>>>>>  qemu-options.hx           |    1 +
> >>>>>>  qmp-commands.hx           |    2 ++
> >>>>>>  8 files changed, 29 insertions(+), 1 deletion(-)
> >>>>>>
> >>>>>>diff --git a/block.c b/block.c
> >>>>>>index 27533f3..1658a72 100644
> >>>>>>--- a/block.c
> >>>>>>+++ b/block.c
> >>>>>>@@ -4531,6 +4531,10 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
> >>>>>>  {
> >>>>>>      int i, outidx;
> >>>>>>  +    if (!bs->write_merging) {
> >>>>>>+        return num_reqs;
> >>>>>>+    }
> >>>>>>+
> >>>>>>      // Sort requests by start sector
> >>>>>>      qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
> >>>>>>  diff --git a/block/qapi.c b/block/qapi.c
> >>>>>>index 9733ebd..02251dd 100644
> >>>>>>--- a/block/qapi.c
> >>>>>>+++ b/block/qapi.c
> >>>>>>@@ -58,6 +58,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
> >>>>>>        info->backing_file_depth = bdrv_get_backing_file_depth(bs);
> >>>>>>      info->detect_zeroes = bs->detect_zeroes;
> >>>>>>+    info->write_merging = bs->write_merging;
> >>>>>>        if (bs->io_limits_enabled) {
> >>>>>>          ThrottleConfig cfg;
> >>>>>>diff --git a/blockdev.c b/blockdev.c
> >>>>>>index e595910..13e47b8 100644
> >>>>>>--- a/blockdev.c
> >>>>>>+++ b/blockdev.c
> >>>>>>@@ -378,6 +378,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
> >>>>>>      const char *id;
> >>>>>>      bool has_driver_specific_opts;
> >>>>>>      BlockdevDetectZeroesOptions detect_zeroes;
> >>>>>>+    bool write_merging;
> >>>>>>      BlockDriver *drv = NULL;
> >>>>>>        /* Check common options by copying from bs_opts to opts, all other options
> >>>>>>@@ -405,6 +406,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
> >>>>>>      snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
> >>>>>>      ro = qemu_opt_get_bool(opts, "read-only", 0);
> >>>>>>      copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
> >>>>>>+    write_merging = qemu_opt_get_bool(opts, "write-merging", true);
> >>>>>
> >>>>>Using this option in blockdev_init() means that you can
> >>>>>only enable or disable merging for the top layer (the root
> >>>>>BDS). Furthermore, since you don't set bs->write_merging
> >>>>>in bdrv_new() (or at least bdrv_open()), it actually
> >>>>>defaults to false and only for the top layer it defaults
> >>>>>to true.
> >>>>>
> >>>>>Therefore, if after this patch a format block driver issues a multiwrite to its file, the write will not be merged and the user can do nothing about it. I don't suppose this is intentional...?
> >>>>
> >>>>I am not sure if a block driver actually can do this at all? The only way to enter multiwrite is from virtio_blk_handle_request in virtio-blk.c.
> >>>
> >>>Well, there's also qemu-io -c multiwrite (which only accesses the root BDS as well). But other than that, yes, you're right. So, in practice it shouldn't matter.
> >>>
> >>>>
> >>>>>
> >>>>>I propose evaluating the option in bdrv_open() and setting bs->write_merging there.
> >>>>
> >>>>I wasn't aware actually. I remember that someone asked me to implement discard_zeroes in blockdev_init. I think it was something related to QMP. So we still might
> >>>>need to check parameters at 2 positions? It is quite confusing which paramter has to be parsed where.
> >>>
> >>>As for me, I don't know why some options are parsed in
> >>>blockdev_init() at all. I guess all the options currently
> >>>parsed in blockdev_init() should later be moved to the
> >>>BlockBackend, at least that would be the idea. In practice, we
> >>>cannot do that: Things like caching will stay in the
> >>>BlockDriverState.
> >>>
> >>>I think it's just broken. IMHO, everything related to the BB
> >>>should be in blockdev_init() and everything related to the BDS
> >>>should be in bdrv_open(). So the question is now whether you
> >>>want write_merging to be in the BDS or in the BB. Considering
> >>>BB is in Kevin's block branch as of last Friday, you might
> >>>actually want to work on that branch and move the field into
> >>>the BB if you decide that that's the place it should be in.
> >>
> >>Actually I there a pros and cons for both BDS and BB. As of now my intention was to be able to turn it off. As there are People who would like to see it completely disappear I would not spent too much effort in that switch today.
> >>Looking at BB it is a BDS thing and thus belongs to bdrv_open. But this is true for discard_zeroes (and others) as well. Kevin, Stefan, ultimatively where should it be parsed?
> >
> >Yes, and for cache, too. That's what I meant with "it's just broken".
> 
> Looking at the old discussion about discard zeroes it was recommended to put it into bdrv_open_common. If thats still the recommendation I will put it in bdrv_open_common and send
> a fix for discard_zeroes. I can't remember why I finally put it in blockdev_init...

Yes, BDS options (and this is one) should be parsed in bdrv_open() or a
function called by it. bdrv_open_common() sounds about right here.

Kevin
Max Reitz Oct. 20, 2014, 1:15 p.m. UTC | #11
On 20.10.2014 at 14:48, Peter Lieven wrote:
> On 20.10.2014 14:19, Max Reitz wrote:
>> On 2014-10-20 at 14:16, Peter Lieven wrote:
>>> On 20.10.2014 13:51, Max Reitz wrote:
>>>> On 2014-10-20 at 12:03, Peter Lieven wrote:
>>>> [...]
>>>
>>> Can you further help here. I think my problem was that I don't have 
>>> access to the commandline options in bdrv_open?!
>>
>> You do. It's the "options" QDict. :-)
>
> Maybe I just don't get it.
>
> If I specify
>
> qemu -drive if=virtio,file=image.qcow2,write-merging=off
>
> and check with
>
> qdict_get_try_bool(options, "write-merging", true);
>
> in bdrv_open() directly before bdrv_swap I always get true.

Hm, judging from fprintf(stderr, "%s\n", 
qstring_get_str(qobject_to_json_pretty(QOBJECT(options))));, it's there 
for me (directly after qdict_del(options, "node-name"). The output is:

Qemu wrote:
> {
>     "filename": "image.qcow2"
> }
> {
>     "write-merging": "off"
> }
> qemu-system-x86_64: -drive 
> if=virtio,file=image.qcow2,write-merging=off: could not open disk 
> image image.qcow2: Block format 'qcow2' used by device 'virtio0' 
> doesn't support the option 'write-merging'

But as you can see, it's a string and not a bool. So the problem is that 
there are (at least) two parameter "types" in qemu: One is just giving a 
QDict, and the other are QemuOpts. QDicts are just the raw user input 
and the user can only input strings, so everything is just a string. As 
far as I know, typing everything correctly is done by converting the 
QDict to a QemuOpts object (as you can see in generally every block 
driver which supports some options (e.g. qcow2) and also in 
blockdev_init(), it's qemu_opts_absorb_qdict()).

Sooo, right, I forgot that. Currently, there are no non-string 
non-block-driver-specific options for mid-tree BDS (in contrast to the 
root BDS, which are parsed in blockdev_init()), so you now have the 
honorable task of introducing such a QemuOptsList along with 
qemu_opts_absorb_qdict() and everything to bdrv_open_common(). *cough*

Max
Peter Lieven Oct. 20, 2014, 1:19 p.m. UTC | #12
On 20.10.2014 15:15, Max Reitz wrote:
> On 20.10.2014 at 14:48, Peter Lieven wrote:
>> On 20.10.2014 14:19, Max Reitz wrote:
>>> On 2014-10-20 at 14:16, Peter Lieven wrote:
>>>> On 20.10.2014 13:51, Max Reitz wrote:
>>>>> On 2014-10-20 at 12:03, Peter Lieven wrote:
>>>>> [...]
>>>>
>>>> Can you further help here. I think my problem was that I don't have access to the commandline options in bdrv_open?!
>>>
>>> You do. It's the "options" QDict. :-)
>>
>> Maybe I just don't get it.
>>
>> If I specify
>>
>> qemu -drive if=virtio,file=image.qcow2,write-merging=off
>>
>> and check with
>>
>> qdict_get_try_bool(options, "write-merging", true);
>>
>> in bdrv_open() directly before bdrv_swap I always get true.
>
> Hm, judging from fprintf(stderr, "%s\n", qstring_get_str(qobject_to_json_pretty(QOBJECT(options))));, it's there for me (directly after qdict_del(options, "node-name"). The output is:
>
> Qemu wrote:
>> {
>>     "filename": "image.qcow2"
>> }
>> {
>>     "write-merging": "off"
>> }
>> qemu-system-x86_64: -drive if=virtio,file=image.qcow2,write-merging=off: could not open disk image image.qcow2: Block format 'qcow2' used by device 'virtio0' doesn't support the option 'write-merging'
>
> But as you can see, it's a string and not a bool. So the problem is that there are (at least) two parameter "types" in qemu: One is just giving a QDict, and the other are QemuOpts. QDicts are just the raw user input and the user can only input strings, 
> so everything is just a string. As far as I know, typing everything correctly is done by converting the QDict to a QemuOpts object (as you can see in generally every block driver which supports some options (e.g. qcow2) and also in blockdev_init(), it's 
> qemu_opts_absorb_qdict()).
>
> Sooo, right, I forgot that. Currently, there are no non-string non-block-driver-specific options for mid-tree BDS (in contrast to the root BDS, which are parsed in blockdev_init()), so you now have the honorable task of introducing such a QemuOptsList 
> along with qemu_opts_absorb_qdict() and everything to bdrv_open_common(). *cough*

I would appreciate if someone with better knowledge of this whole stuff would start this. Or we postpone this know until all the ongoing conversions are done.

Peter
Max Reitz Oct. 20, 2014, 1:22 p.m. UTC | #13
On 20.10.2014 at 15:19, Peter Lieven wrote:
> On 20.10.2014 15:15, Max Reitz wrote:
>> On 20.10.2014 at 14:48, Peter Lieven wrote:
>>> On 20.10.2014 14:19, Max Reitz wrote:
>>>> On 2014-10-20 at 14:16, Peter Lieven wrote:
>>>>> On 20.10.2014 13:51, Max Reitz wrote:
>>>>>> On 2014-10-20 at 12:03, Peter Lieven wrote:
>>>>>> [...]
>>>>>
>>>>> Can you further help here. I think my problem was that I don't 
>>>>> have access to the commandline options in bdrv_open?!
>>>>
>>>> You do. It's the "options" QDict. :-)
>>>
>>> Maybe I just don't get it.
>>>
>>> If I specify
>>>
>>> qemu -drive if=virtio,file=image.qcow2,write-merging=off
>>>
>>> and check with
>>>
>>> qdict_get_try_bool(options, "write-merging", true);
>>>
>>> in bdrv_open() directly before bdrv_swap I always get true.
>>
>> Hm, judging from fprintf(stderr, "%s\n", 
>> qstring_get_str(qobject_to_json_pretty(QOBJECT(options))));, it's 
>> there for me (directly after qdict_del(options, "node-name"). The 
>> output is:
>>
>> Qemu wrote:
>>> {
>>>     "filename": "image.qcow2"
>>> }
>>> {
>>>     "write-merging": "off"
>>> }
>>> qemu-system-x86_64: -drive 
>>> if=virtio,file=image.qcow2,write-merging=off: could not open disk 
>>> image image.qcow2: Block format 'qcow2' used by device 'virtio0' 
>>> doesn't support the option 'write-merging'
>>
>> But as you can see, it's a string and not a bool. So the problem is 
>> that there are (at least) two parameter "types" in qemu: One is just 
>> giving a QDict, and the other are QemuOpts. QDicts are just the raw 
>> user input and the user can only input strings, so everything is just 
>> a string. As far as I know, typing everything correctly is done by 
>> converting the QDict to a QemuOpts object (as you can see in 
>> generally every block driver which supports some options (e.g. qcow2) 
>> and also in blockdev_init(), it's qemu_opts_absorb_qdict()).
>>
>> Sooo, right, I forgot that. Currently, there are no non-string 
>> non-block-driver-specific options for mid-tree BDS (in contrast to 
>> the root BDS, which are parsed in blockdev_init()), so you now have 
>> the honorable task of introducing such a QemuOptsList along with 
>> qemu_opts_absorb_qdict() and everything to bdrv_open_common(). *cough*
>
> I would appreciate if someone with better knowledge of this whole 
> stuff would start this. Or we postpone this know until all the ongoing 
> conversions are done.

I can try and create some barebone which your patches can then be based 
on. I probably don't have the knowledge either, but I'm daring enough to 
do it anyway. ;-)

Max
Peter Lieven Oct. 20, 2014, 1:29 p.m. UTC | #14
On 20.10.2014 15:22, Max Reitz wrote:
> On 20.10.2014 at 15:19, Peter Lieven wrote:
>> On 20.10.2014 15:15, Max Reitz wrote:
>>> On 20.10.2014 at 14:48, Peter Lieven wrote:
>>>> On 20.10.2014 14:19, Max Reitz wrote:
>>>>> On 2014-10-20 at 14:16, Peter Lieven wrote:
>>>>>> On 20.10.2014 13:51, Max Reitz wrote:
>>>>>>> On 2014-10-20 at 12:03, Peter Lieven wrote:
>>>>>>> [...]
>>>>>>
>>>>>> Can you further help here. I think my problem was that I don't have access to the commandline options in bdrv_open?!
>>>>>
>>>>> You do. It's the "options" QDict. :-)
>>>>
>>>> Maybe I just don't get it.
>>>>
>>>> If I specify
>>>>
>>>> qemu -drive if=virtio,file=image.qcow2,write-merging=off
>>>>
>>>> and check with
>>>>
>>>> qdict_get_try_bool(options, "write-merging", true);
>>>>
>>>> in bdrv_open() directly before bdrv_swap I always get true.
>>>
>>> Hm, judging from fprintf(stderr, "%s\n", qstring_get_str(qobject_to_json_pretty(QOBJECT(options))));, it's there for me (directly after qdict_del(options, "node-name"). The output is:
>>>
>>> Qemu wrote:
>>>> {
>>>>     "filename": "image.qcow2"
>>>> }
>>>> {
>>>>     "write-merging": "off"
>>>> }
>>>> qemu-system-x86_64: -drive if=virtio,file=image.qcow2,write-merging=off: could not open disk image image.qcow2: Block format 'qcow2' used by device 'virtio0' doesn't support the option 'write-merging'
>>>
>>> But as you can see, it's a string and not a bool. So the problem is that there are (at least) two parameter "types" in qemu: One is just giving a QDict, and the other are QemuOpts. QDicts are just the raw user input and the user can only input 
>>> strings, so everything is just a string. As far as I know, typing everything correctly is done by converting the QDict to a QemuOpts object (as you can see in generally every block driver which supports some options (e.g. qcow2) and also in 
>>> blockdev_init(), it's qemu_opts_absorb_qdict()).
>>>
>>> Sooo, right, I forgot that. Currently, there are no non-string non-block-driver-specific options for mid-tree BDS (in contrast to the root BDS, which are parsed in blockdev_init()), so you now have the honorable task of introducing such a QemuOptsList 
>>> along with qemu_opts_absorb_qdict() and everything to bdrv_open_common(). *cough*
>>
>> I would appreciate if someone with better knowledge of this whole stuff would start this. Or we postpone this know until all the ongoing conversions are done.
>
> I can try and create some barebone which your patches can then be based on. I probably don't have the knowledge either, but I'm daring enough to do it anyway. ;-)

Thank you.

Peter
Kevin Wolf Oct. 20, 2014, 1:31 p.m. UTC | #15
Am 20.10.2014 um 15:22 hat Max Reitz geschrieben:
> On 20.10.2014 at 15:19, Peter Lieven wrote:
> >On 20.10.2014 15:15, Max Reitz wrote:
> >>On 20.10.2014 at 14:48, Peter Lieven wrote:
> >>>On 20.10.2014 14:19, Max Reitz wrote:
> >>>>On 2014-10-20 at 14:16, Peter Lieven wrote:
> >>>>>On 20.10.2014 13:51, Max Reitz wrote:
> >>>>>>On 2014-10-20 at 12:03, Peter Lieven wrote:
> >>>>>>[...]
> >>>>>
> >>>>>Can you further help here. I think my problem was that I
> >>>>>don't have access to the commandline options in
> >>>>>bdrv_open?!
> >>>>
> >>>>You do. It's the "options" QDict. :-)
> >>>
> >>>Maybe I just don't get it.
> >>>
> >>>If I specify
> >>>
> >>>qemu -drive if=virtio,file=image.qcow2,write-merging=off
> >>>
> >>>and check with
> >>>
> >>>qdict_get_try_bool(options, "write-merging", true);
> >>>
> >>>in bdrv_open() directly before bdrv_swap I always get true.
> >>
> >>Hm, judging from fprintf(stderr, "%s\n",
> >>qstring_get_str(qobject_to_json_pretty(QOBJECT(options))));,
> >>it's there for me (directly after qdict_del(options,
> >>"node-name"). The output is:
> >>
> >>Qemu wrote:
> >>>{
> >>>    "filename": "image.qcow2"
> >>>}
> >>>{
> >>>    "write-merging": "off"
> >>>}
> >>>qemu-system-x86_64: -drive
> >>>if=virtio,file=image.qcow2,write-merging=off: could not open
> >>>disk image image.qcow2: Block format 'qcow2' used by device
> >>>'virtio0' doesn't support the option 'write-merging'
> >>
> >>But as you can see, it's a string and not a bool. So the problem
> >>is that there are (at least) two parameter "types" in qemu: One
> >>is just giving a QDict, and the other are QemuOpts. QDicts are
> >>just the raw user input and the user can only input strings, so
> >>everything is just a string. As far as I know, typing everything
> >>correctly is done by converting the QDict to a QemuOpts object
> >>(as you can see in generally every block driver which supports
> >>some options (e.g. qcow2) and also in blockdev_init(), it's
> >>qemu_opts_absorb_qdict()).
> >>
> >>Sooo, right, I forgot that. Currently, there are no non-string
> >>non-block-driver-specific options for mid-tree BDS (in contrast
> >>to the root BDS, which are parsed in blockdev_init()), so you
> >>now have the honorable task of introducing such a QemuOptsList
> >>along with qemu_opts_absorb_qdict() and everything to
> >>bdrv_open_common(). *cough*
> >
> >I would appreciate if someone with better knowledge of this whole
> >stuff would start this. Or we postpone this know until all the
> >ongoing conversions are done.
> 
> I can try and create some barebone which your patches can then be
> based on. I probably don't have the knowledge either, but I'm daring
> enough to do it anyway. ;-)

Actually I have some patches somewhere [1] that introduce a QemuOpts for
bdrv_open_common(). I intended to use that for cache modes, but as I
explained in our KVM Forum presentation, it's not quite as easy as I
thought it would be and so the patch series isn't ready yet.

Anyway, having the QemuOpts there for driver-independent options is
probably the way to go. Feel free to remove the caching from my
patch and keep only the node-name part. Then it can be a preparatory
patch for your series where you simply add a new option to the list.

Kevin

[1] http://repo.or.cz/w/qemu/kevin.git/commitdiff/9c22aee04cf0bdf6a3858340bc6ff27d6805254f
Peter Lieven Oct. 20, 2014, 1:47 p.m. UTC | #16
On 20.10.2014 15:31, Kevin Wolf wrote:
> Am 20.10.2014 um 15:22 hat Max Reitz geschrieben:
>> On 20.10.2014 at 15:19, Peter Lieven wrote:
>>> On 20.10.2014 15:15, Max Reitz wrote:
>>>> On 20.10.2014 at 14:48, Peter Lieven wrote:
>>>>> On 20.10.2014 14:19, Max Reitz wrote:
>>>>>> On 2014-10-20 at 14:16, Peter Lieven wrote:
>>>>>>> On 20.10.2014 13:51, Max Reitz wrote:
>>>>>>>> On 2014-10-20 at 12:03, Peter Lieven wrote:
>>>>>>>> [...]
>>>>>>> Can you further help here. I think my problem was that I
>>>>>>> don't have access to the commandline options in
>>>>>>> bdrv_open?!
>>>>>> You do. It's the "options" QDict. :-)
>>>>> Maybe I just don't get it.
>>>>>
>>>>> If I specify
>>>>>
>>>>> qemu -drive if=virtio,file=image.qcow2,write-merging=off
>>>>>
>>>>> and check with
>>>>>
>>>>> qdict_get_try_bool(options, "write-merging", true);
>>>>>
>>>>> in bdrv_open() directly before bdrv_swap I always get true.
>>>> Hm, judging from fprintf(stderr, "%s\n",
>>>> qstring_get_str(qobject_to_json_pretty(QOBJECT(options))));,
>>>> it's there for me (directly after qdict_del(options,
>>>> "node-name"). The output is:
>>>>
>>>> Qemu wrote:
>>>>> {
>>>>>     "filename": "image.qcow2"
>>>>> }
>>>>> {
>>>>>     "write-merging": "off"
>>>>> }
>>>>> qemu-system-x86_64: -drive
>>>>> if=virtio,file=image.qcow2,write-merging=off: could not open
>>>>> disk image image.qcow2: Block format 'qcow2' used by device
>>>>> 'virtio0' doesn't support the option 'write-merging'
>>>> But as you can see, it's a string and not a bool. So the problem
>>>> is that there are (at least) two parameter "types" in qemu: One
>>>> is just giving a QDict, and the other are QemuOpts. QDicts are
>>>> just the raw user input and the user can only input strings, so
>>>> everything is just a string. As far as I know, typing everything
>>>> correctly is done by converting the QDict to a QemuOpts object
>>>> (as you can see in generally every block driver which supports
>>>> some options (e.g. qcow2) and also in blockdev_init(), it's
>>>> qemu_opts_absorb_qdict()).
>>>>
>>>> Sooo, right, I forgot that. Currently, there are no non-string
>>>> non-block-driver-specific options for mid-tree BDS (in contrast
>>>> to the root BDS, which are parsed in blockdev_init()), so you
>>>> now have the honorable task of introducing such a QemuOptsList
>>>> along with qemu_opts_absorb_qdict() and everything to
>>>> bdrv_open_common(). *cough*
>>> I would appreciate if someone with better knowledge of this whole
>>> stuff would start this. Or we postpone this know until all the
>>> ongoing conversions are done.
>> I can try and create some barebone which your patches can then be
>> based on. I probably don't have the knowledge either, but I'm daring
>> enough to do it anyway. ;-)
> Actually I have some patches somewhere [1] that introduce a QemuOpts for
> bdrv_open_common(). I intended to use that for cache modes, but as I
> explained in our KVM Forum presentation, it's not quite as easy as I
> thought it would be and so the patch series isn't ready yet.
>
> Anyway, having the QemuOpts there for driver-independent options is
> probably the way to go. Feel free to remove the caching from my
> patch and keep only the node-name part. Then it can be a preparatory
> patch for your series where you simply add a new option to the list.
>
> Kevin
>
> [1] http://repo.or.cz/w/qemu/kevin.git/commitdiff/9c22aee04cf0bdf6a3858340bc6ff27d6805254f

Thank you.

Would it be legit to recycle qemu_common_drive_opts from blockdev.c for this?

Peter
Kevin Wolf Oct. 20, 2014, 1:55 p.m. UTC | #17
Am 20.10.2014 um 15:47 hat Peter Lieven geschrieben:
> On 20.10.2014 15:31, Kevin Wolf wrote:
> >Am 20.10.2014 um 15:22 hat Max Reitz geschrieben:
> >>On 20.10.2014 at 15:19, Peter Lieven wrote:
> >>>On 20.10.2014 15:15, Max Reitz wrote:
> >>>>On 20.10.2014 at 14:48, Peter Lieven wrote:
> >>>>>On 20.10.2014 14:19, Max Reitz wrote:
> >>>>>>On 2014-10-20 at 14:16, Peter Lieven wrote:
> >>>>>>>On 20.10.2014 13:51, Max Reitz wrote:
> >>>>>>>>On 2014-10-20 at 12:03, Peter Lieven wrote:
> >>>>>>>>[...]
> >>>>>>>Can you further help here. I think my problem was that I
> >>>>>>>don't have access to the commandline options in
> >>>>>>>bdrv_open?!
> >>>>>>You do. It's the "options" QDict. :-)
> >>>>>Maybe I just don't get it.
> >>>>>
> >>>>>If I specify
> >>>>>
> >>>>>qemu -drive if=virtio,file=image.qcow2,write-merging=off
> >>>>>
> >>>>>and check with
> >>>>>
> >>>>>qdict_get_try_bool(options, "write-merging", true);
> >>>>>
> >>>>>in bdrv_open() directly before bdrv_swap I always get true.
> >>>>Hm, judging from fprintf(stderr, "%s\n",
> >>>>qstring_get_str(qobject_to_json_pretty(QOBJECT(options))));,
> >>>>it's there for me (directly after qdict_del(options,
> >>>>"node-name"). The output is:
> >>>>
> >>>>Qemu wrote:
> >>>>>{
> >>>>>    "filename": "image.qcow2"
> >>>>>}
> >>>>>{
> >>>>>    "write-merging": "off"
> >>>>>}
> >>>>>qemu-system-x86_64: -drive
> >>>>>if=virtio,file=image.qcow2,write-merging=off: could not open
> >>>>>disk image image.qcow2: Block format 'qcow2' used by device
> >>>>>'virtio0' doesn't support the option 'write-merging'
> >>>>But as you can see, it's a string and not a bool. So the problem
> >>>>is that there are (at least) two parameter "types" in qemu: One
> >>>>is just giving a QDict, and the other are QemuOpts. QDicts are
> >>>>just the raw user input and the user can only input strings, so
> >>>>everything is just a string. As far as I know, typing everything
> >>>>correctly is done by converting the QDict to a QemuOpts object
> >>>>(as you can see in generally every block driver which supports
> >>>>some options (e.g. qcow2) and also in blockdev_init(), it's
> >>>>qemu_opts_absorb_qdict()).
> >>>>
> >>>>Sooo, right, I forgot that. Currently, there are no non-string
> >>>>non-block-driver-specific options for mid-tree BDS (in contrast
> >>>>to the root BDS, which are parsed in blockdev_init()), so you
> >>>>now have the honorable task of introducing such a QemuOptsList
> >>>>along with qemu_opts_absorb_qdict() and everything to
> >>>>bdrv_open_common(). *cough*
> >>>I would appreciate if someone with better knowledge of this whole
> >>>stuff would start this. Or we postpone this know until all the
> >>>ongoing conversions are done.
> >>I can try and create some barebone which your patches can then be
> >>based on. I probably don't have the knowledge either, but I'm daring
> >>enough to do it anyway. ;-)
> >Actually I have some patches somewhere [1] that introduce a QemuOpts for
> >bdrv_open_common(). I intended to use that for cache modes, but as I
> >explained in our KVM Forum presentation, it's not quite as easy as I
> >thought it would be and so the patch series isn't ready yet.
> >
> >Anyway, having the QemuOpts there for driver-independent options is
> >probably the way to go. Feel free to remove the caching from my
> >patch and keep only the node-name part. Then it can be a preparatory
> >patch for your series where you simply add a new option to the list.
> >
> >Kevin
> >
> >[1] http://repo.or.cz/w/qemu/kevin.git/commitdiff/9c22aee04cf0bdf6a3858340bc6ff27d6805254f
> 
> Thank you.
> 
> Would it be legit to recycle qemu_common_drive_opts from blockdev.c for this?

No, I don't think so. That one should in theory be only for BlockBackend
options. For the short term, it still mixes BB and BDS options, but BDS
options should be moved out step by step. In any case, it is only used
for the top level.

Any option that is parsed with qemu_opts_absorb_qdict() in
bdrv_open_common() must also be handled there. If you don't ensure that
and extract all the options that blockdev_init() knows without actually
handling them, it can happen that invalid options are silently ignored
(e.g. backing.werror should error out, but would be accepted).

And please coordinate with Max, if both of you write a patch, that's
wasted time.

Kevin
Peter Lieven Oct. 20, 2014, 1:59 p.m. UTC | #18
On 20.10.2014 15:55, Kevin Wolf wrote:
> Am 20.10.2014 um 15:47 hat Peter Lieven geschrieben:
>> On 20.10.2014 15:31, Kevin Wolf wrote:
>>> Am 20.10.2014 um 15:22 hat Max Reitz geschrieben:
>>>> On 20.10.2014 at 15:19, Peter Lieven wrote:
>>>>> On 20.10.2014 15:15, Max Reitz wrote:
>>>>>> On 20.10.2014 at 14:48, Peter Lieven wrote:
>>>>>>> On 20.10.2014 14:19, Max Reitz wrote:
>>>>>>>> On 2014-10-20 at 14:16, Peter Lieven wrote:
>>>>>>>>> On 20.10.2014 13:51, Max Reitz wrote:
>>>>>>>>>> On 2014-10-20 at 12:03, Peter Lieven wrote:
>>>>>>>>>> [...]
>>>>>>>>> Can you further help here. I think my problem was that I
>>>>>>>>> don't have access to the commandline options in
>>>>>>>>> bdrv_open?!
>>>>>>>> You do. It's the "options" QDict. :-)
>>>>>>> Maybe I just don't get it.
>>>>>>>
>>>>>>> If I specify
>>>>>>>
>>>>>>> qemu -drive if=virtio,file=image.qcow2,write-merging=off
>>>>>>>
>>>>>>> and check with
>>>>>>>
>>>>>>> qdict_get_try_bool(options, "write-merging", true);
>>>>>>>
>>>>>>> in bdrv_open() directly before bdrv_swap I always get true.
>>>>>> Hm, judging from fprintf(stderr, "%s\n",
>>>>>> qstring_get_str(qobject_to_json_pretty(QOBJECT(options))));,
>>>>>> it's there for me (directly after qdict_del(options,
>>>>>> "node-name"). The output is:
>>>>>>
>>>>>> Qemu wrote:
>>>>>>> {
>>>>>>>     "filename": "image.qcow2"
>>>>>>> }
>>>>>>> {
>>>>>>>     "write-merging": "off"
>>>>>>> }
>>>>>>> qemu-system-x86_64: -drive
>>>>>>> if=virtio,file=image.qcow2,write-merging=off: could not open
>>>>>>> disk image image.qcow2: Block format 'qcow2' used by device
>>>>>>> 'virtio0' doesn't support the option 'write-merging'
>>>>>> But as you can see, it's a string and not a bool. So the problem
>>>>>> is that there are (at least) two parameter "types" in qemu: One
>>>>>> is just giving a QDict, and the other are QemuOpts. QDicts are
>>>>>> just the raw user input and the user can only input strings, so
>>>>>> everything is just a string. As far as I know, typing everything
>>>>>> correctly is done by converting the QDict to a QemuOpts object
>>>>>> (as you can see in generally every block driver which supports
>>>>>> some options (e.g. qcow2) and also in blockdev_init(), it's
>>>>>> qemu_opts_absorb_qdict()).
>>>>>>
>>>>>> Sooo, right, I forgot that. Currently, there are no non-string
>>>>>> non-block-driver-specific options for mid-tree BDS (in contrast
>>>>>> to the root BDS, which are parsed in blockdev_init()), so you
>>>>>> now have the honorable task of introducing such a QemuOptsList
>>>>>> along with qemu_opts_absorb_qdict() and everything to
>>>>>> bdrv_open_common(). *cough*
>>>>> I would appreciate if someone with better knowledge of this whole
>>>>> stuff would start this. Or we postpone this know until all the
>>>>> ongoing conversions are done.
>>>> I can try and create some barebone which your patches can then be
>>>> based on. I probably don't have the knowledge either, but I'm daring
>>>> enough to do it anyway. ;-)
>>> Actually I have some patches somewhere [1] that introduce a QemuOpts for
>>> bdrv_open_common(). I intended to use that for cache modes, but as I
>>> explained in our KVM Forum presentation, it's not quite as easy as I
>>> thought it would be and so the patch series isn't ready yet.
>>>
>>> Anyway, having the QemuOpts there for driver-independent options is
>>> probably the way to go. Feel free to remove the caching from my
>>> patch and keep only the node-name part. Then it can be a preparatory
>>> patch for your series where you simply add a new option to the list.
>>>
>>> Kevin
>>>
>>> [1] http://repo.or.cz/w/qemu/kevin.git/commitdiff/9c22aee04cf0bdf6a3858340bc6ff27d6805254f
>> Thank you.
>>
>> Would it be legit to recycle qemu_common_drive_opts from blockdev.c for this?
> No, I don't think so. That one should in theory be only for BlockBackend
> options. For the short term, it still mixes BB and BDS options, but BDS
> options should be moved out step by step. In any case, it is only used
> for the top level.
>
> Any option that is parsed with qemu_opts_absorb_qdict() in
> bdrv_open_common() must also be handled there. If you don't ensure that
> and extract all the options that blockdev_init() knows without actually
> handling them, it can happen that invalid options are silently ignored
> (e.g. backing.werror should error out, but would be accepted).
>
> And please coordinate with Max, if both of you write a patch, that's
> wasted time.

Max, if you don't have started I would use Kevins patch as basis?

Peter
Max Reitz Oct. 20, 2014, 1:59 p.m. UTC | #19
On 20.10.2014 at 15:59, Peter Lieven wrote:
> On 20.10.2014 15:55, Kevin Wolf wrote:
>> Am 20.10.2014 um 15:47 hat Peter Lieven geschrieben:
>>> On 20.10.2014 15:31, Kevin Wolf wrote:
>>>> Am 20.10.2014 um 15:22 hat Max Reitz geschrieben:
>>>>> On 20.10.2014 at 15:19, Peter Lieven wrote:
>>>>>> On 20.10.2014 15:15, Max Reitz wrote:
>>>>>>> On 20.10.2014 at 14:48, Peter Lieven wrote:
>>>>>>>> On 20.10.2014 14:19, Max Reitz wrote:
>>>>>>>>> On 2014-10-20 at 14:16, Peter Lieven wrote:
>>>>>>>>>> On 20.10.2014 13:51, Max Reitz wrote:
>>>>>>>>>>> On 2014-10-20 at 12:03, Peter Lieven wrote:
>>>>>>>>>>> [...]
>>>>>>>>>> Can you further help here. I think my problem was that I
>>>>>>>>>> don't have access to the commandline options in
>>>>>>>>>> bdrv_open?!
>>>>>>>>> You do. It's the "options" QDict. :-)
>>>>>>>> Maybe I just don't get it.
>>>>>>>>
>>>>>>>> If I specify
>>>>>>>>
>>>>>>>> qemu -drive if=virtio,file=image.qcow2,write-merging=off
>>>>>>>>
>>>>>>>> and check with
>>>>>>>>
>>>>>>>> qdict_get_try_bool(options, "write-merging", true);
>>>>>>>>
>>>>>>>> in bdrv_open() directly before bdrv_swap I always get true.
>>>>>>> Hm, judging from fprintf(stderr, "%s\n",
>>>>>>> qstring_get_str(qobject_to_json_pretty(QOBJECT(options))));,
>>>>>>> it's there for me (directly after qdict_del(options,
>>>>>>> "node-name"). The output is:
>>>>>>>
>>>>>>> Qemu wrote:
>>>>>>>> {
>>>>>>>>     "filename": "image.qcow2"
>>>>>>>> }
>>>>>>>> {
>>>>>>>>     "write-merging": "off"
>>>>>>>> }
>>>>>>>> qemu-system-x86_64: -drive
>>>>>>>> if=virtio,file=image.qcow2,write-merging=off: could not open
>>>>>>>> disk image image.qcow2: Block format 'qcow2' used by device
>>>>>>>> 'virtio0' doesn't support the option 'write-merging'
>>>>>>> But as you can see, it's a string and not a bool. So the problem
>>>>>>> is that there are (at least) two parameter "types" in qemu: One
>>>>>>> is just giving a QDict, and the other are QemuOpts. QDicts are
>>>>>>> just the raw user input and the user can only input strings, so
>>>>>>> everything is just a string. As far as I know, typing everything
>>>>>>> correctly is done by converting the QDict to a QemuOpts object
>>>>>>> (as you can see in generally every block driver which supports
>>>>>>> some options (e.g. qcow2) and also in blockdev_init(), it's
>>>>>>> qemu_opts_absorb_qdict()).
>>>>>>>
>>>>>>> Sooo, right, I forgot that. Currently, there are no non-string
>>>>>>> non-block-driver-specific options for mid-tree BDS (in contrast
>>>>>>> to the root BDS, which are parsed in blockdev_init()), so you
>>>>>>> now have the honorable task of introducing such a QemuOptsList
>>>>>>> along with qemu_opts_absorb_qdict() and everything to
>>>>>>> bdrv_open_common(). *cough*
>>>>>> I would appreciate if someone with better knowledge of this whole
>>>>>> stuff would start this. Or we postpone this know until all the
>>>>>> ongoing conversions are done.
>>>>> I can try and create some barebone which your patches can then be
>>>>> based on. I probably don't have the knowledge either, but I'm daring
>>>>> enough to do it anyway. ;-)
>>>> Actually I have some patches somewhere [1] that introduce a 
>>>> QemuOpts for
>>>> bdrv_open_common(). I intended to use that for cache modes, but as I
>>>> explained in our KVM Forum presentation, it's not quite as easy as I
>>>> thought it would be and so the patch series isn't ready yet.
>>>>
>>>> Anyway, having the QemuOpts there for driver-independent options is
>>>> probably the way to go. Feel free to remove the caching from my
>>>> patch and keep only the node-name part. Then it can be a preparatory
>>>> patch for your series where you simply add a new option to the list.
>>>>
>>>> Kevin
>>>>
>>>> [1] 
>>>> http://repo.or.cz/w/qemu/kevin.git/commitdiff/9c22aee04cf0bdf6a3858340bc6ff27d6805254f
>>> Thank you.
>>>
>>> Would it be legit to recycle qemu_common_drive_opts from blockdev.c 
>>> for this?
>> No, I don't think so. That one should in theory be only for BlockBackend
>> options. For the short term, it still mixes BB and BDS options, but BDS
>> options should be moved out step by step. In any case, it is only used
>> for the top level.
>>
>> Any option that is parsed with qemu_opts_absorb_qdict() in
>> bdrv_open_common() must also be handled there. If you don't ensure that
>> and extract all the options that blockdev_init() knows without actually
>> handling them, it can happen that invalid options are silently ignored
>> (e.g. backing.werror should error out, but would be accepted).
>>
>> And please coordinate with Max, if both of you write a patch, that's
>> wasted time.
>
> Max, if you don't have started I would use Kevins patch as basis?

No, I haven't. Feel free to.

Max
diff mbox

Patch

diff --git a/block.c b/block.c
index 27533f3..1658a72 100644
--- a/block.c
+++ b/block.c
@@ -4531,6 +4531,10 @@  static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
 {
     int i, outidx;
 
+    if (!bs->write_merging) {
+        return num_reqs;
+    }
+
     // Sort requests by start sector
     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
 
diff --git a/block/qapi.c b/block/qapi.c
index 9733ebd..02251dd 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -58,6 +58,7 @@  BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
 
     info->backing_file_depth = bdrv_get_backing_file_depth(bs);
     info->detect_zeroes = bs->detect_zeroes;
+    info->write_merging = bs->write_merging;
 
     if (bs->io_limits_enabled) {
         ThrottleConfig cfg;
diff --git a/blockdev.c b/blockdev.c
index e595910..13e47b8 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -378,6 +378,7 @@  static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     const char *id;
     bool has_driver_specific_opts;
     BlockdevDetectZeroesOptions detect_zeroes;
+    bool write_merging;
     BlockDriver *drv = NULL;
 
     /* Check common options by copying from bs_opts to opts, all other options
@@ -405,6 +406,7 @@  static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
     ro = qemu_opt_get_bool(opts, "read-only", 0);
     copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
+    write_merging = qemu_opt_get_bool(opts, "write-merging", true);
 
     if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
         if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) {
@@ -530,6 +532,7 @@  static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
     bs->read_only = ro;
     bs->detect_zeroes = detect_zeroes;
+    bs->write_merging = write_merging;
 
     bdrv_set_on_error(bs, on_read_error, on_write_error);
 
@@ -2746,6 +2749,10 @@  QemuOptsList qemu_common_drive_opts = {
             .name = "detect-zeroes",
             .type = QEMU_OPT_STRING,
             .help = "try to optimize zero writes (off, on, unmap)",
+        },{
+            .name = "write-merging",
+            .type = QEMU_OPT_BOOL,
+            .help = "enable write merging (default: true)",
         },
         { /* end of list */ }
     },
diff --git a/hmp.c b/hmp.c
index 63d7686..8d6ad0b 100644
--- a/hmp.c
+++ b/hmp.c
@@ -348,6 +348,10 @@  void hmp_info_block(Monitor *mon, const QDict *qdict)
                            BlockdevDetectZeroesOptions_lookup[info->value->inserted->detect_zeroes]);
         }
 
+        if (!info->value->inserted->write_merging) {
+            monitor_printf(mon, "    Write Merging:    off\n");
+        }
+
         if (info->value->inserted->bps
             || info->value->inserted->bps_rd
             || info->value->inserted->bps_wr
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8d86a6c..39bbde2 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -407,6 +407,7 @@  struct BlockDriverState {
 
     QDict *options;
     BlockdevDetectZeroesOptions detect_zeroes;
+    bool write_merging;
 
     /* The error object in use for blocking operations on backing_hd */
     Error *backing_blocker;
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 8f7089e..4931bd9 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -214,6 +214,8 @@ 
 #
 # @detect_zeroes: detect and optimize zero writes (Since 2.1)
 #
+# @write_merging: true if write merging is enabled (Since 2.2)
+#
 # @bps: total throughput limit in bytes per second is specified
 #
 # @bps_rd: read throughput limit in bytes per second is specified
@@ -250,6 +252,7 @@ 
             '*backing_file': 'str', 'backing_file_depth': 'int',
             'encrypted': 'bool', 'encryption_key_missing': 'bool',
             'detect_zeroes': 'BlockdevDetectZeroesOptions',
+            'write_merging': 'bool',
             'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
             'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
             'image': 'ImageInfo',
@@ -1180,6 +1183,10 @@ 
 #                 (default: false)
 # @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
 #                 (default: off)
+# @write-merging: #optional enable the merging of write requests
+#                 also known as multiwrite_merge (Since 2.2)
+#                 (default: true, but this might change in the future
+#                  depending on format/protocol/features used)
 #
 # Since: 1.7
 ##
@@ -1193,7 +1200,8 @@ 
             '*rerror': 'BlockdevOnError',
             '*werror': 'BlockdevOnError',
             '*read-only': 'bool',
-            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
+            '*detect-zeroes': 'BlockdevDetectZeroesOptions',
+            '*write-merging': 'bool' } }
 
 ##
 # @BlockdevOptionsFile
diff --git a/qemu-options.hx b/qemu-options.hx
index 22cf3b9..d2f756f 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -432,6 +432,7 @@  DEF("drive", HAS_ARG, QEMU_OPTION_drive,
     "       [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
     "       [,readonly=on|off][,copy-on-read=on|off]\n"
     "       [,discard=ignore|unmap][,detect-zeroes=on|off|unmap]\n"
+    "       [,write-merging=on|off]\n"
     "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
     "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
     "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 1abd619..529a563 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2104,6 +2104,7 @@  Each json-object contain the following:
          - "iops_size": I/O size when limiting by iops (json-int)
          - "detect_zeroes": detect and optimize zero writing (json-string)
              - Possible values: "off", "on", "unmap"
+         - "write_merging": enable multiwrite_merge feature (json-bool)
          - "image": the detail of the image, it is a json-object containing
             the following:
              - "filename": image file name (json-string)
@@ -2181,6 +2182,7 @@  Example:
                "iops_wr_max": 0,
                "iops_size": 0,
                "detect_zeroes": "on",
+               "write_merging": "true",
                "image":{
                   "filename":"disks/test.qcow2",
                   "format":"qcow2",