@@ -3248,6 +3248,15 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
+ if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
+ !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
+ qemu_iovec_is_zero(qiov)) {
+ flags |= BDRV_REQ_ZERO_WRITE;
+ if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
+ flags |= BDRV_REQ_MAY_UNMAP;
+ }
+ }
+
if (ret < 0) {
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
@@ -50,6 +50,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
}
info->backing_file_depth = bdrv_get_backing_file_depth(bs);
+ info->detect_zeroes = bs->detect_zeroes;
if (bs->io_limits_enabled) {
ThrottleConfig cfg;
@@ -341,6 +341,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
QemuOpts *opts;
const char *id;
bool has_driver_specific_opts;
+ BlockdevDetectZeroesOptions detect_zeroes;
BlockDriver *drv = NULL;
/* Check common options by copying from bs_opts to opts, all other options
@@ -469,6 +470,24 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
}
}
+ detect_zeroes =
+ parse_enum_option(BlockdevDetectZeroesOptions_lookup,
+ qemu_opt_get(opts, "detect-zeroes"),
+ BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
+ BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+ &error);
+ if (error) {
+ error_propagate(errp, error);
+ goto early_err;
+ }
+
+ if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
+ !(bdrv_flags & BDRV_O_UNMAP)) {
+ error_setg(errp, "setting detect-zeroes to unmap is not allowed "
+ "without setting discard operation to unmap");
+ goto early_err;
+ }
+
/* init */
dinfo = g_malloc0(sizeof(*dinfo));
dinfo->id = g_strdup(qemu_opts_id(opts));
@@ -479,6 +498,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
}
dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
dinfo->bdrv->read_only = ro;
+ dinfo->bdrv->detect_zeroes = detect_zeroes;
dinfo->refcount = 1;
if (serial != NULL) {
dinfo->serial = g_strdup(serial);
@@ -2472,6 +2492,10 @@ QemuOptsList qemu_common_drive_opts = {
.name = "copy-on-read",
.type = QEMU_OPT_BOOL,
.help = "copy read data from backing file into image file",
+ },{
+ .name = "detect-zeroes",
+ .type = QEMU_OPT_STRING,
+ .help = "try to optimize zero writes (off, on, unmap)",
},
{ /* end of list */ }
},
@@ -341,6 +341,11 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
info->value->inserted->backing_file_depth);
}
+ if (info->value->inserted->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF) {
+ monitor_printf(mon, " Detect zeroes: %s\n",
+ BlockdevDetectZeroesOptions_lookup[info->value->inserted->detect_zeroes]);
+ }
+
if (info->value->inserted->bps
|| info->value->inserted->bps_rd
|| info->value->inserted->bps_wr
@@ -364,6 +364,7 @@ struct BlockDriverState {
BlockJob *job;
QDict *options;
+ BlockdevDetectZeroesOptions detect_zeroes;
};
int get_tmp_filename(char *filename, int size);
@@ -942,6 +942,8 @@
# @encryption_key_missing: true if the backing device is encrypted but an
# valid encryption key is missing
#
+# @detect_zeroes: detect and optimize zero writes (Since 2.1)
+#
# @bps: total throughput limit in bytes per second is specified
#
# @bps_rd: read throughput limit in bytes per second is specified
@@ -977,6 +979,7 @@
'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
'*backing_file': 'str', 'backing_file_depth': 'int',
'encrypted': 'bool', 'encryption_key_missing': 'bool',
+ 'detect_zeroes': 'BlockdevDetectZeroesOptions',
'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
'image': 'ImageInfo',
@@ -4255,6 +4258,22 @@
'data': [ 'ignore', 'unmap' ] }
##
+# @BlockdevDetectZeroesOptions
+#
+# Describes the operation mode for the automatic conversion of plain
+# zero writes by the OS to driver specific optimized zero write commands.
+#
+# @off: Disabled (default)
+# @on: Enabled
+# @unmap: Enabled and even try to unmap blocks if possible. This requires
+# also that @BlockdevDiscardOptions is set to unmap for this device.
+#
+# Since: 2.1
+##
+{ 'enum': 'BlockdevDetectZeroesOptions',
+ 'data': [ 'off', 'on', 'unmap' ] }
+
+##
# @BlockdevAioOptions
#
# Selects the AIO backend to handle I/O requests
@@ -4306,20 +4325,22 @@
# Options that are available for all block devices, independent of the block
# driver.
#
-# @driver: block driver name
-# @id: #optional id by which the new block device can be referred to.
-# This is a required option on the top level of blockdev-add, and
-# currently not allowed on any other level.
-# @node-name: #optional the name of a block driver state node (Since 2.0)
-# @discard: #optional discard-related options (default: ignore)
-# @cache: #optional cache-related options
-# @aio: #optional AIO backend (default: threads)
-# @rerror: #optional how to handle read errors on the device
-# (default: report)
-# @werror: #optional how to handle write errors on the device
-# (default: enospc)
-# @read-only: #optional whether the block device should be read-only
-# (default: false)
+# @driver: block driver name
+# @id: #optional id by which the new block device can be referred to.
+# This is a required option on the top level of blockdev-add, and
+# currently not allowed on any other level.
+# @node-name: #optional the name of a block driver state node (Since 2.0)
+# @discard: #optional discard-related options (default: ignore)
+# @cache: #optional cache-related options
+# @aio: #optional AIO backend (default: threads)
+# @rerror: #optional how to handle read errors on the device
+# (default: report)
+# @werror: #optional how to handle write errors on the device
+# (default: enospc)
+# @read-only: #optional whether the block device should be read-only
+# (default: false)
+# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
+# (default: off)
#
# Since: 1.7
##
@@ -4332,7 +4353,8 @@
'*aio': 'BlockdevAioOptions',
'*rerror': 'BlockdevOnError',
'*werror': 'BlockdevOnError',
- '*read-only': 'bool' } }
+ '*read-only': 'bool',
+ '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
##
# @BlockdevOptionsFile
@@ -414,6 +414,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
" [,serial=s][,addr=A][,rerror=ignore|stop|report]\n"
" [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
" [,readonly=on|off][,copy-on-read=on|off]\n"
+ " [,detect-zeroes=on|off|unmap]\n"
" [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
" [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
" [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
@@ -475,6 +476,11 @@ Open drive @option{file} as read-only. Guest write attempts will fail.
@item copy-on-read=@var{copy-on-read}
@var{copy-on-read} is "on" or "off" and enables whether to copy read backing
file sectors into the image file.
+@item detect-zeroes=@var{detect-zeroes}
+@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic
+conversion of plain zero writes by the OS to driver specific optimized
+zero write commands. You may even choose "unmap" if @var{discard} is set
+to "unmap" to allow a zero write to be converted to an UNMAP operation.
@end table
By default, the @option{cache=writeback} mode is used. It will report data
@@ -2032,6 +2032,8 @@ Each json-object contain the following:
- "iops_rd_max": read I/O operations max (json-int)
- "iops_wr_max": write I/O operations max (json-int)
- "iops_size": I/O size when limiting by iops (json-int)
+ - "detect_zeroes": detect and optimize zero writing (json-string)
+ - Possible values: "off", "on", "unmap"
- "image": the detail of the image, it is a json-object containing
the following:
- "filename": image file name (json-string)
@@ -2108,6 +2110,7 @@ Example:
"iops_rd_max": 0,
"iops_wr_max": 0,
"iops_size": 0,
+ "detect_zeroes": "on",
"image":{
"filename":"disks/test.qcow2",
"format":"qcow2",
this patch tries to optimize zero write requests by automatically using bdrv_write_zeroes if it is supported by the format. This significantly speeds up file system initialization and should speed zero write test used to test backend storage performance. I ran the following 2 tests on my internal SSD with a 50G QCOW2 container and on an attached iSCSI storage. a) mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/vdX QCOW2 [off] [on] [unmap] ----- runtime: 14secs 1.1secs 1.1secs filesize: 937M 18M 18M iSCSI [off] [on] [unmap] ---- runtime: 9.3s 0.9s 0.9s b) dd if=/dev/zero of=/dev/vdX bs=1M oflag=direct QCOW2 [off] [on] [unmap] ----- runtime: 246secs 18secs 18secs filesize: 51G 192K 192K throughput: 203M/s 2.3G/s 2.3G/s iSCSI* [off] [on] [unmap] ---- runtime: 8mins 45secs 33secs throughput: 106M/s 1.2G/s 1.6G/s allocated: 100% 100% 0% * The storage was connected via an 1Gbit interface. It seems to internally handle writing zeroes via WRITESAME16 very fast. Signed-off-by: Peter Lieven <pl@kamp.de> --- block.c | 9 ++++++++ block/qapi.c | 1 + blockdev.c | 24 +++++++++++++++++++++ hmp.c | 5 +++++ include/block/block_int.h | 1 + qapi-schema.json | 52 ++++++++++++++++++++++++++++++++------------- qemu-options.hx | 6 ++++++ qmp-commands.hx | 3 +++ 8 files changed, 86 insertions(+), 15 deletions(-)