@@ -1204,6 +1204,7 @@ static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
};
assert(!(s->info.flags & NBD_FLAG_READ_ONLY));
+ s->info.modified = true;
if (flags & BDRV_REQ_FUA) {
assert(s->info.flags & NBD_FLAG_SEND_FUA);
request.flags |= NBD_CMD_FLAG_FUA;
@@ -1276,6 +1277,7 @@ static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset,
};
assert(!(s->info.flags & NBD_FLAG_READ_ONLY));
+ s->info.modified = true;
if (!(s->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) {
return 0;
}
@@ -1909,6 +1911,16 @@ static int nbd_co_flush(BlockDriverState *bs)
return nbd_client_co_flush(bs);
}
+/* Report BDRV_ZERO_OPEN only while init_state has NBD_INIT_ZERO and no modification was recorded. */
+static int nbd_known_zeroes(BlockDriverState *bs)
+{
+ const BDRVNBDState *state = bs->opaque;
+ bool pristine = !state->info.modified;
+ bool zero_init = (state->info.init_state & NBD_INIT_ZERO) != 0;
+
+ return (pristine && zero_init) ? BDRV_ZERO_OPEN : 0;
+}
+
static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
@@ -2027,6 +2039,7 @@ static BlockDriver bdrv_nbd = {
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
+ .bdrv_known_zeroes = nbd_known_zeroes,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_client_detach_aio_context,
@@ -2052,6 +2065,7 @@ static BlockDriver bdrv_nbd_tcp = {
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
+ .bdrv_known_zeroes = nbd_known_zeroes,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_client_detach_aio_context,
@@ -2077,6 +2091,7 @@ static BlockDriver bdrv_nbd_unix = {
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
+ .bdrv_known_zeroes = nbd_known_zeroes,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_client_detach_aio_context,
@@ -307,6 +307,7 @@ struct NBDExportInfo {
uint32_t min_block;
uint32_t opt_block;
uint32_t max_block;
+ uint16_t init_state;
uint32_t context_id;
@@ -314,6 +315,9 @@ struct NBDExportInfo {
char *description;
int n_contexts;
char **contexts;
+
+ /* Becomes true once a modifying request (write, discard) is issued; init_state is stale from then on. */
+ bool modified;
};
typedef struct NBDExportInfo NBDExportInfo;
@@ -350,16 +350,17 @@ static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
assert(opt == NBD_OPT_GO || opt == NBD_OPT_INFO);
trace_nbd_opt_info_go_start(nbd_opt_lookup(opt), info->name);
- buf = g_malloc(4 + len + 2 + 2 * info->request_sizes + 1);
+ buf = g_malloc(4 + len + 2 + 2 * (info->request_sizes + 1) + 1);
stl_be_p(buf, len);
memcpy(buf + 4, info->name, len);
- /* At most one request, everything else up to server */
- stw_be_p(buf + 4 + len, info->request_sizes);
+ /* One or two requests, everything else up to server */
+ stw_be_p(buf + 4 + len, info->request_sizes + 1);
if (info->request_sizes) {
stw_be_p(buf + 4 + len + 2, NBD_INFO_BLOCK_SIZE);
}
+ stw_be_p(buf + 4 + len + 2 + 2 * info->request_sizes, NBD_INFO_INIT_STATE);
error = nbd_send_option_request(ioc, opt,
- 4 + len + 2 + 2 * info->request_sizes,
+ 4 + len + 2 + 2 * (info->request_sizes + 1),
buf, errp);
g_free(buf);
if (error < 0) {
@@ -484,6 +485,21 @@ static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
info->max_block);
break;
+ case NBD_INFO_INIT_STATE:
+ if (len != sizeof(info->init_state)) {
+ error_setg(errp, "remaining export info len %" PRIu32
+ " is unexpected size", len);
+ nbd_send_opt_abort(ioc);
+ return -1;
+ }
+ if (nbd_read16(ioc, &info->init_state, "info init state",
+ errp) < 0) {
+ nbd_send_opt_abort(ioc);
+ return -1;
+ }
+ trace_nbd_opt_info_init_state(info->init_state);
+ break;
+
default:
/*
* Not worth the bother to check if NBD_INFO_NAME or
@@ -10,6 +10,7 @@ nbd_opt_info_go_start(const char *opt, const char *name) "Attempting %s for expo
nbd_opt_info_go_success(const char *opt) "Export is ready after %s request"
nbd_opt_info_unknown(int info, const char *name) "Ignoring unknown info %d (%s)"
nbd_opt_info_block_size(uint32_t minimum, uint32_t preferred, uint32_t maximum) "Block sizes are 0x%" PRIx32 ", 0x%" PRIx32 ", 0x%" PRIx32
+nbd_opt_info_init_state(unsigned int flags) "Initial state flags 0x%x"
nbd_receive_query_exports_start(const char *wantname) "Querying export list for '%s'"
nbd_receive_query_exports_success(const char *wantname) "Found desired export name '%s'"
nbd_receive_starttls_new_client(void) "Setting up TLS"
@@ -220,6 +220,19 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
printf(" opt block: %u\n", list[i].opt_block);
printf(" max block: %u\n", list[i].max_block);
}
+ {
+ static const char *const init_names[] = {
+ [NBD_INIT_SPARSE_BIT] = "sparse",
+ [NBD_INIT_ZERO_BIT] = "zero",
+ };
+ printf(" init state: 0x%x (", list[i].init_state);
+ for (size_t bit = 0; bit < ARRAY_SIZE(init_names); bit++) {
+ if (init_names[bit] && (list[i].init_state & (1 << bit))) {
+ printf(" %s", init_names[bit]);
+ }
+ }
+ printf(" )\n");
+ }
if (list[i].n_contexts) {
printf(" available meta contexts: %d\n", list[i].n_contexts);
for (j = 0; j < list[i].n_contexts; j++) {
@@ -59,6 +59,7 @@ exports available: 2
min block: 1
opt block: 4096
max block: 33554432
+ init state: 0x0 ( )
available meta contexts: 2
base:allocation
qemu:dirty-bitmap:b
@@ -69,6 +70,7 @@ exports available: 2
min block: 1
opt block: 4096
max block: 33554432
+ init state: 0x0 ( )
available meta contexts: 2
base:allocation
qemu:dirty-bitmap:b2
@@ -140,6 +142,7 @@ exports available: 2
min block: 1
opt block: 4096
max block: 33554432
+ init state: 0x0 ( )
available meta contexts: 2
base:allocation
qemu:dirty-bitmap:b
@@ -150,6 +153,7 @@ exports available: 2
min block: 1
opt block: 4096
max block: 33554432
+ init state: 0x0 ( )
available meta contexts: 2
base:allocation
qemu:dirty-bitmap:b2
@@ -43,6 +43,7 @@ exports available: 1
min block: 1
opt block: 4096
max block: 33554432
+ init state: 0x0 ( )
available meta contexts: 1
base:allocation
Using the new NBD extension of NBD_INFO_INIT_STATE, we can pass on the information when a server reports that an image initially reads as all zeroes. The server information is treated as stale the moment we request a write operation, even across reconnections to the server, which is fine since our intended usage of BDRV_ZERO_OPEN is to optimize qemu-img at startup, and not something relied on during later image use. Update iotests to reflect improved output of 'qemu-nbd --list'. As NBD still cannot create or resize images, we don't need to worry about BDRV_ZERO_CREATE or BDRV_ZERO_TRUNCATE. Signed-off-by: Eric Blake <eblake@redhat.com> --- block/nbd.c | 15 +++++++++++++++ include/block/nbd.h | 4 ++++ nbd/client.c | 24 ++++++++++++++++++++---- nbd/trace-events | 1 + qemu-nbd.c | 13 +++++++++++++ tests/qemu-iotests/223.out | 4 ++++ tests/qemu-iotests/233.out | 1 + 7 files changed, 58 insertions(+), 4 deletions(-)