Message ID | 1384271389-20716-9-git-send-email-pbonzini@redhat.com |
---|---|
State | New |
Headers | show |
Am 12.11.2013 um 16:49 schrieb Paolo Bonzini <pbonzini@redhat.com>: > Writing zeroes to a file can be done by punching a hole if MAY_UNMAP > is set. > > Note that in this case handle_aiocb_discard's ENOTSUP return code > is not ignored, but makes the block layer fall back to the generic > implementation. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > block/raw-posix.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- > trace-events | 1 + > 2 files changed, 64 insertions(+), 1 deletion(-) > > diff --git a/block/raw-posix.c b/block/raw-posix.c > index 27fe47d..830e109 100644 > --- a/block/raw-posix.c > +++ b/block/raw-posix.c > @@ -142,6 +142,7 @@ typedef struct BDRVRawState { > bool is_xfs : 1; > #endif > bool has_discard : 1; > + bool discard_zeroes : 1; > } BDRVRawState; > > typedef struct BDRVRawReopenState { > @@ -283,6 +284,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, > Error *local_err = NULL; > const char *filename; > int fd, ret; > + struct stat st; > > opts = qemu_opts_create_nofail(&raw_runtime_opts); > qemu_opts_absorb_qdict(opts, options, &local_err); > @@ -325,6 +327,15 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, > #endif > > s->has_discard = true; > + > + if (fstat(s->fd, &st) < 0) { > + error_setg_errno(errp, errno, "Could not stat file"); > + goto fail; > + } > + if (S_ISREG(st.st_mode)) { > + s->discard_zeroes = true; > + } > + > #ifdef CONFIG_XFS > if (platform_test_xfs_fd(s->fd)) { > s->is_xfs = true; > @@ -788,6 +799,29 @@ static int aio_worker(void *arg) > return ret; > } > > +static int paio_submit_co(BlockDriverState *bs, int fd, > + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, > + int type) > +{ > + RawPosixAIOData *acb = g_slice_new(RawPosixAIOData); > + ThreadPool *pool; > + > + acb->bs = bs; > + acb->aio_type = type; > + acb->aio_fildes = fd; > + > + if (qiov) { > + acb->aio_iov = qiov->iov; > + acb->aio_niov = qiov->niov; > + } > + acb->aio_nbytes = 
nb_sectors * 512; > + acb->aio_offset = sector_num * 512; > + > + trace_paio_submit_co(sector_num, nb_sectors, type); > + pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); > + return thread_pool_submit_co(pool, aio_worker, acb); > +} > + > static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, > int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, > BlockDriverCompletionFunc *cb, void *opaque, int type) > @@ -1200,6 +1234,31 @@ static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs, > cb, opaque, QEMU_AIO_DISCARD); > } > > +static int coroutine_fn raw_co_write_zeroes( > + BlockDriverState *bs, int64_t sector_num, > + int nb_sectors, BdrvRequestFlags flags) > +{ > + BDRVRawState *s = bs->opaque; > + > + if (!(flags & BDRV_REQ_MAY_UNMAP)) { > + return -ENOTSUP; > + } > + if (!s->discard_zeroes) { > + return -ENOTSUP; > + } > + return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors, > + QEMU_AIO_DISCARD); > +} > + > +static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) > +{ > + BDRVRawState *s = bs->opaque; > + > + bdi->unallocated_blocks_are_zero = s->discard_zeroes; > + bdi->can_write_zeroes_with_unmap = s->discard_zeroes; Does the BLKDISCARDZEROES ioctl guarantee that a device is zero-initialized, or does it only guarantee that a discard will not fail and that the discarded range reads back as zeroes afterwards? 
Peter > + return 0; > +} > + > static QEMUOptionParameter raw_create_options[] = { > { > .name = BLOCK_OPT_SIZE, > @@ -1223,6 +1282,7 @@ static BlockDriver bdrv_file = { > .bdrv_create = raw_create, > .bdrv_has_zero_init = bdrv_has_zero_init_1, > .bdrv_co_get_block_status = raw_co_get_block_status, > + .bdrv_co_write_zeroes = raw_co_write_zeroes, > > .bdrv_aio_readv = raw_aio_readv, > .bdrv_aio_writev = raw_aio_writev, > @@ -1231,6 +1291,7 @@ static BlockDriver bdrv_file = { > > .bdrv_truncate = raw_truncate, > .bdrv_getlength = raw_getlength, > + .bdrv_get_info = raw_get_info, > .bdrv_get_allocated_file_size > = raw_get_allocated_file_size, > > @@ -1586,6 +1647,7 @@ static BlockDriver bdrv_host_device = { > > .bdrv_truncate = raw_truncate, > .bdrv_getlength = raw_getlength, > + .bdrv_get_info = raw_get_info, > .bdrv_get_allocated_file_size > = raw_get_allocated_file_size, > > @@ -1715,7 +1777,7 @@ static BlockDriver bdrv_host_floppy = { > .bdrv_aio_flush = raw_aio_flush, > > .bdrv_truncate = raw_truncate, > - .bdrv_getlength = raw_getlength, > + .bdrv_getlength = raw_getlength, > .has_variable_length = true, > .bdrv_get_allocated_file_size > = raw_get_allocated_file_size, > diff --git a/trace-events b/trace-events > index 96b3974..995c84a 100644 > --- a/trace-events > +++ b/trace-events > @@ -128,6 +128,7 @@ thread_pool_cancel(void *req, void *opaque) "req %p opaque %p" > > # block/raw-win32.c > # block/raw-posix.c > +paio_submit_co(int64_t sector_num, int nb_sectors, int type) "sector_num %"PRId64" nb_sectors %d type %d" > paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d" > > # ioport.c > -- > 1.8.4.2 > >
Am 13.11.2013 um 07:27 schrieb Peter Lieven <pl@kamp.de>: > > Am 12.11.2013 um 16:49 schrieb Paolo Bonzini <pbonzini@redhat.com>: > >> Writing zeroes to a file can be done by punching a hole if MAY_UNMAP >> is set. >> >> Note that in this case handle_aiocb_discard's ENOTSUP return code >> is not ignored, but makes the block layer fall back to the generic >> implementation. >> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >> --- >> block/raw-posix.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- >> trace-events | 1 + >> 2 files changed, 64 insertions(+), 1 deletion(-) >> >> diff --git a/block/raw-posix.c b/block/raw-posix.c >> index 27fe47d..830e109 100644 >> --- a/block/raw-posix.c >> +++ b/block/raw-posix.c >> @@ -142,6 +142,7 @@ typedef struct BDRVRawState { >> bool is_xfs : 1; >> #endif >> bool has_discard : 1; >> + bool discard_zeroes : 1; >> } BDRVRawState; >> >> typedef struct BDRVRawReopenState { >> @@ -283,6 +284,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, >> Error *local_err = NULL; >> const char *filename; >> int fd, ret; >> + struct stat st; >> >> opts = qemu_opts_create_nofail(&raw_runtime_opts); >> qemu_opts_absorb_qdict(opts, options, &local_err); >> @@ -325,6 +327,15 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, >> #endif >> >> s->has_discard = true; >> + >> + if (fstat(s->fd, &st) < 0) { >> + error_setg_errno(errp, errno, "Could not stat file"); >> + goto fail; >> + } >> + if (S_ISREG(st.st_mode)) { >> + s->discard_zeroes = true; >> + } >> + >> #ifdef CONFIG_XFS >> if (platform_test_xfs_fd(s->fd)) { >> s->is_xfs = true; >> @@ -788,6 +799,29 @@ static int aio_worker(void *arg) >> return ret; >> } >> >> +static int paio_submit_co(BlockDriverState *bs, int fd, >> + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, >> + int type) >> +{ >> + RawPosixAIOData *acb = g_slice_new(RawPosixAIOData); >> + ThreadPool *pool; >> + >> + acb->bs = bs; >> + acb->aio_type = type; >> + 
acb->aio_fildes = fd; >> + >> + if (qiov) { >> + acb->aio_iov = qiov->iov; >> + acb->aio_niov = qiov->niov; >> + } >> + acb->aio_nbytes = nb_sectors * 512; >> + acb->aio_offset = sector_num * 512; >> + >> + trace_paio_submit_co(sector_num, nb_sectors, type); >> + pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); >> + return thread_pool_submit_co(pool, aio_worker, acb); >> +} >> + >> static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, >> int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, >> BlockDriverCompletionFunc *cb, void *opaque, int type) >> @@ -1200,6 +1234,31 @@ static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs, >> cb, opaque, QEMU_AIO_DISCARD); >> } >> >> +static int coroutine_fn raw_co_write_zeroes( >> + BlockDriverState *bs, int64_t sector_num, >> + int nb_sectors, BdrvRequestFlags flags) >> +{ >> + BDRVRawState *s = bs->opaque; >> + >> + if (!(flags & BDRV_REQ_MAY_UNMAP)) { >> + return -ENOTSUP; >> + } >> + if (!s->discard_zeroes) { >> + return -ENOTSUP; >> + } >> + return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors, >> + QEMU_AIO_DISCARD); >> +} >> + >> +static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) >> +{ >> + BDRVRawState *s = bs->opaque; >> + >> + bdi->unallocated_blocks_are_zero = s->discard_zeroes; >> + bdi->can_write_zeroes_with_unmap = s->discard_zeroes; > > does BLKDISCARDZEROES ioctl guarantee that a device is > zero initialized or does it just guarantee that a discard may not > fail and that it reads as zeroes afterwards? Please ignore this. We are talking about a file here. 
Peter > > Peter > >> + return 0; >> +} >> + >> static QEMUOptionParameter raw_create_options[] = { >> { >> .name = BLOCK_OPT_SIZE, >> @@ -1223,6 +1282,7 @@ static BlockDriver bdrv_file = { >> .bdrv_create = raw_create, >> .bdrv_has_zero_init = bdrv_has_zero_init_1, >> .bdrv_co_get_block_status = raw_co_get_block_status, >> + .bdrv_co_write_zeroes = raw_co_write_zeroes, >> >> .bdrv_aio_readv = raw_aio_readv, >> .bdrv_aio_writev = raw_aio_writev, >> @@ -1231,6 +1291,7 @@ static BlockDriver bdrv_file = { >> >> .bdrv_truncate = raw_truncate, >> .bdrv_getlength = raw_getlength, >> + .bdrv_get_info = raw_get_info, >> .bdrv_get_allocated_file_size >> = raw_get_allocated_file_size, >> >> @@ -1586,6 +1647,7 @@ static BlockDriver bdrv_host_device = { >> >> .bdrv_truncate = raw_truncate, >> .bdrv_getlength = raw_getlength, >> + .bdrv_get_info = raw_get_info, >> .bdrv_get_allocated_file_size >> = raw_get_allocated_file_size, >> >> @@ -1715,7 +1777,7 @@ static BlockDriver bdrv_host_floppy = { >> .bdrv_aio_flush = raw_aio_flush, >> >> .bdrv_truncate = raw_truncate, >> - .bdrv_getlength = raw_getlength, >> + .bdrv_getlength = raw_getlength, >> .has_variable_length = true, >> .bdrv_get_allocated_file_size >> = raw_get_allocated_file_size, >> diff --git a/trace-events b/trace-events >> index 96b3974..995c84a 100644 >> --- a/trace-events >> +++ b/trace-events >> @@ -128,6 +128,7 @@ thread_pool_cancel(void *req, void *opaque) "req %p opaque %p" >> >> # block/raw-win32.c >> # block/raw-posix.c >> +paio_submit_co(int64_t sector_num, int nb_sectors, int type) "sector_num %"PRId64" nb_sectors %d type %d" >> paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d" >> >> # ioport.c >> -- >> 1.8.4.2 >> >> >
diff --git a/block/raw-posix.c b/block/raw-posix.c index 27fe47d..830e109 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -142,6 +142,7 @@ typedef struct BDRVRawState { bool is_xfs : 1; #endif bool has_discard : 1; + bool discard_zeroes : 1; } BDRVRawState; typedef struct BDRVRawReopenState { @@ -283,6 +284,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, Error *local_err = NULL; const char *filename; int fd, ret; + struct stat st; opts = qemu_opts_create_nofail(&raw_runtime_opts); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -325,6 +327,15 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, #endif s->has_discard = true; + + if (fstat(s->fd, &st) < 0) { + error_setg_errno(errp, errno, "Could not stat file"); + goto fail; + } + if (S_ISREG(st.st_mode)) { + s->discard_zeroes = true; + } + #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { s->is_xfs = true; @@ -788,6 +799,29 @@ static int aio_worker(void *arg) return ret; } +static int paio_submit_co(BlockDriverState *bs, int fd, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + int type) +{ + RawPosixAIOData *acb = g_slice_new(RawPosixAIOData); + ThreadPool *pool; + + acb->bs = bs; + acb->aio_type = type; + acb->aio_fildes = fd; + + if (qiov) { + acb->aio_iov = qiov->iov; + acb->aio_niov = qiov->niov; + } + acb->aio_nbytes = nb_sectors * 512; + acb->aio_offset = sector_num * 512; + + trace_paio_submit_co(sector_num, nb_sectors, type); + pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); + return thread_pool_submit_co(pool, aio_worker, acb); +} + static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque, int type) @@ -1200,6 +1234,31 @@ static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs, cb, opaque, QEMU_AIO_DISCARD); } +static int coroutine_fn raw_co_write_zeroes( + BlockDriverState *bs, int64_t sector_num, + int 
nb_sectors, BdrvRequestFlags flags) +{ + BDRVRawState *s = bs->opaque; + + if (!(flags & BDRV_REQ_MAY_UNMAP)) { + return -ENOTSUP; + } + if (!s->discard_zeroes) { + return -ENOTSUP; + } + return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors, + QEMU_AIO_DISCARD); +} + +static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + BDRVRawState *s = bs->opaque; + + bdi->unallocated_blocks_are_zero = s->discard_zeroes; + bdi->can_write_zeroes_with_unmap = s->discard_zeroes; + return 0; +} + static QEMUOptionParameter raw_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -1223,6 +1282,7 @@ static BlockDriver bdrv_file = { .bdrv_create = raw_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = raw_co_get_block_status, + .bdrv_co_write_zeroes = raw_co_write_zeroes, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, @@ -1231,6 +1291,7 @@ static BlockDriver bdrv_file = { .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, + .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, @@ -1586,6 +1647,7 @@ static BlockDriver bdrv_host_device = { .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, + .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, @@ -1715,7 +1777,7 @@ static BlockDriver bdrv_host_floppy = { .bdrv_aio_flush = raw_aio_flush, .bdrv_truncate = raw_truncate, - .bdrv_getlength = raw_getlength, + .bdrv_getlength = raw_getlength, .has_variable_length = true, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, diff --git a/trace-events b/trace-events index 96b3974..995c84a 100644 --- a/trace-events +++ b/trace-events @@ -128,6 +128,7 @@ thread_pool_cancel(void *req, void *opaque) "req %p opaque %p" # block/raw-win32.c # block/raw-posix.c +paio_submit_co(int64_t sector_num, int nb_sectors, int type) "sector_num %"PRId64" nb_sectors %d type %d" paio_submit(void *acb, void *opaque, int64_t sector_num, 
int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d" # ioport.c
Writing zeroes to a file can be done by punching a hole if MAY_UNMAP is set. Note that in this case handle_aiocb_discard's ENOTSUP return code is not ignored, but makes the block layer fall back to the generic implementation. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- block/raw-posix.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- trace-events | 1 + 2 files changed, 64 insertions(+), 1 deletion(-)