diff mbox

[08/11] raw-posix: implement write_zeroes with MAY_UNMAP for files

Message ID 1384271389-20716-9-git-send-email-pbonzini@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini Nov. 12, 2013, 3:49 p.m. UTC
Writing zeroes to a file can be done by punching a hole if MAY_UNMAP
is set.

Note that in this case handle_aiocb_discard's ENOTSUP return code
is not ignored, but makes the block layer fall back to the generic
implementation.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/raw-posix.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 trace-events      |  1 +
 2 files changed, 64 insertions(+), 1 deletion(-)

Comments

Peter Lieven Nov. 13, 2013, 6:27 a.m. UTC | #1
Am 12.11.2013 um 16:49 schrieb Paolo Bonzini <pbonzini@redhat.com>:

> Writing zeroes to a file can be done by punching a hole if MAY_UNMAP
> is set.
> 
> Note that in this case handle_aiocb_discard's ENOTSUP return code
> is not ignored, but makes the block layer fall back to the generic
> implementation.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> block/raw-posix.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> trace-events      |  1 +
> 2 files changed, 64 insertions(+), 1 deletion(-)
> 
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index 27fe47d..830e109 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -142,6 +142,7 @@ typedef struct BDRVRawState {
>     bool is_xfs : 1;
> #endif
>     bool has_discard : 1;
> +    bool discard_zeroes : 1;
> } BDRVRawState;
> 
> typedef struct BDRVRawReopenState {
> @@ -283,6 +284,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
>     Error *local_err = NULL;
>     const char *filename;
>     int fd, ret;
> +    struct stat st;
> 
>     opts = qemu_opts_create_nofail(&raw_runtime_opts);
>     qemu_opts_absorb_qdict(opts, options, &local_err);
> @@ -325,6 +327,15 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
> #endif
> 
>     s->has_discard = true;
> +
> +    if (fstat(s->fd, &st) < 0) {
> +        error_setg_errno(errp, errno, "Could not stat file");
> +        goto fail;
> +    }
> +    if (S_ISREG(st.st_mode)) {
> +        s->discard_zeroes = true;
> +    }
> +
> #ifdef CONFIG_XFS
>     if (platform_test_xfs_fd(s->fd)) {
>         s->is_xfs = true;
> @@ -788,6 +799,29 @@ static int aio_worker(void *arg)
>     return ret;
> }
> 
> +static int paio_submit_co(BlockDriverState *bs, int fd,
> +        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> +        int type)
> +{
> +    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
> +    ThreadPool *pool;
> +
> +    acb->bs = bs;
> +    acb->aio_type = type;
> +    acb->aio_fildes = fd;
> +
> +    if (qiov) {
> +        acb->aio_iov = qiov->iov;
> +        acb->aio_niov = qiov->niov;
> +    }
> +    acb->aio_nbytes = nb_sectors * 512;
> +    acb->aio_offset = sector_num * 512;
> +
> +    trace_paio_submit_co(sector_num, nb_sectors, type);
> +    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
> +    return thread_pool_submit_co(pool, aio_worker, acb);
> +}
> +
> static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
>         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
>         BlockDriverCompletionFunc *cb, void *opaque, int type)
> @@ -1200,6 +1234,31 @@ static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
>                        cb, opaque, QEMU_AIO_DISCARD);
> }
> 
> +static int coroutine_fn raw_co_write_zeroes(
> +    BlockDriverState *bs, int64_t sector_num,
> +    int nb_sectors, BdrvRequestFlags flags)
> +{
> +    BDRVRawState *s = bs->opaque;
> +
> +    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
> +        return -ENOTSUP;
> +    }
> +    if (!s->discard_zeroes) {
> +        return -ENOTSUP;
> +    }
> +    return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
> +                          QEMU_AIO_DISCARD);
> +}
> +
> +static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
> +{
> +    BDRVRawState *s = bs->opaque;
> +
> +    bdi->unallocated_blocks_are_zero = s->discard_zeroes;
> +    bdi->can_write_zeroes_with_unmap = s->discard_zeroes;

Does the BLKDISCARDZEROES ioctl guarantee that a device is
zero-initialized, or does it only guarantee that a discard cannot
fail and that the discarded range reads as zeroes afterwards?

Peter

> +    return 0;
> +}
> +
> static QEMUOptionParameter raw_create_options[] = {
>     {
>         .name = BLOCK_OPT_SIZE,
> @@ -1223,6 +1282,7 @@ static BlockDriver bdrv_file = {
>     .bdrv_create = raw_create,
>     .bdrv_has_zero_init = bdrv_has_zero_init_1,
>     .bdrv_co_get_block_status = raw_co_get_block_status,
> +    .bdrv_co_write_zeroes = raw_co_write_zeroes,
> 
>     .bdrv_aio_readv = raw_aio_readv,
>     .bdrv_aio_writev = raw_aio_writev,
> @@ -1231,6 +1291,7 @@ static BlockDriver bdrv_file = {
> 
>     .bdrv_truncate = raw_truncate,
>     .bdrv_getlength = raw_getlength,
> +    .bdrv_get_info = raw_get_info,
>     .bdrv_get_allocated_file_size
>                         = raw_get_allocated_file_size,
> 
> @@ -1586,6 +1647,7 @@ static BlockDriver bdrv_host_device = {
> 
>     .bdrv_truncate      = raw_truncate,
>     .bdrv_getlength	= raw_getlength,
> +    .bdrv_get_info = raw_get_info,
>     .bdrv_get_allocated_file_size
>                         = raw_get_allocated_file_size,
> 
> @@ -1715,7 +1777,7 @@ static BlockDriver bdrv_host_floppy = {
>     .bdrv_aio_flush	= raw_aio_flush,
> 
>     .bdrv_truncate      = raw_truncate,
> -    .bdrv_getlength      = raw_getlength,
> +    .bdrv_getlength     = raw_getlength,
>     .has_variable_length = true,
>     .bdrv_get_allocated_file_size
>                         = raw_get_allocated_file_size,
> diff --git a/trace-events b/trace-events
> index 96b3974..995c84a 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -128,6 +128,7 @@ thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
> 
> # block/raw-win32.c
> # block/raw-posix.c
> +paio_submit_co(int64_t sector_num, int nb_sectors, int type) "sector_num %"PRId64" nb_sectors %d type %d"
> paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
> 
> # ioport.c
> -- 
> 1.8.4.2
> 
>
Peter Lieven Nov. 13, 2013, 6:30 a.m. UTC | #2
Am 13.11.2013 um 07:27 schrieb Peter Lieven <pl@kamp.de>:

> 
> Am 12.11.2013 um 16:49 schrieb Paolo Bonzini <pbonzini@redhat.com>:
> 
>> Writing zeroes to a file can be done by punching a hole if MAY_UNMAP
>> is set.
>> 
>> Note that in this case handle_aiocb_discard's ENOTSUP return code
>> is not ignored, but makes the block layer fall back to the generic
>> implementation.
>> 
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>> block/raw-posix.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> trace-events      |  1 +
>> 2 files changed, 64 insertions(+), 1 deletion(-)
>> 
>> diff --git a/block/raw-posix.c b/block/raw-posix.c
>> index 27fe47d..830e109 100644
>> --- a/block/raw-posix.c
>> +++ b/block/raw-posix.c
>> @@ -142,6 +142,7 @@ typedef struct BDRVRawState {
>>     bool is_xfs : 1;
>> #endif
>>     bool has_discard : 1;
>> +    bool discard_zeroes : 1;
>> } BDRVRawState;
>> 
>> typedef struct BDRVRawReopenState {
>> @@ -283,6 +284,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
>>     Error *local_err = NULL;
>>     const char *filename;
>>     int fd, ret;
>> +    struct stat st;
>> 
>>     opts = qemu_opts_create_nofail(&raw_runtime_opts);
>>     qemu_opts_absorb_qdict(opts, options, &local_err);
>> @@ -325,6 +327,15 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
>> #endif
>> 
>>     s->has_discard = true;
>> +
>> +    if (fstat(s->fd, &st) < 0) {
>> +        error_setg_errno(errp, errno, "Could not stat file");
>> +        goto fail;
>> +    }
>> +    if (S_ISREG(st.st_mode)) {
>> +        s->discard_zeroes = true;
>> +    }
>> +
>> #ifdef CONFIG_XFS
>>     if (platform_test_xfs_fd(s->fd)) {
>>         s->is_xfs = true;
>> @@ -788,6 +799,29 @@ static int aio_worker(void *arg)
>>     return ret;
>> }
>> 
>> +static int paio_submit_co(BlockDriverState *bs, int fd,
>> +        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
>> +        int type)
>> +{
>> +    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
>> +    ThreadPool *pool;
>> +
>> +    acb->bs = bs;
>> +    acb->aio_type = type;
>> +    acb->aio_fildes = fd;
>> +
>> +    if (qiov) {
>> +        acb->aio_iov = qiov->iov;
>> +        acb->aio_niov = qiov->niov;
>> +    }
>> +    acb->aio_nbytes = nb_sectors * 512;
>> +    acb->aio_offset = sector_num * 512;
>> +
>> +    trace_paio_submit_co(sector_num, nb_sectors, type);
>> +    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
>> +    return thread_pool_submit_co(pool, aio_worker, acb);
>> +}
>> +
>> static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
>>         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
>>         BlockDriverCompletionFunc *cb, void *opaque, int type)
>> @@ -1200,6 +1234,31 @@ static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
>>                        cb, opaque, QEMU_AIO_DISCARD);
>> }
>> 
>> +static int coroutine_fn raw_co_write_zeroes(
>> +    BlockDriverState *bs, int64_t sector_num,
>> +    int nb_sectors, BdrvRequestFlags flags)
>> +{
>> +    BDRVRawState *s = bs->opaque;
>> +
>> +    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
>> +        return -ENOTSUP;
>> +    }
>> +    if (!s->discard_zeroes) {
>> +        return -ENOTSUP;
>> +    }
>> +    return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
>> +                          QEMU_AIO_DISCARD);
>> +}
>> +
>> +static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
>> +{
>> +    BDRVRawState *s = bs->opaque;
>> +
>> +    bdi->unallocated_blocks_are_zero = s->discard_zeroes;
>> +    bdi->can_write_zeroes_with_unmap = s->discard_zeroes;
> 
> Does the BLKDISCARDZEROES ioctl guarantee that a device is
> zero-initialized, or does it only guarantee that a discard cannot
> fail and that the discarded range reads as zeroes afterwards?

Please ignore this. We are talking about a file here.

Peter

> 
> Peter
> 
>> +    return 0;
>> +}
>> +
>> static QEMUOptionParameter raw_create_options[] = {
>>     {
>>         .name = BLOCK_OPT_SIZE,
>> @@ -1223,6 +1282,7 @@ static BlockDriver bdrv_file = {
>>     .bdrv_create = raw_create,
>>     .bdrv_has_zero_init = bdrv_has_zero_init_1,
>>     .bdrv_co_get_block_status = raw_co_get_block_status,
>> +    .bdrv_co_write_zeroes = raw_co_write_zeroes,
>> 
>>     .bdrv_aio_readv = raw_aio_readv,
>>     .bdrv_aio_writev = raw_aio_writev,
>> @@ -1231,6 +1291,7 @@ static BlockDriver bdrv_file = {
>> 
>>     .bdrv_truncate = raw_truncate,
>>     .bdrv_getlength = raw_getlength,
>> +    .bdrv_get_info = raw_get_info,
>>     .bdrv_get_allocated_file_size
>>                         = raw_get_allocated_file_size,
>> 
>> @@ -1586,6 +1647,7 @@ static BlockDriver bdrv_host_device = {
>> 
>>     .bdrv_truncate      = raw_truncate,
>>     .bdrv_getlength	= raw_getlength,
>> +    .bdrv_get_info = raw_get_info,
>>     .bdrv_get_allocated_file_size
>>                         = raw_get_allocated_file_size,
>> 
>> @@ -1715,7 +1777,7 @@ static BlockDriver bdrv_host_floppy = {
>>     .bdrv_aio_flush	= raw_aio_flush,
>> 
>>     .bdrv_truncate      = raw_truncate,
>> -    .bdrv_getlength      = raw_getlength,
>> +    .bdrv_getlength     = raw_getlength,
>>     .has_variable_length = true,
>>     .bdrv_get_allocated_file_size
>>                         = raw_get_allocated_file_size,
>> diff --git a/trace-events b/trace-events
>> index 96b3974..995c84a 100644
>> --- a/trace-events
>> +++ b/trace-events
>> @@ -128,6 +128,7 @@ thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
>> 
>> # block/raw-win32.c
>> # block/raw-posix.c
>> +paio_submit_co(int64_t sector_num, int nb_sectors, int type) "sector_num %"PRId64" nb_sectors %d type %d"
>> paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
>> 
>> # ioport.c
>> -- 
>> 1.8.4.2
>> 
>> 
>
diff mbox

Patch

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 27fe47d..830e109 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -142,6 +142,7 @@  typedef struct BDRVRawState {
     bool is_xfs : 1;
 #endif
     bool has_discard : 1;
+    bool discard_zeroes : 1;
 } BDRVRawState;
 
 typedef struct BDRVRawReopenState {
@@ -283,6 +284,7 @@  static int raw_open_common(BlockDriverState *bs, QDict *options,
     Error *local_err = NULL;
     const char *filename;
     int fd, ret;
+    struct stat st;
 
     opts = qemu_opts_create_nofail(&raw_runtime_opts);
     qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -325,6 +327,15 @@  static int raw_open_common(BlockDriverState *bs, QDict *options,
 #endif
 
     s->has_discard = true;
+
+    if (fstat(s->fd, &st) < 0) {
+        error_setg_errno(errp, errno, "Could not stat file");
+        goto fail;
+    }
+    if (S_ISREG(st.st_mode)) {
+        s->discard_zeroes = true;
+    }
+
 #ifdef CONFIG_XFS
     if (platform_test_xfs_fd(s->fd)) {
         s->is_xfs = true;
@@ -788,6 +799,29 @@  static int aio_worker(void *arg)
     return ret;
 }
 
+static int paio_submit_co(BlockDriverState *bs, int fd,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        int type)
+{
+    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+    ThreadPool *pool;
+
+    acb->bs = bs;
+    acb->aio_type = type;
+    acb->aio_fildes = fd;
+
+    if (qiov) {
+        acb->aio_iov = qiov->iov;
+        acb->aio_niov = qiov->niov;
+    }
+    acb->aio_nbytes = nb_sectors * 512;
+    acb->aio_offset = sector_num * 512;
+
+    trace_paio_submit_co(sector_num, nb_sectors, type);
+    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+    return thread_pool_submit_co(pool, aio_worker, acb);
+}
+
 static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int type)
@@ -1200,6 +1234,31 @@  static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
                        cb, opaque, QEMU_AIO_DISCARD);
 }
 
+static int coroutine_fn raw_co_write_zeroes(
+    BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, BdrvRequestFlags flags)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+        return -ENOTSUP;
+    }
+    if (!s->discard_zeroes) {
+        return -ENOTSUP;
+    }
+    return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+                          QEMU_AIO_DISCARD);
+}
+
+static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+    BDRVRawState *s = bs->opaque;
+
+    bdi->unallocated_blocks_are_zero = s->discard_zeroes;
+    bdi->can_write_zeroes_with_unmap = s->discard_zeroes;
+    return 0;
+}
+
 static QEMUOptionParameter raw_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -1223,6 +1282,7 @@  static BlockDriver bdrv_file = {
     .bdrv_create = raw_create,
     .bdrv_has_zero_init = bdrv_has_zero_init_1,
     .bdrv_co_get_block_status = raw_co_get_block_status,
+    .bdrv_co_write_zeroes = raw_co_write_zeroes,
 
     .bdrv_aio_readv = raw_aio_readv,
     .bdrv_aio_writev = raw_aio_writev,
@@ -1231,6 +1291,7 @@  static BlockDriver bdrv_file = {
 
     .bdrv_truncate = raw_truncate,
     .bdrv_getlength = raw_getlength,
+    .bdrv_get_info = raw_get_info,
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
@@ -1586,6 +1647,7 @@  static BlockDriver bdrv_host_device = {
 
     .bdrv_truncate      = raw_truncate,
     .bdrv_getlength	= raw_getlength,
+    .bdrv_get_info = raw_get_info,
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
@@ -1715,7 +1777,7 @@  static BlockDriver bdrv_host_floppy = {
     .bdrv_aio_flush	= raw_aio_flush,
 
     .bdrv_truncate      = raw_truncate,
-    .bdrv_getlength      = raw_getlength,
+    .bdrv_getlength     = raw_getlength,
     .has_variable_length = true,
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
diff --git a/trace-events b/trace-events
index 96b3974..995c84a 100644
--- a/trace-events
+++ b/trace-events
@@ -128,6 +128,7 @@  thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
 
 # block/raw-win32.c
 # block/raw-posix.c
+paio_submit_co(int64_t sector_num, int nb_sectors, int type) "sector_num %"PRId64" nb_sectors %d type %d"
 paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
 
 # ioport.c