Message ID | 1384271389-20716-10-git-send-email-pbonzini@redhat.com |
---|---|
State | New |
Headers | show |
Wouldn't it be good to add bdi->can_write_zeroes_with_unmap here as well? This would automatically avoid full allocation when converting something to a host device supporting BLKDISCARDZEROES. Peter Am 12.11.2013 um 16:49 schrieb Paolo Bonzini <pbonzini@redhat.com>: > See the next commit for the description of the Linux kernel problem > that is worked around in raw_open_common. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > block/raw-posix.c | 38 ++++++++++++++++++++++++++++++++++++++ > 1 file changed, 38 insertions(+) > > diff --git a/block/raw-posix.c b/block/raw-posix.c > index 830e109..5cb46f1 100644 > --- a/block/raw-posix.c > +++ b/block/raw-posix.c > @@ -335,6 +335,23 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, > if (S_ISREG(st.st_mode)) { > s->discard_zeroes = true; > } > +#ifdef BLKDISCARDZEROES > + if (S_ISBLK(st.st_mode)) { > + unsigned int arg; > + if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) { > + s->discard_zeroes = true; > + } > + } > +#endif > +#ifdef CONFIG_LINUX > + /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do > + * not rely on the contents of discarded blocks unless using O_DIRECT. > + */ > + if (!(bs->open_flags & BDRV_O_NOCACHE)) { > + s->discard_zeroes = false; > + } > +#endif > + } > > #ifdef CONFIG_XFS > if (platform_test_xfs_fd(s->fd)) { > @@ -1587,6 +1604,26 @@ static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs, > cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); > } > > +static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs, > + int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) > +{ > + BDRVRawState *s = bs->opaque; > + int rc; > + > + rc = fd_open(bs); > + if (rc < 0) { > + return rc; > + } > + if (!(flags & BDRV_REQ_MAY_UNMAP)) { > + return -ENOTSUP; > + } > + if (!s->discard_zeroes) { > + return -ENOTSUP; > + } > + return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors, > + QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); > +} > + > static int hdev_create(const char *filename, QEMUOptionParameter *options, > Error **errp) > { > @@ -1639,6 +1676,7 @@ static BlockDriver bdrv_host_device = { > .bdrv_reopen_abort = raw_reopen_abort, > .bdrv_create = hdev_create, > .create_options = raw_create_options, > + .bdrv_co_write_zeroes = hdev_co_write_zeroes, > > .bdrv_aio_readv = raw_aio_readv, > .bdrv_aio_writev = raw_aio_writev, > -- > 1.8.4.2 > >
Il 13/11/2013 07:29, Peter Lieven ha scritto: > Wouldn't it be good to add bdi->can_write_zeroes_with_unmap here as well? We do: > + bdi->unallocated_blocks_are_zero = s->discard_zeroes; > + bdi->can_write_zeroes_with_unmap = s->discard_zeroes; > This would automatically avoid full allocation when converting something to a host device > supporting BLKDISCARDZEROES. Yes, that's (part of) the point of this patch. Regarding the question you posed in the previous patch: > does BLKDISCARDZEROES ioctl guarantee that a device is > zero initialized or does it just guarantee that a discard may not > fail and that it reads as zeroes afterwards? Only the latter. ".bdrv_has_zero_init" is only present in the bdrv_file BlockDriver. Paolo > Peter > > Am 12.11.2013 um 16:49 schrieb Paolo Bonzini <pbonzini@redhat.com>: > >> See the next commit for the description of the Linux kernel problem >> that is worked around in raw_open_common. >> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >> --- >> block/raw-posix.c | 38 ++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 38 insertions(+) >> >> diff --git a/block/raw-posix.c b/block/raw-posix.c >> index 830e109..5cb46f1 100644 >> --- a/block/raw-posix.c >> +++ b/block/raw-posix.c >> @@ -335,6 +335,23 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, >> if (S_ISREG(st.st_mode)) { >> s->discard_zeroes = true; >> } >> +#ifdef BLKDISCARDZEROES >> + if (S_ISBLK(st.st_mode)) { >> + unsigned int arg; >> + if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) { >> + s->discard_zeroes = true; >> + } >> + } >> +#endif >> +#ifdef CONFIG_LINUX >> + /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do >> + * not rely on the contents of discarded blocks unless using O_DIRECT. >> + */ >> + if (!(bs->open_flags & BDRV_O_NOCACHE)) { >> + s->discard_zeroes = false; >> + } >> +#endif >> + } >> >> #ifdef CONFIG_XFS >> if (platform_test_xfs_fd(s->fd)) { >> @@ -1587,6 +1604,26 @@ static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs, >> cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); >> } >> >> +static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs, >> + int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) >> +{ >> + BDRVRawState *s = bs->opaque; >> + int rc; >> + >> + rc = fd_open(bs); >> + if (rc < 0) { >> + return rc; >> + } >> + if (!(flags & BDRV_REQ_MAY_UNMAP)) { >> + return -ENOTSUP; >> + } >> + if (!s->discard_zeroes) { >> + return -ENOTSUP; >> + } >> + return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors, >> + QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); >> +} >> + >> static int hdev_create(const char *filename, QEMUOptionParameter *options, >> Error **errp) >> { >> @@ -1639,6 +1676,7 @@ static BlockDriver bdrv_host_device = { >> .bdrv_reopen_abort = raw_reopen_abort, >> .bdrv_create = hdev_create, >> .create_options = raw_create_options, >> + .bdrv_co_write_zeroes = hdev_co_write_zeroes, >> >> .bdrv_aio_readv = raw_aio_readv, >> .bdrv_aio_writev = raw_aio_writev, >> -- >> 1.8.4.2 >> >> > > >
Am 13.11.2013 um 10:44 schrieb Paolo Bonzini <pbonzini@redhat.com>: > Il 13/11/2013 07:29, Peter Lieven ha scritto: >> Wouldn't it be good to add bdi->can_write_zeroes_with_unmap here as well? > > We do: > >> + bdi->unallocated_blocks_are_zero = s->discard_zeroes; >> + bdi->can_write_zeroes_with_unmap = s->discard_zeroes; Sorry, I missed this. > >> This would automatically avoid full allocation when converting something to a host device >> supporting BLKDISCARDZEROES. > > Yes, that's (part of) the point of this patch. > > Regarding the question you posed in the previous patch: > >> does BLKDISCARDZEROES ioctl guarantee that a device is >> zero initialized or does it just guarantee that a discard may not >> fail and that it reads as zeroes afterwards? > > Only the latter. ".bdrv_has_zero_init" is only present in the bdrv_file > BlockDriver. Then bdi->unallocated_blocks_are_zero must stay 0. .bdrv_has_zero_init's semantic is to reflect the zero status of all blocks of the device right after bdrv_create independently of their allocation status. bdi->unallocated_blocks_are_zero reflects the zero status of every unallocated block regardless if it was unallocated right from the beginning or became unallocated through a discard. Peter
Il 13/11/2013 15:14, Peter Lieven ha scritto: >>> >> does BLKDISCARDZEROES ioctl guarantee that a device is >>> >> zero initialized or does it just guarantee that a discard may not >>> >> fail and that it reads as zeroes afterwards? >> > >> > Only the latter. ".bdrv_has_zero_init" is only present in the bdrv_file >> > BlockDriver. > Then bdi->unallocated_blocks_are_zero must stay 0. .bdrv_has_zero_init's > semantic is to reflect the zero status of all blocks of the device right after bdrv_create > independently of their allocation status. bdi->unallocated_blocks_are_zero > reflects the zero status of every unallocated block regardless if it was > unallocated right from the beginning or became unallocated through a discard. What we have is: * bdi->unallocated_blocks_are_zero returns true * bdrv_create doesn't ensure that every block starts unallocated * hence bdrv_has_zero_init returns false Blocks that (for any reason) are unallocated after bdrv_create *will* be zero if BLKDISCARDZEROES returns true. Paolo
> Am 13.11.2013 um 15:39 schrieb Paolo Bonzini <pbonzini@redhat.com>: > > Il 13/11/2013 15:14, Peter Lieven ha scritto: >>>>>> does BLKDISCARDZEROES ioctl guarantee that a device is >>>>>> zero initialized or does it just guarantee that a discard may not >>>>>> fail and that it reads as zeroes afterwards? >>>> >>>> Only the latter. ".bdrv_has_zero_init" is only present in the bdrv_file >>>> BlockDriver. >> Then bdi->unallocated_blocks_are_zero must stay 0. .bdrv_has_zero_init's >> semantic is to reflect the zero status of all blocks of the device right after bdrv_create >> independently of their allocation status. bdi->unallocated_blocks_are_zero >> reflects the zero status of every unallocated block regardless if it was >> unallocated right from the beginning or became unallocated through a discard. > > What we have is: > > * bdi->unallocated_blocks_are_zero returns true > > * bdrv_create doesn't ensure that every block starts unallocated > > * hence bdrv_has_zero_init returns false > > Blocks that (for any reason) are unallocated after bdrv_create *will* be > zero if BLKDISCARDZEROES returns true. Ok, then i misunderstood your comment. If BLKDISCARDZEROES means lbprz == 1 SCSI speaking then you are right. Peter > > Paolo
diff --git a/block/raw-posix.c b/block/raw-posix.c index 830e109..5cb46f1 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -335,6 +335,23 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, if (S_ISREG(st.st_mode)) { s->discard_zeroes = true; } +#ifdef BLKDISCARDZEROES + if (S_ISBLK(st.st_mode)) { + unsigned int arg; + if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) { + s->discard_zeroes = true; + } + } +#endif +#ifdef CONFIG_LINUX + /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do + * not rely on the contents of discarded blocks unless using O_DIRECT. + */ + if (!(bs->open_flags & BDRV_O_NOCACHE)) { + s->discard_zeroes = false; + } +#endif + } #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { @@ -1587,6 +1604,26 @@ static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs, cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); } +static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) +{ + BDRVRawState *s = bs->opaque; + int rc; + + rc = fd_open(bs); + if (rc < 0) { + return rc; + } + if (!(flags & BDRV_REQ_MAY_UNMAP)) { + return -ENOTSUP; + } + if (!s->discard_zeroes) { + return -ENOTSUP; + } + return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors, + QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); +} + static int hdev_create(const char *filename, QEMUOptionParameter *options, Error **errp) { @@ -1639,6 +1676,7 @@ static BlockDriver bdrv_host_device = { .bdrv_reopen_abort = raw_reopen_abort, .bdrv_create = hdev_create, .create_options = raw_create_options, + .bdrv_co_write_zeroes = hdev_co_write_zeroes, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev,
See the next commit for the description of the Linux kernel problem that is worked around in raw_open_common. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- block/raw-posix.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+)