Message ID | 20190114233708.GA20465@magnolia |
---|---|
State | Accepted, archived |
Headers | show |
Series | libext2fs: revert "try to always use PUNCH_HOLE for unix_discard" | expand |
On Mon, Jan 14, 2019 at 03:37:08PM -0800, Darrick J. Wong wrote: > From: Darrick J. Wong <darrick.wong@oracle.com> > > Revert bcca9876a3428c10417c660b78933e6e70e8a5f5, because > fallocate(PUNCH_HOLE) on block devices was changed to use zeroout > instead of discard shortly after block device fallocate was merged. > zeroout isn't necessarily a "drop storage" operation like discard is, > so we prefer to use that on block devices. > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Thanks, applied. - Ted
On Thu, Feb 14, 2019 at 04:04:48PM -0500, Theodore Y. Ts'o wrote: > On Mon, Jan 14, 2019 at 03:37:08PM -0800, Darrick J. Wong wrote: > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > Revert bcca9876a3428c10417c660b78933e6e70e8a5f5, because > > fallocate(PUNCH_HOLE) on block devices was changed to use zeroout > > instead of discard shortly after block device fallocate was merged. > > zeroout isn't necessarily a "drop storage" operation like discard is, > > so we prefer to use that on block devices. > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > Thanks, applied. > > - Ted I just noticed this patch, sorry. I think we can still use fallocate, but we need to set the right flags to make sure it uses discard instead of zeroout. See fs/block_dev.c: switch (mode) { case FALLOC_FL_ZERO_RANGE: case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); break; case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK); break; case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: error = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); break; default: return -EOPNOTSUPP; } So if we want a discard (meaning we want to deallocate the blocks without necessarily making sure we can't read stale data from them) we have to use FALLOC_FL_NO_HIDE_STALE. So the flags would be FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE. Ted, Darrick, what do you think? Can we keep the bcca9876a3428c10417c660b78933e6e70e8a5f5 commit and just change the flags? -Lukas
On Fri, Feb 15, 2019 at 10:50:07AM +0100, Lukas Czerner wrote: > On Thu, Feb 14, 2019 at 04:04:48PM -0500, Theodore Y. Ts'o wrote: > > On Mon, Jan 14, 2019 at 03:37:08PM -0800, Darrick J. Wong wrote: > > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > > > Revert bcca9876a3428c10417c660b78933e6e70e8a5f5, because > > > fallocate(PUNCH_HOLE) on block devices was changed to use zeroout > > > instead of discard shortly after block device fallocate was merged. > > > zeroout isn't necessarily a "drop storage" operation like discard is, > > > so we prefer to use that on block devices. > > > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > > > Thanks, applied. > > > > - Ted > > I just noticed this patch, sorry. I think we can still use fallocate, > but we need to set the right flags to make sure it uses discard instead > of zeroout. See fs/block_dev.c > > switch (mode) { > case FALLOC_FL_ZERO_RANGE: > case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: > error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, > GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); > break; > case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: > error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, > GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK); > break; > case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: > error = blkdev_issue_discard(bdev, start >> 9, len >> 9, > GFP_KERNEL, 0); > break; > default: > return -EOPNOTSUPP; > } > > So if we want a discard (meaning we want to unallocate the blocks > without necessarily making sure we can't read stale data from it) we > have to use FALLOC_FL_NO_HIDE_STALE. > > So the flags would be FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE Userspace isn't allowed to pass in _NO_HIDE_STALE; see FALLOC_FL_SUPPORTED_MASK in include/linux/falloc.h. The behavior of "no hide stale" isn't defined in the manpages; it's merely a reserved code point. --D > Ted, Darrick what do you think ? 
Can we keep the > bcca9876a3428c10417c660b78933e6e70e8a5f5 commit and just change the > flags ? > > -Lukas
diff --git a/lib/ext2fs/unix_io.c b/lib/ext2fs/unix_io.c index 16e2052cd..74fc8a75d 100644 --- a/lib/ext2fs/unix_io.c +++ b/lib/ext2fs/unix_io.c @@ -1081,38 +1081,6 @@ static errcode_t unix_set_option(io_channel channel, const char *option, #define BLKDISCARD _IO(0x12,119) #endif -/* - * Try a PUNCH_HOLE to unmap blocks, then BLKDISCARD if that doesn't work. - * We prefer PUNCH_HOLE because it invalidates the page cache, even on block - * devices. - */ -static int __unix_discard(int fd, int is_bdev, off_t offset, off_t len) -{ -#ifdef BLKDISCARD - __u64 range[2]; -#endif - int ret = -1; - -#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) - ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - offset, len); - if (ret == 0) - return 0; -#endif -#ifdef BLKDISCARD - if (is_bdev) { - range[0] = (__u64)offset; - range[1] = (__u64)len; - - ret = ioctl(fd, BLKDISCARD, &range); - if (ret == 0) - return 0; - } -#endif - errno = EOPNOTSUPP; - return ret; -} - static errcode_t unix_discard(io_channel channel, unsigned long long block, unsigned long long count) { @@ -1123,10 +1091,31 @@ static errcode_t unix_discard(io_channel channel, unsigned long long block, data = (struct unix_private_data *) channel->private_data; EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); - ret = __unix_discard(data->dev, - (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE), - (off_t)(block) * channel->block_size + data->offset, - (off_t)(count) * channel->block_size); + if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) { +#ifdef BLKDISCARD + __u64 range[2]; + + range[0] = (__u64)(block) * channel->block_size + data->offset; + range[1] = (__u64)(count) * channel->block_size; + + ret = ioctl(data->dev, BLKDISCARD, &range); +#else + goto unimplemented; +#endif + } else { +#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) + /* + * If we are not on block device, try to use punch hole + * to reclaim free space. 
+ */ + ret = fallocate(data->dev, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + (off_t)(block) * channel->block_size + data->offset, + (off_t)(count) * channel->block_size); +#else + goto unimplemented; +#endif + } if (ret < 0) { if (errno == EOPNOTSUPP) goto unimplemented;