libext2fs: revert "try to always use PUNCH_HOLE for unix_discard"

Message ID 20190114233708.GA20465@magnolia
State Accepted
Headers show
Series
  • libext2fs: revert "try to always use PUNCH_HOLE for unix_discard"
Related show

Commit Message

Darrick J. Wong Jan. 14, 2019, 11:37 p.m.
From: Darrick J. Wong <darrick.wong@oracle.com>

Revert bcca9876a3428c10417c660b78933e6e70e8a5f5, because
fallocate(PUNCH_HOLE) on block devices was changed to use zeroout
instead of discard shortly after block device fallocate was merged.
zeroout isn't necessarily a "drop storage" operation like discard is,
so we prefer to use BLKDISCARD on block devices.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 lib/ext2fs/unix_io.c |   61 ++++++++++++++++++++------------------------------
 1 file changed, 25 insertions(+), 36 deletions(-)

Comments

Theodore Ts'o Feb. 14, 2019, 9:04 p.m. | #1
On Mon, Jan 14, 2019 at 03:37:08PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Revert bcca9876a3428c10417c660b78933e6e70e8a5f5, because
> fallocate(PUNCH_HOLE) on block devices was changed to use zeroout
> instead of discard shortly after block device fallocate was merged.
> zeroout isn't necessarily a "drop storage" operation like discard is,
> so we prefer to use that on block devices.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Thanks, applied.

					- Ted
Lukas Czerner Feb. 15, 2019, 9:50 a.m. | #2
On Thu, Feb 14, 2019 at 04:04:48PM -0500, Theodore Y. Ts'o wrote:
> On Mon, Jan 14, 2019 at 03:37:08PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Revert bcca9876a3428c10417c660b78933e6e70e8a5f5, because
> > fallocate(PUNCH_HOLE) on block devices was changed to use zeroout
> > instead of discard shortly after block device fallocate was merged.
> > zeroout isn't necessarily a "drop storage" operation like discard is,
> > so we prefer to use that on block devices.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Thanks, applied.
> 
> 					- Ted

I just noticed this patch, sorry. I think we can still use fallocate,
but we need to set the right flags to make sure it uses discard instead
of zeroout. See fs/block_dev.c

	switch (mode) {
	case FALLOC_FL_ZERO_RANGE:
	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
					    GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
		error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, 0);
		break;
	default:
		return -EOPNOTSUPP;
	}

So if we want a discard (meaning we want to unallocate the blocks
without necessarily making sure we can't read stale data from them) we
have to use FALLOC_FL_NO_HIDE_STALE.

So the flags would be FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE

Ted, Darrick, what do you think? Can we keep the
bcca9876a3428c10417c660b78933e6e70e8a5f5 commit and just change the
flags?

-Lukas
Darrick J. Wong Feb. 15, 2019, 4:25 p.m. | #3
On Fri, Feb 15, 2019 at 10:50:07AM +0100, Lukas Czerner wrote:
> On Thu, Feb 14, 2019 at 04:04:48PM -0500, Theodore Y. Ts'o wrote:
> > On Mon, Jan 14, 2019 at 03:37:08PM -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Revert bcca9876a3428c10417c660b78933e6e70e8a5f5, because
> > > fallocate(PUNCH_HOLE) on block devices was changed to use zeroout
> > > instead of discard shortly after block device fallocate was merged.
> > > zeroout isn't necessarily a "drop storage" operation like discard is,
> > > so we prefer to use that on block devices.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Thanks, applied.
> > 
> > 					- Ted
> 
> I just noticed this patch, sorry. I think we can still use fallocate,
> but we need to set the right flags to make sure it uses discard instead
> of zeroout. See fs/block_dev.c
> 
> 	switch (mode) {
> 	case FALLOC_FL_ZERO_RANGE:
> 	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
> 		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
> 					    GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
> 		break;
> 	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
> 		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
> 					     GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
> 		break;
> 	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
> 		error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
> 					     GFP_KERNEL, 0);
> 		break;
> 	default:
> 		return -EOPNOTSUPP;
> 	}
> 
> So if we want a discard (meaning we want to unallocate the blocks
> without necessarily making sure we can't read stale data from it) we
> have to use FALLOC_FL_NO_HIDE_STALE.
> 
> So the flags would be FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE

Userspace isn't allowed to pass in _NO_HIDE_STALE; see
FALLOC_FL_SUPPORTED_MASK in include/linux/falloc.h.

The behavior of "no hide stale" isn't defined in the manpages; it's
merely a reserved code point.

--D

> Ted, Darrick what do you think ? Can we keep the
> bcca9876a3428c10417c660b78933e6e70e8a5f5 commit and just change the
> flags ?
> 
> -Lukas

Patch

diff --git a/lib/ext2fs/unix_io.c b/lib/ext2fs/unix_io.c
index 16e2052cd..74fc8a75d 100644
--- a/lib/ext2fs/unix_io.c
+++ b/lib/ext2fs/unix_io.c
@@ -1081,38 +1081,6 @@  static errcode_t unix_set_option(io_channel channel, const char *option,
 #define BLKDISCARD		_IO(0x12,119)
 #endif
 
-/*
- * Try a PUNCH_HOLE to unmap blocks, then BLKDISCARD if that doesn't work.
- * We prefer PUNCH_HOLE because it invalidates the page cache, even on block
- * devices.
- */
-static int __unix_discard(int fd, int is_bdev, off_t offset, off_t len)
-{
-#ifdef BLKDISCARD
-	__u64 range[2];
-#endif
-	int ret = -1;
-
-#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
-	ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-			offset, len);
-	if (ret == 0)
-		return 0;
-#endif
-#ifdef BLKDISCARD
-	if (is_bdev) {
-		range[0] = (__u64)offset;
-		range[1] = (__u64)len;
-
-		ret = ioctl(fd, BLKDISCARD, &range);
-		if (ret == 0)
-			return 0;
-	}
-#endif
-	errno = EOPNOTSUPP;
-	return ret;
-}
-
 static errcode_t unix_discard(io_channel channel, unsigned long long block,
 			      unsigned long long count)
 {
@@ -1123,10 +1091,31 @@  static errcode_t unix_discard(io_channel channel, unsigned long long block,
 	data = (struct unix_private_data *) channel->private_data;
 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
 
-	ret = __unix_discard(data->dev,
-			(channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE),
-			(off_t)(block) * channel->block_size + data->offset,
-			(off_t)(count) * channel->block_size);
+	if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) {
+#ifdef BLKDISCARD
+		__u64 range[2];
+
+		range[0] = (__u64)(block) * channel->block_size + data->offset;
+		range[1] = (__u64)(count) * channel->block_size;
+
+		ret = ioctl(data->dev, BLKDISCARD, &range);
+#else
+		goto unimplemented;
+#endif
+	} else {
+#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
+		/*
+		 * If we are not on block device, try to use punch hole
+		 * to reclaim free space.
+		 */
+		ret = fallocate(data->dev,
+				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				(off_t)(block) * channel->block_size + data->offset,
+				(off_t)(count) * channel->block_size);
+#else
+		goto unimplemented;
+#endif
+	}
 	if (ret < 0) {
 		if (errno == EOPNOTSUPP)
 			goto unimplemented;