diff mbox

ext4: Add support for SFITRIM, an ioctl for secure FITRIM.

Message ID 1402625647-31439-1-git-send-email-jpa@google.com
State New, archived
Headers show

Commit Message

JP Abgrall June 13, 2014, 2:14 a.m. UTC
This provides an interface for issuing secure trims to parallel
the existing interface for non-secure trim.

Signed-off-by: Geremy Condra <gcondra@google.com>
Signed-off-by: JP Abgrall <jpa@google.com>
---
 fs/ext4/ext4.h          |  3 ++-
 fs/ext4/ioctl.c         | 14 +++++++++++++-
 fs/ext4/mballoc.c       | 29 +++++++++++++++++++----------
 include/uapi/linux/fs.h |  1 +
 4 files changed, 35 insertions(+), 12 deletions(-)

Comments

Darrick Wong June 13, 2014, 2:36 a.m. UTC | #1
On Thu, Jun 12, 2014 at 07:14:07PM -0700, JP Abgrall wrote:
> This provides an interface for issuing secure trims to parallel
> the existing interface for non-secure trim.

What is 'secure discard'?  How does it differ from regular discard?

I guess it means 'really make this go away physical media'?

> Signed-off-by: Geremy Condra <gcondra@google.com>
> Signed-off-by: JP Abgrall <jpa@google.com>
> ---
>  fs/ext4/ext4.h          |  3 ++-
>  fs/ext4/ioctl.c         | 14 +++++++++++++-
>  fs/ext4/mballoc.c       | 29 +++++++++++++++++++----------
>  include/uapi/linux/fs.h |  1 +
>  4 files changed, 35 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7cc5a0e..cf2ddad 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2082,7 +2082,8 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb,
>  		ext4_group_t i, struct ext4_group_desc *desc);
>  extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
>  				ext4_fsblk_t block, unsigned long count);
> -extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
> +extern int ext4_trim_fs(struct super_block *, struct fstrim_range *,
> +				bool secure);
>  
>  /* inode.c */
>  struct buffer_head *ext4_getblk(handle_t *, struct inode *,
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 0f2252e..0a0b483 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -580,11 +580,13 @@ resizefs_out:
>  		return err;
>  	}
>  
> +	case SFITRIM:
>  	case FITRIM:
>  	{
>  		struct request_queue *q = bdev_get_queue(sb->s_bdev);
>  		struct fstrim_range range;
>  		int ret = 0;
> +		bool secure_trim = cmd == SFITRIM;
>  
>  		if (!capable(CAP_SYS_ADMIN))
>  			return -EPERM;
> @@ -592,13 +594,23 @@ resizefs_out:
>  		if (!blk_queue_discard(q))
>  			return -EOPNOTSUPP;
>  
> +		if (secure_trim && !blk_queue_secdiscard(q))
> +			return -EOPNOTSUPP;
> +
> +		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
> +			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
> +			ext4_msg(sb, KERN_ERR,
> +				 "FITRIM not supported with bigalloc");
> +			return -EOPNOTSUPP;
> +		}

Huh?  I don't think this belongs in this patch.

Also, any plans to bring SFITRIM to the other FSes?

--D

> +
>  		if (copy_from_user(&range, (struct fstrim_range __user *)arg,
>  		    sizeof(range)))
>  			return -EFAULT;
>  
>  		range.minlen = max((unsigned int)range.minlen,
>  				   q->limits.discard_granularity);
> -		ret = ext4_trim_fs(sb, &range);
> +		ret = ext4_trim_fs(sb, &range, secure_trim);
>  		if (ret < 0)
>  			return ret;
>  
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 59e3162..b470efe 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2734,16 +2734,18 @@ int ext4_mb_release(struct super_block *sb)
>  }
>  
>  static inline int ext4_issue_discard(struct super_block *sb,
> -		ext4_group_t block_group, ext4_grpblk_t cluster, int count)
> +		ext4_group_t block_group, ext4_grpblk_t cluster, int count,
> +		bool secure)
>  {
>  	ext4_fsblk_t discard_block;
> +	unsigned long flags = secure ? BLKDEV_DISCARD_SECURE : 0;
>  
>  	discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
>  			 ext4_group_first_block_no(sb, block_group));
>  	count = EXT4_C2B(EXT4_SB(sb), count);
>  	trace_ext4_discard_blocks(sb,
>  			(unsigned long long) discard_block, count);
> -	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
> +	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags);
>  }
>  
>  /*
> @@ -2765,7 +2767,7 @@ static void ext4_free_data_callback(struct super_block *sb,
>  	if (test_opt(sb, DISCARD)) {
>  		err = ext4_issue_discard(sb, entry->efd_group,
>  					 entry->efd_start_cluster,
> -					 entry->efd_count);
> +					 entry->efd_count, 0);
>  		if (err && err != -EOPNOTSUPP)
>  			ext4_msg(sb, KERN_WARNING, "discard request in"
>  				 " group:%d block:%d count:%d failed"
> @@ -4804,7 +4806,8 @@ do_more:
>  		 * them with group lock_held
>  		 */
>  		if (test_opt(sb, DISCARD)) {
> -			err = ext4_issue_discard(sb, block_group, bit, count);
> +			err = ext4_issue_discard(sb, block_group, bit, count,
> +						 0);
>  			if (err && err != -EOPNOTSUPP)
>  				ext4_msg(sb, KERN_WARNING, "discard request in"
>  					 " group:%d block:%d count:%lu failed"
> @@ -5011,13 +5014,15 @@ error_return:
>   * @count:	number of blocks to TRIM
>   * @group:	alloc. group we are working with
>   * @e4b:	ext4 buddy for the group
> + * @secure:	false to issue a standard discard, true for secure discard
>   *
>   * Trim "count" blocks starting at "start" in the "group". To assure that no
>   * one will allocate those blocks, mark it as used in buddy bitmap. This must
>   * be called with under the group lock.
>   */
>  static int ext4_trim_extent(struct super_block *sb, int start, int count,
> -			     ext4_group_t group, struct ext4_buddy *e4b)
> +			    ext4_group_t group, struct ext4_buddy *e4b,
> +			    bool secure)
>  __releases(bitlock)
>  __acquires(bitlock)
>  {
> @@ -5038,7 +5043,7 @@ __acquires(bitlock)
>  	 */
>  	mb_mark_used(e4b, &ex);
>  	ext4_unlock_group(sb, group);
> -	ret = ext4_issue_discard(sb, group, start, count);
> +	ret = ext4_issue_discard(sb, group, start, count, secure);
>  	ext4_lock_group(sb, group);
>  	mb_free_blocks(NULL, e4b, start, ex.fe_len);
>  	return ret;
> @@ -5051,6 +5056,7 @@ __acquires(bitlock)
>   * @start:		first group block to examine
>   * @max:		last group block to examine
>   * @minblocks:		minimum extent block count
> + * @secure:		false for standard discard, true for secure discard
>   *
>   * ext4_trim_all_free walks through group's buddy bitmap searching for free
>   * extents. When the free block is found, ext4_trim_extent is called to TRIM
> @@ -5065,7 +5071,7 @@ __acquires(bitlock)
>  static ext4_grpblk_t
>  ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
>  		   ext4_grpblk_t start, ext4_grpblk_t max,
> -		   ext4_grpblk_t minblocks)
> +		   ext4_grpblk_t minblocks, bool secure)
>  {
>  	void *bitmap;
>  	ext4_grpblk_t next, count = 0, free_count = 0;
> @@ -5098,7 +5104,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
>  
>  		if ((next - start) >= minblocks) {
>  			ret = ext4_trim_extent(sb, start,
> -					       next - start, group, &e4b);
> +					       next - start, group, &e4b,
> +					       secure);
>  			if (ret && ret != -EOPNOTSUPP)
>  				break;
>  			ret = 0;
> @@ -5140,6 +5147,7 @@ out:
>   * ext4_trim_fs() -- trim ioctl handle function
>   * @sb:			superblock for filesystem
>   * @range:		fstrim_range structure
> + * @secure:		false for standard discard, true for secure discard
>   *
>   * start:	First Byte to trim
>   * len:		number of Bytes to trim from start
> @@ -5148,7 +5156,8 @@ out:
>   * start to start+len. For each such a group ext4_trim_all_free function
>   * is invoked to trim all free space.
>   */
> -int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
> +int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
> +			bool secure)
>  {
>  	struct ext4_group_info *grp;
>  	ext4_group_t group, first_group, last_group;
> @@ -5204,7 +5213,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
>  
>  		if (grp->bb_free >= minlen) {
>  			cnt = ext4_trim_all_free(sb, group, first_cluster,
> -						end, minlen);
> +						end, minlen, secure);
>  			if (cnt < 0) {
>  				ret = cnt;
>  				break;
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index ca1a11b..efbd8f8 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -156,6 +156,7 @@ struct inodes_stat_t {
>  #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
>  #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
>  #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
> +#define SFITRIM	_IOWR('X', 122, struct fstrim_range)	/* Secure trim */
>  
>  #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
>  #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
> -- 
> 2.0.0.526.g5318336
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen June 13, 2014, 2:36 a.m. UTC | #2
On 6/12/14, 9:14 PM, JP Abgrall wrote:
> This provides an interface for issuing secure trims to parallel
> the existing interface for non-secure trim.

Which does what?  A bit of digging through git history tells
me, but an idiot reader like myself doesn't know what a
"secure trim" is.  ;)  Would be nice to have that info,
and the reason for elevating it from a block-level ioctl
to an fs-level ioctl in the commit log.

IOWS: your commit log says what, but not why.

Also:

You're adding a new high-level IOCTL, so let's get a bit more
visibility than just linux-ext4; linux-fsdevel cc'd.

one other ext4-specific note below.

> Signed-off-by: Geremy Condra <gcondra@google.com>
> Signed-off-by: JP Abgrall <jpa@google.com>
> ---
>  fs/ext4/ext4.h          |  3 ++-
>  fs/ext4/ioctl.c         | 14 +++++++++++++-
>  fs/ext4/mballoc.c       | 29 +++++++++++++++++++----------
>  include/uapi/linux/fs.h |  1 +
>  4 files changed, 35 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7cc5a0e..cf2ddad 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2082,7 +2082,8 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb,
>  		ext4_group_t i, struct ext4_group_desc *desc);
>  extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
>  				ext4_fsblk_t block, unsigned long count);
> -extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
> +extern int ext4_trim_fs(struct super_block *, struct fstrim_range *,
> +				bool secure);
>  
>  /* inode.c */
>  struct buffer_head *ext4_getblk(handle_t *, struct inode *,
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 0f2252e..0a0b483 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -580,11 +580,13 @@ resizefs_out:
>  		return err;
>  	}
>  
> +	case SFITRIM:
>  	case FITRIM:
>  	{
>  		struct request_queue *q = bdev_get_queue(sb->s_bdev);
>  		struct fstrim_range range;
>  		int ret = 0;
> +		bool secure_trim = cmd == SFITRIM;
>  
>  		if (!capable(CAP_SYS_ADMIN))
>  			return -EPERM;
> @@ -592,13 +594,23 @@ resizefs_out:
>  		if (!blk_queue_discard(q))
>  			return -EOPNOTSUPP;
>  
> +		if (secure_trim && !blk_queue_secdiscard(q))
> +			return -EOPNOTSUPP;
> +
> +		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
> +			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
> +			ext4_msg(sb, KERN_ERR,
> +				 "FITRIM not supported with bigalloc");
> +			return -EOPNOTSUPP;
> +		}
> +

This last conditional is unrelated to the patch; if BIGALLOC has another
incomplete part of its implementation, please send it as a standalone patch,
not buried in this one.

Thanks,
-Eric

>  		if (copy_from_user(&range, (struct fstrim_range __user *)arg,
>  		    sizeof(range)))
>  			return -EFAULT;
>  
>  		range.minlen = max((unsigned int)range.minlen,
>  				   q->limits.discard_granularity);
> -		ret = ext4_trim_fs(sb, &range);
> +		ret = ext4_trim_fs(sb, &range, secure_trim);
>  		if (ret < 0)
>  			return ret;
>  
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 59e3162..b470efe 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2734,16 +2734,18 @@ int ext4_mb_release(struct super_block *sb)
>  }
>  
>  static inline int ext4_issue_discard(struct super_block *sb,
> -		ext4_group_t block_group, ext4_grpblk_t cluster, int count)
> +		ext4_group_t block_group, ext4_grpblk_t cluster, int count,
> +		bool secure)
>  {
>  	ext4_fsblk_t discard_block;
> +	unsigned long flags = secure ? BLKDEV_DISCARD_SECURE : 0;
>  
>  	discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
>  			 ext4_group_first_block_no(sb, block_group));
>  	count = EXT4_C2B(EXT4_SB(sb), count);
>  	trace_ext4_discard_blocks(sb,
>  			(unsigned long long) discard_block, count);
> -	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
> +	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags);
>  }
>  
>  /*
> @@ -2765,7 +2767,7 @@ static void ext4_free_data_callback(struct super_block *sb,
>  	if (test_opt(sb, DISCARD)) {
>  		err = ext4_issue_discard(sb, entry->efd_group,
>  					 entry->efd_start_cluster,
> -					 entry->efd_count);
> +					 entry->efd_count, 0);
>  		if (err && err != -EOPNOTSUPP)
>  			ext4_msg(sb, KERN_WARNING, "discard request in"
>  				 " group:%d block:%d count:%d failed"
> @@ -4804,7 +4806,8 @@ do_more:
>  		 * them with group lock_held
>  		 */
>  		if (test_opt(sb, DISCARD)) {
> -			err = ext4_issue_discard(sb, block_group, bit, count);
> +			err = ext4_issue_discard(sb, block_group, bit, count,
> +						 0);
>  			if (err && err != -EOPNOTSUPP)
>  				ext4_msg(sb, KERN_WARNING, "discard request in"
>  					 " group:%d block:%d count:%lu failed"
> @@ -5011,13 +5014,15 @@ error_return:
>   * @count:	number of blocks to TRIM
>   * @group:	alloc. group we are working with
>   * @e4b:	ext4 buddy for the group
> + * @secure:	false to issue a standard discard, true for secure discard
>   *
>   * Trim "count" blocks starting at "start" in the "group". To assure that no
>   * one will allocate those blocks, mark it as used in buddy bitmap. This must
>   * be called with under the group lock.
>   */
>  static int ext4_trim_extent(struct super_block *sb, int start, int count,
> -			     ext4_group_t group, struct ext4_buddy *e4b)
> +			    ext4_group_t group, struct ext4_buddy *e4b,
> +			    bool secure)
>  __releases(bitlock)
>  __acquires(bitlock)
>  {
> @@ -5038,7 +5043,7 @@ __acquires(bitlock)
>  	 */
>  	mb_mark_used(e4b, &ex);
>  	ext4_unlock_group(sb, group);
> -	ret = ext4_issue_discard(sb, group, start, count);
> +	ret = ext4_issue_discard(sb, group, start, count, secure);
>  	ext4_lock_group(sb, group);
>  	mb_free_blocks(NULL, e4b, start, ex.fe_len);
>  	return ret;
> @@ -5051,6 +5056,7 @@ __acquires(bitlock)
>   * @start:		first group block to examine
>   * @max:		last group block to examine
>   * @minblocks:		minimum extent block count
> + * @secure:		false for standard discard, true for secure discard
>   *
>   * ext4_trim_all_free walks through group's buddy bitmap searching for free
>   * extents. When the free block is found, ext4_trim_extent is called to TRIM
> @@ -5065,7 +5071,7 @@ __acquires(bitlock)
>  static ext4_grpblk_t
>  ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
>  		   ext4_grpblk_t start, ext4_grpblk_t max,
> -		   ext4_grpblk_t minblocks)
> +		   ext4_grpblk_t minblocks, bool secure)
>  {
>  	void *bitmap;
>  	ext4_grpblk_t next, count = 0, free_count = 0;
> @@ -5098,7 +5104,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
>  
>  		if ((next - start) >= minblocks) {
>  			ret = ext4_trim_extent(sb, start,
> -					       next - start, group, &e4b);
> +					       next - start, group, &e4b,
> +					       secure);
>  			if (ret && ret != -EOPNOTSUPP)
>  				break;
>  			ret = 0;
> @@ -5140,6 +5147,7 @@ out:
>   * ext4_trim_fs() -- trim ioctl handle function
>   * @sb:			superblock for filesystem
>   * @range:		fstrim_range structure
> + * @secure:		false for standard discard, true for secure discard
>   *
>   * start:	First Byte to trim
>   * len:		number of Bytes to trim from start
> @@ -5148,7 +5156,8 @@ out:
>   * start to start+len. For each such a group ext4_trim_all_free function
>   * is invoked to trim all free space.
>   */
> -int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
> +int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
> +			bool secure)
>  {
>  	struct ext4_group_info *grp;
>  	ext4_group_t group, first_group, last_group;
> @@ -5204,7 +5213,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
>  
>  		if (grp->bb_free >= minlen) {
>  			cnt = ext4_trim_all_free(sb, group, first_cluster,
> -						end, minlen);
> +						end, minlen, secure);
>  			if (cnt < 0) {
>  				ret = cnt;
>  				break;
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index ca1a11b..efbd8f8 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -156,6 +156,7 @@ struct inodes_stat_t {
>  #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
>  #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
>  #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
> +#define SFITRIM	_IOWR('X', 122, struct fstrim_range)	/* Secure trim */
>  
>  #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
>  #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
JP Abgrall June 13, 2014, 2:57 a.m. UTC | #3
On Thu, Jun 12, 2014 at 7:36 PM, Darrick J. Wong
<darrick.wong@oracle.com> wrote:
> What is 'secure discard'?  How does it differ from regular discard?
> I guess it means 'really make this go away physical media'?

It tells the device to really get rid of all the data for the sectors
and not just put them back into the pool of free ones. Mostly useful
for flash storage. eMMC spec introduces secure erase which is
currently used for other things than secure trimming. The eMMC
controller is then responsible for making sure the data is not
available anymore.

>> +             if (secure_trim && !blk_queue_secdiscard(q))
>> +                     return -EOPNOTSUPP;
>> +
>> +             if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
>> +                            EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
>> +                     ext4_msg(sb, KERN_ERR,
>> +                              "FITRIM not supported with bigalloc");
>> +                     return -EOPNOTSUPP;
>> +             }
>
> Huh?  I don't think this belongs in this patch.

Yuck. Messed up cherry-pick without paying enough attention. Will clean up.


> Also, any plans to bring SFITRIM to the other FSes?

Probably any FS with the plumbing for some form of secure discard and
FITRIM should be easy enough.
Have not looked into it yet beyond ext4 and F2FS.

--
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
JP Abgrall June 13, 2014, 3:02 a.m. UTC | #4
On Thu, Jun 12, 2014 at 7:36 PM, Eric Sandeen <sandeen@redhat.com> wrote:

> IOWS: your commit log says what, but not why.

Will upload a version with some more "why".


> You're adding a new high-level IOCTL, so let's get a bit more
> visibility than just linux-ext4; linux-fsdevel cc'd.

Wasn't too sure about that one, because I didn't feel like committing
to other FSes for now even if they do have FITRIM.
Will remember to include them.

>> +             if (secure_trim && !blk_queue_secdiscard(q))
>> +                     return -EOPNOTSUPP;
>> +
>> +             if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
>> +                            EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
>> +                     ext4_msg(sb, KERN_ERR,
>> +                              "FITRIM not supported with bigalloc");
>> +                     return -EOPNOTSUPP;
>> +             }
>> +
>
> This last conditional is unrelated to the patch; if BIGALLOC has another
> incomplete part of its implementation, please send it as a standalone patch,
> not buried in this one.

Yup. My bad.

--
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen June 13, 2014, 3:12 a.m. UTC | #5
On 6/12/14, 10:02 PM, JP Abgrall wrote:
> On Thu, Jun 12, 2014 at 7:36 PM, Eric Sandeen <sandeen@redhat.com> wrote:
> 
>> IOWS: your commit log says what, but not why.
> 
> Will upload a version with some more "why".
> 
> 
>> You're adding a new high-level IOCTL, so let's get a bit more
>> visibility than just linux-ext4; linux-fsdevel cc'd.
> 
> Wasn't too sure about that one, because I didn't feel like committing
> to other FSes for now even if they do have FITRIM.
> Will remember to include them.

You don't need to implement it for other filesystems, but whether or
not this should be a new fs-level ioctl is something worth airing
out on linux-fsdevel.

Thanks,
-Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner June 13, 2014, 3:15 a.m. UTC | #6
On Thu, Jun 12, 2014 at 09:36:49PM -0500, Eric Sandeen wrote:
> On 6/12/14, 9:14 PM, JP Abgrall wrote:
> > This provides an interface for issuing secure trims to parallel
> > the existing interface for non-secure trim.
> 
> Which does what?  A bit of digging through git history tells
> me, but an idiot reader like myself doesn't know what a
> "secure trim" is.  ;)  Would be nice to have that info,
> and the reason for elevating it from a block-level ioctl
> to an fs-level ioctl in the commit log.
> 
> IOWS: your commit log says what, but not why.

"Secure discard" is only "secure" if every discard that is issued is
secure. Filesystems optimise away repeated discards for freed
regions, so FITRIM followed by SFITRIM means the regions trimmed by
the initial FITRIM are not securely erased from the underlying
media, and you can't get then to be securely erased until
<hand-wave> stuff happens.

Indeed, mixing -o discard and SFITRIM is a recipe for
confusion and leakage - "but I used secure trim on the device!" -
and so all discards either have to be secure or not.

Further, some filesystems may other copies of stale data in the
discarded ranges (e.g. data from unlinked files in the journal or
even in unused portions of metadata blocks), so you can't really say
that secure TRIM provides any sort of security against leakage at
the filesystem layer...

Hence I think secure discard should be a block layer property and
controlled at that layer rather than giving an incorrect impression
that FITRIM provides protection against unintentional data leakage
from the filesystem....

Cheers,

Dave.
JP Abgrall June 13, 2014, 3:19 a.m. UTC | #7
On Thu, Jun 12, 2014 at 8:12 PM, Eric Sandeen <sandeen@redhat.com> wrote:
> On 6/12/14, 10:02 PM, JP Abgrall wrote:
>> On Thu, Jun 12, 2014 at 7:36 PM, Eric Sandeen <sandeen@redhat.com> wrote:
>>> You're adding a new high-level IOCTL, so let's get a bit more
>>> visibility than just linux-ext4; linux-fsdevel cc'd.
>>
>> Wasn't too sure about that one, because I didn't feel like committing
>> to other FSes for now even if they do have FITRIM.
>> Will remember to include them.
>
> You don't need to implement it for other filesystems, but whether or
> not this should be a new fs-level ioctl is something worth airing
> out on linux-fsdevel.

 FITRIM/SFITRIM
vs
 FITRIM/EXT4_IOC_SFITRIM ?

I don't mind either way.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen June 13, 2014, 3:24 a.m. UTC | #8
On 6/12/14, 10:19 PM, JP Abgrall wrote:
> On Thu, Jun 12, 2014 at 8:12 PM, Eric Sandeen <sandeen@redhat.com> wrote:
>> On 6/12/14, 10:02 PM, JP Abgrall wrote:
>>> On Thu, Jun 12, 2014 at 7:36 PM, Eric Sandeen <sandeen@redhat.com> wrote:
>>>> You're adding a new high-level IOCTL, so let's get a bit more
>>>> visibility than just linux-ext4; linux-fsdevel cc'd.
>>>
>>> Wasn't too sure about that one, because I didn't feel like committing
>>> to other FSes for now even if they do have FITRIM.
>>> Will remember to include them.
>>
>> You don't need to implement it for other filesystems, but whether or
>> not this should be a new fs-level ioctl is something worth airing
>> out on linux-fsdevel.
> 
>  FITRIM/SFITRIM
> vs
>  FITRIM/EXT4_IOC_SFITRIM ?
> 
> I don't mind either way.

Well, no, I'm not talking about making it another ext4-only implementation
- I'm talking about whether or not it's a good idea at all;
see for example Dave's reply.

If it's deemed a good idea, then yes, define SFITRIM, and implement
it for ext4, and let others follow.

If it's not a good idea then... well, perhaps don't do it at all.

This gets back to the "why" - what do you want to accomplish?
When would a user use this?  What if they have mounted -o discard,
or already issued an FITRIM?

What's the ultimate goal, here?

-Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner June 13, 2014, 3:30 a.m. UTC | #9
On Fri, Jun 13, 2014 at 01:15:38PM +1000, Dave Chinner wrote:
> On Thu, Jun 12, 2014 at 09:36:49PM -0500, Eric Sandeen wrote:
> > On 6/12/14, 9:14 PM, JP Abgrall wrote:
> > > This provides an interface for issuing secure trims to parallel
> > > the existing interface for non-secure trim.
> > 
> > Which does what?  A bit of digging through git history tells
> > me, but an idiot reader like myself doesn't know what a
> > "secure trim" is.  ;)  Would be nice to have that info,
> > and the reason for elevating it from a block-level ioctl
> > to an fs-level ioctl in the commit log.
> > 
> > IOWS: your commit log says what, but not why.
> 
> "Secure discard" is only "secure" if every discard that is issued is
> secure. Filesystems optimise away repeated discards for freed
> regions, so FITRIM followed by SFITRIM means the regions trimmed by
> the initial FITRIM are not securely erased from the underlying
> media, and you can't get then to be securely erased until
> <hand-wave> stuff happens.
> 
> Indeed, mixing -o discard and SFITRIM is a recipe for
> confusion and leakage - "but I used secure trim on the device!" -
> and so all discards either have to be secure or not.
> 
> Further, some filesystems may other copies of stale data in the
> discarded ranges (e.g. data from unlinked files in the journal or
> even in unused portions of metadata blocks), so you can't really say
> that secure TRIM provides any sort of security against leakage at
> the filesystem layer...

Oh, and while I think of it secure discard at the filesystem level
isn't even a guarantee that you'll get rid of all stale references
to a sector - if the filesystem has freed and then re-allocated a
block without having gone through a discard cycle on that block,
then the underlying device may have old copies of the block that it
hasn't garbage collected and SFITRIM won't clean those up because it
won't ask to trim in-use blocks....

So, really, secure trim from a filesystem perspective can leak stale
data at multiple layers....

Cheers,

Dave.
JP Abgrall June 13, 2014, 4:37 a.m. UTC | #10
On Thu, Jun 12, 2014 at 8:24 PM, Eric Sandeen <sandeen@sandeen.net> wrote:
> - I'm talking about whether or not it's a good idea at all;
> see for example Dave's reply.

I see. I'll reply to in his comments.

> This gets back to the "why" - what do you want to accomplish?
> When would a user use this?
> What if they have mounted -o discard,
> or already issued an FITRIM?

Android. No -o discard. A user should not be able to FITRIM.
The system would do the SFITRIM.

> What's the ultimate goal, here?
It was to make sure that data was not left behind after some steps were taken.
And one of the steps seemed to be to replace the FITRIM used in
Android with something better knowing that their was no -o discard
(see my reply to Dave Chinner).

--
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
JP Abgrall June 13, 2014, 4:37 a.m. UTC | #11
On Thu, Jun 12, 2014 at 8:30 PM, Dave Chinner <david@fromorbit.com> wrote:
> On Fri, Jun 13, 2014 at 01:15:38PM +1000, Dave Chinner wrote:
>> Indeed, mixing -o discard and SFITRIM is a recipe for
>> confusion and leakage - "but I used secure trim on the device!" -
>> and so all discards either have to be secure or not.

The idea was to keep on not using -o discard. And move from FITRIM to SFITRIM.

> Oh, and while I think of it secure discard at the filesystem level
> isn't even a guarantee that you'll get rid of all stale references
> to a sector - if the filesystem has freed and then re-allocated a
> block without having gone through a discard cycle on that block,
> then the underlying device may have old copies of the block that it
> hasn't garbage collected and SFITRIM won't clean those up because it
> won't ask to trim in-use blocks....

Arg. So, if understand this correctly, if the eMMC chip won't get a
secure discard/trim of a block that gets reassigned to the FS, then
data duplicates within the eMMC related to that block are not cleared,
and the next SFITRIM won't even reach that block or the duplicates as
the FS says they are in use.


> So, really, secure trim from a filesystem perspective can leak stale
> data at multiple layers....

.. back to the drawing board to evaluate how much leakage we can live
with or maybe go down a path of fibmap + some secure form of erase
(eMMC level secure trim).

Thanks for now.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner June 13, 2014, 5:07 a.m. UTC | #12
On Thu, Jun 12, 2014 at 09:37:58PM -0700, JP Abgrall wrote:
> On Thu, Jun 12, 2014 at 8:30 PM, Dave Chinner <david@fromorbit.com> wrote:
> > On Fri, Jun 13, 2014 at 01:15:38PM +1000, Dave Chinner wrote:
> >> Indeed, mixing -o discard and SFITRIM is a recipe for
> >> confusion and leakage - "but I used secure trim on the device!" -
> >> and so all discards either have to be secure or not.
> 
> The idea was to keep on not using -o discard. And move from FITRIM to SFITRIM.

IOWs, you want either normal discard or secure discard, and not a
mix of both. IOWs, you don't need SFITRIM, you need a "block device
does secure discard only" configuration flag...

> > Oh, and while I think of it secure discard at the filesystem level
> > isn't even a guarantee that you'll get rid of all stale references
> > to a sector - if the filesystem has freed and then re-allocated a
> > block without having gone through a discard cycle on that block,
> > then the underlying device may have old copies of the block that it
> > hasn't garbage collected and SFITRIM won't clean those up because it
> > won't ask to trim in-use blocks....
> 
> Arg. So, if understand this correctly, if the eMMC chip won't get a
> secure discard/trim of a block that gets reassigned to the FS, then
						      ^^ within

> data duplicates within the eMMC related to that block are not cleared,
> and the next SFITRIM won't even reach that block or the duplicates as
> the FS says they are in use.

Pretty much.

And even using -o discard is no guarantee that the filesystem will
issue a discard between freeing and re-using a block e.g.  XFS
explicitly avoids issuing discards for blocks it re-uses immediately
because they are always considered "in-use" from a transactional
POV. Hence there is no place where the block is considered free, and
hence there isn't a point in time where a discard can be safely
issued on that block.

Cheers,

Dave.
Theodore Ts'o June 13, 2014, 2:20 p.m. UTC | #13
On Fri, Jun 13, 2014 at 03:07:03PM +1000, Dave Chinner wrote:
> > 
> > Arg. So, if understand this correctly, if the eMMC chip won't get a
> > secure discard/trim of a block that gets reassigned to the FS, then
> 						      ^^ within
> 
> > data duplicates within the eMMC related to that block are not cleared,
> > and the next SFITRIM won't even reach that block or the duplicates as
> > the FS says they are in use.
> 
> Pretty much.

If you really want this to work, and be 100% secure, you really need
to do the secure discard at the file system layer.  The file system
could make sure that every single block gets a secure discard before
it gets reused.  Now as long as you can be sure the flash controller
won't discard certain trims because the range is too small or the
flash controller is too busy (DISCARD is "advisory" which means the
flash controller is free to drop any discard request on the floor; you
should check and see if secure discard is a similarly advisory
request.  Yes, that would be broken, but the flash controller
developers leaned on the standards bodies to make discard_zeros_data
to be similarly useless.)

One question --- do you really need to use secure discard on all
blocks, or just on certain critical bits of data?  If so, there may be
other ways of meeting your security requirements.  One of the things
which Michael Halcrow has been implementing for filesystem level
encryption for ChromeOS, so that selecting ext4 files can be encrypted
using per-user keys.  He recently has gotten his proof of concept
working with a global fixed key, so he's pretty far along.  (Although
I'm sure we'll need to do quite a bit of cleanup before it will be
ready for upstream submission.)  Depending on what your security
requirements are, perhaps this might help meet your security
requirements.

Feel free to contact Michael and I at our google.com e-mails and
perhaps schedule a meeting if that would be helpful.

Cheers,

						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o June 13, 2014, 2:31 p.m. UTC | #14
On Fri, Jun 13, 2014 at 10:20:54AM -0400, Theodore Ts'o wrote:
> 
> If you really want this to work, and be 100% secure, you really need
> to do the secure discard at the file system layer.  The file system
> could make sure that every single block gets a secure discard before
> it gets reused.

BTW, one major downside of doing a secure trim after every time that a
block has been released is that it will massively increase the flash
wear, since if you do a secure trim on a single 4k block in 512k erase
block, assuming that secure trim has been implemented properly from a
security perspective, it will need to copy out all of the used portion
of the 512k erase block, and then erase it.

This is one of the reasons why I asked if you really need to worry
about securely discarding all of the blocks on the file system, or
just blocks containing specific really security-sensitive information
(i.e., for Google Wallet, etc.)

If so, you might be better off either doing per-file encryption, or
per-file secure discard.

Cheers,

					- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
JP Abgrall June 13, 2014, 7:44 p.m. UTC | #15
On Fri, Jun 13, 2014 at 7:31 AM, Theodore Ts'o <tytso@mit.edu> wrote:
> On Fri, Jun 13, 2014 at 10:20:54AM -0400, Theodore Ts'o wrote:
>>
>> If you really want this to work, and be 100% secure, you really need
>> to do the secure discard at the file system layer.  The file system
>> could make sure that every single block gets a secure discard before
>> it gets reused.
>
> BTW, one major downside of doing a secure trim after every time that a
> block has been released is that it will massively increase the flash
> wear, since if you do a secure trim on a single 4k block in 512k erase
> block, assuming that secure trim has been implemented properly from a
> security perspective, it will need to copy out all of the used portion
> of the 512k erase block, and then erase it.

We did not plan on doing always-on secure-discard or always
secure-trim for those reasons.


> This is one of the reasons why I asked if you really need to worry
> about securely discarding all of the blocks on the file system, or
> just blocks containing specific really security-sensitive information
> (i.e., for Google Wallet, etc.)

Part of the "why" for this SFITRIM patch:
At some point we need to delete a bunch of files and packages and we
want to reduce the volume of recoverable data (file content, file
names, file names within databases or other files,...).
These are not security-critical files.
We understand that not all of the data can be purged, but covering
most of it would be nice.
We currently only care about ext4.
The current expected leftovers from a cleanup would be:
  - data blocks that were modified during the life-time of the files.
This includes directories containing those filenames.
  - file names in top level directories for directories that are not
getting deleted.
  - databases that are not set for deletion which referenced the files
being deleted.


> If so, you might be better off either doing per-file encryption, or
> per-file secure discard.

The per-file secure discard seems to be the way to go, as there are
only a few places in Android where this needs to be turned on.
The  current idletime-fstrim would  switch from FITRIM to SFITRIM to
reduce the leftovers.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen June 13, 2014, 7:57 p.m. UTC | #16
On 6/13/14, 2:44 PM, JP Abgrall wrote:
> On Fri, Jun 13, 2014 at 7:31 AM, Theodore Ts'o <tytso@mit.edu> wrote:

<snip>

> 
>> If so, you might be better off either doing per-file encryption, or
>> per-file secure discard.
> 
> The per-file secure discard seems to be the way to go, as there are
> only a few places in Android where this needs to be turned on.
> The  current idletime-fstrim would  switch from FITRIM to SFITRIM to
> reduce the leftovers.

Apologies if this is a dumb thing to point out, but...
mmc is the only in-kernel driver (aside from xen) which can
even set the flags necessary to enable secure discard; and then
only if mmc_can_secure_erase_trim() is true, so it depends on
the card, I guess.

I don't know what device you're running on, but figured it might
be worth pointing out that not all hardware even supports this
capability.

-Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
JP Abgrall June 13, 2014, 8:12 p.m. UTC | #17
On Fri, Jun 13, 2014 at 12:57 PM, Eric Sandeen <sandeen@redhat.com> wrote:
> I don't know what device you're running on,

At least Nexus devices that will get updates. :)

> but figured it might
> be worth pointing out that not all hardware even supports this
> capability.

True.
For future devices, the feature will be for certain eMMC devices,
certain kernels, certain FSes only. All stuff that we don't control,
but we can give advice on.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lukas Czerner June 18, 2014, 9:48 a.m. UTC | #18
On Thu, 12 Jun 2014, JP Abgrall wrote:

> Date: Thu, 12 Jun 2014 19:14:07 -0700
> From: JP Abgrall <jpa@google.com>
> To: linux-ext4@vger.kernel.org
> Cc: jpa@google.com, gcondra@google.com
> Subject: [PATCH] ext4: Add support for SFITRIM, an ioctl for secure FITRIM.
> 
> This provides an interface for issuing secure trims to parallel
> the existing interface for non-secure trim.
> 
> Signed-off-by: Geremy Condra <gcondra@google.com>
> Signed-off-by: JP Abgrall <jpa@google.com>
> ---
>  fs/ext4/ext4.h          |  3 ++-
>  fs/ext4/ioctl.c         | 14 +++++++++++++-
>  fs/ext4/mballoc.c       | 29 +++++++++++++++++++----------
>  include/uapi/linux/fs.h |  1 +
>  4 files changed, 35 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7cc5a0e..cf2ddad 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2082,7 +2082,8 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb,
>  		ext4_group_t i, struct ext4_group_desc *desc);
>  extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
>  				ext4_fsblk_t block, unsigned long count);
> -extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
> +extern int ext4_trim_fs(struct super_block *, struct fstrim_range *,
> +				bool secure);

int flags would be much better in case we want to add other things
later.

>  
>  /* inode.c */
>  struct buffer_head *ext4_getblk(handle_t *, struct inode *,
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 0f2252e..0a0b483 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -580,11 +580,13 @@ resizefs_out:
>  		return err;
>  	}
>  
> +	case SFITRIM:

As I said in a different email. Do not call it secure, How about
"deep discard"  - FIDTRIM ?

>  	case FITRIM:
>  	{
>  		struct request_queue *q = bdev_get_queue(sb->s_bdev);
>  		struct fstrim_range range;
>  		int ret = 0;
> +		bool secure_trim = cmd == SFITRIM;

If we're going to introduce "int flags" for the ext4_trim_fs() we
have to come up with our own flags for this.

>  
>  		if (!capable(CAP_SYS_ADMIN))
>  			return -EPERM;
> @@ -592,13 +594,23 @@ resizefs_out:
>  		if (!blk_queue_discard(q))
>  			return -EOPNOTSUPP;
>  
> +		if (secure_trim && !blk_queue_secdiscard(q))
> +			return -EOPNOTSUPP;
> +
> +		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
> +			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
> +			ext4_msg(sb, KERN_ERR,
> +				 "FITRIM not supported with bigalloc");
> +			return -EOPNOTSUPP;
> +		}

Why ? I have not noticed that it ever stopped working. Have you seen
any problems with this ?

> +
>  		if (copy_from_user(&range, (struct fstrim_range __user *)arg,
>  		    sizeof(range)))
>  			return -EFAULT;
>  
>  		range.minlen = max((unsigned int)range.minlen,
>  				   q->limits.discard_granularity);
> -		ret = ext4_trim_fs(sb, &range);
> +		ret = ext4_trim_fs(sb, &range, secure_trim);
>  		if (ret < 0)
>  			return ret;
>  
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 59e3162..b470efe 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2734,16 +2734,18 @@ int ext4_mb_release(struct super_block *sb)
>  }
>  
>  static inline int ext4_issue_discard(struct super_block *sb,
> -		ext4_group_t block_group, ext4_grpblk_t cluster, int count)
> +		ext4_group_t block_group, ext4_grpblk_t cluster, int count,
> +		bool secure)

again int flags would be better.


>  {
>  	ext4_fsblk_t discard_block;
> +	unsigned long flags = secure ? BLKDEV_DISCARD_SECURE : 0;
>  
>  	discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
>  			 ext4_group_first_block_no(sb, block_group));
>  	count = EXT4_C2B(EXT4_SB(sb), count);
>  	trace_ext4_discard_blocks(sb,
>  			(unsigned long long) discard_block, count);
> -	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
> +	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags);
>  }
>  
>  /*
> @@ -2765,7 +2767,7 @@ static void ext4_free_data_callback(struct super_block *sb,
>  	if (test_opt(sb, DISCARD)) {
>  		err = ext4_issue_discard(sb, entry->efd_group,
>  					 entry->efd_start_cluster,
> -					 entry->efd_count);
> +					 entry->efd_count, 0);
>  		if (err && err != -EOPNOTSUPP)
>  			ext4_msg(sb, KERN_WARNING, "discard request in"
>  				 " group:%d block:%d count:%d failed"
> @@ -4804,7 +4806,8 @@ do_more:
>  		 * them with group lock_held
>  		 */
>  		if (test_opt(sb, DISCARD)) {
> -			err = ext4_issue_discard(sb, block_group, bit, count);
> +			err = ext4_issue_discard(sb, block_group, bit, count,
> +						 0);
>  			if (err && err != -EOPNOTSUPP)
>  				ext4_msg(sb, KERN_WARNING, "discard request in"
>  					 " group:%d block:%d count:%lu failed"
> @@ -5011,13 +5014,15 @@ error_return:
>   * @count:	number of blocks to TRIM
>   * @group:	alloc. group we are working with
>   * @e4b:	ext4 buddy for the group
> + * @secure:	false to issue a standard discard, true for secure discard
>   *
>   * Trim "count" blocks starting at "start" in the "group". To assure that no
>   * one will allocate those blocks, mark it as used in buddy bitmap. This must
>   * be called with under the group lock.
>   */
>  static int ext4_trim_extent(struct super_block *sb, int start, int count,
> -			     ext4_group_t group, struct ext4_buddy *e4b)
> +			    ext4_group_t group, struct ext4_buddy *e4b,
> +			    bool secure)
>  __releases(bitlock)
>  __acquires(bitlock)
>  {
> @@ -5038,7 +5043,7 @@ __acquires(bitlock)
>  	 */
>  	mb_mark_used(e4b, &ex);
>  	ext4_unlock_group(sb, group);
> -	ret = ext4_issue_discard(sb, group, start, count);
> +	ret = ext4_issue_discard(sb, group, start, count, secure);
>  	ext4_lock_group(sb, group);
>  	mb_free_blocks(NULL, e4b, start, ex.fe_len);
>  	return ret;
> @@ -5051,6 +5056,7 @@ __acquires(bitlock)
>   * @start:		first group block to examine
>   * @max:		last group block to examine
>   * @minblocks:		minimum extent block count
> + * @secure:		false for standard discard, true for secure discard
>   *
>   * ext4_trim_all_free walks through group's buddy bitmap searching for free
>   * extents. When the free block is found, ext4_trim_extent is called to TRIM
> @@ -5065,7 +5071,7 @@ __acquires(bitlock)
>  static ext4_grpblk_t
>  ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
>  		   ext4_grpblk_t start, ext4_grpblk_t max,
> -		   ext4_grpblk_t minblocks)
> +		   ext4_grpblk_t minblocks, bool secure)

Also as I already mention to maximize benefits of this type of
discard we should really not take into account the optimization and
not skip groups if it's already been discarded.

We should also sync the file system to make sure we discard as much
blocks as we can from each group. It's not perfect and certainly not
secure, but it is "best effort".

Thanks!
-Lukas

>  {
>  	void *bitmap;
>  	ext4_grpblk_t next, count = 0, free_count = 0;
> @@ -5098,7 +5104,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
>  
>  		if ((next - start) >= minblocks) {
>  			ret = ext4_trim_extent(sb, start,
> -					       next - start, group, &e4b);
> +					       next - start, group, &e4b,
> +					       secure);
>  			if (ret && ret != -EOPNOTSUPP)
>  				break;
>  			ret = 0;
> @@ -5140,6 +5147,7 @@ out:
>   * ext4_trim_fs() -- trim ioctl handle function
>   * @sb:			superblock for filesystem
>   * @range:		fstrim_range structure
> + * @secure:		false for standard discard, true for secure discard
>   *
>   * start:	First Byte to trim
>   * len:		number of Bytes to trim from start
> @@ -5148,7 +5156,8 @@ out:
>   * start to start+len. For each such a group ext4_trim_all_free function
>   * is invoked to trim all free space.
>   */
> -int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
> +int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
> +			bool secure)
>  {
>  	struct ext4_group_info *grp;
>  	ext4_group_t group, first_group, last_group;
> @@ -5204,7 +5213,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
>  
>  		if (grp->bb_free >= minlen) {
>  			cnt = ext4_trim_all_free(sb, group, first_cluster,
> -						end, minlen);
> +						end, minlen, secure);
>  			if (cnt < 0) {
>  				ret = cnt;
>  				break;
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index ca1a11b..efbd8f8 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -156,6 +156,7 @@ struct inodes_stat_t {
>  #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
>  #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
>  #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
> +#define SFITRIM	_IOWR('X', 122, struct fstrim_range)	/* Secure trim */
>  
>  #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
>  #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7cc5a0e..cf2ddad 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2082,7 +2082,8 @@  extern int ext4_mb_add_groupinfo(struct super_block *sb,
 		ext4_group_t i, struct ext4_group_desc *desc);
 extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 				ext4_fsblk_t block, unsigned long count);
-extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+extern int ext4_trim_fs(struct super_block *, struct fstrim_range *,
+				bool secure);
 
 /* inode.c */
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0f2252e..0a0b483 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -580,11 +580,13 @@  resizefs_out:
 		return err;
 	}
 
+	case SFITRIM:
 	case FITRIM:
 	{
 		struct request_queue *q = bdev_get_queue(sb->s_bdev);
 		struct fstrim_range range;
 		int ret = 0;
+		bool secure_trim = cmd == SFITRIM;
 
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
@@ -592,13 +594,23 @@  resizefs_out:
 		if (!blk_queue_discard(q))
 			return -EOPNOTSUPP;
 
+		if (secure_trim && !blk_queue_secdiscard(q))
+			return -EOPNOTSUPP;
+
+		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+			ext4_msg(sb, KERN_ERR,
+				 "FITRIM not supported with bigalloc");
+			return -EOPNOTSUPP;
+		}
+
 		if (copy_from_user(&range, (struct fstrim_range __user *)arg,
 		    sizeof(range)))
 			return -EFAULT;
 
 		range.minlen = max((unsigned int)range.minlen,
 				   q->limits.discard_granularity);
-		ret = ext4_trim_fs(sb, &range);
+		ret = ext4_trim_fs(sb, &range, secure_trim);
 		if (ret < 0)
 			return ret;
 
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 59e3162..b470efe 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2734,16 +2734,18 @@  int ext4_mb_release(struct super_block *sb)
 }
 
 static inline int ext4_issue_discard(struct super_block *sb,
-		ext4_group_t block_group, ext4_grpblk_t cluster, int count)
+		ext4_group_t block_group, ext4_grpblk_t cluster, int count,
+		bool secure)
 {
 	ext4_fsblk_t discard_block;
+	unsigned long flags = secure ? BLKDEV_DISCARD_SECURE : 0;
 
 	discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
 			 ext4_group_first_block_no(sb, block_group));
 	count = EXT4_C2B(EXT4_SB(sb), count);
 	trace_ext4_discard_blocks(sb,
 			(unsigned long long) discard_block, count);
-	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags);
 }
 
 /*
@@ -2765,7 +2767,7 @@  static void ext4_free_data_callback(struct super_block *sb,
 	if (test_opt(sb, DISCARD)) {
 		err = ext4_issue_discard(sb, entry->efd_group,
 					 entry->efd_start_cluster,
-					 entry->efd_count);
+					 entry->efd_count, 0);
 		if (err && err != -EOPNOTSUPP)
 			ext4_msg(sb, KERN_WARNING, "discard request in"
 				 " group:%d block:%d count:%d failed"
@@ -4804,7 +4806,8 @@  do_more:
 		 * them with group lock_held
 		 */
 		if (test_opt(sb, DISCARD)) {
-			err = ext4_issue_discard(sb, block_group, bit, count);
+			err = ext4_issue_discard(sb, block_group, bit, count,
+						 0);
 			if (err && err != -EOPNOTSUPP)
 				ext4_msg(sb, KERN_WARNING, "discard request in"
 					 " group:%d block:%d count:%lu failed"
@@ -5011,13 +5014,15 @@  error_return:
  * @count:	number of blocks to TRIM
  * @group:	alloc. group we are working with
  * @e4b:	ext4 buddy for the group
+ * @secure:	false to issue a standard discard, true for secure discard
  *
  * Trim "count" blocks starting at "start" in the "group". To assure that no
  * one will allocate those blocks, mark it as used in buddy bitmap. This must
  * be called with under the group lock.
  */
 static int ext4_trim_extent(struct super_block *sb, int start, int count,
-			     ext4_group_t group, struct ext4_buddy *e4b)
+			    ext4_group_t group, struct ext4_buddy *e4b,
+			    bool secure)
 __releases(bitlock)
 __acquires(bitlock)
 {
@@ -5038,7 +5043,7 @@  __acquires(bitlock)
 	 */
 	mb_mark_used(e4b, &ex);
 	ext4_unlock_group(sb, group);
-	ret = ext4_issue_discard(sb, group, start, count);
+	ret = ext4_issue_discard(sb, group, start, count, secure);
 	ext4_lock_group(sb, group);
 	mb_free_blocks(NULL, e4b, start, ex.fe_len);
 	return ret;
@@ -5051,6 +5056,7 @@  __acquires(bitlock)
  * @start:		first group block to examine
  * @max:		last group block to examine
  * @minblocks:		minimum extent block count
+ * @secure:		false for standard discard, true for secure discard
  *
  * ext4_trim_all_free walks through group's buddy bitmap searching for free
  * extents. When the free block is found, ext4_trim_extent is called to TRIM
@@ -5065,7 +5071,7 @@  __acquires(bitlock)
 static ext4_grpblk_t
 ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 		   ext4_grpblk_t start, ext4_grpblk_t max,
-		   ext4_grpblk_t minblocks)
+		   ext4_grpblk_t minblocks, bool secure)
 {
 	void *bitmap;
 	ext4_grpblk_t next, count = 0, free_count = 0;
@@ -5098,7 +5104,8 @@  ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 
 		if ((next - start) >= minblocks) {
 			ret = ext4_trim_extent(sb, start,
-					       next - start, group, &e4b);
+					       next - start, group, &e4b,
+					       secure);
 			if (ret && ret != -EOPNOTSUPP)
 				break;
 			ret = 0;
@@ -5140,6 +5147,7 @@  out:
  * ext4_trim_fs() -- trim ioctl handle function
  * @sb:			superblock for filesystem
  * @range:		fstrim_range structure
+ * @secure:		false for standard discard, true for secure discard
  *
  * start:	First Byte to trim
  * len:		number of Bytes to trim from start
@@ -5148,7 +5156,8 @@  out:
  * start to start+len. For each such a group ext4_trim_all_free function
  * is invoked to trim all free space.
  */
-int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
+			bool secure)
 {
 	struct ext4_group_info *grp;
 	ext4_group_t group, first_group, last_group;
@@ -5204,7 +5213,7 @@  int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 
 		if (grp->bb_free >= minlen) {
 			cnt = ext4_trim_all_free(sb, group, first_cluster,
-						end, minlen);
+						end, minlen, secure);
 			if (cnt < 0) {
 				ret = cnt;
 				break;
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index ca1a11b..efbd8f8 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -156,6 +156,7 @@  struct inodes_stat_t {
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
 #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
+#define SFITRIM	_IOWR('X', 122, struct fstrim_range)	/* Secure trim */
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)