diff mbox

[Ext4,Secure,Delete,7/7v4] ext4/jbd2: Secure Delete: Secure delete journal blocks

Message ID 1317971465-8517-8-git-send-email-achender@linux.vnet.ibm.com
State Rejected, archived
Headers show

Commit Message

Allison Henderson Oct. 7, 2011, 7:11 a.m. UTC
This patch modifies both ext4 and jbd2 such that the journal
blocks which may contain file data, are securely deleted
after the files data blocks are deleted.

Because old journal blocks may contain file data, we need
a way to find those blocks again when it comes time to secure
delete the file.  This patch adds a new list to the journal
structure to keep track of which vfs blocks the journal blocks
contain.

After a truncate or a punch hole operation has completed, a
new function ext4_secure_delete_jblks is called that flushes
the journal, and then searches the list for any journal blocks
that were used to journal the blocks that were just removed.
The found journal blocks are then secure deleted.

Signed-off-by: Allison Henderson <achender@linux.vnet.ibm.com>
---
:100644 100644 0cba63b... fdf73b5... M	fs/ext4/ext4.h
:100644 100644 984fac2... 81df943... M	fs/ext4/extents.c
:100644 100644 bd1facd... 083c516... M	fs/ext4/inode.c
:100644 100644 eef6979... 2734e7b... M	fs/jbd2/commit.c
:100644 100644 f24df13... 6dd8af7... M	fs/jbd2/journal.c
:100644 100644 38f307b... 8b84b43... M	include/linux/jbd2.h
 fs/ext4/ext4.h       |    3 +
 fs/ext4/extents.c    |   12 +++++
 fs/ext4/inode.c      |  110 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/jbd2/commit.c     |    6 +++
 fs/jbd2/journal.c    |  112 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/jbd2.h |   21 +++++++++
 6 files changed, 264 insertions(+), 0 deletions(-)

Comments

Darrick J. Wong Oct. 7, 2011, 6:35 p.m. UTC | #1
On Fri, Oct 07, 2011 at 12:11:05AM -0700, Allison Henderson wrote:
> This patch modifies both ext4 and jbd2 such that the journal
> blocks which may contain file data, are securely deleted
> after the files data blocks are deleted.
> 
> Because old journal blocks may contain file data, we need
> a way to find those blocks again when it comes time to secure
> delete the file.  This patch adds a new list to the journal
> structure to keep track of which vfs blocks the journal blocks
> contain.
> 
> After a truncate or a punch hole operation has completed, a
> new function ext4_secure_delete_jblks is called that flushes
> the journal, and then searches the list for any journal blocks
> that were used to journal the blocks that were just removed.
> The found journal blocks are then secure deleted.
> 
> Signed-off-by: Allison Henderson <achender@linux.vnet.ibm.com>
> ---
> :100644 100644 0cba63b... fdf73b5... M	fs/ext4/ext4.h
> :100644 100644 984fac2... 81df943... M	fs/ext4/extents.c
> :100644 100644 bd1facd... 083c516... M	fs/ext4/inode.c
> :100644 100644 eef6979... 2734e7b... M	fs/jbd2/commit.c
> :100644 100644 f24df13... 6dd8af7... M	fs/jbd2/journal.c
> :100644 100644 38f307b... 8b84b43... M	include/linux/jbd2.h
>  fs/ext4/ext4.h       |    3 +
>  fs/ext4/extents.c    |   12 +++++
>  fs/ext4/inode.c      |  110 +++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/jbd2/commit.c     |    6 +++
>  fs/jbd2/journal.c    |  112 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/jbd2.h |   21 +++++++++
>  6 files changed, 264 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 0cba63b..fdf73b5 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2230,6 +2230,9 @@ extern int ext4_secure_delete_lblks(struct inode *inode,
>  			ext4_lblk_t first_block, unsigned long count);
>  extern int ext4_secure_delete_pblks(struct inode *inode,
>  			ext4_fsblk_t block, unsigned long count);
> +extern int ext4_secure_delete_jblks(struct inode *inode,
> +				ext4_lblk_t first_block, unsigned long count);
> +
> 
>  /* move_extent.c */
>  extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 984fac2..81df943 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -4159,6 +4159,7 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
>  	struct super_block *sb = inode->i_sb;
>  	struct ext4_ext_cache cache_ex;
>  	ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
> +	ext4_lblk_t first_secrm_blk, last_secrm_blk, secrm_blk_count;
>  	struct address_space *mapping = inode->i_mapping;
>  	struct ext4_map_blocks map;
>  	handle_t *handle;
> @@ -4317,6 +4318,17 @@ out:
>  	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
>  	ext4_mark_inode_dirty(handle, inode);
>  	ext4_journal_stop(handle);
> +
> +	if (!err && EXT4_I(inode)->i_flags & EXT4_SECRM_FL) {
> +		first_secrm_blk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
> +		last_secrm_blk = (offset + length + sb->s_blocksize - 1) >>
> +			EXT4_BLOCK_SIZE_BITS(sb);
> +		secrm_blk_count = last_secrm_blk - first_secrm_blk;
> +
> +		err = ext4_secure_delete_jblks(inode,
> +		   first_secrm_blk, secrm_blk_count);
> +	}
> +
>  	return err;
>  }
>  int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index bd1facd..083c516 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -909,6 +909,110 @@ int ext4_secure_delete_lblks(struct inode *inode, ext4_lblk_t first_block,
>  	return err;
>  }
> 
> +/*
> + * ext4_secure_delete_jblks()
> + *
> + * Secure deletes the journal blocks that contain
> + * the specified logical blocks
> + *
> + * @inode: The files inode
> + * @first_block: Starting logical block
> + * @count: The number of blocks to secure delete
> + *         from the journal
> + *
> + * Returns 0 on sucess or negative on error
> + */
> +int ext4_secure_delete_jblks(struct inode *inode,
> +				ext4_lblk_t first_block, unsigned long count){
> +
> +	unsigned long long jbd2_pblk_start = 0;
> +	unsigned long long jbd2_pblk_count = 0;
> +	ext4_lblk_t last_block = 0;
> +	struct list_head *tmp = NULL;
> +	struct list_head *cur = NULL;
> +	struct jbd2_blk_pair *b_pair = NULL;
> +	int err = 0;
> +	journal_t *journal = EXT4_JOURNAL(inode);
> +
> +	/* Do not allow last_block to wrap */
> +	last_block = first_block + count;
> +	if (last_block < first_block)
> +		last_block = EXT_MAX_BLOCKS;
> +
> +	/*
> +	 * Force the journal to finnish up any pending transactions
> +	 * before we start secure deleteing journal blocks
> +	 */
> +	err = ext4_force_commit((inode)->i_sb);
> +	if (err)
> +		return err;
> +
> +	spin_lock(&journal->j_pair_lock);
> +
> +	/* Loop over the journals blocks looking for our logical blocks */
> +	list_for_each_safe(cur, tmp, &journal->blk_pairs) {
> +		b_pair = list_entry(cur, struct jbd2_blk_pair, list);

Um.... I don't think ext4 should be accessing journal internals.  At a bare
minimum the stuff that mucks around with jbd2 ought to be in fs/jbd2 and
the ext4 parts stuffed in a wrapper in ext4_jbd2.[ch], since ocfs2 also uses
jbd2.

I'm also wondering -- this logical <-> journal block mapping doesn't seem to be
committed to disk anywhere.  What happens if jbd2 crashes before we get to
zeroing journal blocks?  Specifically, would the journal recovery code know
that a given journal block also needs secure deletion?

Here's a counterproposal: What if ext4 told jbd2 which blocks need to be
securely deleted while ext4 is creating the transactions?  jbd2 could then set
a JBD2_FLAG_SECURE_DELETE flag in journal_block_tag_t.t_flags (the descriptor
block), which would tell the recovery and commit code that the associated
journal block needs secure deletion when processing is complete.  I _think_ you
could just extend the functions called by ext4_jbd2.c to take a flags
parameter.  Does this sound better?  Or even sane? :)

(Not sure if ocfs2 cares about secure delete at all.)

--D

> +		if ((b_pair->vfs_inode == inode) &&
> +			(b_pair->vfs_lblk >= first_block) &&
> +			(b_pair->vfs_lblk < last_block)) {
> +
> +			/*
> +			 * It is likely that the journal blocks will be
> +			 * consecutive, so lets try to secure delete them
> +			 * in ranges
> +			 */
> +			if (jbd2_pblk_count == 0) {
> +				/*
> +				 * If there are no blocks in our range,
> +				 * then this one will be the first
> +				 */
> +				jbd2_pblk_start = b_pair->jbd2_pblk;
> +				jbd2_pblk_count = 1;
> +			} else if ((b_pair->jbd2_pblk) ==
> +				   (jbd2_pblk_start + jbd2_pblk_count)) {
> +				/*
> +				 * If this journal block is physically
> +				 * consecutive, then just increase the range
> +				 */
> +				jbd2_pblk_count++;
> +			} else if ((b_pair->jbd2_pblk) ==
> +					(jbd2_pblk_start - 1) &&
> +					jbd2_pblk_start > 0) {
> +				/*
> +				 * If this journal block is physically
> +				 * consecutive (from the start of the
> +				 * range), just increase the range from the
> +				 * other end.
> +				 */
> +				jbd2_pblk_count++;
> +				jbd2_pblk_start--;
> +			} else {
> +				/*
> +				 * If the block was not consecutive, secure
> +				 * delete the range, and restart the current
> +				 * range
> +				 */
> +
> +				err = ext4_secure_delete_pblks(journal->j_inode,
> +					jbd2_pblk_start, jbd2_pblk_count);
> +				if (err)
> +					goto out;
> +				jbd2_pblk_start = b_pair->jbd2_pblk;
> +				jbd2_pblk_count = 1;
> +			}
> +		}
> +	}
> +
> +	/* Secure delete any blocks still in our range */
> +	if (jbd2_pblk_count > 0)
> +		err = ext4_secure_delete_pblks(journal->j_inode,
> +			jbd2_pblk_start, jbd2_pblk_count);
> +
> +out:
> +	spin_unlock(&journal->j_pair_lock);
> +	return err;
> +}
> +
>  struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
>  			       ext4_lblk_t block, int create, int *err)
>  {
> @@ -3447,6 +3551,12 @@ void ext4_truncate(struct inode *inode)
>  	else
>  		ext4_ind_truncate(inode);
> 
> +	if (EXT4_I(inode)->i_flags & EXT4_SECRM_FL) {
> +		err = ext4_secure_delete_jblks(inode,
> +			inode->i_size >> EXT4_BLOCK_SIZE_BITS(inode->i_sb),
> +			EXT_MAX_BLOCKS);
> +	}
> +
>  	trace_ext4_truncate_exit(inode);
>  }
> 
> diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
> index eef6979..2734e7b 100644
> --- a/fs/jbd2/commit.c
> +++ b/fs/jbd2/commit.c
> @@ -593,6 +593,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
>  			jbd2_journal_abort(journal, flags);
>  			continue;
>  		}
> +
> +		err = jbd2_record_pair(journal, jh2bh(jh), jh2bh(new_jh));
> +		if (err) {
> +			jbd2_journal_abort(journal, flags);
> +			continue;
> +		}
>  		set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
>  		wbuf[bufs++] = jh2bh(new_jh);
> 
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index f24df13..6dd8af7 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -248,6 +248,100 @@ static void journal_kill_thread(journal_t *journal)
>  	}
>  	write_unlock(&journal->j_state_lock);
>  }
> +/*
> + * jbd2_record_pair()
> + * Updates the journals list of block pairs to contain an new pair of journal
> + * to vfs blocks.  This list is an in memory record of the journals blocks to
> + * help facilitate finding out what vfs blocks the journal blocks contain.
> + * Secure delete uses this list to clear out stale journal blocks that may
> + * still contain file data that needs to be secure deleted. This list is updated
> + * when a vfs data block is written to the journal or when a descriptor block
> + * is written to the journal.
> + *
> + * journal: The journal to update
> + * vfs_bh:  The vfs buffer_head that the jbd2_bh is journaling.  This
> + *          may be NULL if jbd2_bh does not represent a vfs block.
> + * jbd2_bh: The jbd2 buffer_head that has just been allocated for some
> + *          use in the journal. jbd2_bh may not be NULL.
> + *
> + * Returns 0 on sucess or negative on failure
> + */
> +int jbd2_record_pair(journal_t *journal,
> +		struct buffer_head *vfs_bh,
> +		struct buffer_head *jbd2_bh){
> +
> +	struct page *page;
> +	struct buffer_head *bh;
> +	struct buffer_head *head;
> +	pgoff_t index;
> +	struct jbd2_blk_pair *b_pair = NULL;
> +	struct list_head *cur;
> +	unsigned long long vfs_lblk = 0;
> +	unsigned long long vfs_pblk = 0;
> +	struct inode *vfs_inode = NULL;
> +
> +	/* If we have the vfs bh, figure out the logical block offset */
> +	if (vfs_bh) {
> +		page = vfs_bh->b_page;
> +		vfs_inode = page->mapping->host;
> +		vfs_pblk = vfs_bh->b_blocknr;
> +		index = page->index;
> +
> +		/* Find the block offset of the page */
> +		vfs_lblk = index <<
> +			(PAGE_CACHE_SHIFT - vfs_inode->i_sb->s_blocksize_bits);
> +
> +		/* Then add in the block offset of the bh in the page */
> +		bh = page_buffers(page);
> +		head = bh;
> +		do {
> +			if (bh == vfs_bh)
> +				break;
> +			bh = bh->b_this_page;
> +			vfs_lblk++;
> +		} while (bh != head);
> +	}
> +
> +	spin_lock(&journal->j_pair_lock);
> +	/*
> +	 * Add the pair to the list.  If there is already a pair
> +	 * for this journal block, just update it with the new vfs
> +	 * block info
> +	 */
> +	list_for_each(cur, &journal->blk_pairs) {
> +		b_pair = list_entry(cur, struct jbd2_blk_pair, list);
> +		if (b_pair->jbd2_pblk == jbd2_bh->b_blocknr) {
> +			b_pair->vfs_inode = vfs_inode;
> +			b_pair->vfs_pblk = vfs_pblk;
> +			b_pair->vfs_lblk = vfs_lblk;
> +			b_pair->jbd2_pblk = jbd2_bh->b_blocknr;
> +
> +			break;
> +		} else
> +			b_pair = NULL;
> +	}
> +
> +	/*
> +	 * If the journal block was not found in the list,
> +	 * add a new pair to the list
> +	 */
> +	if (!b_pair) {
> +		b_pair = kzalloc(sizeof(struct jbd2_blk_pair), GFP_NOFS);
> +		if (!b_pair) {
> +			spin_unlock(&journal->j_pair_lock);
> +			return -ENOMEM;
> +		}
> +		b_pair->jbd2_pblk = jbd2_bh->b_blocknr;
> +		b_pair->vfs_inode = vfs_inode;
> +		b_pair->vfs_pblk = vfs_pblk;
> +		b_pair->vfs_lblk = vfs_lblk;
> +
> +		list_add(&b_pair->list, &journal->blk_pairs);
> +	}
> +
> +	spin_unlock(&journal->j_pair_lock);
> +	return 0;
> +}
> 
>  /*
>   * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal.
> @@ -739,7 +833,13 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
>  	lock_buffer(bh);
>  	memset(bh->b_data, 0, journal->j_blocksize);
>  	set_buffer_uptodate(bh);
> +	err = jbd2_record_pair(journal, NULL, bh);
> +
>  	unlock_buffer(bh);
> +
> +	if (err)
> +		return NULL;
> +
>  	BUFFER_TRACE(bh, "return this buffer");
>  	return jbd2_journal_add_journal_head(bh);
>  }
> @@ -895,9 +995,11 @@ static journal_t * journal_init_common (void)
>  	init_waitqueue_head(&journal->j_wait_commit);
>  	init_waitqueue_head(&journal->j_wait_updates);
>  	mutex_init(&journal->j_barrier);
> +	INIT_LIST_HEAD(&journal->blk_pairs);
>  	mutex_init(&journal->j_checkpoint_mutex);
>  	spin_lock_init(&journal->j_revoke_lock);
>  	spin_lock_init(&journal->j_list_lock);
> +	spin_lock_init(&journal->j_pair_lock);
>  	rwlock_init(&journal->j_state_lock);
> 
>  	journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
> @@ -1361,6 +1463,8 @@ recovery_error:
>  int jbd2_journal_destroy(journal_t *journal)
>  {
>  	int err = 0;
> +	struct list_head *cur;
> +	struct jbd2_blk_pair *b_pair;
> 
>  	/* Wait for the commit thread to wake up and die. */
>  	journal_kill_thread(journal);
> @@ -1405,6 +1509,14 @@ int jbd2_journal_destroy(journal_t *journal)
>  		iput(journal->j_inode);
>  	if (journal->j_revoke)
>  		jbd2_journal_destroy_revoke(journal);
> +
> +	spin_lock(&journal->j_pair_lock);
> +	list_for_each_prev(cur, &journal->blk_pairs) {
> +		b_pair = list_entry(cur, struct jbd2_blk_pair, list);
> +		kfree(b_pair);
> +	}
> +	spin_unlock(&journal->j_pair_lock);
> +
>  	kfree(journal->j_wbuf);
>  	kfree(journal);
> 
> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> index 38f307b..8b84b43 100644
> --- a/include/linux/jbd2.h
> +++ b/include/linux/jbd2.h
> @@ -698,6 +698,19 @@ jbd2_time_diff(unsigned long start, unsigned long end)
> 
>  #define JBD2_NR_BATCH	64
> 
> +
> +/*
> + * List for keeping track of which vfs
> + * block a journal block contians.
> + */
> +struct jbd2_blk_pair {
> +	struct inode *vfs_inode;
> +	unsigned long long vfs_pblk;
> +	unsigned long long vfs_lblk;
> +	unsigned long long jbd2_pblk;
> +	struct list_head list;
> +};
> +
>  /**
>   * struct journal_s - The journal_s type is the concrete type associated with
>   *     journal_t.
> @@ -1002,6 +1015,9 @@ struct journal_s
>  	 * superblock pointer here
>  	 */
>  	void *j_private;
> +
> +	spinlock_t              j_pair_lock;
> +	struct list_head blk_pairs;
>  };
> 
>  /*
> @@ -1080,6 +1096,11 @@ jbd2_journal_write_metadata_buffer(transaction_t	  *transaction,
>  			      struct journal_head **jh_out,
>  			      unsigned long long   blocknr);
> 
> +extern int jbd2_record_pair(journal_t *journal,
> +		struct buffer_head *vfs_bh,
> +		struct buffer_head *jbd2_bh);
> +
> +
>  /* Transaction locking */
>  extern void		__wait_on_journal (journal_t *);
> 
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sunil Mushran Oct. 7, 2011, 7:31 p.m. UTC | #2
On 10/07/2011 11:35 AM, Darrick J. Wong wrote:
> Um.... I don't think ext4 should be accessing journal internals.  At a bare
> minimum the stuff that mucks around with jbd2 ought to be in fs/jbd2 and
> the ext4 parts stuffed in a wrapper in ext4_jbd2.[ch], since ocfs2 also uses
> jbd2.

I agree.

> I'm also wondering -- this logical<->  journal block mapping doesn't seem to be
> committed to disk anywhere.  What happens if jbd2 crashes before we get to
> zeroing journal blocks?  Specifically, would the journal recovery code know
> that a given journal block also needs secure deletion?
>
> Here's a counterproposal: What if ext4 told jbd2 which blocks need to be
> securely deleted while ext4 is creating the transactions?  jbd2 could then set
> a JBD2_FLAG_SECURE_DELETE flag in journal_block_tag_t.t_flags (the descriptor
> block), which would tell the recovery and commit code that the associated
> journal block needs secure deletion when processing is complete.  I _think_ you
> could just extend the functions called by ext4_jbd2.c to take a flags
> parameter.  Does this sound better?  Or even sane? :)
>
> (Not sure if ocfs2 cares about secure delete at all.)

It looks like a useful feature. Though I would be wary of wiring this in
the journaling layer. Mainly for performance reasons.

In ocfs2, we log the truncated bits to a node specific system file called
truncate_log. These bits are flushed to the global bitmap periodically
by a queued task. We do this because taking a cluster lock on the global
bitmap is very expensive.

If I were doing this, I would extend this scheme to handle secure deletes.
The queued task would zero out the clusters before clearing the bits
in the global bitmap.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen Oct. 7, 2011, 7:54 p.m. UTC | #3
On 10/7/11 1:35 PM, Darrick J. Wong wrote:
> On Fri, Oct 07, 2011 at 12:11:05AM -0700, Allison Henderson wrote:
>> This patch modifies both ext4 and jbd2 such that the journal
>> blocks which may contain file data, are securely deleted
>> after the files data blocks are deleted.
>>
>> Because old journal blocks may contain file data, we need
>> a way to find those blocks again when it comes time to secure
>> delete the file.  This patch adds a new list to the journal
>> structure to keep track of which vfs blocks the journal blocks
>> contain.
>>
>> After a truncate or a punch hole operation has completed, a
>> new function ext4_secure_delete_jblks is called that flushes
>> the journal, and then searches the list for any journal blocks
>> that were used to journal the blocks that were just removed.
>> The found journal blocks are then secure deleted.

And what about directory data?  Those would appear to remain in the
journal at least...  And xattrs?

#!/bin/bash

rm -f testsecdel
truncate --size 256m  testsecdel
mkfs.ext4 -F testsecdel &>/dev/null
mount -o loop testsecdel mnt/
echo securedata > mnt/securefilename
setfattr -n user.securexattrname -v securexattrvalue mnt/securefilename 
LONGATTR=`for I in 1 2 3 4 5 6 7 8 9 0; do echo -n veryveryveryveryveryveryverylongsecurexattrvalue; done`
setfattr -n user.longsecurexattrname -v $LONGATTR mnt/securefilename 
sync

rm -f mnt/securefilename
umount mnt
strings testsecdel 

yields:

/mnt/test2/mnt
lost+found
securexattrname
Ylongsecurexattrname
mselinux
veryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvaluesecurexattrvalueunconfined_u:object_r:file_t:s0
lost+found
securefilename
/mnt/test2/mnt

(this was with ext4.ko hacked to always enable secure delete)

-Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Allison Henderson Oct. 7, 2011, 7:55 p.m. UTC | #4
On 10/07/2011 11:35 AM, Darrick J. Wong wrote:
> On Fri, Oct 07, 2011 at 12:11:05AM -0700, Allison Henderson wrote:
>> This patch modifies both ext4 and jbd2 such that the journal
>> blocks which may contain file data, are securely deleted
>> after the files data blocks are deleted.
>>
>> Because old journal blocks may contain file data, we need
>> a way to find those blocks again when it comes time to secure
>> delete the file.  This patch adds a new list to the journal
>> structure to keep track of which vfs blocks the journal blocks
>> contain.
>>
>> After a truncate or a punch hole operation has completed, a
>> new function ext4_secure_delete_jblks is called that flushes
>> the journal, and then searches the list for any journal blocks
>> that were used to journal the blocks that were just removed.
>> The found journal blocks are then secure deleted.
>>
>> Signed-off-by: Allison Henderson<achender@linux.vnet.ibm.com>
>> ---
>> :100644 100644 0cba63b... fdf73b5... M	fs/ext4/ext4.h
>> :100644 100644 984fac2... 81df943... M	fs/ext4/extents.c
>> :100644 100644 bd1facd... 083c516... M	fs/ext4/inode.c
>> :100644 100644 eef6979... 2734e7b... M	fs/jbd2/commit.c
>> :100644 100644 f24df13... 6dd8af7... M	fs/jbd2/journal.c
>> :100644 100644 38f307b... 8b84b43... M	include/linux/jbd2.h
>>   fs/ext4/ext4.h       |    3 +
>>   fs/ext4/extents.c    |   12 +++++
>>   fs/ext4/inode.c      |  110 +++++++++++++++++++++++++++++++++++++++++++++++++
>>   fs/jbd2/commit.c     |    6 +++
>>   fs/jbd2/journal.c    |  112 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>   include/linux/jbd2.h |   21 +++++++++
>>   6 files changed, 264 insertions(+), 0 deletions(-)
>>
>> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
>> index 0cba63b..fdf73b5 100644
>> --- a/fs/ext4/ext4.h
>> +++ b/fs/ext4/ext4.h
>> @@ -2230,6 +2230,9 @@ extern int ext4_secure_delete_lblks(struct inode *inode,
>>   			ext4_lblk_t first_block, unsigned long count);
>>   extern int ext4_secure_delete_pblks(struct inode *inode,
>>   			ext4_fsblk_t block, unsigned long count);
>> +extern int ext4_secure_delete_jblks(struct inode *inode,
>> +				ext4_lblk_t first_block, unsigned long count);
>> +
>>
>>   /* move_extent.c */
>>   extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>> index 984fac2..81df943 100644
>> --- a/fs/ext4/extents.c
>> +++ b/fs/ext4/extents.c
>> @@ -4159,6 +4159,7 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
>>   	struct super_block *sb = inode->i_sb;
>>   	struct ext4_ext_cache cache_ex;
>>   	ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
>> +	ext4_lblk_t first_secrm_blk, last_secrm_blk, secrm_blk_count;
>>   	struct address_space *mapping = inode->i_mapping;
>>   	struct ext4_map_blocks map;
>>   	handle_t *handle;
>> @@ -4317,6 +4318,17 @@ out:
>>   	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
>>   	ext4_mark_inode_dirty(handle, inode);
>>   	ext4_journal_stop(handle);
>> +
>> +	if (!err&&  EXT4_I(inode)->i_flags&  EXT4_SECRM_FL) {
>> +		first_secrm_blk = offset>>  EXT4_BLOCK_SIZE_BITS(sb);
>> +		last_secrm_blk = (offset + length + sb->s_blocksize - 1)>>
>> +			EXT4_BLOCK_SIZE_BITS(sb);
>> +		secrm_blk_count = last_secrm_blk - first_secrm_blk;
>> +
>> +		err = ext4_secure_delete_jblks(inode,
>> +		   first_secrm_blk, secrm_blk_count);
>> +	}
>> +
>>   	return err;
>>   }
>>   int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
>> index bd1facd..083c516 100644
>> --- a/fs/ext4/inode.c
>> +++ b/fs/ext4/inode.c
>> @@ -909,6 +909,110 @@ int ext4_secure_delete_lblks(struct inode *inode, ext4_lblk_t first_block,
>>   	return err;
>>   }
>>
>> +/*
>> + * ext4_secure_delete_jblks()
>> + *
>> + * Secure deletes the journal blocks that contain
>> + * the specified logical blocks
>> + *
>> + * @inode: The files inode
>> + * @first_block: Starting logical block
>> + * @count: The number of blocks to secure delete
>> + *         from the journal
>> + *
>> + * Returns 0 on sucess or negative on error
>> + */
>> +int ext4_secure_delete_jblks(struct inode *inode,
>> +				ext4_lblk_t first_block, unsigned long count){
>> +
>> +	unsigned long long jbd2_pblk_start = 0;
>> +	unsigned long long jbd2_pblk_count = 0;
>> +	ext4_lblk_t last_block = 0;
>> +	struct list_head *tmp = NULL;
>> +	struct list_head *cur = NULL;
>> +	struct jbd2_blk_pair *b_pair = NULL;
>> +	int err = 0;
>> +	journal_t *journal = EXT4_JOURNAL(inode);
>> +
>> +	/* Do not allow last_block to wrap */
>> +	last_block = first_block + count;
>> +	if (last_block<  first_block)
>> +		last_block = EXT_MAX_BLOCKS;
>> +
>> +	/*
>> +	 * Force the journal to finnish up any pending transactions
>> +	 * before we start secure deleteing journal blocks
>> +	 */
>> +	err = ext4_force_commit((inode)->i_sb);
>> +	if (err)
>> +		return err;
>> +
>> +	spin_lock(&journal->j_pair_lock);
>> +
>> +	/* Loop over the journals blocks looking for our logical blocks */
>> +	list_for_each_safe(cur, tmp,&journal->blk_pairs) {
>> +		b_pair = list_entry(cur, struct jbd2_blk_pair, list);
>
> Um.... I don't think ext4 should be accessing journal internals.  At a bare
> minimum the stuff that mucks around with jbd2 ought to be in fs/jbd2 and
> the ext4 parts stuffed in a wrapper in ext4_jbd2.[ch], since ocfs2 also uses
> jbd2.

Ok, I can move the looping the jbd2 and set up a call back for doing the 
actual secure deleting

>
> I'm also wondering -- this logical<->  journal block mapping doesn't seem to be
> committed to disk anywhere.  What happens if jbd2 crashes before we get to
> zeroing journal blocks?  Specifically, would the journal recovery code know
> that a given journal block also needs secure deletion?
>
> Here's a counterproposal: What if ext4 told jbd2 which blocks need to be
> securely deleted while ext4 is creating the transactions?  jbd2 could then set
> a JBD2_FLAG_SECURE_DELETE flag in journal_block_tag_t.t_flags (the descriptor
> block), which would tell the recovery and commit code that the associated
> journal block needs secure deletion when processing is complete.  I _think_ you
> could just extend the functions called by ext4_jbd2.c to take a flags
> parameter.  Does this sound better?  Or even sane? :)
>
> (Not sure if ocfs2 cares about secure delete at all.)
>
> --D
>

Well actually I had initially started out with something that tried to 
do the secure delete when the transactions had completed, but it didnt 
work out because most of the time the transactions had already long 
since completed before the secure delete flag even gets turned on. And 
then when we turn on the flag and start deleting, I have to get those 
old journal blocks back.  So I think the new idea might have the same 
problem.  But I think your right about needing to somehow commit it to 
the disk, otherwise those old journal blocks will still be there after a 
crash.

>> +		if ((b_pair->vfs_inode == inode)&&
>> +			(b_pair->vfs_lblk>= first_block)&&
>> +			(b_pair->vfs_lblk<  last_block)) {
>> +
>> +			/*
>> +			 * It is likely that the journal blocks will be
>> +			 * consecutive, so lets try to secure delete them
>> +			 * in ranges
>> +			 */
>> +			if (jbd2_pblk_count == 0) {
>> +				/*
>> +				 * If there are no blocks in our range,
>> +				 * then this one will be the first
>> +				 */
>> +				jbd2_pblk_start = b_pair->jbd2_pblk;
>> +				jbd2_pblk_count = 1;
>> +			} else if ((b_pair->jbd2_pblk) ==
>> +				   (jbd2_pblk_start + jbd2_pblk_count)) {
>> +				/*
>> +				 * If this journal block is physically
>> +				 * consecutive, then just increase the range
>> +				 */
>> +				jbd2_pblk_count++;
>> +			} else if ((b_pair->jbd2_pblk) ==
>> +					(jbd2_pblk_start - 1)&&
>> +					jbd2_pblk_start>  0) {
>> +				/*
>> +				 * If this journal block is physically
>> +				 * consecutive (from the start of the
>> +				 * range), just increase the range from the
>> +				 * other end.
>> +				 */
>> +				jbd2_pblk_count++;
>> +				jbd2_pblk_start--;
>> +			} else {
>> +				/*
>> +				 * If the block was not consecutive, secure
>> +				 * delete the range, and restart the current
>> +				 * range
>> +				 */
>> +
>> +				err = ext4_secure_delete_pblks(journal->j_inode,
>> +					jbd2_pblk_start, jbd2_pblk_count);
>> +				if (err)
>> +					goto out;
>> +				jbd2_pblk_start = b_pair->jbd2_pblk;
>> +				jbd2_pblk_count = 1;
>> +			}
>> +		}
>> +	}
>> +
>> +	/* Secure delete any blocks still in our range */
>> +	if (jbd2_pblk_count>  0)
>> +		err = ext4_secure_delete_pblks(journal->j_inode,
>> +			jbd2_pblk_start, jbd2_pblk_count);
>> +
>> +out:
>> +	spin_unlock(&journal->j_pair_lock);
>> +	return err;
>> +}
>> +
>>   struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
>>   			       ext4_lblk_t block, int create, int *err)
>>   {
>> @@ -3447,6 +3551,12 @@ void ext4_truncate(struct inode *inode)
>>   	else
>>   		ext4_ind_truncate(inode);
>>
>> +	if (EXT4_I(inode)->i_flags&  EXT4_SECRM_FL) {
>> +		err = ext4_secure_delete_jblks(inode,
>> +			inode->i_size>>  EXT4_BLOCK_SIZE_BITS(inode->i_sb),
>> +			EXT_MAX_BLOCKS);
>> +	}
>> +
>>   	trace_ext4_truncate_exit(inode);
>>   }
>>
>> diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
>> index eef6979..2734e7b 100644
>> --- a/fs/jbd2/commit.c
>> +++ b/fs/jbd2/commit.c
>> @@ -593,6 +593,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
>>   			jbd2_journal_abort(journal, flags);
>>   			continue;
>>   		}
>> +
>> +		err = jbd2_record_pair(journal, jh2bh(jh), jh2bh(new_jh));
>> +		if (err) {
>> +			jbd2_journal_abort(journal, flags);
>> +			continue;
>> +		}
>>   		set_bit(BH_JWrite,&jh2bh(new_jh)->b_state);
>>   		wbuf[bufs++] = jh2bh(new_jh);
>>
>> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
>> index f24df13..6dd8af7 100644
>> --- a/fs/jbd2/journal.c
>> +++ b/fs/jbd2/journal.c
>> @@ -248,6 +248,100 @@ static void journal_kill_thread(journal_t *journal)
>>   	}
>>   	write_unlock(&journal->j_state_lock);
>>   }
>> +/*
>> + * jbd2_record_pair()
>> + * Updates the journals list of block pairs to contain an new pair of journal
>> + * to vfs blocks.  This list is an in memory record of the journals blocks to
>> + * help facilitate finding out what vfs blocks the journal blocks contain.
>> + * Secure delete uses this list to clear out stale journal blocks that may
>> + * still contain file data that needs to be secure deleted. This list is updated
>> + * when a vfs data block is written to the journal or when a descriptor block
>> + * is written to the journal.
>> + *
>> + * journal: The journal to update
>> + * vfs_bh:  The vfs buffer_head that the jbd2_bh is journaling.  This
>> + *          may be NULL if jbd2_bh does not represent a vfs block.
>> + * jbd2_bh: The jbd2 buffer_head that has just been allocated for some
>> + *          use in the journal. jbd2_bh may not be NULL.
>> + *
>> + * Returns 0 on sucess or negative on failure
>> + */
>> +int jbd2_record_pair(journal_t *journal,
>> +		struct buffer_head *vfs_bh,
>> +		struct buffer_head *jbd2_bh){
>> +
>> +	struct page *page;
>> +	struct buffer_head *bh;
>> +	struct buffer_head *head;
>> +	pgoff_t index;
>> +	struct jbd2_blk_pair *b_pair = NULL;
>> +	struct list_head *cur;
>> +	unsigned long long vfs_lblk = 0;
>> +	unsigned long long vfs_pblk = 0;
>> +	struct inode *vfs_inode = NULL;
>> +
>> +	/* If we have the vfs bh, figure out the logical block offset */
>> +	if (vfs_bh) {
>> +		page = vfs_bh->b_page;
>> +		vfs_inode = page->mapping->host;
>> +		vfs_pblk = vfs_bh->b_blocknr;
>> +		index = page->index;
>> +
>> +		/* Find the block offset of the page */
>> +		vfs_lblk = index<<
>> +			(PAGE_CACHE_SHIFT - vfs_inode->i_sb->s_blocksize_bits);
>> +
>> +		/* Then add in the block offset of the bh in the page */
>> +		bh = page_buffers(page);
>> +		head = bh;
>> +		do {
>> +			if (bh == vfs_bh)
>> +				break;
>> +			bh = bh->b_this_page;
>> +			vfs_lblk++;
>> +		} while (bh != head);
>> +	}
>> +
>> +	spin_lock(&journal->j_pair_lock);
>> +	/*
>> +	 * Add the pair to the list.  If there is already a pair
>> +	 * for this journal block, just update it with the new vfs
>> +	 * block info
>> +	 */
>> +	list_for_each(cur,&journal->blk_pairs) {
>> +		b_pair = list_entry(cur, struct jbd2_blk_pair, list);
>> +		if (b_pair->jbd2_pblk == jbd2_bh->b_blocknr) {
>> +			b_pair->vfs_inode = vfs_inode;
>> +			b_pair->vfs_pblk = vfs_pblk;
>> +			b_pair->vfs_lblk = vfs_lblk;
>> +			b_pair->jbd2_pblk = jbd2_bh->b_blocknr;
>> +
>> +			break;
>> +		} else
>> +			b_pair = NULL;
>> +	}
>> +
>> +	/*
>> +	 * If the journal block was not found in the list,
>> +	 * add a new pair to the list
>> +	 */
>> +	if (!b_pair) {
>> +		b_pair = kzalloc(sizeof(struct jbd2_blk_pair), GFP_NOFS);
>> +		if (!b_pair) {
>> +			spin_unlock(&journal->j_pair_lock);
>> +			return -ENOMEM;
>> +		}
>> +		b_pair->jbd2_pblk = jbd2_bh->b_blocknr;
>> +		b_pair->vfs_inode = vfs_inode;
>> +		b_pair->vfs_pblk = vfs_pblk;
>> +		b_pair->vfs_lblk = vfs_lblk;
>> +
>> +		list_add(&b_pair->list,&journal->blk_pairs);
>> +	}
>> +
>> +	spin_unlock(&journal->j_pair_lock);
>> +	return 0;
>> +}
>>
>>   /*
>>    * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal.
>> @@ -739,7 +833,13 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
>>   	lock_buffer(bh);
>>   	memset(bh->b_data, 0, journal->j_blocksize);
>>   	set_buffer_uptodate(bh);
>> +	err = jbd2_record_pair(journal, NULL, bh);
>> +
>>   	unlock_buffer(bh);
>> +
>> +	if (err)
>> +		return NULL;
>> +
>>   	BUFFER_TRACE(bh, "return this buffer");
>>   	return jbd2_journal_add_journal_head(bh);
>>   }
>> @@ -895,9 +995,11 @@ static journal_t * journal_init_common (void)
>>   	init_waitqueue_head(&journal->j_wait_commit);
>>   	init_waitqueue_head(&journal->j_wait_updates);
>>   	mutex_init(&journal->j_barrier);
>> +	INIT_LIST_HEAD(&journal->blk_pairs);
>>   	mutex_init(&journal->j_checkpoint_mutex);
>>   	spin_lock_init(&journal->j_revoke_lock);
>>   	spin_lock_init(&journal->j_list_lock);
>> +	spin_lock_init(&journal->j_pair_lock);
>>   	rwlock_init(&journal->j_state_lock);
>>
>>   	journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
>> @@ -1361,6 +1463,8 @@ recovery_error:
>>   int jbd2_journal_destroy(journal_t *journal)
>>   {
>>   	int err = 0;
>> +	struct list_head *cur;
>> +	struct jbd2_blk_pair *b_pair;
>>
>>   	/* Wait for the commit thread to wake up and die. */
>>   	journal_kill_thread(journal);
>> @@ -1405,6 +1509,14 @@ int jbd2_journal_destroy(journal_t *journal)
>>   		iput(journal->j_inode);
>>   	if (journal->j_revoke)
>>   		jbd2_journal_destroy_revoke(journal);
>> +
>> +	spin_lock(&journal->j_pair_lock);
>> +	list_for_each_prev(cur,&journal->blk_pairs) {
>> +		b_pair = list_entry(cur, struct jbd2_blk_pair, list);
>> +		kfree(b_pair);
>> +	}
>> +	spin_unlock(&journal->j_pair_lock);
>> +
>>   	kfree(journal->j_wbuf);
>>   	kfree(journal);
>>
>> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
>> index 38f307b..8b84b43 100644
>> --- a/include/linux/jbd2.h
>> +++ b/include/linux/jbd2.h
>> @@ -698,6 +698,19 @@ jbd2_time_diff(unsigned long start, unsigned long end)
>>
>>   #define JBD2_NR_BATCH	64
>>
>> +
>> +/*
>> + * List for keeping track of which vfs
>> + * block a journal block contians.
>> + */
>> +struct jbd2_blk_pair {
>> +	struct inode *vfs_inode;
>> +	unsigned long long vfs_pblk;
>> +	unsigned long long vfs_lblk;
>> +	unsigned long long jbd2_pblk;
>> +	struct list_head list;
>> +};
>> +
>>   /**
>>    * struct journal_s - The journal_s type is the concrete type associated with
>>    *     journal_t.
>> @@ -1002,6 +1015,9 @@ struct journal_s
>>   	 * superblock pointer here
>>   	 */
>>   	void *j_private;
>> +
>> +	spinlock_t              j_pair_lock;
>> +	struct list_head blk_pairs;
>>   };
>>
>>   /*
>> @@ -1080,6 +1096,11 @@ jbd2_journal_write_metadata_buffer(transaction_t	  *transaction,
>>   			      struct journal_head **jh_out,
>>   			      unsigned long long   blocknr);
>>
>> +extern int jbd2_record_pair(journal_t *journal,
>> +		struct buffer_head *vfs_bh,
>> +		struct buffer_head *jbd2_bh);
>> +
>> +
>>   /* Transaction locking */
>>   extern void		__wait_on_journal (journal_t *);
>>
>> --
>> 1.7.1
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Allison Henderson Oct. 7, 2011, 8:14 p.m. UTC | #5
On 10/07/2011 12:54 PM, Eric Sandeen wrote:
> On 10/7/11 1:35 PM, Darrick J. Wong wrote:
>> On Fri, Oct 07, 2011 at 12:11:05AM -0700, Allison Henderson wrote:
>>> This patch modifies both ext4 and jbd2 such that the journal
>>> blocks which may contain file data, are securely deleted
>>> after the files data blocks are deleted.
>>>
>>> Because old journal blocks may contain file data, we need
>>> a way to find those blocks again when it comes time to secure
>>> delete the file.  This patch adds a new list to the journal
>>> structure to keep track of which vfs blocks the journal blocks
>>> contain.
>>>
>>> After a truncate or a punch hole operation has completed, a
>>> new function ext4_secure_delete_jblks is called that flushes
>>> the journal, and then searches the list for any journal blocks
>>> that were used to journal the blocks that were just removed.
>>> The found journal blocks are then secure deleted.
>
> And what about directory data?  Those would appear to remain in the
> journal at least...  And xattrs?
>
> #!/bin/bash
>
> rm -f testsecdel
> truncate --size 256m  testsecdel
> mkfs.ext4 -F testsecdel&>/dev/null
> mount -o loop testsecdel mnt/
> echo securedata>  mnt/securefilename
> setfattr -n user.securexattrname -v securexattrvalue mnt/securefilename
> LONGATTR=`for I in 1 2 3 4 5 6 7 8 9 0; do echo -n veryveryveryveryveryveryverylongsecurexattrvalue; done`
> setfattr -n user.longsecurexattrname -v $LONGATTR mnt/securefilename
> sync
>
> rm -f mnt/securefilename
> umount mnt
> strings testsecdel
>
> yields:
>
> /mnt/test2/mnt
> lost+found
> securexattrname
> Ylongsecurexattrname
> mselinux
> veryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvalueveryveryveryveryveryveryverylongsecurexattrvaluesecurexattrvalueunconfined_u:object_r:file_t:s0
> lost+found
> securefilename
> /mnt/test2/mnt
>
> (this was with ext4.ko hacked to always enable secure delete)
>
> -Eric

alrighty, I will need to figure out how to get those out of there too. 
I will add this to my test case.  Thx!

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong Oct. 7, 2011, 8:58 p.m. UTC | #6
On Fri, Oct 07, 2011 at 12:55:30PM -0700, Allison Henderson wrote:
> On 10/07/2011 11:35 AM, Darrick J. Wong wrote:
>> On Fri, Oct 07, 2011 at 12:11:05AM -0700, Allison Henderson wrote:
>>> This patch modifies both ext4 and jbd2 such that the journal
>>> blocks which may contain file data, are securely deleted
>>> after the files data blocks are deleted.
>>>
>>> Because old journal blocks may contain file data, we need
>>> a way to find those blocks again when it comes time to secure
>>> delete the file.  This patch adds a new list to the journal
>>> structure to keep track of which vfs blocks the journal blocks
>>> contain.
>>>
>>> After a truncate or a punch hole operation has completed, a
>>> new function ext4_secure_delete_jblks is called that flushes
>>> the journal, and then searches the list for any journal blocks
>>> that were used to journal the blocks that were just removed.
>>> The found journal blocks are then secure deleted.
>>>
>>> Signed-off-by: Allison Henderson<achender@linux.vnet.ibm.com>
>>> ---
>>> :100644 100644 0cba63b... fdf73b5... M	fs/ext4/ext4.h
>>> :100644 100644 984fac2... 81df943... M	fs/ext4/extents.c
>>> :100644 100644 bd1facd... 083c516... M	fs/ext4/inode.c
>>> :100644 100644 eef6979... 2734e7b... M	fs/jbd2/commit.c
>>> :100644 100644 f24df13... 6dd8af7... M	fs/jbd2/journal.c
>>> :100644 100644 38f307b... 8b84b43... M	include/linux/jbd2.h
>>>   fs/ext4/ext4.h       |    3 +
>>>   fs/ext4/extents.c    |   12 +++++
>>>   fs/ext4/inode.c      |  110 +++++++++++++++++++++++++++++++++++++++++++++++++
>>>   fs/jbd2/commit.c     |    6 +++
>>>   fs/jbd2/journal.c    |  112 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>   include/linux/jbd2.h |   21 +++++++++
>>>   6 files changed, 264 insertions(+), 0 deletions(-)
>>>
>>> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
>>> index 0cba63b..fdf73b5 100644
>>> --- a/fs/ext4/ext4.h
>>> +++ b/fs/ext4/ext4.h
>>> @@ -2230,6 +2230,9 @@ extern int ext4_secure_delete_lblks(struct inode *inode,
>>>   			ext4_lblk_t first_block, unsigned long count);
>>>   extern int ext4_secure_delete_pblks(struct inode *inode,
>>>   			ext4_fsblk_t block, unsigned long count);
>>> +extern int ext4_secure_delete_jblks(struct inode *inode,
>>> +				ext4_lblk_t first_block, unsigned long count);
>>> +
>>>
>>>   /* move_extent.c */
>>>   extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>> index 984fac2..81df943 100644
>>> --- a/fs/ext4/extents.c
>>> +++ b/fs/ext4/extents.c
>>> @@ -4159,6 +4159,7 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
>>>   	struct super_block *sb = inode->i_sb;
>>>   	struct ext4_ext_cache cache_ex;
>>>   	ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
>>> +	ext4_lblk_t first_secrm_blk, last_secrm_blk, secrm_blk_count;
>>>   	struct address_space *mapping = inode->i_mapping;
>>>   	struct ext4_map_blocks map;
>>>   	handle_t *handle;
>>> @@ -4317,6 +4318,17 @@ out:
>>>   	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
>>>   	ext4_mark_inode_dirty(handle, inode);
>>>   	ext4_journal_stop(handle);
>>> +
>>> +	if (!err&&  EXT4_I(inode)->i_flags&  EXT4_SECRM_FL) {
>>> +		first_secrm_blk = offset>>  EXT4_BLOCK_SIZE_BITS(sb);
>>> +		last_secrm_blk = (offset + length + sb->s_blocksize - 1)>>
>>> +			EXT4_BLOCK_SIZE_BITS(sb);
>>> +		secrm_blk_count = last_secrm_blk - first_secrm_blk;
>>> +
>>> +		err = ext4_secure_delete_jblks(inode,
>>> +		   first_secrm_blk, secrm_blk_count);
>>> +	}
>>> +
>>>   	return err;
>>>   }
>>>   int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>>> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
>>> index bd1facd..083c516 100644
>>> --- a/fs/ext4/inode.c
>>> +++ b/fs/ext4/inode.c
>>> @@ -909,6 +909,110 @@ int ext4_secure_delete_lblks(struct inode *inode, ext4_lblk_t first_block,
>>>   	return err;
>>>   }
>>>
>>> +/*
>>> + * ext4_secure_delete_jblks()
>>> + *
>>> + * Secure deletes the journal blocks that contain
>>> + * the specified logical blocks
>>> + *
>>> + * @inode: The files inode
>>> + * @first_block: Starting logical block
>>> + * @count: The number of blocks to secure delete
>>> + *         from the journal
>>> + *
>>> + * Returns 0 on sucess or negative on error
>>> + */
>>> +int ext4_secure_delete_jblks(struct inode *inode,
>>> +				ext4_lblk_t first_block, unsigned long count){
>>> +
>>> +	unsigned long long jbd2_pblk_start = 0;
>>> +	unsigned long long jbd2_pblk_count = 0;
>>> +	ext4_lblk_t last_block = 0;
>>> +	struct list_head *tmp = NULL;
>>> +	struct list_head *cur = NULL;
>>> +	struct jbd2_blk_pair *b_pair = NULL;
>>> +	int err = 0;
>>> +	journal_t *journal = EXT4_JOURNAL(inode);
>>> +
>>> +	/* Do not allow last_block to wrap */
>>> +	last_block = first_block + count;
>>> +	if (last_block<  first_block)
>>> +		last_block = EXT_MAX_BLOCKS;
>>> +
>>> +	/*
>>> +	 * Force the journal to finnish up any pending transactions
>>> +	 * before we start secure deleteing journal blocks
>>> +	 */
>>> +	err = ext4_force_commit((inode)->i_sb);
>>> +	if (err)
>>> +		return err;
>>> +
>>> +	spin_lock(&journal->j_pair_lock);
>>> +
>>> +	/* Loop over the journals blocks looking for our logical blocks */
>>> +	list_for_each_safe(cur, tmp,&journal->blk_pairs) {
>>> +		b_pair = list_entry(cur, struct jbd2_blk_pair, list);
>>
>> Um.... I don't think ext4 should be accessing journal internals.  At a bare
>> minimum the stuff that mucks around with jbd2 ought to be in fs/jbd2 and
>> the ext4 parts stuffed in a wrapper in ext4_jbd2.[ch], since ocfs2 also uses
>> jbd2.
>
> Ok, I can move the looping the jbd2 and set up a call back for doing the  
> actual secure deleting
>
>>
>> I'm also wondering -- this logical<->  journal block mapping doesn't seem to be
>> committed to disk anywhere.  What happens if jbd2 crashes before we get to
>> zeroing journal blocks?  Specifically, would the journal recovery code know
>> that a given journal block also needs secure deletion?
>>
>> Here's a counterproposal: What if ext4 told jbd2 which blocks need to be
>> securely deleted while ext4 is creating the transactions?  jbd2 could then set
>> a JBD2_FLAG_SECURE_DELETE flag in journal_block_tag_t.t_flags (the descriptor
>> block), which would tell the recovery and commit code that the associated
>> journal block needs secure deletion when processing is complete.  I _think_ you
>> could just extend the functions called by ext4_jbd2.c to take a flags
>> parameter.  Does this sound better?  Or even sane? :)
>>
>> (Not sure if ocfs2 cares about secure delete at all.)
>>
>> --D
>>
>
> Well actually I had initially started out with something that tried to  
> do the secure delete when the transactions had completed, but it didnt  
> work out because most of the time the transactions had already long  
> since completed before the secure delete flag even gets turned on. And  

Ahh, I see, you're concerned that:

$ touch /mnt/afile		# A
$ sync
$ chattr +s /mnt/afile
$ rm -rf /mnt/afile

...doesn't secure delete the journal blocks associated with the inode creation
in step A, because one cannot create files with the secure deletion flag set.
I could think of a few ways out of this...

0) Simply accept that there's no way to create files with "secure delete" set;
therefore, the inode creation and dir entry creation will always be hanging out
in the journal.  This requires users to notice this little detail and get the
following details correct: if filenames are important, then they'll have to
create the file with a bogus name, chattr +s, create a hard link with the real
name, and then unlink the first bogus name. (ha ha....)

1) When creating a file, lie to the journal by always telling it to securely
delete the blocks associated with the file create.  If the user really wants
secure deletion, the next step after creating the file ought to be setting the
+s flag.  This has the advantage that even the generic "new inode" contents are
also protected by secure deletion, which #0 doesn't provide.  However, we'd
take a speed hit for a feature that isn't necessarily the common case.

2) Teach the journal to keep track of the pblocks and go back and retroactively
delete older copies, sort of like what your current patch does.

As a side note, we could probably discard journal blocks when the transaction
finishes committing and flushing.

> then when we turn on the flag and start deleting, I have to get those  
> old journal blocks back.  So I think the new idea might have the same  
> problem.  But I think your right about needing to somehow commit it to  
> the disk, otherwise those old journal blocks will still be there after a  
> crash.



--D

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Allison Henderson Oct. 8, 2011, 12:06 a.m. UTC | #7
On 10/07/2011 01:58 PM, Darrick J. Wong wrote:
> On Fri, Oct 07, 2011 at 12:55:30PM -0700, Allison Henderson wrote:
>> On 10/07/2011 11:35 AM, Darrick J. Wong wrote:
>>> On Fri, Oct 07, 2011 at 12:11:05AM -0700, Allison Henderson wrote:
>>>> This patch modifies both ext4 and jbd2 such that the journal
>>>> blocks which may contain file data, are securely deleted
>>>> after the files data blocks are deleted.
>>>>
>>>> Because old journal blocks may contain file data, we need
>>>> a way to find those blocks again when it comes time to secure
>>>> delete the file.  This patch adds a new list to the journal
>>>> structure to keep track of which vfs blocks the journal blocks
>>>> contain.
>>>>
>>>> After a truncate or a punch hole operation has completed, a
>>>> new function ext4_secure_delete_jblks is called that flushes
>>>> the journal, and then searches the list for any journal blocks
>>>> that were used to journal the blocks that were just removed.
>>>> The found journal blocks are then secure deleted.
>>>>
>>>> Signed-off-by: Allison Henderson<achender@linux.vnet.ibm.com>
>>>> ---
>>>> :100644 100644 0cba63b... fdf73b5... M	fs/ext4/ext4.h
>>>> :100644 100644 984fac2... 81df943... M	fs/ext4/extents.c
>>>> :100644 100644 bd1facd... 083c516... M	fs/ext4/inode.c
>>>> :100644 100644 eef6979... 2734e7b... M	fs/jbd2/commit.c
>>>> :100644 100644 f24df13... 6dd8af7... M	fs/jbd2/journal.c
>>>> :100644 100644 38f307b... 8b84b43... M	include/linux/jbd2.h
>>>>    fs/ext4/ext4.h       |    3 +
>>>>    fs/ext4/extents.c    |   12 +++++
>>>>    fs/ext4/inode.c      |  110 +++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    fs/jbd2/commit.c     |    6 +++
>>>>    fs/jbd2/journal.c    |  112 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    include/linux/jbd2.h |   21 +++++++++
>>>>    6 files changed, 264 insertions(+), 0 deletions(-)
>>>>
>>>> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
>>>> index 0cba63b..fdf73b5 100644
>>>> --- a/fs/ext4/ext4.h
>>>> +++ b/fs/ext4/ext4.h
>>>> @@ -2230,6 +2230,9 @@ extern int ext4_secure_delete_lblks(struct inode *inode,
>>>>    			ext4_lblk_t first_block, unsigned long count);
>>>>    extern int ext4_secure_delete_pblks(struct inode *inode,
>>>>    			ext4_fsblk_t block, unsigned long count);
>>>> +extern int ext4_secure_delete_jblks(struct inode *inode,
>>>> +				ext4_lblk_t first_block, unsigned long count);
>>>> +
>>>>
>>>>    /* move_extent.c */
>>>>    extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>>> index 984fac2..81df943 100644
>>>> --- a/fs/ext4/extents.c
>>>> +++ b/fs/ext4/extents.c
>>>> @@ -4159,6 +4159,7 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
>>>>    	struct super_block *sb = inode->i_sb;
>>>>    	struct ext4_ext_cache cache_ex;
>>>>    	ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
>>>> +	ext4_lblk_t first_secrm_blk, last_secrm_blk, secrm_blk_count;
>>>>    	struct address_space *mapping = inode->i_mapping;
>>>>    	struct ext4_map_blocks map;
>>>>    	handle_t *handle;
>>>> @@ -4317,6 +4318,17 @@ out:
>>>>    	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
>>>>    	ext4_mark_inode_dirty(handle, inode);
>>>>    	ext4_journal_stop(handle);
>>>> +
>>>> +	if (!err&&   EXT4_I(inode)->i_flags&   EXT4_SECRM_FL) {
>>>> +		first_secrm_blk = offset>>   EXT4_BLOCK_SIZE_BITS(sb);
>>>> +		last_secrm_blk = (offset + length + sb->s_blocksize - 1)>>
>>>> +			EXT4_BLOCK_SIZE_BITS(sb);
>>>> +		secrm_blk_count = last_secrm_blk - first_secrm_blk;
>>>> +
>>>> +		err = ext4_secure_delete_jblks(inode,
>>>> +		   first_secrm_blk, secrm_blk_count);
>>>> +	}
>>>> +
>>>>    	return err;
>>>>    }
>>>>    int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>>>> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
>>>> index bd1facd..083c516 100644
>>>> --- a/fs/ext4/inode.c
>>>> +++ b/fs/ext4/inode.c
>>>> @@ -909,6 +909,110 @@ int ext4_secure_delete_lblks(struct inode *inode, ext4_lblk_t first_block,
>>>>    	return err;
>>>>    }
>>>>
>>>> +/*
>>>> + * ext4_secure_delete_jblks()
>>>> + *
>>>> + * Secure deletes the journal blocks that contain
>>>> + * the specified logical blocks
>>>> + *
>>>> + * @inode: The files inode
>>>> + * @first_block: Starting logical block
>>>> + * @count: The number of blocks to secure delete
>>>> + *         from the journal
>>>> + *
>>>> + * Returns 0 on sucess or negative on error
>>>> + */
>>>> +int ext4_secure_delete_jblks(struct inode *inode,
>>>> +				ext4_lblk_t first_block, unsigned long count){
>>>> +
>>>> +	unsigned long long jbd2_pblk_start = 0;
>>>> +	unsigned long long jbd2_pblk_count = 0;
>>>> +	ext4_lblk_t last_block = 0;
>>>> +	struct list_head *tmp = NULL;
>>>> +	struct list_head *cur = NULL;
>>>> +	struct jbd2_blk_pair *b_pair = NULL;
>>>> +	int err = 0;
>>>> +	journal_t *journal = EXT4_JOURNAL(inode);
>>>> +
>>>> +	/* Do not allow last_block to wrap */
>>>> +	last_block = first_block + count;
>>>> +	if (last_block<   first_block)
>>>> +		last_block = EXT_MAX_BLOCKS;
>>>> +
>>>> +	/*
>>>> +	 * Force the journal to finnish up any pending transactions
>>>> +	 * before we start secure deleteing journal blocks
>>>> +	 */
>>>> +	err = ext4_force_commit((inode)->i_sb);
>>>> +	if (err)
>>>> +		return err;
>>>> +
>>>> +	spin_lock(&journal->j_pair_lock);
>>>> +
>>>> +	/* Loop over the journals blocks looking for our logical blocks */
>>>> +	list_for_each_safe(cur, tmp,&journal->blk_pairs) {
>>>> +		b_pair = list_entry(cur, struct jbd2_blk_pair, list);
>>>
>>> Um.... I don't think ext4 should be accessing journal internals.  At a bare
>>> minimum the stuff that mucks around with jbd2 ought to be in fs/jbd2 and
>>> the ext4 parts stuffed in a wrapper in ext4_jbd2.[ch], since ocfs2 also uses
>>> jbd2.
>>
>> Ok, I can move the looping the jbd2 and set up a call back for doing the
>> actual secure deleting
>>
>>>
>>> I'm also wondering -- this logical<->   journal block mapping doesn't seem to be
>>> committed to disk anywhere.  What happens if jbd2 crashes before we get to
>>> zeroing journal blocks?  Specifically, would the journal recovery code know
>>> that a given journal block also needs secure deletion?
>>>
>>> Here's a counterproposal: What if ext4 told jbd2 which blocks need to be
>>> securely deleted while ext4 is creating the transactions?  jbd2 could then set
>>> a JBD2_FLAG_SECURE_DELETE flag in journal_block_tag_t.t_flags (the descriptor
>>> block), which would tell the recovery and commit code that the associated
>>> journal block needs secure deletion when processing is complete.  I _think_ you
>>> could just extend the functions called by ext4_jbd2.c to take a flags
>>> parameter.  Does this sound better?  Or even sane? :)
>>>
>>> (Not sure if ocfs2 cares about secure delete at all.)
>>>
>>> --D
>>>
>>
>> Well actually I had initially started out with something that tried to
>> do the secure delete when the transactions had completed, but it didnt
>> work out because most of the time the transactions had already long
>> since completed before the secure delete flag even gets turned on. And
>
> Ahh, I see, you're concerned that:
>
> $ touch /mnt/afile		# A
> $ sync
> $ chattr +s /mnt/afile
> $ rm -rf /mnt/afile
>
> ...doesn't secure delete the journal blocks associated with the inode creation
> in step A, because one cannot create files with the secure deletion flag set.
> I could think of a few ways out of this...
>
> 0) Simply accept that there's no way to create files with "secure delete" set;
> therefore, the inode creation and dir entry creation will always be hanging out
> in the journal.  This requires users to notice this little detail and get the
> following details correct: if filenames are important, then they'll have to
> create the file with a bogus name, chattr +s, create a hard link with the real
> name, and then unlink the first bogus name. (ha ha....)
>
> 1) When creating a file, lie to the journal by always telling it to securely
> delete the blocks associated with the file create.  If the user really wants
> secure deletion, the next step after creating the file ought to be setting the
> +s flag.  This has the advantage that even the generic "new inode" contents are
> also protected by secure deletion, which #0 doesn't provide.  However, we'd
> take a speed hit for a feature that isn't necessarily the common case.
>
> 2) Teach the journal to keep track of the pblocks and go back and retroactively
> delete older copies, sort of like what your current patch does.
>
> As a side note, we could probably discard journal blocks when the transaction
> finishes committing and flushing.
>

Alrighty, you've given me a few more ideas to play with.  I think I may 
go back and do a little more prototyping and see what I can come up 
with.  Thx for the thorough review!!

Allison Henderson

>> then when we turn on the flag and start deleting, I have to get those
>> old journal blocks back.  So I think the new idea might have the same
>> problem.  But I think your right about needing to somehow commit it to
>> the disk, otherwise those old journal blocks will still be there after a
>> crash.
>
>
>
> --D

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jonathan Corbet Oct. 10, 2011, 7:47 p.m. UTC | #8
One quick question:

On Fri,  7 Oct 2011 00:11:05 -0700
Allison Henderson <achender@linux.vnet.ibm.com> wrote:

> +	/* Secure delete any blocks still in our range */
> +	if (jbd2_pblk_count > 0)
> +		err = ext4_secure_delete_pblks(journal->j_inode,
> +			jbd2_pblk_start, jbd2_pblk_count);
> +
> +out:
> +	spin_unlock(&journal->j_pair_lock);

ext4_secure_delete_pblks() appears to do its job synchronously - it has
calls to things like sync_dirty_buffer() and such.  How can you do that
while holding ->j_pair_lock?

Thanks,

jon
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jonathan Corbet Oct. 10, 2011, 8 p.m. UTC | #9
On Fri,  7 Oct 2011 00:11:05 -0700
Allison Henderson <achender@linux.vnet.ibm.com> wrote:

> +	/*
> +	 * If the journal block was not found in the list,
> +	 * add a new pair to the list
> +	 */
> +	if (!b_pair) {
> +		b_pair = kzalloc(sizeof(struct jbd2_blk_pair), GFP_NOFS);
> +		if (!b_pair) {
> +			spin_unlock(&journal->j_pair_lock);
> +			return -ENOMEM;
> +		}

Here too...  that really needs to be GFP_ATOMIC.

I'm wondering, though...it looks like, over a short period of time, you
will create an unordered linked list containing one entry for every
physical block in the journal.  You'll take a lock and search the whole
list for every block that is committed.  Wouldn't it be better just to
have an array indexed by the journal logical block offset?  Less memory,
faster access...?  Or am I missing something fundamental here?

Thanks,

jon
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Allison Henderson Oct. 10, 2011, 11:35 p.m. UTC | #10
On 10/10/2011 12:47 PM, Jonathan Corbet wrote:
> One quick question:
>
> On Fri,  7 Oct 2011 00:11:05 -0700
> Allison Henderson<achender@linux.vnet.ibm.com>  wrote:
>
>> +	/* Secure delete any blocks still in our range */
>> +	if (jbd2_pblk_count>  0)
>> +		err = ext4_secure_delete_pblks(journal->j_inode,
>> +			jbd2_pblk_start, jbd2_pblk_count);
>> +
>> +out:
>> +	spin_unlock(&journal->j_pair_lock);
>
> ext4_secure_delete_pblks() appears to do its job synchronously - it has
> calls to things like sync_dirty_buffer() and such.  How can you do that
> while holding ->j_pair_lock?
>
> Thanks,
>
> jon
>

Hi Jon,

Well j_pair_lock is a lock I added to protect the new list of vfs
-> jbd2 block pairs. It is locked by the journal commit thread to
update the list when ever a journal block is modified. The above
code here is called by the same thread that performs a punch hole or
truncate operation, not the journal commit thread. So I'm not
immediately seeing why there would be any lock problems. Is there
another case I'm missing?

Allison

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Allison Henderson Oct. 10, 2011, 11:36 p.m. UTC | #11
On 10/10/2011 01:00 PM, Jonathan Corbet wrote:
> On Fri,  7 Oct 2011 00:11:05 -0700
> Allison Henderson<achender@linux.vnet.ibm.com>  wrote:
>
>> +	/*
>> +	 * If the journal block was not found in the list,
>> +	 * add a new pair to the list
>> +	 */
>> +	if (!b_pair) {
>> +		b_pair = kzalloc(sizeof(struct jbd2_blk_pair), GFP_NOFS);
>> +		if (!b_pair) {
>> +			spin_unlock(&journal->j_pair_lock);
>> +			return -ENOMEM;
>> +		}
>
> Here too...  that really needs to be GFP_ATOMIC.
>
> I'm wondering, though...it looks like, over a short period of time, you
> will create an unordered linked list containing one entry for every
> physical block in the journal.  You'll take a lock and search the whole
> list for every block that is committed.  Wouldn't it be better just to
> have an array indexed by the journal logical block offset?  Less memory,
> faster access...?  Or am I missing something fundamental here?
>
> Thanks,
>
> jon

Hi Jon,

I think that's a good suggestion. Initially I had made it a linked
list since most of the journals existing lists were implemented as
linked lists, but since this list ends up being a fixed size, an
array is a good optimization here. Thx!

Allison

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jonathan Corbet Oct. 10, 2011, 11:41 p.m. UTC | #12
On Mon, 10 Oct 2011 16:35:24 -0700
Allison Henderson <achender@linux.vnet.ibm.com> wrote:

> On 10/10/2011 12:47 PM, Jonathan Corbet wrote:
> > One quick question:
> >
> > On Fri,  7 Oct 2011 00:11:05 -0700
> > Allison Henderson<achender@linux.vnet.ibm.com>  wrote:
> >
> >> +	/* Secure delete any blocks still in our range */
> >> +	if (jbd2_pblk_count>  0)
> >> +		err = ext4_secure_delete_pblks(journal->j_inode,
> >> +			jbd2_pblk_start, jbd2_pblk_count);
> >> +
> >> +out:
> >> +	spin_unlock(&journal->j_pair_lock);
> >
> > ext4_secure_delete_pblks() appears to do its job synchronously - it has
> > calls to things like sync_dirty_buffer() and such.  How can you do that
> > while holding ->j_pair_lock?
> >
> > Thanks,
> >
> > jon
> >
> 
> Hi Jon,
> 
> Well j_pair_lock is a lock I added to protect the new list of vfs
> -> jbd2 block pairs. It is locked by the journal commit thread to
> update the list when ever a journal block is modified. The above
> code here is called by the same thread that performs a punch hole or
> truncate operation, not the journal commit thread. So I'm not
> immediately seeing why there would be any lock problems. Is there
> another case I'm missing?

The problem is that ext4_secure_delete_pblks() can sleep, unless I've
misunderstood things very badly.  That's not something you want to do
while holding a spinlock...

jon
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Allison Henderson Oct. 11, 2011, 12:54 a.m. UTC | #13
On 10/10/2011 04:41 PM, Jonathan Corbet wrote:
> On Mon, 10 Oct 2011 16:35:24 -0700
> Allison Henderson<achender@linux.vnet.ibm.com>  wrote:
>
>> On 10/10/2011 12:47 PM, Jonathan Corbet wrote:
>>> One quick question:
>>>
>>> On Fri,  7 Oct 2011 00:11:05 -0700
>>> Allison Henderson<achender@linux.vnet.ibm.com>   wrote:
>>>
>>>> +	/* Secure delete any blocks still in our range */
>>>> +	if (jbd2_pblk_count>   0)
>>>> +		err = ext4_secure_delete_pblks(journal->j_inode,
>>>> +			jbd2_pblk_start, jbd2_pblk_count);
>>>> +
>>>> +out:
>>>> +	spin_unlock(&journal->j_pair_lock);
>>>
>>> ext4_secure_delete_pblks() appears to do its job synchronously - it has
>>> calls to things like sync_dirty_buffer() and such.  How can you do that
>>> while holding ->j_pair_lock?
>>>
>>> Thanks,
>>>
>>> jon
>>>
>>
>> Hi Jon,
>>
>> Well j_pair_lock is a lock I added to protect the new list of vfs
>> ->  jbd2 block pairs. It is locked by the journal commit thread to
>> update the list when ever a journal block is modified. The above
>> code here is called by the same thread that performs a punch hole or
>> truncate operation, not the journal commit thread. So I'm not
>> immediately seeing why there would be any lock problems. Is there
>> another case I'm missing?
>
> The problem is that ext4_secure_delete_pblks() can sleep, unless I've
> misunderstood things very badly.  That's not something you want to do
> while holding a spinlock...
>
> jon
>

Oh I see the concern now.  Ok, I can put in a semaphore instead.  Thx 
for catching that.  :)

Allison

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0cba63b..fdf73b5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2230,6 +2230,9 @@  extern int ext4_secure_delete_lblks(struct inode *inode,
 			ext4_lblk_t first_block, unsigned long count);
 extern int ext4_secure_delete_pblks(struct inode *inode,
 			ext4_fsblk_t block, unsigned long count);
+extern int ext4_secure_delete_jblks(struct inode *inode,
+				ext4_lblk_t first_block, unsigned long count);
+
 
 /* move_extent.c */
 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 984fac2..81df943 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4159,6 +4159,7 @@  int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	struct super_block *sb = inode->i_sb;
 	struct ext4_ext_cache cache_ex;
 	ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
+	ext4_lblk_t first_secrm_blk, last_secrm_blk, secrm_blk_count;
 	struct address_space *mapping = inode->i_mapping;
 	struct ext4_map_blocks map;
 	handle_t *handle;
@@ -4317,6 +4318,17 @@  out:
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
 	ext4_journal_stop(handle);
+
+	if (!err && EXT4_I(inode)->i_flags & EXT4_SECRM_FL) {
+		first_secrm_blk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
+		last_secrm_blk = (offset + length + sb->s_blocksize - 1) >>
+			EXT4_BLOCK_SIZE_BITS(sb);
+		secrm_blk_count = last_secrm_blk - first_secrm_blk;
+
+		err = ext4_secure_delete_jblks(inode,
+		   first_secrm_blk, secrm_blk_count);
+	}
+
 	return err;
 }
 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bd1facd..083c516 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -909,6 +909,110 @@  int ext4_secure_delete_lblks(struct inode *inode, ext4_lblk_t first_block,
 	return err;
 }
 
+/*
+ * ext4_secure_delete_jblks()
+ *
+ * Secure deletes the journal blocks that contain
+ * the specified logical blocks
+ *
+ * @inode: The files inode
+ * @first_block: Starting logical block
+ * @count: The number of blocks to secure delete
+ *         from the journal
+ *
+ * Returns 0 on sucess or negative on error
+ */
+int ext4_secure_delete_jblks(struct inode *inode,
+				ext4_lblk_t first_block, unsigned long count){
+
+	unsigned long long jbd2_pblk_start = 0;
+	unsigned long long jbd2_pblk_count = 0;
+	ext4_lblk_t last_block = 0;
+	struct list_head *tmp = NULL;
+	struct list_head *cur = NULL;
+	struct jbd2_blk_pair *b_pair = NULL;
+	int err = 0;
+	journal_t *journal = EXT4_JOURNAL(inode);
+
+	/* Do not allow last_block to wrap */
+	last_block = first_block + count;
+	if (last_block < first_block)
+		last_block = EXT_MAX_BLOCKS;
+
+	/*
+	 * Force the journal to finnish up any pending transactions
+	 * before we start secure deleteing journal blocks
+	 */
+	err = ext4_force_commit((inode)->i_sb);
+	if (err)
+		return err;
+
+	spin_lock(&journal->j_pair_lock);
+
+	/* Loop over the journals blocks looking for our logical blocks */
+	list_for_each_safe(cur, tmp, &journal->blk_pairs) {
+		b_pair = list_entry(cur, struct jbd2_blk_pair, list);
+		if ((b_pair->vfs_inode == inode) &&
+			(b_pair->vfs_lblk >= first_block) &&
+			(b_pair->vfs_lblk < last_block)) {
+
+			/*
+			 * It is likely that the journal blocks will be
+			 * consecutive, so lets try to secure delete them
+			 * in ranges
+			 */
+			if (jbd2_pblk_count == 0) {
+				/*
+				 * If there are no blocks in our range,
+				 * then this one will be the first
+				 */
+				jbd2_pblk_start = b_pair->jbd2_pblk;
+				jbd2_pblk_count = 1;
+			} else if ((b_pair->jbd2_pblk) ==
+				   (jbd2_pblk_start + jbd2_pblk_count)) {
+				/*
+				 * If this journal block is physically
+				 * consecutive, then just increase the range
+				 */
+				jbd2_pblk_count++;
+			} else if ((b_pair->jbd2_pblk) ==
+					(jbd2_pblk_start - 1) &&
+					jbd2_pblk_start > 0) {
+				/*
+				 * If this journal block is physically
+				 * consecutive (from the start of the
+				 * range), just increase the range from the
+				 * other end.
+				 */
+				jbd2_pblk_count++;
+				jbd2_pblk_start--;
+			} else {
+				/*
+				 * If the block was not consecutive, secure
+				 * delete the range, and restart the current
+				 * range
+				 */
+
+				err = ext4_secure_delete_pblks(journal->j_inode,
+					jbd2_pblk_start, jbd2_pblk_count);
+				if (err)
+					goto out;
+				jbd2_pblk_start = b_pair->jbd2_pblk;
+				jbd2_pblk_count = 1;
+			}
+		}
+	}
+
+	/* Secure delete any blocks still in our range */
+	if (jbd2_pblk_count > 0)
+		err = ext4_secure_delete_pblks(journal->j_inode,
+			jbd2_pblk_start, jbd2_pblk_count);
+
+out:
+	spin_unlock(&journal->j_pair_lock);
+	return err;
+}
+
 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
 			       ext4_lblk_t block, int create, int *err)
 {
@@ -3447,6 +3551,12 @@  void ext4_truncate(struct inode *inode)
 	else
 		ext4_ind_truncate(inode);
 
+	if (EXT4_I(inode)->i_flags & EXT4_SECRM_FL) {
+		err = ext4_secure_delete_jblks(inode,
+			inode->i_size >> EXT4_BLOCK_SIZE_BITS(inode->i_sb),
+			EXT_MAX_BLOCKS);
+	}
+
 	trace_ext4_truncate_exit(inode);
 }
 
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index eef6979..2734e7b 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -593,6 +593,12 @@  void jbd2_journal_commit_transaction(journal_t *journal)
 			jbd2_journal_abort(journal, flags);
 			continue;
 		}
+
+		err = jbd2_record_pair(journal, jh2bh(jh), jh2bh(new_jh));
+		if (err) {
+			jbd2_journal_abort(journal, flags);
+			continue;
+		}
 		set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
 		wbuf[bufs++] = jh2bh(new_jh);
 
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f24df13..6dd8af7 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -248,6 +248,100 @@  static void journal_kill_thread(journal_t *journal)
 	}
 	write_unlock(&journal->j_state_lock);
 }
+/*
+ * jbd2_record_pair()
+ * Updates the journals list of block pairs to contain an new pair of journal
+ * to vfs blocks.  This list is an in memory record of the journals blocks to
+ * help facilitate finding out what vfs blocks the journal blocks contain.
+ * Secure delete uses this list to clear out stale journal blocks that may
+ * still contain file data that needs to be secure deleted. This list is updated
+ * when a vfs data block is written to the journal or when a descriptor block
+ * is written to the journal.
+ *
+ * journal: The journal to update
+ * vfs_bh:  The vfs buffer_head that the jbd2_bh is journaling.  This
+ *          may be NULL if jbd2_bh does not represent a vfs block.
+ * jbd2_bh: The jbd2 buffer_head that has just been allocated for some
+ *          use in the journal. jbd2_bh may not be NULL.
+ *
+ * Returns 0 on sucess or negative on failure
+ */
+int jbd2_record_pair(journal_t *journal,
+		struct buffer_head *vfs_bh,
+		struct buffer_head *jbd2_bh){
+
+	struct page *page;
+	struct buffer_head *bh;
+	struct buffer_head *head;
+	pgoff_t index;
+	struct jbd2_blk_pair *b_pair = NULL;
+	struct list_head *cur;
+	unsigned long long vfs_lblk = 0;
+	unsigned long long vfs_pblk = 0;
+	struct inode *vfs_inode = NULL;
+
+	/* If we have the vfs bh, figure out the logical block offset */
+	if (vfs_bh) {
+		page = vfs_bh->b_page;
+		vfs_inode = page->mapping->host;
+		vfs_pblk = vfs_bh->b_blocknr;
+		index = page->index;
+
+		/* Find the block offset of the page */
+		vfs_lblk = index <<
+			(PAGE_CACHE_SHIFT - vfs_inode->i_sb->s_blocksize_bits);
+
+		/* Then add in the block offset of the bh in the page */
+		bh = page_buffers(page);
+		head = bh;
+		do {
+			if (bh == vfs_bh)
+				break;
+			bh = bh->b_this_page;
+			vfs_lblk++;
+		} while (bh != head);
+	}
+
+	spin_lock(&journal->j_pair_lock);
+	/*
+	 * Add the pair to the list.  If there is already a pair
+	 * for this journal block, just update it with the new vfs
+	 * block info
+	 */
+	list_for_each(cur, &journal->blk_pairs) {
+		b_pair = list_entry(cur, struct jbd2_blk_pair, list);
+		if (b_pair->jbd2_pblk == jbd2_bh->b_blocknr) {
+			b_pair->vfs_inode = vfs_inode;
+			b_pair->vfs_pblk = vfs_pblk;
+			b_pair->vfs_lblk = vfs_lblk;
+			b_pair->jbd2_pblk = jbd2_bh->b_blocknr;
+
+			break;
+		} else
+			b_pair = NULL;
+	}
+
+	/*
+	 * If the journal block was not found in the list,
+	 * add a new pair to the list
+	 */
+	if (!b_pair) {
+		b_pair = kzalloc(sizeof(struct jbd2_blk_pair), GFP_NOFS);
+		if (!b_pair) {
+			spin_unlock(&journal->j_pair_lock);
+			return -ENOMEM;
+		}
+		b_pair->jbd2_pblk = jbd2_bh->b_blocknr;
+		b_pair->vfs_inode = vfs_inode;
+		b_pair->vfs_pblk = vfs_pblk;
+		b_pair->vfs_lblk = vfs_lblk;
+
+		list_add(&b_pair->list, &journal->blk_pairs);
+	}
+
+	spin_unlock(&journal->j_pair_lock);
+	return 0;
+}
 
 /*
  * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal.
@@ -739,7 +833,13 @@  struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
 	lock_buffer(bh);
 	memset(bh->b_data, 0, journal->j_blocksize);
 	set_buffer_uptodate(bh);
+	err = jbd2_record_pair(journal, NULL, bh);
+
 	unlock_buffer(bh);
+
+	if (err)
+		return NULL;
+
 	BUFFER_TRACE(bh, "return this buffer");
 	return jbd2_journal_add_journal_head(bh);
 }
@@ -895,9 +995,11 @@  static journal_t * journal_init_common (void)
 	init_waitqueue_head(&journal->j_wait_commit);
 	init_waitqueue_head(&journal->j_wait_updates);
 	mutex_init(&journal->j_barrier);
+	INIT_LIST_HEAD(&journal->blk_pairs);
 	mutex_init(&journal->j_checkpoint_mutex);
 	spin_lock_init(&journal->j_revoke_lock);
 	spin_lock_init(&journal->j_list_lock);
+	spin_lock_init(&journal->j_pair_lock);
 	rwlock_init(&journal->j_state_lock);
 
 	journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
@@ -1361,6 +1463,8 @@  recovery_error:
 int jbd2_journal_destroy(journal_t *journal)
 {
 	int err = 0;
+	struct list_head *cur;
+	struct jbd2_blk_pair *b_pair;
 
 	/* Wait for the commit thread to wake up and die. */
 	journal_kill_thread(journal);
@@ -1405,6 +1509,14 @@  int jbd2_journal_destroy(journal_t *journal)
 		iput(journal->j_inode);
 	if (journal->j_revoke)
 		jbd2_journal_destroy_revoke(journal);
+
+	spin_lock(&journal->j_pair_lock);
+	list_for_each_prev(cur, &journal->blk_pairs) {
+		b_pair = list_entry(cur, struct jbd2_blk_pair, list);
+		kfree(b_pair);
+	}
+	spin_unlock(&journal->j_pair_lock);
+
 	kfree(journal->j_wbuf);
 	kfree(journal);
 
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 38f307b..8b84b43 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -698,6 +698,19 @@  jbd2_time_diff(unsigned long start, unsigned long end)
 
 #define JBD2_NR_BATCH	64
 
+
+/*
+ * List for keeping track of which vfs
+ * block a journal block contians.
+ */
+struct jbd2_blk_pair {
+	struct inode *vfs_inode;
+	unsigned long long vfs_pblk;
+	unsigned long long vfs_lblk;
+	unsigned long long jbd2_pblk;
+	struct list_head list;
+};
+
 /**
  * struct journal_s - The journal_s type is the concrete type associated with
  *     journal_t.
@@ -1002,6 +1015,9 @@  struct journal_s
 	 * superblock pointer here
 	 */
 	void *j_private;
+
+	spinlock_t              j_pair_lock;
+	struct list_head blk_pairs;
 };
 
 /*
@@ -1080,6 +1096,11 @@  jbd2_journal_write_metadata_buffer(transaction_t	  *transaction,
 			      struct journal_head **jh_out,
 			      unsigned long long   blocknr);
 
+extern int jbd2_record_pair(journal_t *journal,
+		struct buffer_head *vfs_bh,
+		struct buffer_head *jbd2_bh);
+
+
 /* Transaction locking */
 extern void		__wait_on_journal (journal_t *);