diff mbox

[5/8] ext4: call ext4_forget() from ext4_free_blocks()

Message ID 1258942710-31930-6-git-send-email-tytso@mit.edu
State Accepted, archived
Headers show

Commit Message

Theodore Ts'o Nov. 23, 2009, 2:18 a.m. UTC
Add the facility for ext4_forget() to be called form
ext4_free_blocks().  This simplifies the code in a large number of
places, and centralizes most of the work of calling ext4_forget() into
a single place.

Also fix a bug in the extents migration code; it wasn't calling
ext4_forget() when releasing the indirect blocks during the
conversion.  As a result, if the system cashed during or shortly after
the extents migration, and the released indirect blocks get reused as
data blocks, the journal replay would corrupt the data blocks.  With
this new patch, fixing this bug was as simple as adding the
EXT4_FREE_BLOCKS_FORGET flags to the call to ext4_free_blocks().

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/ext4.h              |   10 +++++-
 fs/ext4/extents.c           |   24 ++++++---------
 fs/ext4/inode.c             |   67 ++++++++++++++++--------------------------
 fs/ext4/mballoc.c           |   49 +++++++++++++++++++++++--------
 fs/ext4/migrate.c           |   23 ++++++++++----
 fs/ext4/xattr.c             |    8 +++--
 include/trace/events/ext4.h |   16 ++++++----
 7 files changed, 109 insertions(+), 88 deletions(-)

Comments

Andreas Dilger Nov. 23, 2009, 7:22 p.m. UTC | #1
On 2009-11-22, at 19:18, Theodore Ts'o wrote:
> Add the facility for ext4_forget() to be called form
> ext4_free_blocks().  This simplifies the code in a large number of
> places, and centralizes most of the work of calling ext4_forget() into
> a single place.
>
> Also fix a bug in the extents migration code; it wasn't calling
> ext4_forget() when releasing the indirect blocks during the
> conversion.

Would this also solve Curt's problem, mentioned in "Bug in extent  
zeroout: blocks not marked as new" where bforget was not being called  
when the blocks are freed?

> As a result, if the system cashed during or shortly after the
> extents migration, and the released indirect blocks get reused as
> data blocks, the journal replay would corrupt the data blocks.
> With this new patch, fixing this bug was as simple as adding the
> EXT4_FREE_BLOCKS_FORGET flags to the call to ext4_free_blocks().
>
> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> ---
> fs/ext4/ext4.h              |   10 +++++-
> fs/ext4/extents.c           |   24 ++++++---------
> fs/ext4/inode.c             |   67 +++++++++++++++ 
> +--------------------------
> fs/ext4/mballoc.c           |   49 +++++++++++++++++++++++--------
> fs/ext4/migrate.c           |   23 ++++++++++----
> fs/ext4/xattr.c             |    8 +++--
> include/trace/events/ext4.h |   16 ++++++----
> 7 files changed, 109 insertions(+), 88 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 210e1b5..4cfc2f0 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -376,6 +376,12 @@ struct ext4_new_group_data {
> 					 EXT4_GET_BLOCKS_DIO_CREATE_EXT)
>
> /*
> + * Flags used by ext4_free_blocks
> + */
> +#define EXT4_FREE_BLOCKS_METADATA	0x0001
> +#define EXT4_FREE_BLOCKS_FORGET		0x0002
> +
> +/*
>  * ioctl commands
>  */
> #define	EXT4_IOC_GETFLAGS		FS_IOC_GETFLAGS
> @@ -1384,8 +1390,8 @@ extern void ext4_discard_preallocations(struct  
> inode *);
> extern int __init init_ext4_mballoc(void);
> extern void exit_ext4_mballoc(void);
> extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
> -			     ext4_fsblk_t block, unsigned long count,
> -			     int metadata);
> +			     struct buffer_head *bh, ext4_fsblk_t block,
> +			     unsigned long count, int flags);
> extern int ext4_mb_add_groupinfo(struct super_block *sb,
> 		ext4_group_t i, struct ext4_group_desc *desc);
> extern int ext4_mb_get_buddy_cache_lock(struct super_block *,  
> ext4_group_t);
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 74dcff8..2c4a932 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -1007,7 +1007,8 @@ cleanup:
> 		for (i = 0; i < depth; i++) {
> 			if (!ablocks[i])
> 				continue;
> -			ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
> +			ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
> +					 EXT4_FREE_BLOCKS_METADATA);
> 		}
> 	}
> 	kfree(ablocks);
> @@ -1957,7 +1958,6 @@ errout:
> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
> 			struct ext4_ext_path *path)
> {
> -	struct buffer_head *bh;
> 	int err;
> 	ext4_fsblk_t leaf;
>
> @@ -1973,9 +1973,8 @@ static int ext4_ext_rm_idx(handle_t *handle,  
> struct inode *inode,
> 	if (err)
> 		return err;
> 	ext_debug("index is empty, remove it, free block %llu\n", leaf);
> -	bh = sb_find_get_block(inode->i_sb, leaf);
> -	ext4_forget(handle, 1, inode, bh, leaf);
> -	ext4_free_blocks(handle, inode, leaf, 1, 1);
> +	ext4_free_blocks(handle, inode, 0, leaf, 1,
> +			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
> 	return err;
> }
>
> @@ -2042,12 +2041,11 @@ static int ext4_remove_blocks(handle_t  
> *handle, struct inode *inode,
> 				struct ext4_extent *ex,
> 				ext4_lblk_t from, ext4_lblk_t to)
> {
> -	struct buffer_head *bh;
> 	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
> -	int i, metadata = 0;
> +	int flags = EXT4_FREE_BLOCKS_FORGET;
>
> 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
> -		metadata = 1;
> +		flags |= EXT4_FREE_BLOCKS_METADATA;
> #ifdef EXTENTS_STATS
> 	{
> 		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> @@ -2072,11 +2070,7 @@ static int ext4_remove_blocks(handle_t  
> *handle, struct inode *inode,
> 		num = le32_to_cpu(ex->ee_block) + ee_len - from;
> 		start = ext_pblock(ex) + ee_len - num;
> 		ext_debug("free last %u blocks starting %llu\n", num, start);
> -		for (i = 0; i < num; i++) {
> -			bh = sb_find_get_block(inode->i_sb, start + i);
> -			ext4_forget(handle, metadata, inode, bh, start + i);
> -		}
> -		ext4_free_blocks(handle, inode, start, num, metadata);
> +		ext4_free_blocks(handle, inode, 0, start, num, flags);
> 	} else if (from == le32_to_cpu(ex->ee_block)
> 		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
> 		printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
> @@ -3319,8 +3313,8 @@ int ext4_ext_get_blocks(handle_t *handle,  
> struct inode *inode,
> 		/* not a good idea to call discard here directly,
> 		 * but otherwise we'd need to call it every free() */
> 		ext4_discard_preallocations(inode);
> -		ext4_free_blocks(handle, inode, ext_pblock(&newex),
> -					ext4_ext_get_actual_len(&newex), 0);
> +		ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
> +				 ext4_ext_get_actual_len(&newex), 0);
> 		goto out2;
> 	}
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 72c6943..3b28e1f 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -669,7 +669,7 @@ allocated:
> 	return ret;
> failed_out:
> 	for (i = 0; i < index; i++)
> -		ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
> +		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
> 	return ret;
> }
>
> @@ -765,20 +765,20 @@ static int ext4_alloc_branch(handle_t *handle,  
> struct inode *inode,
> 	return err;
> failed:
> 	/* Allocation failed, free what we already allocated */
> +	ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
> 	for (i = 1; i <= n ; i++) {
> -		BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
> 		/*
> -		 * Note: is_metadata is 0 because branch[i].bh is
> -		 * newly allocated, so there is no need to revoke the
> -		 * block.  If we do, it's harmless, but not necessary.
> +		 * branch[i].bh is newly allocated, so there is no
> +		 * need to revoke the block, which is why we don't
> +		 * need to set EXT4_FREE_BLOCKS_METADATA.
> 		 */
> -		ext4_forget(handle, 0, inode, branch[i].bh,
> -			    branch[i].bh->b_blocknr);
> +		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
> +				 EXT4_FREE_BLOCKS_FORGET);
> 	}
> -	for (i = 0; i < indirect_blks; i++)
> -		ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
> +	for (i = n+1; i < indirect_blks; i++)
> +		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
>
> -	ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
> +	ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
>
> 	return err;
> }
> @@ -857,18 +857,16 @@ static int ext4_splice_branch(handle_t  
> *handle, struct inode *inode,
>
> err_out:
> 	for (i = 1; i <= num; i++) {
> -		BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget");
> 		/*
> -		 * Note: is_metadata is 0 because branch[i].bh is
> -		 * newly allocated, so there is no need to revoke the
> -		 * block.  If we do, it's harmless, but not necessary.
> +		 * branch[i].bh is newly allocated, so there is no
> +		 * need to revoke the block, which is why we don't
> +		 * need to set EXT4_FREE_BLOCKS_METADATA.
> 		 */
> -		ext4_forget(handle, 0, inode, where[i].bh,
> -			    where[i].bh->b_blocknr);
> -		ext4_free_blocks(handle, inode,
> -					le32_to_cpu(where[i-1].key), 1, 0);
> +		ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
> +				 EXT4_FREE_BLOCKS_FORGET);
> 	}
> -	ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks,  
> 0);
> +	ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
> +			 blks, 0);
>
> 	return err;
> }
> @@ -4080,7 +4078,10 @@ static void ext4_clear_blocks(handle_t  
> *handle, struct inode *inode,
> 			      __le32 *last)
> {
> 	__le32 *p;
> -	int	is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode);
> +	int	flags = EXT4_FREE_BLOCKS_FORGET;
> +
> +	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
> +		flags |= EXT4_FREE_BLOCKS_METADATA;
>
> 	if (try_to_extend_transaction(handle, inode)) {
> 		if (bh) {
> @@ -4096,27 +4097,10 @@ static void ext4_clear_blocks(handle_t  
> *handle, struct inode *inode,
> 		}
> 	}
>
> -	/*
> -	 * Any buffers which are on the journal will be in memory. We
> -	 * find them on the hash table so jbd2_journal_revoke() will
> -	 * run jbd2_journal_forget() on them.  We've already detached
> -	 * each block from the file, so bforget() in
> -	 * jbd2_journal_forget() should be safe.
> -	 *
> -	 * AKPM: turn on bforget in jbd2_journal_forget()!!!
> -	 */
> -	for (p = first; p < last; p++) {
> -		u32 nr = le32_to_cpu(*p);
> -		if (nr) {
> -			struct buffer_head *tbh;
> -
> -			*p = 0;
> -			tbh = sb_find_get_block(inode->i_sb, nr);
> -			ext4_forget(handle, is_metadata, inode, tbh, nr);
> -		}
> -	}
> +	for (p = first; p < last; p++)
> +		*p = 0;
>
> -	ext4_free_blocks(handle, inode, block_to_free, count, is_metadata);
> +	ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
> }
>
> /**
> @@ -4304,7 +4288,8 @@ static void ext4_free_branches(handle_t  
> *handle, struct inode *inode,
> 					    blocks_for_truncate(inode));
> 			}
>
> -			ext4_free_blocks(handle, inode, nr, 1, 1);
> +			ext4_free_blocks(handle, inode, 0, nr, 1,
> +					 EXT4_FREE_BLOCKS_METADATA);
>
> 			if (parent_bh) {
> 				/*
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 0dca90b..78de5d3 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -4436,8 +4436,8 @@ ext4_mb_free_metadata(handle_t *handle, struct  
> ext4_buddy *e4b,
>  * @metadata: 		Are these metadata blocks
>  */
> void ext4_free_blocks(handle_t *handle, struct inode *inode,
> -		      ext4_fsblk_t block, unsigned long count,
> -		      int metadata)
> +		      struct buffer_head *bh, ext4_fsblk_t block,
> +		      unsigned long count, int flags)
> {
> 	struct buffer_head *bitmap_bh = NULL;
> 	struct super_block *sb = inode->i_sb;
> @@ -4454,15 +4454,12 @@ void ext4_free_blocks(handle_t *handle,  
> struct inode *inode,
> 	int err = 0;
> 	int ret;
>
> -	/*
> -	 * We need to make sure we don't reuse the freed block until
> -	 * after the transaction is committed, which we can do by
> -	 * treating the block as metadata, below.  We make an
> -	 * exception if the inode is to be written in writeback mode
> -	 * since writeback mode has weak data consistency guarantees.
> -	 */
> -	if (!ext4_should_writeback_data(inode))
> -		metadata = 1;
> +	if (bh) {
> +		if (block)
> +			BUG_ON(block != bh->b_blocknr);
> +		else
> +			block = bh->b_blocknr;
> +	}
>
> 	sbi = EXT4_SB(sb);
> 	es = EXT4_SB(sb)->s_es;
> @@ -4476,7 +4473,32 @@ void ext4_free_blocks(handle_t *handle,  
> struct inode *inode,
> 	}
>
> 	ext4_debug("freeing block %llu\n", block);
> -	trace_ext4_free_blocks(inode, block, count, metadata);
> +	trace_ext4_free_blocks(inode, block, count, flags);
> +
> +	if (flags & EXT4_FREE_BLOCKS_FORGET) {
> +		struct buffer_head *tbh = bh;
> +		int i;
> +
> +		BUG_ON(bh && (count > 1));
> +
> +		for (i = 0; i < count; i++) {
> +			if (!bh)
> +				tbh = sb_find_get_block(inode->i_sb,
> +							block + i);
> +			ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
> +				    inode, tbh, block + i);
> +		}
> +	}
> +
> +	/*
> +	 * We need to make sure we don't reuse the freed block until
> +	 * after the transaction is committed, which we can do by
> +	 * treating the block as metadata, below.  We make an
> +	 * exception if the inode is to be written in writeback mode
> +	 * since writeback mode has weak data consistency guarantees.
> +	 */
> +	if (!ext4_should_writeback_data(inode))
> +		flags |= EXT4_FREE_BLOCKS_METADATA;
>
> 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
> 	if (ac) {
> @@ -4552,7 +4574,8 @@ do_more:
> 	err = ext4_mb_load_buddy(sb, block_group, &e4b);
> 	if (err)
> 		goto error_return;
> -	if (metadata && ext4_handle_valid(handle)) {
> +
> +	if ((flags & EXT4_FREE_BLOCKS_METADATA) &&  
> ext4_handle_valid(handle)) {
> 		struct ext4_free_data *new_entry;
> 		/*
> 		 * blocks being freed are metadata. these blocks shouldn't
> diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
> index a93d5b8..d641e13 100644
> --- a/fs/ext4/migrate.c
> +++ b/fs/ext4/migrate.c
> @@ -262,13 +262,17 @@ static int free_dind_blocks(handle_t *handle,
> 	for (i = 0; i < max_entries; i++) {
> 		if (tmp_idata[i]) {
> 			extend_credit_for_blkdel(handle, inode);
> -			ext4_free_blocks(handle, inode,
> -					le32_to_cpu(tmp_idata[i]), 1, 1);
> +			ext4_free_blocks(handle, inode, 0,
> +					 le32_to_cpu(tmp_idata[i]), 1,
> +					 EXT4_FREE_BLOCKS_METADATA |
> +					 EXT4_FREE_BLOCKS_FORGET);
> 		}
> 	}
> 	put_bh(bh);
> 	extend_credit_for_blkdel(handle, inode);
> -	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
> +	ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
> +			 EXT4_FREE_BLOCKS_METADATA |
> +			 EXT4_FREE_BLOCKS_FORGET);
> 	return 0;
> }
>
> @@ -297,7 +301,9 @@ static int free_tind_blocks(handle_t *handle,
> 	}
> 	put_bh(bh);
> 	extend_credit_for_blkdel(handle, inode);
> -	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
> +	ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
> +			 EXT4_FREE_BLOCKS_METADATA |
> +			 EXT4_FREE_BLOCKS_FORGET);
> 	return 0;
> }
>
> @@ -308,8 +314,10 @@ static int free_ind_block(handle_t *handle,  
> struct inode *inode, __le32 *i_data)
> 	/* ei->i_data[EXT4_IND_BLOCK] */
> 	if (i_data[0]) {
> 		extend_credit_for_blkdel(handle, inode);
> -		ext4_free_blocks(handle, inode,
> -				le32_to_cpu(i_data[0]), 1, 1);
> +		ext4_free_blocks(handle, inode, 0,
> +				le32_to_cpu(i_data[0]), 1,
> +				 EXT4_FREE_BLOCKS_METADATA |
> +				 EXT4_FREE_BLOCKS_FORGET);
> 	}
>
> 	/* ei->i_data[EXT4_DIND_BLOCK] */
> @@ -419,7 +427,8 @@ static int free_ext_idx(handle_t *handle, struct  
> inode *inode,
> 	}
> 	put_bh(bh);
> 	extend_credit_for_blkdel(handle, inode);
> -	ext4_free_blocks(handle, inode, block, 1, 1);
> +	ext4_free_blocks(handle, inode, 0, block, 1,
> +			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
> 	return retval;
> }
>
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 0257019..910bf9a 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -482,9 +482,10 @@ ext4_xattr_release_block(handle_t *handle,  
> struct inode *inode,
> 		ea_bdebug(bh, "refcount now=0; freeing");
> 		if (ce)
> 			mb_cache_entry_free(ce);
> -		ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
> 		get_bh(bh);
> -		ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
> +		ext4_free_blocks(handle, inode, bh, 0, 1,
> +				 EXT4_FREE_BLOCKS_METADATA |
> +				 EXT4_FREE_BLOCKS_FORGET);
> 	} else {
> 		le32_add_cpu(&BHDR(bh)->h_refcount, -1);
> 		error = ext4_handle_dirty_metadata(handle, inode, bh);
> @@ -832,7 +833,8 @@ inserted:
> 			new_bh = sb_getblk(sb, block);
> 			if (!new_bh) {
> getblk_failed:
> -				ext4_free_blocks(handle, inode, block, 1, 1);
> +				ext4_free_blocks(handle, inode, 0, block, 1,
> +						 EXT4_FREE_BLOCKS_METADATA);
> 				error = -EIO;
> 				goto cleanup;
> 			}
> diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> index b390e1f..74f628b 100644
> --- a/include/trace/events/ext4.h
> +++ b/include/trace/events/ext4.h
> @@ -650,30 +650,32 @@ TRACE_EVENT(ext4_allocate_blocks,
>
> TRACE_EVENT(ext4_free_blocks,
> 	TP_PROTO(struct inode *inode, __u64 block, unsigned long count,
> -			int metadata),
> +		 int flags),
>
> -	TP_ARGS(inode, block, count, metadata),
> +	TP_ARGS(inode, block, count, flags),
>
> 	TP_STRUCT__entry(
> 		__field(	dev_t,	dev			)
> 		__field(	ino_t,	ino			)
> +		__field(      umode_t, mode			)
> 		__field(	__u64,	block			)
> 		__field(	unsigned long,	count		)
> -		__field(	int,	metadata		)
> -
> +		__field(	 int,	flags			)
> 	),
>
> 	TP_fast_assign(
> 		__entry->dev		= inode->i_sb->s_dev;
> 		__entry->ino		= inode->i_ino;
> +		__entry->mode		= inode->i_mode;
> 		__entry->block		= block;
> 		__entry->count		= count;
> -		__entry->metadata	= metadata;
> +		__entry->flags		= flags;
> 	),
>
> -	TP_printk("dev %s ino %lu block %llu count %lu metadata %d",
> +	TP_printk("dev %s ino %lu mode 0%o block %llu count %lu flags %d",
> 		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
> -		  __entry->block, __entry->count, __entry->metadata)
> +		  __entry->mode, __entry->block, __entry->count,
> +		  __entry->flags)
> );
>
> TRACE_EVENT(ext4_sync_file,
> -- 
> 1.6.5.216.g5288a.dirty


Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Nov. 23, 2009, 8:28 p.m. UTC | #2
On Mon, Nov 23, 2009 at 12:22:29PM -0700, Andreas Dilger wrote:
> >Also fix a bug in the extents migration code; it wasn't calling
> >ext4_forget() when releasing the indirect blocks during the
> >conversion.
> 
> Would this also solve Curt's problem, mentioned in "Bug in extent
> zeroout: blocks not marked as new" where bforget was not being
> called when the blocks are freed?

No, Curt was referring to the extents code which convert uninitialized
to initialized extents; the code I was referring to is the code which
migrates direct/indirect-mapped inodes to extent-mapped inodes.

						- Ted

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 210e1b5..4cfc2f0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -376,6 +376,12 @@  struct ext4_new_group_data {
 					 EXT4_GET_BLOCKS_DIO_CREATE_EXT)
 
 /*
+ * Flags used by ext4_free_blocks
+ */
+#define EXT4_FREE_BLOCKS_METADATA	0x0001
+#define EXT4_FREE_BLOCKS_FORGET		0x0002
+
+/*
  * ioctl commands
  */
 #define	EXT4_IOC_GETFLAGS		FS_IOC_GETFLAGS
@@ -1384,8 +1390,8 @@  extern void ext4_discard_preallocations(struct inode *);
 extern int __init init_ext4_mballoc(void);
 extern void exit_ext4_mballoc(void);
 extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
-			     ext4_fsblk_t block, unsigned long count,
-			     int metadata);
+			     struct buffer_head *bh, ext4_fsblk_t block,
+			     unsigned long count, int flags);
 extern int ext4_mb_add_groupinfo(struct super_block *sb,
 		ext4_group_t i, struct ext4_group_desc *desc);
 extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74dcff8..2c4a932 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1007,7 +1007,8 @@  cleanup:
 		for (i = 0; i < depth; i++) {
 			if (!ablocks[i])
 				continue;
-			ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
+			ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+					 EXT4_FREE_BLOCKS_METADATA);
 		}
 	}
 	kfree(ablocks);
@@ -1957,7 +1958,6 @@  errout:
 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 			struct ext4_ext_path *path)
 {
-	struct buffer_head *bh;
 	int err;
 	ext4_fsblk_t leaf;
 
@@ -1973,9 +1973,8 @@  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 	if (err)
 		return err;
 	ext_debug("index is empty, remove it, free block %llu\n", leaf);
-	bh = sb_find_get_block(inode->i_sb, leaf);
-	ext4_forget(handle, 1, inode, bh, leaf);
-	ext4_free_blocks(handle, inode, leaf, 1, 1);
+	ext4_free_blocks(handle, inode, 0, leaf, 1,
+			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
 	return err;
 }
 
@@ -2042,12 +2041,11 @@  static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 				struct ext4_extent *ex,
 				ext4_lblk_t from, ext4_lblk_t to)
 {
-	struct buffer_head *bh;
 	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
-	int i, metadata = 0;
+	int flags = EXT4_FREE_BLOCKS_FORGET;
 
 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-		metadata = 1;
+		flags |= EXT4_FREE_BLOCKS_METADATA;
 #ifdef EXTENTS_STATS
 	{
 		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2072,11 +2070,7 @@  static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 		num = le32_to_cpu(ex->ee_block) + ee_len - from;
 		start = ext_pblock(ex) + ee_len - num;
 		ext_debug("free last %u blocks starting %llu\n", num, start);
-		for (i = 0; i < num; i++) {
-			bh = sb_find_get_block(inode->i_sb, start + i);
-			ext4_forget(handle, metadata, inode, bh, start + i);
-		}
-		ext4_free_blocks(handle, inode, start, num, metadata);
+		ext4_free_blocks(handle, inode, 0, start, num, flags);
 	} else if (from == le32_to_cpu(ex->ee_block)
 		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
 		printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -3319,8 +3313,8 @@  int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 		/* not a good idea to call discard here directly,
 		 * but otherwise we'd need to call it every free() */
 		ext4_discard_preallocations(inode);
-		ext4_free_blocks(handle, inode, ext_pblock(&newex),
-					ext4_ext_get_actual_len(&newex), 0);
+		ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
+				 ext4_ext_get_actual_len(&newex), 0);
 		goto out2;
 	}
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 72c6943..3b28e1f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -669,7 +669,7 @@  allocated:
 	return ret;
 failed_out:
 	for (i = 0; i < index; i++)
-		ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
+		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
 	return ret;
 }
 
@@ -765,20 +765,20 @@  static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 	return err;
 failed:
 	/* Allocation failed, free what we already allocated */
+	ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
 	for (i = 1; i <= n ; i++) {
-		BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
 		/* 
-		 * Note: is_metadata is 0 because branch[i].bh is
-		 * newly allocated, so there is no need to revoke the
-		 * block.  If we do, it's harmless, but not necessary.
+		 * branch[i].bh is newly allocated, so there is no
+		 * need to revoke the block, which is why we don't
+		 * need to set EXT4_FREE_BLOCKS_METADATA.
 		 */
-		ext4_forget(handle, 0, inode, branch[i].bh,
-			    branch[i].bh->b_blocknr);
+		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+				 EXT4_FREE_BLOCKS_FORGET);
 	}
-	for (i = 0; i < indirect_blks; i++)
-		ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
+	for (i = n+1; i < indirect_blks; i++)
+		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
 
-	ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
+	ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
 
 	return err;
 }
@@ -857,18 +857,16 @@  static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 
 err_out:
 	for (i = 1; i <= num; i++) {
-		BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget");
 		/* 
-		 * Note: is_metadata is 0 because branch[i].bh is
-		 * newly allocated, so there is no need to revoke the
-		 * block.  If we do, it's harmless, but not necessary.
+		 * branch[i].bh is newly allocated, so there is no
+		 * need to revoke the block, which is why we don't
+		 * need to set EXT4_FREE_BLOCKS_METADATA.
 		 */
-		ext4_forget(handle, 0, inode, where[i].bh,
-			    where[i].bh->b_blocknr);
-		ext4_free_blocks(handle, inode,
-					le32_to_cpu(where[i-1].key), 1, 0);
+		ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
+				 EXT4_FREE_BLOCKS_FORGET);
 	}
-	ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0);
+	ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+			 blks, 0);
 
 	return err;
 }
@@ -4080,7 +4078,10 @@  static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
 			      __le32 *last)
 {
 	__le32 *p;
-	int	is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode);
+	int	flags = EXT4_FREE_BLOCKS_FORGET;
+
+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+		flags |= EXT4_FREE_BLOCKS_METADATA;
 
 	if (try_to_extend_transaction(handle, inode)) {
 		if (bh) {
@@ -4096,27 +4097,10 @@  static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
 		}
 	}
 
-	/*
-	 * Any buffers which are on the journal will be in memory. We
-	 * find them on the hash table so jbd2_journal_revoke() will
-	 * run jbd2_journal_forget() on them.  We've already detached
-	 * each block from the file, so bforget() in
-	 * jbd2_journal_forget() should be safe.
-	 *
-	 * AKPM: turn on bforget in jbd2_journal_forget()!!!
-	 */
-	for (p = first; p < last; p++) {
-		u32 nr = le32_to_cpu(*p);
-		if (nr) {
-			struct buffer_head *tbh;
-
-			*p = 0;
-			tbh = sb_find_get_block(inode->i_sb, nr);
-			ext4_forget(handle, is_metadata, inode, tbh, nr);
-		}
-	}
+	for (p = first; p < last; p++)
+		*p = 0;
 
-	ext4_free_blocks(handle, inode, block_to_free, count, is_metadata);
+	ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
 }
 
 /**
@@ -4304,7 +4288,8 @@  static void ext4_free_branches(handle_t *handle, struct inode *inode,
 					    blocks_for_truncate(inode));
 			}
 
-			ext4_free_blocks(handle, inode, nr, 1, 1);
+			ext4_free_blocks(handle, inode, 0, nr, 1,
+					 EXT4_FREE_BLOCKS_METADATA);
 
 			if (parent_bh) {
 				/*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0dca90b..78de5d3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4436,8 +4436,8 @@  ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
  * @metadata: 		Are these metadata blocks
  */
 void ext4_free_blocks(handle_t *handle, struct inode *inode,
-		      ext4_fsblk_t block, unsigned long count,
-		      int metadata)
+		      struct buffer_head *bh, ext4_fsblk_t block,
+		      unsigned long count, int flags)
 {
 	struct buffer_head *bitmap_bh = NULL;
 	struct super_block *sb = inode->i_sb;
@@ -4454,15 +4454,12 @@  void ext4_free_blocks(handle_t *handle, struct inode *inode,
 	int err = 0;
 	int ret;
 
-	/* 
-	 * We need to make sure we don't reuse the freed block until
-	 * after the transaction is committed, which we can do by
-	 * treating the block as metadata, below.  We make an
-	 * exception if the inode is to be written in writeback mode
-	 * since writeback mode has weak data consistency guarantees.
-	 */
-	if (!ext4_should_writeback_data(inode))
-		metadata = 1;
+	if (bh) {
+		if (block)
+			BUG_ON(block != bh->b_blocknr);
+		else
+			block = bh->b_blocknr;
+	}
 
 	sbi = EXT4_SB(sb);
 	es = EXT4_SB(sb)->s_es;
@@ -4476,7 +4473,32 @@  void ext4_free_blocks(handle_t *handle, struct inode *inode,
 	}
 
 	ext4_debug("freeing block %llu\n", block);
-	trace_ext4_free_blocks(inode, block, count, metadata);
+	trace_ext4_free_blocks(inode, block, count, flags);
+
+	if (flags & EXT4_FREE_BLOCKS_FORGET) {
+		struct buffer_head *tbh = bh;
+		int i;
+
+		BUG_ON(bh && (count > 1));
+
+		for (i = 0; i < count; i++) {
+			if (!bh)
+				tbh = sb_find_get_block(inode->i_sb,
+							block + i);
+			ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 
+				    inode, tbh, block + i);
+		}
+	}
+
+	/* 
+	 * We need to make sure we don't reuse the freed block until
+	 * after the transaction is committed, which we can do by
+	 * treating the block as metadata, below.  We make an
+	 * exception if the inode is to be written in writeback mode
+	 * since writeback mode has weak data consistency guarantees.
+	 */
+	if (!ext4_should_writeback_data(inode))
+		flags |= EXT4_FREE_BLOCKS_METADATA;
 
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 	if (ac) {
@@ -4552,7 +4574,8 @@  do_more:
 	err = ext4_mb_load_buddy(sb, block_group, &e4b);
 	if (err)
 		goto error_return;
-	if (metadata && ext4_handle_valid(handle)) {
+
+	if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
 		struct ext4_free_data *new_entry;
 		/*
 		 * blocks being freed are metadata. these blocks shouldn't
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index a93d5b8..d641e13 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -262,13 +262,17 @@  static int free_dind_blocks(handle_t *handle,
 	for (i = 0; i < max_entries; i++) {
 		if (tmp_idata[i]) {
 			extend_credit_for_blkdel(handle, inode);
-			ext4_free_blocks(handle, inode,
-					le32_to_cpu(tmp_idata[i]), 1, 1);
+			ext4_free_blocks(handle, inode, 0,
+					 le32_to_cpu(tmp_idata[i]), 1,
+					 EXT4_FREE_BLOCKS_METADATA |
+					 EXT4_FREE_BLOCKS_FORGET);
 		}
 	}
 	put_bh(bh);
 	extend_credit_for_blkdel(handle, inode);
-	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
+	ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+			 EXT4_FREE_BLOCKS_METADATA |
+			 EXT4_FREE_BLOCKS_FORGET);
 	return 0;
 }
 
@@ -297,7 +301,9 @@  static int free_tind_blocks(handle_t *handle,
 	}
 	put_bh(bh);
 	extend_credit_for_blkdel(handle, inode);
-	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
+	ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+			 EXT4_FREE_BLOCKS_METADATA |
+			 EXT4_FREE_BLOCKS_FORGET);
 	return 0;
 }
 
@@ -308,8 +314,10 @@  static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
 	/* ei->i_data[EXT4_IND_BLOCK] */
 	if (i_data[0]) {
 		extend_credit_for_blkdel(handle, inode);
-		ext4_free_blocks(handle, inode,
-				le32_to_cpu(i_data[0]), 1, 1);
+		ext4_free_blocks(handle, inode, 0,
+				le32_to_cpu(i_data[0]), 1,
+				 EXT4_FREE_BLOCKS_METADATA |
+				 EXT4_FREE_BLOCKS_FORGET);
 	}
 
 	/* ei->i_data[EXT4_DIND_BLOCK] */
@@ -419,7 +427,8 @@  static int free_ext_idx(handle_t *handle, struct inode *inode,
 	}
 	put_bh(bh);
 	extend_credit_for_blkdel(handle, inode);
-	ext4_free_blocks(handle, inode, block, 1, 1);
+	ext4_free_blocks(handle, inode, 0, block, 1,
+			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
 	return retval;
 }
 
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 0257019..910bf9a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -482,9 +482,10 @@  ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 		ea_bdebug(bh, "refcount now=0; freeing");
 		if (ce)
 			mb_cache_entry_free(ce);
-		ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
 		get_bh(bh);
-		ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
+		ext4_free_blocks(handle, inode, bh, 0, 1,
+				 EXT4_FREE_BLOCKS_METADATA |
+				 EXT4_FREE_BLOCKS_FORGET);
 	} else {
 		le32_add_cpu(&BHDR(bh)->h_refcount, -1);
 		error = ext4_handle_dirty_metadata(handle, inode, bh);
@@ -832,7 +833,8 @@  inserted:
 			new_bh = sb_getblk(sb, block);
 			if (!new_bh) {
 getblk_failed:
-				ext4_free_blocks(handle, inode, block, 1, 1);
+				ext4_free_blocks(handle, inode, 0, block, 1,
+						 EXT4_FREE_BLOCKS_METADATA);
 				error = -EIO;
 				goto cleanup;
 			}
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index b390e1f..74f628b 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -650,30 +650,32 @@  TRACE_EVENT(ext4_allocate_blocks,
 
 TRACE_EVENT(ext4_free_blocks,
 	TP_PROTO(struct inode *inode, __u64 block, unsigned long count,
-			int metadata),
+		 int flags),
 
-	TP_ARGS(inode, block, count, metadata),
+	TP_ARGS(inode, block, count, flags),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
+		__field(      umode_t, mode			)
 		__field(	__u64,	block			)
 		__field(	unsigned long,	count		)
-		__field(	int,	metadata		)
-
+		__field(	 int,	flags			)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= inode->i_sb->s_dev;
 		__entry->ino		= inode->i_ino;
+		__entry->mode		= inode->i_mode;
 		__entry->block		= block;
 		__entry->count		= count;
-		__entry->metadata	= metadata;
+		__entry->flags		= flags;
 	),
 
-	TP_printk("dev %s ino %lu block %llu count %lu metadata %d",
+	TP_printk("dev %s ino %lu mode 0%o block %llu count %lu flags %d",
 		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-		  __entry->block, __entry->count, __entry->metadata)
+		  __entry->mode, __entry->block, __entry->count,
+		  __entry->flags)
 );
 
 TRACE_EVENT(ext4_sync_file,