diff mbox

[1/5] ext4: Always use ext4_bio_write_page() for writeout

Message ID 1357148744-4895-2-git-send-email-jack@suse.cz
State Superseded, archived
Headers show

Commit Message

Jan Kara Jan. 2, 2013, 5:45 p.m. UTC
Currently we sometimes used block_write_full_page() and sometimes
ext4_bio_write_page() for writeback (depending on mount options and call
path). Let's always use ext4_bio_write_page() to simplify things a bit.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/ext4.h  |    1 -
 fs/ext4/inode.c |  128 ++++++-------------------------------------------------
 fs/ext4/super.c |    7 +--
 3 files changed, 16 insertions(+), 120 deletions(-)

Comments

Jan Kara Jan. 3, 2013, 2:18 p.m. UTC | #1
On Wed 02-01-13 18:45:40, Jan Kara wrote:
> Currently we sometimes used block_write_full_page() and sometimes
> ext4_bio_write_page() for writeback (depending on mount options and call
> path). Let's always use ext4_bio_write_page() to simplify things a bit.
  I just found out that we can also drop ext4_io_end_t->page and its
handling (I can fix that in V2 but I'll wait for some feedback before
sending it). Actually one argument against this patch could be that
ext4_bio_write_page() is rather memory hungry - ext4_io_end is
over 1 KB. I don't think it's critical and it seems to be possible to trim
that down significantly but I wanted to mention it so that people are not
surprised.

								Honza

> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  fs/ext4/ext4.h  |    1 -
>  fs/ext4/inode.c |  128 ++++++-------------------------------------------------
>  fs/ext4/super.c |    7 +--
>  3 files changed, 16 insertions(+), 120 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 8462eb3..70c7030 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -985,7 +985,6 @@ struct ext4_inode_info {
>  #define EXT4_MOUNT_DIOREAD_NOLOCK	0x400000 /* Enable support for dio read nolocking */
>  #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
>  #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
> -#define EXT4_MOUNT_MBLK_IO_SUBMIT	0x4000000 /* multi-block io submits */
>  #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
>  #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
>  #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index cb1c1ab..f95b511 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -134,8 +134,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
>  static void ext4_invalidatepage(struct page *page, unsigned long offset);
>  static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
>  				   struct buffer_head *bh_result, int create);
> -static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
> -static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
>  static int __ext4_journalled_writepage(struct page *page, unsigned int len);
>  static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
>  static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
> @@ -808,11 +806,10 @@ int ext4_walk_page_buffers(handle_t *handle,
>   * and the commit_write().  So doing the jbd2_journal_start at the start of
>   * prepare_write() is the right place.
>   *
> - * Also, this function can nest inside ext4_writepage() ->
> - * block_write_full_page(). In that case, we *know* that ext4_writepage()
> - * has generated enough buffer credits to do the whole page.  So we won't
> - * block on the journal in that case, which is good, because the caller may
> - * be PF_MEMALLOC.
> + * Also, this function can nest inside ext4_writepage().  In that case, we
> + * *know* that ext4_writepage() has generated enough buffer credits to do the
> + * whole page.  So we won't block on the journal in that case, which is good,
> + * because the caller may be PF_MEMALLOC.
>   *
>   * By accident, ext4 can be reentered when a transaction is open via
>   * quota file writes.  If we were to commit the transaction while thus
> @@ -1463,18 +1460,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
>  			 */
>  			if (unlikely(journal_data && PageChecked(page)))
>  				err = __ext4_journalled_writepage(page, len);
> -			else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
> +			else
>  				err = ext4_bio_write_page(&io_submit, page,
>  							  len, mpd->wbc);
> -			else if (buffer_uninit(page_bufs)) {
> -				ext4_set_bh_endio(page_bufs, inode);
> -				err = block_write_full_page_endio(page,
> -					noalloc_get_block_write,
> -					mpd->wbc, ext4_end_io_buffer_write);
> -			} else
> -				err = block_write_full_page(page,
> -					noalloc_get_block_write, mpd->wbc);
> -
>  			if (!err)
>  				mpd->pages_written++;
>  			/*
> @@ -1891,16 +1879,16 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
>  }
>  
>  /*
> - * This function is used as a standard get_block_t calback function
> - * when there is no desire to allocate any blocks.  It is used as a
> - * callback function for block_write_begin() and block_write_full_page().
> - * These functions should only try to map a single block at a time.
> + * This function is used as a standard get_block_t calback function when there
> + * is no desire to allocate any blocks.  It is used as a callback function for
> + * block_write_begin().  These functions should only try to map a single block
> + * at a time.
>   *
>   * Since this function doesn't do block allocations even if the caller
>   * requests it by passing in create=1, it is critically important that
>   * any caller checks to make sure that any buffer heads are returned
>   * by this function are either all already mapped or marked for
> - * delayed allocation before calling  block_write_full_page().  Otherwise,
> + * delayed allocation before calling ext4_bio_write_page().  Otherwise,
>   * b_blocknr could be left unitialized, and the page write functions will
>   * be taken by surprise.
>   */
> @@ -2040,6 +2028,7 @@ static int ext4_writepage(struct page *page,
>  	unsigned int len;
>  	struct buffer_head *page_bufs = NULL;
>  	struct inode *inode = page->mapping->host;
> +	struct ext4_io_submit io_submit;
>  
>  	trace_ext4_writepage(page);
>  	size = i_size_read(inode);
> @@ -2089,14 +2078,9 @@ static int ext4_writepage(struct page *page,
>  		 */
>  		return __ext4_journalled_writepage(page, len);
>  
> -	if (buffer_uninit(page_bufs)) {
> -		ext4_set_bh_endio(page_bufs, inode);
> -		ret = block_write_full_page_endio(page, noalloc_get_block_write,
> -					    wbc, ext4_end_io_buffer_write);
> -	} else
> -		ret = block_write_full_page(page, noalloc_get_block_write,
> -					    wbc);
> -
> +	memset(&io_submit, 0, sizeof(io_submit));
> +	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
> +	ext4_io_submit(&io_submit);
>  	return ret;
>  }
>  
> @@ -2858,26 +2842,6 @@ ext4_readpages(struct file *file, struct address_space *mapping,
>  	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
>  }
>  
> -static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
> -{
> -	struct buffer_head *head, *bh;
> -	unsigned int curr_off = 0;
> -
> -	if (!page_has_buffers(page))
> -		return;
> -	head = bh = page_buffers(page);
> -	do {
> -		if (offset <= curr_off && test_clear_buffer_uninit(bh)
> -					&& bh->b_private) {
> -			ext4_free_io_end(bh->b_private);
> -			bh->b_private = NULL;
> -			bh->b_end_io = NULL;
> -		}
> -		curr_off = curr_off + bh->b_size;
> -		bh = bh->b_this_page;
> -	} while (bh != head);
> -}
> -
>  static void ext4_invalidatepage(struct page *page, unsigned long offset)
>  {
>  	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
> @@ -2885,11 +2849,6 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
>  	trace_ext4_invalidatepage(page, offset);
>  
>  	/*
> -	 * free any io_end structure allocated for buffers to be discarded
> -	 */
> -	if (ext4_should_dioread_nolock(page->mapping->host))
> -		ext4_invalidatepage_free_endio(page, offset);
> -	/*
>  	 * If it's a full truncate we just forget about the pending dirtying
>  	 */
>  	if (offset == 0)
> @@ -2977,65 +2936,6 @@ out:
>  	ext4_add_complete_io(io_end);
>  }
>  
> -static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
> -{
> -	ext4_io_end_t *io_end = bh->b_private;
> -	struct inode *inode;
> -
> -	if (!test_clear_buffer_uninit(bh) || !io_end)
> -		goto out;
> -
> -	if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
> -		ext4_msg(io_end->inode->i_sb, KERN_INFO,
> -			 "sb umounted, discard end_io request for inode %lu",
> -			 io_end->inode->i_ino);
> -		ext4_free_io_end(io_end);
> -		goto out;
> -	}
> -
> -	/*
> -	 * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
> -	 * but being more careful is always safe for the future change.
> -	 */
> -	inode = io_end->inode;
> -	ext4_set_io_unwritten_flag(inode, io_end);
> -	ext4_add_complete_io(io_end);
> -out:
> -	bh->b_private = NULL;
> -	bh->b_end_io = NULL;
> -	clear_buffer_uninit(bh);
> -	end_buffer_async_write(bh, uptodate);
> -}
> -
> -static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
> -{
> -	ext4_io_end_t *io_end;
> -	struct page *page = bh->b_page;
> -	loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
> -	size_t size = bh->b_size;
> -
> -retry:
> -	io_end = ext4_init_io_end(inode, GFP_ATOMIC);
> -	if (!io_end) {
> -		pr_warn_ratelimited("%s: allocation fail\n", __func__);
> -		schedule();
> -		goto retry;
> -	}
> -	io_end->offset = offset;
> -	io_end->size = size;
> -	/*
> -	 * We need to hold a reference to the page to make sure it
> -	 * doesn't get evicted before ext4_end_io_work() has a chance
> -	 * to convert the extent from written to unwritten.
> -	 */
> -	io_end->page = page;
> -	get_page(io_end->page);
> -
> -	bh->b_private = io_end;
> -	bh->b_end_io = ext4_end_io_buffer_write;
> -	return 0;
> -}
> -
>  /*
>   * For ext4 extent files, ext4 will do direct-io write to holes,
>   * preallocated extents, and those write extend the file, no need to
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index e09f7d1..afbe974 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1280,8 +1280,8 @@ static const match_table_t tokens = {
>  	{Opt_stripe, "stripe=%u"},
>  	{Opt_delalloc, "delalloc"},
>  	{Opt_nodelalloc, "nodelalloc"},
> -	{Opt_mblk_io_submit, "mblk_io_submit"},
> -	{Opt_nomblk_io_submit, "nomblk_io_submit"},
> +	{Opt_removed, "mblk_io_submit"},
> +	{Opt_removed, "nomblk_io_submit"},
>  	{Opt_block_validity, "block_validity"},
>  	{Opt_noblock_validity, "noblock_validity"},
>  	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
> @@ -1414,8 +1414,6 @@ static const struct mount_opts {
>  	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
>  	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
>  	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
> -	{Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET},
> -	{Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR},
>  	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
>  	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
>  	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET},
> @@ -3373,7 +3371,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
>  #ifdef CONFIG_EXT4_FS_POSIX_ACL
>  	set_opt(sb, POSIX_ACL);
>  #endif
> -	set_opt(sb, MBLK_IO_SUBMIT);
>  	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
>  		set_opt(sb, JOURNAL_DATA);
>  	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
> -- 
> 1.7.1
>
Zheng Liu Jan. 4, 2013, 7:18 a.m. UTC | #2
On Wed, Jan 02, 2013 at 06:45:40PM +0100, Jan Kara wrote:
> Currently we sometimes used block_write_full_page() and sometimes
> ext4_bio_write_page() for writeback (depending on mount options and call
> path). Let's always use ext4_bio_write_page() to simplify things a bit.
> 
> Signed-off-by: Jan Kara <jack@suse.cz>

Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>

Regards,
                                                - Zheng

> ---
>  fs/ext4/ext4.h  |    1 -
>  fs/ext4/inode.c |  128 ++++++-------------------------------------------------
>  fs/ext4/super.c |    7 +--
>  3 files changed, 16 insertions(+), 120 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 8462eb3..70c7030 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -985,7 +985,6 @@ struct ext4_inode_info {
>  #define EXT4_MOUNT_DIOREAD_NOLOCK	0x400000 /* Enable support for dio read nolocking */
>  #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
>  #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
> -#define EXT4_MOUNT_MBLK_IO_SUBMIT	0x4000000 /* multi-block io submits */
>  #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
>  #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
>  #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index cb1c1ab..f95b511 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -134,8 +134,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
>  static void ext4_invalidatepage(struct page *page, unsigned long offset);
>  static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
>  				   struct buffer_head *bh_result, int create);
> -static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
> -static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
>  static int __ext4_journalled_writepage(struct page *page, unsigned int len);
>  static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
>  static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
> @@ -808,11 +806,10 @@ int ext4_walk_page_buffers(handle_t *handle,
>   * and the commit_write().  So doing the jbd2_journal_start at the start of
>   * prepare_write() is the right place.
>   *
> - * Also, this function can nest inside ext4_writepage() ->
> - * block_write_full_page(). In that case, we *know* that ext4_writepage()
> - * has generated enough buffer credits to do the whole page.  So we won't
> - * block on the journal in that case, which is good, because the caller may
> - * be PF_MEMALLOC.
> + * Also, this function can nest inside ext4_writepage().  In that case, we
> + * *know* that ext4_writepage() has generated enough buffer credits to do the
> + * whole page.  So we won't block on the journal in that case, which is good,
> + * because the caller may be PF_MEMALLOC.
>   *
>   * By accident, ext4 can be reentered when a transaction is open via
>   * quota file writes.  If we were to commit the transaction while thus
> @@ -1463,18 +1460,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
>  			 */
>  			if (unlikely(journal_data && PageChecked(page)))
>  				err = __ext4_journalled_writepage(page, len);
> -			else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
> +			else
>  				err = ext4_bio_write_page(&io_submit, page,
>  							  len, mpd->wbc);
> -			else if (buffer_uninit(page_bufs)) {
> -				ext4_set_bh_endio(page_bufs, inode);
> -				err = block_write_full_page_endio(page,
> -					noalloc_get_block_write,
> -					mpd->wbc, ext4_end_io_buffer_write);
> -			} else
> -				err = block_write_full_page(page,
> -					noalloc_get_block_write, mpd->wbc);
> -
>  			if (!err)
>  				mpd->pages_written++;
>  			/*
> @@ -1891,16 +1879,16 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
>  }
>  
>  /*
> - * This function is used as a standard get_block_t calback function
> - * when there is no desire to allocate any blocks.  It is used as a
> - * callback function for block_write_begin() and block_write_full_page().
> - * These functions should only try to map a single block at a time.
> + * This function is used as a standard get_block_t calback function when there
> + * is no desire to allocate any blocks.  It is used as a callback function for
> + * block_write_begin().  These functions should only try to map a single block
> + * at a time.
>   *
>   * Since this function doesn't do block allocations even if the caller
>   * requests it by passing in create=1, it is critically important that
>   * any caller checks to make sure that any buffer heads are returned
>   * by this function are either all already mapped or marked for
> - * delayed allocation before calling  block_write_full_page().  Otherwise,
> + * delayed allocation before calling ext4_bio_write_page().  Otherwise,
>   * b_blocknr could be left unitialized, and the page write functions will
>   * be taken by surprise.
>   */
> @@ -2040,6 +2028,7 @@ static int ext4_writepage(struct page *page,
>  	unsigned int len;
>  	struct buffer_head *page_bufs = NULL;
>  	struct inode *inode = page->mapping->host;
> +	struct ext4_io_submit io_submit;
>  
>  	trace_ext4_writepage(page);
>  	size = i_size_read(inode);
> @@ -2089,14 +2078,9 @@ static int ext4_writepage(struct page *page,
>  		 */
>  		return __ext4_journalled_writepage(page, len);
>  
> -	if (buffer_uninit(page_bufs)) {
> -		ext4_set_bh_endio(page_bufs, inode);
> -		ret = block_write_full_page_endio(page, noalloc_get_block_write,
> -					    wbc, ext4_end_io_buffer_write);
> -	} else
> -		ret = block_write_full_page(page, noalloc_get_block_write,
> -					    wbc);
> -
> +	memset(&io_submit, 0, sizeof(io_submit));
> +	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
> +	ext4_io_submit(&io_submit);
>  	return ret;
>  }
>  
> @@ -2858,26 +2842,6 @@ ext4_readpages(struct file *file, struct address_space *mapping,
>  	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
>  }
>  
> -static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
> -{
> -	struct buffer_head *head, *bh;
> -	unsigned int curr_off = 0;
> -
> -	if (!page_has_buffers(page))
> -		return;
> -	head = bh = page_buffers(page);
> -	do {
> -		if (offset <= curr_off && test_clear_buffer_uninit(bh)
> -					&& bh->b_private) {
> -			ext4_free_io_end(bh->b_private);
> -			bh->b_private = NULL;
> -			bh->b_end_io = NULL;
> -		}
> -		curr_off = curr_off + bh->b_size;
> -		bh = bh->b_this_page;
> -	} while (bh != head);
> -}
> -
>  static void ext4_invalidatepage(struct page *page, unsigned long offset)
>  {
>  	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
> @@ -2885,11 +2849,6 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
>  	trace_ext4_invalidatepage(page, offset);
>  
>  	/*
> -	 * free any io_end structure allocated for buffers to be discarded
> -	 */
> -	if (ext4_should_dioread_nolock(page->mapping->host))
> -		ext4_invalidatepage_free_endio(page, offset);
> -	/*
>  	 * If it's a full truncate we just forget about the pending dirtying
>  	 */
>  	if (offset == 0)
> @@ -2977,65 +2936,6 @@ out:
>  	ext4_add_complete_io(io_end);
>  }
>  
> -static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
> -{
> -	ext4_io_end_t *io_end = bh->b_private;
> -	struct inode *inode;
> -
> -	if (!test_clear_buffer_uninit(bh) || !io_end)
> -		goto out;
> -
> -	if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
> -		ext4_msg(io_end->inode->i_sb, KERN_INFO,
> -			 "sb umounted, discard end_io request for inode %lu",
> -			 io_end->inode->i_ino);
> -		ext4_free_io_end(io_end);
> -		goto out;
> -	}
> -
> -	/*
> -	 * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
> -	 * but being more careful is always safe for the future change.
> -	 */
> -	inode = io_end->inode;
> -	ext4_set_io_unwritten_flag(inode, io_end);
> -	ext4_add_complete_io(io_end);
> -out:
> -	bh->b_private = NULL;
> -	bh->b_end_io = NULL;
> -	clear_buffer_uninit(bh);
> -	end_buffer_async_write(bh, uptodate);
> -}
> -
> -static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
> -{
> -	ext4_io_end_t *io_end;
> -	struct page *page = bh->b_page;
> -	loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
> -	size_t size = bh->b_size;
> -
> -retry:
> -	io_end = ext4_init_io_end(inode, GFP_ATOMIC);
> -	if (!io_end) {
> -		pr_warn_ratelimited("%s: allocation fail\n", __func__);
> -		schedule();
> -		goto retry;
> -	}
> -	io_end->offset = offset;
> -	io_end->size = size;
> -	/*
> -	 * We need to hold a reference to the page to make sure it
> -	 * doesn't get evicted before ext4_end_io_work() has a chance
> -	 * to convert the extent from written to unwritten.
> -	 */
> -	io_end->page = page;
> -	get_page(io_end->page);
> -
> -	bh->b_private = io_end;
> -	bh->b_end_io = ext4_end_io_buffer_write;
> -	return 0;
> -}
> -
>  /*
>   * For ext4 extent files, ext4 will do direct-io write to holes,
>   * preallocated extents, and those write extend the file, no need to
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index e09f7d1..afbe974 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1280,8 +1280,8 @@ static const match_table_t tokens = {
>  	{Opt_stripe, "stripe=%u"},
>  	{Opt_delalloc, "delalloc"},
>  	{Opt_nodelalloc, "nodelalloc"},
> -	{Opt_mblk_io_submit, "mblk_io_submit"},
> -	{Opt_nomblk_io_submit, "nomblk_io_submit"},
> +	{Opt_removed, "mblk_io_submit"},
> +	{Opt_removed, "nomblk_io_submit"},
>  	{Opt_block_validity, "block_validity"},
>  	{Opt_noblock_validity, "noblock_validity"},
>  	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
> @@ -1414,8 +1414,6 @@ static const struct mount_opts {
>  	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
>  	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
>  	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
> -	{Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET},
> -	{Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR},
>  	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
>  	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
>  	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET},
> @@ -3373,7 +3371,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
>  #ifdef CONFIG_EXT4_FS_POSIX_ACL
>  	set_opt(sb, POSIX_ACL);
>  #endif
> -	set_opt(sb, MBLK_IO_SUBMIT);
>  	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
>  		set_opt(sb, JOURNAL_DATA);
>  	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Jan. 17, 2013, 6:31 p.m. UTC | #3
On Wed, Jan 02, 2013 at 06:45:40PM +0100, Jan Kara wrote:
> Currently we sometimes used block_write_full_page() and sometimes
> ext4_bio_write_page() for writeback (depending on mount options and call
> path). Let's always use ext4_bio_write_page() to simplify things a bit.
> 
> Signed-off-by: Jan Kara <jack@suse.cz>

I left in block_write_full_page() deliberately because it was
page_io.c was very tricky to get right.  When you make changes, if you
aren't careful, you can end up dereferencing a data structure after
it's been freed, which sometimes doesn't become visible until you do
some very serious stress testing.  (We had one case where it only
showed up if you were using xfstests in a VM with the memory cranked
down to a ridiculously low amount of memory.)  So having a way to
disable the page_io.c code path was useful in trying to debug things.

Fortunately, we haven't had a bug in that part of the ext4 code base
in quite a while, so maybe it's time for us to get rid of this
alternate code path.

> @@ -2885,11 +2849,6 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
>  	trace_ext4_invalidatepage(page, offset);
>  
>  	/*
> -	 * free any io_end structure allocated for buffers to be discarded
> -	 */
> -	if (ext4_should_dioread_nolock(page->mapping->host))
> -		ext4_invalidatepage_free_endio(page, offset);

What does this have to with always using ext4_bio_write_page()?  It
looks like this was a change that leaked from one of your follow-on
commits?

There was other removals of other functions, such as
ext4_set_bh_endio(), which I think should be broken out into another
commit.

					- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kara Jan. 17, 2013, 9:30 p.m. UTC | #4
On Thu 17-01-13 13:31:37, Ted Tso wrote:
> On Wed, Jan 02, 2013 at 06:45:40PM +0100, Jan Kara wrote:
> > Currently we sometimes used block_write_full_page() and sometimes
> > ext4_bio_write_page() for writeback (depending on mount options and call
> > path). Let's always use ext4_bio_write_page() to simplify things a bit.
> > 
> > Signed-off-by: Jan Kara <jack@suse.cz>
> 
> I left in block_write_full_page() deliberately because it was
> page_io.c was very tricky to get right.  When you make changes, if you
> aren't careful, you can end up dereferencing a data structure after
> it's been freed, which sometimes doesn't become visible until you do
> some very serious stress testing.  (We had one case where it only
> showed up if you were using xfstests in a VM with the memory cranked
> down to a ridiculously low amount of memory.)  So having a way to
> disable the page_io.c code path was useful in trying to debug things.
  Yes, I understand this. Just I thought the time has come we may get rid
of the old code.

> Fortunately, we haven't had a bug in that part of the ext4 code base
> in quite a while, so maybe it's time for us to get rid of this
> alternate code path.
> 
> > @@ -2885,11 +2849,6 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
> >  	trace_ext4_invalidatepage(page, offset);
> >  
> >  	/*
> > -	 * free any io_end structure allocated for buffers to be discarded
> > -	 */
> > -	if (ext4_should_dioread_nolock(page->mapping->host))
> > -		ext4_invalidatepage_free_endio(page, offset);
> 
> What does this have to with always using ext4_bio_write_page()?  It
> looks like this was a change that leaked from one of your follow-on
> commits?
> 
> There was other removals of other functions, such as
> ext4_set_bh_endio(), which I think should be broken out into another
> commit.
  Once we stop using block_write_full_page(), all the removed functions
(including the change to ext4_invalidatepage() - that is there only for
nomblk_io_submit,dioread_nolock case) become unused so I removed them.

If you think removing of unused functions should be in a separate patch I
can do that but frankly I don't see a point. So do you mean it?

								Honza
diff mbox

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8462eb3..70c7030 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -985,7 +985,6 @@  struct ext4_inode_info {
 #define EXT4_MOUNT_DIOREAD_NOLOCK	0x400000 /* Enable support for dio read nolocking */
 #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
-#define EXT4_MOUNT_MBLK_IO_SUBMIT	0x4000000 /* multi-block io submits */
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cb1c1ab..f95b511 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -134,8 +134,6 @@  static inline int ext4_begin_ordered_truncate(struct inode *inode,
 static void ext4_invalidatepage(struct page *page, unsigned long offset);
 static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create);
-static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
-static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
 static int __ext4_journalled_writepage(struct page *page, unsigned int len);
 static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
 static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
@@ -808,11 +806,10 @@  int ext4_walk_page_buffers(handle_t *handle,
  * and the commit_write().  So doing the jbd2_journal_start at the start of
  * prepare_write() is the right place.
  *
- * Also, this function can nest inside ext4_writepage() ->
- * block_write_full_page(). In that case, we *know* that ext4_writepage()
- * has generated enough buffer credits to do the whole page.  So we won't
- * block on the journal in that case, which is good, because the caller may
- * be PF_MEMALLOC.
+ * Also, this function can nest inside ext4_writepage().  In that case, we
+ * *know* that ext4_writepage() has generated enough buffer credits to do the
+ * whole page.  So we won't block on the journal in that case, which is good,
+ * because the caller may be PF_MEMALLOC.
  *
  * By accident, ext4 can be reentered when a transaction is open via
  * quota file writes.  If we were to commit the transaction while thus
@@ -1463,18 +1460,9 @@  static int mpage_da_submit_io(struct mpage_da_data *mpd,
 			 */
 			if (unlikely(journal_data && PageChecked(page)))
 				err = __ext4_journalled_writepage(page, len);
-			else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
+			else
 				err = ext4_bio_write_page(&io_submit, page,
 							  len, mpd->wbc);
-			else if (buffer_uninit(page_bufs)) {
-				ext4_set_bh_endio(page_bufs, inode);
-				err = block_write_full_page_endio(page,
-					noalloc_get_block_write,
-					mpd->wbc, ext4_end_io_buffer_write);
-			} else
-				err = block_write_full_page(page,
-					noalloc_get_block_write, mpd->wbc);
-
 			if (!err)
 				mpd->pages_written++;
 			/*
@@ -1891,16 +1879,16 @@  int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 }
 
 /*
- * This function is used as a standard get_block_t calback function
- * when there is no desire to allocate any blocks.  It is used as a
- * callback function for block_write_begin() and block_write_full_page().
- * These functions should only try to map a single block at a time.
+ * This function is used as a standard get_block_t calback function when there
+ * is no desire to allocate any blocks.  It is used as a callback function for
+ * block_write_begin().  These functions should only try to map a single block
+ * at a time.
  *
  * Since this function doesn't do block allocations even if the caller
  * requests it by passing in create=1, it is critically important that
  * any caller checks to make sure that any buffer heads are returned
  * by this function are either all already mapped or marked for
- * delayed allocation before calling  block_write_full_page().  Otherwise,
+ * delayed allocation before calling ext4_bio_write_page().  Otherwise,
  * b_blocknr could be left unitialized, and the page write functions will
  * be taken by surprise.
  */
@@ -2040,6 +2028,7 @@  static int ext4_writepage(struct page *page,
 	unsigned int len;
 	struct buffer_head *page_bufs = NULL;
 	struct inode *inode = page->mapping->host;
+	struct ext4_io_submit io_submit;
 
 	trace_ext4_writepage(page);
 	size = i_size_read(inode);
@@ -2089,14 +2078,9 @@  static int ext4_writepage(struct page *page,
 		 */
 		return __ext4_journalled_writepage(page, len);
 
-	if (buffer_uninit(page_bufs)) {
-		ext4_set_bh_endio(page_bufs, inode);
-		ret = block_write_full_page_endio(page, noalloc_get_block_write,
-					    wbc, ext4_end_io_buffer_write);
-	} else
-		ret = block_write_full_page(page, noalloc_get_block_write,
-					    wbc);
-
+	memset(&io_submit, 0, sizeof(io_submit));
+	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
+	ext4_io_submit(&io_submit);
 	return ret;
 }
 
@@ -2858,26 +2842,6 @@  ext4_readpages(struct file *file, struct address_space *mapping,
 	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
 }
 
-static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
-{
-	struct buffer_head *head, *bh;
-	unsigned int curr_off = 0;
-
-	if (!page_has_buffers(page))
-		return;
-	head = bh = page_buffers(page);
-	do {
-		if (offset <= curr_off && test_clear_buffer_uninit(bh)
-					&& bh->b_private) {
-			ext4_free_io_end(bh->b_private);
-			bh->b_private = NULL;
-			bh->b_end_io = NULL;
-		}
-		curr_off = curr_off + bh->b_size;
-		bh = bh->b_this_page;
-	} while (bh != head);
-}
-
 static void ext4_invalidatepage(struct page *page, unsigned long offset)
 {
 	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
@@ -2885,11 +2849,6 @@  static void ext4_invalidatepage(struct page *page, unsigned long offset)
 	trace_ext4_invalidatepage(page, offset);
 
 	/*
-	 * free any io_end structure allocated for buffers to be discarded
-	 */
-	if (ext4_should_dioread_nolock(page->mapping->host))
-		ext4_invalidatepage_free_endio(page, offset);
-	/*
 	 * If it's a full truncate we just forget about the pending dirtying
 	 */
 	if (offset == 0)
@@ -2977,65 +2936,6 @@  out:
 	ext4_add_complete_io(io_end);
 }
 
-static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
-{
-	ext4_io_end_t *io_end = bh->b_private;
-	struct inode *inode;
-
-	if (!test_clear_buffer_uninit(bh) || !io_end)
-		goto out;
-
-	if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
-		ext4_msg(io_end->inode->i_sb, KERN_INFO,
-			 "sb umounted, discard end_io request for inode %lu",
-			 io_end->inode->i_ino);
-		ext4_free_io_end(io_end);
-		goto out;
-	}
-
-	/*
-	 * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
-	 * but being more careful is always safe for the future change.
-	 */
-	inode = io_end->inode;
-	ext4_set_io_unwritten_flag(inode, io_end);
-	ext4_add_complete_io(io_end);
-out:
-	bh->b_private = NULL;
-	bh->b_end_io = NULL;
-	clear_buffer_uninit(bh);
-	end_buffer_async_write(bh, uptodate);
-}
-
-static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
-{
-	ext4_io_end_t *io_end;
-	struct page *page = bh->b_page;
-	loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
-	size_t size = bh->b_size;
-
-retry:
-	io_end = ext4_init_io_end(inode, GFP_ATOMIC);
-	if (!io_end) {
-		pr_warn_ratelimited("%s: allocation fail\n", __func__);
-		schedule();
-		goto retry;
-	}
-	io_end->offset = offset;
-	io_end->size = size;
-	/*
-	 * We need to hold a reference to the page to make sure it
-	 * doesn't get evicted before ext4_end_io_work() has a chance
-	 * to convert the extent from written to unwritten.
-	 */
-	io_end->page = page;
-	get_page(io_end->page);
-
-	bh->b_private = io_end;
-	bh->b_end_io = ext4_end_io_buffer_write;
-	return 0;
-}
-
 /*
  * For ext4 extent files, ext4 will do direct-io write to holes,
  * preallocated extents, and those write extend the file, no need to
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e09f7d1..afbe974 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1280,8 +1280,8 @@  static const match_table_t tokens = {
 	{Opt_stripe, "stripe=%u"},
 	{Opt_delalloc, "delalloc"},
 	{Opt_nodelalloc, "nodelalloc"},
-	{Opt_mblk_io_submit, "mblk_io_submit"},
-	{Opt_nomblk_io_submit, "nomblk_io_submit"},
+	{Opt_removed, "mblk_io_submit"},
+	{Opt_removed, "nomblk_io_submit"},
 	{Opt_block_validity, "block_validity"},
 	{Opt_noblock_validity, "noblock_validity"},
 	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
@@ -1414,8 +1414,6 @@  static const struct mount_opts {
 	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
 	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
 	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
-	{Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET},
-	{Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR},
 	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
 	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
 	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET},
@@ -3373,7 +3371,6 @@  static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 	set_opt(sb, POSIX_ACL);
 #endif
-	set_opt(sb, MBLK_IO_SUBMIT);
 	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
 		set_opt(sb, JOURNAL_DATA);
 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)