diff mbox

[2/2] ext4: truncate the file properly if we fail to copy data from userspace.

Message ID 1238491766-13182-2-git-send-email-aneesh.kumar@linux.vnet.ibm.com
State Superseded, archived
Headers show

Commit Message

Aneesh Kumar K.V March 31, 2009, 9:29 a.m. UTC
In generic_perform_write if we fail to copy the user data we don't
update the inode->i_size. We should truncate the file in the above case
so that we don't have blocks allocated outside inode->i_size. Add
the inode to orphan list in the same transaction as block allocation
This ensures that if we crash in between the recovery would do the truncate.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
CC:  Jan Kara <jack@suse.cz>
---
 fs/ext4/inode.c |  103 +++++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 77 insertions(+), 26 deletions(-)

Comments

Jan Kara March 31, 2009, 9:38 a.m. UTC | #1
On Tue 31-03-09 14:59:26, Aneesh Kumar K.V wrote:
> In generic_perform_write if we fail to copy the user data we don't
> update the inode->i_size. We should truncate the file in the above case
> so that we don't have blocks allocated outside inode->i_size. Add
> the inode to orphan list in the same transaction as block allocation
> This ensures that if we crash in between the recovery would do the truncate.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> CC:  Jan Kara <jack@suse.cz>
  This patch looks fine as well.
  Reviewed-by: Jan Kara <jack@suse.cz>

									Honza

> ---
>  fs/ext4/inode.c |  103 +++++++++++++++++++++++++++++++++++++++++--------------
>  1 files changed, 77 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 074185f..3997999 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1505,6 +1505,52 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
>  	return ext4_handle_dirty_metadata(handle, NULL, bh);
>  }
>  
> +static int ext4_generic_write_end(struct file *file,
> +				struct address_space *mapping,
> +				loff_t pos, unsigned len, unsigned copied,
> +				struct page *page, void *fsdata)
> +{
> +	int i_size_changed = 0;
> +	struct inode *inode = mapping->host;
> +	handle_t *handle = ext4_journal_current_handle();
> +
> +	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
> +
> +	/*
> +	 * No need to use i_size_read() here, the i_size
> +	 * cannot change under us because we hold i_mutex.
> +	 *
> +	 * But it's important to update i_size while still holding page lock:
> +	 * page writeout could otherwise come in and zero beyond i_size.
> +	 */
> +	if (pos + copied > inode->i_size) {
> +		i_size_write(inode, pos + copied);
> +		i_size_changed = 1;
> +	}
> +
> +	if (pos + copied >  EXT4_I(inode)->i_disksize) {
> +		/* We need to mark inode dirty even if
> +		 * new_i_size is less that inode->i_size
> +		 * bu greater than i_disksize.(hint delalloc)
> +		 */
> +		ext4_update_i_disksize(inode, (pos + copied));
> +		i_size_changed = 1;
> +	}
> +	unlock_page(page);
> +	page_cache_release(page);
> +
> +	/*
> +	 * Don't mark the inode dirty under page lock. First, it unnecessarily
> +	 * makes the holding time of page lock longer. Second, it forces lock
> +	 * ordering of page lock and transaction start for journaling
> +	 * filesystems.
> +	 */
> +	if (i_size_changed)
> +		ext4_mark_inode_dirty(handle, inode);
> +
> +	return copied;
> +}
> +
>  /*
>   * We need to pick up the new inode size which generic_commit_write gave us
>   * `file' can be NULL - eg, when called from page_symlink().
> @@ -1528,21 +1574,15 @@ static int ext4_ordered_write_end(struct file *file,
>  	ret = ext4_jbd2_file_inode(handle, inode);
>  
>  	if (ret == 0) {
> -		loff_t new_i_size;
> -
> -		new_i_size = pos + copied;
> -		if (new_i_size > EXT4_I(inode)->i_disksize) {
> -			ext4_update_i_disksize(inode, new_i_size);
> -			/* We need to mark inode dirty even if
> -			 * new_i_size is less that inode->i_size
> -			 * bu greater than i_disksize.(hint delalloc)
> -			 */
> -			ext4_mark_inode_dirty(handle, inode);
> -		}
> -
> -		ret2 = generic_write_end(file, mapping, pos, len, copied,
> +		ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
>  							page, fsdata);
>  		copied = ret2;
> +		if (pos + len > inode->i_size)
> +			/* if we have allocated more blocks and copied
> +			 * less. We will have blocks allocated outside
> +			 * inode->i_size. So truncate them
> +			 */
> +			ext4_orphan_add(handle, inode);
>  		if (ret2 < 0)
>  			ret = ret2;
>  	}
> @@ -1550,6 +1590,9 @@ static int ext4_ordered_write_end(struct file *file,
>  	if (!ret)
>  		ret = ret2;
>  
> +	if (pos + len > inode->i_size)
> +		vmtruncate(inode, inode->i_size);
> +
>  	return ret ? ret : copied;
>  }
>  
> @@ -1561,25 +1604,21 @@ static int ext4_writeback_write_end(struct file *file,
>  	handle_t *handle = ext4_journal_current_handle();
>  	struct inode *inode = mapping->host;
>  	int ret = 0, ret2;
> -	loff_t new_i_size;
>  
>  	trace_mark(ext4_writeback_write_end,
>  		   "dev %s ino %lu pos %llu len %u copied %u",
>  		   inode->i_sb->s_id, inode->i_ino,
>  		   (unsigned long long) pos, len, copied);
> -	new_i_size = pos + copied;
> -	if (new_i_size > EXT4_I(inode)->i_disksize) {
> -		ext4_update_i_disksize(inode, new_i_size);
> -		/* We need to mark inode dirty even if
> -		 * new_i_size is less that inode->i_size
> -		 * bu greater than i_disksize.(hint delalloc)
> -		 */
> -		ext4_mark_inode_dirty(handle, inode);
> -	}
> -
> -	ret2 = generic_write_end(file, mapping, pos, len, copied,
> +	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
>  							page, fsdata);
>  	copied = ret2;
> +	if (pos + len > inode->i_size)
> +		/* if we have allocated more blocks and copied
> +		 * less. We will have blocks allocated outside
> +		 * inode->i_size. So truncate them
> +		 */
> +		ext4_orphan_add(handle, inode);
> +
>  	if (ret2 < 0)
>  		ret = ret2;
>  
> @@ -1587,6 +1626,9 @@ static int ext4_writeback_write_end(struct file *file,
>  	if (!ret)
>  		ret = ret2;
>  
> +	if (pos + len > inode->i_size)
> +		vmtruncate(inode, inode->i_size);
> +
>  	return ret ? ret : copied;
>  }
>  
> @@ -1631,10 +1673,19 @@ static int ext4_journalled_write_end(struct file *file,
>  	}
>  
>  	unlock_page(page);
> +	page_cache_release(page);
> +	if (pos + len > inode->i_size)
> +		/* if we have allocated more blocks and copied
> +		 * less. We will have blocks allocated outside
> +		 * inode->i_size. So truncate them
> +		 */
> +		ext4_orphan_add(handle, inode);
> +
>  	ret2 = ext4_journal_stop(handle);
>  	if (!ret)
>  		ret = ret2;
> -	page_cache_release(page);
> +	if (pos + len > inode->i_size)
> +		vmtruncate(inode, inode->i_size);
>  
>  	return ret ? ret : copied;
>  }
> -- 
> 1.6.2.1.404.gb0085.dirty
>
Theodore Ts'o April 5, 2009, 3:22 a.m. UTC | #2
On Tue, Mar 31, 2009 at 02:59:26PM +0530, Aneesh Kumar K.V wrote:
> In generic_perform_write if we fail to copy the user data we don't
> update the inode->i_size. We should truncate the file in the above case
> so that we don't have blocks allocated outside inode->i_size. Add
> the inode to orphan list in the same transaction as block allocation
> This ensures that if we crash in between the recovery would do the truncate.

Same problem as my comment in for the last patch; it seems rather
dangerous to try to call ext4_orphan_add() outside of ext4_truncate().
Can we instead call vmtruncate() inside the same transaction handle?
i.e., figure out how many journal credits will be needed for the
potential truncate, and add it to number of credits to reserve for the
begin_write and/or write_end handle, and then call vmtruncate before
calling ext4_journal_stop().  Can anyone see a problem with this
approach?

						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kara April 6, 2009, 10:07 a.m. UTC | #3
On Sat 04-04-09 23:22:11, Theodore Tso wrote:
> On Tue, Mar 31, 2009 at 02:59:26PM +0530, Aneesh Kumar K.V wrote:
> > In generic_perform_write if we fail to copy the user data we don't
> > update the inode->i_size. We should truncate the file in the above case
> > so that we don't have blocks allocated outside inode->i_size. Add
> > the inode to orphan list in the same transaction as block allocation
> > This ensures that if we crash in between the recovery would do the truncate.
> 
> Same problem as my comment in for the last patch; it seems rather
> dangerous to try to call ext4_orphan_add() outside of ext4_truncate().
> Can we instead call vmtruncate() inside the same transaction handle?
> i.e., figure out how many journal credits will be needed for the
> potential truncate, and add it to number of credits to reserve for the
> begin_write and/or write_end handle, and then call vmtruncate before
> calling ext4_journal_stop().  Can anyone see a problem with this
> approach?
  Just adding inode to orphan list seems more elegant to me and as I wrote
in my previous email, we already do it from a common path so if there are
bugs, we should fix them anyway ;).

									Honza
diff mbox

Patch

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 074185f..3997999 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1505,6 +1505,52 @@  static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 	return ext4_handle_dirty_metadata(handle, NULL, bh);
 }
 
+static int ext4_generic_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
+{
+	int i_size_changed = 0;
+	struct inode *inode = mapping->host;
+	handle_t *handle = ext4_journal_current_handle();
+
+	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+	/*
+	 * No need to use i_size_read() here, the i_size
+	 * cannot change under us because we hold i_mutex.
+	 *
+	 * But it's important to update i_size while still holding page lock:
+	 * page writeout could otherwise come in and zero beyond i_size.
+	 */
+	if (pos + copied > inode->i_size) {
+		i_size_write(inode, pos + copied);
+		i_size_changed = 1;
+	}
+
+	if (pos + copied >  EXT4_I(inode)->i_disksize) {
+		/* We need to mark inode dirty even if
+		 * new_i_size is less that inode->i_size
+		 * bu greater than i_disksize.(hint delalloc)
+		 */
+		ext4_update_i_disksize(inode, (pos + copied));
+		i_size_changed = 1;
+	}
+	unlock_page(page);
+	page_cache_release(page);
+
+	/*
+	 * Don't mark the inode dirty under page lock. First, it unnecessarily
+	 * makes the holding time of page lock longer. Second, it forces lock
+	 * ordering of page lock and transaction start for journaling
+	 * filesystems.
+	 */
+	if (i_size_changed)
+		ext4_mark_inode_dirty(handle, inode);
+
+	return copied;
+}
+
 /*
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
@@ -1528,21 +1574,15 @@  static int ext4_ordered_write_end(struct file *file,
 	ret = ext4_jbd2_file_inode(handle, inode);
 
 	if (ret == 0) {
-		loff_t new_i_size;
-
-		new_i_size = pos + copied;
-		if (new_i_size > EXT4_I(inode)->i_disksize) {
-			ext4_update_i_disksize(inode, new_i_size);
-			/* We need to mark inode dirty even if
-			 * new_i_size is less that inode->i_size
-			 * bu greater than i_disksize.(hint delalloc)
-			 */
-			ext4_mark_inode_dirty(handle, inode);
-		}
-
-		ret2 = generic_write_end(file, mapping, pos, len, copied,
+		ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 		copied = ret2;
+		if (pos + len > inode->i_size)
+			/* if we have allocated more blocks and copied
+			 * less. We will have blocks allocated outside
+			 * inode->i_size. So truncate them
+			 */
+			ext4_orphan_add(handle, inode);
 		if (ret2 < 0)
 			ret = ret2;
 	}
@@ -1550,6 +1590,9 @@  static int ext4_ordered_write_end(struct file *file,
 	if (!ret)
 		ret = ret2;
 
+	if (pos + len > inode->i_size)
+		vmtruncate(inode, inode->i_size);
+
 	return ret ? ret : copied;
 }
 
@@ -1561,25 +1604,21 @@  static int ext4_writeback_write_end(struct file *file,
 	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
-	loff_t new_i_size;
 
 	trace_mark(ext4_writeback_write_end,
 		   "dev %s ino %lu pos %llu len %u copied %u",
 		   inode->i_sb->s_id, inode->i_ino,
 		   (unsigned long long) pos, len, copied);
-	new_i_size = pos + copied;
-	if (new_i_size > EXT4_I(inode)->i_disksize) {
-		ext4_update_i_disksize(inode, new_i_size);
-		/* We need to mark inode dirty even if
-		 * new_i_size is less that inode->i_size
-		 * bu greater than i_disksize.(hint delalloc)
-		 */
-		ext4_mark_inode_dirty(handle, inode);
-	}
-
-	ret2 = generic_write_end(file, mapping, pos, len, copied,
+	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 	copied = ret2;
+	if (pos + len > inode->i_size)
+		/* if we have allocated more blocks and copied
+		 * less. We will have blocks allocated outside
+		 * inode->i_size. So truncate them
+		 */
+		ext4_orphan_add(handle, inode);
+
 	if (ret2 < 0)
 		ret = ret2;
 
@@ -1587,6 +1626,9 @@  static int ext4_writeback_write_end(struct file *file,
 	if (!ret)
 		ret = ret2;
 
+	if (pos + len > inode->i_size)
+		vmtruncate(inode, inode->i_size);
+
 	return ret ? ret : copied;
 }
 
@@ -1631,10 +1673,19 @@  static int ext4_journalled_write_end(struct file *file,
 	}
 
 	unlock_page(page);
+	page_cache_release(page);
+	if (pos + len > inode->i_size)
+		/* if we have allocated more blocks and copied
+		 * less. We will have blocks allocated outside
+		 * inode->i_size. So truncate them
+		 */
+		ext4_orphan_add(handle, inode);
+
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	page_cache_release(page);
+	if (pos + len > inode->i_size)
+		vmtruncate(inode, inode->i_size);
 
 	return ret ? ret : copied;
 }