Patchwork [1/1] eCryptfs: Revert to a writethrough cache model

login
register
mail settings
Submitter Colin King
Date Aug. 20, 2012, 11:59 a.m.
Message ID <1345463998-31308-2-git-send-email-colin.king@canonical.com>
Download mbox | patch
Permalink /patch/178753/
State New
Headers show

Comments

Colin King - Aug. 20, 2012, 11:59 a.m.
From: Colin Ian King <colin.king@canonical.com>

BugLink: http://bugs.launchpad.net/bugs/1034012

A change was made about a year ago to get eCryptfs to better utilize its
page cache during writes. The idea was to do the page encryption
operations during page writeback, rather than doing them when initially
writing into the page cache, to reduce the number of page encryption
operations during sequential writes. This meant that the encrypted page
would only be written to the lower filesystem during page writeback,
which was a change from how eCryptfs had previously wrote to the lower
filesystem in ecryptfs_write_end().

The change caused a few eCryptfs-internal bugs that were shook out.
Unfortunately, more grave side effects have been identified that will
force changes outside of eCryptfs. Because the lower filesystem isn't
consulted until page writeback, eCryptfs has no way to pass lower write
errors (ENOSPC, mainly) back to userspace. Additionaly, it was reported
that quotas could be bypassed because of the way eCryptfs may sometimes
open the lower filesystem using a privileged kthread.

It would be nice to resolve the latest issues, but it is best if the
eCryptfs commits be reverted to the old behavior in the meantime.

This reverts:
32001d6f "eCryptfs: Flush file in vma close"
5be79de2 "eCryptfs: Flush dirty pages in setattr"
57db4e8d "ecryptfs: modify write path to encrypt page in writepage"

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Tested-by: Colin King <colin.king@canonical.com>
Cc: Colin King <colin.king@canonical.com>
Cc: Thieu Le <thieule@google.com>A
(backported fromn commit 821f7494a77627fb1ab539591c57b22cdca702d6)
---
 fs/ecryptfs/file.c  |   32 ++------------------------------
 fs/ecryptfs/inode.c |    6 ------
 fs/ecryptfs/mmap.c  |   39 +++++++++++++--------------------------
 3 files changed, 15 insertions(+), 62 deletions(-)
Tim Gardner - Aug. 20, 2012, 12:17 p.m.
Good test results.
Seth Forshee - Aug. 20, 2012, 1:15 p.m.
On Mon, Aug 20, 2012 at 12:59:58PM +0100, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/1034012
> 
> A change was made about a year ago to get eCryptfs to better utilize its
> page cache during writes. The idea was to do the page encryption
> operations during page writeback, rather than doing them when initially
> writing into the page cache, to reduce the number of page encryption
> operations during sequential writes. This meant that the encrypted page
> would only be written to the lower filesystem during page writeback,
> which was a change from how eCryptfs had previously wrote to the lower
> filesystem in ecryptfs_write_end().
> 
> The change caused a few eCryptfs-internal bugs that were shook out.
> Unfortunately, more grave side effects have been identified that will
> force changes outside of eCryptfs. Because the lower filesystem isn't
> consulted until page writeback, eCryptfs has no way to pass lower write
> errors (ENOSPC, mainly) back to userspace. Additionaly, it was reported
> that quotas could be bypassed because of the way eCryptfs may sometimes
> open the lower filesystem using a privileged kthread.
> 
> It would be nice to resolve the latest issues, but it is best if the
> eCryptfs commits be reverted to the old behavior in the meantime.
> 
> This reverts:
> 32001d6f "eCryptfs: Flush file in vma close"
> 5be79de2 "eCryptfs: Flush dirty pages in setattr"
> 57db4e8d "ecryptfs: modify write path to encrypt page in writepage"
> 
> Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
> Tested-by: Colin King <colin.king@canonical.com>
> Cc: Colin King <colin.king@canonical.com>
> Cc: Thieu Le <thieule@google.com>A
> (backported fromn commit 821f7494a77627fb1ab539591c57b22cdca702d6)
> ---
>  fs/ecryptfs/file.c  |   32 ++------------------------------
>  fs/ecryptfs/inode.c |    6 ------
>  fs/ecryptfs/mmap.c  |   39 +++++++++++++--------------------------
>  3 files changed, 15 insertions(+), 62 deletions(-)
> 
> diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
> index 9b3ad32..3a5937b 100644
> --- a/fs/ecryptfs/file.c
> +++ b/fs/ecryptfs/file.c
> @@ -139,27 +139,6 @@ out:
>  	return rc;
>  }
>  
> -static void ecryptfs_vma_close(struct vm_area_struct *vma)
> -{
> -	filemap_write_and_wait(vma->vm_file->f_mapping);
> -}
> -
> -static const struct vm_operations_struct ecryptfs_file_vm_ops = {
> -	.close		= ecryptfs_vma_close,
> -	.fault		= filemap_fault,
> -};
> -
> -static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma)
> -{
> -	int rc;
> -
> -	rc = generic_file_mmap(file, vma);
> -	if (!rc)
> -		vma->vm_ops = &ecryptfs_file_vm_ops;
> -
> -	return rc;
> -}
> -
>  struct kmem_cache *ecryptfs_file_info_cache;
>  
>  static int read_or_initialize_metadata(struct dentry *dentry)
> @@ -312,14 +291,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
>  static int
>  ecryptfs_fsync(struct file *file, int datasync)
>  {
> -	int rc = 0;
> -
> -	rc = generic_file_fsync(file, datasync);
> -	if (rc)
> -		goto out;
> -	rc = vfs_fsync(ecryptfs_file_to_lower(file), datasync);
> -out:
> -	return rc;
> +	return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
>  }
>  
>  static int ecryptfs_fasync(int fd, struct file *file, int flag)
> @@ -388,7 +360,7 @@ const struct file_operations ecryptfs_main_fops = {
>  #ifdef CONFIG_COMPAT
>  	.compat_ioctl = ecryptfs_compat_ioctl,
>  #endif
> -	.mmap = ecryptfs_file_mmap,
> +	.mmap = generic_file_mmap,
>  	.open = ecryptfs_open,
>  	.flush = ecryptfs_flush,
>  	.release = ecryptfs_release,
> diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
> index 6e57feb..654a71e 100644
> --- a/fs/ecryptfs/inode.c
> +++ b/fs/ecryptfs/inode.c
> @@ -1046,12 +1046,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
>  			goto out;
>  	}
>  
> -	if (S_ISREG(inode->i_mode)) {
> -		rc = filemap_write_and_wait(inode->i_mapping);
> -		if (rc)
> -			goto out;
> -		fsstack_copy_attr_all(inode, lower_inode);
> -	}
>  	memcpy(&lower_ia, ia, sizeof(lower_ia));
>  	if (ia->ia_valid & ATTR_FILE)
>  		lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
> diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
> index 6a44148..93a998a 100644
> --- a/fs/ecryptfs/mmap.c
> +++ b/fs/ecryptfs/mmap.c
> @@ -62,18 +62,6 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
>  {
>  	int rc;
>  
> -	/*
> -	 * Refuse to write the page out if we are called from reclaim context
> -	 * since our writepage() path may potentially allocate memory when
> -	 * calling into the lower fs vfs_write() which may in turn invoke
> -	 * us again.
> -	 */
> -	if (current->flags & PF_MEMALLOC) {
> -		redirty_page_for_writepage(wbc, page);
> -		rc = 0;
> -		goto out;
> -	}
> -
>  	rc = ecryptfs_encrypt_page(page);
>  	if (rc) {
>  		ecryptfs_printk(KERN_WARNING, "Error encrypting "
> @@ -498,7 +486,6 @@ static int ecryptfs_write_end(struct file *file,
>  	struct ecryptfs_crypt_stat *crypt_stat =
>  		&ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
>  	int rc;
> -	int need_unlock_page = 1;
>  
>  	ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
>  			"(page w/ index = [0x%.16lx], to = [%d])\n", index, to);
> @@ -519,26 +506,26 @@ static int ecryptfs_write_end(struct file *file,
>  			"zeros in page with index = [0x%.16lx]\n", index);
>  		goto out;
>  	}
> -	set_page_dirty(page);
> -	unlock_page(page);
> -	need_unlock_page = 0;
> +	rc = ecryptfs_encrypt_page(page);
> +	if (rc) {
> +		ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
> +				"index [0x%.16lx])\n", index);
> +		goto out;
> +	}
>  	if (pos + copied > i_size_read(ecryptfs_inode)) {
>  		i_size_write(ecryptfs_inode, pos + copied);
>  		ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
>  			"[0x%.16llx]\n",
>  			(unsigned long long)i_size_read(ecryptfs_inode));
> -		balance_dirty_pages_ratelimited(mapping);
> -		rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
> -		if (rc) {
> -			printk(KERN_ERR "Error writing inode size to metadata; "
> -			       "rc = [%d]\n", rc);
> -			goto out;
> -		}
>  	}
> -	rc = copied;
> +	rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
> +	if (rc)
> +		printk(KERN_ERR "Error writing inode size to metadata; "
> +		       "rc = [%d]\n", rc);
> +	else
> +		rc = copied;
>  out:
> -	if (need_unlock_page)
> -		unlock_page(page);
> +	unlock_page(page);
>  	page_cache_release(page);
>  	return rc;
>  }
> -- 
> 1.7.5.4
> 
> 
> -- 
> kernel-team mailing list
> kernel-team@lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
Ben Hutchings - Sept. 29, 2012, 1:51 p.m.
On Mon, 2012-08-20 at 12:59 +0100, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/1034012
[...]
> (backported fromn commit 821f7494a77627fb1ab539591c57b22cdca702d6)
[...]

Should this go into 3.2.y and other stable branches?

Ben.
Colin King - Sept. 29, 2012, 5:09 p.m.
On 29/09/12 14:51, Ben Hutchings wrote:
> On Mon, 2012-08-20 at 12:59 +0100, Colin King wrote:
>> From: Colin Ian King <colin.king@canonical.com>
>>
>> BugLink: http://bugs.launchpad.net/bugs/1034012
> [...]
>> (backported fromn commit 821f7494a77627fb1ab539591c57b22cdca702d6)
> [...]
>
> Should this go into 3.2.y and other stable branches?

Well it definitely should be applied from 3.2.y up to 3.5-rc6, but it 
won't cleanly apply to 3.2.y.  I've had a quick look and there seems to 
be some commits in precise that aren't in 3.2.y, namely:

98acd7c827321549d2e56adc6b801
	UBUNTU: SAUCE: eCryptfs: Improve statfs reportingc8268fba081
	
0bf24acb9f571c152a56155c38c2aab6608f27c0
	eCryptfs: Initialize empty lower files when opening them

This particular patch won't apply cleanly unless 
0bf24acb9f571c152a56155c38c2aab6608f27c0 from Precise is applied first.

Hope that helps.

Colin


>
> Ben.
>

Patch

diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 9b3ad32..3a5937b 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -139,27 +139,6 @@  out:
 	return rc;
 }
 
-static void ecryptfs_vma_close(struct vm_area_struct *vma)
-{
-	filemap_write_and_wait(vma->vm_file->f_mapping);
-}
-
-static const struct vm_operations_struct ecryptfs_file_vm_ops = {
-	.close		= ecryptfs_vma_close,
-	.fault		= filemap_fault,
-};
-
-static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	int rc;
-
-	rc = generic_file_mmap(file, vma);
-	if (!rc)
-		vma->vm_ops = &ecryptfs_file_vm_ops;
-
-	return rc;
-}
-
 struct kmem_cache *ecryptfs_file_info_cache;
 
 static int read_or_initialize_metadata(struct dentry *dentry)
@@ -312,14 +291,7 @@  static int ecryptfs_release(struct inode *inode, struct file *file)
 static int
 ecryptfs_fsync(struct file *file, int datasync)
 {
-	int rc = 0;
-
-	rc = generic_file_fsync(file, datasync);
-	if (rc)
-		goto out;
-	rc = vfs_fsync(ecryptfs_file_to_lower(file), datasync);
-out:
-	return rc;
+	return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
 }
 
 static int ecryptfs_fasync(int fd, struct file *file, int flag)
@@ -388,7 +360,7 @@  const struct file_operations ecryptfs_main_fops = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = ecryptfs_compat_ioctl,
 #endif
-	.mmap = ecryptfs_file_mmap,
+	.mmap = generic_file_mmap,
 	.open = ecryptfs_open,
 	.flush = ecryptfs_flush,
 	.release = ecryptfs_release,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 6e57feb..654a71e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1046,12 +1046,6 @@  static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 			goto out;
 	}
 
-	if (S_ISREG(inode->i_mode)) {
-		rc = filemap_write_and_wait(inode->i_mapping);
-		if (rc)
-			goto out;
-		fsstack_copy_attr_all(inode, lower_inode);
-	}
 	memcpy(&lower_ia, ia, sizeof(lower_ia));
 	if (ia->ia_valid & ATTR_FILE)
 		lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 6a44148..93a998a 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -62,18 +62,6 @@  static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	int rc;
 
-	/*
-	 * Refuse to write the page out if we are called from reclaim context
-	 * since our writepage() path may potentially allocate memory when
-	 * calling into the lower fs vfs_write() which may in turn invoke
-	 * us again.
-	 */
-	if (current->flags & PF_MEMALLOC) {
-		redirty_page_for_writepage(wbc, page);
-		rc = 0;
-		goto out;
-	}
-
 	rc = ecryptfs_encrypt_page(page);
 	if (rc) {
 		ecryptfs_printk(KERN_WARNING, "Error encrypting "
@@ -498,7 +486,6 @@  static int ecryptfs_write_end(struct file *file,
 	struct ecryptfs_crypt_stat *crypt_stat =
 		&ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
 	int rc;
-	int need_unlock_page = 1;
 
 	ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
 			"(page w/ index = [0x%.16lx], to = [%d])\n", index, to);
@@ -519,26 +506,26 @@  static int ecryptfs_write_end(struct file *file,
 			"zeros in page with index = [0x%.16lx]\n", index);
 		goto out;
 	}
-	set_page_dirty(page);
-	unlock_page(page);
-	need_unlock_page = 0;
+	rc = ecryptfs_encrypt_page(page);
+	if (rc) {
+		ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
+				"index [0x%.16lx])\n", index);
+		goto out;
+	}
 	if (pos + copied > i_size_read(ecryptfs_inode)) {
 		i_size_write(ecryptfs_inode, pos + copied);
 		ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
 			"[0x%.16llx]\n",
 			(unsigned long long)i_size_read(ecryptfs_inode));
-		balance_dirty_pages_ratelimited(mapping);
-		rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
-		if (rc) {
-			printk(KERN_ERR "Error writing inode size to metadata; "
-			       "rc = [%d]\n", rc);
-			goto out;
-		}
 	}
-	rc = copied;
+	rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
+	if (rc)
+		printk(KERN_ERR "Error writing inode size to metadata; "
+		       "rc = [%d]\n", rc);
+	else
+		rc = copied;
 out:
-	if (need_unlock_page)
-		unlock_page(page);
+	unlock_page(page);
 	page_cache_release(page);
 	return rc;
 }