Patchwork [v3,4/5] mm: Scan for dirty ptes and update cmtime on MS_ASYNC

Submitter Andrew Lutomirski
Date Aug. 16, 2013, 11:22 p.m.
Message ID <1c0ffd25b5adb4ea65a29e8858ab4674b10aa9df.1376679411.git.luto@amacapital.net>
Permalink /patch/267935/
State Superseded

Comments

Andrew Lutomirski - Aug. 16, 2013, 11:22 p.m.
This is probably unimportant but improves POSIX compliance.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 mm/msync.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 72 insertions(+), 11 deletions(-)
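
For context (not part of the patch itself): POSIX requires that st_ctime and st_mtime of a file written through a shared mapping be marked for update no later than the next msync() with MS_ASYNC or MS_SYNC after the write reference. A minimal, hypothetical userspace sketch of the behavior this change is meant to cover (the file path and sizes are made up for illustration):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <sys/stat.h>
	#include <unistd.h>

	int main(void)
	{
		struct stat st;
		int fd = open("/tmp/msync-demo", O_RDWR | O_CREAT, 0644);

		if (fd < 0 || ftruncate(fd, 4096))
			return 1;

		char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			       MAP_SHARED, fd, 0);
		if (p == MAP_FAILED)
			return 1;

		/* Dirty the page through the mapping, not through write(). */
		memcpy(p, "hello", 5);

		/*
		 * POSIX: c/mtime must be updated no later than this call.
		 * With this patch, MS_ASYNC scans for dirty ptes and flushes
		 * the file times.
		 */
		if (msync(p, 4096, MS_ASYNC))
			return 1;

		fstat(fd, &st);
		printf("mtime after MS_ASYNC: %ld\n", (long)st.st_mtime);

		munmap(p, 4096);
		close(fd);
		return 0;
	}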

Patch

diff --git a/mm/msync.c b/mm/msync.c
index 632df45..9e41acd 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -13,13 +13,16 @@ 
 #include <linux/file.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
+#include <linux/rmap.h>
+#include <linux/pagemap.h>
 
 /*
  * MS_SYNC syncs the entire file - including mappings.
  *
  * MS_ASYNC does not start I/O (it used to, up to 2.5.67).
  * Nor does it marks the relevant pages dirty (it used to up to 2.6.17).
- * Now it doesn't do anything, since dirty pages are properly tracked.
+ * Now all it does is ensure that file timestamps get updated, since POSIX
+ * requires it.  We track dirty pages correctly without MS_ASYNC.
  *
  * The application may now run fsync() to
  * write out the dirty pages and wait on the writeout and check the result.
@@ -28,6 +31,57 @@ 
  * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
  * applications.
  */
+
+static int msync_async_range(struct vm_area_struct *vma,
+			      unsigned long *start, unsigned long end)
+{
+	struct mm_struct *mm;
+	struct address_space *mapping;
+	int iters = 0;
+
+	while (*start < end && *start < vma->vm_end && iters < 128) {
+		unsigned int page_mask, page_increm;
+
+		/*
+		 * Require that the pte be writable (because otherwise it can't
+		 * be dirty, so there's nothing to clean).
+		 *
+		 * In theory we could check the pte dirty bit, but this is
+		 * awkward and barely worth it.
+		 */
+		struct page *page = follow_page_mask(vma, *start,
+						     FOLL_GET | FOLL_WRITE,
+						     &page_mask);
+
+		if (page && !IS_ERR(page)) {
+			if (lock_page_killable(page) == 0) {
+				page_mkclean(page);
+				unlock_page(page);
+			}
+			put_page(page);
+		}
+
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		page_increm = 1 + (~(*start >> PAGE_SHIFT) & page_mask);
+		*start += page_increm * PAGE_SIZE;
+		cond_resched();
+		iters++;
+	}
+
+	/* XXX: try to do this only once? */
+	mapping = vma->vm_file->f_mapping;
+	if (mapping->a_ops->flush_cmtime)
+		mapping->a_ops->flush_cmtime(mapping);
+
+	/* Give mmap_sem writers a chance. */
+	mm = current->mm;
+	up_read(&mm->mmap_sem);
+	down_read(&mm->mmap_sem);
+	return 0;
+}
+
 SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
 {
 	unsigned long end;
@@ -77,18 +131,25 @@  SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
 			goto out_unlock;
 		}
 		file = vma->vm_file;
-		start = vma->vm_end;
-		if ((flags & MS_SYNC) && file &&
-				(vma->vm_flags & VM_SHARED)) {
-			get_file(file);
-			up_read(&mm->mmap_sem);
-			error = vfs_fsync(file, 0);
-			fput(file);
-			if (error || start >= end)
-				goto out;
-			down_read(&mm->mmap_sem);
+		if (file && vma->vm_flags & VM_SHARED) {
+			if (flags & MS_SYNC) {
+				start = vma->vm_end;
+				get_file(file);
+				up_read(&mm->mmap_sem);
+				error = vfs_fsync(file, 0);
+				fput(file);
+				if (error || start >= end)
+					goto out;
+				down_read(&mm->mmap_sem);
+			} else if ((vma->vm_flags & VM_WRITE) &&
+				   file->f_mapping) {
+				error = msync_async_range(vma, &start, end);
+			} else {
+				start = vma->vm_end;
+			}
 			vma = find_vma(mm, start);
 		} else {
+			start = vma->vm_end;
 			if (start >= end) {
 				error = 0;
 				goto out_unlock;