diff mbox series

[K,1/2] UBUNTU: SAUCE: for aufs mmap: print the virtual file path

Message ID 20220805063214.32432-3-andrea.righi@canonical.com
State New
Headers show
Series refactoring of overlayfs fix to properly support shiftfs | expand

Commit Message

Andrea Righi Aug. 5, 2022, 6:32 a.m. UTC
From: "J. R. Okajima" <hooanon05g@gmail.com>

BugLink: https://bugs.launchpad.net/bugs/1983640

For details, read the document in later commit.
I don't like this approach, and UnionMount seems to try solving a
similar problem by customizing f_dentry and d_inode. Ideally it is
better to result the same fix, but it may not be so easy because
UnionMount doesn't have its virtual inode/dentry/file objects as it is
not a 'filesystem.'

Signed-off-by: J. R. Okajima <hooanon05g@gmail.com>
(cherry picked from commit f8a27912904bdc40a27f434a9b6d19f56e7fb3b6)
[ https://github.com/sfjro/aufs5-linux ]

This patch is required to fix:
a80dceb9e85 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")

Please, refer to bug:
https://bugs.launchpad.net/bugs/1967924
for more details.

See also next commit
"UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files"
that is the missing fix to properly support overlayfs with shiftfs.

(backported from https://github.com/sfjro/aufs5-linux)
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
---
 fs/proc/base.c           |  2 +-
 fs/proc/nommu.c          |  5 ++-
 fs/proc/task_mmu.c       |  7 +++-
 fs/proc/task_nommu.c     |  5 ++-
 include/linux/mm.h       | 22 ++++++++++
 include/linux/mm_types.h |  2 +
 kernel/fork.c            |  2 +-
 mm/Makefile              |  2 +-
 mm/filemap.c             |  2 +-
 mm/mmap.c                | 33 +++++++++++----
 mm/nommu.c               | 10 ++---
 mm/prfile.c              | 86 ++++++++++++++++++++++++++++++++++++++++
 12 files changed, 157 insertions(+), 21 deletions(-)
 create mode 100644 mm/prfile.c
diff mbox series

Patch

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8f797fe7e1f0..b92d4ad6dca7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2190,7 +2190,7 @@  static int map_files_get_link(struct dentry *dentry, struct path *path)
 	rc = -ENOENT;
 	vma = find_exact_vma(mm, vm_start, vm_end);
 	if (vma && vma->vm_file) {
-		*path = vma->vm_file->f_path;
+		*path = vma_pr_or_file(vma)->f_path;
 		path_get(path);
 		rc = 0;
 	}
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 13452b32e2bd..38acccfef9d4 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -40,7 +40,10 @@  static int nommu_region_show(struct seq_file *m, struct vm_region *region)
 	file = region->vm_file;
 
 	if (file) {
-		struct inode *inode = file_inode(region->vm_file);
+		struct inode *inode;
+
+		file = vmr_pr_or_file(region);
+		inode = file_inode(file);
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 	}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2d04e3470d4c..f57a3bed280a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -281,7 +281,10 @@  show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 	const char *name = NULL;
 
 	if (file) {
-		struct inode *inode = file_inode(vma->vm_file);
+		struct inode *inode;
+
+		file = vma_pr_or_file(vma);
+		inode = file_inode(file);
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
@@ -1908,7 +1911,7 @@  static int show_numa_map(struct seq_file *m, void *v)
 	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
 	struct vm_area_struct *vma = v;
 	struct numa_maps *md = &numa_priv->md;
-	struct file *file = vma->vm_file;
+	struct file *file = vma_pr_or_file(vma);
 	struct mm_struct *mm = vma->vm_mm;
 	struct mempolicy *pol;
 	char buffer[64];
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index a6d21fc0033c..02c2de31196e 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -155,7 +155,10 @@  static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
 	file = vma->vm_file;
 
 	if (file) {
-		struct inode *inode = file_inode(vma->vm_file);
+		struct inode *inode;
+
+		file = vma_pr_or_file(vma);
+		inode = file_inode(file);
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 		pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7898e29bcfb5..9bad719be310 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1909,6 +1909,28 @@  static inline void unmap_shared_mapping_range(struct address_space *mapping,
 	unmap_mapping_range(mapping, holebegin, holelen, 0);
 }
 
+extern void vma_do_file_update_time(struct vm_area_struct *, const char[], int);
+extern struct file *vma_do_pr_or_file(struct vm_area_struct *, const char[],
+				      int);
+extern void vma_do_get_file(struct vm_area_struct *, const char[], int);
+extern void vma_do_fput(struct vm_area_struct *, const char[], int);
+
+#define vma_file_update_time(vma)	vma_do_file_update_time(vma, __func__, \
+								__LINE__)
+#define vma_pr_or_file(vma)		vma_do_pr_or_file(vma, __func__, \
+							  __LINE__)
+#define vma_get_file(vma)		vma_do_get_file(vma, __func__, __LINE__)
+#define vma_fput(vma)			vma_do_fput(vma, __func__, __LINE__)
+
+#ifndef CONFIG_MMU
+extern struct file *vmr_do_pr_or_file(struct vm_region *, const char[], int);
+extern void vmr_do_fput(struct vm_region *, const char[], int);
+
+#define vmr_pr_or_file(region)		vmr_do_pr_or_file(region, __func__, \
+							  __LINE__)
+#define vmr_fput(region)		vmr_do_fput(region, __func__, __LINE__)
+#endif /* !CONFIG_MMU */
+
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
 		void *buf, int len, unsigned int gup_flags);
 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c29ab4c0cd5c..ec281b09ac78 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -367,6 +367,7 @@  struct vm_region {
 	unsigned long	vm_top;		/* region allocated to here */
 	unsigned long	vm_pgoff;	/* the offset in vm_file corresponding to vm_start */
 	struct file	*vm_file;	/* the backing file or NULL */
+	struct file	*vm_prfile;	/* the virtual backing file or NULL */
 
 	int		vm_usage;	/* region usage count (access under nommu_region_sem) */
 	bool		vm_icache_flushed : 1; /* true if the icache has been flushed for
@@ -463,6 +464,7 @@  struct vm_area_struct {
 	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
 					   units */
 	struct file * vm_file;		/* File we map to (can be NULL). */
+	struct file *vm_prfile;		/* shadow of vm_file */
 	void * vm_private_data;		/* was vm_pte (shared mem) */
 
 #ifdef CONFIG_SWAP
diff --git a/kernel/fork.c b/kernel/fork.c
index 168838deeea4..316748a73130 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -667,7 +667,7 @@  static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		if (file) {
 			struct address_space *mapping = file->f_mapping;
 
-			get_file(file);
+			vma_get_file(tmp);
 			i_mmap_lock_write(mapping);
 			if (tmp->vm_flags & VM_SHARED)
 				mapping_allow_writable(mapping);
diff --git a/mm/Makefile b/mm/Makefile
index 6f9ffa968a1a..d882b0490f08 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -54,7 +54,7 @@  obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
 			   mm_init.o percpu.o slab_common.o \
 			   compaction.o vmacache.o \
 			   interval_tree.o list_lru.o workingset.o \
-			   debug.o gup.o mmap_lock.o $(mmu-y)
+			   prfile.o debug.o gup.o mmap_lock.o $(mmu-y)
 
 # Give 'page_alloc' its own module-parameter namespace
 page-alloc-y := page_alloc.o
diff --git a/mm/filemap.c b/mm/filemap.c
index ffdfbc8b0e3c..2932f7020bfe 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3431,7 +3431,7 @@  vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
 	vm_fault_t ret = VM_FAULT_LOCKED;
 
 	sb_start_pagefault(mapping->host->i_sb);
-	file_update_time(vmf->vma->vm_file);
+	vma_file_update_time(vmf->vma);
 	folio_lock(folio);
 	if (folio->mapping != mapping) {
 		folio_unlock(folio);
diff --git a/mm/mmap.c b/mm/mmap.c
index 61e6135c54ef..e44d353f67e2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -189,7 +189,7 @@  static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file)
-		fput(vma->vm_file);
+		vma_fput(vma);
 	mpol_put(vma_policy(vma));
 	vm_area_free(vma);
 	return next;
@@ -958,7 +958,7 @@  int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	if (remove_next) {
 		if (file) {
 			uprobe_munmap(next, next->vm_start, next->vm_end);
-			fput(file);
+			vma_fput(vma);
 		}
 		if (next->anon_vma)
 			anon_vma_merge(vma, next);
@@ -1889,7 +1889,7 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 	return addr;
 
 unmap_and_free_vma:
-	fput(vma->vm_file);
+	vma_fput(vma);
 	vma->vm_file = NULL;
 
 	/* Undo any partial mapping done by a device driver. */
@@ -2751,7 +2751,7 @@  int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_free_mpol;
 
 	if (new->vm_file)
-		get_file(new->vm_file);
+		vma_get_file(new);
 
 	if (new->vm_ops && new->vm_ops->open)
 		new->vm_ops->open(new);
@@ -2770,7 +2770,7 @@  int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (new->vm_ops && new->vm_ops->close)
 		new->vm_ops->close(new);
 	if (new->vm_file)
-		fput(new->vm_file);
+		vma_fput(new);
 	unlink_anon_vmas(new);
  out_free_mpol:
 	mpol_put(vma_policy(new));
@@ -2942,7 +2942,7 @@  SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 	struct vm_area_struct *vma;
 	unsigned long populate = 0;
 	unsigned long ret = -EINVAL;
-	struct file *file;
+	struct file *file, *prfile;
 
 	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
 		     current->comm, current->pid);
@@ -2998,10 +2998,27 @@  SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 	if (vma->vm_flags & VM_LOCKED)
 		flags |= MAP_LOCKED;
 
-	file = get_file(vma->vm_file);
+	vma_get_file(vma);
+	file = vma->vm_file;
+	prfile = vma->vm_prfile;
 	ret = do_mmap(vma->vm_file, start, size,
 			prot, flags, pgoff, &populate, NULL);
+	if (!IS_ERR_VALUE(ret) && file && prfile) {
+		struct vm_area_struct *new_vma;
+
+		new_vma = find_vma(mm, ret);
+		if (!new_vma->vm_prfile)
+			new_vma->vm_prfile = prfile;
+		if (new_vma != vma)
+			get_file(prfile);
+	}
+	/*
+	 * two fput()s instead of vma_fput(vma),
+	 * coz vma may not be available anymore.
+	 */
 	fput(file);
+	if (prfile)
+		fput(prfile);
 out:
 	mmap_write_unlock(mm);
 	if (populate)
@@ -3275,7 +3292,7 @@  struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		if (anon_vma_clone(new_vma, vma))
 			goto out_free_mempol;
 		if (new_vma->vm_file)
-			get_file(new_vma->vm_file);
+			vma_get_file(new_vma);
 		if (new_vma->vm_ops && new_vma->vm_ops->open)
 			new_vma->vm_ops->open(new_vma);
 		vma_link(mm, new_vma, prev, rb_link, rb_parent);
diff --git a/mm/nommu.c b/mm/nommu.c
index 9d7afc2d959e..917851d5b7c6 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -524,7 +524,7 @@  static void __put_nommu_region(struct vm_region *region)
 		up_write(&nommu_region_sem);
 
 		if (region->vm_file)
-			fput(region->vm_file);
+			vmr_fput(region);
 
 		/* IO memory and memory shared directly out of the pagecache
 		 * from ramfs/tmpfs mustn't be released here */
@@ -656,7 +656,7 @@  static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file)
-		fput(vma->vm_file);
+		vma_fput(vma);
 	put_nommu_region(vma->vm_region);
 	vm_area_free(vma);
 }
@@ -1176,7 +1176,7 @@  unsigned long do_mmap(struct file *file,
 					goto error_just_free;
 				}
 			}
-			fput(region->vm_file);
+			vmr_fput(region);
 			kmem_cache_free(vm_region_jar, region);
 			region = pregion;
 			result = start;
@@ -1253,10 +1253,10 @@  unsigned long do_mmap(struct file *file,
 	up_write(&nommu_region_sem);
 error:
 	if (region->vm_file)
-		fput(region->vm_file);
+		vmr_fput(region);
 	kmem_cache_free(vm_region_jar, region);
 	if (vma->vm_file)
-		fput(vma->vm_file);
+		vma_fput(vma);
 	vm_area_free(vma);
 	return ret;
 
diff --git a/mm/prfile.c b/mm/prfile.c
new file mode 100644
index 000000000000..511543ab1b41
--- /dev/null
+++ b/mm/prfile.c
@@ -0,0 +1,86 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Mainly for aufs which mmap(2) different file and wants to print different
+ * path in /proc/PID/maps.
+ * Call these functions via macros defined in linux/mm.h.
+ *
+ * See Documentation/filesystems/aufs/design/06mmap.txt
+ *
+ * Copyright (c) 2014-2021 Junjro R. Okajima
+ * Copyright (c) 2014 Ian Campbell
+ */
+
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+
+/* #define PRFILE_TRACE */
+static inline void prfile_trace(struct file *f, struct file *pr,
+			      const char func[], int line, const char func2[])
+{
+#ifdef PRFILE_TRACE
+	if (pr)
+		pr_info("%s:%d: %s, %pD2\n", func, line, func2, f);
+#endif
+}
+
+void vma_do_file_update_time(struct vm_area_struct *vma, const char func[],
+			     int line)
+{
+	struct file *f = vma->vm_file, *pr = vma->vm_prfile;
+
+	prfile_trace(f, pr, func, line, __func__);
+	file_update_time(f);
+	if (f && pr)
+		file_update_time(pr);
+}
+
+struct file *vma_do_pr_or_file(struct vm_area_struct *vma, const char func[],
+			       int line)
+{
+	struct file *f = vma->vm_file, *pr = vma->vm_prfile;
+
+	prfile_trace(f, pr, func, line, __func__);
+	return (f && pr) ? pr : f;
+}
+
+void vma_do_get_file(struct vm_area_struct *vma, const char func[], int line)
+{
+	struct file *f = vma->vm_file, *pr = vma->vm_prfile;
+
+	prfile_trace(f, pr, func, line, __func__);
+	get_file(f);
+	if (f && pr)
+		get_file(pr);
+}
+
+void vma_do_fput(struct vm_area_struct *vma, const char func[], int line)
+{
+	struct file *f = vma->vm_file, *pr = vma->vm_prfile;
+
+	prfile_trace(f, pr, func, line, __func__);
+	fput(f);
+	if (f && pr)
+		fput(pr);
+}
+
+#ifndef CONFIG_MMU
+struct file *vmr_do_pr_or_file(struct vm_region *region, const char func[],
+			       int line)
+{
+	struct file *f = region->vm_file, *pr = region->vm_prfile;
+
+	prfile_trace(f, pr, func, line, __func__);
+	return (f && pr) ? pr : f;
+}
+
+void vmr_do_fput(struct vm_region *region, const char func[], int line)
+{
+	struct file *f = region->vm_file, *pr = region->vm_prfile;
+
+	prfile_trace(f, pr, func, line, __func__);
+	fput(f);
+	if (f && pr)
+		fput(pr);
+}
+#endif /* !CONFIG_MMU */