diff mbox series

UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files

Message ID 20210426070139.21781-1-alexander@mihalicyn.com
State New
Headers show
Series UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files | expand

Commit Message

Alexander Mikhalitsyn April 26, 2021, 7:01 a.m. UTC
From: Alexander Mikhalitsyn <alexander@mihalicyn.com>

The hack was introduced in commit 58009298c6bd ("UBUNTU: SAUCE: overlayfs:
allow with shiftfs as underlay") and it broke checkpoint/restore of docker
containers:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257

The following script can be used to trigger the issue:
  #!/bin/bash

  cat > test.py << EOF
  import sys

  f = open("/proc/self/maps")

  for l in f.readlines():
    if "python" not in l:
      continue
    print(l)
    s = l.split()
    start, end = s[0].split("-")
    fname = s[-1]
    print(start, end, fname)
    break
  else:
    sys.exit(1)

  test_file1 = open(fname)
  test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))

  fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
  fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()

  if fdinfo1 != fdinfo2:
    print("FAIL")
    print(test_file1)
    print(fdinfo1)
    print(test_file2)
    print(fdinfo2)
    sys.exit(1)
  print("PASS")
  EOF
  sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py

Thanks to Andrei Vagin for the reproducer and investigation of this problem.

Cc: Andrei Vagin <avagin@gmail.com>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Stefan Bader <stefan.bader@canonical.com>
Cc: Connor Kuehl <connor.kuehl@canonical.com>
Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>

Fixes: 58009298c6bd ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
---
 fs/overlayfs/file.c      | 17 +++++++++++++++++
 fs/overlayfs/overlayfs.h | 12 ++++++++++++
 2 files changed, 29 insertions(+)

Comments

Krzysztof Kozlowski April 26, 2021, 7:38 a.m. UTC | #1
On 26/04/2021 09:01, alexander@mihalicyn.com wrote:
> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> 
> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> shiftfs as underlay") and it broke checkpoint/restore of docker
> contains:
> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257

Thanks for the patch.

It seems you are posting it for Focal, so you need to follow the SRU
process:
https://wiki.ubuntu.com/Kernel/Dev/StablePatchFormat

> 
> The following script can be used to trigger the issue:
>   #!/bin/bash
> 
>   cat > test.py << EOF
>   import sys
> 
>   f = open("/proc/self/maps")
> 
>   for l in f.readlines():
>     if "python" not in l:
>       continue
>     print(l)
>     s = l.split()
>     start, end = s[0].split("-")
>     fname = s[-1]
>     print(start, end, fname)
>     break
>   else:
>     sys.exit(1)
> 
>   test_file1 = open(fname)
>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> 
>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> 
>   if fdinfo1 != fdinfo2:
>     print("FAIL")
>     print(test_file1)
>     print(fdinfo1)
>     print(test_file2)
>     print(fdinfo2)
>     sys.exit(1)
>   print("PASS")
>   EOF
>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> 
> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> 
> Cc: Andrei Vagin <avagin@gmail.com>
> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> Cc: Stefan Bader <stefan.bader@canonical.com>
> Cc: Connor Kuehl <connor.kuehl@canonical.com>
> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> 
> Fixes: 58009298c6bd ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> ---
>  fs/overlayfs/file.c      | 17 +++++++++++++++++
>  fs/overlayfs/overlayfs.h | 12 ++++++++++++
>  2 files changed, 29 insertions(+)
> 
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 0d3ea0cf3e98..7e63eb52bb0d 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -351,6 +351,23 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
>  		vma->vm_file = file;
>  		fput(realfile);
>  	} else {
> +		/*
> +		 * In map_files_get_link() (fs/proc/base.c)
> +		 * we need to determine correct path from overlayfs.
> +		 * But real_mount(realfile->f_path.mnt) may be not
> +		 * equal to real_mount(file->f_path.mnt). In such case
> +		 * fdinfo of the same file which was opened from
> +		 * /proc/<pid>/map_files/... and "usual" path
> +		 * will show different mnt_id.
> +		 *
> +		 * We solve issue like in aufs by using additional
> +		 * field on struct vm_area_struct called "vm_prfile"
> +		 * which is used only for fdinfo/"printing" needs.
> +		 *
> +		 * See also mm/prfile.c
> +		 */
> +		ovl_vm_prfile_set(vma, file);
> +
>  		/* Drop reference count from previous vm_file value */
>  		fput(file);
>  	}
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 5f0188807666..a4e6023b4fdb 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -442,3 +442,15 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
>  
>  /* export.c */
>  extern const struct export_operations ovl_export_operations;
> +
> +/* handle vma->vm_prfile */
> +static inline void ovl_vm_prfile_set(struct vm_area_struct *vma,
> +				    struct file *file)

Why is this inline? I see it is being used in only one place, so it
should be a regular static function inside file.c. It seems you copied the
pattern from aufs, but I doubt that it's better to keep it the same way.

> +{
> +	get_file(file);
> +	vma->vm_prfile = file;
> +#ifndef CONFIG_MMU
> +	get_file(file);
> +	vma->vm_region->vm_prfile = file;
> +#endif
> +}
> 


Best regards,
Krzysztof
Alexander Mikhalitsyn April 26, 2021, 7:58 a.m. UTC | #2
Hi,

thank you for your comments. I've resent the patch
https://lists.ubuntu.com/archives/kernel-team/2021-April/119546.html

>Why is this inline? I see it is being used only in one place, so it
Sure, I will fix that.

Regards,
Alex

On Mon, Apr 26, 2021 at 10:38 AM Krzysztof Kozlowski
<krzysztof.kozlowski@canonical.com> wrote:
>
> On 26/04/2021 09:01, alexander@mihalicyn.com wrote:
> > From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> >
> > The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > shiftfs as underlay") and it broke checkpoint/restore of docker
> > contains:
> > https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
>
> Thanks for the patch.
>
> It seems you are posting it for Focal, so you need to follow the SRU
> process:
> https://wiki.ubuntu.com/Kernel/Dev/StablePatchFormat
>
> >
> > The following script can be used to trigger the issue:
> >   #!/bin/bash
> >
> >   cat > test.py << EOF
> >   import sys
> >
> >   f = open("/proc/self/maps")
> >
> >   for l in f.readlines():
> >     if "python" not in l:
> >       continue
> >     print(l)
> >     s = l.split()
> >     start, end = s[0].split("-")
> >     fname = s[-1]
> >     print(start, end, fname)
> >     break
> >   else:
> >     sys.exit(1)
> >
> >   test_file1 = open(fname)
> >   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> >
> >   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> >   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> >
> >   if fdinfo1 != fdinfo2:
> >     print("FAIL")
> >     print(test_file1)
> >     print(fdinfo1)
> >     print(test_file2)
> >     print(fdinfo2)
> >     sys.exit(1)
> >   print("PASS")
> >   EOF
> >   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> >
> > Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> >
> > Cc: Andrei Vagin <avagin@gmail.com>
> > Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > Cc: Stefan Bader <stefan.bader@canonical.com>
> > Cc: Connor Kuehl <connor.kuehl@canonical.com>
> > Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> >
> > Fixes: 58009298c6bd ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> > Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > ---
> >  fs/overlayfs/file.c      | 17 +++++++++++++++++
> >  fs/overlayfs/overlayfs.h | 12 ++++++++++++
> >  2 files changed, 29 insertions(+)
> >
> > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > index 0d3ea0cf3e98..7e63eb52bb0d 100644
> > --- a/fs/overlayfs/file.c
> > +++ b/fs/overlayfs/file.c
> > @@ -351,6 +351,23 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> >               vma->vm_file = file;
> >               fput(realfile);
> >       } else {
> > +             /*
> > +              * In map_files_get_link() (fs/proc/base.c)
> > +              * we need to determine correct path from overlayfs.
> > +              * But real_mount(realfile->f_path.mnt) may be not
> > +              * equal to real_mount(file->f_path.mnt). In such case
> > +              * fdinfo of the same file which was opened from
> > +              * /proc/<pid>/map_files/... and "usual" path
> > +              * will show different mnt_id.
> > +              *
> > +              * We solve issue like in aufs by using additional
> > +              * field on struct vm_area_struct called "vm_prfile"
> > +              * which is used only for fdinfo/"printing" needs.
> > +              *
> > +              * See also mm/prfile.c
> > +              */
> > +             ovl_vm_prfile_set(vma, file);
> > +
> >               /* Drop reference count from previous vm_file value */
> >               fput(file);
> >       }
> > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> > index 5f0188807666..a4e6023b4fdb 100644
> > --- a/fs/overlayfs/overlayfs.h
> > +++ b/fs/overlayfs/overlayfs.h
> > @@ -442,3 +442,15 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> >
> >  /* export.c */
> >  extern const struct export_operations ovl_export_operations;
> > +
> > +/* handle vma->vm_prfile */
> > +static inline void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > +                                 struct file *file)
>
> Why is this inline? I see it is being used only in one place, so it
> should be regular static inside file.c. It seems you copied the pattern
> from aufs but I have doubts that it's better to keep it the same way.
>
> > +{
> > +     get_file(file);
> > +     vma->vm_prfile = file;
> > +#ifndef CONFIG_MMU
> > +     get_file(file);
> > +     vma->vm_region->vm_prfile = file;
> > +#endif
> > +}
> >
>
>
> Best regards,
> Krzysztof
diff mbox series

Patch

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 0d3ea0cf3e98..7e63eb52bb0d 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -351,6 +351,23 @@  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 		vma->vm_file = file;
 		fput(realfile);
 	} else {
+		/*
+		 * In map_files_get_link() (fs/proc/base.c)
+		 * we need to determine correct path from overlayfs.
+		 * But real_mount(realfile->f_path.mnt) may not be
+		 * equal to real_mount(file->f_path.mnt). In that case
+		 * the fdinfo of the same file opened via
+		 * /proc/<pid>/map_files/... and via its "usual" path
+		 * will show different mnt_id values.
+		 *
+		 * We solve this issue the way aufs does, by using an
+		 * additional field in struct vm_area_struct, "vm_prfile",
+		 * which is used only for fdinfo/"printing" purposes.
+		 *
+		 * See also mm/prfile.c
+		 */
+		ovl_vm_prfile_set(vma, file);
+
 		/* Drop reference count from previous vm_file value */
 		fput(file);
 	}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 5f0188807666..a4e6023b4fdb 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -442,3 +442,15 @@  int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 
 /* export.c */
 extern const struct export_operations ovl_export_operations;
+
+/* handle vma->vm_prfile */
+static inline void ovl_vm_prfile_set(struct vm_area_struct *vma,
+				    struct file *file)
+{
+	get_file(file);
+	vma->vm_prfile = file;
+#ifndef CONFIG_MMU
+	get_file(file);
+	vma->vm_region->vm_prfile = file;
+#endif
+}