diff mbox series

[SRU,F,G,H,v2] UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files

Message ID 20210426081121.37363-1-alexander@mihalicyn.com
State New
Headers show
Series [SRU,F,G,H,v2] UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files | expand

Commit Message

Alexander Mikhalitsyn April 26, 2021, 8:11 a.m. UTC
From: Alexander Mikhalitsyn <alexander@mihalicyn.com>

BugLink: https://bugs.launchpad.net/bugs/1857257

The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
shiftfs as underlay") and it broke checkpoint/restore of docker
contains:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257

The following script can be used to trigger the issue:
  #!/bin/bash

  cat > test.py << EOF
  import sys

  f = open("/proc/self/maps")

  for l in f.readlines():
    if "python" not in l:
      continue
    print(l)
    s = l.split()
    start, end = s[0].split("-")
    fname = s[-1]
    print(start, end, fname)
    break
  else:
    sys.exit(1)

  test_file1 = open(fname)
  test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))

  fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
  fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()

  if fdinfo1 != fdinfo2:
    print("FAIL")
    print(test_file1)
    print(fdinfo1)
    print(test_file2)
    print(fdinfo2)
    sys.exit(1)
  print("PASS")
  EOF
  sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py

Thanks to Andrei Vagin for the reproducer and investigation of this problem.

Cc: Andrei Vagin <avagin@gmail.com>
Cc: Adrian Reber <areber@redhat.com>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Stefan Bader <stefan.bader@canonical.com>
Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>

Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
---
 fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

Comments

Christian Brauner April 27, 2021, 9:46 a.m. UTC | #1
On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1857257
> 
> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> shiftfs as underlay") and it broke checkpoint/restore of docker
> contains:
> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> 
> The following script can be used to trigger the issue:
>   #!/bin/bash
> 
>   cat > test.py << EOF
>   import sys
> 
>   f = open("/proc/self/maps")
> 
>   for l in f.readlines():
>     if "python" not in l:
>       continue
>     print(l)
>     s = l.split()
>     start, end = s[0].split("-")
>     fname = s[-1]
>     print(start, end, fname)
>     break
>   else:
>     sys.exit(1)
> 
>   test_file1 = open(fname)
>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> 
>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> 
>   if fdinfo1 != fdinfo2:
>     print("FAIL")
>     print(test_file1)
>     print(fdinfo1)
>     print(test_file2)
>     print(fdinfo2)
>     sys.exit(1)
>   print("PASS")
>   EOF
>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> 
> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> 
> Cc: Andrei Vagin <avagin@gmail.com>
> Cc: Adrian Reber <areber@redhat.com>
> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> Cc: Stefan Bader <stefan.bader@canonical.com>
> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> 
> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> ---

Hey,

Thanks for the patch!
Fwiw, Andrei already tried to fix this a while ago but it caused another
regression which forced us to revert the fix for this issue.

>  fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
>  1 file changed, 29 insertions(+)
> 
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 0d3ea0cf3e98..5fa520d0798e 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>  	return ret;
>  }
>  
> +/* handle vma->vm_prfile */
> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> +			      struct file *file)
> +{
> +	get_file(file);
> +	vma->vm_prfile = file;
> +#ifndef CONFIG_MMU
> +	get_file(file);
> +	vma->vm_region->vm_prfile = file;
> +#endif
> +}

I'm confused struct vm_area_struct doesn't have a vm_prfile entry. Can
you explain, please?

> +
>  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
>  {
>  	struct file *realfile = file->private_data;
> @@ -351,6 +363,23 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
>  		vma->vm_file = file;
>  		fput(realfile);
>  	} else {
> +		/*
> +		 * In map_files_get_link() (fs/proc/base.c)
> +		 * we need to determine correct path from overlayfs.
> +		 * But real_mount(realfile->f_path.mnt) may be not
> +		 * equal to real_mount(file->f_path.mnt). In such case
> +		 * fdinfo of the same file which was opened from
> +		 * /proc/<pid>/map_files/... and "usual" path
> +		 * will show different mnt_id.
> +		 *
> +		 * We solve issue like in aufs by using additional
> +		 * field on struct vm_area_struct called "vm_prfile"
> +		 * which is used only for fdinfo/"printing" needs.
> +		 *
> +		 * See also mm/prfile.c
> +		 */
> +		ovl_vm_prfile_set(vma, file);
> +
>  		/* Drop reference count from previous vm_file value */
>  		fput(file);
>  	}
> -- 
> 2.30.2
>
Krzysztof Kozlowski April 27, 2021, 10:03 a.m. UTC | #2
On 27/04/2021 11:46, Christian Brauner wrote:
> On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
>> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
>>
>> BugLink: https://bugs.launchpad.net/bugs/1857257
>>
>> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
>> shiftfs as underlay") and it broke checkpoint/restore of docker
>> contains:
>> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
>>
>> The following script can be used to trigger the issue:
>>   #!/bin/bash
>>
>>   cat > test.py << EOF
>>   import sys
>>
>>   f = open("/proc/self/maps")
>>
>>   for l in f.readlines():
>>     if "python" not in l:
>>       continue
>>     print(l)
>>     s = l.split()
>>     start, end = s[0].split("-")
>>     fname = s[-1]
>>     print(start, end, fname)
>>     break
>>   else:
>>     sys.exit(1)
>>
>>   test_file1 = open(fname)
>>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
>>
>>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
>>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
>>
>>   if fdinfo1 != fdinfo2:
>>     print("FAIL")
>>     print(test_file1)
>>     print(fdinfo1)
>>     print(test_file2)
>>     print(fdinfo2)
>>     sys.exit(1)
>>   print("PASS")
>>   EOF
>>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
>>
>> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
>>
>> Cc: Andrei Vagin <avagin@gmail.com>
>> Cc: Adrian Reber <areber@redhat.com>
>> Cc: Christian Brauner <christian.brauner@ubuntu.com>
>> Cc: Stefan Bader <stefan.bader@canonical.com>
>> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
>> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
>>
>> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
>> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
>> ---
> 
> Hey,
> 
> Thanks for the patch!
> Fwiw, Andrei already tried to fix this a while ago but it caused another
> regression which forced us to revert the fix for this issue.

Did you mean you tried this patch and it caused regressions?

> 
>>  fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
>>  1 file changed, 29 insertions(+)
>>
>> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
>> index 0d3ea0cf3e98..5fa520d0798e 100644
>> --- a/fs/overlayfs/file.c
>> +++ b/fs/overlayfs/file.c
>> @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>>  	return ret;
>>  }
>>  
>> +/* handle vma->vm_prfile */
>> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
>> +			      struct file *file)
>> +{
>> +	get_file(file);
>> +	vma->vm_prfile = file;
>> +#ifndef CONFIG_MMU
>> +	get_file(file);
>> +	vma->vm_region->vm_prfile = file;
>> +#endif
>> +}
> 
> I'm confused struct vm_area_struct doesn't have a vm_prfile entry. Can
> you explain, please?

It has. The aufs patchset brought it and basically this patch depends on
aufs.

Best regards,
Krzysztof
Alexander Mikhalitsyn April 27, 2021, 10:12 a.m. UTC | #3
Hi, Christian!

Thanks for your attention to the patch!
This field is introduced with aufs
(a3a49a17226f99a5e5dbdcbd328398cbf3c2959e) and it allows to have two struct
files connected with
vma. One for memory management needs and seconds for things like
proc/<pid>/map_files, numa_maps and so on.

Alex

On Tue, Apr 27, 2021 at 12:47 PM Christian Brauner <
christian.brauner@ubuntu.com> wrote:

> On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> > From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> >
> > BugLink: https://bugs.launchpad.net/bugs/1857257
> >
> > The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > shiftfs as underlay") and it broke checkpoint/restore of docker
> > contains:
> > https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> >
> > The following script can be used to trigger the issue:
> >   #!/bin/bash
> >
> >   cat > test.py << EOF
> >   import sys
> >
> >   f = open("/proc/self/maps")
> >
> >   for l in f.readlines():
> >     if "python" not in l:
> >       continue
> >     print(l)
> >     s = l.split()
> >     start, end = s[0].split("-")
> >     fname = s[-1]
> >     print(start, end, fname)
> >     break
> >   else:
> >     sys.exit(1)
> >
> >   test_file1 = open(fname)
> >   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> >
> >   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> >   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> >
> >   if fdinfo1 != fdinfo2:
> >     print("FAIL")
> >     print(test_file1)
> >     print(fdinfo1)
> >     print(test_file2)
> >     print(fdinfo2)
> >     sys.exit(1)
> >   print("PASS")
> >   EOF
> >   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python
> /mnt/test.py
> >
> > Thanks to Andrei Vagin for the reproducer and investigation of this
> problem.
> >
> > Cc: Andrei Vagin <avagin@gmail.com>
> > Cc: Adrian Reber <areber@redhat.com>
> > Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > Cc: Stefan Bader <stefan.bader@canonical.com>
> > Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> >
> > Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as
> underlay")
> > Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > ---
>
> Hey,
>
> Thanks for the patch!
> Fwiw, Andrei already tried to fix this a while ago but it caused another
> regression which forced us to revert the fix for this issue.
>
> >  fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
> >  1 file changed, 29 insertions(+)
> >
> > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > index 0d3ea0cf3e98..5fa520d0798e 100644
> > --- a/fs/overlayfs/file.c
> > +++ b/fs/overlayfs/file.c
> > @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t
> start, loff_t end, int datasync)
> >       return ret;
> >  }
> >
> > +/* handle vma->vm_prfile */
> > +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > +                           struct file *file)
> > +{
> > +     get_file(file);
> > +     vma->vm_prfile = file;
> > +#ifndef CONFIG_MMU
> > +     get_file(file);
> > +     vma->vm_region->vm_prfile = file;
> > +#endif
> > +}
>
> I'm confused struct vm_area_struct doesn't have a vm_prfile entry. Can
> you explain, please?
>
> > +
> >  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> >  {
> >       struct file *realfile = file->private_data;
> > @@ -351,6 +363,23 @@ static int ovl_mmap(struct file *file, struct
> vm_area_struct *vma)
> >               vma->vm_file = file;
> >               fput(realfile);
> >       } else {
> > +             /*
> > +              * In map_files_get_link() (fs/proc/base.c)
> > +              * we need to determine correct path from overlayfs.
> > +              * But real_mount(realfile->f_path.mnt) may be not
> > +              * equal to real_mount(file->f_path.mnt). In such case
> > +              * fdinfo of the same file which was opened from
> > +              * /proc/<pid>/map_files/... and "usual" path
> > +              * will show different mnt_id.
> > +              *
> > +              * We solve issue like in aufs by using additional
> > +              * field on struct vm_area_struct called "vm_prfile"
> > +              * which is used only for fdinfo/"printing" needs.
> > +              *
> > +              * See also mm/prfile.c
> > +              */
> > +             ovl_vm_prfile_set(vma, file);
> > +
> >               /* Drop reference count from previous vm_file value */
> >               fput(file);
> >       }
> > --
> > 2.30.2
> >
>
Alexander Mikhalitsyn April 27, 2021, 10:23 a.m. UTC | #4
On Tue, Apr 27, 2021 at 1:03 PM Krzysztof Kozlowski
<krzysztof.kozlowski@canonical.com> wrote:
>
> On 27/04/2021 11:46, Christian Brauner wrote:
> > On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> >> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> >>
> >> BugLink: https://bugs.launchpad.net/bugs/1857257
> >>
> >> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> >> shiftfs as underlay") and it broke checkpoint/restore of docker
> >> contains:
> >> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> >>
> >> The following script can be used to trigger the issue:
> >>   #!/bin/bash
> >>
> >>   cat > test.py << EOF
> >>   import sys
> >>
> >>   f = open("/proc/self/maps")
> >>
> >>   for l in f.readlines():
> >>     if "python" not in l:
> >>       continue
> >>     print(l)
> >>     s = l.split()
> >>     start, end = s[0].split("-")
> >>     fname = s[-1]
> >>     print(start, end, fname)
> >>     break
> >>   else:
> >>     sys.exit(1)
> >>
> >>   test_file1 = open(fname)
> >>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> >>
> >>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> >>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> >>
> >>   if fdinfo1 != fdinfo2:
> >>     print("FAIL")
> >>     print(test_file1)
> >>     print(fdinfo1)
> >>     print(test_file2)
> >>     print(fdinfo2)
> >>     sys.exit(1)
> >>   print("PASS")
> >>   EOF
> >>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> >>
> >> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> >>
> >> Cc: Andrei Vagin <avagin@gmail.com>
> >> Cc: Adrian Reber <areber@redhat.com>
> >> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> >> Cc: Stefan Bader <stefan.bader@canonical.com>
> >> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> >> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> >>
> >> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> >> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> >> ---
> >
> > Hey,
> >
> > Thanks for the patch!
> > Fwiw, Andrei already tried to fix this a while ago but it caused another
> > regression which forced us to revert the fix for this issue.
>
> Did you mean you tried this patch and it caused regressions?

It's a long story, I believe that Christian is talking about our
previous attempt to fix this problem
6f18a84 ("UBUNTU: SAUCE: overlayfs: use shiftfs hacks only with
shiftfs as underlay")
but this particular patch uses a completely different approach.

>
> >
> >>  fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
> >>  1 file changed, 29 insertions(+)
> >>
> >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> >> index 0d3ea0cf3e98..5fa520d0798e 100644
> >> --- a/fs/overlayfs/file.c
> >> +++ b/fs/overlayfs/file.c
> >> @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> >>      return ret;
> >>  }
> >>
> >> +/* handle vma->vm_prfile */
> >> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> >> +                          struct file *file)
> >> +{
> >> +    get_file(file);
> >> +    vma->vm_prfile = file;
> >> +#ifndef CONFIG_MMU
> >> +    get_file(file);
> >> +    vma->vm_region->vm_prfile = file;
> >> +#endif
> >> +}
> >
> > I'm confused struct vm_area_struct doesn't have a vm_prfile entry. Can
> > you explain, please?
>
> It has. The aufs patchset brought it and basically this patch depends on
> aufs.
>
> Best regards,
> Krzysztof
Christian Brauner April 27, 2021, 10:28 a.m. UTC | #5
On Tue, Apr 27, 2021 at 12:03:17PM +0200, Krzysztof Kozlowski wrote:
> On 27/04/2021 11:46, Christian Brauner wrote:
> > On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> >> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> >>
> >> BugLink: https://bugs.launchpad.net/bugs/1857257
> >>
> >> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> >> shiftfs as underlay") and it broke checkpoint/restore of docker
> >> contains:
> >> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> >>
> >> The following script can be used to trigger the issue:
> >>   #!/bin/bash
> >>
> >>   cat > test.py << EOF
> >>   import sys
> >>
> >>   f = open("/proc/self/maps")
> >>
> >>   for l in f.readlines():
> >>     if "python" not in l:
> >>       continue
> >>     print(l)
> >>     s = l.split()
> >>     start, end = s[0].split("-")
> >>     fname = s[-1]
> >>     print(start, end, fname)
> >>     break
> >>   else:
> >>     sys.exit(1)
> >>
> >>   test_file1 = open(fname)
> >>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> >>
> >>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> >>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> >>
> >>   if fdinfo1 != fdinfo2:
> >>     print("FAIL")
> >>     print(test_file1)
> >>     print(fdinfo1)
> >>     print(test_file2)
> >>     print(fdinfo2)
> >>     sys.exit(1)
> >>   print("PASS")
> >>   EOF
> >>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> >>
> >> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> >>
> >> Cc: Andrei Vagin <avagin@gmail.com>
> >> Cc: Adrian Reber <areber@redhat.com>
> >> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> >> Cc: Stefan Bader <stefan.bader@canonical.com>
> >> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> >> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> >>
> >> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> >> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> >> ---
> > 
> > Hey,
> > 
> > Thanks for the patch!
> > Fwiw, Andrei already tried to fix this a while ago but it caused another
> > regression which forced us to revert the fix for this issue.
> 
> Did you mean you tried this patch and it caused regressions?

No, we had an earlier fix for this issue which didn't depend on aufs
that Andrei sent:

commit 9ac77e721a0edb87cbbe95f9e75d8e27d96cb051
Author: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Date:   Thu May 21 12:55:43 2020 +0200

    Revert "UBUNTU: SAUCE: overlayfs: use shiftfs hacks only with shiftfs as underlay"
    
    BugLink: https://bugs.launchpad.net/bugs/1879690
    
    This reverts commit 6f18a8434050333afc80b5bfce2e0e994c86b790.
    
    The change applied for LP: #1857257 and its followup fix LP: #1876645
    introduced a regression on overlayfs. Revert these commits for now.
    
    Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
    Acked-by: Colin Ian King <colin.king@canonical.com>
    Acked-by: Khalid Elmously <khalid.elmously@canonical.com>
    Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>

Shifts is trying to overcome a shortcoming in overlayfs itself where
overlayfs is constructing a fake path where fake path means mnt and
dentry from the overlay but inode from another (lower or upper)
filesystem which is a hack to workaround related issues to the one we're
doing here.

> 
> > 
> >>  fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
> >>  1 file changed, 29 insertions(+)
> >>
> >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> >> index 0d3ea0cf3e98..5fa520d0798e 100644
> >> --- a/fs/overlayfs/file.c
> >> +++ b/fs/overlayfs/file.c
> >> @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> >>  	return ret;
> >>  }
> >>  
> >> +/* handle vma->vm_prfile */
> >> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> >> +			      struct file *file)
> >> +{
> >> +	get_file(file);
> >> +	vma->vm_prfile = file;
> >> +#ifndef CONFIG_MMU
> >> +	get_file(file);
> >> +	vma->vm_region->vm_prfile = file;
> >> +#endif
> >> +}
> > 
> > I'm confused struct vm_area_struct doesn't have a vm_prfile entry. Can
> > you explain, please?
> 
> It has. The aufs patchset brought it and basically this patch depends on
> aufs.

I meant upstream but good to know.
Btw, why do we still carry aufs?
This is not my business but I fear this will mean you're committing to
carrying either aufs or a specific aufs patch with Ubuntu for the sake
of overlayfs even if you wanted to drop aufs in the future.
Alexander Mikhalitsyn April 27, 2021, 10:37 a.m. UTC | #6
On Tue, Apr 27, 2021 at 1:28 PM Christian Brauner
<christian.brauner@ubuntu.com> wrote:
>
> On Tue, Apr 27, 2021 at 12:03:17PM +0200, Krzysztof Kozlowski wrote:
> > On 27/04/2021 11:46, Christian Brauner wrote:
> > > On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> > >> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > >>
> > >> BugLink: https://bugs.launchpad.net/bugs/1857257
> > >>
> > >> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > >> shiftfs as underlay") and it broke checkpoint/restore of docker
> > >> contains:
> > >> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> > >>
> > >> The following script can be used to trigger the issue:
> > >>   #!/bin/bash
> > >>
> > >>   cat > test.py << EOF
> > >>   import sys
> > >>
> > >>   f = open("/proc/self/maps")
> > >>
> > >>   for l in f.readlines():
> > >>     if "python" not in l:
> > >>       continue
> > >>     print(l)
> > >>     s = l.split()
> > >>     start, end = s[0].split("-")
> > >>     fname = s[-1]
> > >>     print(start, end, fname)
> > >>     break
> > >>   else:
> > >>     sys.exit(1)
> > >>
> > >>   test_file1 = open(fname)
> > >>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> > >>
> > >>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> > >>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> > >>
> > >>   if fdinfo1 != fdinfo2:
> > >>     print("FAIL")
> > >>     print(test_file1)
> > >>     print(fdinfo1)
> > >>     print(test_file2)
> > >>     print(fdinfo2)
> > >>     sys.exit(1)
> > >>   print("PASS")
> > >>   EOF
> > >>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> > >>
> > >> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> > >>
> > >> Cc: Andrei Vagin <avagin@gmail.com>
> > >> Cc: Adrian Reber <areber@redhat.com>
> > >> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > >> Cc: Stefan Bader <stefan.bader@canonical.com>
> > >> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > >> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> > >>
> > >> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> > >> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > >> ---
> > >
> > > Hey,
> > >
> > > Thanks for the patch!
> > > Fwiw, Andrei already tried to fix this a while ago but it caused another
> > > regression which forced us to revert the fix for this issue.
> >
> > Did you mean you tried this patch and it caused regressions?
>
> No, we had an earlier fix for this issue which didn't depend on aufs
> that Andrei sent:
>
> commit 9ac77e721a0edb87cbbe95f9e75d8e27d96cb051
> Author: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> Date:   Thu May 21 12:55:43 2020 +0200
>
>     Revert "UBUNTU: SAUCE: overlayfs: use shiftfs hacks only with shiftfs as underlay"
>
>     BugLink: https://bugs.launchpad.net/bugs/1879690
>
>     This reverts commit 6f18a8434050333afc80b5bfce2e0e994c86b790.
>
>     The change applied for LP: #1857257 and its followup fix LP: #1876645
>     introduced a regression on overlayfs. Revert these commits for now.
>
>     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
>     Acked-by: Colin Ian King <colin.king@canonical.com>
>     Acked-by: Khalid Elmously <khalid.elmously@canonical.com>
>     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
>
> Shifts is trying to overcome a shortcoming in overlayfs itself where
> overlayfs is constructing a fake path where fake path means mnt and
> dentry from the overlay but inode from another (lower or upper)
> filesystem which is a hack to workaround related issues to the one we're
> doing here.
>
> >
> > >
> > >>  fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
> > >>  1 file changed, 29 insertions(+)
> > >>
> > >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > >> index 0d3ea0cf3e98..5fa520d0798e 100644
> > >> --- a/fs/overlayfs/file.c
> > >> +++ b/fs/overlayfs/file.c
> > >> @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > >>    return ret;
> > >>  }
> > >>
> > >> +/* handle vma->vm_prfile */
> > >> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > >> +                        struct file *file)
> > >> +{
> > >> +  get_file(file);
> > >> +  vma->vm_prfile = file;
> > >> +#ifndef CONFIG_MMU
> > >> +  get_file(file);
> > >> +  vma->vm_region->vm_prfile = file;
> > >> +#endif
> > >> +}
> > >
> > > I'm confused struct vm_area_struct doesn't have a vm_prfile entry. Can
> > > you explain, please?
> >
> > It has. The aufs patchset brought it and basically this patch depends on
> > aufs.
>
> I meant upstream but good to know.
> Btw, why do we still carry aufs?
> This is not my business but I fear this will mean you're committing to
> carrying either aufs or a specific aufs patch with Ubuntu for the sake
> of overlayfs even if you wanted to drop aufs in the future.

If the aufs will be dropped from the Ubuntu kernel, this part with
vma_pr_or_file() and additional
vma field can be safely saved in the kernel.
It's an independent part (mm/prfile.c + several small changes in
proc). So, it's safe to keep
it in the kernel for overlayfs/shiftfs needs.

It's also important to notice that this problem with incorrect
map_files mnt_id is also valid
for current shiftfs implementation. If we decide to take this
particular patch for overlayfs,
I will prepare an analogical fix for the shiftfs.
Christian Brauner April 27, 2021, 10:43 a.m. UTC | #7
On Tue, Apr 27, 2021 at 01:37:28PM +0300, Alexander Mihalicyn wrote:
> On Tue, Apr 27, 2021 at 1:28 PM Christian Brauner
> <christian.brauner@ubuntu.com> wrote:
> >
> > On Tue, Apr 27, 2021 at 12:03:17PM +0200, Krzysztof Kozlowski wrote:
> > > On 27/04/2021 11:46, Christian Brauner wrote:
> > > > On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> > > >> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > >>
> > > >> BugLink: https://bugs.launchpad.net/bugs/1857257
> > > >>
> > > >> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > > >> shiftfs as underlay") and it broke checkpoint/restore of docker
> > > >> contains:
> > > >> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> > > >>
> > > >> The following script can be used to trigger the issue:
> > > >>   #!/bin/bash
> > > >>
> > > >>   cat > test.py << EOF
> > > >>   import sys
> > > >>
> > > >>   f = open("/proc/self/maps")
> > > >>
> > > >>   for l in f.readlines():
> > > >>     if "python" not in l:
> > > >>       continue
> > > >>     print(l)
> > > >>     s = l.split()
> > > >>     start, end = s[0].split("-")
> > > >>     fname = s[-1]
> > > >>     print(start, end, fname)
> > > >>     break
> > > >>   else:
> > > >>     sys.exit(1)
> > > >>
> > > >>   test_file1 = open(fname)
> > > >>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> > > >>
> > > >>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> > > >>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> > > >>
> > > >>   if fdinfo1 != fdinfo2:
> > > >>     print("FAIL")
> > > >>     print(test_file1)
> > > >>     print(fdinfo1)
> > > >>     print(test_file2)
> > > >>     print(fdinfo2)
> > > >>     sys.exit(1)
> > > >>   print("PASS")
> > > >>   EOF
> > > >>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> > > >>
> > > >> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> > > >>
> > > >> Cc: Andrei Vagin <avagin@gmail.com>
> > > >> Cc: Adrian Reber <areber@redhat.com>
> > > >> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > > >> Cc: Stefan Bader <stefan.bader@canonical.com>
> > > >> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > >> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> > > >>
> > > >> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> > > >> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > >> ---
> > > >
> > > > Hey,
> > > >
> > > > Thanks for the patch!
> > > > Fwiw, Andrei already tried to fix this a while ago but it caused another
> > > > regression which forced us to revert the fix for this issue.
> > >
> > > Did you mean you tried this patch and it caused regressions?
> >
> > No, we had an earlier fix for this issue which didn't depend on aufs
> > that Andrei sent:
> >
> > commit 9ac77e721a0edb87cbbe95f9e75d8e27d96cb051
> > Author: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > Date:   Thu May 21 12:55:43 2020 +0200
> >
> >     Revert "UBUNTU: SAUCE: overlayfs: use shiftfs hacks only with shiftfs as underlay"
> >
> >     BugLink: https://bugs.launchpad.net/bugs/1879690
> >
> >     This reverts commit 6f18a8434050333afc80b5bfce2e0e994c86b790.
> >
> >     The change applied for LP: #1857257 and its followup fix LP: #1876645
> >     introduced a regression on overlayfs. Revert these commits for now.
> >
> >     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> >     Acked-by: Colin Ian King <colin.king@canonical.com>
> >     Acked-by: Khalid Elmously <khalid.elmously@canonical.com>
> >     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> >
> > Shifts is trying to overcome a shortcoming in overlayfs itself where
> > overlayfs is constructing a fake path where fake path means mnt and
> > dentry from the overlay but inode from another (lower or upper)
> > filesystem which is a hack to workaround related issues to the one we're
> > doing here.
> >
> > >
> > > >
> > > >>  fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
> > > >>  1 file changed, 29 insertions(+)
> > > >>
> > > >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > >> index 0d3ea0cf3e98..5fa520d0798e 100644
> > > >> --- a/fs/overlayfs/file.c
> > > >> +++ b/fs/overlayfs/file.c
> > > >> @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > > >>    return ret;
> > > >>  }
> > > >>
> > > >> +/* handle vma->vm_prfile */
> > > >> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > >> +                        struct file *file)
> > > >> +{
> > > >> +  get_file(file);
> > > >> +  vma->vm_prfile = file;
> > > >> +#ifndef CONFIG_MMU
> > > >> +  get_file(file);
> > > >> +  vma->vm_region->vm_prfile = file;
> > > >> +#endif
> > > >> +}
> > > >
> > > > I'm confused struct vm_area_struct doesn't have a vm_prfile entry. Can
> > > > you explain, please?
> > >
> > > It has. The aufs patchset brought it and basically this patch depends on
> > > aufs.
> >
> > I meant upstream but good to know.
> > Btw, why do we still carry aufs?
> > This is not my business but I fear this will mean you're committing to
> > carrying either aufs or a specific aufs patch with Ubuntu for the sake
> > of overlayfs even if you wanted to drop aufs in the future.
> 
> If the aufs will be dropped from the Ubuntu kernel, this part with
> vma_pr_or_file() and additional
> vma field can be safely saved in the kernel.
> It's an independent part (mm/prfile.c + several small changes in
> proc). So, it's safe to keep
> it in the kernel for overlayfs/shiftfs needs.
> 
> It's also important to notice that this problem with incorrect
> map_files mnt_id is also valid
> for current shiftfs implementation. If we decide to take this
> particular patch for overlayfs,
> I will prepare an analogical fix for the shiftfs.

So what's the wrong mnt_id here, i.e. is criu confused because it sees
the lower fs's mnt_id when it should see the mnt_id of shiftfs/overlayfs
itself or the other way around?
Alexander Mikhalitsyn April 27, 2021, 10:47 a.m. UTC | #8
On Tue, Apr 27, 2021 at 1:43 PM Christian Brauner
<christian.brauner@ubuntu.com> wrote:
>
> On Tue, Apr 27, 2021 at 01:37:28PM +0300, Alexander Mihalicyn wrote:
> > On Tue, Apr 27, 2021 at 1:28 PM Christian Brauner
> > <christian.brauner@ubuntu.com> wrote:
> > >
> > > On Tue, Apr 27, 2021 at 12:03:17PM +0200, Krzysztof Kozlowski wrote:
> > > > On 27/04/2021 11:46, Christian Brauner wrote:
> > > > > On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> > > > >> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > > >>
> > > > >> BugLink: https://bugs.launchpad.net/bugs/1857257
> > > > >>
> > > > >> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > > > >> shiftfs as underlay") and it broke checkpoint/restore of docker
> > > > >> contains:
> > > > >> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> > > > >>
> > > > >> The following script can be used to trigger the issue:
> > > > >>   #!/bin/bash
> > > > >>
> > > > >>   cat > test.py << EOF
> > > > >>   import sys
> > > > >>
> > > > >>   f = open("/proc/self/maps")
> > > > >>
> > > > >>   for l in f.readlines():
> > > > >>     if "python" not in l:
> > > > >>       continue
> > > > >>     print(l)
> > > > >>     s = l.split()
> > > > >>     start, end = s[0].split("-")
> > > > >>     fname = s[-1]
> > > > >>     print(start, end, fname)
> > > > >>     break
> > > > >>   else:
> > > > >>     sys.exit(1)
> > > > >>
> > > > >>   test_file1 = open(fname)
> > > > >>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> > > > >>
> > > > >>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> > > > >>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> > > > >>
> > > > >>   if fdinfo1 != fdinfo2:
> > > > >>     print("FAIL")
> > > > >>     print(test_file1)
> > > > >>     print(fdinfo1)
> > > > >>     print(test_file2)
> > > > >>     print(fdinfo2)
> > > > >>     sys.exit(1)
> > > > >>   print("PASS")
> > > > >>   EOF
> > > > >>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> > > > >>
> > > > >> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> > > > >>
> > > > >> Cc: Andrei Vagin <avagin@gmail.com>
> > > > >> Cc: Adrian Reber <areber@redhat.com>
> > > > >> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > > > >> Cc: Stefan Bader <stefan.bader@canonical.com>
> > > > >> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > >> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> > > > >>
> > > > >> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> > > > >> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > > >> ---
> > > > >
> > > > > Hey,
> > > > >
> > > > > Thanks for the patch!
> > > > > Fwiw, Andrei already tried to fix this a while ago but it caused another
> > > > > regression which forced us to revert the fix for this issue.
> > > >
> > > > Did you mean you tried this patch and it caused regressions?
> > >
> > > No, we had an earlier fix for this issue which didn't depend on aufs
> > > that Andrei sent:
> > >
> > > commit 9ac77e721a0edb87cbbe95f9e75d8e27d96cb051
> > > Author: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > Date:   Thu May 21 12:55:43 2020 +0200
> > >
> > >     Revert "UBUNTU: SAUCE: overlayfs: use shiftfs hacks only with shiftfs as underlay"
> > >
> > >     BugLink: https://bugs.launchpad.net/bugs/1879690
> > >
> > >     This reverts commit 6f18a8434050333afc80b5bfce2e0e994c86b790.
> > >
> > >     The change applied for LP: #1857257 and its followup fix LP: #1876645
> > >     introduced a regression on overlayfs. Revert these commits for now.
> > >
> > >     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > >     Acked-by: Colin Ian King <colin.king@canonical.com>
> > >     Acked-by: Khalid Elmously <khalid.elmously@canonical.com>
> > >     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > >
> > > Shifts is trying to overcome a shortcoming in overlayfs itself where
> > > overlayfs is constructing a fake path where fake path means mnt and
> > > dentry from the overlay but inode from another (lower or upper)
> > > filesystem which is a hack to workaround related issues to the one we're
> > > doing here.
> > >
> > > >
> > > > >
> > > > >>  fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
> > > > >>  1 file changed, 29 insertions(+)
> > > > >>
> > > > >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > > >> index 0d3ea0cf3e98..5fa520d0798e 100644
> > > > >> --- a/fs/overlayfs/file.c
> > > > >> +++ b/fs/overlayfs/file.c
> > > > >> @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > > > >>    return ret;
> > > > >>  }
> > > > >>
> > > > >> +/* handle vma->vm_prfile */
> > > > >> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > > >> +                        struct file *file)
> > > > >> +{
> > > > >> +  get_file(file);
> > > > >> +  vma->vm_prfile = file;
> > > > >> +#ifndef CONFIG_MMU
> > > > >> +  get_file(file);
> > > > >> +  vma->vm_region->vm_prfile = file;
> > > > >> +#endif
> > > > >> +}
> > > > >
> > > > > I'm confused struct vm_area_struct doesn't have a vm_prfile entry. Can
> > > > > you explain, please?
> > > >
> > > > It has. The aufs patchset brought it and basically this patch depends on
> > > > aufs.
> > >
> > > I meant upstream but good to know.
> > > Btw, why do we still carry aufs?
> > > This is not my business but I fear this will mean you're committing to
> > > carrying either aufs or a specific aufs patch with Ubuntu for the sake
> > > of overlayfs even if you wanted to drop aufs in the future.
> >
> > If the aufs will be dropped from the Ubuntu kernel, this part with
> > vma_pr_or_file() and additional
> > vma field can be safely saved in the kernel.
> > It's an independent part (mm/prfile.c + several small changes in
> > proc). So, it's safe to keep
> > it in the kernel for overlayfs/shiftfs needs.
> >
> > It's also important to notice that this problem with incorrect
> > map_files mnt_id is also valid
> > for current shiftfs implementation. If we decide to take this
> > particular patch for overlayfs,
> > I will prepare an analogical fix for the shiftfs.
>
> So what's the wrong mnt_id here, i.e. is criu confused because it sees
> the lower fs's mnt_id when it should see the mnt_id of shiftfs/overlayfs
> itself or the other way around?

Yes. If we mmap() file which was opened through shiftfs/overlayfs we
expect that if we reopen the same file but through
/proc/<pid>/map_files/0x111-0x222
we get file descriptor from the same mount but not from the underlying
filesystem. I'm not
sure but this weird behaviour theoretically may also cause security issues.
Christian Brauner April 27, 2021, 11:27 a.m. UTC | #9
On Tue, Apr 27, 2021 at 01:47:17PM +0300, Alexander Mihalicyn wrote:
> On Tue, Apr 27, 2021 at 1:43 PM Christian Brauner
> <christian.brauner@ubuntu.com> wrote:
> >
> > On Tue, Apr 27, 2021 at 01:37:28PM +0300, Alexander Mihalicyn wrote:
> > > On Tue, Apr 27, 2021 at 1:28 PM Christian Brauner
> > > <christian.brauner@ubuntu.com> wrote:
> > > >
> > > > On Tue, Apr 27, 2021 at 12:03:17PM +0200, Krzysztof Kozlowski wrote:
> > > > > On 27/04/2021 11:46, Christian Brauner wrote:
> > > > > > On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> > > > > >> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > > > >>
> > > > > >> BugLink: https://bugs.launchpad.net/bugs/1857257
> > > > > >>
> > > > > >> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > > > > >> shiftfs as underlay") and it broke checkpoint/restore of docker
> > > > > >> contains:
> > > > > >> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> > > > > >>
> > > > > >> The following script can be used to trigger the issue:
> > > > > >>   #!/bin/bash
> > > > > >>
> > > > > >>   cat > test.py << EOF
> > > > > >>   import sys
> > > > > >>
> > > > > >>   f = open("/proc/self/maps")
> > > > > >>
> > > > > >>   for l in f.readlines():
> > > > > >>     if "python" not in l:
> > > > > >>       continue
> > > > > >>     print(l)
> > > > > >>     s = l.split()
> > > > > >>     start, end = s[0].split("-")
> > > > > >>     fname = s[-1]
> > > > > >>     print(start, end, fname)
> > > > > >>     break
> > > > > >>   else:
> > > > > >>     sys.exit(1)
> > > > > >>
> > > > > >>   test_file1 = open(fname)
> > > > > >>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> > > > > >>
> > > > > >>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> > > > > >>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> > > > > >>
> > > > > >>   if fdinfo1 != fdinfo2:
> > > > > >>     print("FAIL")
> > > > > >>     print(test_file1)
> > > > > >>     print(fdinfo1)
> > > > > >>     print(test_file2)
> > > > > >>     print(fdinfo2)
> > > > > >>     sys.exit(1)
> > > > > >>   print("PASS")
> > > > > >>   EOF
> > > > > >>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> > > > > >>
> > > > > >> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> > > > > >>
> > > > > >> Cc: Andrei Vagin <avagin@gmail.com>
> > > > > >> Cc: Adrian Reber <areber@redhat.com>
> > > > > >> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > > > > >> Cc: Stefan Bader <stefan.bader@canonical.com>
> > > > > >> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > > >> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> > > > > >>
> > > > > >> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> > > > > >> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > > > >> ---
> > > > > >
> > > > > > Hey,
> > > > > >
> > > > > > Thanks for the patch!
> > > > > > Fwiw, Andrei already tried to fix this a while ago but it caused another
> > > > > > regression which forced us to revert the fix for this issue.
> > > > >
> > > > > Did you mean you tried this patch and it caused regressions?
> > > >
> > > > No, we had an earlier fix for this issue which didn't depend on aufs
> > > > that Andrei sent:
> > > >
> > > > commit 9ac77e721a0edb87cbbe95f9e75d8e27d96cb051
> > > > Author: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > Date:   Thu May 21 12:55:43 2020 +0200
> > > >
> > > >     Revert "UBUNTU: SAUCE: overlayfs: use shiftfs hacks only with shiftfs as underlay"
> > > >
> > > >     BugLink: https://bugs.launchpad.net/bugs/1879690
> > > >
> > > >     This reverts commit 6f18a8434050333afc80b5bfce2e0e994c86b790.
> > > >
> > > >     The change applied for LP: #1857257 and its followup fix LP: #1876645
> > > >     introduced a regression on overlayfs. Revert these commits for now.
> > > >
> > > >     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > >     Acked-by: Colin Ian King <colin.king@canonical.com>
> > > >     Acked-by: Khalid Elmously <khalid.elmously@canonical.com>
> > > >     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > >
> > > > Shifts is trying to overcome a shortcoming in overlayfs itself where
> > > > overlayfs is constructing a fake path where fake path means mnt and
> > > > dentry from the overlay but inode from another (lower or upper)
> > > > filesystem which is a hack to workaround related issues to the one we're
> > > > doing here.
> > > >
> > > > >
> > > > > >
> > > > > >>  fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
> > > > > >>  1 file changed, 29 insertions(+)
> > > > > >>
> > > > > >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > > > >> index 0d3ea0cf3e98..5fa520d0798e 100644
> > > > > >> --- a/fs/overlayfs/file.c
> > > > > >> +++ b/fs/overlayfs/file.c
> > > > > >> @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > > > > >>    return ret;
> > > > > >>  }
> > > > > >>
> > > > > >> +/* handle vma->vm_prfile */
> > > > > >> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > > > >> +                        struct file *file)
> > > > > >> +{
> > > > > >> +  get_file(file);
> > > > > >> +  vma->vm_prfile = file;
> > > > > >> +#ifndef CONFIG_MMU
> > > > > >> +  get_file(file);
> > > > > >> +  vma->vm_region->vm_prfile = file;
> > > > > >> +#endif
> > > > > >> +}
> > > > > >
> > > > > > I'm confused struct vm_area_struct doesn't have a vm_prfile entry. Can
> > > > > > you explain, please?
> > > > >
> > > > > It has. The aufs patchset brought it and basically this patch depends on
> > > > > aufs.
> > > >
> > > > I meant upstream but good to know.
> > > > Btw, why do we still carry aufs?
> > > > This is not my business but I fear this will mean you're committing to
> > > > carrying either aufs or a specific aufs patch with Ubuntu for the sake
> > > > of overlayfs even if you wanted to drop aufs in the future.
> > >
> > > If the aufs will be dropped from the Ubuntu kernel, this part with
> > > vma_pr_or_file() and additional
> > > vma field can be safely saved in the kernel.
> > > It's an independent part (mm/prfile.c + several small changes in
> > > proc). So, it's safe to keep
> > > it in the kernel for overlayfs/shiftfs needs.
> > >
> > > It's also important to notice that this problem with incorrect
> > > map_files mnt_id is also valid
> > > for current shiftfs implementation. If we decide to take this
> > > particular patch for overlayfs,
> > > I will prepare an analogical fix for the shiftfs.
> >
> > So what's the wrong mnt_id here, i.e. is criu confused because it sees
> > the lower fs's mnt_id when it should see the mnt_id of shiftfs/overlayfs
> > itself or the other way around?
> 
> Yes. If we mmap() file which was opened through shiftfs/overlayfs we
> expect that if we reopen the same file but through
> /proc/<pid>/map_files/0x111-0x222
> we get file descriptor from the same mount but not from the underlying
> filesystem. I'm not
> sure but this weird behaviour theoretically may also cause security issues.

Stacking filesystems are crap is the summary you're looking for.

Jokes aside, this is I think the reason for the overlayfs fake-path
quackery, i.e. combining mount and dentry from overlayfs with a
non-overlayfs inode. If your patch doesn't regress anything and we carry
that aufs crap anyway then fine with me. Though I think this needs to be
fixed in the upstream kernel, i.e. overlayfs shouldn't need to do stuff
like this in the first place.
Christian Brauner April 27, 2021, 11:28 a.m. UTC | #10
On Tue, Apr 27, 2021 at 01:27:14PM +0200, Christian Brauner wrote:
> On Tue, Apr 27, 2021 at 01:47:17PM +0300, Alexander Mihalicyn wrote:
> > On Tue, Apr 27, 2021 at 1:43 PM Christian Brauner
> > <christian.brauner@ubuntu.com> wrote:
> > >
> > > On Tue, Apr 27, 2021 at 01:37:28PM +0300, Alexander Mihalicyn wrote:
> > > > On Tue, Apr 27, 2021 at 1:28 PM Christian Brauner
> > > > <christian.brauner@ubuntu.com> wrote:
> > > > >
> > > > > On Tue, Apr 27, 2021 at 12:03:17PM +0200, Krzysztof Kozlowski wrote:
> > > > > > On 27/04/2021 11:46, Christian Brauner wrote:
> > > > > > > On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> > > > > > >> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > > > > >>
> > > > > > >> BugLink: https://bugs.launchpad.net/bugs/1857257
> > > > > > >>
> > > > > > >> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > > > > > >> shiftfs as underlay") and it broke checkpoint/restore of docker
> > > > > > >> contains:
> > > > > > >> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> > > > > > >>
> > > > > > >> The following script can be used to trigger the issue:
> > > > > > >>   #!/bin/bash
> > > > > > >>
> > > > > > >>   cat > test.py << EOF
> > > > > > >>   import sys
> > > > > > >>
> > > > > > >>   f = open("/proc/self/maps")
> > > > > > >>
> > > > > > >>   for l in f.readlines():
> > > > > > >>     if "python" not in l:
> > > > > > >>       continue
> > > > > > >>     print(l)
> > > > > > >>     s = l.split()
> > > > > > >>     start, end = s[0].split("-")
> > > > > > >>     fname = s[-1]
> > > > > > >>     print(start, end, fname)
> > > > > > >>     break
> > > > > > >>   else:
> > > > > > >>     sys.exit(1)
> > > > > > >>
> > > > > > >>   test_file1 = open(fname)
> > > > > > >>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> > > > > > >>
> > > > > > >>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> > > > > > >>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> > > > > > >>
> > > > > > >>   if fdinfo1 != fdinfo2:
> > > > > > >>     print("FAIL")
> > > > > > >>     print(test_file1)
> > > > > > >>     print(fdinfo1)
> > > > > > >>     print(test_file2)
> > > > > > >>     print(fdinfo2)
> > > > > > >>     sys.exit(1)
> > > > > > >>   print("PASS")
> > > > > > >>   EOF
> > > > > > >>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> > > > > > >>
> > > > > > >> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> > > > > > >>
> > > > > > >> Cc: Andrei Vagin <avagin@gmail.com>
> > > > > > >> Cc: Adrian Reber <areber@redhat.com>
> > > > > > >> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > > > > > >> Cc: Stefan Bader <stefan.bader@canonical.com>
> > > > > > >> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > > > >> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> > > > > > >>
> > > > > > >> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> > > > > > >> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > > > > >> ---
> > > > > > >
> > > > > > > Hey,
> > > > > > >
> > > > > > > Thanks for the patch!
> > > > > > > Fwiw, Andrei already tried to fix this a while ago but it caused another
> > > > > > > regression which forced us to revert the fix for this issue.
> > > > > >
> > > > > > Did you mean you tried this patch and it caused regressions?
> > > > >
> > > > > No, we had an earlier fix for this issue which didn't depend on aufs
> > > > > that Andrei sent:
> > > > >
> > > > > commit 9ac77e721a0edb87cbbe95f9e75d8e27d96cb051
> > > > > Author: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > > Date:   Thu May 21 12:55:43 2020 +0200
> > > > >
> > > > >     Revert "UBUNTU: SAUCE: overlayfs: use shiftfs hacks only with shiftfs as underlay"
> > > > >
> > > > >     BugLink: https://bugs.launchpad.net/bugs/1879690
> > > > >
> > > > >     This reverts commit 6f18a8434050333afc80b5bfce2e0e994c86b790.
> > > > >
> > > > >     The change applied for LP: #1857257 and its followup fix LP: #1876645
> > > > >     introduced a regression on overlayfs. Revert these commits for now.
> > > > >
> > > > >     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > >     Acked-by: Colin Ian King <colin.king@canonical.com>
> > > > >     Acked-by: Khalid Elmously <khalid.elmously@canonical.com>
> > > > >     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > >
> > > > > Shifts is trying to overcome a shortcoming in overlayfs itself where
> > > > > overlayfs is constructing a fake path where fake path means mnt and
> > > > > dentry from the overlay but inode from another (lower or upper)
> > > > > filesystem which is a hack to workaround related issues to the one we're
> > > > > doing here.
> > > > >
> > > > > >
> > > > > > >
> > > > > > >>  fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
> > > > > > >>  1 file changed, 29 insertions(+)
> > > > > > >>
> > > > > > >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > > > > >> index 0d3ea0cf3e98..5fa520d0798e 100644
> > > > > > >> --- a/fs/overlayfs/file.c
> > > > > > >> +++ b/fs/overlayfs/file.c
> > > > > > >> @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > > > > > >>    return ret;
> > > > > > >>  }
> > > > > > >>
> > > > > > >> +/* handle vma->vm_prfile */
> > > > > > >> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > > > > >> +                        struct file *file)
> > > > > > >> +{
> > > > > > >> +  get_file(file);
> > > > > > >> +  vma->vm_prfile = file;
> > > > > > >> +#ifndef CONFIG_MMU
> > > > > > >> +  get_file(file);
> > > > > > >> +  vma->vm_region->vm_prfile = file;
> > > > > > >> +#endif
> > > > > > >> +}
> > > > > > >
> > > > > > > I'm confused struct vm_area_struct doesn't have a vm_prfile entry. Can
> > > > > > > you explain, please?
> > > > > >
> > > > > > It has. The aufs patchset brought it and basically this patch depends on
> > > > > > aufs.
> > > > >
> > > > > I meant upstream but good to know.
> > > > > Btw, why do we still carry aufs?
> > > > > This is not my business but I fear this will mean you're committing to
> > > > > carrying either aufs or a specific aufs patch with Ubuntu for the sake
> > > > > of overlayfs even if you wanted to drop aufs in the future.
> > > >
> > > > If the aufs will be dropped from the Ubuntu kernel, this part with
> > > > vma_pr_or_file() and additional
> > > > vma field can be safely saved in the kernel.
> > > > It's an independent part (mm/prfile.c + several small changes in
> > > > proc). So, it's safe to keep
> > > > it in the kernel for overlayfs/shiftfs needs.
> > > >
> > > > It's also important to notice that this problem with incorrect
> > > > map_files mnt_id is also valid
> > > > for current shiftfs implementation. If we decide to take this
> > > > particular patch for overlayfs,
> > > > I will prepare an analogical fix for the shiftfs.
> > >
> > > So what's the wrong mnt_id here, i.e. is criu confused because it sees
> > > the lower fs's mnt_id when it should see the mnt_id of shiftfs/overlayfs
> > > itself or the other way around?
> > 
> > Yes. If we mmap() file which was opened through shiftfs/overlayfs we
> > expect that if we reopen the same file but through
> > /proc/<pid>/map_files/0x111-0x222
> > we get file descriptor from the same mount but not from the underlying
> > filesystem. I'm not
> > sure but this weird behaviour theoretically may also cause security issues.
> 
> Stacking filesystems are crap is the summary you're looking for.
> 
> Jokes aside, this is I think the reason for the overlayfs fake-path
> quackery, i.e. combining mount and dentry from overlayfs with a
> non-overlayfs inode. If your patch doesn't regress anything and we carry
> that aufs crap anyway then fine with me. Though I think this needs to be
> fixed in the upstream kernel, i.e. overlayfs shouldn't need to do stuff
> like this in the first place.

Oh, and we totally missed Seth here. Adding him.
Christian Brauner April 27, 2021, 11:31 a.m. UTC | #11
On Tue, Apr 27, 2021 at 01:28:47PM +0200, Christian Brauner wrote:
> On Tue, Apr 27, 2021 at 01:27:14PM +0200, Christian Brauner wrote:
> > On Tue, Apr 27, 2021 at 01:47:17PM +0300, Alexander Mihalicyn wrote:
> > > On Tue, Apr 27, 2021 at 1:43 PM Christian Brauner
> > > <christian.brauner@ubuntu.com> wrote:
> > > >
> > > > On Tue, Apr 27, 2021 at 01:37:28PM +0300, Alexander Mihalicyn wrote:
> > > > > On Tue, Apr 27, 2021 at 1:28 PM Christian Brauner
> > > > > <christian.brauner@ubuntu.com> wrote:
> > > > > >
> > > > > > On Tue, Apr 27, 2021 at 12:03:17PM +0200, Krzysztof Kozlowski wrote:
> > > > > > > On 27/04/2021 11:46, Christian Brauner wrote:
> > > > > > > > On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> > > > > > > >> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > > > > > >>
> > > > > > > >> BugLink: https://bugs.launchpad.net/bugs/1857257
> > > > > > > >>
> > > > > > > >> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > > > > > > >> shiftfs as underlay") and it broke checkpoint/restore of docker
> > > > > > > >> contains:
> > > > > > > >> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> > > > > > > >>
> > > > > > > >> The following script can be used to trigger the issue:
> > > > > > > >>   #!/bin/bash
> > > > > > > >>
> > > > > > > >>   cat > test.py << EOF
> > > > > > > >>   import sys
> > > > > > > >>
> > > > > > > >>   f = open("/proc/self/maps")
> > > > > > > >>
> > > > > > > >>   for l in f.readlines():
> > > > > > > >>     if "python" not in l:
> > > > > > > >>       continue
> > > > > > > >>     print(l)
> > > > > > > >>     s = l.split()
> > > > > > > >>     start, end = s[0].split("-")
> > > > > > > >>     fname = s[-1]
> > > > > > > >>     print(start, end, fname)
> > > > > > > >>     break
> > > > > > > >>   else:
> > > > > > > >>     sys.exit(1)
> > > > > > > >>
> > > > > > > >>   test_file1 = open(fname)
> > > > > > > >>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> > > > > > > >>
> > > > > > > >>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> > > > > > > >>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> > > > > > > >>
> > > > > > > >>   if fdinfo1 != fdinfo2:
> > > > > > > >>     print("FAIL")
> > > > > > > >>     print(test_file1)
> > > > > > > >>     print(fdinfo1)
> > > > > > > >>     print(test_file2)
> > > > > > > >>     print(fdinfo2)
> > > > > > > >>     sys.exit(1)
> > > > > > > >>   print("PASS")
> > > > > > > >>   EOF
> > > > > > > >>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> > > > > > > >>
> > > > > > > >> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> > > > > > > >>
> > > > > > > >> Cc: Andrei Vagin <avagin@gmail.com>
> > > > > > > >> Cc: Adrian Reber <areber@redhat.com>
> > > > > > > >> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > > > > > > >> Cc: Stefan Bader <stefan.bader@canonical.com>
> > > > > > > >> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > > > > >> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> > > > > > > >>
> > > > > > > >> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> > > > > > > >> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > > > > > >> ---
> > > > > > > >
> > > > > > > > Hey,
> > > > > > > >
> > > > > > > > Thanks for the patch!
> > > > > > > > Fwiw, Andrei already tried to fix this a while ago but it caused another
> > > > > > > > regression which forced us to revert the fix for this issue.
> > > > > > >
> > > > > > > Did you mean you tried this patch and it caused regressions?
> > > > > >
> > > > > > No, we had an earlier fix for this issue which didn't depend on aufs
> > > > > > that Andrei sent:
> > > > > >
> > > > > > commit 9ac77e721a0edb87cbbe95f9e75d8e27d96cb051
> > > > > > Author: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > > > Date:   Thu May 21 12:55:43 2020 +0200
> > > > > >
> > > > > >     Revert "UBUNTU: SAUCE: overlayfs: use shiftfs hacks only with shiftfs as underlay"
> > > > > >
> > > > > >     BugLink: https://bugs.launchpad.net/bugs/1879690
> > > > > >
> > > > > >     This reverts commit 6f18a8434050333afc80b5bfce2e0e994c86b790.
> > > > > >
> > > > > >     The change applied for LP: #1857257 and its followup fix LP: #1876645
> > > > > >     introduced a regression on overlayfs. Revert these commits for now.
> > > > > >
> > > > > >     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > > >     Acked-by: Colin Ian King <colin.king@canonical.com>
> > > > > >     Acked-by: Khalid Elmously <khalid.elmously@canonical.com>
> > > > > >     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > > >
> > > > > > Shifts is trying to overcome a shortcoming in overlayfs itself where
> > > > > > overlayfs is constructing a fake path where fake path means mnt and
> > > > > > dentry from the overlay but inode from another (lower or upper)
> > > > > > filesystem which is a hack to workaround related issues to the one we're
> > > > > > doing here.
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > >>  fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
> > > > > > > >>  1 file changed, 29 insertions(+)
> > > > > > > >>
> > > > > > > >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > > > > > >> index 0d3ea0cf3e98..5fa520d0798e 100644
> > > > > > > >> --- a/fs/overlayfs/file.c
> > > > > > > >> +++ b/fs/overlayfs/file.c
> > > > > > > >> @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > > > > > > >>    return ret;
> > > > > > > >>  }
> > > > > > > >>
> > > > > > > >> +/* handle vma->vm_prfile */
> > > > > > > >> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > > > > > >> +                        struct file *file)
> > > > > > > >> +{
> > > > > > > >> +  get_file(file);
> > > > > > > >> +  vma->vm_prfile = file;
> > > > > > > >> +#ifndef CONFIG_MMU
> > > > > > > >> +  get_file(file);
> > > > > > > >> +  vma->vm_region->vm_prfile = file;
> > > > > > > >> +#endif
> > > > > > > >> +}
> > > > > > > >
> > > > > > > > I'm confused struct vm_area_struct doesn't have a vm_prfile entry. Can
> > > > > > > > you explain, please?
> > > > > > >
> > > > > > > It has. The aufs patchset brought it and basically this patch depends on
> > > > > > > aufs.
> > > > > >
> > > > > > I meant upstream but good to know.
> > > > > > Btw, why do we still carry aufs?
> > > > > > This is not my business but I fear this will mean you're committing to
> > > > > > carrying either aufs or a specific aufs patch with Ubuntu for the sake
> > > > > > of overlayfs even if you wanted to drop aufs in the future.
> > > > >
> > > > > If the aufs will be dropped from the Ubuntu kernel, this part with
> > > > > vma_pr_or_file() and additional
> > > > > vma field can be safely saved in the kernel.
> > > > > It's an independent part (mm/prfile.c + several small changes in
> > > > > proc). So, it's safe to keep
> > > > > it in the kernel for overlayfs/shiftfs needs.
> > > > >
> > > > > It's also important to notice that this problem with incorrect
> > > > > map_files mnt_id is also valid
> > > > > for current shiftfs implementation. If we decide to take this
> > > > > particular patch for overlayfs,
> > > > > I will prepare an analogical fix for the shiftfs.
> > > >
> > > > So what's the wrong mnt_id here, i.e. is criu confused because it sees
> > > > the lower fs's mnt_id when it should see the mnt_id of shiftfs/overlayfs
> > > > itself or the other way around?
> > > 
> > > Yes. If we mmap() file which was opened through shiftfs/overlayfs we
> > > expect that if we reopen the same file but through
> > > /proc/<pid>/map_files/0x111-0x222
> > > we get file descriptor from the same mount but not from the underlying
> > > filesystem. I'm not
> > > sure but this weird behaviour theoretically may also cause security issues.
> > 
> > Stacking filesystems are crap is the summary you're looking for.
> > 
> > Jokes aside, this is I think the reason for the overlayfs fake-path
> > quackery, i.e. combining mount and dentry from overlayfs with a
> > non-overlayfs inode. If your patch doesn't regress anything and we carry
> > that aufs crap anyway then fine with me. Though I think this needs to be
> > fixed in the upstream kernel, i.e. overlayfs shouldn't need to do stuff
> > like this in the first place.
> 
> Oh, and we totally missed Seth here. Adding him.

And thank you for working on this fix, Alexander.
Alexander Mikhalitsyn April 27, 2021, 11:46 a.m. UTC | #12
On Tue, Apr 27, 2021 at 2:31 PM Christian Brauner
<christian.brauner@ubuntu.com> wrote:
>
> On Tue, Apr 27, 2021 at 01:28:47PM +0200, Christian Brauner wrote:
> > On Tue, Apr 27, 2021 at 01:27:14PM +0200, Christian Brauner wrote:
> > > On Tue, Apr 27, 2021 at 01:47:17PM +0300, Alexander Mihalicyn wrote:
> > > > On Tue, Apr 27, 2021 at 1:43 PM Christian Brauner
> > > > <christian.brauner@ubuntu.com> wrote:
> > > > >
> > > > > On Tue, Apr 27, 2021 at 01:37:28PM +0300, Alexander Mihalicyn wrote:
> > > > > > On Tue, Apr 27, 2021 at 1:28 PM Christian Brauner
> > > > > > <christian.brauner@ubuntu.com> wrote:
> > > > > > >
> > > > > > > On Tue, Apr 27, 2021 at 12:03:17PM +0200, Krzysztof Kozlowski wrote:
> > > > > > > > On 27/04/2021 11:46, Christian Brauner wrote:
> > > > > > > > > On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> > > > > > > > >> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > > > > > > >>
> > > > > > > > >> BugLink: https://bugs.launchpad.net/bugs/1857257
> > > > > > > > >>
> > > > > > > > >> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > > > > > > > >> shiftfs as underlay") and it broke checkpoint/restore of docker
> > > > > > > > >> contains:
> > > > > > > > >> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> > > > > > > > >>
> > > > > > > > >> The following script can be used to trigger the issue:
> > > > > > > > >>   #!/bin/bash
> > > > > > > > >>
> > > > > > > > >>   cat > test.py << EOF
> > > > > > > > >>   import sys
> > > > > > > > >>
> > > > > > > > >>   f = open("/proc/self/maps")
> > > > > > > > >>
> > > > > > > > >>   for l in f.readlines():
> > > > > > > > >>     if "python" not in l:
> > > > > > > > >>       continue
> > > > > > > > >>     print(l)
> > > > > > > > >>     s = l.split()
> > > > > > > > >>     start, end = s[0].split("-")
> > > > > > > > >>     fname = s[-1]
> > > > > > > > >>     print(start, end, fname)
> > > > > > > > >>     break
> > > > > > > > >>   else:
> > > > > > > > >>     sys.exit(1)
> > > > > > > > >>
> > > > > > > > >>   test_file1 = open(fname)
> > > > > > > > >>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> > > > > > > > >>
> > > > > > > > >>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> > > > > > > > >>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> > > > > > > > >>
> > > > > > > > >>   if fdinfo1 != fdinfo2:
> > > > > > > > >>     print("FAIL")
> > > > > > > > >>     print(test_file1)
> > > > > > > > >>     print(fdinfo1)
> > > > > > > > >>     print(test_file2)
> > > > > > > > >>     print(fdinfo2)
> > > > > > > > >>     sys.exit(1)
> > > > > > > > >>   print("PASS")
> > > > > > > > >>   EOF
> > > > > > > > >>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> > > > > > > > >>
> > > > > > > > >> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> > > > > > > > >>
> > > > > > > > >> Cc: Andrei Vagin <avagin@gmail.com>
> > > > > > > > >> Cc: Adrian Reber <areber@redhat.com>
> > > > > > > > >> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > > > > > > > >> Cc: Stefan Bader <stefan.bader@canonical.com>
> > > > > > > > >> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > > > > > >> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> > > > > > > > >>
> > > > > > > > >> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> > > > > > > > >> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > > > > > > >> ---
> > > > > > > > >
> > > > > > > > > Hey,
> > > > > > > > >
> > > > > > > > > Thanks for the patch!
> > > > > > > > > Fwiw, Andrei already tried to fix this a while ago but it caused another
> > > > > > > > > regression which forced us to revert the fix for this issue.
> > > > > > > >
> > > > > > > > Did you mean you tried this patch and it caused regressions?
> > > > > > >
> > > > > > > No, we had an earlier fix for this issue which didn't depend on aufs
> > > > > > > that Andrei sent:
> > > > > > >
> > > > > > > commit 9ac77e721a0edb87cbbe95f9e75d8e27d96cb051
> > > > > > > Author: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > > > > Date:   Thu May 21 12:55:43 2020 +0200
> > > > > > >
> > > > > > >     Revert "UBUNTU: SAUCE: overlayfs: use shiftfs hacks only with shiftfs as underlay"
> > > > > > >
> > > > > > >     BugLink: https://bugs.launchpad.net/bugs/1879690
> > > > > > >
> > > > > > >     This reverts commit 6f18a8434050333afc80b5bfce2e0e994c86b790.
> > > > > > >
> > > > > > >     The change applied for LP: #1857257 and its followup fix LP: #1876645
> > > > > > >     introduced a regression on overlayfs. Revert these commits for now.
> > > > > > >
> > > > > > >     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > > > >     Acked-by: Colin Ian King <colin.king@canonical.com>
> > > > > > >     Acked-by: Khalid Elmously <khalid.elmously@canonical.com>
> > > > > > >     Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > > > >
> > > > > > > Shifts is trying to overcome a shortcoming in overlayfs itself where
> > > > > > > overlayfs is constructing a fake path where fake path means mnt and
> > > > > > > dentry from the overlay but inode from another (lower or upper)
> > > > > > > filesystem which is a hack to workaround related issues to the one we're
> > > > > > > doing here.
> > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > >>  fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
> > > > > > > > >>  1 file changed, 29 insertions(+)
> > > > > > > > >>
> > > > > > > > >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > > > > > > >> index 0d3ea0cf3e98..5fa520d0798e 100644
> > > > > > > > >> --- a/fs/overlayfs/file.c
> > > > > > > > >> +++ b/fs/overlayfs/file.c
> > > > > > > > >> @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > > > > > > > >>    return ret;
> > > > > > > > >>  }
> > > > > > > > >>
> > > > > > > > >> +/* handle vma->vm_prfile */
> > > > > > > > >> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > > > > > > >> +                        struct file *file)
> > > > > > > > >> +{
> > > > > > > > >> +  get_file(file);
> > > > > > > > >> +  vma->vm_prfile = file;
> > > > > > > > >> +#ifndef CONFIG_MMU
> > > > > > > > >> +  get_file(file);
> > > > > > > > >> +  vma->vm_region->vm_prfile = file;
> > > > > > > > >> +#endif
> > > > > > > > >> +}
> > > > > > > > >
> > > > > > > > > I'm confused struct vm_area_struct doesn't have a vm_prfile entry. Can
> > > > > > > > > you explain, please?
> > > > > > > >
> > > > > > > > It has. The aufs patchset brought it and basically this patch depends on
> > > > > > > > aufs.
> > > > > > >
> > > > > > > I meant upstream but good to know.
> > > > > > > Btw, why do we still carry aufs?
> > > > > > > This is not my business but I fear this will mean you're committing to
> > > > > > > carrying either aufs or a specific aufs patch with Ubuntu for the sake
> > > > > > > of overlayfs even if you wanted to drop aufs in the future.
> > > > > >
> > > > > > If the aufs will be dropped from the Ubuntu kernel, this part with
> > > > > > vma_pr_or_file() and additional
> > > > > > vma field can be safely saved in the kernel.
> > > > > > It's an independent part (mm/prfile.c + several small changes in
> > > > > > proc). So, it's safe to keep
> > > > > > it in the kernel for overlayfs/shiftfs needs.
> > > > > >
> > > > > > It's also important to notice that this problem with incorrect
> > > > > > map_files mnt_id is also valid
> > > > > > for current shiftfs implementation. If we decide to take this
> > > > > > particular patch for overlayfs,
> > > > > > I will prepare an analogical fix for the shiftfs.
> > > > >
> > > > > So what's the wrong mnt_id here, i.e. is criu confused because it sees
> > > > > the lower fs's mnt_id when it should see the mnt_id of shiftfs/overlayfs
> > > > > itself or the other way around?
> > > >
> > > > Yes. If we mmap() file which was opened through shiftfs/overlayfs we
> > > > expect that if we reopen the same file but through
> > > > /proc/<pid>/map_files/0x111-0x222
> > > > we get file descriptor from the same mount but not from the underlying
> > > > filesystem. I'm not
> > > > sure but this weird behaviour theoretically may also cause security issues.
> > >
> > > Stacking filesystems are crap is the summary you're looking for.
> > >
> > > Jokes aside, this is I think the reason for the overlayfs fake-path
> > > quackery, i.e. combining mount and dentry from overlayfs with a
> > > non-overlayfs inode. If your patch doesn't regress anything and we carry
> > > that aufs crap anyway then fine with me. Though I think this needs to be
> > > fixed in the upstream kernel, i.e. overlayfs shouldn't need to do stuff
> > > like this in the first place.

Yes, I think the reason why we have
realfile = open_with_fake_path(&file->f_path, flags, realinode, current_cred());
in ovl_open_realfile() in upstream version of the overlayfs
is to make things like /proc/<pid>/map_files/ work correctly. But
speaking honestly,
I think that the way that the aufs solves this problem by adding
additional field on struct vm_area_struct
looks clearer. When working with stacking filesystem we really have
two files - one for the mm needs and the second
to determine the real origin of the memory mapping (from which fd it
was mmapped).
Maybe we need just port that part of the aufs (mm/prfile.c) for the
upstream kernel? I think it's worth to come up
with a discussion on the unionfs mailing list. I believe that Amir and
Miklos will say something interesting about that problem.

> >
> > Oh, and we totally missed Seth here. Adding him.
>
> And thank you for working on this fix, Alexander.

Thank you for your attention to the problem and patches! ;)
Seth Forshee April 27, 2021, 7:42 p.m. UTC | #13
On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1857257
> 
> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> shiftfs as underlay") and it broke checkpoint/restore of docker
> contains:
> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> 
> The following script can be used to trigger the issue:
>   #!/bin/bash
> 
>   cat > test.py << EOF
>   import sys
> 
>   f = open("/proc/self/maps")
> 
>   for l in f.readlines():
>     if "python" not in l:
>       continue
>     print(l)
>     s = l.split()
>     start, end = s[0].split("-")
>     fname = s[-1]
>     print(start, end, fname)
>     break
>   else:
>     sys.exit(1)
> 
>   test_file1 = open(fname)
>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> 
>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> 
>   if fdinfo1 != fdinfo2:
>     print("FAIL")
>     print(test_file1)
>     print(fdinfo1)
>     print(test_file2)
>     print(fdinfo2)
>     sys.exit(1)
>   print("PASS")
>   EOF
>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> 
> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> 
> Cc: Andrei Vagin <avagin@gmail.com>
> Cc: Adrian Reber <areber@redhat.com>
> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> Cc: Stefan Bader <stefan.bader@canonical.com>
> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> 
> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>

From review I think this patch looks okay, but I'd like to put it
through some regression testing before I ack it. I'll try to get some
testing done this week.

Thanks,
Seth
Alexander Mikhalitsyn April 27, 2021, 8:19 p.m. UTC | #14
On Tue, Apr 27, 2021 at 10:42 PM Seth Forshee
<seth.forshee@canonical.com> wrote:
>
> On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> > From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> >
> > BugLink: https://bugs.launchpad.net/bugs/1857257
> >
> > The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > shiftfs as underlay") and it broke checkpoint/restore of docker
> > contains:
> > https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> >
> > The following script can be used to trigger the issue:
> >   #!/bin/bash
> >
> >   cat > test.py << EOF
> >   import sys
> >
> >   f = open("/proc/self/maps")
> >
> >   for l in f.readlines():
> >     if "python" not in l:
> >       continue
> >     print(l)
> >     s = l.split()
> >     start, end = s[0].split("-")
> >     fname = s[-1]
> >     print(start, end, fname)
> >     break
> >   else:
> >     sys.exit(1)
> >
> >   test_file1 = open(fname)
> >   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> >
> >   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> >   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> >
> >   if fdinfo1 != fdinfo2:
> >     print("FAIL")
> >     print(test_file1)
> >     print(fdinfo1)
> >     print(test_file2)
> >     print(fdinfo2)
> >     sys.exit(1)
> >   print("PASS")
> >   EOF
> >   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> >
> > Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> >
> > Cc: Andrei Vagin <avagin@gmail.com>
> > Cc: Adrian Reber <areber@redhat.com>
> > Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > Cc: Stefan Bader <stefan.bader@canonical.com>
> > Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> >
> > Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> > Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
>
> From review I think this patch looks okay, but I'd like to put it
> through some regression testing before I ack it. I'll try to get some
> testing done this week.

Thank you!

>
> Thanks,
> Seth

Alex.
Seth Forshee April 30, 2021, 8:37 p.m. UTC | #15
On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1857257
> 
> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> shiftfs as underlay") and it broke checkpoint/restore of docker
> contains:
> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> 
> The following script can be used to trigger the issue:
>   #!/bin/bash
> 
>   cat > test.py << EOF
>   import sys
> 
>   f = open("/proc/self/maps")
> 
>   for l in f.readlines():
>     if "python" not in l:
>       continue
>     print(l)
>     s = l.split()
>     start, end = s[0].split("-")
>     fname = s[-1]
>     print(start, end, fname)
>     break
>   else:
>     sys.exit(1)
> 
>   test_file1 = open(fname)
>   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> 
>   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
>   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> 
>   if fdinfo1 != fdinfo2:
>     print("FAIL")
>     print(test_file1)
>     print(fdinfo1)
>     print(test_file2)
>     print(fdinfo2)
>     sys.exit(1)
>   print("PASS")
>   EOF
>   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> 
> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> 
> Cc: Andrei Vagin <avagin@gmail.com>
> Cc: Adrian Reber <areber@redhat.com>
> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> Cc: Stefan Bader <stefan.bader@canonical.com>
> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> 
> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>

I tested with and without this patch using the xfstests overlay tests
and saw no regressions. The use of vm_prfile is limited to very few use
cases, all within proc, so I think this change should be safe.

Acked-by: Seth Forshee <seth.forshee@canonical.com>
Alexander Mikhalitsyn May 3, 2021, 5:37 p.m. UTC | #16
On Fri, 30 Apr 2021 15:37:42 -0500
Seth Forshee <seth.forshee@canonical.com> wrote:

> On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> > From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > 
> > BugLink: https://bugs.launchpad.net/bugs/1857257
> > 
> > The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > shiftfs as underlay") and it broke checkpoint/restore of docker
> > contains:
> > https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> > 
> > The following script can be used to trigger the issue:
> >   #!/bin/bash
> > 
> >   cat > test.py << EOF
> >   import sys
> > 
> >   f = open("/proc/self/maps")
> > 
> >   for l in f.readlines():
> >     if "python" not in l:
> >       continue
> >     print(l)
> >     s = l.split()
> >     start, end = s[0].split("-")
> >     fname = s[-1]
> >     print(start, end, fname)
> >     break
> >   else:
> >     sys.exit(1)
> > 
> >   test_file1 = open(fname)
> >   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> > 
> >   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> >   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> > 
> >   if fdinfo1 != fdinfo2:
> >     print("FAIL")
> >     print(test_file1)
> >     print(fdinfo1)
> >     print(test_file2)
> >     print(fdinfo2)
> >     sys.exit(1)
> >   print("PASS")
> >   EOF
> >   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> > 
> > Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> > 
> > Cc: Andrei Vagin <avagin@gmail.com>
> > Cc: Adrian Reber <areber@redhat.com>
> > Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > Cc: Stefan Bader <stefan.bader@canonical.com>
> > Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> > 
> > Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> > Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> 
> I tested with and without this patch using the xfstests overlay tests
> and saw no regressions. The use of vm_prfile is limited to very few use
> cases, all within proc, so I think this change should be safe.
> 
> Acked-by: Seth Forshee <seth.forshee@canonical.com>

Hi, Seth

Thanks for review and testing!
Do I need to resend this patch with your "Acked-by" tag or I need to wait for another maintainer and get second ACK?

Regards,
Alex
Seth Forshee May 3, 2021, 8:58 p.m. UTC | #17
On Mon, May 03, 2021 at 08:37:18PM +0300, Alexander Mikhalitsyn wrote:
> On Fri, 30 Apr 2021 15:37:42 -0500
> Seth Forshee <seth.forshee@canonical.com> wrote:
> 
> > On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> > > From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > 
> > > BugLink: https://bugs.launchpad.net/bugs/1857257
> > > 
> > > The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > > shiftfs as underlay") and it broke checkpoint/restore of docker
> > > contains:
> > > https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> > > 
> > > The following script can be used to trigger the issue:
> > >   #!/bin/bash
> > > 
> > >   cat > test.py << EOF
> > >   import sys
> > > 
> > >   f = open("/proc/self/maps")
> > > 
> > >   for l in f.readlines():
> > >     if "python" not in l:
> > >       continue
> > >     print(l)
> > >     s = l.split()
> > >     start, end = s[0].split("-")
> > >     fname = s[-1]
> > >     print(start, end, fname)
> > >     break
> > >   else:
> > >     sys.exit(1)
> > > 
> > >   test_file1 = open(fname)
> > >   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> > > 
> > >   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> > >   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> > > 
> > >   if fdinfo1 != fdinfo2:
> > >     print("FAIL")
> > >     print(test_file1)
> > >     print(fdinfo1)
> > >     print(test_file2)
> > >     print(fdinfo2)
> > >     sys.exit(1)
> > >   print("PASS")
> > >   EOF
> > >   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> > > 
> > > Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> > > 
> > > Cc: Andrei Vagin <avagin@gmail.com>
> > > Cc: Adrian Reber <areber@redhat.com>
> > > Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > > Cc: Stefan Bader <stefan.bader@canonical.com>
> > > Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> > > 
> > > Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> > > Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > 
> > I tested with and without this patch using the xfstests overlay tests
> > and saw no regressions. The use of vm_prfile is limited to very few use
> > cases, all within proc, so I think this change should be safe.
> > 
> > Acked-by: Seth Forshee <seth.forshee@canonical.com>
> 
> Hi, Seth
> 
> Thanks for review and testing!
> Do I need to resend this patch with your "Acked-by" tag or I need to wait for another maintainer and get second ACK?

You don't need to resend at all, though the patch will need another ack.

Also, today I became aware of a stable patch for at least 5.11 which has
conflicts with this patch. I'm testing an updated version of the patch
with the conflicts resolved, which we might need to use for at least
hirsute/impish.

Seth
Alexander Mikhalitsyn May 4, 2021, 12:27 p.m. UTC | #18
On Mon, 3 May 2021 15:58:46 -0500
Seth Forshee <seth.forshee@canonical.com> wrote:

> On Mon, May 03, 2021 at 08:37:18PM +0300, Alexander Mikhalitsyn wrote:
> > On Fri, 30 Apr 2021 15:37:42 -0500
> > Seth Forshee <seth.forshee@canonical.com> wrote:
> > 
> > > On Mon, Apr 26, 2021 at 11:11:21AM +0300, alexander@mihalicyn.com wrote:
> > > > From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > > 
> > > > BugLink: https://bugs.launchpad.net/bugs/1857257
> > > > 
> > > > The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> > > > shiftfs as underlay") and it broke checkpoint/restore of docker
> > > > contains:
> > > > https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> > > > 
> > > > The following script can be used to trigger the issue:
> > > >   #!/bin/bash
> > > > 
> > > >   cat > test.py << EOF
> > > >   import sys
> > > > 
> > > >   f = open("/proc/self/maps")
> > > > 
> > > >   for l in f.readlines():
> > > >     if "python" not in l:
> > > >       continue
> > > >     print(l)
> > > >     s = l.split()
> > > >     start, end = s[0].split("-")
> > > >     fname = s[-1]
> > > >     print(start, end, fname)
> > > >     break
> > > >   else:
> > > >     sys.exit(1)
> > > > 
> > > >   test_file1 = open(fname)
> > > >   test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> > > > 
> > > >   fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
> > > >   fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> > > > 
> > > >   if fdinfo1 != fdinfo2:
> > > >     print("FAIL")
> > > >     print(test_file1)
> > > >     print(fdinfo1)
> > > >     print(test_file2)
> > > >     print(fdinfo2)
> > > >     sys.exit(1)
> > > >   print("PASS")
> > > >   EOF
> > > >   sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> > > > 
> > > > Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> > > > 
> > > > Cc: Andrei Vagin <avagin@gmail.com>
> > > > Cc: Adrian Reber <areber@redhat.com>
> > > > Cc: Christian Brauner <christian.brauner@ubuntu.com>
> > > > Cc: Stefan Bader <stefan.bader@canonical.com>
> > > > Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> > > > Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> > > > 
> > > > Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> > > > Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > 
> > > I tested with and without this patch using the xfstests overlay tests
> > > and saw no regressions. The use of vm_prfile is limited to very few use
> > > cases, all within proc, so I think this change should be safe.
> > > 
> > > Acked-by: Seth Forshee <seth.forshee@canonical.com>
> > 
> > Hi, Seth
> > 
> > Thanks for review and testing!
> > Do I need to resend this patch with your "Acked-by" tag or I need to wait for another maintainer and get second ACK?
> 
> You don't need to resend at all, though the patch will need another ack.

got it

> 
> Also, today I became aware of a stable patch for at least 5.11 which has
> conflicts with this patch. I'm testing an updated version of the patch
> with the conflicts resolved, which we might need to use for at least
> hirsute/impish.

thanks!

> 
> Seth

Alex
Stefan Bader May 7, 2021, 1:36 p.m. UTC | #19
On 26.04.21 10:11, alexander@mihalicyn.com wrote:
> From: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1857257
> 
> The hack was introduced in ("UBUNTU: SAUCE: overlayfs: allow with
> shiftfs as underlay") and it broke checkpoint/restore of docker
> contains:
> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
> 
> The following script can be used to trigger the issue:
>    #!/bin/bash
> 
>    cat > test.py << EOF
>    import sys
> 
>    f = open("/proc/self/maps")
> 
>    for l in f.readlines():
>      if "python" not in l:
>        continue
>      print(l)
>      s = l.split()
>      start, end = s[0].split("-")
>      fname = s[-1]
>      print(start, end, fname)
>      break
>    else:
>      sys.exit(1)
> 
>    test_file1 = open(fname)
>    test_file2 = open("/proc/self/map_files/%s-%s" % (start, end))
> 
>    fdinfo1 = open("/proc/self/fdinfo/%d" % test_file1.fileno()).read()
>    fdinfo2 = open("/proc/self/fdinfo/%d" % test_file2.fileno()).read()
> 
>    if fdinfo1 != fdinfo2:
>      print("FAIL")
>      print(test_file1)
>      print(fdinfo1)
>      print(test_file2)
>      print(fdinfo2)
>      sys.exit(1)
>    print("PASS")
>    EOF
>    sudo docker run -it --privileged --rm -v `pwd`:/mnt python python /mnt/test.py
> 
> Thanks to Andrei Vagin for the reproducer and investigation of this problem.
> 
> Cc: Andrei Vagin <avagin@gmail.com>
> Cc: Adrian Reber <areber@redhat.com>
> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> Cc: Stefan Bader <stefan.bader@canonical.com>
> Cc: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
> Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> 
> Fixes: d24b8a5 ("UBUNTU: SAUCE: overlayfs: allow with shiftfs as underlay")
> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> ---

Applied v2 to groovy:linux (prep). Thanks.

-Stefan

>   fs/overlayfs/file.c | 29 +++++++++++++++++++++++++++++
>   1 file changed, 29 insertions(+)
> 
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 0d3ea0cf3e98..5fa520d0798e 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -325,6 +325,18 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>   	return ret;
>   }
>   
> +/* handle vma->vm_prfile */
> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> +			      struct file *file)
> +{
> +	get_file(file);
> +	vma->vm_prfile = file;
> +#ifndef CONFIG_MMU
> +	get_file(file);
> +	vma->vm_region->vm_prfile = file;
> +#endif
> +}
> +
>   static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
>   {
>   	struct file *realfile = file->private_data;
> @@ -351,6 +363,23 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
>   		vma->vm_file = file;
>   		fput(realfile);
>   	} else {
> +		/*
> +		 * In map_files_get_link() (fs/proc/base.c)
> +		 * we need to determine correct path from overlayfs.
> +		 * But real_mount(realfile->f_path.mnt) may be not
> +		 * equal to real_mount(file->f_path.mnt). In such case
> +		 * fdinfo of the same file which was opened from
> +		 * /proc/<pid>/map_files/... and "usual" path
> +		 * will show different mnt_id.
> +		 *
> +		 * We solve issue like in aufs by using additional
> +		 * field on struct vm_area_struct called "vm_prfile"
> +		 * which is used only for fdinfo/"printing" needs.
> +		 *
> +		 * See also mm/prfile.c
> +		 */
> +		ovl_vm_prfile_set(vma, file);
> +
>   		/* Drop reference count from previous vm_file value */
>   		fput(file);
>   	}
>
diff mbox series

Patch

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 0d3ea0cf3e98..5fa520d0798e 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -325,6 +325,18 @@  static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ret;
 }
 
+/* handle vma->vm_prfile */
+static void ovl_vm_prfile_set(struct vm_area_struct *vma,
+			      struct file *file)
+{
+	get_file(file);
+	vma->vm_prfile = file;
+#ifndef CONFIG_MMU
+	get_file(file);
+	vma->vm_region->vm_prfile = file;
+#endif
+}
+
 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct file *realfile = file->private_data;
@@ -351,6 +363,23 @@  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 		vma->vm_file = file;
 		fput(realfile);
 	} else {
+		/*
+		 * In map_files_get_link() (fs/proc/base.c)
+		 * we need to determine correct path from overlayfs.
+		 * But real_mount(realfile->f_path.mnt) may be not
+		 * equal to real_mount(file->f_path.mnt). In such case
+		 * fdinfo of the same file which was opened from
+		 * /proc/<pid>/map_files/... and "usual" path
+		 * will show different mnt_id.
+		 *
+		 * We solve issue like in aufs by using additional
+		 * field on struct vm_area_struct called "vm_prfile"
+		 * which is used only for fdinfo/"printing" needs.
+		 *
+		 * See also mm/prfile.c
+		 */
+		ovl_vm_prfile_set(vma, file);
+
 		/* Drop reference count from previous vm_file value */
 		fput(file);
 	}