[09/11] vfs: push copy_file_ranges -EXDEV checks down

Message ID 20181203083416.28978-10-david@fromorbit.com
State New
Headers show
Series
  • fs: fixes for major copy_file_range() issues
Related show

Commit Message

Dave Chinner Dec. 3, 2018, 8:34 a.m.
From: Dave Chinner <dchinner@redhat.com>

We want to enable cross-filesystem copy_file_range functionality
where possible, so push the "same superblock only" checks down to
the individual filesystem callouts so they can make their own
decisions about cross-superblock copy offload.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/ceph/file.c      |  4 +++-
 fs/cifs/cifsfs.c    |  8 +++++++-
 fs/fuse/file.c      |  5 ++++-
 fs/nfs/nfs4file.c   | 16 ++++++++++------
 fs/overlayfs/file.c | 10 +++++++++-
 fs/read_write.c     | 10 ++++------
 6 files changed, 37 insertions(+), 16 deletions(-)

Comments

Amir Goldstein Dec. 3, 2018, 12:36 p.m. | #1
On Mon, Dec 3, 2018 at 10:34 AM Dave Chinner <david@fromorbit.com> wrote:
>
> From: Dave Chinner <dchinner@redhat.com>
>
> We want to enable cross-filesystem copy_file_range functionality
> where possible, so push the "same superblock only" checks down to
> the individual filesystem callouts so they can make their own
> decisions about cross-superblock copy offload.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>

Looks good.
You may add
Reviewed-by: Amir Goldstein <amir73il@gmail.com>

Similar comment about overlayfs as patch 3.

 diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 68736e5d6a56..34fb0398d016 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -443,6 +443,14 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
>         const struct cred *old_cred;
>         loff_t ret;
>
> +       /*
> +        * Temporary. Cross device copy checks should be left to the copy file
> +        * call on the real inodes, but existing behaviour checks the upper
> +        * files only.
> +        */
> +       if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
> +               return -EXDEV;
> +
>         ret = ovl_real_fdget(file_out, &real_out);
>         if (ret)
>                 return ret;
> @@ -491,7 +499,7 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
>         ret =  ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
>                             OVL_COPY);
>
> -       if (ret == -EOPNOTSUPP)
> +       if (ret == -EOPNOTSUPP || ret == -EXDEV)
>                 ret = generic_copy_file_range(file_in, pos_in, file_out,
>                                         pos_out, len, flags);

This fallback is already provided by vfs_copy_file_range().

Thanks,
Amir.
Olga Kornievskaia Dec. 3, 2018, 5:58 p.m. | #2
On Mon, Dec 3, 2018 at 3:34 AM Dave Chinner <david@fromorbit.com> wrote:
>
> From: Dave Chinner <dchinner@redhat.com>
>
> We want to enable cross-filesystem copy_file_range functionality
> where possible, so push the "same superblock only" checks down to
> the individual filesystem callouts so they can make their own
> decisions about cross-superblock copy offload.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>

Overall VFS/NFS bits look good to me. I'm re-basing my client and
server patch series on top of this and will test it out.

Thank you.

> ---
>  fs/ceph/file.c      |  4 +++-
>  fs/cifs/cifsfs.c    |  8 +++++++-
>  fs/fuse/file.c      |  5 ++++-
>  fs/nfs/nfs4file.c   | 16 ++++++++++------
>  fs/overlayfs/file.c | 10 +++++++++-
>  fs/read_write.c     | 10 ++++------
>  6 files changed, 37 insertions(+), 16 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index cf29f0410dcb..eb876e19c1dc 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1905,6 +1905,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
>
>         if (src_inode == dst_inode)
>                 return -EINVAL;
> +       if (src_inode->i_sb != dst_inode->i_sb)
> +               return -EXDEV;
>         if (ceph_snap(dst_inode) != CEPH_NOSNAP)
>                 return -EROFS;
>
> @@ -2105,7 +2107,7 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
>         ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off,
>                                         len, flags);
>
> -       if (ret == -EOPNOTSUPP)
> +       if (ret == -EOPNOTSUPP || ret == -EXDEV)
>                 ret = generic_copy_file_range(src_file, src_off, dst_file,
>                                         dst_off, len, flags);
>         return ret;
> diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> index 5ef4baec6234..03e4b9eacbd1 100644
> --- a/fs/cifs/cifsfs.c
> +++ b/fs/cifs/cifsfs.c
> @@ -1072,6 +1072,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
>                 goto out;
>         }
>
> +       if (src_inode->i_sb != target_inode->i_sb) {
> +               rc = -EXDEV;
> +               goto out;
> +       }
> +
> +
>         if (!src_file->private_data || !dst_file->private_data) {
>                 rc = -EBADF;
>                 cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
> @@ -1142,7 +1148,7 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
>                                         len, flags);
>         free_xid(xid);
>
> -       if (rc == -EOPNOTSUPP)
> +       if (rc == -EOPNOTSUPP || rc == -EXDEV)
>                 rc = generic_copy_file_range(src_file, off, dst_file,
>                                         destoff, len, flags);
>         return rc;
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index b86fb0298739..0758f831a4eb 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -3053,6 +3053,9 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
>         if (fc->no_copy_file_range)
>                 return -EOPNOTSUPP;
>
> +       if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
> +               return -EXDEV;
> +
>         inode_lock(inode_out);
>
>         if (fc->writeback_cache) {
> @@ -3109,7 +3112,7 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
>         ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off,
>                                         len, flags);
>
> -       if (ret == -EOPNOTSUPP)
> +       if (ret == -EOPNOTSUPP || ret == -EXDEV)
>                 ret = generic_copy_file_range(src_file, src_off, dst_file,
>                                         dst_off, len, flags);
>         return ret;
> diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
> index d7766a6eb0f4..4783c0c1c49e 100644
> --- a/fs/nfs/nfs4file.c
> +++ b/fs/nfs/nfs4file.c
> @@ -133,16 +133,20 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
>                                     struct file *file_out, loff_t pos_out,
>                                     size_t count, unsigned int flags)
>  {
> -       ssize_t ret;
> +       ssize_t ret = -EXDEV;
>
>         if (file_inode(file_in) == file_inode(file_out))
>                 return -EINVAL;
> -retry:
> -       ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
> -       if (ret == -EAGAIN)
> -               goto retry;
>
> -       if (ret == -EOPNOTSUPP)
> +       /* only offload copy if superblock is the same */
> +       if (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
> +               do {
> +                       ret = nfs42_proc_copy(file_in, pos_in, file_out,
> +                                       pos_out, count);
> +               } while (ret == -EAGAIN);
> +       }
> +
> +       if (ret == -EOPNOTSUPP || ret == -EXDEV)
>                 ret = generic_copy_file_range(file_in, pos_in, file_out,
>                                         pos_out, count, flags);
>         return ret;
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 68736e5d6a56..34fb0398d016 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -443,6 +443,14 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
>         const struct cred *old_cred;
>         loff_t ret;
>
> +       /*
> +        * Temporary. Cross device copy checks should be left to the copy file
> +        * call on the real inodes, but existing behaviour checks the upper
> +        * files only.
> +        */
> +       if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
> +               return -EXDEV;
> +
>         ret = ovl_real_fdget(file_out, &real_out);
>         if (ret)
>                 return ret;
> @@ -491,7 +499,7 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
>         ret =  ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
>                             OVL_COPY);
>
> -       if (ret == -EOPNOTSUPP)
> +       if (ret == -EOPNOTSUPP || ret == -EXDEV)
>                 ret = generic_copy_file_range(file_in, pos_in, file_out,
>                                         pos_out, len, flags);
>         return ret;
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 174cf92eea1d..4e0666de0d69 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1565,6 +1565,10 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
>                             struct file *file_out, loff_t pos_out,
>                             size_t len, unsigned int flags)
>  {
> +       /* Temporary, do_splice_direct supports cross-sb copies */
> +       if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
> +               return -EXDEV;
> +
>         return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
>                         len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
>  }
> @@ -1611,17 +1615,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>                             struct file *file_out, loff_t pos_out,
>                             size_t len, unsigned int flags)
>  {
> -       struct inode *inode_in = file_inode(file_in);
> -       struct inode *inode_out = file_inode(file_out);
>         ssize_t ret;
>
>         if (flags != 0)
>                 return -EINVAL;
>
> -       /* this could be relaxed once a method supports cross-fs copies */
> -       if (inode_in->i_sb != inode_out->i_sb)
> -               return -EXDEV;
> -
>         ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
>                                         flags);
>         if (ret < 0)
> --
> 2.19.1
>
Anna Schumaker Dec. 3, 2018, 6:53 p.m. | #3
On Mon, 2018-12-03 at 19:34 +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> We want to enable cross-filesystem copy_file_range functionality
> where possible, so push the "same superblock only" checks down to
> the individual filesystem callouts so they can make their own
> decisions about cross-superblock copy offload.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/ceph/file.c      |  4 +++-
>  fs/cifs/cifsfs.c    |  8 +++++++-
>  fs/fuse/file.c      |  5 ++++-
>  fs/nfs/nfs4file.c   | 16 ++++++++++------
>  fs/overlayfs/file.c | 10 +++++++++-
>  fs/read_write.c     | 10 ++++------
>  6 files changed, 37 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index cf29f0410dcb..eb876e19c1dc 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1905,6 +1905,8 @@ static ssize_t __ceph_copy_file_range(struct file
> *src_file, loff_t src_off,
>  
>  	if (src_inode == dst_inode)
>  		return -EINVAL;
> +	if (src_inode->i_sb != dst_inode->i_sb)
> +		return -EXDEV;
>  	if (ceph_snap(dst_inode) != CEPH_NOSNAP)
>  		return -EROFS;
>  
> @@ -2105,7 +2107,7 @@ static ssize_t ceph_copy_file_range(struct file
> *src_file, loff_t src_off,
>  	ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off,
>  					len, flags);
>  
> -	if (ret == -EOPNOTSUPP)
> +	if (ret == -EOPNOTSUPP || ret == -EXDEV)
>  		ret = generic_copy_file_range(src_file, src_off, dst_file,
>  					dst_off, len, flags);
>  	return ret;
> diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> index 5ef4baec6234..03e4b9eacbd1 100644
> --- a/fs/cifs/cifsfs.c
> +++ b/fs/cifs/cifsfs.c
> @@ -1072,6 +1072,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
>  		goto out;
>  	}
>  
> +	if (src_inode->i_sb != target_inode->i_sb) {
> +		rc = -EXDEV;
> +		goto out;
> +	}
> +
> +
>  	if (!src_file->private_data || !dst_file->private_data) {
>  		rc = -EBADF;
>  		cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
> @@ -1142,7 +1148,7 @@ static ssize_t cifs_copy_file_range(struct file
> *src_file, loff_t off,
>  					len, flags);
>  	free_xid(xid);
>  
> -	if (rc == -EOPNOTSUPP)
> +	if (rc == -EOPNOTSUPP || rc == -EXDEV)
>  		rc = generic_copy_file_range(src_file, off, dst_file,
>  					destoff, len, flags);
>  	return rc;
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index b86fb0298739..0758f831a4eb 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -3053,6 +3053,9 @@ static ssize_t __fuse_copy_file_range(struct file
> *file_in, loff_t pos_in,
>  	if (fc->no_copy_file_range)
>  		return -EOPNOTSUPP;
>  
> +	if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
> +		return -EXDEV;
> +
>  	inode_lock(inode_out);
>  
>  	if (fc->writeback_cache) {
> @@ -3109,7 +3112,7 @@ static ssize_t fuse_copy_file_range(struct file
> *src_file, loff_t src_off,
>  	ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off,
>  					len, flags);
>  
> -	if (ret == -EOPNOTSUPP)
> +	if (ret == -EOPNOTSUPP || ret == -EXDEV)
>  		ret = generic_copy_file_range(src_file, src_off, dst_file,
>  					dst_off, len, flags);
>  	return ret;
> diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
> index d7766a6eb0f4..4783c0c1c49e 100644
> --- a/fs/nfs/nfs4file.c
> +++ b/fs/nfs/nfs4file.c
> @@ -133,16 +133,20 @@ static ssize_t nfs4_copy_file_range(struct file
> *file_in, loff_t pos_in,
>  				    struct file *file_out, loff_t pos_out,
>  				    size_t count, unsigned int flags)
>  {
> -	ssize_t ret;
> +	ssize_t ret = -EXDEV;
>  
>  	if (file_inode(file_in) == file_inode(file_out))
>  		return -EINVAL;
> -retry:
> -	ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
> -	if (ret == -EAGAIN)
> -		goto retry;
>  
> -	if (ret == -EOPNOTSUPP)
> +	/* only offload copy if superblock is the same */
> +	if (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
> +		do {
> +			ret = nfs42_proc_copy(file_in, pos_in, file_out,
> +					pos_out, count);
> +		} while (ret == -EAGAIN);

I'm not convinced we can actually return -EAGAIN from nfs42_proc_copy().  The
nfs_get_lock_context() function doesn't return it, and if _nfs42_proc_copy()
returns -EAGAIN it's immediately retried by nfs42_proc_copy() instead of
returning.

Olga, am I missing something here?
Anna

> +	}
> +
> +	if (ret == -EOPNOTSUPP || ret == -EXDEV)
>  		ret = generic_copy_file_range(file_in, pos_in, file_out,
>  					pos_out, count, flags);
>  	return ret;
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 68736e5d6a56..34fb0398d016 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -443,6 +443,14 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t
> pos_in,
>  	const struct cred *old_cred;
>  	loff_t ret;
>  
> +	/*
> +	 * Temporary. Cross device copy checks should be left to the copy file
> +	 * call on the real inodes, but existing behaviour checks the upper
> +	 * files only.
> +	 */
> +	if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
> +		return -EXDEV;
> +
>  	ret = ovl_real_fdget(file_out, &real_out);
>  	if (ret)
>  		return ret;
> @@ -491,7 +499,7 @@ static ssize_t ovl_copy_file_range(struct file *file_in,
> loff_t pos_in,
>  	ret =  ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
>  			    OVL_COPY);
>  
> -	if (ret == -EOPNOTSUPP)
> +	if (ret == -EOPNOTSUPP || ret == -EXDEV)
>  		ret = generic_copy_file_range(file_in, pos_in, file_out,
>  					pos_out, len, flags);
>  	return ret;
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 174cf92eea1d..4e0666de0d69 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1565,6 +1565,10 @@ ssize_t generic_copy_file_range(struct file *file_in,
> loff_t pos_in,
>  			    struct file *file_out, loff_t pos_out,
>  			    size_t len, unsigned int flags)
>  {
> +	/* Temporary, do_splice_direct supports cross-sb copies */
> +	if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
> +		return -EXDEV;
> +
>  	return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
>  			len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
>  }
> @@ -1611,17 +1615,11 @@ ssize_t vfs_copy_file_range(struct file *file_in,
> loff_t pos_in,
>  			    struct file *file_out, loff_t pos_out,
>  			    size_t len, unsigned int flags)
>  {
> -	struct inode *inode_in = file_inode(file_in);
> -	struct inode *inode_out = file_inode(file_out);
>  	ssize_t ret;
>  
>  	if (flags != 0)
>  		return -EINVAL;
>  
> -	/* this could be relaxed once a method supports cross-fs copies */
> -	if (inode_in->i_sb != inode_out->i_sb)
> -		return -EXDEV;
> -
>  	ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
>  					flags);
>  	if (ret < 0)
Olga Kornievskaia Dec. 3, 2018, 7:27 p.m. | #4
On Mon, Dec 3, 2018 at 1:53 PM Anna Schumaker <schumaker.anna@gmail.com> wrote:
>
> On Mon, 2018-12-03 at 19:34 +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> >
> > We want to enable cross-filesystem copy_file_range functionality
> > where possible, so push the "same superblock only" checks down to
> > the individual filesystem callouts so they can make their own
> > decisions about cross-superblock copy offload.
> >
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > ---
> >  fs/ceph/file.c      |  4 +++-
> >  fs/cifs/cifsfs.c    |  8 +++++++-
> >  fs/fuse/file.c      |  5 ++++-
> >  fs/nfs/nfs4file.c   | 16 ++++++++++------
> >  fs/overlayfs/file.c | 10 +++++++++-
> >  fs/read_write.c     | 10 ++++------
> >  6 files changed, 37 insertions(+), 16 deletions(-)
> >
> > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > index cf29f0410dcb..eb876e19c1dc 100644
> > --- a/fs/ceph/file.c
> > +++ b/fs/ceph/file.c
> > @@ -1905,6 +1905,8 @@ static ssize_t __ceph_copy_file_range(struct file
> > *src_file, loff_t src_off,
> >
> >       if (src_inode == dst_inode)
> >               return -EINVAL;
> > +     if (src_inode->i_sb != dst_inode->i_sb)
> > +             return -EXDEV;
> >       if (ceph_snap(dst_inode) != CEPH_NOSNAP)
> >               return -EROFS;
> >
> > @@ -2105,7 +2107,7 @@ static ssize_t ceph_copy_file_range(struct file
> > *src_file, loff_t src_off,
> >       ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off,
> >                                       len, flags);
> >
> > -     if (ret == -EOPNOTSUPP)
> > +     if (ret == -EOPNOTSUPP || ret == -EXDEV)
> >               ret = generic_copy_file_range(src_file, src_off, dst_file,
> >                                       dst_off, len, flags);
> >       return ret;
> > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> > index 5ef4baec6234..03e4b9eacbd1 100644
> > --- a/fs/cifs/cifsfs.c
> > +++ b/fs/cifs/cifsfs.c
> > @@ -1072,6 +1072,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
> >               goto out;
> >       }
> >
> > +     if (src_inode->i_sb != target_inode->i_sb) {
> > +             rc = -EXDEV;
> > +             goto out;
> > +     }
> > +
> > +
> >       if (!src_file->private_data || !dst_file->private_data) {
> >               rc = -EBADF;
> >               cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
> > @@ -1142,7 +1148,7 @@ static ssize_t cifs_copy_file_range(struct file
> > *src_file, loff_t off,
> >                                       len, flags);
> >       free_xid(xid);
> >
> > -     if (rc == -EOPNOTSUPP)
> > +     if (rc == -EOPNOTSUPP || rc == -EXDEV)
> >               rc = generic_copy_file_range(src_file, off, dst_file,
> >                                       destoff, len, flags);
> >       return rc;
> > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > index b86fb0298739..0758f831a4eb 100644
> > --- a/fs/fuse/file.c
> > +++ b/fs/fuse/file.c
> > @@ -3053,6 +3053,9 @@ static ssize_t __fuse_copy_file_range(struct file
> > *file_in, loff_t pos_in,
> >       if (fc->no_copy_file_range)
> >               return -EOPNOTSUPP;
> >
> > +     if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
> > +             return -EXDEV;
> > +
> >       inode_lock(inode_out);
> >
> >       if (fc->writeback_cache) {
> > @@ -3109,7 +3112,7 @@ static ssize_t fuse_copy_file_range(struct file
> > *src_file, loff_t src_off,
> >       ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off,
> >                                       len, flags);
> >
> > -     if (ret == -EOPNOTSUPP)
> > +     if (ret == -EOPNOTSUPP || ret == -EXDEV)
> >               ret = generic_copy_file_range(src_file, src_off, dst_file,
> >                                       dst_off, len, flags);
> >       return ret;
> > diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
> > index d7766a6eb0f4..4783c0c1c49e 100644
> > --- a/fs/nfs/nfs4file.c
> > +++ b/fs/nfs/nfs4file.c
> > @@ -133,16 +133,20 @@ static ssize_t nfs4_copy_file_range(struct file
> > *file_in, loff_t pos_in,
> >                                   struct file *file_out, loff_t pos_out,
> >                                   size_t count, unsigned int flags)
> >  {
> > -     ssize_t ret;
> > +     ssize_t ret = -EXDEV;
> >
> >       if (file_inode(file_in) == file_inode(file_out))
> >               return -EINVAL;
> > -retry:
> > -     ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
> > -     if (ret == -EAGAIN)
> > -             goto retry;
> >
> > -     if (ret == -EOPNOTSUPP)
> > +     /* only offload copy if superblock is the same */
> > +     if (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
> > +             do {
> > +                     ret = nfs42_proc_copy(file_in, pos_in, file_out,
> > +                                     pos_out, count);
> > +             } while (ret == -EAGAIN);
>
> I'm not convinced we can actually return -EAGAIN from nfs42_proc_copy().  The
> nfs_get_lock_context() function doesn't return it, and if _nfs42_proc_copy()
> returns -EAGAIN it's immediately retried by nfs42_proc_copy() instead of
> returning.
>
> Olga, am I missing something here?

I'll update it in the client patches that are coming out.

> Anna
>
> > +     }
> > +
> > +     if (ret == -EOPNOTSUPP || ret == -EXDEV)
> >               ret = generic_copy_file_range(file_in, pos_in, file_out,
> >                                       pos_out, count, flags);
> >       return ret;
> > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > index 68736e5d6a56..34fb0398d016 100644
> > --- a/fs/overlayfs/file.c
> > +++ b/fs/overlayfs/file.c
> > @@ -443,6 +443,14 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t
> > pos_in,
> >       const struct cred *old_cred;
> >       loff_t ret;
> >
> > +     /*
> > +      * Temporary. Cross device copy checks should be left to the copy file
> > +      * call on the real inodes, but existing behaviour checks the upper
> > +      * files only.
> > +      */
> > +     if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
> > +             return -EXDEV;
> > +
> >       ret = ovl_real_fdget(file_out, &real_out);
> >       if (ret)
> >               return ret;
> > @@ -491,7 +499,7 @@ static ssize_t ovl_copy_file_range(struct file *file_in,
> > loff_t pos_in,
> >       ret =  ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
> >                           OVL_COPY);
> >
> > -     if (ret == -EOPNOTSUPP)
> > +     if (ret == -EOPNOTSUPP || ret == -EXDEV)
> >               ret = generic_copy_file_range(file_in, pos_in, file_out,
> >                                       pos_out, len, flags);
> >       return ret;
> > diff --git a/fs/read_write.c b/fs/read_write.c
> > index 174cf92eea1d..4e0666de0d69 100644
> > --- a/fs/read_write.c
> > +++ b/fs/read_write.c
> > @@ -1565,6 +1565,10 @@ ssize_t generic_copy_file_range(struct file *file_in,
> > loff_t pos_in,
> >                           struct file *file_out, loff_t pos_out,
> >                           size_t len, unsigned int flags)
> >  {
> > +     /* Temporary, do_splice_direct supports cross-sb copies */
> > +     if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
> > +             return -EXDEV;
> > +
> >       return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
> >                       len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
> >  }
> > @@ -1611,17 +1615,11 @@ ssize_t vfs_copy_file_range(struct file *file_in,
> > loff_t pos_in,
> >                           struct file *file_out, loff_t pos_out,
> >                           size_t len, unsigned int flags)
> >  {
> > -     struct inode *inode_in = file_inode(file_in);
> > -     struct inode *inode_out = file_inode(file_out);
> >       ssize_t ret;
> >
> >       if (flags != 0)
> >               return -EINVAL;
> >
> > -     /* this could be relaxed once a method supports cross-fs copies */
> > -     if (inode_in->i_sb != inode_out->i_sb)
> > -             return -EXDEV;
> > -
> >       ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
> >                                       flags);
> >       if (ret < 0)
>
Dave Chinner Dec. 3, 2018, 11:40 p.m. | #5
On Mon, Dec 03, 2018 at 01:53:35PM -0500, Anna Schumaker wrote:
> On Mon, 2018-12-03 at 19:34 +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > We want to enable cross-filesystem copy_file_range functionality
> > where possible, so push the "same superblock only" checks down to
> > the individual filesystem callouts so they can make their own
> > decisions about cross-superblock copy offload.
....
> > diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
> > index d7766a6eb0f4..4783c0c1c49e 100644
> > --- a/fs/nfs/nfs4file.c
> > +++ b/fs/nfs/nfs4file.c
> > @@ -133,16 +133,20 @@ static ssize_t nfs4_copy_file_range(struct file
> > *file_in, loff_t pos_in,
> >  				    struct file *file_out, loff_t pos_out,
> >  				    size_t count, unsigned int flags)
> >  {
> > -	ssize_t ret;
> > +	ssize_t ret = -EXDEV;
> >  
> >  	if (file_inode(file_in) == file_inode(file_out))
> >  		return -EINVAL;
> > -retry:
> > -	ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
> > -	if (ret == -EAGAIN)
> > -		goto retry;
> >  
> > -	if (ret == -EOPNOTSUPP)
> > +	/* only offload copy if superblock is the same */
> > +	if (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
> > +		do {
> > +			ret = nfs42_proc_copy(file_in, pos_in, file_out,
> > +					pos_out, count);
> > +		} while (ret == -EAGAIN);
> 
> I'm not convinced we can actually return -EAGAIN from nfs42_proc_copy().  The
> nfs_get_lock_context() function doesn't return it, and if _nfs42_proc_copy()
> returns -EAGAIN it's immediately retried by nfs42_proc_copy() instead of
> returning.

Not really my concern, nor something that should be fixed in this
patchset. i.e. the function does the same thing before and after
this patch, so whether EAGAIN can occurr or not is irrelevant to
this patchset....

Cheers,

Dave.
Christoph Hellwig Dec. 4, 2018, 3:43 p.m. | #6
Well, this isn't bugfixes anymore, but adding new features..
Dave Chinner Dec. 4, 2018, 10:18 p.m. | #7
On Tue, Dec 04, 2018 at 07:43:47AM -0800, Christoph Hellwig wrote:
> Well, this isn't bugfixes anymore, but adding new features..

I made that perfectly clear in the cover description. I called it
twice, one of them explicitly stating that this series made these
infrastructure changes because we have pending functionality that
dependents on cross-device copies being supported in a sane manner.

I'll drop it if you want, but then I'll just have to come back after
all the NFS code is merged and do yet more cleanup work.

Cheers,

Dave.
Olga Kornievskaia Dec. 4, 2018, 11:33 p.m. | #8
On Tue, Dec 4, 2018 at 5:18 PM Dave Chinner <david@fromorbit.com> wrote:
>
> On Tue, Dec 04, 2018 at 07:43:47AM -0800, Christoph Hellwig wrote:
> > Well, this isn't bugfixes anymore, but adding new features..
>
> I made that perfectly clear in the cover description. I called it
> twice, one of them explicitly stating that this series made these
> infrastructure changes because we have pending functionality that
> dependents on cross-device copies being supported in a sane manner.
>
> I'll drop it if you want, but then I'll just have to come back after
> all the NFS code is merged and do yet more cleanup work.

This doesn't needs to be fixed in this patch series. I think Anna was
pointing out to me for something to take a look at.

>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david@fromorbit.com
Christoph Hellwig Dec. 5, 2018, 2:09 p.m. | #9
On Wed, Dec 05, 2018 at 09:18:47AM +1100, Dave Chinner wrote:
> I'll drop it if you want, but then I'll just have to come back after
> all the NFS code is merged and do yet more cleanup work.

IFF we want these NFS "features" we'll have to get it right before
merging the code.  But even with that I'd rather fix the glaring
issues you are fixing in your first patches as a priority before
adding more features.  In other words:  don't worry about NFS, lets
get the existing code right before worrying about the next round
of potential issues.
Olga Kornievskaia Dec. 5, 2018, 5:01 p.m. | #10
On Wed, Dec 5, 2018 at 9:09 AM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Wed, Dec 05, 2018 at 09:18:47AM +1100, Dave Chinner wrote:
> > I'll drop it if you want, but then I'll just have to come back after
> > all the NFS code is merged and do yet more cleanup work.
>
> IFF we want these NFS "features" we'll have to get it right before
> merging the code.  But even with that I'd rather fix the glaring
> issues you are fixing in your first patches as a priority before
> adding more features.  In other words:  don't worry about NFS, lets
> get the existing code right before worrying about the next round
> of potential issues.

Dave,

Do you mind in v2 removing the 'retry, ret=EAGAIN' piece and leave the
call to the nfs42_copy_file_range() (with the superblock block check)?
If not, I could provide the patch.

This is a piece of code that got in as a part of the async copy
patches and it was meant for the upcoming server-to-server series.
This code will go right back in with the next series. But since dead
piece of code is glaring wrong currently by all means let's fix it.

Thank you.

Patch

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index cf29f0410dcb..eb876e19c1dc 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1905,6 +1905,8 @@  static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
 
 	if (src_inode == dst_inode)
 		return -EINVAL;
+	if (src_inode->i_sb != dst_inode->i_sb)
+		return -EXDEV;
 	if (ceph_snap(dst_inode) != CEPH_NOSNAP)
 		return -EROFS;
 
@@ -2105,7 +2107,7 @@  static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
 	ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off,
 					len, flags);
 
-	if (ret == -EOPNOTSUPP)
+	if (ret == -EOPNOTSUPP || ret == -EXDEV)
 		ret = generic_copy_file_range(src_file, src_off, dst_file,
 					dst_off, len, flags);
 	return ret;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5ef4baec6234..03e4b9eacbd1 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1072,6 +1072,12 @@  ssize_t cifs_file_copychunk_range(unsigned int xid,
 		goto out;
 	}
 
+	if (src_inode->i_sb != target_inode->i_sb) {
+		rc = -EXDEV;
+		goto out;
+	}
+
+
 	if (!src_file->private_data || !dst_file->private_data) {
 		rc = -EBADF;
 		cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
@@ -1142,7 +1148,7 @@  static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
 					len, flags);
 	free_xid(xid);
 
-	if (rc == -EOPNOTSUPP)
+	if (rc == -EOPNOTSUPP || rc == -EXDEV)
 		rc = generic_copy_file_range(src_file, off, dst_file,
 					destoff, len, flags);
 	return rc;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b86fb0298739..0758f831a4eb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3053,6 +3053,9 @@  static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
 	if (fc->no_copy_file_range)
 		return -EOPNOTSUPP;
 
+	if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
+		return -EXDEV;
+
 	inode_lock(inode_out);
 
 	if (fc->writeback_cache) {
@@ -3109,7 +3112,7 @@  static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
 	ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off,
 					len, flags);
 
-	if (ret == -EOPNOTSUPP)
+	if (ret == -EOPNOTSUPP || ret == -EXDEV)
 		ret = generic_copy_file_range(src_file, src_off, dst_file,
 					dst_off, len, flags);
 	return ret;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index d7766a6eb0f4..4783c0c1c49e 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -133,16 +133,20 @@  static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
 				    struct file *file_out, loff_t pos_out,
 				    size_t count, unsigned int flags)
 {
-	ssize_t ret;
+	ssize_t ret = -EXDEV;
 
 	if (file_inode(file_in) == file_inode(file_out))
 		return -EINVAL;
-retry:
-	ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
-	if (ret == -EAGAIN)
-		goto retry;
 
-	if (ret == -EOPNOTSUPP)
+	/* only offload copy if superblock is the same */
+	if (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
+		do {
+			ret = nfs42_proc_copy(file_in, pos_in, file_out,
+					pos_out, count);
+		} while (ret == -EAGAIN);
+	}
+
+	if (ret == -EOPNOTSUPP || ret == -EXDEV)
 		ret = generic_copy_file_range(file_in, pos_in, file_out,
 					pos_out, count, flags);
 	return ret;
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 68736e5d6a56..34fb0398d016 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -443,6 +443,14 @@  static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 	const struct cred *old_cred;
 	loff_t ret;
 
+	/*
+	 * Temporary. Cross device copy checks should be left to the copy file
+	 * call on the real inodes, but existing behaviour checks the upper
+	 * files only.
+	 */
+	if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
+		return -EXDEV;
+
 	ret = ovl_real_fdget(file_out, &real_out);
 	if (ret)
 		return ret;
@@ -491,7 +499,7 @@  static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
 	ret =  ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
 			    OVL_COPY);
 
-	if (ret == -EOPNOTSUPP)
+	if (ret == -EOPNOTSUPP || ret == -EXDEV)
 		ret = generic_copy_file_range(file_in, pos_in, file_out,
 					pos_out, len, flags);
 	return ret;
diff --git a/fs/read_write.c b/fs/read_write.c
index 174cf92eea1d..4e0666de0d69 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1565,6 +1565,10 @@  ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
 			    struct file *file_out, loff_t pos_out,
 			    size_t len, unsigned int flags)
 {
+	/* Temporary, do_splice_direct supports cross-sb copies */
+	if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
+		return -EXDEV;
+
 	return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
 			len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
 }
@@ -1611,17 +1615,11 @@  ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 			    struct file *file_out, loff_t pos_out,
 			    size_t len, unsigned int flags)
 {
-	struct inode *inode_in = file_inode(file_in);
-	struct inode *inode_out = file_inode(file_out);
 	ssize_t ret;
 
 	if (flags != 0)
 		return -EINVAL;
 
-	/* this could be relaxed once a method supports cross-fs copies */
-	if (inode_in->i_sb != inode_out->i_sb)
-		return -EXDEV;
-
 	ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
 					flags);
 	if (ret < 0)