diff mbox series

[04/11] vfs: add missing checks to copy_file_range

Message ID 20181203083416.28978-5-david@fromorbit.com
State New
Headers show
Series fs: fixes for major copy_file_range() issues | expand

Commit Message

Dave Chinner Dec. 3, 2018, 8:34 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

Like the clone and dedupe interfaces we've recently fixed, the
copy_file_range() implementation is missing basic sanity, limits and
boundary condition tests on the parameters that are passed to it
from userspace. Create a new "generic_copy_file_checks()" function
modelled on the generic_remap_checks() function to provide this
missing functionality.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/read_write.c    | 27 ++++++------------
 include/linux/fs.h |  3 ++
 mm/filemap.c       | 69 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 81 insertions(+), 18 deletions(-)

Comments

Amir Goldstein Dec. 3, 2018, 12:42 p.m. UTC | #1
On Mon, Dec 3, 2018 at 10:34 AM Dave Chinner <david@fromorbit.com> wrote:
>
> From: Dave Chinner <dchinner@redhat.com>
>
> Like the clone and dedupe interfaces we've recently fixed, the
> copy_file_range() implementation is missing basic sanity, limits and
> boundary condition tests on the parameters that are passed to it
> from userspace. Create a new "generic_copy_file_checks()" function
> modelled on the generic_remap_checks() function to provide this
> missing functionality.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---

Looks good.

Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Darrick Wong Dec. 3, 2018, 7:04 p.m. UTC | #2
On Mon, Dec 03, 2018 at 07:34:09PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Like the clone and dedupe interfaces we've recently fixed, the
> copy_file_range() implementation is missing basic sanity, limits and
> boundary condition tests on the parameters that are passed to it
> from userspace. Create a new "generic_copy_file_checks()" function
> modelled on the generic_remap_checks() function to provide this
> missing functionality.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/read_write.c    | 27 ++++++------------
>  include/linux/fs.h |  3 ++
>  mm/filemap.c       | 69 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 81 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 44339b44accc..69809345977e 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1578,7 +1578,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
>  		return file_out->f_op->copy_file_range(file_in, pos_in, file_out,
>  						      pos_out, len, flags);
>  
> -	return generic_copy_file_range(file_in, &pos_in, file_out, &pos_out,
> +	return generic_copy_file_range(file_in, pos_in, file_out, pos_out,
>  					len, flags);
>  }
>  
> @@ -1598,10 +1598,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>  	if (flags != 0)
>  		return -EINVAL;
>  
> -	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
> -		return -EISDIR;
> -	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
> -		return -EINVAL;
> +	/* this could be relaxed once a method supports cross-fs copies */
> +	if (inode_in->i_sb != inode_out->i_sb)
> +		return -EXDEV;
> +
> +	ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
> +					flags);
> +	if (ret < 0)
> +		return ret;
>  
>  	ret = rw_verify_area(READ, file_in, &pos_in, len);
>  	if (unlikely(ret))
> @@ -1611,22 +1615,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>  	if (unlikely(ret))
>  		return ret;
>  
> -	if (!(file_in->f_mode & FMODE_READ) ||
> -	    !(file_out->f_mode & FMODE_WRITE) ||
> -	    (file_out->f_flags & O_APPEND))
> -		return -EBADF;
> -
> -	/* this could be relaxed once a method supports cross-fs copies */
> -	if (inode_in->i_sb != inode_out->i_sb)
> -		return -EXDEV;
> -
>  	if (len == 0)
>  		return 0;
>  
> -	/* If the source range crosses EOF, fail the copy */
> -	if (pos_in >= i_size(inode_in) || pos_in + len > i_size(inode_in))
> -		return -EINVAL;
> -
>  	file_start_write(file_out);
>  
>  	/*
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index a4478764cf63..0d9d2d93d4df 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -3022,6 +3022,9 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
>  extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
>  				struct file *file_out, loff_t pos_out,
>  				loff_t *count, unsigned int remap_flags);
> +extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
> +				struct file *file_out, loff_t pos_out,
> +				size_t *count, unsigned int flags);
>  extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
>  extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
>  extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 81adec8ee02c..0a170425935b 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2975,6 +2975,75 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
>  	return 0;
>  }
>  
> +
> +/*
> + * Performs necessary checks before doing a file copy
> + *
> + * Can adjust amount of bytes to copy
> + * Returns appropriate error code that caller should return or
> + * zero in case the copy should be allowed.
> + */
> +int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
> +			 struct file *file_out, loff_t pos_out,
> +			 size_t *req_count, unsigned int flags)
> +{
> +	struct inode *inode_in = file_inode(file_in);
> +	struct inode *inode_out = file_inode(file_out);
> +	uint64_t count = *req_count;
> +	uint64_t bcount;
> +	loff_t size_in, size_out;
> +	loff_t bs = inode_out->i_sb->s_blocksize;
> +	int ret;
> +
> +	/* Don't touch certain kinds of inodes */
> +	if (IS_IMMUTABLE(inode_out))
> +		return -EPERM;
> +
> +	if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
> +		return -ETXTBSY;
> +
> +	/* Don't copy dirs, pipes, sockets... */
> +	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
> +		return -EISDIR;
> +	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
> +		return -EINVAL;
> +
> +	if (!(file_in->f_mode & FMODE_READ) ||
> +	    !(file_out->f_mode & FMODE_WRITE) ||
> +	    (file_out->f_flags & O_APPEND))
> +		return -EBADF;

These five checks are the same as the ones that are split between
do_clone_file_range and generic_remap_file_range_prep.  Perhaps they
should be factored into a single function that can be called from the
do_clone_file_range function as well as do_copy_file_range?

(I suspect also vfs_dedupe_file_range_one() should call it too, but the
dedupe code is so grotty and weird...)

--D

> +
> +	/* Ensure offsets don't wrap. */
> +	if (pos_in + count < pos_in || pos_out + count < pos_out)
> +		return -EOVERFLOW;
> +
> +	size_in = i_size_read(inode_in);
> +	size_out = i_size_read(inode_out);
> +
> +	/* If the source range crosses EOF, fail the copy */
> +	if (pos_in >= size_in)
> +		return -EINVAL;
> +	if (pos_in + count > size_in)
> +		return -EINVAL;
> +
> +	ret = generic_access_check_limits(file_in, pos_in, &count);
> +	if (ret)
> +		return ret;
> +
> +	ret = generic_write_check_limits(file_out, pos_out, &count);
> +	if (ret)
> +		return ret;
> +
> +	/* Don't allow overlapped copying within the same file. */
> +	if (inode_in == inode_out &&
> +	    pos_out + count > pos_in &&
> +	    pos_out < pos_in + count)
> +		return -EINVAL;
> +
> +	*req_count = count;
> +	return 0;
> +}
> +
>  int pagecache_write_begin(struct file *file, struct address_space *mapping,
>  				loff_t pos, unsigned len, unsigned flags,
>  				struct page **pagep, void **fsdata)
> -- 
> 2.19.1
>
Olga Kornievskaia Dec. 3, 2018, 9:33 p.m. UTC | #3
On Mon, Dec 3, 2018 at 3:34 AM Dave Chinner <david@fromorbit.com> wrote:
>
> From: Dave Chinner <dchinner@redhat.com>
>
> Like the clone and dedupe interfaces we've recently fixed, the
> copy_file_range() implementation is missing basic sanity, limits and
> boundary condition tests on the parameters that are passed to it
> from userspace. Create a new "generic_copy_file_checks()" function
> modelled on the generic_remap_checks() function to provide this
> missing functionality.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/read_write.c    | 27 ++++++------------
>  include/linux/fs.h |  3 ++
>  mm/filemap.c       | 69 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 81 insertions(+), 18 deletions(-)
>
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 44339b44accc..69809345977e 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1578,7 +1578,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
>                 return file_out->f_op->copy_file_range(file_in, pos_in, file_out,
>                                                       pos_out, len, flags);
>
> -       return generic_copy_file_range(file_in, &pos_in, file_out, &pos_out,
> +       return generic_copy_file_range(file_in, pos_in, file_out, pos_out,
>                                         len, flags);
>  }
>
> @@ -1598,10 +1598,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>         if (flags != 0)
>                 return -EINVAL;
>
> -       if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
> -               return -EISDIR;
> -       if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
> -               return -EINVAL;
> +       /* this could be relaxed once a method supports cross-fs copies */
> +       if (inode_in->i_sb != inode_out->i_sb)
> +               return -EXDEV;
> +
> +       ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
> +                                       flags);
> +       if (ret < 0)
> +               return ret;
>
>         ret = rw_verify_area(READ, file_in, &pos_in, len);
>         if (unlikely(ret))
> @@ -1611,22 +1615,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>         if (unlikely(ret))
>                 return ret;
>
> -       if (!(file_in->f_mode & FMODE_READ) ||
> -           !(file_out->f_mode & FMODE_WRITE) ||
> -           (file_out->f_flags & O_APPEND))
> -               return -EBADF;
> -
> -       /* this could be relaxed once a method supports cross-fs copies */
> -       if (inode_in->i_sb != inode_out->i_sb)
> -               return -EXDEV;
> -
>         if (len == 0)
>                 return 0;
>
> -       /* If the source range crosses EOF, fail the copy */
> -       if (pos_in >= i_size(inode_in) || pos_in + len > i_size(inode_in))
> -               return -EINVAL;
> -
>         file_start_write(file_out);
>
>         /*
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index a4478764cf63..0d9d2d93d4df 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -3022,6 +3022,9 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
>  extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
>                                 struct file *file_out, loff_t pos_out,
>                                 loff_t *count, unsigned int remap_flags);
> +extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
> +                               struct file *file_out, loff_t pos_out,
> +                               size_t *count, unsigned int flags);
>  extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
>  extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
>  extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 81adec8ee02c..0a170425935b 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2975,6 +2975,75 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
>         return 0;
>  }
>
> +
> +/*
> + * Performs necessary checks before doing a file copy
> + *
> + * Can adjust amount of bytes to copy
> + * Returns appropriate error code that caller should return or
> + * zero in case the copy should be allowed.
> + */
> +int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
> +                        struct file *file_out, loff_t pos_out,
> +                        size_t *req_count, unsigned int flags)
> +{
> +       struct inode *inode_in = file_inode(file_in);
> +       struct inode *inode_out = file_inode(file_out);
> +       uint64_t count = *req_count;
> +       uint64_t bcount;
> +       loff_t size_in, size_out;
> +       loff_t bs = inode_out->i_sb->s_blocksize;
> +       int ret;

I got compile warnings:

mm/filemap.c: In function ‘generic_copy_file_checks’:
mm/filemap.c:2995:9: warning: unused variable ‘bs’ [-Wunused-variable]
  loff_t bs = inode_out->i_sb->s_blocksize;
         ^
mm/filemap.c:2993:11: warning: unused variable ‘bcount’ [-Wunused-variable]
  uint64_t bcount;

> +
> +       /* Don't touch certain kinds of inodes */
> +       if (IS_IMMUTABLE(inode_out))
> +               return -EPERM;
> +
> +       if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
> +               return -ETXTBSY;
> +
> +       /* Don't copy dirs, pipes, sockets... */
> +       if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
> +               return -EISDIR;
> +       if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
> +               return -EINVAL;
> +
> +       if (!(file_in->f_mode & FMODE_READ) ||
> +           !(file_out->f_mode & FMODE_WRITE) ||
> +           (file_out->f_flags & O_APPEND))
> +               return -EBADF;
> +
> +       /* Ensure offsets don't wrap. */
> +       if (pos_in + count < pos_in || pos_out + count < pos_out)
> +               return -EOVERFLOW;
> +
> +       size_in = i_size_read(inode_in);
> +       size_out = i_size_read(inode_out);
> +
> +       /* If the source range crosses EOF, fail the copy */
> +       if (pos_in >= size_in)
> +               return -EINVAL;
> +       if (pos_in + count > size_in)
> +               return -EINVAL;
> +
> +       ret = generic_access_check_limits(file_in, pos_in, &count);
> +       if (ret)
> +               return ret;
> +
> +       ret = generic_write_check_limits(file_out, pos_out, &count);
> +       if (ret)
> +               return ret;
> +
> +       /* Don't allow overlapped copying within the same file. */
> +       if (inode_in == inode_out &&
> +           pos_out + count > pos_in &&
> +           pos_out < pos_in + count)
> +               return -EINVAL;
> +
> +       *req_count = count;
> +       return 0;
> +}
> +
>  int pagecache_write_begin(struct file *file, struct address_space *mapping,
>                                 loff_t pos, unsigned len, unsigned flags,
>                                 struct page **pagep, void **fsdata)
> --
> 2.19.1
>
Dave Chinner Dec. 3, 2018, 11:04 p.m. UTC | #4
On Mon, Dec 03, 2018 at 04:33:30PM -0500, Olga Kornievskaia wrote:
> On Mon, Dec 3, 2018 at 3:34 AM Dave Chinner <david@fromorbit.com> wrote:
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -3022,6 +3022,9 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
> >  extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
> >                                 struct file *file_out, loff_t pos_out,
> >                                 loff_t *count, unsigned int remap_flags);
> > +extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
> > +                               struct file *file_out, loff_t pos_out,
> > +                               size_t *count, unsigned int flags);
> >  extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
> >  extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
> >  extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index 81adec8ee02c..0a170425935b 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -2975,6 +2975,75 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
> >         return 0;
> >  }
> >
> > +
> > +/*
> > + * Performs necessary checks before doing a file copy
> > + *
> > + * Can adjust amount of bytes to copy
> > + * Returns appropriate error code that caller should return or
> > + * zero in case the copy should be allowed.
> > + */
> > +int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
> > +                        struct file *file_out, loff_t pos_out,
> > +                        size_t *req_count, unsigned int flags)
> > +{
> > +       struct inode *inode_in = file_inode(file_in);
> > +       struct inode *inode_out = file_inode(file_out);
> > +       uint64_t count = *req_count;
> > +       uint64_t bcount;
> > +       loff_t size_in, size_out;
> > +       loff_t bs = inode_out->i_sb->s_blocksize;
> > +       int ret;
> 
> I got compile warnings:
> 
> mm/filemap.c: In function ‘generic_copy_file_checks’:
> mm/filemap.c:2995:9: warning: unused variable ‘bs’ [-Wunused-variable]
>   loff_t bs = inode_out->i_sb->s_blocksize;
>          ^
> mm/filemap.c:2993:11: warning: unused variable ‘bcount’ [-Wunused-variable]
>   uint64_t bcount;

Strange. Yes, they certainly are there when I compile my stack up to
this point, but when I compile the whole series they aren't there.

I'll fix it up.

Cheers,

Dave.
Christoph Hellwig Dec. 4, 2018, 3:18 p.m. UTC | #5
Ok, this fixes the earlier i_size() compile failure..
Luis Henriques Dec. 12, 2018, 11:31 a.m. UTC | #6
Dave Chinner <david@fromorbit.com> writes:

<snip>

> +int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
> +			 struct file *file_out, loff_t pos_out,
> +			 size_t *req_count, unsigned int flags)
> +{

<snip>

> +	/* Don't allow overlapped copying within the same file. */
> +	if (inode_in == inode_out &&
> +	    pos_out + count > pos_in &&
> +	    pos_out < pos_in + count)
> +		return -EINVAL;

I was wondering if, with the above check, it would make sense to also
have an extra patch changing some filesystems (ceph, nfs and cifs) to
simply return -EOPNOTSUPP (instead of -EINVAL) when inode_in ==
inode_out.  Something like the diff below (not tested!).

This caught my attention when I was running the latest generic xfstests
on ceph and realised that I had some new failures due to the recently
added copy_file_range support in fsx by Darrick.  The failures were
caused by the usage of the same fd both as source and destination.

Cheers,
Darrick Wong Dec. 12, 2018, 4:42 p.m. UTC | #7
On Wed, Dec 12, 2018 at 11:31:23AM +0000, Luis Henriques wrote:
> Dave Chinner <david@fromorbit.com> writes:
> 
> <snip>
> 
> > +int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
> > +			 struct file *file_out, loff_t pos_out,
> > +			 size_t *req_count, unsigned int flags)
> > +{
> 
> <snip>
> 
> > +	/* Don't allow overlapped copying within the same file. */
> > +	if (inode_in == inode_out &&
> > +	    pos_out + count > pos_in &&
> > +	    pos_out < pos_in + count)
> > +		return -EINVAL;
> 
> I was wondering if, with the above check, it would make sense to also
> have an extra patch changing some filesystems (ceph, nfs and cifs) to
> simply return -EOPNOTSUPP (instead of -EINVAL) when inode_in ==
> inode_out.  Something like the diff below (not tested!).
> 
> This caught my attention when I was running the latest generic xfstests
> on ceph and realised that I had some new failures due to the recently
> added copy_file_range support in fsx by Darrick.  The failures were
> caused by the usage of the same fd both as source and destination.

Looks reasonable to /me/, since EOPNOTSUPP currently triggers the
splice fallback.

--D

> Cheers,
> -- 
> Luis
> 
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 189df668b6a0..c22ac60ec0ba 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1904,7 +1904,7 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
>  	bool do_final_copy = false;
>  
>  	if (src_inode == dst_inode)
> -		return -EINVAL;
> +		return -EOPNOTSUPP;
>  	if (ceph_snap(dst_inode) != CEPH_NOSNAP)
>  		return -EROFS;
>  
> diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> index 865706edb307..d4f63eae531e 100644
> --- a/fs/cifs/cifsfs.c
> +++ b/fs/cifs/cifsfs.c
> @@ -1068,7 +1068,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
>  	cifs_dbg(FYI, "copychunk range\n");
>  
>  	if (src_inode == target_inode) {
> -		rc = -EINVAL;
> +		rc = -EOPNOTSUPP;
>  		goto out;
>  	}
>  
> diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
> index 46d691ba04bc..910a2abade92 100644
> --- a/fs/nfs/nfs4file.c
> +++ b/fs/nfs/nfs4file.c
> @@ -136,7 +136,7 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
>  	ssize_t ret;
>  
>  	if (file_inode(file_in) == file_inode(file_out))
> -		return -EINVAL;
> +		return -EOPNOTSUPP;
>  retry:
>  	ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
>  	if (ret == -EAGAIN)
Olga Kornievskaia Dec. 12, 2018, 6:55 p.m. UTC | #8
On Wed, Dec 12, 2018 at 6:31 AM Luis Henriques <lhenriques@suse.com> wrote:
>
> Dave Chinner <david@fromorbit.com> writes:
>
> <snip>
>
> > +int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
> > +                      struct file *file_out, loff_t pos_out,
> > +                      size_t *req_count, unsigned int flags)
> > +{
>
> <snip>
>
> > +     /* Don't allow overlapped copying within the same file. */
> > +     if (inode_in == inode_out &&
> > +         pos_out + count > pos_in &&
> > +         pos_out < pos_in + count)
> > +             return -EINVAL;
>
> I was wondering if, with the above check, it would make sense to also
> have an extra patch changing some filesystems (ceph, nfs and cifs) to
> simply return -EOPNOTSUPP (instead of -EINVAL) when inode_in ==
> inode_out.  Something like the diff below (not tested!).
>
> This caught my attention when I was running the latest generic xfstests
> on ceph and realised that I had some new failures due to the recently
> added copy_file_range support in fsx by Darrick.  The failures were
> caused by the usage of the same fd both as source and destination.
>
> Cheers,
> --
> Luis
>
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 189df668b6a0..c22ac60ec0ba 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1904,7 +1904,7 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
>         bool do_final_copy = false;
>
>         if (src_inode == dst_inode)
> -               return -EINVAL;
> +               return -EOPNOTSUPP;
>         if (ceph_snap(dst_inode) != CEPH_NOSNAP)
>                 return -EROFS;
>
> diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> index 865706edb307..d4f63eae531e 100644
> --- a/fs/cifs/cifsfs.c
> +++ b/fs/cifs/cifsfs.c
> @@ -1068,7 +1068,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
>         cifs_dbg(FYI, "copychunk range\n");
>
>         if (src_inode == target_inode) {
> -               rc = -EINVAL;
> +               rc = -EOPNOTSUPP;
>                 goto out;
>         }
>
> diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
> index 46d691ba04bc..910a2abade92 100644
> --- a/fs/nfs/nfs4file.c
> +++ b/fs/nfs/nfs4file.c
> @@ -136,7 +136,7 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
>         ssize_t ret;
>
>         if (file_inode(file_in) == file_inode(file_out))
> -               return -EINVAL;
> +               return -EOPNOTSUPP;

Please don't change the NFS bits. This is against the NFS
specifications. RFC 7862 15.2.3

(snippet)
SAVED_FH and CURRENT_FH must be different files.  If SAVED_FH and
   CURRENT_FH refer to the same file, the operation MUST fail with
   NFS4ERR_INVAL.

>  retry:
>         ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
>         if (ret == -EAGAIN)
Matthew Wilcox (Oracle) Dec. 12, 2018, 7:42 p.m. UTC | #9
On Wed, Dec 12, 2018 at 01:55:28PM -0500, Olga Kornievskaia wrote:
> On Wed, Dec 12, 2018 at 6:31 AM Luis Henriques <lhenriques@suse.com> wrote:
> > I was wondering if, with the above check, it would make sense to also
> > have an extra patch changing some filesystems (ceph, nfs and cifs) to
> > simply return -EOPNOTSUPP (instead of -EINVAL) when inode_in ==
> > inode_out.  Something like the diff below (not tested!).

> > +++ b/fs/nfs/nfs4file.c
> > @@ -136,7 +136,7 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
> >         ssize_t ret;
> >
> >         if (file_inode(file_in) == file_inode(file_out))
> > -               return -EINVAL;
> > +               return -EOPNOTSUPP;
> 
> Please don't change the NFS bits. This is against the NFS
> specifications. RFC 7862 15.2.3
> 
> (snippet)
> SAVED_FH and CURRENT_FH must be different files.  If SAVED_FH and
>    CURRENT_FH refer to the same file, the operation MUST fail with
>    NFS4ERR_INVAL.

I don't see how that applies.  That refers to a requirement _in the
protocol_ that determines what the server MUST do if the client sends
it two FHs which refer to the same file.

What we're talking about here is how a Linux filesystem behaves when
receiving a copy_file_range() referring to the same file.  As long as
the Linux filesystem doesn't react by sending out one of these invalid
protocol messages, I don't see the problem.
Olga Kornievskaia Dec. 12, 2018, 8:22 p.m. UTC | #10
On Wed, Dec 12, 2018 at 2:43 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Wed, Dec 12, 2018 at 01:55:28PM -0500, Olga Kornievskaia wrote:
> > On Wed, Dec 12, 2018 at 6:31 AM Luis Henriques <lhenriques@suse.com> wrote:
> > > I was wondering if, with the above check, it would make sense to also
> > > have an extra patch changing some filesystems (ceph, nfs and cifs) to
> > > simply return -EOPNOTSUPP (instead of -EINVAL) when inode_in ==
> > > inode_out.  Something like the diff below (not tested!).
>
> > > +++ b/fs/nfs/nfs4file.c
> > > @@ -136,7 +136,7 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
> > >         ssize_t ret;
> > >
> > >         if (file_inode(file_in) == file_inode(file_out))
> > > -               return -EINVAL;
> > > +               return -EOPNOTSUPP;
> >
> > Please don't change the NFS bits. This is against the NFS
> > specifications. RFC 7862 15.2.3
> >
> > (snippet)
> > SAVED_FH and CURRENT_FH must be different files.  If SAVED_FH and
> >    CURRENT_FH refer to the same file, the operation MUST fail with
> >    NFS4ERR_INVAL.
>
> I don't see how that applies.  That refers to a requirement _in the
> protocol_ that determines what the server MUST do if the client sends
> it two FHs which refer to the same file.
>
> What we're talking about here is how a Linux filesystem behaves when
> receiving a copy_file_range() referring to the same file.  As long as
> the Linux filesystem doesn't react by sending out one of these invalid
> protocol messages, I don't see the problem.

Ok then this should be changed to call generic_copy_file_range() not
returning the EOPNOTSUPP since there is no longer fallback in vfs to
call the generic_copy_file_range() and in turn responsibility of each
file system.
Luis Henriques Dec. 13, 2018, 10:29 a.m. UTC | #11
Olga Kornievskaia <olga.kornievskaia@gmail.com> writes:

> On Wed, Dec 12, 2018 at 2:43 PM Matthew Wilcox <willy@infradead.org> wrote:
>>
>> On Wed, Dec 12, 2018 at 01:55:28PM -0500, Olga Kornievskaia wrote:
>> > On Wed, Dec 12, 2018 at 6:31 AM Luis Henriques <lhenriques@suse.com> wrote:
>> > > I was wondering if, with the above check, it would make sense to also
>> > > have an extra patch changing some filesystems (ceph, nfs and cifs) to
>> > > simply return -EOPNOTSUPP (instead of -EINVAL) when inode_in ==
>> > > inode_out.  Something like the diff below (not tested!).
>>
>> > > +++ b/fs/nfs/nfs4file.c
>> > > @@ -136,7 +136,7 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
>> > >         ssize_t ret;
>> > >
>> > >         if (file_inode(file_in) == file_inode(file_out))
>> > > -               return -EINVAL;
>> > > +               return -EOPNOTSUPP;
>> >
>> > Please don't change the NFS bits. This is against the NFS
>> > specifications. RFC 7862 15.2.3
>> >
>> > (snippet)
>> > SAVED_FH and CURRENT_FH must be different files.  If SAVED_FH and
>> >    CURRENT_FH refer to the same file, the operation MUST fail with
>> >    NFS4ERR_INVAL.
>>
>> I don't see how that applies.  That refers to a requirement _in the
>> protocol_ that determines what the server MUST do if the client sends
>> it two FHs which refer to the same file.
>>
>> What we're talking about here is how a Linux filesystem behaves when
>> receiving a copy_file_range() referring to the same file.  As long as
>> the Linux filesystem doesn't react by sending out one of these invalid
>> protocol messages, I don't see the problem.
>
> Ok then this should be changed to call generic_copy_file_range() not
> returning the EOPNOTSUPP since there is no longer fallback in vfs to
> call the generic_copy_file_range() and in turn responsibility of each
> file system.

Ah, I didn't look close enough and didn't realised the nfs code was
doing something slightly different from the other 2 FSs.  In that case
simply deleting that check seems to be enough to fallback to the vfs
generic_copy_file_range.

Anyway, please find below an updated patch (with proper changelog).

Cheers,
diff mbox series

Patch

diff --git a/fs/read_write.c b/fs/read_write.c
index 44339b44accc..69809345977e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1578,7 +1578,7 @@  static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
 		return file_out->f_op->copy_file_range(file_in, pos_in, file_out,
 						      pos_out, len, flags);
 
-	return generic_copy_file_range(file_in, &pos_in, file_out, &pos_out,
+	return generic_copy_file_range(file_in, pos_in, file_out, pos_out,
 					len, flags);
 }
 
@@ -1598,10 +1598,14 @@  ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 	if (flags != 0)
 		return -EINVAL;
 
-	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
-		return -EISDIR;
-	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
-		return -EINVAL;
+	/* this could be relaxed once a method supports cross-fs copies */
+	if (inode_in->i_sb != inode_out->i_sb)
+		return -EXDEV;
+
+	ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
+					flags);
+	if (ret < 0)
+		return ret;
 
 	ret = rw_verify_area(READ, file_in, &pos_in, len);
 	if (unlikely(ret))
@@ -1611,22 +1615,9 @@  ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 	if (unlikely(ret))
 		return ret;
 
-	if (!(file_in->f_mode & FMODE_READ) ||
-	    !(file_out->f_mode & FMODE_WRITE) ||
-	    (file_out->f_flags & O_APPEND))
-		return -EBADF;
-
-	/* this could be relaxed once a method supports cross-fs copies */
-	if (inode_in->i_sb != inode_out->i_sb)
-		return -EXDEV;
-
 	if (len == 0)
 		return 0;
 
-	/* If the source range crosses EOF, fail the copy */
-	if (pos_in >= i_size(inode_in) || pos_in + len > i_size(inode_in))
-		return -EINVAL;
-
 	file_start_write(file_out);
 
 	/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a4478764cf63..0d9d2d93d4df 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3022,6 +3022,9 @@  extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
 extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
 				struct file *file_out, loff_t pos_out,
 				loff_t *count, unsigned int remap_flags);
+extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				size_t *count, unsigned int flags);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/mm/filemap.c b/mm/filemap.c
index 81adec8ee02c..0a170425935b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2975,6 +2975,75 @@  int generic_remap_checks(struct file *file_in, loff_t pos_in,
 	return 0;
 }
 
+
+/*
+ * Performs necessary checks before doing a file copy
+ *
+ * Can adjust amount of bytes to copy
+ * Returns appropriate error code that caller should return or
+ * zero in case the copy should be allowed.
+ */
+int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
+			 struct file *file_out, loff_t pos_out,
+			 size_t *req_count, unsigned int flags)
+{
+	struct inode *inode_in = file_inode(file_in);
+	struct inode *inode_out = file_inode(file_out);
+	uint64_t count = *req_count;
+	uint64_t bcount;
+	loff_t size_in, size_out;
+	loff_t bs = inode_out->i_sb->s_blocksize;
+	int ret;
+
+	/* Don't touch certain kinds of inodes */
+	if (IS_IMMUTABLE(inode_out))
+		return -EPERM;
+
+	if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+		return -ETXTBSY;
+
+	/* Don't copy dirs, pipes, sockets... */
+	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+		return -EISDIR;
+	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+		return -EINVAL;
+
+	if (!(file_in->f_mode & FMODE_READ) ||
+	    !(file_out->f_mode & FMODE_WRITE) ||
+	    (file_out->f_flags & O_APPEND))
+		return -EBADF;
+
+	/* Ensure offsets don't wrap. */
+	if (pos_in + count < pos_in || pos_out + count < pos_out)
+		return -EOVERFLOW;
+
+	size_in = i_size_read(inode_in);
+	size_out = i_size_read(inode_out);
+
+	/* If the source range crosses EOF, fail the copy */
+	if (pos_in >= size_in)
+		return -EINVAL;
+	if (pos_in + count > size_in)
+		return -EINVAL;
+
+	ret = generic_access_check_limits(file_in, pos_in, &count);
+	if (ret)
+		return ret;
+
+	ret = generic_write_check_limits(file_out, pos_out, &count);
+	if (ret)
+		return ret;
+
+	/* Don't allow overlapped copying within the same file. */
+	if (inode_in == inode_out &&
+	    pos_out + count > pos_in &&
+	    pos_out < pos_in + count)
+		return -EINVAL;
+
+	*req_count = count;
+	return 0;
+}
+
 int pagecache_write_begin(struct file *file, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned flags,
 				struct page **pagep, void **fsdata)