Patchwork Fix page reading error with mandatory locking style

login
register
mail settings
Submitter piastry@etersoft.ru
Date April 5, 2010, 6:01 a.m.
Message ID <201004051001.19667.piastry@etersoft.ru>
Download mbox | patch
Permalink /patch/49370/
State New
Headers show

Comments

piastry@etersoft.ru - April 5, 2010, 6:01 a.m.
From: Pavel Shilovsky <piastryyy@gmail.com>

[CIFS] Fix page reading error with mandatory locking style

If we lock file from one process from 1 to 2 and then try to read it
from another from 0 to 1 without direct mount options, reading fails.
That's why vfs tries to read whole page and fails due the lock from
the first process.

Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
---
 fs/cifs/cifsfs.c |   47 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 46 insertions(+), 1 deletions(-)
Jeff Layton - April 5, 2010, 12:20 p.m.
On Mon, 5 Apr 2010 10:01:19 +0400
piastry@etersoft.ru wrote:

> From: Pavel Shilovsky <piastryyy@gmail.com>
> 
> [CIFS] Fix page reading error with mandatory locking style
> 
> If we lock file from one process from 1 to 2 and then try to read it
> from another from 0 to 1 without direct mount options, reading fails.
> That's why vfs tries to read whole page and fails due the lock from
> the first process.
> 
> Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
> ---
>  fs/cifs/cifsfs.c |   47 ++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 46 insertions(+), 1 deletions(-)
> 
> diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> index ded66be..94a3b1f 100644
> --- a/fs/cifs/cifsfs.c
> +++ b/fs/cifs/cifsfs.c
> @@ -616,6 +616,51 @@ cifs_get_sb(struct file_system_type *fs_type,
>  	return 0;
>  }
>  
> +static ssize_t cifs_sync_read(struct file *filp, char __user *buf,
> +				size_t len, loff_t *ppos)
> +{
> +	int retval, read, posix_locking = 0;
> +	struct file_lock pfLock;
> +	struct cifsInodeInfo *cifsInode;
> +	struct cifs_sb_info *cifs_sb;
> +	struct cifsTconInfo *tcon;
> +
> +	cifs_sb = CIFS_SB(filp->f_path.dentry->d_sb);
> +	tcon = cifs_sb->tcon;
> +	if ((tcon->ses->capabilities & CAP_UNIX) &&
> +	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
> +	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
> +		posix_locking = 1;
> +
> +	retval = cifs_revalidate_file(filp);
> +	if (retval < 0)
> +		return (ssize_t)retval;
> +

I'd prefer to see the above call in the aio_read function instead.
Steve's not too crazy about the idea though as he thinks it'll hurt
performance. My argument is that it doesn't matter how fast you get a
return from the syscall if the answer is wrong. Shrug.

> +	memset(&pfLock, 0, sizeof(pfLock));
> +	pfLock.fl_type = F_RDLCK;
> +	pfLock.fl_start = *ppos;
> +	pfLock.fl_end = *ppos+len;
> +	cifsInode = CIFS_I(filp->f_path.dentry->d_inode);
> +	if (cifsInode == NULL)
> +		return -ENOENT;
> +
> +	if (!CIFS_I(filp->f_path.dentry->d_inode)->clientCanCacheRead &&
> +							!posix_locking) {
> +		retval = cifs_lock(filp, F_GETLK, &pfLock);
> +		if (retval < 0)
> +			return (ssize_t)retval;
> +		if (pfLock.fl_type == F_UNLCK)
> +			read = do_sync_read(filp, buf, len, ppos);
> +		else
> +			return -EACCES;

This looks really inefficient. A read call on the wire has suddenly
turned into multiple lock/unlock calls and then a read. Maybe it would
be better to attempt the read first and then just fall back to the slow
path if that fails with an error that indicates that part of that page
is locked?

> +	} else
> +		read = do_sync_read(filp, buf, len, ppos);
> +
> +	if (read == -EACCES && !posix_locking)
> +		read = cifs_user_read(filp, buf, len, ppos);
> +	return read;
> +}
> +
>  static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
>  				   unsigned long nr_segs, loff_t pos)
>  {
> @@ -737,7 +782,7 @@ const struct inode_operations cifs_symlink_inode_ops = {
>  };
>  
>  const struct file_operations cifs_file_ops = {
> -	.read = do_sync_read,
> +	.read = cifs_sync_read,
>  	.write = do_sync_write,
>  	.aio_read = generic_file_aio_read,
>  	.aio_write = cifs_file_aio_write,
piastry@etersoft.ru - April 5, 2010, 8:09 p.m.
В сообщении от 5 апреля 2010 16:20:54 вы написали:
> On Mon, 5 Apr 2010 10:01:19 +0400
> 
> piastry@etersoft.ru wrote:
> > From: Pavel Shilovsky <piastryyy@gmail.com>
> > 
> > [CIFS] Fix page reading error with mandatory locking style
> > 
> > If we lock file from one process from 1 to 2 and then try to read it
> > from another from 0 to 1 without direct mount options, reading fails.
> > That's why vfs tries to read whole page and fails due the lock from
> > the first process.
> > 
> > Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
> > ---
> > 
> >  fs/cifs/cifsfs.c |   47 ++++++++++++++++++++++++++++++++++++++++++++++-
> >  1 files changed, 46 insertions(+), 1 deletions(-)
> > 
> > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> > index ded66be..94a3b1f 100644
> > --- a/fs/cifs/cifsfs.c
> > +++ b/fs/cifs/cifsfs.c
> > @@ -616,6 +616,51 @@ cifs_get_sb(struct file_system_type *fs_type,
> > 
> >  	return 0;
> >  
> >  }
> > 
> > +static ssize_t cifs_sync_read(struct file *filp, char __user *buf,
> > +				size_t len, loff_t *ppos)
> > +{
> > +	int retval, read, posix_locking = 0;
> > +	struct file_lock pfLock;
> > +	struct cifsInodeInfo *cifsInode;
> > +	struct cifs_sb_info *cifs_sb;
> > +	struct cifsTconInfo *tcon;
> > +
> > +	cifs_sb = CIFS_SB(filp->f_path.dentry->d_sb);
> > +	tcon = cifs_sb->tcon;
> > +	if ((tcon->ses->capabilities & CAP_UNIX) &&
> > +	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
> > +	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
> > +		posix_locking = 1;
> > +
> > +	retval = cifs_revalidate_file(filp);
> > +	if (retval < 0)
> > +		return (ssize_t)retval;
> > +
> 
> I'd prefer to see the above call in the aio_read function instead.
> Steve's not too crazy about the idea though as he thinks it'll hurt
> performance. My argument is that it doesn't matter how fast you get a
> return from the syscall if the answer is wrong. Shrug.
> 
> > +	memset(&pfLock, 0, sizeof(pfLock));
> > +	pfLock.fl_type = F_RDLCK;
> > +	pfLock.fl_start = *ppos;
> > +	pfLock.fl_end = *ppos+len;
> > +	cifsInode = CIFS_I(filp->f_path.dentry->d_inode);
> > +	if (cifsInode == NULL)
> > +		return -ENOENT;
> > +
> > +	if (!CIFS_I(filp->f_path.dentry->d_inode)->clientCanCacheRead &&
> > +							!posix_locking) {
> > +		retval = cifs_lock(filp, F_GETLK, &pfLock);
> > +		if (retval < 0)
> > +			return (ssize_t)retval;
> > +		if (pfLock.fl_type == F_UNLCK)
> > +			read = do_sync_read(filp, buf, len, ppos);
> > +		else
> > +			return -EACCES;
> 
> This looks really inefficient. A read call on the wire has suddenly
> turned into multiple lock/unlock calls and then a read. Maybe it would
> be better to attempt the read first and then just fall back to the slow
> path if that fails with an error that indicates that part of that page
> is locked?

That won't work. This is the situation:
1. The first process has valid pages.
2. The second process sets a lock from 1 to 2.
3. The first process try to read from 1 to 2:
	It reads page and call returns successfully because client dosn't read from server - it has valid cache pages! That's wrong!

That's why we have to check if there are no locks preventing us and then we can read from cache.

> 
> > +	} else
> > +		read = do_sync_read(filp, buf, len, ppos);
> > +
> > +	if (read == -EACCES && !posix_locking)
> > +		read = cifs_user_read(filp, buf, len, ppos);
> > +	return read;
> > +}
> > +
> > 
> >  static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct
> >  iovec *iov,
> >  
> >  				   unsigned long nr_segs, loff_t pos)
> >  
> >  {
> > 
> > @@ -737,7 +782,7 @@ const struct inode_operations cifs_symlink_inode_ops
> > = {
> > 
> >  };
> >  
> >  const struct file_operations cifs_file_ops = {
> > 
> > -	.read = do_sync_read,
> > +	.read = cifs_sync_read,
> > 
> >  	.write = do_sync_write,
> >  	.aio_read = generic_file_aio_read,
> >  	.aio_write = cifs_file_aio_write,

--
Best regards,
Pavel Shilovsky.
Jeff Layton - April 5, 2010, 8:18 p.m.
On Tue, 6 Apr 2010 00:09:31 +0400
Pavel Shilovsky <piastry@etersoft.ru> wrote:

> В сообщении от 5 апреля 2010 16:20:54 вы написали:
> > On Mon, 5 Apr 2010 10:01:19 +0400
> > 
> > piastry@etersoft.ru wrote:
> > > From: Pavel Shilovsky <piastryyy@gmail.com>
> > > 
> > > [CIFS] Fix page reading error with mandatory locking style
> > > 
> > > If we lock file from one process from 1 to 2 and then try to read it
> > > from another from 0 to 1 without direct mount options, reading fails.
> > > That's why vfs tries to read whole page and fails due the lock from
> > > the first process.
> > > 
> > > Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
> > > ---
> > > 
> > >  fs/cifs/cifsfs.c |   47 ++++++++++++++++++++++++++++++++++++++++++++++-
> > >  1 files changed, 46 insertions(+), 1 deletions(-)
> > > 
> > > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> > > index ded66be..94a3b1f 100644
> > > --- a/fs/cifs/cifsfs.c
> > > +++ b/fs/cifs/cifsfs.c
> > > @@ -616,6 +616,51 @@ cifs_get_sb(struct file_system_type *fs_type,
> > > 
> > >  	return 0;
> > >  
> > >  }
> > > 
> > > +static ssize_t cifs_sync_read(struct file *filp, char __user *buf,
> > > +				size_t len, loff_t *ppos)
> > > +{
> > > +	int retval, read, posix_locking = 0;
> > > +	struct file_lock pfLock;
> > > +	struct cifsInodeInfo *cifsInode;
> > > +	struct cifs_sb_info *cifs_sb;
> > > +	struct cifsTconInfo *tcon;
> > > +
> > > +	cifs_sb = CIFS_SB(filp->f_path.dentry->d_sb);
> > > +	tcon = cifs_sb->tcon;
> > > +	if ((tcon->ses->capabilities & CAP_UNIX) &&
> > > +	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
> > > +	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
> > > +		posix_locking = 1;
> > > +
> > > +	retval = cifs_revalidate_file(filp);
> > > +	if (retval < 0)
> > > +		return (ssize_t)retval;
> > > +
> > 
> > I'd prefer to see the above call in the aio_read function instead.
> > Steve's not too crazy about the idea though as he thinks it'll hurt
> > performance. My argument is that it doesn't matter how fast you get a
> > return from the syscall if the answer is wrong. Shrug.
> > 
> > > +	memset(&pfLock, 0, sizeof(pfLock));
> > > +	pfLock.fl_type = F_RDLCK;
> > > +	pfLock.fl_start = *ppos;
> > > +	pfLock.fl_end = *ppos+len;
> > > +	cifsInode = CIFS_I(filp->f_path.dentry->d_inode);
> > > +	if (cifsInode == NULL)
> > > +		return -ENOENT;
> > > +
> > > +	if (!CIFS_I(filp->f_path.dentry->d_inode)->clientCanCacheRead &&
> > > +							!posix_locking) {
> > > +		retval = cifs_lock(filp, F_GETLK, &pfLock);
> > > +		if (retval < 0)
> > > +			return (ssize_t)retval;
> > > +		if (pfLock.fl_type == F_UNLCK)
> > > +			read = do_sync_read(filp, buf, len, ppos);
> > > +		else
> > > +			return -EACCES;
> > 
> > This looks really inefficient. A read call on the wire has suddenly
> > turned into multiple lock/unlock calls and then a read. Maybe it would
> > be better to attempt the read first and then just fall back to the slow
> > path if that fails with an error that indicates that part of that page
> > is locked?
> 
> That won't work. This is the situation:
> 1. The first process has valid pages.
> 2. The second process sets a lock from 1 to 2.
> 3. The first process try to read from 1 to 2:
> 	It reads page and call returns successfully because client dosn't read from server - it has valid cache pages! That's wrong!
> 
> That's why we have to check if there are no locks preventing us and then we can read from cache.
> 

Ahh, so you're wanting to enforce mandatory locking in the client even
when the cache is considered valid? If that's the case, it'd probably
be far more efficient to simply bypass the pagecache altogether for
reads than to try and do all of these lock calls on every read.

Both of these approaches sound like performance killers to me though.
What real-world problems does this solve? We necessarily have to make
tradeoffs in order to get decent performance out of the client. I'm
not sure fixing this is worth that cost.

> > 
> > > +	} else
> > > +		read = do_sync_read(filp, buf, len, ppos);
> > > +
> > > +	if (read == -EACCES && !posix_locking)
> > > +		read = cifs_user_read(filp, buf, len, ppos);
> > > +	return read;
> > > +}
> > > +
> > > 
> > >  static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct
> > >  iovec *iov,
> > >  
> > >  				   unsigned long nr_segs, loff_t pos)
> > >  
> > >  {
> > > 
> > > @@ -737,7 +782,7 @@ const struct inode_operations cifs_symlink_inode_ops
> > > = {
> > > 
> > >  };
> > >  
> > >  const struct file_operations cifs_file_ops = {
> > > 
> > > -	.read = do_sync_read,
> > > +	.read = cifs_sync_read,
> > > 
> > >  	.write = do_sync_write,
> > >  	.aio_read = generic_file_aio_read,
> > >  	.aio_write = cifs_file_aio_write,
> 
> --
> Best regards,
> Pavel Shilovsky.
>
Jeff Layton - April 5, 2010, 8:58 p.m.
On Tue, 6 Apr 2010 00:43:23 +0400
Pavel Shilovsky <piastry@etersoft.ru> wrote:

> В сообщении от 6 апреля 2010 00:18:58 вы написали:
> > On Tue, 6 Apr 2010 00:09:31 +0400
> > 
> > Pavel Shilovsky <piastry@etersoft.ru> wrote:
> > > В сообщении от 5 апреля 2010 16:20:54 вы написали:
> > > > On Mon, 5 Apr 2010 10:01:19 +0400
> > > > 
> > > > piastry@etersoft.ru wrote:
> > > > > From: Pavel Shilovsky <piastryyy@gmail.com>
> > > > > 
> > > > > [CIFS] Fix page reading error with mandatory locking style
> > > > > 
> > > > > If we lock file from one process from 1 to 2 and then try to read it
> > > > > from another from 0 to 1 without direct mount options, reading fails.
> > > > > That's why vfs tries to read whole page and fails due the lock from
> > > > > the first process.
> > > > > 
> > > > > Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
> > > > > ---
> > > > > 
> > > > >  fs/cifs/cifsfs.c |   47
> > > > >  ++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 46
> > > > >  insertions(+), 1 deletions(-)
> > > > > 
> > > > > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> > > > > index ded66be..94a3b1f 100644
> > > > > --- a/fs/cifs/cifsfs.c
> > > > > +++ b/fs/cifs/cifsfs.c
> > > > > @@ -616,6 +616,51 @@ cifs_get_sb(struct file_system_type *fs_type,
> > > > > 
> > > > >  	return 0;
> > > > >  
> > > > >  }
> > > > > 
> > > > > +static ssize_t cifs_sync_read(struct file *filp, char __user *buf,
> > > > > +				size_t len, loff_t *ppos)
> > > > > +{
> > > > > +	int retval, read, posix_locking = 0;
> > > > > +	struct file_lock pfLock;
> > > > > +	struct cifsInodeInfo *cifsInode;
> > > > > +	struct cifs_sb_info *cifs_sb;
> > > > > +	struct cifsTconInfo *tcon;
> > > > > +
> > > > > +	cifs_sb = CIFS_SB(filp->f_path.dentry->d_sb);
> > > > > +	tcon = cifs_sb->tcon;
> > > > > +	if ((tcon->ses->capabilities & CAP_UNIX) &&
> > > > > +	    (CIFS_UNIX_FCNTL_CAP &
> > > > > le64_to_cpu(tcon->fsUnixInfo.Capability)) && +	   
> > > > > ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
> > > > > +		posix_locking = 1;
> > > > > +
> > > > > +	retval = cifs_revalidate_file(filp);
> > > > > +	if (retval < 0)
> > > > > +		return (ssize_t)retval;
> > > > > +
> > > > 
> > > > I'd prefer to see the above call in the aio_read function instead.
> > > > Steve's not too crazy about the idea though as he thinks it'll hurt
> > > > performance. My argument is that it doesn't matter how fast you get a
> > > > return from the syscall if the answer is wrong. Shrug.
> > > > 
> > > > > +	memset(&pfLock, 0, sizeof(pfLock));
> > > > > +	pfLock.fl_type = F_RDLCK;
> > > > > +	pfLock.fl_start = *ppos;
> > > > > +	pfLock.fl_end = *ppos+len;
> > > > > +	cifsInode = CIFS_I(filp->f_path.dentry->d_inode);
> > > > > +	if (cifsInode == NULL)
> > > > > +		return -ENOENT;
> > > > > +
> > > > > +	if (!CIFS_I(filp->f_path.dentry->d_inode)->clientCanCacheRead &&
> > > > > +							!posix_locking) {
> > > > > +		retval = cifs_lock(filp, F_GETLK, &pfLock);
> > > > > +		if (retval < 0)
> > > > > +			return (ssize_t)retval;
> > > > > +		if (pfLock.fl_type == F_UNLCK)
> > > > > +			read = do_sync_read(filp, buf, len, ppos);
> > > > > +		else
> > > > > +			return -EACCES;
> > > > 
> > > > This looks really inefficient. A read call on the wire has suddenly
> > > > turned into multiple lock/unlock calls and then a read. Maybe it would
> > > > be better to attempt the read first and then just fall back to the slow
> > > > path if that fails with an error that indicates that part of that page
> > > > is locked?
> > > 
> > > That won't work. This is the situation:
> > > 1. The first process has valid pages.
> > > 2. The second process sets a lock from 1 to 2.
> > > 
> > > 3. The first process try to read from 1 to 2:
> > > 	It reads page and call returns successfully because client dosn't read
> > > 	from server - it has valid cache pages! That's wrong!
> > > 
> > > That's why we have to check if there are no locks preventing us and then
> > > we can read from cache.
> > 
> > Ahh, so you're wanting to enforce mandatory locking in the client even
> > when the cache is considered valid? If that's the case, it'd probably
> > be far more efficient to simply bypass the pagecache altogether for
> > reads than to try and do all of these lock calls on every read.
> 
> Yes, use the direct mount option is another approach.
> 
> > 
> > Both of these approaches sound like performance killers to me though.
> > What real-world problems does this solve? We necessarily have to make
> > tradeoffs in order to get decent performance out of the client. I'm
> > not sure fixing this is worth that cost.
> 
> There are many Windows applications that use locking ranges of files on SMB share (for example, database file that all clients use) as a mechanism for synchronization. If we want to 
> run them with WINE we need right behaviour of read/lock operations. We can switch this behviour on by mounting with special mount options ( for example, wine) . What do you think 
> about it? By default, it will be switched off.
> 

Re-cc'ing linux-cifs-client list...

What advantage would this new mount option give over forcedirectio?
piastry@etersoft.ru - April 5, 2010, 9:05 p.m.
В сообщении от 6 апреля 2010 00:58:05 вы написали:
> On Tue, 6 Apr 2010 00:43:23 +0400
> 
> Pavel Shilovsky <piastry@etersoft.ru> wrote:
> > В сообщении от 6 апреля 2010 00:18:58 вы написали:
> > > On Tue, 6 Apr 2010 00:09:31 +0400
> > > 
> > > Pavel Shilovsky <piastry@etersoft.ru> wrote:
> > > > В сообщении от 5 апреля 2010 16:20:54 вы написали:
> > > > > On Mon, 5 Apr 2010 10:01:19 +0400
> > > > > 
> > > > > piastry@etersoft.ru wrote:
> > > > > > From: Pavel Shilovsky <piastryyy@gmail.com>
> > > > > > 
> > > > > > [CIFS] Fix page reading error with mandatory locking style
> > > > > > 
> > > > > > If we lock file from one process from 1 to 2 and then try to read
> > > > > > it from another from 0 to 1 without direct mount options,
> > > > > > reading fails. That's why vfs tries to read whole page and fails
> > > > > > due the lock from the first process.
> > > > > > 
> > > > > > Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
> > > > > > ---
> > > > > > 
> > > > > >  fs/cifs/cifsfs.c |   47
> > > > > >  ++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed,
> > > > > >  46 insertions(+), 1 deletions(-)
> > > > > > 
> > > > > > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> > > > > > index ded66be..94a3b1f 100644
> > > > > > --- a/fs/cifs/cifsfs.c
> > > > > > +++ b/fs/cifs/cifsfs.c
> > > > > > @@ -616,6 +616,51 @@ cifs_get_sb(struct file_system_type
> > > > > > *fs_type,
> > > > > > 
> > > > > >  	return 0;
> > > > > >  
> > > > > >  }
> > > > > > 
> > > > > > +static ssize_t cifs_sync_read(struct file *filp, char __user
> > > > > > *buf, +				size_t len, loff_t *ppos)
> > > > > > +{
> > > > > > +	int retval, read, posix_locking = 0;
> > > > > > +	struct file_lock pfLock;
> > > > > > +	struct cifsInodeInfo *cifsInode;
> > > > > > +	struct cifs_sb_info *cifs_sb;
> > > > > > +	struct cifsTconInfo *tcon;
> > > > > > +
> > > > > > +	cifs_sb = CIFS_SB(filp->f_path.dentry->d_sb);
> > > > > > +	tcon = cifs_sb->tcon;
> > > > > > +	if ((tcon->ses->capabilities & CAP_UNIX) &&
> > > > > > +	    (CIFS_UNIX_FCNTL_CAP &
> > > > > > le64_to_cpu(tcon->fsUnixInfo.Capability)) && +
> > > > > > ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
> > > > > > +		posix_locking = 1;
> > > > > > +
> > > > > > +	retval = cifs_revalidate_file(filp);
> > > > > > +	if (retval < 0)
> > > > > > +		return (ssize_t)retval;
> > > > > > +
> > > > > 
> > > > > I'd prefer to see the above call in the aio_read function instead.
> > > > > Steve's not too crazy about the idea though as he thinks it'll hurt
> > > > > performance. My argument is that it doesn't matter how fast you get
> > > > > a return from the syscall if the answer is wrong. Shrug.
> > > > > 
> > > > > > +	memset(&pfLock, 0, sizeof(pfLock));
> > > > > > +	pfLock.fl_type = F_RDLCK;
> > > > > > +	pfLock.fl_start = *ppos;
> > > > > > +	pfLock.fl_end = *ppos+len;
> > > > > > +	cifsInode = CIFS_I(filp->f_path.dentry->d_inode);
> > > > > > +	if (cifsInode == NULL)
> > > > > > +		return -ENOENT;
> > > > > > +
> > > > > > +	if (!CIFS_I(filp->f_path.dentry->d_inode)->clientCanCacheRead
> > > > > > && +							!posix_locking) {
> > > > > > +		retval = cifs_lock(filp, F_GETLK, &pfLock);
> > > > > > +		if (retval < 0)
> > > > > > +			return (ssize_t)retval;
> > > > > > +		if (pfLock.fl_type == F_UNLCK)
> > > > > > +			read = do_sync_read(filp, buf, len, ppos);
> > > > > > +		else
> > > > > > +			return -EACCES;
> > > > > 
> > > > > This looks really inefficient. A read call on the wire has suddenly
> > > > > turned into multiple lock/unlock calls and then a read. Maybe it
> > > > > would be better to attempt the read first and then just fall back
> > > > > to the slow path if that fails with an error that indicates that
> > > > > part of that page is locked?
> > > > 
> > > > That won't work. This is the situation:
> > > > 1. The first process has valid pages.
> > > > 2. The second process sets a lock from 1 to 2.
> > > > 
> > > > 3. The first process try to read from 1 to 2:
> > > > 	It reads page and call returns successfully because client dosn't
> > > > 	read from server - it has valid cache pages! That's wrong!
> > > > 
> > > > That's why we have to check if there are no locks preventing us and
> > > > then we can read from cache.
> > > 
> > > Ahh, so you're wanting to enforce mandatory locking in the client even
> > > when the cache is considered valid? If that's the case, it'd probably
> > > be far more efficient to simply bypass the pagecache altogether for
> > > reads than to try and do all of these lock calls on every read.
> > 
> > Yes, use the direct mount option is another approach.
> > 
> > > Both of these approaches sound like performance killers to me though.
> > > What real-world problems does this solve? We necessarily have to make
> > > tradeoffs in order to get decent performance out of the client. I'm
> > > not sure fixing this is worth that cost.
> > 
> > There are many Windows applications that use locking ranges of files on
> > SMB share (for example, database file that all clients use) as a
> > mechanism for synchronization. If we want to run them with WINE we need
> > right behaviour of read/lock operations. We can switch this behviour on
> > by mounting with special mount options ( for example, wine) . What do
> > you think about it? By default, it will be switched off.
> 
> Re-cc'ing linux-cifs-client list...
Sorry, I have just noticed it and resent it before reading your email.
> 
> What advantage would this new mount option give over forcedirectio?
I think it will get performance improving in comparison with forcedirectio when client has valid cache.
It seems me as the most common variant but, of course, it depends on how often a file is changed on server.

--
Best regards,
Pavel Shilovsky.
Jeff Layton - April 5, 2010, 9:17 p.m.
On Tue, 6 Apr 2010 01:05:38 +0400
Pavel Shilovsky <piastry@etersoft.ru> wrote:

> В сообщении от 6 апреля 2010 00:58:05 вы написали:
> > On Tue, 6 Apr 2010 00:43:23 +0400
> > 
> > Pavel Shilovsky <piastry@etersoft.ru> wrote:
> > > В сообщении от 6 апреля 2010 00:18:58 вы написали:
> > > > On Tue, 6 Apr 2010 00:09:31 +0400
> > > > 
> > > > Pavel Shilovsky <piastry@etersoft.ru> wrote:
> > > > > В сообщении от 5 апреля 2010 16:20:54 вы написали:
> > > > > > On Mon, 5 Apr 2010 10:01:19 +0400
> > > > > > 
> > > > > > piastry@etersoft.ru wrote:
> > > > > > > From: Pavel Shilovsky <piastryyy@gmail.com>
> > > > > > > 
> > > > > > > [CIFS] Fix page reading error with mandatory locking style
> > > > > > > 
> > > > > > > If we lock file from one process from 1 to 2 and then try to read
> > > > > > > it from another from 0 to 1 without direct mount options,
> > > > > > > reading fails. That's why vfs tries to read whole page and fails
> > > > > > > due the lock from the first process.
> > > > > > > 
> > > > > > > Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
> > > > > > > ---
> > > > > > > 
> > > > > > >  fs/cifs/cifsfs.c |   47
> > > > > > >  ++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed,
> > > > > > >  46 insertions(+), 1 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> > > > > > > index ded66be..94a3b1f 100644
> > > > > > > --- a/fs/cifs/cifsfs.c
> > > > > > > +++ b/fs/cifs/cifsfs.c
> > > > > > > @@ -616,6 +616,51 @@ cifs_get_sb(struct file_system_type
> > > > > > > *fs_type,
> > > > > > > 
> > > > > > >  	return 0;
> > > > > > >  
> > > > > > >  }
> > > > > > > 
> > > > > > > +static ssize_t cifs_sync_read(struct file *filp, char __user
> > > > > > > *buf, +				size_t len, loff_t *ppos)
> > > > > > > +{
> > > > > > > +	int retval, read, posix_locking = 0;
> > > > > > > +	struct file_lock pfLock;
> > > > > > > +	struct cifsInodeInfo *cifsInode;
> > > > > > > +	struct cifs_sb_info *cifs_sb;
> > > > > > > +	struct cifsTconInfo *tcon;
> > > > > > > +
> > > > > > > +	cifs_sb = CIFS_SB(filp->f_path.dentry->d_sb);
> > > > > > > +	tcon = cifs_sb->tcon;
> > > > > > > +	if ((tcon->ses->capabilities & CAP_UNIX) &&
> > > > > > > +	    (CIFS_UNIX_FCNTL_CAP &
> > > > > > > le64_to_cpu(tcon->fsUnixInfo.Capability)) && +
> > > > > > > ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
> > > > > > > +		posix_locking = 1;
> > > > > > > +
> > > > > > > +	retval = cifs_revalidate_file(filp);
> > > > > > > +	if (retval < 0)
> > > > > > > +		return (ssize_t)retval;
> > > > > > > +
> > > > > > 
> > > > > > I'd prefer to see the above call in the aio_read function instead.
> > > > > > Steve's not too crazy about the idea though as he thinks it'll hurt
> > > > > > performance. My argument is that it doesn't matter how fast you get
> > > > > > a return from the syscall if the answer is wrong. Shrug.
> > > > > > 
> > > > > > > +	memset(&pfLock, 0, sizeof(pfLock));
> > > > > > > +	pfLock.fl_type = F_RDLCK;
> > > > > > > +	pfLock.fl_start = *ppos;
> > > > > > > +	pfLock.fl_end = *ppos+len;
> > > > > > > +	cifsInode = CIFS_I(filp->f_path.dentry->d_inode);
> > > > > > > +	if (cifsInode == NULL)
> > > > > > > +		return -ENOENT;
> > > > > > > +
> > > > > > > +	if (!CIFS_I(filp->f_path.dentry->d_inode)->clientCanCacheRead
> > > > > > > && +							!posix_locking) {
> > > > > > > +		retval = cifs_lock(filp, F_GETLK, &pfLock);
> > > > > > > +		if (retval < 0)
> > > > > > > +			return (ssize_t)retval;
> > > > > > > +		if (pfLock.fl_type == F_UNLCK)
> > > > > > > +			read = do_sync_read(filp, buf, len, ppos);
> > > > > > > +		else
> > > > > > > +			return -EACCES;
> > > > > > 
> > > > > > This looks really inefficient. A read call on the wire has suddenly
> > > > > > turned into multiple lock/unlock calls and then a read. Maybe it
> > > > > > would be better to attempt the read first and then just fall back
> > > > > > to the slow path if that fails with an error that indicates that
> > > > > > part of that page is locked?
> > > > > 
> > > > > That won't work. This is the situation:
> > > > > 1. The first process has valid pages.
> > > > > 2. The second process sets a lock from 1 to 2.
> > > > > 
> > > > > 3. The first process try to read from 1 to 2:
> > > > > 	It reads page and call returns successfully because client dosn't
> > > > > 	read from server - it has valid cache pages! That's wrong!
> > > > > 
> > > > > That's why we have to check if there are no locks preventing us and
> > > > > then we can read from cache.
> > > > 
> > > > Ahh, so you're wanting to enforce mandatory locking in the client even
> > > > when the cache is considered valid? If that's the case, it'd probably
> > > > be far more efficient to simply bypass the pagecache altogether for
> > > > reads than to try and do all of these lock calls on every read.
> > > 
> > > Yes, use the direct mount option is another approach.
> > > 
> > > > Both of these approaches sound like performance killers to me though.
> > > > What real-world problems does this solve? We necessarily have to make
> > > > tradeoffs in order to get decent performance out of the client. I'm
> > > > not sure fixing this is worth that cost.
> > > 
> > > There are many Windows applications that use locking ranges of files on
> > > SMB share (for example, database file that all clients use) as a
> > > mechanism for synchronization. If we want to run them with WINE we need
> > > right behaviour of read/lock operations. We can switch this behviour on
> > > by mounting with special mount options ( for example, wine) . What do
> > > you think about it? By default, it will be switched off.
> > 
> > Re-cc'ing linux-cifs-client list...
> Sorry, I have just noticed it and resent it before reading your email.
> > 
> > What advantage would this new mount option give over forcedirectio?
> I think it will get performance improving in comparison with forcedirectio when client has valid cache.
> It seems me as the most common variant but, of course, it depends on how often a file is changed on server.
> 

Have you tested this? If so, how? What sort of performance improvement
did it give?

I'm somewhat skeptical...it seems to me that any performance gain you
get from reading from the cache will be diminished by having to do a
series of lock/unlock calls to test to see whether the file is locked.
Jeff Layton - April 5, 2010, 10:45 p.m.
On Mon, 5 Apr 2010 17:17:43 -0400
Jeff Layton <jlayton@samba.org> wrote:

> On Tue, 6 Apr 2010 01:05:38 +0400
> Pavel Shilovsky <piastry@etersoft.ru> wrote:
> 
> > В сообщении от 6 апреля 2010 00:58:05 вы написали:
> > > On Tue, 6 Apr 2010 00:43:23 +0400
> > > 
> > > Pavel Shilovsky <piastry@etersoft.ru> wrote:
> > > > В сообщении от 6 апреля 2010 00:18:58 вы написали:
> > > > > On Tue, 6 Apr 2010 00:09:31 +0400
> > > > > 
> > > > > Pavel Shilovsky <piastry@etersoft.ru> wrote:
> > > > > > В сообщении от 5 апреля 2010 16:20:54 вы написали:
> > > > > > > On Mon, 5 Apr 2010 10:01:19 +0400
> > > > > > > 
> > > > > > > piastry@etersoft.ru wrote:
> > > > > > > > From: Pavel Shilovsky <piastryyy@gmail.com>
> > > > > > > > 
> > > > > > > > [CIFS] Fix page reading error with mandatory locking style
> > > > > > > > 
> > > > > > > > If we lock file from one process from 1 to 2 and then try to read
> > > > > > > > it from another from 0 to 1 without direct mount options,
> > > > > > > > reading fails. That's why vfs tries to read whole page and fails
> > > > > > > > due the lock from the first process.
> > > > > > > > 
> > > > > > > > Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
> > > > > > > > ---
> > > > > > > > 
> > > > > > > >  fs/cifs/cifsfs.c |   47
> > > > > > > >  ++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed,
> > > > > > > >  46 insertions(+), 1 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> > > > > > > > index ded66be..94a3b1f 100644
> > > > > > > > --- a/fs/cifs/cifsfs.c
> > > > > > > > +++ b/fs/cifs/cifsfs.c
> > > > > > > > @@ -616,6 +616,51 @@ cifs_get_sb(struct file_system_type
> > > > > > > > *fs_type,
> > > > > > > > 
> > > > > > > >  	return 0;
> > > > > > > >  
> > > > > > > >  }
> > > > > > > > 
> > > > > > > > +static ssize_t cifs_sync_read(struct file *filp, char __user
> > > > > > > > *buf, +				size_t len, loff_t *ppos)
> > > > > > > > +{
> > > > > > > > +	int retval, read, posix_locking = 0;
> > > > > > > > +	struct file_lock pfLock;
> > > > > > > > +	struct cifsInodeInfo *cifsInode;
> > > > > > > > +	struct cifs_sb_info *cifs_sb;
> > > > > > > > +	struct cifsTconInfo *tcon;
> > > > > > > > +
> > > > > > > > +	cifs_sb = CIFS_SB(filp->f_path.dentry->d_sb);
> > > > > > > > +	tcon = cifs_sb->tcon;
> > > > > > > > +	if ((tcon->ses->capabilities & CAP_UNIX) &&
> > > > > > > > +	    (CIFS_UNIX_FCNTL_CAP &
> > > > > > > > le64_to_cpu(tcon->fsUnixInfo.Capability)) && +
> > > > > > > > ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
> > > > > > > > +		posix_locking = 1;
> > > > > > > > +
> > > > > > > > +	retval = cifs_revalidate_file(filp);
> > > > > > > > +	if (retval < 0)
> > > > > > > > +		return (ssize_t)retval;
> > > > > > > > +
> > > > > > > 
> > > > > > > I'd prefer to see the above call in the aio_read function instead.
> > > > > > > Steve's not too crazy about the idea though as he thinks it'll hurt
> > > > > > > performance. My argument is that it doesn't matter how fast you get
> > > > > > > a return from the syscall if the answer is wrong. Shrug.
> > > > > > > 
> > > > > > > > +	memset(&pfLock, 0, sizeof(pfLock));
> > > > > > > > +	pfLock.fl_type = F_RDLCK;
> > > > > > > > +	pfLock.fl_start = *ppos;
> > > > > > > > +	pfLock.fl_end = *ppos+len;
> > > > > > > > +	cifsInode = CIFS_I(filp->f_path.dentry->d_inode);
> > > > > > > > +	if (cifsInode == NULL)
> > > > > > > > +		return -ENOENT;
> > > > > > > > +
> > > > > > > > +	if (!CIFS_I(filp->f_path.dentry->d_inode)->clientCanCacheRead
> > > > > > > > && +							!posix_locking) {
> > > > > > > > +		retval = cifs_lock(filp, F_GETLK, &pfLock);
> > > > > > > > +		if (retval < 0)
> > > > > > > > +			return (ssize_t)retval;
> > > > > > > > +		if (pfLock.fl_type == F_UNLCK)
> > > > > > > > +			read = do_sync_read(filp, buf, len, ppos);
> > > > > > > > +		else
> > > > > > > > +			return -EACCES;
> > > > > > > 
> > > > > > > This looks really inefficient. A read call on the wire has suddenly
> > > > > > > turned into multiple lock/unlock calls and then a read. Maybe it
> > > > > > > would be better to attempt the read first and then just fall back
> > > > > > > to the slow path if that fails with an error that indicates that
> > > > > > > part of that page is locked?
> > > > > > 
> > > > > > That won't work. This is the situation:
> > > > > > 1. The first process has valid pages.
> > > > > > 2. The second process sets a lock from 1 to 2.
> > > > > > 
> > > > > > 3. The first process try to read from 1 to 2:
> > > > > > 	It reads page and call returns successfully because client dosn't
> > > > > > 	read from server - it has valid cache pages! That's wrong!
> > > > > > 
> > > > > > That's why we have to check if there are no locks preventing us and
> > > > > > then we can read from cache.
> > > > > 
> > > > > Ahh, so you're wanting to enforce mandatory locking in the client even
> > > > > when the cache is considered valid? If that's the case, it'd probably
> > > > > be far more efficient to simply bypass the pagecache altogether for
> > > > > reads than to try and do all of these lock calls on every read.
> > > > 
> > > > Yes, use the direct mount option is another approach.
> > > > 
> > > > > Both of these approaches sound like performance killers to me though.
> > > > > What real-world problems does this solve? We necessarily have to make
> > > > > tradeoffs in order to get decent performance out of the client. I'm
> > > > > not sure fixing this is worth that cost.
> > > > 
> > > > There are many Windows applications that use locking ranges of files on
> > > > SMB share (for example, database file that all clients use) as a
> > > > mechanism for synchronization. If we want to run them with WINE we need
> > > > right behaviour of read/lock operations. We can switch this behviour on
> > > > by mounting with special mount options ( for example, wine) . What do
> > > > you think about it? By default, it will be switched off.
> > > 
> > > Re-cc'ing linux-cifs-client list...
> > Sorry, I have just noticed it and resent it before reading your email.
> > > 
> > > What advantage would this new mount option give over forcedirectio?
> > I think it will get performance improving in comparison with forcedirectio when client has valid cache.
> > It seems me as the most common variant but, of course, it depends on how often a file is changed on server.
> > 
> 
> Have you tested this? If so, how? What sort of performance improvement
> did it give?
> 
> I'm somewhat skeptical...it seems to me that any performance gain you
> get from reading from the cache will be diminished by having to do a
> series of lock/unlock calls to test to see whether the file is locked.
> 

We must also consider too that the tests you're proposing to add to the
read codepath are racy. Between the time that you check to see if a
lock is present and satisfy the read, the lock state could change.

IMO, I don't think we should expend too much effort in trying to achieve
perfect cache consistency with applications that do not do proper
locking.

What *would* be helpful is to make CIFS treat locks as cache
consistency points -- revalidate the cache on a lock and flush dirty
pages on an unlock. That way, users who do want tight cache consistency
could use posix locks as hints for it. That's what NFS does and it
works well.

Patch

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ded66be..94a3b1f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -616,6 +616,51 @@  cifs_get_sb(struct file_system_type *fs_type,
 	return 0;
 }
 
+static ssize_t cifs_sync_read(struct file *filp, char __user *buf,
+				size_t len, loff_t *ppos)
+{
+	int retval, read, posix_locking = 0;
+	struct file_lock pfLock;
+	struct cifsInodeInfo *cifsInode;
+	struct cifs_sb_info *cifs_sb;
+	struct cifsTconInfo *tcon;
+
+	cifs_sb = CIFS_SB(filp->f_path.dentry->d_sb);
+	tcon = cifs_sb->tcon;
+	if ((tcon->ses->capabilities & CAP_UNIX) &&
+	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
+	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+		posix_locking = 1;
+
+	retval = cifs_revalidate_file(filp);
+	if (retval < 0)
+		return (ssize_t)retval;
+
+	memset(&pfLock, 0, sizeof(pfLock));
+	pfLock.fl_type = F_RDLCK;
+	pfLock.fl_start = *ppos;
+	pfLock.fl_end = *ppos+len;
+	cifsInode = CIFS_I(filp->f_path.dentry->d_inode);
+	if (cifsInode == NULL)
+		return -ENOENT;
+
+	if (!CIFS_I(filp->f_path.dentry->d_inode)->clientCanCacheRead &&
+							!posix_locking) {
+		retval = cifs_lock(filp, F_GETLK, &pfLock);
+		if (retval < 0)
+			return (ssize_t)retval;
+		if (pfLock.fl_type == F_UNLCK)
+			read = do_sync_read(filp, buf, len, ppos);
+		else
+			return -EACCES;
+	} else
+		read = do_sync_read(filp, buf, len, ppos);
+
+	if (read == -EACCES && !posix_locking)
+		read = cifs_user_read(filp, buf, len, ppos);
+	return read;
+}
+
 static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 				   unsigned long nr_segs, loff_t pos)
 {
@@ -737,7 +782,7 @@  const struct inode_operations cifs_symlink_inode_ops = {
 };
 
 const struct file_operations cifs_file_ops = {
-	.read = do_sync_read,
+	.read = cifs_sync_read,
 	.write = do_sync_write,
 	.aio_read = generic_file_aio_read,
 	.aio_write = cifs_file_aio_write,