diff mbox series

[RFC,02/53] netfs: Track the fpos above which the server has no data

Message ID 20231013155727.2217781-3-dhowells@redhat.com
State New
Headers show
Series netfs, afs, cifs: Delegate high-level I/O to netfslib | expand

Commit Message

David Howells Oct. 13, 2023, 3:56 p.m. UTC
Track the file position above which the server is not expected to have any
data and preemptively assume that we can simply fill blocks with zeroes
locally rather than attempting to download them - even if we've written
data back to the server.  Assume that any data that was written back above
that position is held in the local cache.  Call this the "zero point".

Make use of this to optimise away some reads from the server.  We need to
set the zero point in the following circumstances:

 (1) When we see an extant remote inode and have no cache for it, we set
     the zero_point to i_size.

 (2) On local inode creation, we set zero_point to 0.

 (3) On local truncation down, we reduce zero_point to the new i_size if
     the new i_size is lower.

 (4) On local truncation up, we don't change zero_point.

 (5) On local modification, we don't change zero_point.

 (6) On remote invalidation, we set zero_point to the new i_size.

 (7) If stored data is culled from the local cache, we must set zero_point
     above that if the data also got written to the server.

 (8) If dirty data is written back to the server, but not the local cache,
     we must set zero_point above that.

Assuming the above, any read from the server at or above the zero_point
position will return all zeroes.

The zero_point value can be stored in the cache, provided the above rules
are applied to it by any code that culls part of the local cache.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
---
 fs/afs/inode.c           | 13 +++++++------
 fs/netfs/buffered_read.c | 40 +++++++++++++++++++++++++---------------
 include/linux/netfs.h    |  5 +++++
 3 files changed, 37 insertions(+), 21 deletions(-)

Comments

Jeffrey Layton Oct. 16, 2023, 3:43 p.m. UTC | #1
On Fri, 2023-10-13 at 16:56 +0100, David Howells wrote:
> Track the file position above which the server is not expected to have any
> data and preemptively assume that we can simply fill blocks with zeroes
> locally rather than attempting to download them - even if we've written
> data back to the server.  Assume that any data that was written back above
> that position is held in the local cache.  Call this the "zero point".
> 
> Make use of this to optimise away some reads from the server.  We need to
> set the zero point in the following circumstances:
> 
>  (1) When we see an extant remote inode and have no cache for it, we set
>      the zero_point to i_size.
> 
>  (2) On local inode creation, we set zero_point to 0.
> 
>  (3) On local truncation down, we reduce zero_point to the new i_size if
>      the new i_size is lower.
> 
>  (4) On local truncation up, we don't change zero_point.
> 
>  (5) On local modification, we don't change zero_point.
> 
>  (6) On remote invalidation, we set zero_point to the new i_size.
> 
>  (7) If stored data is culled from the local cache, we must set zero_point
>      above that if the data also got written to the server.
> 

When you say culled here, it sounds like you're just throwing out the
dirty cache without writing the data back. That shouldn't be allowed
though, so I must be misunderstanding what you mean here. Can you
explain?

>  (8) If dirty data is written back to the server, but not the local cache,
>      we must set zero_point above that.
> 

How do you write back without writing to the local cache? I'm guessing
this means you're doing a non-buffered write?

> Assuming the above, any read from the server at or above the zero_point
> position will return all zeroes.
> 
> The zero_point value can be stored in the cache, provided the above rules
> are applied to it by any code that culls part of the local cache.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: Jeff Layton <jlayton@kernel.org>
> cc: linux-cachefs@redhat.com
> cc: linux-fsdevel@vger.kernel.org
> cc: linux-mm@kvack.org
> ---
>  fs/afs/inode.c           | 13 +++++++------
>  fs/netfs/buffered_read.c | 40 +++++++++++++++++++++++++---------------
>  include/linux/netfs.h    |  5 +++++
>  3 files changed, 37 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/afs/inode.c b/fs/afs/inode.c
> index 1c794a1896aa..46bc5574d6f5 100644
> --- a/fs/afs/inode.c
> +++ b/fs/afs/inode.c
> @@ -252,6 +252,7 @@ static void afs_apply_status(struct afs_operation *op,
>  		vnode->netfs.remote_i_size = status->size;
>  		if (change_size) {
>  			afs_set_i_size(vnode, status->size);
> +			vnode->netfs.zero_point = status->size;
>  			inode_set_ctime_to_ts(inode, t);
>  			inode->i_atime = t;
>  		}
> @@ -865,17 +866,17 @@ static void afs_setattr_success(struct afs_operation *op)
>  static void afs_setattr_edit_file(struct afs_operation *op)
>  {
>  	struct afs_vnode_param *vp = &op->file[0];
> -	struct inode *inode = &vp->vnode->netfs.inode;
> +	struct afs_vnode *vnode = vp->vnode;
>  
>  	if (op->setattr.attr->ia_valid & ATTR_SIZE) {
>  		loff_t size = op->setattr.attr->ia_size;
>  		loff_t i_size = op->setattr.old_i_size;
>  
> -		if (size < i_size)
> -			truncate_pagecache(inode, size);
> -		if (size != i_size)
> -			fscache_resize_cookie(afs_vnode_cache(vp->vnode),
> -					      vp->scb.status.size);
> +		if (size != i_size) {
> +			truncate_pagecache(&vnode->netfs.inode, size);
> +			netfs_resize_file(&vnode->netfs, size);
> +			fscache_resize_cookie(afs_vnode_cache(vnode), size);
> +		}

Isn't this an existing bug? AFS is not setting remote_i_size in the
setattr path currently? I think this probably ought to be done in a
preliminary AFS patch.

>
>  	}
>  }
>  
> diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
> index 2cd3ccf4c439..a2852fa64ad0 100644
> --- a/fs/netfs/buffered_read.c
> +++ b/fs/netfs/buffered_read.c
> @@ -147,6 +147,22 @@ static void netfs_rreq_expand(struct netfs_io_request *rreq,
>  	}
>  }
>  
> +/*
> + * Begin an operation, and fetch the stored zero point value from the cookie if
> + * available.
> + */
> +static int netfs_begin_cache_operation(struct netfs_io_request *rreq,
> +				       struct netfs_inode *ctx)
> +{
> +	int ret = -ENOBUFS;
> +
> +	if (ctx->ops->begin_cache_operation) {
> +		ret = ctx->ops->begin_cache_operation(rreq);
> +		/* TODO: Get the zero point value from the cache */
> +	}
> +	return ret;
> +}
> +
>  /**
>   * netfs_readahead - Helper to manage a read request
>   * @ractl: The description of the readahead request
> @@ -180,11 +196,9 @@ void netfs_readahead(struct readahead_control *ractl)
>  	if (IS_ERR(rreq))
>  		return;
>  
> -	if (ctx->ops->begin_cache_operation) {
> -		ret = ctx->ops->begin_cache_operation(rreq);
> -		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> -			goto cleanup_free;
> -	}
> +	ret = netfs_begin_cache_operation(rreq, ctx);
> +	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> +		goto cleanup_free;
>  
>  	netfs_stat(&netfs_n_rh_readahead);
>  	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
> @@ -238,11 +252,9 @@ int netfs_read_folio(struct file *file, struct folio *folio)
>  		goto alloc_error;
>  	}
>  
> -	if (ctx->ops->begin_cache_operation) {
> -		ret = ctx->ops->begin_cache_operation(rreq);
> -		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> -			goto discard;
> -	}
> +	ret = netfs_begin_cache_operation(rreq, ctx);
> +	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> +		goto discard;
>  
>  	netfs_stat(&netfs_n_rh_readpage);
>  	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
> @@ -390,11 +402,9 @@ int netfs_write_begin(struct netfs_inode *ctx,
>  	rreq->no_unlock_folio	= folio_index(folio);
>  	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
>  
> -	if (ctx->ops->begin_cache_operation) {
> -		ret = ctx->ops->begin_cache_operation(rreq);
> -		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> -			goto error_put;
> -	}
> +	ret = netfs_begin_cache_operation(rreq, ctx);
> +	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> +		goto error_put;
>  
>  	netfs_stat(&netfs_n_rh_write_begin);
>  	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
> diff --git a/include/linux/netfs.h b/include/linux/netfs.h
> index b447cb67f599..282511090ead 100644
> --- a/include/linux/netfs.h
> +++ b/include/linux/netfs.h
> @@ -129,6 +129,8 @@ struct netfs_inode {
>  	struct fscache_cookie	*cache;
>  #endif
>  	loff_t			remote_i_size;	/* Size of the remote file */
> +	loff_t			zero_point;	/* Size after which we assume there's no data
> +						 * on the server */

While I understand the concept, I'm not yet sure I understand how this
new value will be used. It might be better to merge this patch in with
the patch that adds the first user of this data.

>  };
>  
>  /*
> @@ -330,6 +332,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
>  {
>  	ctx->ops = ops;
>  	ctx->remote_i_size = i_size_read(&ctx->inode);
> +	ctx->zero_point = ctx->remote_i_size;
>  #if IS_ENABLED(CONFIG_FSCACHE)
>  	ctx->cache = NULL;
>  #endif
> @@ -345,6 +348,8 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
>  static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size)
>  {
>  	ctx->remote_i_size = new_i_size;
> +	if (new_i_size < ctx->zero_point)
> +		ctx->zero_point = new_i_size;
>  }
>  
>  /**
>
David Howells Oct. 16, 2023, 4:31 p.m. UTC | #2
Jeff Layton <jlayton@kernel.org> wrote:

> >  (7) If stored data is culled from the local cache, we must set zero_point
> >      above that if the data also got written to the server.
> 
> When you say culled here, it sounds like you're just throwing out the
> dirty cache without writing the data back. That shouldn't be allowed
> though, so I must be misunderstanding what you mean here. Can you
> explain?

I meant fscache specifically.  Too many caches - and some of them with the
same names!

> >  (8) If dirty data is written back to the server, but not the local cache,
> >      we must set zero_point above that.
> 
> How do you write back without writing to the local cache? I'm guessing
> this means you're doing a non-buffered write?

I meant fscache.  fscache can decline to honour a request to store data.

> > +		if (size != i_size) {
> > +			truncate_pagecache(&vnode->netfs.inode, size);
> > +			netfs_resize_file(&vnode->netfs, size);
> > +			fscache_resize_cookie(afs_vnode_cache(vnode), size);
> > +		}
> 
> Isn't this an existing bug? AFS is not setting remote_i_size in the
> setattr path currently? I think this probably ought to be done in a
> preliminary AFS patch.

It is being set.  afs_apply_status() sets it.  This is called by
afs_vnode_commit_status() which is called from afs_setattr_success().  The
value isn't updated until we get the return status from the server that
includes the new value.

> > +	loff_t			zero_point;	/* Size after which we assume there's no data
> > +						 * on the server */
> 
> While I understand the concept, I'm not yet sure I understand how this
> new value will be used. It might be better to merge this patch in with
> the patch that adds the first user of this data.

I'll consider it.  At least it might make sense to move them adjacent to each
other in the series.

David
diff mbox series

Patch

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 1c794a1896aa..46bc5574d6f5 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -252,6 +252,7 @@  static void afs_apply_status(struct afs_operation *op,
 		vnode->netfs.remote_i_size = status->size;
 		if (change_size) {
 			afs_set_i_size(vnode, status->size);
+			vnode->netfs.zero_point = status->size;
 			inode_set_ctime_to_ts(inode, t);
 			inode->i_atime = t;
 		}
@@ -865,17 +866,17 @@  static void afs_setattr_success(struct afs_operation *op)
 static void afs_setattr_edit_file(struct afs_operation *op)
 {
 	struct afs_vnode_param *vp = &op->file[0];
-	struct inode *inode = &vp->vnode->netfs.inode;
+	struct afs_vnode *vnode = vp->vnode;
 
 	if (op->setattr.attr->ia_valid & ATTR_SIZE) {
 		loff_t size = op->setattr.attr->ia_size;
 		loff_t i_size = op->setattr.old_i_size;
 
-		if (size < i_size)
-			truncate_pagecache(inode, size);
-		if (size != i_size)
-			fscache_resize_cookie(afs_vnode_cache(vp->vnode),
-					      vp->scb.status.size);
+		if (size != i_size) {
+			truncate_pagecache(&vnode->netfs.inode, size);
+			netfs_resize_file(&vnode->netfs, size);
+			fscache_resize_cookie(afs_vnode_cache(vnode), size);
+		}
 	}
 }
 
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 2cd3ccf4c439..a2852fa64ad0 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -147,6 +147,22 @@  static void netfs_rreq_expand(struct netfs_io_request *rreq,
 	}
 }
 
+/*
+ * Begin an operation, and fetch the stored zero point value from the cookie if
+ * available.
+ */
+static int netfs_begin_cache_operation(struct netfs_io_request *rreq,
+				       struct netfs_inode *ctx)
+{
+	int ret = -ENOBUFS;
+
+	if (ctx->ops->begin_cache_operation) {
+		ret = ctx->ops->begin_cache_operation(rreq);
+		/* TODO: Get the zero point value from the cache */
+	}
+	return ret;
+}
+
 /**
  * netfs_readahead - Helper to manage a read request
  * @ractl: The description of the readahead request
@@ -180,11 +196,9 @@  void netfs_readahead(struct readahead_control *ractl)
 	if (IS_ERR(rreq))
 		return;
 
-	if (ctx->ops->begin_cache_operation) {
-		ret = ctx->ops->begin_cache_operation(rreq);
-		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
-			goto cleanup_free;
-	}
+	ret = netfs_begin_cache_operation(rreq, ctx);
+	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+		goto cleanup_free;
 
 	netfs_stat(&netfs_n_rh_readahead);
 	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
@@ -238,11 +252,9 @@  int netfs_read_folio(struct file *file, struct folio *folio)
 		goto alloc_error;
 	}
 
-	if (ctx->ops->begin_cache_operation) {
-		ret = ctx->ops->begin_cache_operation(rreq);
-		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
-			goto discard;
-	}
+	ret = netfs_begin_cache_operation(rreq, ctx);
+	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+		goto discard;
 
 	netfs_stat(&netfs_n_rh_readpage);
 	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
@@ -390,11 +402,9 @@  int netfs_write_begin(struct netfs_inode *ctx,
 	rreq->no_unlock_folio	= folio_index(folio);
 	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
 
-	if (ctx->ops->begin_cache_operation) {
-		ret = ctx->ops->begin_cache_operation(rreq);
-		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
-			goto error_put;
-	}
+	ret = netfs_begin_cache_operation(rreq, ctx);
+	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+		goto error_put;
 
 	netfs_stat(&netfs_n_rh_write_begin);
 	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index b447cb67f599..282511090ead 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -129,6 +129,8 @@  struct netfs_inode {
 	struct fscache_cookie	*cache;
 #endif
 	loff_t			remote_i_size;	/* Size of the remote file */
+	loff_t			zero_point;	/* Size after which we assume there's no data
+						 * on the server */
 };
 
 /*
@@ -330,6 +332,7 @@  static inline void netfs_inode_init(struct netfs_inode *ctx,
 {
 	ctx->ops = ops;
 	ctx->remote_i_size = i_size_read(&ctx->inode);
+	ctx->zero_point = ctx->remote_i_size;
 #if IS_ENABLED(CONFIG_FSCACHE)
 	ctx->cache = NULL;
 #endif
@@ -345,6 +348,8 @@  static inline void netfs_inode_init(struct netfs_inode *ctx,
 static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size)
 {
 	ctx->remote_i_size = new_i_size;
+	if (new_i_size < ctx->zero_point)
+		ctx->zero_point = new_i_size;
 }
 
 /**